diff --git a/EmptyShort.py b/EmptyShort.py
index f07100c..6a36a28 100644
--- a/EmptyShort.py
+++ b/EmptyShort.py
@@ -742,6 +742,10 @@ class EmptyShort(IStrategy):
dataframe["volume_mean"] = dataframe["volume"].rolling(20).mean()
dataframe["volume_ratio"] = dataframe["volume"] / dataframe["volume_mean"]
+ dataframe['volume2'] = dataframe['volume']
+ dataframe.loc[dataframe['hapercent'] < 0, 'volume2'] *= -1
+ dataframe['volume_spike'] = (abs(dataframe['volume2']) > abs(dataframe['volume2'].rolling(window=20).mean() * 5)) \
+ & (dataframe['volume'].rolling(window=5).max() > 1000)
dataframe["market_state"] = 0
@@ -874,13 +878,6 @@ class EmptyShort(IStrategy):
# Compter les baisses / hausses consécutives
dataframe = self.calculateDownAndUp(dataframe, limit=0.0001)
- dataframe['volume2'] = dataframe['volume']
- dataframe.loc[dataframe['hapercent'] < 0, 'volume2'] *= -1
-
- # Volume confirmation
- dataframe['volume_spike'] = (abs(dataframe['volume2']) > abs(dataframe['volume2'].rolling(window=20).mean() * 5)) \
- & (dataframe['volume'].rolling(window=5).max() > 1000)
-
dataframe['sma5_1h'] = dataframe['sma5_1h'].rolling(window=60).mean()
# récupérer le dernier trade fermé
diff --git a/FrictradeLearning.json b/FrictradeLearning.json
index 2e1108b..b11535a 100644
--- a/FrictradeLearning.json
+++ b/FrictradeLearning.json
@@ -5,7 +5,7 @@
"0": 10
},
"stoploss": {
- "stoploss": -1.0
+ "stoploss": -0.02
},
"trailing": {
"trailing_stop": false,
diff --git a/FrictradeLearning.py b/FrictradeLearning.py
index ffaa3d6..ae0cbeb 100644
--- a/FrictradeLearning.py
+++ b/FrictradeLearning.py
@@ -50,12 +50,23 @@ from sklearn.metrics import (
roc_curve,
precision_score, recall_score
)
-from sklearn.metrics import f1_score
+from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score
+
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import export_text
from xgboost import XGBClassifier
+import lightgbm as lgb
+import numpy as np
+import pandas as pd
+import optuna
+
+from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score
+from sklearn.model_selection import train_test_split
+from imblearn.over_sampling import SMOTE
+from sklearn.ensemble import RandomForestClassifier
+from lightgbm import LGBMClassifier
# --------------------------------
@@ -156,8 +167,7 @@ class FrictradeLearning(IStrategy):
'mises': {},
'dca_thresholds': {}
}
- for pair in ["BTC/USDC", "ETH/USDC", "DOGE/USDC", "XRP/USDC", "SOL/USDC",
- "BTC/USDT", "ETH/USDT", "DOGE/USDT", "XRP/USDT", "SOL/USDT"]
+ for pair in ["BTC/USDC", "BTC/USDT", "BTC/USDT:USDT"]
}
trades = list()
max_profit_pairs = {}
@@ -549,6 +559,11 @@ class FrictradeLearning(IStrategy):
short_pair = self.getShortName(pair)
self.path = f"user_data/strategies/plots/{short_pair}/" # + ("valide/" if not self.dp.runmode.value in ('backtest') else '')
+ # dataframe['open'] = dataframe['open'] / dataframe['open'].rolling(180).mean()
+ # dataframe['close'] = dataframe['close'] / dataframe['close'].rolling(180).mean()
+ # dataframe['low'] = dataframe['low'] / dataframe['low'].rolling(180).mean()
+ # dataframe['high'] = dataframe['high'] / dataframe['high'].rolling(180).mean()
+
heikinashi = qtpylib.heikinashi(dataframe)
dataframe['haopen'] = heikinashi['open']
dataframe['haclose'] = heikinashi['close']
@@ -606,7 +621,7 @@ class FrictradeLearning(IStrategy):
dataframe['max5'] = talib.MAX(dataframe['mid'], timeperiod=5)
dataframe['min180'] = talib.MIN(dataframe['mid'], timeperiod=180)
dataframe['max180'] = talib.MAX(dataframe['mid'], timeperiod=180)
- dataframe['pct180'] = ((dataframe["mid"] - dataframe['min180']) / (dataframe['max180'] - dataframe['min180']))
+ # dataframe['pct180'] = ((dataframe["mid"] - dataframe['min180']) / (dataframe['max180'] - dataframe['min180']))
dataframe = self.rsi_trend_probability(dataframe, short=60, long=360)
# ################### INFORMATIVE 1h
@@ -625,21 +640,8 @@ class FrictradeLearning(IStrategy):
informative['rsi'] = talib.RSI(informative['mid'], timeperiod=14)
- informative['sma5'] = informative['mid'].ewm(span=5, adjust=False).mean()
- informative['sma5_deriv1'] = 1000 * (informative['sma5'] - informative['sma5'].shift(1)) / informative[
- 'sma5'].shift(1)
-
- informative['sma12'] = informative['mid'].ewm(span=12, adjust=False).mean()
- informative['sma12_deriv1'] = 1000 * (informative['sma12'] - informative['sma12'].shift(1)) / informative[
- 'sma12'].shift(1)
-
- informative['sma24'] = informative['mid'].ewm(span=24, adjust=False).mean()
- informative['sma24_deriv1'] = 1000 * (informative['sma24'] - informative['sma24'].shift(1)) / informative[
- 'sma24'].shift(1)
-
- informative['sma60'] = informative['mid'].ewm(span=60, adjust=False).mean()
- informative['sma60_deriv1'] = 1000 * (informative['sma60'] - informative['sma60'].shift(1)) / informative[
- 'sma60'].shift(1)
+ for timeperiod in [5, 12, 24, 60]:
+ informative[f'sma{timeperiod}'] = informative['mid'].ewm(span=timeperiod, adjust=False).mean()
informative['rsi'] = talib.RSI(informative['mid'], timeperiod=14)
self.calculeDerivees(informative, 'rsi', ema_period=12)
@@ -647,12 +649,63 @@ class FrictradeLearning(IStrategy):
# informative = self.rsi_trend_probability(informative)
- # probas = self.calculModelInformative(informative)
-
# self.calculateConfiance(informative)
# informative = self.populate1hIndicators(df=informative, metadata=metadata)
# informative = self.calculateRegression(informative, 'mid', lookback=15)
+
+ ###########################################################
+ # Bollinger Bands
+ bollinger = qtpylib.bollinger_bands(qtpylib.typical_price(informative), window=20, stds=2)
+ informative['bb_lowerband'] = bollinger['lower']
+ informative['bb_middleband'] = bollinger['mid']
+ informative['bb_upperband'] = bollinger['upper']
+ informative["bb_percent"] = (
+ (informative["close"] - informative["bb_lowerband"]) /
+ (informative["bb_upperband"] - informative["bb_lowerband"])
+ )
+ informative["bb_width"] = (informative["bb_upperband"] - informative["bb_lowerband"]) / informative["bb_middleband"]
+
+ # Calcul MACD
+ macd, macdsignal, macdhist = talib.MACD(informative['close'], fastperiod=12, slowperiod=26, signalperiod=9)
+
+ # | Nom | Formule / définition | Signification |
+ # | ---------------------------- | ------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+        # | **MACD** (`macd`) | `EMA_fast - EMA_slow` (ex : 12-26 périodes) | Montre l’écart entre la moyenne courte et la moyenne longue. - Positive → tendance haussière - Négative → tendance baissière |
+        # | **Signal** (`macdsignal`) | `EMA_9(MACD)` | Sert de ligne de **signal de déclenchement**. - Croisement du MACD au-dessus → signal d’achat - Croisement du MACD en dessous → signal de vente |
+        # | **Histogramme** (`macdhist`) | `MACD - Signal` | Montre la **force et l’accélération** de la tendance. - Positif et croissant → tendance haussière qui s’accélère - Positif mais décroissant → ralentissement de la hausse - Négatif et décroissant → baisse qui s’accélère - Négatif mais croissant → ralentissement de la baisse |
+
+ # Ajouter dans le informative
+ informative['macd'] = macd
+ informative['macdsignal'] = macdsignal
+ informative['macdhist'] = macdhist
+
+ informative["volume_mean"] = informative["volume"].rolling(20).mean()
+ informative["volume_ratio"] = informative["volume"] / informative["volume_mean"]
+ informative['volume2'] = informative['volume']
+ informative.loc[informative['close'].pct_change() < 0, 'volume2'] *= -1
+ informative['volume_spike'] = (abs(informative['volume2']) > abs(informative['volume2'].rolling(window=20).mean() * 5)) \
+ & (informative['volume'].rolling(window=5).max() > 1000)
+
+ # --- Volatilité normalisée ---
+ informative['atr'] = ta.volatility.AverageTrueRange(high=informative['high'], low=informative['low'], close=informative['close'], window=14).average_true_range()
+ informative['atr_norm'] = informative['atr'] / informative['close']
+ # --- Force de tendance ---
+ informative['adx'] = ta.trend.ADXIndicator(high=informative['high'], low=informative['low'], close=informative['close'], window=14).adx()
+
+ # --- Volume directionnel (On Balance Volume) ---
+ informative['obv'] = ta.volume.OnBalanceVolumeIndicator(close=informative['close'], volume=informative['volume']).on_balance_volume()
+ self.calculeDerivees(informative, 'obv', ema_period=1)
+
+ informative['obv12'] = ta.volume.OnBalanceVolumeIndicator(close=informative['sma12'], volume=informative['volume'].rolling(12).sum()).on_balance_volume()
+ informative['obv24'] = ta.volume.OnBalanceVolumeIndicator(close=informative['sma24'], volume=informative['volume'].rolling(24).sum()).on_balance_volume()
+ informative['rsi_slope'] = informative['rsi'].diff(3) / 3 # vitesse moyenne du RSI
+ informative['adx_change'] = informative['adx'] - informative['adx'].shift(12) # évolution de la tendance
+ informative['volatility_ratio'] = informative['atr_norm'] / informative['bb_width']
+
+ # informative["slope_ratio"] = informative["sma5_deriv1"] / (informative["sma60_deriv1"] + 1e-9)
+ # informative["divergence"] = (informative["rsi_deriv1"] * informative["sma5_deriv1"]) < 0
+
dataframe = merge_informative_pair(dataframe, informative, '1m', '1h', ffill=True)
# ################### INFORMATIVE 1d
@@ -665,8 +718,62 @@ class FrictradeLearning(IStrategy):
# informative = self.calculateRegression(informative, 'mid', lookback=15)
# self.calculateConfiance(informative)
+ ###########################################################
+ # Bollinger Bands
+ bollinger = qtpylib.bollinger_bands(qtpylib.typical_price(informative), window=20, stds=2)
+ informative['bb_lowerband'] = bollinger['lower']
+ informative['bb_middleband'] = bollinger['mid']
+ informative['bb_upperband'] = bollinger['upper']
+ informative["bb_percent"] = (
+ (informative["close"] - informative["bb_lowerband"]) /
+ (informative["bb_upperband"] - informative["bb_lowerband"])
+ )
+ # informative["bb_width"] = (informative["bb_upperband"] - informative["bb_lowerband"]) / informative["bb_middleband"]
+
+ # # Calcul MACD
+ # macd, macdsignal, macdhist = talib.MACD(
+ # informative['close'],
+ # fastperiod=12,
+ # slowperiod=26,
+ # signalperiod=9
+ # )
+ #
+ # # | Nom | Formule / définition | Signification |
+ # # | ---------------------------- | ------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+        # # | **MACD** (`macd`) | `EMA_fast - EMA_slow` (ex : 12-26 périodes) | Montre l’écart entre la moyenne courte et la moyenne longue. - Positive → tendance haussière - Négative → tendance baissière |
+        # # | **Signal** (`macdsignal`) | `EMA_9(MACD)` | Sert de ligne de **signal de déclenchement**. - Croisement du MACD au-dessus → signal d’achat - Croisement du MACD en dessous → signal de vente |
+        # # | **Histogramme** (`macdhist`) | `MACD - Signal` | Montre la **force et l’accélération** de la tendance. - Positif et croissant → tendance haussière qui s’accélère - Positif mais décroissant → ralentissement de la hausse - Négatif et décroissant → baisse qui s’accélère - Négatif mais croissant → ralentissement de la baisse |
+ #
+ # # Ajouter dans le informative
+ # informative['macd'] = macd
+ # informative['macdsignal'] = macdsignal
+ # informative['macdhist'] = macdhist
+
+ informative["volume_mean"] = informative["volume"].rolling(20).mean()
+ informative["volume_ratio"] = informative["volume"] / informative["volume_mean"]
+ informative['volume2'] = informative['volume']
+ informative.loc[informative['close'].pct_change() < 0, 'volume2'] *= -1
+ informative['volume_spike'] = (abs(informative['volume2']) > abs(informative['volume2'].rolling(window=20).mean() * 5)) \
+ & (informative['volume'].rolling(window=5).max() > 1000)
+
+ for timeperiod in [3, 5, 8, 12]:
+ informative[f'sma{timeperiod}'] = informative['mid'].ewm(span=timeperiod, adjust=False).mean()
+
+ informative['rsi'] = talib.RSI(informative['mid'], timeperiod=14)
+ self.calculeDerivees(informative, 'rsi', ema_period=12)
+ self.calculateScores(informative, 6)
+
dataframe = merge_informative_pair(dataframe, informative, '1m', '1d', ffill=True)
+ dataframe["pct30"] = dataframe["close"].pct_change(30)
+ dataframe["pct60"] = dataframe["close"].pct_change(60)
+ dataframe["pct120"] = dataframe["close"].pct_change(120)
+ dataframe["pct180"] = dataframe["close"].pct_change(180)
+ dataframe["pct300"] = dataframe["close"].pct_change(300)
+ dataframe["pct600"] = dataframe["close"].pct_change(600)
+ dataframe["pct1200"] = dataframe["close"].pct_change(1200)
+ dataframe["sma_ratio"] = dataframe["sma5_1h"] / dataframe["sma60"]
+
dataframe['last_price'] = dataframe['close']
dataframe['first_price'] = dataframe['close']
if self.dp:
@@ -707,6 +814,10 @@ class FrictradeLearning(IStrategy):
#
# print(levels)
+ for timeperiod in [5, 12, 24, 60]:
+ dataframe[f'sma{timeperiod}_1h'] = dataframe[f'sma{timeperiod}_1h'].rolling(window=60).mean()
+ self.calculeDerivees(dataframe, f'sma{timeperiod}_1h', ema_period=12)
+
###########################################################
# Bollinger Bands
bollinger = qtpylib.bollinger_bands(qtpylib.typical_price(dataframe), window=20, stds=2)
@@ -717,7 +828,7 @@ class FrictradeLearning(IStrategy):
(dataframe["close"] - dataframe["bb_lowerband"]) /
(dataframe["bb_upperband"] - dataframe["bb_lowerband"])
)
- dataframe["bb_width"] = (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / dataframe["sma24"]
+ dataframe["bb_width"] = (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / dataframe["bb_middleband"]
# Calcul MACD
macd, macdsignal, macdhist = talib.MACD(
@@ -763,11 +874,11 @@ class FrictradeLearning(IStrategy):
dataframe[f'sma_{s_short}'] = dataframe['close'].rolling(window=s_short).mean()
dataframe[f'sma_{s_long}'] = dataframe['close'].rolling(window=s_long).mean()
- # --- pente brute ---
- dataframe['slope'] = dataframe['sma24'].diff()
-
- # --- lissage EMA ---
- dataframe['slope_smooth'] = dataframe['slope'].ewm(span=10, adjust=False).mean()
+ # # --- pente brute ---
+ # dataframe['slope'] = dataframe['sma24'].diff()
+ #
+ # # --- lissage EMA ---
+ # dataframe['slope_smooth'] = dataframe['slope'].ewm(span=10, adjust=False).mean()
# # RSI
# window = 14
@@ -794,48 +905,21 @@ class FrictradeLearning(IStrategy):
# Assure-toi qu'il est trié par date croissante
timeframe = self.timeframe
# --- Volatilité normalisée ---
- dataframe['atr'] = ta.volatility.AverageTrueRange(
- high=dataframe['high'], low=dataframe['low'], close=dataframe['close'], window=14
- ).average_true_range()
+ dataframe['atr'] = ta.volatility.AverageTrueRange(high=dataframe['high'], low=dataframe['low'], close=dataframe['close'], window=14).average_true_range()
dataframe['atr_norm'] = dataframe['atr'] / dataframe['close']
-
# --- Force de tendance ---
- dataframe['adx'] = ta.trend.ADXIndicator(
- high=dataframe['high'], low=dataframe['low'], close=dataframe['close'], window=14
- ).adx()
+ dataframe['adx'] = ta.trend.ADXIndicator(high=dataframe['high'], low=dataframe['low'], close=dataframe['close'], window=14).adx()
# --- Volume directionnel (On Balance Volume) ---
- dataframe['obv'] = ta.volume.OnBalanceVolumeIndicator(
- close=dataframe['close'], volume=dataframe['volume']
- ).on_balance_volume()
+ dataframe['obv'] = ta.volume.OnBalanceVolumeIndicator(close=dataframe['close'], volume=dataframe['volume']).on_balance_volume()
self.calculeDerivees(dataframe, 'obv', ema_period=1)
- dataframe['obv12'] = ta.volume.OnBalanceVolumeIndicator(
- close=dataframe['sma12'], volume=dataframe['volume'].rolling(12).sum()
- ).on_balance_volume()
-
- dataframe['obv24'] = ta.volume.OnBalanceVolumeIndicator(
- close=dataframe['sma24'], volume=dataframe['volume'].rolling(24).sum()
- ).on_balance_volume()
-
- # --- Volatilité récente (écart-type des rendements) ---
- dataframe['vol_24'] = dataframe['percent'].rolling(24).std()
-
- # Compter les baisses / hausses consécutives
- # self.calculateDownAndUp(dataframe, limit=0.0001)
-
- # df : ton dataframe OHLCV + indicateurs existants
- # Assurez-vous que les colonnes suivantes existent :
- # 'max_rsi_12', 'roc_24', 'bb_percent_1h'
-
- # --- Filtrage des NaN initiaux ---
- # dataframe = dataframe.dropna()
-
+ dataframe['obv12'] = ta.volume.OnBalanceVolumeIndicator(close=dataframe['sma12'], volume=dataframe['volume'].rolling(12).sum()).on_balance_volume()
+ dataframe['obv24'] = ta.volume.OnBalanceVolumeIndicator(close=dataframe['sma24'], volume=dataframe['volume'].rolling(24).sum()).on_balance_volume()
dataframe['rsi_slope'] = dataframe['rsi'].diff(3) / 3 # vitesse moyenne du RSI
dataframe['adx_change'] = dataframe['adx'] - dataframe['adx'].shift(12) # évolution de la tendance
dataframe['volatility_ratio'] = dataframe['atr_norm'] / dataframe['bb_width']
- dataframe["rsi_diff"] = dataframe["rsi"] - dataframe["rsi"].shift(3)
dataframe["slope_ratio"] = dataframe["sma5_deriv1"] / (dataframe["sma60_deriv1"] + 1e-9)
dataframe["divergence"] = (dataframe["rsi_deriv1"] * dataframe["sma5_deriv1"]) < 0
@@ -846,19 +930,23 @@ class FrictradeLearning(IStrategy):
self.model_indicators = self.listUsableColumns(dataframe)
+ print("INDICATORS : ", self.model_indicators )
+
if False and self.dp.runmode.value in ('backtest'):
- self.trainModel(dataframe, metadata)
+ self.trainModel3(dataframe, metadata)
short_pair = self.getShortName(pair)
- # path=f"user_data/strategies/plots/{short_pair}/"
+ path=f"user_data/strategies/plots/{short_pair}/"
+
+ data = joblib.load(f"{self.path}/{short_pair}_rf_model.pkl")
+ self.model = data["model"]
+ self.model_indicators = data["features"]
+
+ # Préparer les features pour la prédiction
+ X_Valid = dataframe[self.model_indicators].fillna(0)
+
+ # Prédiction : probabilité que le prix monte
- # self.model = joblib.load(f"{self.path}/{short_pair}_rf_model.pkl")
- #
- # # Préparer les features pour la prédiction
- # features = dataframe[self.model_indicators].fillna(0)
- #
- # # Prédiction : probabilité que le prix monte
- #
# # Affichage des colonnes intérressantes dans le model
# features_pruned, kept_features = self.prune_features(
# model=self.model,
@@ -866,14 +954,23 @@ class FrictradeLearning(IStrategy):
# feature_columns=self.model_indicators,
# importance_threshold=0.005 # enlever features < % importance
# )
- #
+
# probs = self.model.predict_proba(features)[:, 1]
- #
- # # Sauvegarder la probabilité pour l’analyse
- # dataframe['ml_prob'] = probs
- #
- # if False and self.dp.runmode.value in ('backtest'):
- # self.inspect_model(self.model)
+ probs_all_classes = self.model.predict(X_Valid) # shape = (n_samples, n_classes)
+ print(probs_all_classes.shape) # doit être (n_samples, 3)
+
+ # Ajouter probabilité de chaque classe au dataframe pour analyse
+ for i in range(3):
+ dataframe[f'prob_class_{i}'] = probs_all_classes[:, i]
+
+ # Pour la probabilité de la classe 2 :
+ probs = probs_all_classes[:, 2]
+
+ # Sauvegarder la probabilité pour l’analyse
+ dataframe['ml_prob'] = probs
+
+ if False and self.dp.runmode.value in ('backtest'):
+ self.inspect_model(self.model)
#
# absolute_min = dataframe['absolute_min'].min()
@@ -1050,7 +1147,7 @@ class FrictradeLearning(IStrategy):
#
# return dataframe
- def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
+ def populate_entry_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
"""
Buy when the model predicts a high upside probability/value.
This method loads the ML model, generates predictions, and
@@ -1086,16 +1183,51 @@ class FrictradeLearning(IStrategy):
# , ['enter_long', 'enter_tag']
# ] = (1, f"future")
- dataframe.loc[
- # (dataframe["ml_prob"].shift(1) < dataframe["ml_prob"])
- (
- (dataframe['close'].shift(3) < dataframe['min180'].shift(3)) |
- (dataframe['close'].shift(4) < dataframe['min180'].shift(4)) |
- (dataframe['close'].shift(5) < dataframe['min180'].shift(5))
- )
- & (dataframe['hapercent'] > 0)
- , ['enter_long', 'enter_tag']
- ] = (1, f"min180")
+ score = (
+ (dataframe['max_rsi_12'] > 70).astype(int) * 3 +
+ (dataframe['pct30'] < 0).astype(int) * 2 +
+ (dataframe['percent12'] < 0).astype(int) * 2 +
+ (dataframe['rsi_dist'] < 0).astype(int) * 1
+ )
+
+ dataframe.loc[score >= 5, ['enter_long', 'enter_tag']] = (1, f"long")
+
+ # dataframe.loc[
+ # # (dataframe["ml_prob"].shift(1) < dataframe["ml_prob"])
+ # (
+ # # 🔥 RSI récemment élevé (surachat)
+ # (dataframe['max_rsi_12'] > 70) &
+ #
+ # # 📉 retournement en cours
+ # (dataframe['rsi'] < dataframe['max_rsi_12'] - 10) &
+ #
+ # # 📉 perte de momentum court terme
+ # (dataframe['pct30'] < 0) &
+ #
+ # # 📉 confirmation
+ # (dataframe['percent12'] < 0)
+ # )
+ # & (dataframe['hapercent'] > 0)
+ # , ['enter_long', 'enter_tag']
+ # ] = (1, f"long")
+
+ # dataframe.loc[
+ # # (dataframe["ml_prob"].shift(1) < dataframe["ml_prob"])
+ # (
+ # dataframe['prob_class_0'] > 0.45
+ # )
+ # & (dataframe['hapercent'] < 0)
+ # , ['enter_short', 'enter_tag']
+ # ] = (1, f"short")
+
+ score = (
+ (dataframe['pct30'] > 0.01).astype(int) * 3 +
+ (dataframe['percent12'] > 0.005).astype(int) * 3 +
+ (dataframe['rsi'] > 60).astype(int) * 2 +
+ (dataframe['rsi'] < dataframe['rsi'].shift(1)).astype(int) * 1
+ )
+
+ dataframe.loc[score >= 5, ['enter_short', 'enter_tag']] = (1, f"short")
dataframe['test'] = np.where(dataframe['enter_long'] == 1, dataframe['close'] * 1.01, np.nan)
@@ -1188,7 +1320,7 @@ class FrictradeLearning(IStrategy):
#
# return dataframe
- def populate_sell_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
+ def populate_exit_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
return dataframe
@@ -1225,14 +1357,14 @@ class FrictradeLearning(IStrategy):
return max(round(y), 1) # évite les valeurs négatives
def adjust_stake_amount(self, pair: str, last_candle: DataFrame):
- if (self.pairs[pair]['first_amount'] > 0):
- amount = min(self.wallets.get_available_stake_amount(), self.pairs[pair]['first_amount'])
- else:
- if last_candle['enter_tag'] in ['fall', 'bear', 'Force', 'Range-']:
- amount = self.wallets.get_available_stake_amount() / 5
- else:
- amount = self.wallets.get_available_stake_amount() / 3# / (2 * self.pairs[pair]['count_of_lost'] + 1)
- return min(amount, self.wallets.get_available_stake_amount())
+ # if (self.pairs[pair]['first_amount'] > 0):
+ # amount = min(self.wallets.get_available_stake_amount(), self.pairs[pair]['first_amount'])
+ # else:
+ # if last_candle['enter_tag'] in ['fall', 'bear', 'Force', 'Range-']:
+ # amount = self.wallets.get_available_stake_amount() / 5
+ # else:
+ # amount = self.wallets.get_available_stake_amount() / 3# / (2 * self.pairs[pair]['count_of_lost'] + 1)
+ return self.wallets.get_available_stake_amount()
def calculateMises(self, pair, ath, val):
# ath = max(self.pairs[pair]['last_max'], self.get_last_ath_before_candle(last_candle))
@@ -1527,77 +1659,80 @@ class FrictradeLearning(IStrategy):
# stake=0
# )
- if current_profit < - 0.02 and last_candle[f"close"] <= last_candle['sma60']:
- self.pairs[pair]['force_sell'] = True
- return 'sma60'
+ if trade.is_short:
+ if current_profit > 0.005 and \
+ (baisse > 0.25 and last_candle[f"close"] <= last_candle['sma24']) \
+ and last_candle['hapercent'] > 0 :
+ self.pairs[pair]['force_sell'] = True
+ return 'B30sht'
+ else:
+ # if current_profit < - 0.02 and last_candle[f"close"] <= last_candle['sma60']:
+ # self.pairs[pair]['force_sell'] = True
+ # return 'sma60'
- if profit > 5 and \
- (baisse > 0.25 and last_candle[f"close"] <= last_candle['sma24']) \
- and last_candle['hapercent'] <0 :
- self.pairs[pair]['force_sell'] = True
- return 'B30'
+ if current_profit > 0.005 and \
+ (baisse > 0.25 and last_candle[f"close"] <= last_candle['sma24']) \
+ and last_candle['hapercent'] <0 :
+ self.pairs[pair]['force_sell'] = True
+ return 'B30Lng'
- if profit > 0 and last_candle['cross_sma60']: #5 or last_candle['rsi_1d'] < 30:
- return 'Cross'
+ # if profit > 0 and last_candle['cross_sma60']: #5 or last_candle['rsi_1d'] < 30:
+ # return 'Cross'
+ #
+ # if last_candle['max_rsi_24'] > 88 and last_candle['hapercent'] < 0\
+ # and last_candle['sma5_deriv2'] < -0.1:
+ # return f"rsi_{count_of_buys}_{self.pairs[pair]['has_gain']}"
- if last_candle['max_rsi_24'] > 88 and last_candle['hapercent'] < 0\
- and last_candle['sma5_deriv2'] < -0.1:
- return f"rsi_{count_of_buys}_{self.pairs[pair]['has_gain']}"
+ limit = max_profit * (1 - current_trailing_stop_positive)
+ # if profit < limit and baisse > 0.2:
+ # return f"lim_{count_of_buys}_{self.pairs[pair]['has_gain']}"
+ # if last_candle['ml_prob'] > 0.5:
+ # if last_candle['sma12_deriv1'] > 0: # and last_candle['rsi'] < 85:
+ # return None
- limit = max_profit * (1 - current_trailing_stop_positive)
- # if profit < limit and baisse > 0.2:
- # return f"lim_{count_of_buys}_{self.pairs[pair]['has_gain']}"
- # if last_candle['ml_prob'] > 0.5:
- # if last_candle['sma12_deriv1'] > 0: # and last_candle['rsi'] < 85:
- # return None
+ # if last_candle['sma24_deriv1'] > 0 : #and minutes < 180 and baisse < 30: # and last_candle['sma5_deriv1'] > -0.15:
+ # if (minutes < 180):
+ # return None
+ # if (minutes > 1440 and last_candle['sma60_deriv1'] > 0) :
+ # return None
- # if last_candle['sma24_deriv1'] > 0 : #and minutes < 180 and baisse < 30: # and last_candle['sma5_deriv1'] > -0.15:
- # if (minutes < 180):
- # return None
- # if (minutes > 1440 and last_candle['sma60_deriv1'] > 0) :
- # return None
+ # # ----- 4) OFFSET : faut-il attendre de dépasser trailing_stop_positive_offset ? -----
+ # if current_trailing_only_offset_is_reached and max_profit > current_trailing_stop_positive_offset:
+ # # Max profit pas atteint ET perte < 2 * current_trailing_stop_positive
+ # if profit > limit: # 2 * current_trailing_stop_positive:
+ # print(
+ # f"{current_time} trailing non atteint trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} "
+ # f"max={round(max_profit, 4)} offset={round(current_trailing_stop_positive_offset, 4)} baisse={round(baisse,2)}")
+ # return None # ne pas activer le trailing encore
+ # else:
+ # print(
+ # f"{current_time} trailing atteint trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} "
+ # f"max={round(max_profit, 4)} offset={round(current_trailing_stop_positive_offset, 4)} baisse={round(baisse,2)}")
+ # else:
+ # # print(
+ # # f"1 - {current_time} trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} max={round(max_profit, 4)} "
+ # # f"limit={round(limit, 4)} offset={round(current_trailing_stop_positive_offset, 4)}"
+ # # f" baisse={round(baisse,2)} {round(last_candle['sma180_deriv1'], 4)} {round(last_candle['sma60_deriv1'], 4)} {round(last_candle['sma24_deriv1'], 4)}")
+ #
+ # return None
+ # # Sinon : trailing actif dès le début
+ #
+ # # ----- 6) Condition de vente -----
+ # if 0 < profit <= trailing_stop: # and last_candle['mid'] < last_candle['sma5']: # and profit > current_trailing_stop_positive_offset:
+ # self.pairs[pair]['force_buy'] = True
+ # print(
+ # f"{current_time} Condition de vente trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} max={round(max_profit, 4)} "
+ # f"{round(limit, 4)} offset={round(current_trailing_stop_positive_offset, 4)} "
+ # f"baisse={round(baisse,2)}")
+ #
+ # return f"stop_{count_of_buys}_{self.pairs[pair]['has_gain']}"
- # # ----- 4) OFFSET : faut-il attendre de dépasser trailing_stop_positive_offset ? -----
- # if current_trailing_only_offset_is_reached and max_profit > current_trailing_stop_positive_offset:
- # # Max profit pas atteint ET perte < 2 * current_trailing_stop_positive
- # if profit > limit: # 2 * current_trailing_stop_positive:
- # print(
- # f"{current_time} trailing non atteint trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} "
- # f"max={round(max_profit, 4)} offset={round(current_trailing_stop_positive_offset, 4)} baisse={round(baisse,2)}")
- # return None # ne pas activer le trailing encore
- # else:
- # print(
- # f"{current_time} trailing atteint trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} "
- # f"max={round(max_profit, 4)} offset={round(current_trailing_stop_positive_offset, 4)} baisse={round(baisse,2)}")
- # else:
- # # print(
- # # f"1 - {current_time} trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} max={round(max_profit, 4)} "
- # # f"limit={round(limit, 4)} offset={round(current_trailing_stop_positive_offset, 4)}"
- # # f" baisse={round(baisse,2)} {round(last_candle['sma180_deriv1'], 4)} {round(last_candle['sma60_deriv1'], 4)} {round(last_candle['sma24_deriv1'], 4)}")
- #
- # return None
- # # Sinon : trailing actif dès le début
- #
- # # ----- 6) Condition de vente -----
- # if 0 < profit <= trailing_stop: # and last_candle['mid'] < last_candle['sma5']: # and profit > current_trailing_stop_positive_offset:
- # self.pairs[pair]['force_buy'] = True
- # print(
- # f"{current_time} Condition de vente trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} max={round(max_profit, 4)} "
- # f"{round(limit, 4)} offset={round(current_trailing_stop_positive_offset, 4)} "
- # f"baisse={round(baisse,2)}")
- #
- # return f"stop_{count_of_buys}_{self.pairs[pair]['has_gain']}"
+ # print(
+ # f"2 - {current_time} trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} max={round(max_profit, 4)} "
+ # f"{round(limit, 4)} offset={round(current_trailing_stop_positive_offset, 4)} "
+ # f"baisse={round(baisse,2)} {round(last_candle['sma180_deriv1'], 4)} {round(last_candle['sma60_deriv1'], 4)} {round(last_candle['sma24_deriv1'], 4)}")
- # print(
- # f"2 - {current_time} trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} max={round(max_profit, 4)} "
- # f"{round(limit, 4)} offset={round(current_trailing_stop_positive_offset, 4)} "
- # f"baisse={round(baisse,2)} {round(last_candle['sma180_deriv1'], 4)} {round(last_candle['sma60_deriv1'], 4)} {round(last_candle['sma24_deriv1'], 4)}")
-
- return None
- print(
- f"2 - {current_time} trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} max={round(max_profit, 4)} "
- f"{round(limit, 4)} offset={round(current_trailing_stop_positive_offset, 4)} "
- f"baisse={round(baisse,2)} {round(last_candle['sma180_deriv1'], 4)} {round(last_candle['sma60_deriv1'], 4)} {round(last_candle['sma24_deriv1'], 4)}")
+ return None
def informative_pairs(self):
# get access to all pairs available in whitelist.
@@ -1734,13 +1869,86 @@ class FrictradeLearning(IStrategy):
# Corrélations des colonnes
corr = df.corr(numeric_only=True)
- print("Corrélation des colonnes")
- print(corr)
+ # print("Corrélation des colonnes")
+ # print(corr)
# 3️⃣ Créer la cible : 1 si le prix monte dans les prochaines bougies
- # df['target'] = (df['sma24'].shift(-24) > df['sma24']).astype(int)
- df['target'] = ((df["sma24"].shift(-13) - df["sma24"]) > 100).astype(int)
- df['target'] = df['target'].fillna(0).astype(int)
+ os.makedirs(path, exist_ok=True)
+
+ horizon = 120 # en 1min
+ indicator = 'sma60'
+
+ df['future_max'] = df[indicator].shift(-1).rolling(horizon).max()
+ df['future_min'] = df[indicator].shift(-1).rolling(horizon).min()
+ tp = 0.0025 # +%
+ sl = 0.0025 # -% (important !)
+
+ df['target'] = 0
+
+ # 🎯 cas gagnant
+ df.loc[df['future_max'] > df[indicator] * (1 + tp), 'target'] = 1
+
+ # 💀 cas perdant
+ df.loc[df['future_min'] < df[indicator] * (1 - sl), 'target'] = -1
+
+ # Filtre
+ # df = df[df['atr_norm'] > 0.002]
+
+ print("===== 🚀 TRAIN MODEL START =====")
+ df = df.dropna().copy()
+
+ features = self.listUsableColumns(df)
+ target_col = "target"
+
+ # 3️⃣ Créer la cible : 1 si le prix monte dans les prochaines bougies
+ df['target'] = 0
+
+ for i in range(len(df) - horizon):
+ window = df.iloc[i + 1:i + 1 + horizon]
+
+ entry = df.iloc[i][indicator]
+ tp_price = entry * (1 + tp)
+ sl_price = entry * (1 - sl)
+
+ hit_tp = window[window[indicator] >= tp_price]
+ hit_sl = window[window[indicator] <= sl_price]
+
+ if not hit_tp.empty and not hit_sl.empty:
+ if hit_tp.index[0] < hit_sl.index[0]:
+ df.iloc[i, df.columns.get_loc('target')] = 1
+ else:
+ df.iloc[i, df.columns.get_loc('target')] = -1
+ elif not hit_tp.empty:
+ df.iloc[i, df.columns.get_loc('target')] = 1
+ elif not hit_sl.empty:
+ df.iloc[i, df.columns.get_loc('target')] = -1
+
+ working_columns = self.select_features_pipeline(df)
+ features=working_columns
+ X = df[features]
+ y = (df['target'] == 1).astype(int) # df[target_col]
+
+ # df['target'].value_counts(normalize=True)
+ counts = df['target'].value_counts()
+ n_neg = counts.get(0, 0) # nombre de 0
+ n_pos = counts.get(1, 0) # nombre de 1
+
+ scale_pos_weight = n_neg / n_pos
+ print("Samples:", len(df))
+ print("Target ratio:", df['target'].mean())
+ print("Working features:", len(working_columns))
+ print("Used features:", len(X.columns))
+ print("Poids pour la classe 1 :", scale_pos_weight)
+ print("==== VARIANCE ====")
+ print(X.var().sort_values().head(10))
+ print("==== DESCRIBE ====")
+ print(X.describe().T[['mean', 'std']].head(20))
+ print("Samples before:", len(df))
+ df = df.dropna()
+ print("Samples after:", len(df))
+ print(df['target'].value_counts())
+ # time.sleep(5.5) # Pause 5.5 seconds
+
# Corrélations triées par importance avec une colonne cible
target_corr = df.corr(numeric_only=True)["target"].sort_values(ascending=False)
@@ -1798,9 +2006,20 @@ class FrictradeLearning(IStrategy):
print(f"✅ Matrice enregistrée : {output_path}")
# Exemple d'utilisation :
- selected_corr = self.select_uncorrelated_features(df, target="target", top_n=30, corr_threshold=0.7)
- print("===== 🎯 FEATURES SÉLECTIONNÉES =====")
- print(selected_corr)
+ # selected_corr = self.select_uncorrelated_features(df, target="target", top_n=30, corr_threshold=0.98)
+ # print("===== 🎯 FEATURES SÉLECTIONNÉES =====")
+ # print(selected_corr)
+ #
+ # # 🔥 EXTRACTION CORRECTE
+ # working_columns = selected_corr["feature"].tolist()
+
+ # Nettoyage
+ df = df[working_columns + ['target', indicator]].dropna()
+
+ X = df[working_columns]
+ y = df['target']
+
+ self.model_indicators = working_columns
# Nettoyage
df = df.dropna()
@@ -1814,7 +2033,15 @@ class FrictradeLearning(IStrategy):
X = df[self.model_indicators]
y = df['target']
# Séparation temporelle (train = 80 %, valid = 20 %)
- X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, shuffle=False)
+ # X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, shuffle=False)
+ split_idx = int(len(df) * 0.8)
+ df_train = df.iloc[:split_idx].copy()
+ df_valid = df.iloc[split_idx:].copy()
+ X_train = df_train[self.model_indicators]
+ y_train = df_train['target']
+ X_valid = df_valid[self.model_indicators]
+ y_valid = df_valid['target']
+ self.df_valid = df_valid
# Nettoyage des valeurs invalides
@@ -1847,60 +2074,236 @@ class FrictradeLearning(IStrategy):
# study = optuna.create_study(direction="maximize")
# study.optimize(objective, n_trials=50)
- def objective(trial):
- # local_model = XGBClassifier(
- # n_estimators=300, # nombre d'arbres plus raisonnable
- # learning_rate=0.01, # un peu plus rapide que 0.006, mais stable
- # max_depth=4, # capture plus de patterns que 3, sans overfitting excessif
- # subsample=0.7, # utilise 70% des lignes pour chaque arbre → réduit overfitting
- # colsample_bytree=0.8, # 80% des features par arbre
- # gamma=0.01, # gain minimal pour un split → régularisation
- # reg_alpha=0.01, # L1 régularisation des feuilles
- # reg_lambda=1, # L2 régularisation des feuilles
- # n_jobs=-1, # utilise tous les cœurs CPU pour accélérer
- # random_state=42, # reproductibilité
- # missing=float('nan'), # valeur manquante reconnue
- # eval_metric='logloss' # métrique pour classification binaire
- # )
+ # def objective(trial):
+ # # local_model = XGBClassifier(
+ # # n_estimators=300, # nombre d'arbres plus raisonnable
+ # # learning_rate=0.01, # un peu plus rapide que 0.006, mais stable
+ # # max_depth=4, # capture plus de patterns que 3, sans overfitting excessif
+ # # subsample=0.7, # utilise 70% des lignes pour chaque arbre → réduit overfitting
+ # # colsample_bytree=0.8, # 80% des features par arbre
+ # # gamma=0.01, # gain minimal pour un split → régularisation
+ # # reg_alpha=0.01, # L1 régularisation des feuilles
+ # # reg_lambda=1, # L2 régularisation des feuilles
+ # # n_jobs=-1, # utilise tous les cœurs CPU pour accélérer
+ # # random_state=42, # reproductibilité
+ # # missing=float('nan'), # valeur manquante reconnue
+ # # eval_metric='logloss' # métrique pour classification binaire
+ # # )
+ #
+ # local_model = XGBClassifier(
+ # n_estimators=trial.suggest_int("n_estimators", 300, 500),
+ # max_depth=trial.suggest_int("max_depth", 1, 6),
+ # learning_rate=trial.suggest_float("learning_rate", 0.005, 0.3, log=True),
+ # subsample=trial.suggest_float("subsample", 0.6, 1.0),
+ # colsample_bytree=trial.suggest_float("colsample_bytree", 0.6, 1.0),
+ # scale_pos_weight=1,
+ # objective="binary:logistic",
+ # eval_metric="logloss",
+ # n_jobs=-1
+ # )
+ #
+ # local_model.fit(
+ # X_train,
+ # y_train,
+ # eval_set=[(X_valid, y_valid)],
+ # # early_stopping_rounds=50,
+ # verbose=False
+ # )
+ #
+ # proba = local_model.predict_proba(X_valid)[:, 1]
+ # thresholds = np.linspace(0.1, 0.9, 50)
+ # best_f1 = max(f1_score(y_valid, (proba > t)) for t in thresholds)
+ #
+ # return best_f1
- local_model = XGBClassifier(
- n_estimators=trial.suggest_int("n_estimators", 300, 500),
- max_depth=trial.suggest_int("max_depth", 1, 6),
- learning_rate=trial.suggest_float("learning_rate", 0.005, 0.3, log=True),
- subsample=trial.suggest_float("subsample", 0.6, 1.0),
- colsample_bytree=trial.suggest_float("colsample_bytree", 0.6, 1.0),
- scale_pos_weight=1,
- objective="binary:logistic",
- eval_metric="logloss",
+ # def objective(trial):
+ #
+ # scale_pos_weight = (y_train == 0).sum() / max((y_train == 1).sum(), 1)
+ #
+ # local_model = XGBClassifier(
+ # n_estimators=trial.suggest_int("n_estimators", 300, 500),
+ # max_depth=trial.suggest_int("max_depth", 2, 6),
+ # learning_rate=trial.suggest_float("learning_rate", 0.005, 0.2, log=True),
+ # subsample=trial.suggest_float("subsample", 0.6, 1.0),
+ # colsample_bytree=trial.suggest_float("colsample_bytree", 0.6, 1.0),
+ # gamma=trial.suggest_float("gamma", 0, 0.1),
+ # reg_alpha=trial.suggest_float("reg_alpha", 0, 0.1),
+ # reg_lambda=trial.suggest_float("reg_lambda", 0.5, 2),
+ # scale_pos_weight=scale_pos_weight,
+ # objective="binary:logistic",
+ # eval_metric="logloss",
+ # n_jobs=-1,
+ # random_state=42
+ # )
+ #
+ # local_model.fit(
+ # X_train,
+ # y_train,
+ # eval_set=[(X_valid, y_valid)],
+ # verbose=False
+ # )
+ #
+ # proba = local_model.predict_proba(X_valid)[:, 1]
+ #
+ # # 🔥 seuil optimisé
+ # threshold = trial.suggest_float("threshold", 0.3, 0.7)
+ # prices = self.df_valid["close"].values
+ # profit = 0
+ # wins = 0
+ # losses = 0
+ #
+ # horizon = trial.suggest_int("horizon", 2, 6)
+ #
+ # min_move = trial.suggest_float("min_move", 0.002, 0.01)
+ #
+ # for i in range(len(proba) - horizon):
+ # if proba[i] > threshold:
+ # entry = prices[i]
+ # exit = prices[i + horizon]
+ # pct = (exit - entry) / entry
+ #
+ # # 🔥 filtre anti bruit
+ # if abs(pct) < min_move:
+ # continue
+ #
+ # pct -= 0.001 # fees
+ # profit += pct
+ # if pct > 0:
+ # wins += 1
+ # else:
+ # losses += 1
+ #
+ # if wins + losses == 0:
+ # return -1
+ #
+ # winrate = wins / (wins + losses)
+ #
+ # # 🔥 score final
+ # return profit * winrate
+
+ # 4️⃣ Fonction objectif Optuna
+ # def objective(trial):
+ # model = XGBClassifier(
+ # n_estimators=trial.suggest_int("n_estimators", 300, 500),
+ # max_depth=trial.suggest_int("max_depth", 3, 7),
+ # learning_rate=trial.suggest_float("learning_rate", 0.005, 0.1, log=True),
+ # subsample=trial.suggest_float("subsample", 0.6, 1.0),
+ # colsample_bytree=trial.suggest_float("colsample_bytree", 0.6, 1.0),
+ # gamma=trial.suggest_float("gamma", 0, 0.1),
+ # reg_alpha=trial.suggest_float("reg_alpha", 0, 0.1),
+ # reg_lambda=trial.suggest_float("reg_lambda", 1, 2),
+ # scale_pos_weight=scale_pos_weight,
+ # objective="binary:logistic",
+ # eval_metric="logloss",
+ # n_jobs=-1,
+ # random_state=42
+ # )
+ #
+ # model.fit(
+ # X_train,
+ # y_train,
+ # eval_set=[(X_valid, y_valid)],
+ # verbose=False
+ # )
+ #
+ # best_threshold = 0
+ # proba = model.predict_proba(X_valid)[:, 1]
+ # best_score = -1
+ # for t in np.linspace(0.2, 0.8, 30):
+ # preds = (proba > t).astype(int)
+ # precision = precision_score(y_valid, preds, zero_division=0)
+ # if precision < 0.6:
+ # score = 0
+ # else:
+ # recall = recall_score(y_valid, preds, zero_division=0)
+ # score = (0.7 * recall) + (0.3 * precision)
+ #
+ # if score > best_score:
+ # best_threshold = t
+ # best_score = score
+ # print("Best threshold:", best_threshold)
+ #
+ # return best_score
+ #
+ # # proba = model.predict_proba(X_valid)[:, 1]
+ # #
+ # # thresholds = np.linspace(0.1, 0.9, 50)
+ # # best_f1 = max(f1_score(y_valid, (proba > t)) for t in thresholds)
+ # #
+ # # return best_f1
+
+ def objective(trial):
+
+ model = LGBMClassifier(
+ n_estimators=trial.suggest_int("n_estimators", 300, 700),
+ learning_rate=trial.suggest_float("learning_rate", 0.02, 0.08),
+ max_depth=trial.suggest_int("max_depth", 3, 6),
+ num_leaves=trial.suggest_int("num_leaves", 20, 80),
+ # 🔥 FIX CRITIQUE
+ min_child_samples=trial.suggest_int("min_child_samples", 10, 50),
+ subsample=trial.suggest_float("subsample", 0.7, 1.0),
+ colsample_bytree=trial.suggest_float("colsample_bytree", 0.7, 1.0),
+ # 🔥 FIX CRITIQUE
+ reg_alpha=trial.suggest_float("reg_alpha", 0.0, 0.1),
+ reg_lambda=trial.suggest_float("reg_lambda", 0.5, 1.5),
+ scale_pos_weight=scale_pos_weight,
+ random_state=42,
n_jobs=-1
)
- local_model.fit(
- X_train,
- y_train,
- eval_set=[(X_valid, y_valid)],
- # early_stopping_rounds=50,
- verbose=False
- )
+ model.fit(X_train, y_train)
+ proba = model.predict_proba(X_valid)[:, 1]
+ best_score = 0
+ for t in np.linspace(0.2, 0.8, 30):
+ preds = (proba > t).astype(int)
- proba = local_model.predict_proba(X_valid)[:, 1]
- thresholds = np.linspace(0.1, 0.9, 50)
- best_f1 = max(f1_score(y_valid, (proba > t)) for t in thresholds)
+ precision = precision_score(y_valid, preds)
+ recall = recall_score(y_valid, preds)
- return best_f1
+ # 🎯 ton objectif réel
+ if precision < 0.6:
+ score = 0
+ else:
+ score = (0.7 * recall) + (0.3 * precision)
+ if score > best_score:
+ best_score = score
+
+ return best_score
+
+ # 3️⃣ Lancer l'optimisation
study = optuna.create_study(direction="maximize")
- study.optimize(objective, n_trials=20)
+ study.optimize(objective, n_trials=200)
+
+ # 4️⃣ Afficher les meilleurs hyperparamètres
+ print("✅ Best trial:")
+ trial = study.best_trial
+ print(trial.params)
+
+ # 5️⃣ Entraîner le modèle final avec les meilleurs params
+ best_model = XGBClassifier(
+ **trial.params,
+ scale_pos_weight=scale_pos_weight,
+ objective="binary:logistic",
+ eval_metric="logloss",
+ n_jobs=-1,
+ random_state=42
+ )
+ best_model.fit(X_train, y_train)
+ self.train_model = best_model
+
+ # 6️⃣ Calcul du meilleur seuil F1
+ proba = best_model.predict_proba(X_valid)[:, 1]
+ thresholds = np.linspace(0.1, 0.9, 50)
+ f1_scores = [f1_score(y_valid, proba > t) for t in thresholds]
+ best_threshold = thresholds[np.argmax(f1_scores)]
+ print("✅ Meilleur seuil F1:", best_threshold)
# SHAP
# Reconstruction du modèle final avec les meilleurs hyperparamètres
# Récupération des meilleurs paramètres trouvés
best_params = study.best_params
- best_model = XGBClassifier(**best_params)
- best_model.fit(X_train, y_train)
- self.train_model = best_model
-
# === SHAP plots ===
# Calcul SHAP
explainer = shap.TreeExplainer(self.train_model)
@@ -1954,10 +2357,10 @@ class FrictradeLearning(IStrategy):
for k, v in best_trial.params.items():
print(f" - {k}: {v}")
- # All trials summary
- print("\n=== ALL TRIALS ===")
- for t in study.trials:
- print(f"Trial {t.number}: f1 = {t.value}, params = {t.params}")
+ # # All trials summary
+ # print("\n=== ALL TRIALS ===")
+ # for t in study.trials:
+ # print(f"Trial {t.number}: f1 = {t.value}, params = {t.params}")
# DataFrame of trials
df = study.trials_dataframe()
@@ -2013,14 +2416,14 @@ class FrictradeLearning(IStrategy):
feat_imp = pd.Series(importances, index=X_train.columns).sort_values(ascending=False)
# Affichage
- feat_imp.plot(kind='bar', figsize=(12, 6))
+ feat_imp.plot(kind='bar', figsize=(18, 6))
plt.title("Feature importances")
# plt.show()
plt.savefig(f"{self.path}/Feature importances.png", bbox_inches='tight')
result = permutation_importance(self.train_model, X_valid, y_valid, scoring='f1', n_repeats=10, random_state=42)
perm_imp = pd.Series(result.importances_mean, index=X_valid.columns).sort_values(ascending=False)
- perm_imp.plot(kind='bar', figsize=(12, 6))
+ perm_imp.plot(kind='bar', figsize=(18, 6))
plt.title("Permutation feature importance")
# plt.show()
plt.savefig(f"{self.path}/Permutation feature importance.png", bbox_inches='tight')
@@ -2036,6 +2439,7 @@ class FrictradeLearning(IStrategy):
force_plot = shap.force_plot(explainer.expected_value, shap_values[0, :], X_valid.iloc[0, :])
shap.save_html(f"{self.path}/shap_force_plot.html", force_plot)
+ print("\nGénération des dépendances :\n")
fig, ax = plt.subplots(figsize=(24, 48))
PartialDependenceDisplay.from_estimator(
self.train_model,
@@ -2065,7 +2469,12 @@ class FrictradeLearning(IStrategy):
print(f"Accuracy: {acc:.3f}")
# 7️⃣ Sauvegarde du modèle
- joblib.dump(self.train_model, f"{self.path}/{pair}_rf_model.pkl")
+ joblib.dump(
+ {"model": self.train_model,
+ "threshold": best_threshold,
+ "features": self.model_indicators},
+ f"{self.path}/{pair}_rf_model.pkl"
+ )
print(f"✅ Modèle sauvegardé sous {pair}_rf_model.pkl")
# X = dataframe des features (après shift/rolling/indicators)
@@ -2092,6 +2501,26 @@ class FrictradeLearning(IStrategy):
self.analyze_model(pair, self.train_model, X_train, X_valid, y_train, y_valid)
+ def trading_score(self, y_true, y_pred_proba, prices, threshold=0.5):
+ trades = (y_pred_proba > threshold).astype(int)
+
+ profit = 0
+ trade_count = 0
+
+ for i in range(len(trades) - 1):
+ if trades[i] == 1:
+ entry = prices[i]
+ exit = prices[i + 1]
+
+ pct = (exit - entry) / entry
+ profit += pct
+ trade_count += 1
+
+ if trade_count == 0:
+ return -1 # pénalité si aucun trade
+
+ return profit
+
def inspect_model(self, model):
"""
Affiche les informations d'un modèle ML déjà entraîné.
@@ -2383,11 +2812,11 @@ class FrictradeLearning(IStrategy):
numeric_cols = dataframe.select_dtypes(include=['int64', 'float64']).columns
# Étape 2 : enlever constantes
usable_cols = [c for c in numeric_cols if dataframe[c].nunique() > 1
- and not c.endswith("_state")
- and not c.endswith("_1d")
+ # and not c.endswith("_state")
# and not c.endswith("_1h")
- and not c.startswith("open") and not c.startswith("close")
- and not c.startswith("low") and not c.startswith("high")
+ and not c.startswith("open")
+ # and not c.startswith("close")
+ # and not c.startswith("low") and not c.startswith("high")
and not c.startswith("haopen") and not c.startswith("haclose")
# and not c.startswith("bb_lower") and not c.startswith("bb_upper")
# and not c.startswith("bb_middle")
@@ -2396,16 +2825,16 @@ class FrictradeLearning(IStrategy):
and not c.startswith('stop_buying')
and not c.startswith('target')
and not c.startswith('lvl')
- and not c.startswith('sma5_deriv1_1h')
- and not c.startswith('sma5_1h')
- and not c.startswith('sma12_deriv1_1h')
- and not c.startswith('sma12_1h')
- and not c.startswith('confidence_index')
- and not c.startswith('price_change')
- and not c.startswith('price_score')
- and not c.startswith('heat_score')
- and not c.startswith('min30_1d')
- and not c.startswith('max30_1d')
+ # and not c.startswith('sma5_deriv1_1h')
+ # and not c.startswith('sma5_1h')
+ # and not c.startswith('sma12_deriv1_1h')
+ # and not c.startswith('sma12_1h')
+ # and not c.startswith('confidence_index')
+ # and not c.startswith('price_change')
+ # and not c.startswith('price_score')
+ # and not c.startswith('heat_score')
+ # and not c.startswith('min30_1d')
+ # and not c.startswith('max30_1d')
]
# Étape 3 : remplacer inf et NaN par 0
dataframe[usable_cols] = dataframe[usable_cols].replace([np.inf, -np.inf], 0).fillna(0)
@@ -2608,41 +3037,6 @@ class FrictradeLearning(IStrategy):
return informative
- def calculModelInformative(self, informative):
- # préparation
- # print(df)
- df = informative.copy()
- X = df[self.listUsableColumns(df)]
- df['target'] = ((df["sma24"].shift(-13) - df["sma24"]) > 0).astype(int)
- df['target'] = df['target'].fillna(0).astype(int)
- y = df['target']
-
- # train/test
- X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.2)
-
- # Pipeline normalisé + Logistic Regresson
- clf = Pipeline([
- ("scaler", StandardScaler()),
- ("logreg", LogisticRegression(max_iter=5000))
- ])
-
- # Calibration CV automatique
- cal = CalibratedClassifierCV(clf, cv=3, method="isotonic")
-
- # Entraînement
- cal.fit(X_train, y_train)
-
- # Probabilités calibrées
- probas = cal.predict_proba(X_test)[:, 1]
- # Injection propre des probabilités dans le dataframe original (aux bons index)
- df.loc[X_test.index, 'ml_prob'] = probas
-
- print("Brier score:", brier_score_loss(y_test, probas))
- print("ROC AUC:", roc_auc_score(y_test, probas))
-
- # joindre probabilités au df (dernières lignes correspondantes)
- return probas
-
def prune_features(self, model, dataframe, feature_columns, importance_threshold=0.01):
"""
Supprime les features dont l'importance est inférieure au seuil.
@@ -2674,3 +3068,999 @@ class FrictradeLearning(IStrategy):
# print(f"⚡ Features conservées ({len(kept_features)} / {len(feature_columns)}): {kept_features}")
return dataframe_pruned, kept_features
+
+ def trainModel2(self, df, metadata):
+ pair = self.getShortName(metadata['pair'])
+ pd.set_option('display.max_rows', None)
+ pd.set_option('display.max_columns', None)
+ pd.set_option("display.width", 200)
+ path = self.path # f"user_data/plots/{pair}/"
+ os.makedirs(path, exist_ok=True)
+
+ horizon = 300 # 5h en 1min
+
+ df['future_max'] = df['close'].shift(-1).rolling(horizon).max()
+ df['future_min'] = df['close'].shift(-1).rolling(horizon).min()
+ tp = 0.005 # +0.5%
+ sl = 0.003 # -0.3% (important !)
+
+ df['target'] = 0
+
+ # 🎯 cas gagnant
+ df.loc[df['future_max'] > df['close'] * (1 + tp), 'target'] = 1
+
+ # 💀 cas perdant
+ df.loc[df['future_min'] < df['close'] * (1 - sl), 'target'] = -1
+
+ # Filtre
+ df = df[df['atr_norm'] > 0.002]
+
+ print("===== 🚀 TRAIN MODEL START =====")
+ df = df.dropna().copy()
+
+ features = self.listUsableColumns(df)
+ target_col = "target"
+
+ # 3️⃣ Créer la cible : 1 si le prix monte dans les prochaines bougies
+ df['target'] = 0
+ # Exemple : 3 classes
+ # Classe 0 : percent30 < -0.01
+ # Classe 1 : -0.01 <= percent30 <= 0.01
+ # Classe 2 : percent30 > 0.01
+ df['target'] = pd.cut(
+ df['percent24'].shift(-12),
+ bins=[-np.inf, -0.005, 0.005, np.inf],
+ labels=[0, 1, 2]
+ )
+ df = df.dropna(subset=['target']) # supprime les lignes avec target NaN
+ df['target'] = df['target'].astype(int)
+
+ # df = df.drop(columns=['percent24'])
+ # features.remove('percent24')
+ # features.remove('open')
+ # features.remove('close')
+ # features.remove('high')
+ # features.remove('low')
+
+ # for i in range(len(df) - horizon):
+ # window = df.iloc[i + 1:i + 1 + horizon]
+ #
+ # entry = df.iloc[i]['close']
+ # tp_price = entry * (1 + tp)
+ # sl_price = entry * (1 - sl)
+ #
+ # hit_tp = window[window['high'] >= tp_price]
+ # hit_sl = window[window['low'] <= sl_price]
+ #
+ # if not hit_tp.empty and not hit_sl.empty:
+ # if hit_tp.index[0] < hit_sl.index[0]:
+ # df.iloc[i, df.columns.get_loc('target')] = 1
+ # else:
+ # df.iloc[i, df.columns.get_loc('target')] = -1
+ # elif not hit_tp.empty:
+ # df.iloc[i, df.columns.get_loc('target')] = 1
+ # elif not hit_sl.empty:
+ # df.iloc[i, df.columns.get_loc('target')] = -1
+
+ features = self.select_features_pipeline(df)
+
+ X = df[features]
+ y = df['target'] #(df['target'] == 1).astype(int) # df[target_col]
+ # df = df[features]
+
+ print("DF shape:", df.shape)
+ print("Columns:", features)
+
+ # if "target" in features:
+ # print("Target raw: ", df["target"].value_counts(dropna=False))
+ # else:
+ # print("❌ target column missing")
+
+ print("Target distribution:")
+ print(y.value_counts(normalize=True))
+
+ # ⚠️ split temporel (CRUCIAL en trading)
+ split = int(len(df) * 0.8)
+ X_train, X_valid = X.iloc[:split], X.iloc[split:]
+ y_train, y_valid = y.iloc[:split], y.iloc[split:]
+
+ # ⚠️ SMOTE uniquement sur TRAIN
+ smote = SMOTE(random_state=42)
+ X_train_res, y_train_res = smote.fit_resample(X_train, y_train)
+
+ print("After SMOTE:")
+ print(pd.Series(y_train_res).value_counts(normalize=True))
+
+ num_classes = len(np.unique(y_train)) # nombre de classes dans ton target
+ # =========================
+ # 🎯 OPTUNA OBJECTIVE
+ # =========================
+ def objective(trial):
+ params = {
+ "objective": "multiclass", # <-- changer pour multiclass
+ "metric": "multi_logloss", # <-- metric adaptée au multiclass
+ "num_class": num_classes, # <-- nombre de classes
+ "boosting_type": "gbdt",
+
+ "num_leaves": trial.suggest_int("num_leaves", 16, 128),
+ "max_depth": trial.suggest_int("max_depth", 3, 10),
+
+ "learning_rate": trial.suggest_float("learning_rate", 0.005, 0.1, log=True),
+
+ "feature_fraction": trial.suggest_float("feature_fraction", 0.6, 1.0),
+ "bagging_fraction": trial.suggest_float("bagging_fraction", 0.6, 1.0),
+ "bagging_freq": trial.suggest_int("bagging_freq", 1, 10),
+
+ "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
+
+ "lambda_l1": trial.suggest_float("lambda_l1", 1e-4, 10, log=True),
+ "lambda_l2": trial.suggest_float("lambda_l2", 1e-4, 10, log=True),
+
+ "verbose": -1,
+ "seed": 42,
+ }
+
+ train_data = lgb.Dataset(X_train_res, y_train_res)
+ valid_data = lgb.Dataset(X_valid, y_valid)
+
+ model = lgb.train(
+ params,
+ train_data,
+ num_boost_round=1000,
+ valid_sets=[valid_data],
+ callbacks=[lgb.early_stopping(50), lgb.log_evaluation(0)]
+ )
+
+ proba = model.predict(X_valid)
+ preds = np.argmax(proba, axis=1) # <-- pour multiclass
+
+ f1 = f1_score(y_valid, preds, average='macro') # <-- multiclass
+ return f1
+
+ # =========================
+ # 🚀 RUN OPTUNA
+ # =========================
+ study = optuna.create_study(direction="maximize")
+ study.optimize(objective, n_trials=200)
+
+ print("===== 🏆 BEST PARAMS =====")
+ print(study.best_params)
+
+ best_params = study.best_params.copy()
+ # best_threshold = best_params.pop("threshold")
+
+ # =========================
+ # 🔥 TRAIN FINAL MODEL
+ # =========================
+ final_params = {
+ **best_params,
+ "objective": "multiclass",
+ "metric": "multi_logloss",
+ "num_class": num_classes,
+ "boosting_type": "gbdt",
+ "verbose": -1,
+ "seed": 42
+ }
+
+ # Entraînement
+ train_data = lgb.Dataset(X_train_res, y_train_res)
+ model = lgb.train(final_params, train_data, num_boost_round=1000)
+
+ # =========================
+ # 📊 EVALUATION MULTICLASS
+ # =========================
+ proba = model.predict(X_valid) # shape = (n_samples, n_classes)
+ preds = np.argmax(proba, axis=1) # Classe prédite
+
+ print("===== 📊 RESULTS =====")
+ print("F1:", f1_score(y_valid, preds, average='macro'))
+ print("Precision:", precision_score(y_valid, preds, average='macro'))
+ print("Recall:", recall_score(y_valid, preds, average='macro'))
+
+ # ROC AUC multiclass
+ try:
+ roc = roc_auc_score(y_valid, proba, multi_class='ovr', average='macro')
+ print("ROC AUC:", roc)
+ except ValueError:
+ print("ROC AUC cannot be computed (check y_valid and number of classes)")
+
+ # model_path = f"user_data/{metadata['pair'].replace('/', '_')}_lgbm.pkl"
+ # joblib.dump({
+ # "model": model,
+ # "threshold": best_threshold,
+ # "features": features
+ # }, model_path)
+
+ self.train_model = model
+ # self.model_threshold = best_threshold
+
+ joblib.dump(
+ {"model": self.train_model,
+ # "threshold": best_threshold,
+ "features": features},
+ f"{self.path}/{pair}_rf_model.pkl"
+ )
+ print(f"✅ Modèle sauvegardé sous {pair}_rf_model.pkl")
+
+ # Génération de diagnostics pour multiclass
+ proba = self.train_model.predict(X_valid) # shape = (n_samples, n_classes)
+ preds = np.argmax(proba, axis=1) # labels prédits
+
+ self.generate_diagnostics(
+ model=self.train_model,
+ X_valid=X_valid,
+ y_valid=y_valid,
+ df=df,
+ metadata=metadata
+ )
+ print(f"Detected multiclass SHAP with {num_classes} classes")
+
+ self.generate_shap_analysis(model=self.train_model, X_valid=X_valid, metadata=metadata)
+
+ def generate_diagnostics(self, model, X_valid, y_valid, df, metadata):
+
+ os.makedirs(self.path, exist_ok=True)
+ pair = metadata["pair"].replace("/", "_")
+ # ts = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+ def save_fig(name):
+ filepath = f"{self.path}/{pair}_{name}.png"
+ plt.savefig(filepath)
+ plt.close()
+ print(f"📊 Saved: {filepath}")
+
+ # =========================
+ # 🔥 PROBA & PREDICTIONS MULTICLASS
+ # =========================
+ proba = model.predict(X_valid) # shape = (n_samples, n_classes)
+ preds = np.argmax(proba, axis=1)
+
+ # =========================
+ # 📊 PROBA DISTRIBUTION PAR CLASSE
+ # =========================
+ plt.figure(figsize=(10, 5))
+ num_classes = proba.shape[1]
+ for c in range(num_classes):
+ plt.hist(proba[:, c][y_valid == c], bins=50, alpha=0.5, label=f"Class {c}")
+ plt.title("Probability Distribution per Class")
+ plt.legend()
+ save_fig("proba_distribution")
+
+ # =========================
+ # 📈 METRICS MULTICLASS
+ # =========================
+ f1 = f1_score(y_valid, preds, average='macro')
+ precision = precision_score(y_valid, preds, average='macro', zero_division=0)
+ recall = recall_score(y_valid, preds, average='macro', zero_division=0)
+ try:
+ roc = roc_auc_score(y_valid, proba, multi_class='ovr', average='macro')
+ except ValueError:
+ roc = None
+
+ print("===== 📊 RESULTS =====")
+ print("F1:", f1)
+ print("Precision:", precision)
+ print("Recall:", recall)
+ if roc is not None:
+ print("ROC AUC:", roc)
+
+ # =========================
+ # 💰 EQUITY CURVE SIMPLIFIÉE
+ # =========================
+ prices = df.loc[X_valid.index]["close"].values
+ returns = []
+ for i in range(len(preds) - 1):
+ # Ex: utiliser uniquement classe cible 2 pour long
+ if preds[i] == 2:
+ r = (prices[i + 1] - prices[i]) / prices[i]
+ returns.append(r)
+ equity = np.cumsum(returns)
+
+ plt.figure(figsize=(10, 5))
+ plt.plot(equity)
+ plt.title("Equity Curve (Class 2 signals)")
+ save_fig("equity_curve")
+
+ # =========================
+ # 📊 FEATURE IMPORTANCE
+ # =========================
+ importance = model.feature_importance()
+ feat_names = X_valid.columns
+ imp_df = pd.DataFrame({
+ "feature": feat_names,
+ "importance": importance
+ }).sort_values(by="importance", ascending=False)
+
+ plt.figure(figsize=(10, 8))
+ plt.barh(imp_df["feature"][:20], imp_df["importance"][:20])
+ plt.gca().invert_yaxis()
+ plt.title("Feature Importance")
+ save_fig("feature_importance")
+
+ # =========================
+ # 🔍 SHAP (sample pour perf)
+ # =========================
+ try:
+ sample_size = min(1000, len(X_valid))
+ X_sample = X_valid.sample(sample_size, random_state=42)
+
+ explainer = shap.TreeExplainer(model)
+ shap_values = explainer.shap_values(X_sample)
+
+ # shap_values pour multiclass est liste de matrices
+ if isinstance(shap_values, list):
+ for c, sv in enumerate(shap_values):
+ shap.summary_plot(sv, X_sample, show=False)
+ save_fig(f"shap_summary_class{c}")
+ else:
+ shap.summary_plot(shap_values, X_sample, show=False)
+ save_fig("shap_summary")
+
+ except Exception as e:
+ print(f"⚠️ SHAP failed: {e}")
+
+ # =========================
+ # 📉 WIN / LOSS DISTRIBUTION
+ # =========================
+ wins, losses = [], []
+ for i in range(len(preds) - 1):
+ if preds[i] == 2:
+ r = (prices[i + 1] - prices[i]) / prices[i]
+ if r > 0:
+ wins.append(r)
+ else:
+ losses.append(r)
+
+ plt.figure(figsize=(10, 5))
+ plt.hist(wins, bins=50, alpha=0.5, label="Wins")
+ plt.hist(losses, bins=50, alpha=0.5, label="Losses")
+ plt.legend()
+ plt.title("Wins / Losses Distribution (Class 2)")
+ save_fig("wins_losses_distribution")
+
+
+ # def generate_diagnostics(self, model, X_valid, y_valid, df, best_threshold, metadata):
+ #
+ # import os
+ # import numpy as np
+ # import pandas as pd
+ # import matplotlib.pyplot as plt
+ # from sklearn.metrics import precision_score, recall_score
+ # import shap
+ # from datetime import datetime
+ #
+ # os.makedirs(self.path, exist_ok=True)
+ #
+ # pair = metadata["pair"].replace("/", "_")
+ # ts = datetime.now().strftime("%Y%m%d_%H%M%S")
+ #
+ # def save_fig(name):
+ # filepath = f"{self.path}/{pair}_{name}.png"
+ # plt.savefig(filepath)
+ # plt.close()
+ # print(f"📊 Saved: {filepath}")
+ #
+ # # =========================
+ # # 🔥 PROBA DISTRIBUTION
+ # # =========================
+ # proba = model.predict(X_valid)
+ #
+ # plt.figure(figsize=(10, 5))
+ # plt.hist(proba[y_valid == 0], bins=50, alpha=0.5, label="Class 0")
+ # plt.hist(proba[y_valid == 1], bins=50, alpha=0.5, label="Class 1")
+ # plt.title("Probability Distribution")
+ # plt.legend()
+ # save_fig("proba_distribution")
+ #
+ # # =========================
+ # # 📈 PRECISION / RECALL
+ # # =========================
+ # thresholds = np.linspace(0.1, 0.9, 50)
+ # precisions, recalls = [], []
+ #
+ # for t in thresholds:
+ # preds = (proba > t).astype(int)
+ # precisions.append(precision_score(y_valid, preds, zero_division=0))
+ # recalls.append(recall_score(y_valid, preds, zero_division=0))
+ #
+ # plt.figure(figsize=(10, 5))
+ # plt.plot(thresholds, precisions, label="Precision")
+ # plt.plot(thresholds, recalls, label="Recall")
+ # plt.xlabel("Threshold")
+ # plt.title("Precision / Recall vs Threshold")
+ # plt.legend()
+ # save_fig("precision_recall_curve")
+ #
+ # # =========================
+ # # 💰 EQUITY CURVE (simple)
+ # # =========================
+ # prices = df.loc[X_valid.index]["close"].values
+ #
+ # returns = []
+ # for i in range(len(proba) - 1):
+ # if proba[i] > best_threshold:
+ # r = (prices[i+1] - prices[i]) / prices[i]
+ # returns.append(r)
+ #
+ # equity = np.cumsum(returns)
+ #
+ # plt.figure(figsize=(10, 5))
+ # plt.plot(equity)
+ # plt.title("Equity Curve")
+ # save_fig("equity_curve")
+ #
+ # # =========================
+ # # 📊 FEATURE IMPORTANCE
+ # # =========================
+ # importance = model.feature_importance()
+ # feat_names = X_valid.columns
+ #
+ # imp_df = pd.DataFrame({
+ # "feature": feat_names,
+ # "importance": importance
+ # }).sort_values(by="importance", ascending=False)
+ #
+ # plt.figure(figsize=(10, 8))
+ # plt.barh(imp_df["feature"][:20], imp_df["importance"][:20])
+ # plt.gca().invert_yaxis()
+ # plt.title("Feature Importance")
+ # save_fig("feature_importance")
+ #
+ # # =========================
+ # # 🔍 SHAP (sample pour perf)
+ # # =========================
+ # try:
+ # sample_size = min(1000, len(X_valid))
+ # X_sample = X_valid.sample(sample_size, random_state=42)
+ #
+ # explainer = shap.TreeExplainer(model)
+ # shap_values = explainer.shap_values(X_sample)
+ #
+ # shap.summary_plot(shap_values, X_sample, show=False)
+ # save_fig("shap_summary")
+ #
+ # except Exception as e:
+ # print(f"⚠️ SHAP failed: {e}")
+ #
+ # # =========================
+ # # 📉 WIN / LOSS DISTRIBUTION
+ # # =========================
+ # wins, losses = [], []
+ #
+ # for i in range(len(proba) - 1):
+ # if proba[i] > best_threshold:
+ # r = (prices[i+1] - prices[i]) / prices[i]
+ # if r > 0:
+ # wins.append(r)
+ # else:
+ # losses.append(r)
+ #
+ # plt.figure(figsize=(10, 5))
+ # plt.hist(wins, bins=50, alpha=0.5, label="Wins")
+ # plt.hist(losses, bins=50, alpha=0.5, label="Losses")
+ # plt.legend()
+ # plt.title("Wins / Losses Distribution")
+ # save_fig("wins_losses_distribution")
+
def select_features_pipeline(self, df):
    """Reduce the candidate indicator set to a stable feature subset.

    Applies four successive filters — variance, pairwise correlation,
    random-forest importance, and cross-fold stability — and returns
    the names of the surviving columns.
    """
    clean = df.dropna()

    target = clean['target']
    candidates = clean[self.model_indicators]

    print("===== INITIAL FEATURES:", len(candidates.columns))

    # Step 1: drop near-constant columns.
    candidates = candidates[self.remove_low_variance(candidates)]
    print("After variance:", len(candidates.columns))

    # Step 2: drop highly correlated duplicates.
    candidates = candidates[self.remove_correlated_features(candidates)]
    print("After correlation:", len(candidates.columns))

    # Step 3: keep the top features ranked by model importance.
    candidates = candidates[self.select_by_importance(candidates, target, top_n=40)]
    print("After importance:", len(candidates.columns))

    # Step 4: keep the 25 most stable features across time splits.
    candidates = candidates[self.stability_filter(candidates, target)[:25]]

    print("Final features:", len(candidates.columns))

    return candidates.columns.tolist()
+
def remove_correlated_features(self, df, threshold=0.95):
    """Return the columns of *df* that are not excessively correlated.

    For every pair whose absolute correlation exceeds *threshold*, the
    later column (in ``df.columns`` order) is discarded; original
    column order is preserved in the result.
    """
    abs_corr = df.corr().abs()

    # Strictly-upper triangle so each pair is inspected exactly once.
    pair_mask = np.triu(np.ones(abs_corr.shape), k=1).astype(bool)
    upper_tri = abs_corr.where(pair_mask)

    dropped = {name for name in upper_tri.columns if (upper_tri[name] > threshold).any()}

    return [name for name in df.columns if name not in dropped]
+
def remove_low_variance(self, X, threshold=1e-6):
    """Return the names of the columns of *X* whose variance exceeds *threshold*."""
    vt = VarianceThreshold(threshold)
    vt.fit(X)

    # get_support() yields one boolean per column, aligned with X.columns.
    return [name for name, keep in zip(X.columns, vt.get_support()) if keep]
+
def select_by_importance(self, X, y, top_n=30):
    """Rank features with a random forest and return the *top_n* names.

    A fixed random_state keeps the ranking reproducible across runs.
    """
    forest = RandomForestClassifier(
        n_estimators=200,
        max_depth=6,
        n_jobs=-1,
        random_state=42,
    )
    forest.fit(X, y)

    ranked = (
        pd.Series(forest.feature_importances_, index=X.columns)
        .sort_values(ascending=False)
    )

    return ranked.head(top_n).index.tolist()
+
def stability_filter(self, X, y, splits=3):
    """Rank features by their mean random-forest importance across
    time-ordered cross-validation folds.

    Parameters
    ----------
    X, y : feature matrix and target, index-aligned.
    splits : number of ``TimeSeriesSplit`` folds.

    Returns
    -------
    list of ALL column names of *X*, sorted by descending mean
    importance (callers slice the head, e.g. ``[:25]``).
    """
    from sklearn.model_selection import TimeSeriesSplit

    tscv = TimeSeriesSplit(n_splits=splits)

    feature_scores = {col: [] for col in X.columns}

    for train_idx, _val_idx in tscv.split(X):
        X_train = X.iloc[train_idx]
        y_train = y.iloc[train_idx]

        # random_state pinned so repeated runs produce the same ranking,
        # consistent with select_by_importance.
        model = RandomForestClassifier(
            n_estimators=100, max_depth=5, n_jobs=-1, random_state=42
        )
        model.fit(X_train, y_train)

        for col, importance in zip(X.columns, model.feature_importances_):
            feature_scores[col].append(importance)

    # Mean importance per feature across all folds.
    stability = {col: np.mean(vals) for col, vals in feature_scores.items()}

    return sorted(stability, key=stability.get, reverse=True)
+
+ # def transformData(self, df: pd.DataFrame) -> pd.DataFrame:
+ # """
+ # Sélection des features + scaling automatique basé sur variance relative
+ # """
+ # # ---- Étape 1 : sélection des features (exemple simplifié) ----
+ # # Ici tu mets ton pipeline actuel de variance / corrélation / importance
+ # selected_features = df.columns.tolist() # remplacer par ton filtrage réel
+ # df_selected = df[selected_features].copy()
+ #
+ # # ---- Étape 2 : scaling automatique ----
+ # epsilon = 1e-8
+ # variance_relative = (df_selected.std() ** 2) / (df_selected.mean().abs() + epsilon)
+ # threshold = 1.0
+ #
+ # self.features_to_scale = variance_relative[variance_relative > threshold].index.tolist()
+ # self.features_no_scale = variance_relative[variance_relative <= threshold].index.tolist()
+ #
+ # # Appliquer StandardScaler uniquement sur les features à normaliser
+ # self.scaler = StandardScaler()
+ # df_selected[self.features_to_scale] = self.scaler.fit_transform(df_selected[self.features_to_scale])
+ # df_selected[self.features_no_scale] = df_selected[self.features_no_scale]
+ #
+ # # ---- Optionnel : print pour debug ----
+ # print("Features scalées :", self.features_to_scale)
+ # print("Features non-scalées :", self.features_no_scale)
+ #
+ # return df_selected
+ #
+ # def transform_new_data(self, df_new: pd.DataFrame) -> pd.DataFrame:
+ # """
+ # Appliquer le scaling sur de nouvelles données avec le scaler déjà entraîné
+ # """
+ # df_new_scaled = df_new.copy()
+ # if self.scaler is not None:
+ # df_new_scaled[self.features_to_scale] = self.scaler.transform(df_new_scaled[self.features_to_scale])
+ # return df_new_scaled
+
def generate_shap_analysis_class(self, model, X_valid, metadata):
    """Compute SHAP values for *model* on a sample of *X_valid* and save,
    for each class, a summary plot (PNG) and a mean-|SHAP| feature
    importance CSV under ``self.path``.
    """
    os.makedirs(self.path, exist_ok=True)
    pair = metadata["pair"].replace("/", "_")

    def save_fig(name):
        # Persist the current matplotlib figure and release it.
        filepath = f"{self.path}/{pair}_{name}.png"
        plt.savefig(filepath)
        plt.close()
        print(f"📊 Saved: {filepath}")

    # Cap the sample size: SHAP on the full validation set is slow.
    sample_size = min(1000, len(X_valid))
    X_sample = X_valid.sample(sample_size, random_state=42)

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_sample)

    print("SHAP type:", type(shap_values))
    print("SHAP shape:", getattr(shap_values, "shape", None))

    # Normalize the possible SHAP output formats to one list of
    # (samples, features) matrices, one entry per class.
    if isinstance(shap_values, list):
        # Legacy multiclass format: already one matrix per class.
        shap_list = shap_values
    elif len(shap_values.shape) == 3:
        # Newer format: single array (samples, features, classes).
        shap_list = [shap_values[:, :, i] for i in range(shap_values.shape[2])]
    else:
        # Plain binary classifier: a single 2-D matrix.
        shap_list = [shap_values]

    # One summary plot per class.
    for i, sv in enumerate(shap_list):
        shap.summary_plot(sv, X_sample, max_display=20, show=False)
        save_fig(f"shap_summary_class_{i}")

    # One mean-|SHAP| importance CSV per class.
    for i, sv in enumerate(shap_list):
        feat_importance = np.mean(np.abs(sv), axis=0)  # (n_features,)
        imp_df = pd.DataFrame({
            "feature": X_sample.columns,
            "importance": feat_importance
        }).sort_values(by="importance", ascending=False)
        imp_df.to_csv(f"{self.path}/{pair}_shap_importance_class_{i}.csv", index=False)
+
def trainModel3(self, df, metadata):
    """Train and persist a 3-class (down / flat / up) LightGBM model.

    Pipeline: build the multiclass target from the 12-bar-forward
    `percent24`, select features, split train/validation
    chronologically, oversample the train split with SMOTE, tune
    hyper-parameters with Optuna on macro-F1, train the final booster,
    print validation metrics, save the model, then run diagnostics,
    SHAP analysis and BUY-rule extraction.
    """
    pair = self.getShortName(metadata['pair'])
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option("display.width", 200)
    path = self.path  # f"user_data/plots/{pair}/"
    os.makedirs(path, exist_ok=True)

    # 1) Usable columns (candidate features).
    features = self.listUsableColumns(df)

    target_col = "target"

    # 2) Build the multiclass target from the forward 12-bar return:
    #    Class 0: percent24 < -0.0025 (down)
    #    Class 1: -0.0025 <= percent24 <= 0.0025 (flat)
    #    Class 2: percent24 > 0.0025 (up)
    df['target'] = pd.cut(
        df['percent24'].shift(-12),
        bins=[-np.inf, -0.0025, 0.0025, np.inf],
        labels=[0, 1, 2]
    )

    # Drop the rows whose target is NaN because of shift(-12).
    df = df.dropna(subset=['target'])
    features = self.select_features_pipeline_for_class(df)
    df['target'] = df['target'].astype(int)

    # Never let the label source leak into the feature set.
    if 'percent24' in features:
        features.remove('percent24')

    # 3) Split X and y.
    X = df[features]
    y = df['target']

    print("DF shape:", df.shape)
    print("Columns:", features)
    print("Target distribution:")
    print(y.value_counts(normalize=True))

    # 4) Chronological 80/20 train / validation split (no shuffle).
    split = int(len(df) * 0.8)
    X_train, X_valid = X.iloc[:split], X.iloc[split:]
    y_train, y_valid = y.iloc[:split], y.iloc[split:]

    # 5) SMOTE oversampling on the train split only, so the
    #    validation split stays untouched (no leakage).
    smote = SMOTE(random_state=42)
    X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

    # Number of classes actually present after resampling.
    num_classes = len(np.unique(y_train_res))

    # =========================
    # 🎯 OPTUNA OBJECTIVE
    # =========================
    def objective(trial):
        # Search space for the LightGBM multiclass booster.
        params = {
            "objective": "multiclass",
            "metric": "multi_logloss",
            "num_class": num_classes,
            "boosting_type": "gbdt",

            "num_leaves": trial.suggest_int("num_leaves", 16, 128),
            "max_depth": trial.suggest_int("max_depth", 3, 10),

            "learning_rate": trial.suggest_float("learning_rate", 0.005, 0.1, log=True),

            "feature_fraction": trial.suggest_float("feature_fraction", 0.6, 1.0),
            "bagging_fraction": trial.suggest_float("bagging_fraction", 0.6, 1.0),
            "bagging_freq": trial.suggest_int("bagging_freq", 1, 10),

            "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),

            "lambda_l1": trial.suggest_float("lambda_l1", 1e-4, 10, log=True),
            "lambda_l2": trial.suggest_float("lambda_l2", 1e-4, 10, log=True),

            "verbose": -1,
            "seed": 42
        }

        train_data = lgb.Dataset(X_train_res, y_train_res)
        valid_data = lgb.Dataset(X_valid, y_valid)

        model = lgb.train(
            params,
            train_data,
            num_boost_round=1000,
            valid_sets=[valid_data],
            callbacks=[lgb.early_stopping(50), lgb.log_evaluation(0)]
        )

        # Per-class probabilities, shape (n_samples, n_classes).
        proba = model.predict(X_valid)
        preds = np.argmax(proba, axis=1)

        # Macro-F1 weights every class equally, regardless of support.
        f1 = f1_score(y_valid, preds, average='macro')
        return f1

    # =========================
    # 🚀 RUN OPTUNA
    # =========================
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=10)

    best_params = study.best_params.copy()

    # =========================
    # 🔥 TRAIN FINAL MODEL
    # =========================
    # Re-add the fixed params that Optuna does not tune.
    final_params = {
        **best_params,
        "objective": "multiclass",
        "metric": "multi_logloss",
        "num_class": num_classes,
        "boosting_type": "gbdt",
        "verbose": -1,
        "seed": 42
    }

    train_data = lgb.Dataset(X_train_res, y_train_res)
    self.train_model = lgb.train(
        final_params,
        train_data,
        num_boost_round=1000
    )

    # Per-class probabilities over the whole dataset.
    # NOTE(review): currently unused — the per-class columns below are
    # commented out; kept for future analysis.
    probs_all_classes = self.train_model.predict(X)  # shape = (n_samples, n_classes)
    # for i in range(num_classes):
    #     df[f'prob_class_{i}'] = probs_all_classes[:, i]

    self.features = features
    self.df = df

    # =========================
    # 📊 EVALUATION MULTICLASS
    # =========================
    proba = self.train_model.predict(X_valid)  # shape = (n_samples, n_classes)
    preds = np.argmax(proba, axis=1)  # predicted class labels

    print("===== 📊 RESULTS =====")
    print("F1:", f1_score(y_valid, preds, average='macro'))
    print("Precision:", precision_score(y_valid, preds, average='macro'))
    print("Recall:", recall_score(y_valid, preds, average='macro'))

    # Multiclass ROC AUC (one-vs-rest); raises ValueError when a class
    # is absent from y_valid.
    try:
        roc = roc_auc_score(y_valid, proba, multi_class='ovr', average='macro')
        print("ROC AUC:", roc)
    except ValueError:
        print("ROC AUC cannot be computed (check y_valid and number of classes)")

    joblib.dump(
        {"model": self.train_model,
         # "threshold": best_threshold,
         "features": features},
        f"{self.path}/{pair}_rf_model.pkl"
    )
    print(f"✅ Modèle sauvegardé sous {pair}_rf_model.pkl")

    # NOTE(review): proba/preds are recomputed here although the values
    # computed above are still valid — redundant but harmless.
    proba = self.train_model.predict(X_valid)  # shape = (n_samples, n_classes)
    preds = np.argmax(proba, axis=1)  # predicted labels

    self.generate_diagnostics(
        model=self.train_model,
        X_valid=X_valid,
        y_valid=y_valid,
        df=df,
        # preds=preds,  # predicted labels, if diagnostics need them
        # proba=proba,  # class probabilities, if diagnostics need them
        metadata=metadata
    )

    self.generate_shap_analysis_class(model=self.train_model, X_valid=X_valid, metadata=metadata)

    self.extract_buy_rules_class(self.train_model, X_valid, y_valid)
+
def select_features_pipeline_for_class(self, df):
    """Three-stage feature selection for the multiclass model.

    Stages: variance filter, pairwise-correlation pruning, then
    LightGBM importance with a dynamic (mean) threshold. Returns the
    list of surviving feature names.
    """
    usable = self.listUsableColumns(df)
    X = df[usable]
    y = df['target']

    print(f"Initial features: {len(usable)}")

    # =========================
    # 1️⃣ VARIANCE
    # =========================
    # Drop near-constant columns.
    X = X.loc[:, X.var() > 1e-6]

    print(f"After variance: {X.shape[1]}")

    # =========================
    # 2️⃣ CORRELATION
    # =========================
    # For each |corr| > 0.90 pair, drop the later column.
    abs_corr = X.corr().abs()
    upper_tri = abs_corr.where(np.triu(np.ones(abs_corr.shape), k=1).astype(bool))

    redundant = [name for name in upper_tri.columns if (upper_tri[name] > 0.90).any()]

    X = X.drop(columns=redundant)

    print(f"After correlation: {X.shape[1]}")

    # =========================
    # 3️⃣ LIGHTGBM IMPORTANCE
    # =========================
    ranker = lgb.LGBMClassifier(
        objective='multiclass',
        num_class=len(y.unique()),
        n_estimators=200,
        random_state=42
    )
    ranker.fit(X, y)

    importance = (
        pd.Series(ranker.feature_importances_, index=X.columns)
        .sort_values(ascending=False)
    )

    print("Top 10 features:")
    print(importance.head(10))

    # Dynamic cut-off: keep features scoring above the mean importance.
    cutoff = importance.mean()
    selected = importance[importance > cutoff].index.tolist()

    print(f"After importance: {len(selected)}")

    return selected
+
def extract_buy_rules_class(self, model, X_valid, y_valid):
    """Derive simple threshold rules for the BUY class (label 2).

    Restricts SHAP values to the rows the model predicts as BUY,
    selects the 10 most influential features by mean |SHAP|, and
    summarizes each with its mean / 25th / 75th percentile over those
    rows.

    Returns a DataFrame with columns feature / mean / q25 / q75.
    """
    X_sample = X_valid.copy()

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_sample)

    # =========================
    # FORMAT SHAP
    # =========================
    # Normalize SHAP output to the (samples, features) matrix of the
    # BUY class (index 2), across the formats SHAP may return.
    if isinstance(shap_values, list):
        shap_class = shap_values[2]  # BUY class

    elif len(shap_values.shape) == 3:
        shap_class = shap_values[:, :, 2]

    else:
        raise Exception("SHAP format inconnu")

    # =========================
    # FOCUS ON BUY PREDICTIONS
    # =========================
    # BUG FIX: a LightGBM Booster's predict() returns per-class
    # probabilities (n_samples, n_classes), not labels, so the old
    # `preds == 2` compared probabilities to 2 and never matched.
    # Take the argmax first (same convention as trainModel3).
    proba = model.predict(X_sample)
    preds = np.argmax(proba, axis=1)
    buy_idx = np.where(preds == 2)[0]

    X_buy = X_sample.iloc[buy_idx]
    shap_buy = shap_class[buy_idx]

    print(f"BUY samples: {len(buy_idx)}")

    # =========================
    # TOP FEATURES
    # =========================
    # Rank features by mean |SHAP| over the BUY rows only.
    mean_shap = np.mean(np.abs(shap_buy), axis=0)

    importance = pd.Series(mean_shap, index=X_sample.columns)
    importance = importance.sort_values(ascending=False)

    top_features = importance.head(10).index.tolist()

    print("Top BUY features:")
    print(top_features)

    # =========================
    # RULE EXTRACTION
    # =========================
    # One rule per top feature: central tendency + interquartile range.
    rules = []
    for feat in top_features:
        values = X_buy[feat]

        rules.append({
            "feature": feat,
            "mean": values.mean(),
            "q25": values.quantile(0.25),
            "q75": values.quantile(0.75),
        })

    rules_df = pd.DataFrame(rules)

    print("\n===== BUY RULES =====")
    print(rules_df)

    return rules_df
\ No newline at end of file