diff --git a/EmptyShort.py b/EmptyShort.py index f07100c..6a36a28 100644 --- a/EmptyShort.py +++ b/EmptyShort.py @@ -742,6 +742,10 @@ class EmptyShort(IStrategy): dataframe["volume_mean"] = dataframe["volume"].rolling(20).mean() dataframe["volume_ratio"] = dataframe["volume"] / dataframe["volume_mean"] + dataframe['volume2'] = dataframe['volume'] + dataframe.loc[dataframe['hapercent'] < 0, 'volume2'] *= -1 + dataframe['volume_spike'] = (abs(dataframe['volume2']) > abs(dataframe['volume2'].rolling(window=20).mean() * 5)) \ + & (dataframe['volume'].rolling(window=5).max() > 1000) dataframe["market_state"] = 0 @@ -874,13 +878,6 @@ class EmptyShort(IStrategy): # Compter les baisses / hausses consécutives dataframe = self.calculateDownAndUp(dataframe, limit=0.0001) - dataframe['volume2'] = dataframe['volume'] - dataframe.loc[dataframe['hapercent'] < 0, 'volume2'] *= -1 - - # Volume confirmation - dataframe['volume_spike'] = (abs(dataframe['volume2']) > abs(dataframe['volume2'].rolling(window=20).mean() * 5)) \ - & (dataframe['volume'].rolling(window=5).max() > 1000) - dataframe['sma5_1h'] = dataframe['sma5_1h'].rolling(window=60).mean() # récupérer le dernier trade fermé diff --git a/FrictradeLearning.json b/FrictradeLearning.json index 2e1108b..b11535a 100644 --- a/FrictradeLearning.json +++ b/FrictradeLearning.json @@ -5,7 +5,7 @@ "0": 10 }, "stoploss": { - "stoploss": -1.0 + "stoploss": -0.02 }, "trailing": { "trailing_stop": false, diff --git a/FrictradeLearning.py b/FrictradeLearning.py index ffaa3d6..ae0cbeb 100644 --- a/FrictradeLearning.py +++ b/FrictradeLearning.py @@ -50,12 +50,23 @@ from sklearn.metrics import ( roc_curve, precision_score, recall_score ) -from sklearn.metrics import f1_score +from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score + from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler from sklearn.tree import export_text from 
xgboost import XGBClassifier +import lightgbm as lgb +import numpy as np +import pandas as pd +import optuna + +from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score +from sklearn.model_selection import train_test_split +from imblearn.over_sampling import SMOTE +from sklearn.ensemble import RandomForestClassifier +from lightgbm import LGBMClassifier # -------------------------------- @@ -156,8 +167,7 @@ class FrictradeLearning(IStrategy): 'mises': {}, 'dca_thresholds': {} } - for pair in ["BTC/USDC", "ETH/USDC", "DOGE/USDC", "XRP/USDC", "SOL/USDC", - "BTC/USDT", "ETH/USDT", "DOGE/USDT", "XRP/USDT", "SOL/USDT"] + for pair in ["BTC/USDC", "BTC/USDT", "BTC/USDT:USDT"] } trades = list() max_profit_pairs = {} @@ -549,6 +559,11 @@ class FrictradeLearning(IStrategy): short_pair = self.getShortName(pair) self.path = f"user_data/strategies/plots/{short_pair}/" # + ("valide/" if not self.dp.runmode.value in ('backtest') else '') + # dataframe['open'] = dataframe['open'] / dataframe['open'].rolling(180).mean() + # dataframe['close'] = dataframe['close'] / dataframe['close'].rolling(180).mean() + # dataframe['low'] = dataframe['low'] / dataframe['low'].rolling(180).mean() + # dataframe['high'] = dataframe['high'] / dataframe['high'].rolling(180).mean() + heikinashi = qtpylib.heikinashi(dataframe) dataframe['haopen'] = heikinashi['open'] dataframe['haclose'] = heikinashi['close'] @@ -606,7 +621,7 @@ class FrictradeLearning(IStrategy): dataframe['max5'] = talib.MAX(dataframe['mid'], timeperiod=5) dataframe['min180'] = talib.MIN(dataframe['mid'], timeperiod=180) dataframe['max180'] = talib.MAX(dataframe['mid'], timeperiod=180) - dataframe['pct180'] = ((dataframe["mid"] - dataframe['min180']) / (dataframe['max180'] - dataframe['min180'])) + # dataframe['pct180'] = ((dataframe["mid"] - dataframe['min180']) / (dataframe['max180'] - dataframe['min180'])) dataframe = self.rsi_trend_probability(dataframe, short=60, long=360) # ################### 
INFORMATIVE 1h @@ -625,21 +640,8 @@ class FrictradeLearning(IStrategy): informative['rsi'] = talib.RSI(informative['mid'], timeperiod=14) - informative['sma5'] = informative['mid'].ewm(span=5, adjust=False).mean() - informative['sma5_deriv1'] = 1000 * (informative['sma5'] - informative['sma5'].shift(1)) / informative[ - 'sma5'].shift(1) - - informative['sma12'] = informative['mid'].ewm(span=12, adjust=False).mean() - informative['sma12_deriv1'] = 1000 * (informative['sma12'] - informative['sma12'].shift(1)) / informative[ - 'sma12'].shift(1) - - informative['sma24'] = informative['mid'].ewm(span=24, adjust=False).mean() - informative['sma24_deriv1'] = 1000 * (informative['sma24'] - informative['sma24'].shift(1)) / informative[ - 'sma24'].shift(1) - - informative['sma60'] = informative['mid'].ewm(span=60, adjust=False).mean() - informative['sma60_deriv1'] = 1000 * (informative['sma60'] - informative['sma60'].shift(1)) / informative[ - 'sma60'].shift(1) + for timeperiod in [5, 12, 24, 60]: + informative[f'sma{timeperiod}'] = informative['mid'].ewm(span=timeperiod, adjust=False).mean() informative['rsi'] = talib.RSI(informative['mid'], timeperiod=14) self.calculeDerivees(informative, 'rsi', ema_period=12) @@ -647,12 +649,63 @@ class FrictradeLearning(IStrategy): # informative = self.rsi_trend_probability(informative) - # probas = self.calculModelInformative(informative) - # self.calculateConfiance(informative) # informative = self.populate1hIndicators(df=informative, metadata=metadata) # informative = self.calculateRegression(informative, 'mid', lookback=15) + + ########################################################### + # Bollinger Bands + bollinger = qtpylib.bollinger_bands(qtpylib.typical_price(informative), window=20, stds=2) + informative['bb_lowerband'] = bollinger['lower'] + informative['bb_middleband'] = bollinger['mid'] + informative['bb_upperband'] = bollinger['upper'] + informative["bb_percent"] = ( + (informative["close"] - informative["bb_lowerband"]) / 
+ (informative["bb_upperband"] - informative["bb_lowerband"]) + ) + informative["bb_width"] = (informative["bb_upperband"] - informative["bb_lowerband"]) / informative["bb_middleband"] + + # Calcul MACD + macd, macdsignal, macdhist = talib.MACD(informative['close'], fastperiod=12, slowperiod=26, signalperiod=9) + + # | Nom | Formule / définition | Signification | + # | ---------------------------- | ------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | + # | **MACD** (`macd`) | `EMA_fast - EMA_slow` (ex : 12-26 périodes) | Montre l’écart entre la moyenne courte et la moyenne longue.
- Positive → tendance haussière
- Négative → tendance baissière | + # | **Signal** (`macdsignal`) | `EMA_9(MACD)` | Sert de ligne de **signal de déclenchement**.
- Croisement du MACD au-dessus → signal d’achat
- Croisement du MACD en dessous → signal de vente | + # | **Histogramme** (`macdhist`) | `MACD - Signal` | Montre la **force et l’accélération** de la tendance.
- Positif et croissant → tendance haussière qui s’accélère
- Positif mais décroissant → ralentissement de la hausse
- Négatif et décroissant → baisse qui s’accélère
- Négatif mais croissant → ralentissement de la baisse | + + # Ajouter dans le informative + informative['macd'] = macd + informative['macdsignal'] = macdsignal + informative['macdhist'] = macdhist + + informative["volume_mean"] = informative["volume"].rolling(20).mean() + informative["volume_ratio"] = informative["volume"] / informative["volume_mean"] + informative['volume2'] = informative['volume'] + informative.loc[informative['close'].pct_change() < 0, 'volume2'] *= -1 + informative['volume_spike'] = (abs(informative['volume2']) > abs(informative['volume2'].rolling(window=20).mean() * 5)) \ + & (informative['volume'].rolling(window=5).max() > 1000) + + # --- Volatilité normalisée --- + informative['atr'] = ta.volatility.AverageTrueRange(high=informative['high'], low=informative['low'], close=informative['close'], window=14).average_true_range() + informative['atr_norm'] = informative['atr'] / informative['close'] + # --- Force de tendance --- + informative['adx'] = ta.trend.ADXIndicator(high=informative['high'], low=informative['low'], close=informative['close'], window=14).adx() + + # --- Volume directionnel (On Balance Volume) --- + informative['obv'] = ta.volume.OnBalanceVolumeIndicator(close=informative['close'], volume=informative['volume']).on_balance_volume() + self.calculeDerivees(informative, 'obv', ema_period=1) + + informative['obv12'] = ta.volume.OnBalanceVolumeIndicator(close=informative['sma12'], volume=informative['volume'].rolling(12).sum()).on_balance_volume() + informative['obv24'] = ta.volume.OnBalanceVolumeIndicator(close=informative['sma24'], volume=informative['volume'].rolling(24).sum()).on_balance_volume() + informative['rsi_slope'] = informative['rsi'].diff(3) / 3 # vitesse moyenne du RSI + informative['adx_change'] = informative['adx'] - informative['adx'].shift(12) # évolution de la tendance + informative['volatility_ratio'] = informative['atr_norm'] / informative['bb_width'] + + # informative["slope_ratio"] = 
informative["sma5_deriv1"] / (informative["sma60_deriv1"] + 1e-9) + # informative["divergence"] = (informative["rsi_deriv1"] * informative["sma5_deriv1"]) < 0 + dataframe = merge_informative_pair(dataframe, informative, '1m', '1h', ffill=True) # ################### INFORMATIVE 1d @@ -665,8 +718,62 @@ class FrictradeLearning(IStrategy): # informative = self.calculateRegression(informative, 'mid', lookback=15) # self.calculateConfiance(informative) + ########################################################### + # Bollinger Bands + bollinger = qtpylib.bollinger_bands(qtpylib.typical_price(informative), window=20, stds=2) + informative['bb_lowerband'] = bollinger['lower'] + informative['bb_middleband'] = bollinger['mid'] + informative['bb_upperband'] = bollinger['upper'] + informative["bb_percent"] = ( + (informative["close"] - informative["bb_lowerband"]) / + (informative["bb_upperband"] - informative["bb_lowerband"]) + ) + # informative["bb_width"] = (informative["bb_upperband"] - informative["bb_lowerband"]) / informative["bb_middleband"] + + # # Calcul MACD + # macd, macdsignal, macdhist = talib.MACD( + # informative['close'], + # fastperiod=12, + # slowperiod=26, + # signalperiod=9 + # ) + # + # # | Nom | Formule / définition | Signification | + # # | ---------------------------- | ------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | + # # | **MACD** (`macd`) | `EMA_fast - EMA_slow` (ex : 12-26 périodes) | Montre l’écart entre la moyenne courte et la moyenne longue.
- Positive → tendance haussière
- Négative → tendance baissière | + # # | **Signal** (`macdsignal`) | `EMA_9(MACD)` | Sert de ligne de **signal de déclenchement**.
- Croisement du MACD au-dessus → signal d’achat
- Croisement du MACD en dessous → signal de vente | + # # | **Histogramme** (`macdhist`) | `MACD - Signal` | Montre la **force et l’accélération** de la tendance.
- Positif et croissant → tendance haussière qui s’accélère
- Positif mais décroissant → ralentissement de la hausse
- Négatif et décroissant → baisse qui s’accélère
- Négatif mais croissant → ralentissement de la baisse | + # + # # Ajouter dans le informative + # informative['macd'] = macd + # informative['macdsignal'] = macdsignal + # informative['macdhist'] = macdhist + + informative["volume_mean"] = informative["volume"].rolling(20).mean() + informative["volume_ratio"] = informative["volume"] / informative["volume_mean"] + informative['volume2'] = informative['volume'] + informative.loc[informative['close'].pct_change() < 0, 'volume2'] *= -1 + informative['volume_spike'] = (abs(informative['volume2']) > abs(informative['volume2'].rolling(window=20).mean() * 5)) \ + & (informative['volume'].rolling(window=5).max() > 1000) + + for timeperiod in [3, 5, 8, 12]: + informative[f'sma{timeperiod}'] = informative['mid'].ewm(span=timeperiod, adjust=False).mean() + + informative['rsi'] = talib.RSI(informative['mid'], timeperiod=14) + self.calculeDerivees(informative, 'rsi', ema_period=12) + self.calculateScores(informative, 6) + dataframe = merge_informative_pair(dataframe, informative, '1m', '1d', ffill=True) + dataframe["pct30"] = dataframe["close"].pct_change(30) + dataframe["pct60"] = dataframe["close"].pct_change(60) + dataframe["pct120"] = dataframe["close"].pct_change(120) + dataframe["pct180"] = dataframe["close"].pct_change(180) + dataframe["pct300"] = dataframe["close"].pct_change(300) + dataframe["pct600"] = dataframe["close"].pct_change(600) + dataframe["pct1200"] = dataframe["close"].pct_change(1200) + dataframe["sma_ratio"] = dataframe["sma5_1h"] / dataframe["sma60"] + dataframe['last_price'] = dataframe['close'] dataframe['first_price'] = dataframe['close'] if self.dp: @@ -707,6 +814,10 @@ class FrictradeLearning(IStrategy): # # print(levels) + for timeperiod in [5, 12, 24, 60]: + dataframe[f'sma{timeperiod}_1h'] = dataframe[f'sma{timeperiod}_1h'].rolling(window=60).mean() + self.calculeDerivees(dataframe, f'sma{timeperiod}_1h', ema_period=12) + ########################################################### # Bollinger 
Bands bollinger = qtpylib.bollinger_bands(qtpylib.typical_price(dataframe), window=20, stds=2) @@ -717,7 +828,7 @@ class FrictradeLearning(IStrategy): (dataframe["close"] - dataframe["bb_lowerband"]) / (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) ) - dataframe["bb_width"] = (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / dataframe["sma24"] + dataframe["bb_width"] = (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / dataframe["bb_middleband"] # Calcul MACD macd, macdsignal, macdhist = talib.MACD( @@ -763,11 +874,11 @@ class FrictradeLearning(IStrategy): dataframe[f'sma_{s_short}'] = dataframe['close'].rolling(window=s_short).mean() dataframe[f'sma_{s_long}'] = dataframe['close'].rolling(window=s_long).mean() - # --- pente brute --- - dataframe['slope'] = dataframe['sma24'].diff() - - # --- lissage EMA --- - dataframe['slope_smooth'] = dataframe['slope'].ewm(span=10, adjust=False).mean() + # # --- pente brute --- + # dataframe['slope'] = dataframe['sma24'].diff() + # + # # --- lissage EMA --- + # dataframe['slope_smooth'] = dataframe['slope'].ewm(span=10, adjust=False).mean() # # RSI # window = 14 @@ -794,48 +905,21 @@ class FrictradeLearning(IStrategy): # Assure-toi qu'il est trié par date croissante timeframe = self.timeframe # --- Volatilité normalisée --- - dataframe['atr'] = ta.volatility.AverageTrueRange( - high=dataframe['high'], low=dataframe['low'], close=dataframe['close'], window=14 - ).average_true_range() + dataframe['atr'] = ta.volatility.AverageTrueRange(high=dataframe['high'], low=dataframe['low'], close=dataframe['close'], window=14).average_true_range() dataframe['atr_norm'] = dataframe['atr'] / dataframe['close'] - # --- Force de tendance --- - dataframe['adx'] = ta.trend.ADXIndicator( - high=dataframe['high'], low=dataframe['low'], close=dataframe['close'], window=14 - ).adx() + dataframe['adx'] = ta.trend.ADXIndicator(high=dataframe['high'], low=dataframe['low'], close=dataframe['close'], window=14).adx() # --- Volume 
directionnel (On Balance Volume) --- - dataframe['obv'] = ta.volume.OnBalanceVolumeIndicator( - close=dataframe['close'], volume=dataframe['volume'] - ).on_balance_volume() + dataframe['obv'] = ta.volume.OnBalanceVolumeIndicator(close=dataframe['close'], volume=dataframe['volume']).on_balance_volume() self.calculeDerivees(dataframe, 'obv', ema_period=1) - dataframe['obv12'] = ta.volume.OnBalanceVolumeIndicator( - close=dataframe['sma12'], volume=dataframe['volume'].rolling(12).sum() - ).on_balance_volume() - - dataframe['obv24'] = ta.volume.OnBalanceVolumeIndicator( - close=dataframe['sma24'], volume=dataframe['volume'].rolling(24).sum() - ).on_balance_volume() - - # --- Volatilité récente (écart-type des rendements) --- - dataframe['vol_24'] = dataframe['percent'].rolling(24).std() - - # Compter les baisses / hausses consécutives - # self.calculateDownAndUp(dataframe, limit=0.0001) - - # df : ton dataframe OHLCV + indicateurs existants - # Assurez-vous que les colonnes suivantes existent : - # 'max_rsi_12', 'roc_24', 'bb_percent_1h' - - # --- Filtrage des NaN initiaux --- - # dataframe = dataframe.dropna() - + dataframe['obv12'] = ta.volume.OnBalanceVolumeIndicator(close=dataframe['sma12'], volume=dataframe['volume'].rolling(12).sum()).on_balance_volume() + dataframe['obv24'] = ta.volume.OnBalanceVolumeIndicator(close=dataframe['sma24'], volume=dataframe['volume'].rolling(24).sum()).on_balance_volume() dataframe['rsi_slope'] = dataframe['rsi'].diff(3) / 3 # vitesse moyenne du RSI dataframe['adx_change'] = dataframe['adx'] - dataframe['adx'].shift(12) # évolution de la tendance dataframe['volatility_ratio'] = dataframe['atr_norm'] / dataframe['bb_width'] - dataframe["rsi_diff"] = dataframe["rsi"] - dataframe["rsi"].shift(3) dataframe["slope_ratio"] = dataframe["sma5_deriv1"] / (dataframe["sma60_deriv1"] + 1e-9) dataframe["divergence"] = (dataframe["rsi_deriv1"] * dataframe["sma5_deriv1"]) < 0 @@ -846,19 +930,23 @@ class FrictradeLearning(IStrategy): 
self.model_indicators = self.listUsableColumns(dataframe) + print("INDICATORS : ", self.model_indicators ) + if False and self.dp.runmode.value in ('backtest'): - self.trainModel(dataframe, metadata) + self.trainModel3(dataframe, metadata) short_pair = self.getShortName(pair) - # path=f"user_data/strategies/plots/{short_pair}/" + path=f"user_data/strategies/plots/{short_pair}/" + + data = joblib.load(f"{self.path}/{short_pair}_rf_model.pkl") + self.model = data["model"] + self.model_indicators = data["features"] + + # Préparer les features pour la prédiction + X_Valid = dataframe[self.model_indicators].fillna(0) + + # Prédiction : probabilité que le prix monte - # self.model = joblib.load(f"{self.path}/{short_pair}_rf_model.pkl") - # - # # Préparer les features pour la prédiction - # features = dataframe[self.model_indicators].fillna(0) - # - # # Prédiction : probabilité que le prix monte - # # # Affichage des colonnes intérressantes dans le model # features_pruned, kept_features = self.prune_features( # model=self.model, @@ -866,14 +954,23 @@ class FrictradeLearning(IStrategy): # feature_columns=self.model_indicators, # importance_threshold=0.005 # enlever features < % importance # ) - # + # probs = self.model.predict_proba(features)[:, 1] - # - # # Sauvegarder la probabilité pour l’analyse - # dataframe['ml_prob'] = probs - # - # if False and self.dp.runmode.value in ('backtest'): - # self.inspect_model(self.model) + probs_all_classes = self.model.predict(X_Valid) # shape = (n_samples, n_classes) + print(probs_all_classes.shape) # doit être (n_samples, 3) + + # Ajouter probabilité de chaque classe au dataframe pour analyse + for i in range(3): + dataframe[f'prob_class_{i}'] = probs_all_classes[:, i] + + # Pour la probabilité de la classe 2 : + probs = probs_all_classes[:, 2] + + # Sauvegarder la probabilité pour l’analyse + dataframe['ml_prob'] = probs + + if False and self.dp.runmode.value in ('backtest'): + self.inspect_model(self.model) # # absolute_min = 
dataframe['absolute_min'].min() @@ -1050,7 +1147,7 @@ class FrictradeLearning(IStrategy): # # return dataframe - def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame: + def populate_entry_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame: """ Buy when the model predicts a high upside probability/value. This method loads the ML model, generates predictions, and @@ -1086,16 +1183,51 @@ class FrictradeLearning(IStrategy): # , ['enter_long', 'enter_tag'] # ] = (1, f"future") - dataframe.loc[ - # (dataframe["ml_prob"].shift(1) < dataframe["ml_prob"]) - ( - (dataframe['close'].shift(3) < dataframe['min180'].shift(3)) | - (dataframe['close'].shift(4) < dataframe['min180'].shift(4)) | - (dataframe['close'].shift(5) < dataframe['min180'].shift(5)) - ) - & (dataframe['hapercent'] > 0) - , ['enter_long', 'enter_tag'] - ] = (1, f"min180") + score = ( + (dataframe['max_rsi_12'] > 70).astype(int) * 3 + + (dataframe['pct30'] < 0).astype(int) * 2 + + (dataframe['percent12'] < 0).astype(int) * 2 + + (dataframe['rsi_dist'] < 0).astype(int) * 1 + ) + + dataframe.loc[score >= 5, ['enter_long', 'enter_tag']] = (1, f"long") + + # dataframe.loc[ + # # (dataframe["ml_prob"].shift(1) < dataframe["ml_prob"]) + # ( + # # 🔥 RSI récemment élevé (surachat) + # (dataframe['max_rsi_12'] > 70) & + # + # # 📉 retournement en cours + # (dataframe['rsi'] < dataframe['max_rsi_12'] - 10) & + # + # # 📉 perte de momentum court terme + # (dataframe['pct30'] < 0) & + # + # # 📉 confirmation + # (dataframe['percent12'] < 0) + # ) + # & (dataframe['hapercent'] > 0) + # , ['enter_long', 'enter_tag'] + # ] = (1, f"long") + + # dataframe.loc[ + # # (dataframe["ml_prob"].shift(1) < dataframe["ml_prob"]) + # ( + # dataframe['prob_class_0'] > 0.45 + # ) + # & (dataframe['hapercent'] < 0) + # , ['enter_short', 'enter_tag'] + # ] = (1, f"short") + + score = ( + (dataframe['pct30'] > 0.01).astype(int) * 3 + + (dataframe['percent12'] > 0.005).astype(int) * 3 + + (dataframe['rsi'] 
> 60).astype(int) * 2 + + (dataframe['rsi'] < dataframe['rsi'].shift(1)).astype(int) * 1 + ) + + dataframe.loc[score >= 5, ['enter_short', 'enter_tag']] = (1, f"short") dataframe['test'] = np.where(dataframe['enter_long'] == 1, dataframe['close'] * 1.01, np.nan) @@ -1188,7 +1320,7 @@ class FrictradeLearning(IStrategy): # # return dataframe - def populate_sell_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame: + def populate_exit_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame: return dataframe @@ -1225,14 +1357,14 @@ class FrictradeLearning(IStrategy): return max(round(y), 1) # évite les valeurs négatives def adjust_stake_amount(self, pair: str, last_candle: DataFrame): - if (self.pairs[pair]['first_amount'] > 0): - amount = min(self.wallets.get_available_stake_amount(), self.pairs[pair]['first_amount']) - else: - if last_candle['enter_tag'] in ['fall', 'bear', 'Force', 'Range-']: - amount = self.wallets.get_available_stake_amount() / 5 - else: - amount = self.wallets.get_available_stake_amount() / 3# / (2 * self.pairs[pair]['count_of_lost'] + 1) - return min(amount, self.wallets.get_available_stake_amount()) + # if (self.pairs[pair]['first_amount'] > 0): + # amount = min(self.wallets.get_available_stake_amount(), self.pairs[pair]['first_amount']) + # else: + # if last_candle['enter_tag'] in ['fall', 'bear', 'Force', 'Range-']: + # amount = self.wallets.get_available_stake_amount() / 5 + # else: + # amount = self.wallets.get_available_stake_amount() / 3# / (2 * self.pairs[pair]['count_of_lost'] + 1) + return self.wallets.get_available_stake_amount() def calculateMises(self, pair, ath, val): # ath = max(self.pairs[pair]['last_max'], self.get_last_ath_before_candle(last_candle)) @@ -1527,77 +1659,80 @@ class FrictradeLearning(IStrategy): # stake=0 # ) - if current_profit < - 0.02 and last_candle[f"close"] <= last_candle['sma60']: - self.pairs[pair]['force_sell'] = True - return 'sma60' + if trade.is_short: + if current_profit > 0.005 and \ 
+ (baisse > 0.25 and last_candle[f"close"] <= last_candle['sma24']) \ + and last_candle['hapercent'] > 0 : + self.pairs[pair]['force_sell'] = True + return 'B30sht' + else: + # if current_profit < - 0.02 and last_candle[f"close"] <= last_candle['sma60']: + # self.pairs[pair]['force_sell'] = True + # return 'sma60' - if profit > 5 and \ - (baisse > 0.25 and last_candle[f"close"] <= last_candle['sma24']) \ - and last_candle['hapercent'] <0 : - self.pairs[pair]['force_sell'] = True - return 'B30' + if current_profit > 0.005 and \ + (baisse > 0.25 and last_candle[f"close"] <= last_candle['sma24']) \ + and last_candle['hapercent'] <0 : + self.pairs[pair]['force_sell'] = True + return 'B30Lng' - if profit > 0 and last_candle['cross_sma60']: #5 or last_candle['rsi_1d'] < 30: - return 'Cross' + # if profit > 0 and last_candle['cross_sma60']: #5 or last_candle['rsi_1d'] < 30: + # return 'Cross' + # + # if last_candle['max_rsi_24'] > 88 and last_candle['hapercent'] < 0\ + # and last_candle['sma5_deriv2'] < -0.1: + # return f"rsi_{count_of_buys}_{self.pairs[pair]['has_gain']}" - if last_candle['max_rsi_24'] > 88 and last_candle['hapercent'] < 0\ - and last_candle['sma5_deriv2'] < -0.1: - return f"rsi_{count_of_buys}_{self.pairs[pair]['has_gain']}" + limit = max_profit * (1 - current_trailing_stop_positive) + # if profit < limit and baisse > 0.2: + # return f"lim_{count_of_buys}_{self.pairs[pair]['has_gain']}" + # if last_candle['ml_prob'] > 0.5: + # if last_candle['sma12_deriv1'] > 0: # and last_candle['rsi'] < 85: + # return None - limit = max_profit * (1 - current_trailing_stop_positive) - # if profit < limit and baisse > 0.2: - # return f"lim_{count_of_buys}_{self.pairs[pair]['has_gain']}" - # if last_candle['ml_prob'] > 0.5: - # if last_candle['sma12_deriv1'] > 0: # and last_candle['rsi'] < 85: - # return None + # if last_candle['sma24_deriv1'] > 0 : #and minutes < 180 and baisse < 30: # and last_candle['sma5_deriv1'] > -0.15: + # if (minutes < 180): + # return None + # 
if (minutes > 1440 and last_candle['sma60_deriv1'] > 0) : + # return None - # if last_candle['sma24_deriv1'] > 0 : #and minutes < 180 and baisse < 30: # and last_candle['sma5_deriv1'] > -0.15: - # if (minutes < 180): - # return None - # if (minutes > 1440 and last_candle['sma60_deriv1'] > 0) : - # return None + # # ----- 4) OFFSET : faut-il attendre de dépasser trailing_stop_positive_offset ? ----- + # if current_trailing_only_offset_is_reached and max_profit > current_trailing_stop_positive_offset: + # # Max profit pas atteint ET perte < 2 * current_trailing_stop_positive + # if profit > limit: # 2 * current_trailing_stop_positive: + # print( + # f"{current_time} trailing non atteint trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} " + # f"max={round(max_profit, 4)} offset={round(current_trailing_stop_positive_offset, 4)} baisse={round(baisse,2)}") + # return None # ne pas activer le trailing encore + # else: + # print( + # f"{current_time} trailing atteint trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} " + # f"max={round(max_profit, 4)} offset={round(current_trailing_stop_positive_offset, 4)} baisse={round(baisse,2)}") + # else: + # # print( + # # f"1 - {current_time} trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} max={round(max_profit, 4)} " + # # f"limit={round(limit, 4)} offset={round(current_trailing_stop_positive_offset, 4)}" + # # f" baisse={round(baisse,2)} {round(last_candle['sma180_deriv1'], 4)} {round(last_candle['sma60_deriv1'], 4)} {round(last_candle['sma24_deriv1'], 4)}") + # + # return None + # # Sinon : trailing actif dès le début + # + # # ----- 6) Condition de vente ----- + # if 0 < profit <= trailing_stop: # and last_candle['mid'] < last_candle['sma5']: # and profit > current_trailing_stop_positive_offset: + # self.pairs[pair]['force_buy'] = True + # print( + # f"{current_time} Condition de vente trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} max={round(max_profit, 4)} " + 
# f"{round(limit, 4)} offset={round(current_trailing_stop_positive_offset, 4)} " + # f"baisse={round(baisse,2)}") + # + # return f"stop_{count_of_buys}_{self.pairs[pair]['has_gain']}" - # # ----- 4) OFFSET : faut-il attendre de dépasser trailing_stop_positive_offset ? ----- - # if current_trailing_only_offset_is_reached and max_profit > current_trailing_stop_positive_offset: - # # Max profit pas atteint ET perte < 2 * current_trailing_stop_positive - # if profit > limit: # 2 * current_trailing_stop_positive: - # print( - # f"{current_time} trailing non atteint trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} " - # f"max={round(max_profit, 4)} offset={round(current_trailing_stop_positive_offset, 4)} baisse={round(baisse,2)}") - # return None # ne pas activer le trailing encore - # else: - # print( - # f"{current_time} trailing atteint trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} " - # f"max={round(max_profit, 4)} offset={round(current_trailing_stop_positive_offset, 4)} baisse={round(baisse,2)}") - # else: - # # print( - # # f"1 - {current_time} trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} max={round(max_profit, 4)} " - # # f"limit={round(limit, 4)} offset={round(current_trailing_stop_positive_offset, 4)}" - # # f" baisse={round(baisse,2)} {round(last_candle['sma180_deriv1'], 4)} {round(last_candle['sma60_deriv1'], 4)} {round(last_candle['sma24_deriv1'], 4)}") - # - # return None - # # Sinon : trailing actif dès le début - # - # # ----- 6) Condition de vente ----- - # if 0 < profit <= trailing_stop: # and last_candle['mid'] < last_candle['sma5']: # and profit > current_trailing_stop_positive_offset: - # self.pairs[pair]['force_buy'] = True - # print( - # f"{current_time} Condition de vente trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} max={round(max_profit, 4)} " - # f"{round(limit, 4)} offset={round(current_trailing_stop_positive_offset, 4)} " - # f"baisse={round(baisse,2)}") - # - # 
return f"stop_{count_of_buys}_{self.pairs[pair]['has_gain']}" + # print( + # f"2 - {current_time} trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} max={round(max_profit, 4)} " + # f"{round(limit, 4)} offset={round(current_trailing_stop_positive_offset, 4)} " + # f"baisse={round(baisse,2)} {round(last_candle['sma180_deriv1'], 4)} {round(last_candle['sma60_deriv1'], 4)} {round(last_candle['sma24_deriv1'], 4)}") - # print( - # f"2 - {current_time} trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} max={round(max_profit, 4)} " - # f"{round(limit, 4)} offset={round(current_trailing_stop_positive_offset, 4)} " - # f"baisse={round(baisse,2)} {round(last_candle['sma180_deriv1'], 4)} {round(last_candle['sma60_deriv1'], 4)} {round(last_candle['sma24_deriv1'], 4)}") - - return None - print( - f"2 - {current_time} trailing_stop={round(trailing_stop, 4)} profit={round(profit, 4)} max={round(max_profit, 4)} " - f"{round(limit, 4)} offset={round(current_trailing_stop_positive_offset, 4)} " - f"baisse={round(baisse,2)} {round(last_candle['sma180_deriv1'], 4)} {round(last_candle['sma60_deriv1'], 4)} {round(last_candle['sma24_deriv1'], 4)}") + return None def informative_pairs(self): # get access to all pairs available in whitelist. 
@@ -1734,13 +1869,86 @@ class FrictradeLearning(IStrategy): # Corrélations des colonnes corr = df.corr(numeric_only=True) - print("Corrélation des colonnes") - print(corr) + # print("Corrélation des colonnes") + # print(corr) # 3️⃣ Créer la cible : 1 si le prix monte dans les prochaines bougies - # df['target'] = (df['sma24'].shift(-24) > df['sma24']).astype(int) - df['target'] = ((df["sma24"].shift(-13) - df["sma24"]) > 100).astype(int) - df['target'] = df['target'].fillna(0).astype(int) + os.makedirs(path, exist_ok=True) + + horizon = 120 # en 1min + indicator = 'sma60' + + df['future_max'] = df[indicator].shift(-1).rolling(horizon).max() + df['future_min'] = df[indicator].shift(-1).rolling(horizon).min() + tp = 0.0025 # +% + sl = 0.0025 # -% (important !) + + df['target'] = 0 + + # 🎯 cas gagnant + df.loc[df['future_max'] > df[indicator] * (1 + tp), 'target'] = 1 + + # 💀 cas perdant + df.loc[df['future_min'] < df[indicator] * (1 - sl), 'target'] = -1 + + # Filtre + # df = df[df['atr_norm'] > 0.002] + + print("===== 🚀 TRAIN MODEL START =====") + df = df.dropna().copy() + + features = self.listUsableColumns(df) + target_col = "target" + + # 3️⃣ Créer la cible : 1 si le prix monte dans les prochaines bougies + df['target'] = 0 + + for i in range(len(df) - horizon): + window = df.iloc[i + 1:i + 1 + horizon] + + entry = df.iloc[i][indicator] + tp_price = entry * (1 + tp) + sl_price = entry * (1 - sl) + + hit_tp = window[window[indicator] >= tp_price] + hit_sl = window[window[indicator] <= sl_price] + + if not hit_tp.empty and not hit_sl.empty: + if hit_tp.index[0] < hit_sl.index[0]: + df.iloc[i, df.columns.get_loc('target')] = 1 + else: + df.iloc[i, df.columns.get_loc('target')] = -1 + elif not hit_tp.empty: + df.iloc[i, df.columns.get_loc('target')] = 1 + elif not hit_sl.empty: + df.iloc[i, df.columns.get_loc('target')] = -1 + + working_columns = self.select_features_pipeline(df) + features=working_columns + X = df[features] + y = (df['target'] == 1).astype(int) # 
df[target_col] + + # df['target'].value_counts(normalize=True) + counts = df['target'].value_counts() + n_neg = counts.get(0, 0) # nombre de 0 + n_pos = counts.get(1, 0) # nombre de 1 + + scale_pos_weight = n_neg / n_pos + print("Samples:", len(df)) + print("Target ratio:", df['target'].mean()) + print("Working features:", len(working_columns)) + print("Used features:", len(X.columns)) + print("Poids pour la classe 1 :", scale_pos_weight) + print("==== VARIANCE ====") + print(X.var().sort_values().head(10)) + print("==== DESCRIBE ====") + print(X.describe().T[['mean', 'std']].head(20)) + print("Samples before:", len(df)) + df = df.dropna() + print("Samples after:", len(df)) + print(df['target'].value_counts()) + # time.sleep(5.5) # Pause 5.5 seconds + # Corrélations triées par importance avec une colonne cible target_corr = df.corr(numeric_only=True)["target"].sort_values(ascending=False) @@ -1798,9 +2006,20 @@ class FrictradeLearning(IStrategy): print(f"✅ Matrice enregistrée : {output_path}") # Exemple d'utilisation : - selected_corr = self.select_uncorrelated_features(df, target="target", top_n=30, corr_threshold=0.7) - print("===== 🎯 FEATURES SÉLECTIONNÉES =====") - print(selected_corr) + # selected_corr = self.select_uncorrelated_features(df, target="target", top_n=30, corr_threshold=0.98) + # print("===== 🎯 FEATURES SÉLECTIONNÉES =====") + # print(selected_corr) + # + # # 🔥 EXTRACTION CORRECTE + # working_columns = selected_corr["feature"].tolist() + + # Nettoyage + df = df[working_columns + ['target', indicator]].dropna() + + X = df[working_columns] + y = df['target'] + + self.model_indicators = working_columns # Nettoyage df = df.dropna() @@ -1814,7 +2033,15 @@ class FrictradeLearning(IStrategy): X = df[self.model_indicators] y = df['target'] # Séparation temporelle (train = 80 %, valid = 20 %) - X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, shuffle=False) + # X_train, X_valid, y_train, y_valid = train_test_split(X, y, 
test_size=0.2, shuffle=False) + split_idx = int(len(df) * 0.8) + df_train = df.iloc[:split_idx].copy() + df_valid = df.iloc[split_idx:].copy() + X_train = df_train[self.model_indicators] + y_train = df_train['target'] + X_valid = df_valid[self.model_indicators] + y_valid = df_valid['target'] + self.df_valid = df_valid # Nettoyage des valeurs invalides @@ -1847,60 +2074,236 @@ class FrictradeLearning(IStrategy): # study = optuna.create_study(direction="maximize") # study.optimize(objective, n_trials=50) - def objective(trial): - # local_model = XGBClassifier( - # n_estimators=300, # nombre d'arbres plus raisonnable - # learning_rate=0.01, # un peu plus rapide que 0.006, mais stable - # max_depth=4, # capture plus de patterns que 3, sans overfitting excessif - # subsample=0.7, # utilise 70% des lignes pour chaque arbre → réduit overfitting - # colsample_bytree=0.8, # 80% des features par arbre - # gamma=0.01, # gain minimal pour un split → régularisation - # reg_alpha=0.01, # L1 régularisation des feuilles - # reg_lambda=1, # L2 régularisation des feuilles - # n_jobs=-1, # utilise tous les cœurs CPU pour accélérer - # random_state=42, # reproductibilité - # missing=float('nan'), # valeur manquante reconnue - # eval_metric='logloss' # métrique pour classification binaire - # ) + # def objective(trial): + # # local_model = XGBClassifier( + # # n_estimators=300, # nombre d'arbres plus raisonnable + # # learning_rate=0.01, # un peu plus rapide que 0.006, mais stable + # # max_depth=4, # capture plus de patterns que 3, sans overfitting excessif + # # subsample=0.7, # utilise 70% des lignes pour chaque arbre → réduit overfitting + # # colsample_bytree=0.8, # 80% des features par arbre + # # gamma=0.01, # gain minimal pour un split → régularisation + # # reg_alpha=0.01, # L1 régularisation des feuilles + # # reg_lambda=1, # L2 régularisation des feuilles + # # n_jobs=-1, # utilise tous les cœurs CPU pour accélérer + # # random_state=42, # reproductibilité + # # 
missing=float('nan'), # valeur manquante reconnue + # # eval_metric='logloss' # métrique pour classification binaire + # # ) + # + # local_model = XGBClassifier( + # n_estimators=trial.suggest_int("n_estimators", 300, 500), + # max_depth=trial.suggest_int("max_depth", 1, 6), + # learning_rate=trial.suggest_float("learning_rate", 0.005, 0.3, log=True), + # subsample=trial.suggest_float("subsample", 0.6, 1.0), + # colsample_bytree=trial.suggest_float("colsample_bytree", 0.6, 1.0), + # scale_pos_weight=1, + # objective="binary:logistic", + # eval_metric="logloss", + # n_jobs=-1 + # ) + # + # local_model.fit( + # X_train, + # y_train, + # eval_set=[(X_valid, y_valid)], + # # early_stopping_rounds=50, + # verbose=False + # ) + # + # proba = local_model.predict_proba(X_valid)[:, 1] + # thresholds = np.linspace(0.1, 0.9, 50) + # best_f1 = max(f1_score(y_valid, (proba > t)) for t in thresholds) + # + # return best_f1 - local_model = XGBClassifier( - n_estimators=trial.suggest_int("n_estimators", 300, 500), - max_depth=trial.suggest_int("max_depth", 1, 6), - learning_rate=trial.suggest_float("learning_rate", 0.005, 0.3, log=True), - subsample=trial.suggest_float("subsample", 0.6, 1.0), - colsample_bytree=trial.suggest_float("colsample_bytree", 0.6, 1.0), - scale_pos_weight=1, - objective="binary:logistic", - eval_metric="logloss", + # def objective(trial): + # + # scale_pos_weight = (y_train == 0).sum() / max((y_train == 1).sum(), 1) + # + # local_model = XGBClassifier( + # n_estimators=trial.suggest_int("n_estimators", 300, 500), + # max_depth=trial.suggest_int("max_depth", 2, 6), + # learning_rate=trial.suggest_float("learning_rate", 0.005, 0.2, log=True), + # subsample=trial.suggest_float("subsample", 0.6, 1.0), + # colsample_bytree=trial.suggest_float("colsample_bytree", 0.6, 1.0), + # gamma=trial.suggest_float("gamma", 0, 0.1), + # reg_alpha=trial.suggest_float("reg_alpha", 0, 0.1), + # reg_lambda=trial.suggest_float("reg_lambda", 0.5, 2), + # 
scale_pos_weight=scale_pos_weight, + # objective="binary:logistic", + # eval_metric="logloss", + # n_jobs=-1, + # random_state=42 + # ) + # + # local_model.fit( + # X_train, + # y_train, + # eval_set=[(X_valid, y_valid)], + # verbose=False + # ) + # + # proba = local_model.predict_proba(X_valid)[:, 1] + # + # # 🔥 seuil optimisé + # threshold = trial.suggest_float("threshold", 0.3, 0.7) + # prices = self.df_valid["close"].values + # profit = 0 + # wins = 0 + # losses = 0 + # + # horizon = trial.suggest_int("horizon", 2, 6) + # + # min_move = trial.suggest_float("min_move", 0.002, 0.01) + # + # for i in range(len(proba) - horizon): + # if proba[i] > threshold: + # entry = prices[i] + # exit = prices[i + horizon] + # pct = (exit - entry) / entry + # + # # 🔥 filtre anti bruit + # if abs(pct) < min_move: + # continue + # + # pct -= 0.001 # fees + # profit += pct + # if pct > 0: + # wins += 1 + # else: + # losses += 1 + # + # if wins + losses == 0: + # return -1 + # + # winrate = wins / (wins + losses) + # + # # 🔥 score final + # return profit * winrate + + # 4️⃣ Fonction objectif Optuna + # def objective(trial): + # model = XGBClassifier( + # n_estimators=trial.suggest_int("n_estimators", 300, 500), + # max_depth=trial.suggest_int("max_depth", 3, 7), + # learning_rate=trial.suggest_float("learning_rate", 0.005, 0.1, log=True), + # subsample=trial.suggest_float("subsample", 0.6, 1.0), + # colsample_bytree=trial.suggest_float("colsample_bytree", 0.6, 1.0), + # gamma=trial.suggest_float("gamma", 0, 0.1), + # reg_alpha=trial.suggest_float("reg_alpha", 0, 0.1), + # reg_lambda=trial.suggest_float("reg_lambda", 1, 2), + # scale_pos_weight=scale_pos_weight, + # objective="binary:logistic", + # eval_metric="logloss", + # n_jobs=-1, + # random_state=42 + # ) + # + # model.fit( + # X_train, + # y_train, + # eval_set=[(X_valid, y_valid)], + # verbose=False + # ) + # + # best_threshold = 0 + # proba = model.predict_proba(X_valid)[:, 1] + # best_score = -1 + # for t in 
np.linspace(0.2, 0.8, 30): + # preds = (proba > t).astype(int) + # precision = precision_score(y_valid, preds, zero_division=0) + # if precision < 0.6: + # score = 0 + # else: + # recall = recall_score(y_valid, preds, zero_division=0) + # score = (0.7 * recall) + (0.3 * precision) + # + # if score > best_score: + # best_threshold = t + # best_score = score + # print("Best threshold:", best_threshold) + # + # return best_score + # + # # proba = model.predict_proba(X_valid)[:, 1] + # # + # # thresholds = np.linspace(0.1, 0.9, 50) + # # best_f1 = max(f1_score(y_valid, (proba > t)) for t in thresholds) + # # + # # return best_f1 + + def objective(trial): + + model = LGBMClassifier( + n_estimators=trial.suggest_int("n_estimators", 300, 700), + learning_rate=trial.suggest_float("learning_rate", 0.02, 0.08), + max_depth=trial.suggest_int("max_depth", 3, 6), + num_leaves=trial.suggest_int("num_leaves", 20, 80), + # 🔥 FIX CRITIQUE + min_child_samples=trial.suggest_int("min_child_samples", 10, 50), + subsample=trial.suggest_float("subsample", 0.7, 1.0), + colsample_bytree=trial.suggest_float("colsample_bytree", 0.7, 1.0), + # 🔥 FIX CRITIQUE + reg_alpha=trial.suggest_float("reg_alpha", 0.0, 0.1), + reg_lambda=trial.suggest_float("reg_lambda", 0.5, 1.5), + scale_pos_weight=scale_pos_weight, + random_state=42, n_jobs=-1 ) - local_model.fit( - X_train, - y_train, - eval_set=[(X_valid, y_valid)], - # early_stopping_rounds=50, - verbose=False - ) + model.fit(X_train, y_train) + proba = model.predict_proba(X_valid)[:, 1] + best_score = 0 + for t in np.linspace(0.2, 0.8, 30): + preds = (proba > t).astype(int) - proba = local_model.predict_proba(X_valid)[:, 1] - thresholds = np.linspace(0.1, 0.9, 50) - best_f1 = max(f1_score(y_valid, (proba > t)) for t in thresholds) + precision = precision_score(y_valid, preds) + recall = recall_score(y_valid, preds) - return best_f1 + # 🎯 ton objectif réel + if precision < 0.6: + score = 0 + else: + score = (0.7 * recall) + (0.3 * precision) + if 
score > best_score: + best_score = score + + return best_score + + # 3️⃣ Lancer l'optimisation study = optuna.create_study(direction="maximize") - study.optimize(objective, n_trials=20) + study.optimize(objective, n_trials=200) + + # 4️⃣ Afficher les meilleurs hyperparamètres + print("✅ Best trial:") + trial = study.best_trial + print(trial.params) + + # 5️⃣ Entraîner le modèle final avec les meilleurs params + best_model = XGBClassifier( + **trial.params, + scale_pos_weight=scale_pos_weight, + objective="binary:logistic", + eval_metric="logloss", + n_jobs=-1, + random_state=42 + ) + best_model.fit(X_train, y_train) + self.train_model = best_model + + # 6️⃣ Calcul du meilleur seuil F1 + proba = best_model.predict_proba(X_valid)[:, 1] + thresholds = np.linspace(0.1, 0.9, 50) + f1_scores = [f1_score(y_valid, proba > t) for t in thresholds] + best_threshold = thresholds[np.argmax(f1_scores)] + print("✅ Meilleur seuil F1:", best_threshold) # SHAP # Reconstruction du modèle final avec les meilleurs hyperparamètres # Récupération des meilleurs paramètres trouvés best_params = study.best_params - best_model = XGBClassifier(**best_params) - best_model.fit(X_train, y_train) - self.train_model = best_model - # === SHAP plots === # Calcul SHAP explainer = shap.TreeExplainer(self.train_model) @@ -1954,10 +2357,10 @@ class FrictradeLearning(IStrategy): for k, v in best_trial.params.items(): print(f" - {k}: {v}") - # All trials summary - print("\n=== ALL TRIALS ===") - for t in study.trials: - print(f"Trial {t.number}: f1 = {t.value}, params = {t.params}") + # # All trials summary + # print("\n=== ALL TRIALS ===") + # for t in study.trials: + # print(f"Trial {t.number}: f1 = {t.value}, params = {t.params}") # DataFrame of trials df = study.trials_dataframe() @@ -2013,14 +2416,14 @@ class FrictradeLearning(IStrategy): feat_imp = pd.Series(importances, index=X_train.columns).sort_values(ascending=False) # Affichage - feat_imp.plot(kind='bar', figsize=(12, 6)) + 
feat_imp.plot(kind='bar', figsize=(18, 6)) plt.title("Feature importances") # plt.show() plt.savefig(f"{self.path}/Feature importances.png", bbox_inches='tight') result = permutation_importance(self.train_model, X_valid, y_valid, scoring='f1', n_repeats=10, random_state=42) perm_imp = pd.Series(result.importances_mean, index=X_valid.columns).sort_values(ascending=False) - perm_imp.plot(kind='bar', figsize=(12, 6)) + perm_imp.plot(kind='bar', figsize=(18, 6)) plt.title("Permutation feature importance") # plt.show() plt.savefig(f"{self.path}/Permutation feature importance.png", bbox_inches='tight') @@ -2036,6 +2439,7 @@ class FrictradeLearning(IStrategy): force_plot = shap.force_plot(explainer.expected_value, shap_values[0, :], X_valid.iloc[0, :]) shap.save_html(f"{self.path}/shap_force_plot.html", force_plot) + print("\nGénération des dépendances :\n") fig, ax = plt.subplots(figsize=(24, 48)) PartialDependenceDisplay.from_estimator( self.train_model, @@ -2065,7 +2469,12 @@ class FrictradeLearning(IStrategy): print(f"Accuracy: {acc:.3f}") # 7️⃣ Sauvegarde du modèle - joblib.dump(self.train_model, f"{self.path}/{pair}_rf_model.pkl") + joblib.dump( + {"model": self.train_model, + "threshold": best_threshold, + "features": self.model_indicators}, + f"{self.path}/{pair}_rf_model.pkl" + ) print(f"✅ Modèle sauvegardé sous {pair}_rf_model.pkl") # X = dataframe des features (après shift/rolling/indicators) @@ -2092,6 +2501,26 @@ class FrictradeLearning(IStrategy): self.analyze_model(pair, self.train_model, X_train, X_valid, y_train, y_valid) + def trading_score(self, y_true, y_pred_proba, prices, threshold=0.5): + trades = (y_pred_proba > threshold).astype(int) + + profit = 0 + trade_count = 0 + + for i in range(len(trades) - 1): + if trades[i] == 1: + entry = prices[i] + exit = prices[i + 1] + + pct = (exit - entry) / entry + profit += pct + trade_count += 1 + + if trade_count == 0: + return -1 # pénalité si aucun trade + + return profit + def inspect_model(self, model): 
""" Affiche les informations d'un modèle ML déjà entraîné. @@ -2383,11 +2812,11 @@ class FrictradeLearning(IStrategy): numeric_cols = dataframe.select_dtypes(include=['int64', 'float64']).columns # Étape 2 : enlever constantes usable_cols = [c for c in numeric_cols if dataframe[c].nunique() > 1 - and not c.endswith("_state") - and not c.endswith("_1d") + # and not c.endswith("_state") # and not c.endswith("_1h") - and not c.startswith("open") and not c.startswith("close") - and not c.startswith("low") and not c.startswith("high") + and not c.startswith("open") + # and not c.startswith("close") + # and not c.startswith("low") and not c.startswith("high") and not c.startswith("haopen") and not c.startswith("haclose") # and not c.startswith("bb_lower") and not c.startswith("bb_upper") # and not c.startswith("bb_middle") @@ -2396,16 +2825,16 @@ class FrictradeLearning(IStrategy): and not c.startswith('stop_buying') and not c.startswith('target') and not c.startswith('lvl') - and not c.startswith('sma5_deriv1_1h') - and not c.startswith('sma5_1h') - and not c.startswith('sma12_deriv1_1h') - and not c.startswith('sma12_1h') - and not c.startswith('confidence_index') - and not c.startswith('price_change') - and not c.startswith('price_score') - and not c.startswith('heat_score') - and not c.startswith('min30_1d') - and not c.startswith('max30_1d') + # and not c.startswith('sma5_deriv1_1h') + # and not c.startswith('sma5_1h') + # and not c.startswith('sma12_deriv1_1h') + # and not c.startswith('sma12_1h') + # and not c.startswith('confidence_index') + # and not c.startswith('price_change') + # and not c.startswith('price_score') + # and not c.startswith('heat_score') + # and not c.startswith('min30_1d') + # and not c.startswith('max30_1d') ] # Étape 3 : remplacer inf et NaN par 0 dataframe[usable_cols] = dataframe[usable_cols].replace([np.inf, -np.inf], 0).fillna(0) @@ -2608,41 +3037,6 @@ class FrictradeLearning(IStrategy): return informative - def 
calculModelInformative(self, informative): - # préparation - # print(df) - df = informative.copy() - X = df[self.listUsableColumns(df)] - df['target'] = ((df["sma24"].shift(-13) - df["sma24"]) > 0).astype(int) - df['target'] = df['target'].fillna(0).astype(int) - y = df['target'] - - # train/test - X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.2) - - # Pipeline normalisé + Logistic Regresson - clf = Pipeline([ - ("scaler", StandardScaler()), - ("logreg", LogisticRegression(max_iter=5000)) - ]) - - # Calibration CV automatique - cal = CalibratedClassifierCV(clf, cv=3, method="isotonic") - - # Entraînement - cal.fit(X_train, y_train) - - # Probabilités calibrées - probas = cal.predict_proba(X_test)[:, 1] - # Injection propre des probabilités dans le dataframe original (aux bons index) - df.loc[X_test.index, 'ml_prob'] = probas - - print("Brier score:", brier_score_loss(y_test, probas)) - print("ROC AUC:", roc_auc_score(y_test, probas)) - - # joindre probabilités au df (dernières lignes correspondantes) - return probas - def prune_features(self, model, dataframe, feature_columns, importance_threshold=0.01): """ Supprime les features dont l'importance est inférieure au seuil. @@ -2674,3 +3068,999 @@ class FrictradeLearning(IStrategy): # print(f"⚡ Features conservées ({len(kept_features)} / {len(feature_columns)}): {kept_features}") return dataframe_pruned, kept_features + + def trainModel2(self, df, metadata): + pair = self.getShortName(metadata['pair']) + pd.set_option('display.max_rows', None) + pd.set_option('display.max_columns', None) + pd.set_option("display.width", 200) + path = self.path # f"user_data/plots/{pair}/" + os.makedirs(path, exist_ok=True) + + horizon = 300 # 5h en 1min + + df['future_max'] = df['close'].shift(-1).rolling(horizon).max() + df['future_min'] = df['close'].shift(-1).rolling(horizon).min() + tp = 0.005 # +0.5% + sl = 0.003 # -0.3% (important !) 
+ + df['target'] = 0 + + # 🎯 cas gagnant + df.loc[df['future_max'] > df['close'] * (1 + tp), 'target'] = 1 + + # 💀 cas perdant + df.loc[df['future_min'] < df['close'] * (1 - sl), 'target'] = -1 + + # Filtre + df = df[df['atr_norm'] > 0.002] + + print("===== 🚀 TRAIN MODEL START =====") + df = df.dropna().copy() + + features = self.listUsableColumns(df) + target_col = "target" + + # 3️⃣ Créer la cible : 1 si le prix monte dans les prochaines bougies + df['target'] = 0 + # Exemple : 3 classes + # Classe 0 : percent30 < -0.01 + # Classe 1 : -0.01 <= percent30 <= 0.01 + # Classe 2 : percent30 > 0.01 + df['target'] = pd.cut( + df['percent24'].shift(-12), + bins=[-np.inf, -0.005, 0.005, np.inf], + labels=[0, 1, 2] + ) + df = df.dropna(subset=['target']) # supprime les lignes avec target NaN + df['target'] = df['target'].astype(int) + + # df = df.drop(columns=['percent24']) + # features.remove('percent24') + # features.remove('open') + # features.remove('close') + # features.remove('high') + # features.remove('low') + + # for i in range(len(df) - horizon): + # window = df.iloc[i + 1:i + 1 + horizon] + # + # entry = df.iloc[i]['close'] + # tp_price = entry * (1 + tp) + # sl_price = entry * (1 - sl) + # + # hit_tp = window[window['high'] >= tp_price] + # hit_sl = window[window['low'] <= sl_price] + # + # if not hit_tp.empty and not hit_sl.empty: + # if hit_tp.index[0] < hit_sl.index[0]: + # df.iloc[i, df.columns.get_loc('target')] = 1 + # else: + # df.iloc[i, df.columns.get_loc('target')] = -1 + # elif not hit_tp.empty: + # df.iloc[i, df.columns.get_loc('target')] = 1 + # elif not hit_sl.empty: + # df.iloc[i, df.columns.get_loc('target')] = -1 + + features = self.select_features_pipeline(df) + + X = df[features] + y = df['target'] #(df['target'] == 1).astype(int) # df[target_col] + # df = df[features] + + print("DF shape:", df.shape) + print("Columns:", features) + + # if "target" in features: + # print("Target raw: ", df["target"].value_counts(dropna=False)) + # else: + # 
print("❌ target column missing") + + print("Target distribution:") + print(y.value_counts(normalize=True)) + + # ⚠️ split temporel (CRUCIAL en trading) + split = int(len(df) * 0.8) + X_train, X_valid = X.iloc[:split], X.iloc[split:] + y_train, y_valid = y.iloc[:split], y.iloc[split:] + + # ⚠️ SMOTE uniquement sur TRAIN + smote = SMOTE(random_state=42) + X_train_res, y_train_res = smote.fit_resample(X_train, y_train) + + print("After SMOTE:") + print(pd.Series(y_train_res).value_counts(normalize=True)) + + num_classes = len(np.unique(y_train)) # nombre de classes dans ton target + # ========================= + # 🎯 OPTUNA OBJECTIVE + # ========================= + def objective(trial): + params = { + "objective": "multiclass", # <-- changer pour multiclass + "metric": "multi_logloss", # <-- metric adaptée au multiclass + "num_class": num_classes, # <-- nombre de classes + "boosting_type": "gbdt", + + "num_leaves": trial.suggest_int("num_leaves", 16, 128), + "max_depth": trial.suggest_int("max_depth", 3, 10), + + "learning_rate": trial.suggest_float("learning_rate", 0.005, 0.1, log=True), + + "feature_fraction": trial.suggest_float("feature_fraction", 0.6, 1.0), + "bagging_fraction": trial.suggest_float("bagging_fraction", 0.6, 1.0), + "bagging_freq": trial.suggest_int("bagging_freq", 1, 10), + + "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), + + "lambda_l1": trial.suggest_float("lambda_l1", 1e-4, 10, log=True), + "lambda_l2": trial.suggest_float("lambda_l2", 1e-4, 10, log=True), + + "verbose": -1, + "seed": 42, + } + + train_data = lgb.Dataset(X_train_res, y_train_res) + valid_data = lgb.Dataset(X_valid, y_valid) + + model = lgb.train( + params, + train_data, + num_boost_round=1000, + valid_sets=[valid_data], + callbacks=[lgb.early_stopping(50), lgb.log_evaluation(0)] + ) + + proba = model.predict(X_valid) + preds = np.argmax(proba, axis=1) # <-- pour multiclass + + f1 = f1_score(y_valid, preds, average='macro') # <-- multiclass + return f1 + + # 
========================= + # 🚀 RUN OPTUNA + # ========================= + study = optuna.create_study(direction="maximize") + study.optimize(objective, n_trials=200) + + print("===== 🏆 BEST PARAMS =====") + print(study.best_params) + + best_params = study.best_params.copy() + # best_threshold = best_params.pop("threshold") + + # ========================= + # 🔥 TRAIN FINAL MODEL + # ========================= + final_params = { + **best_params, + "objective": "multiclass", + "metric": "multi_logloss", + "num_class": num_classes, + "boosting_type": "gbdt", + "verbose": -1, + "seed": 42 + } + + # Entraînement + train_data = lgb.Dataset(X_train_res, y_train_res) + model = lgb.train(final_params, train_data, num_boost_round=1000) + + # ========================= + # 📊 EVALUATION MULTICLASS + # ========================= + proba = model.predict(X_valid) # shape = (n_samples, n_classes) + preds = np.argmax(proba, axis=1) # Classe prédite + + print("===== 📊 RESULTS =====") + print("F1:", f1_score(y_valid, preds, average='macro')) + print("Precision:", precision_score(y_valid, preds, average='macro')) + print("Recall:", recall_score(y_valid, preds, average='macro')) + + # ROC AUC multiclass + try: + roc = roc_auc_score(y_valid, proba, multi_class='ovr', average='macro') + print("ROC AUC:", roc) + except ValueError: + print("ROC AUC cannot be computed (check y_valid and number of classes)") + + # model_path = f"user_data/{metadata['pair'].replace('/', '_')}_lgbm.pkl" + # joblib.dump({ + # "model": model, + # "threshold": best_threshold, + # "features": features + # }, model_path) + + self.train_model = model + # self.model_threshold = best_threshold + + joblib.dump( + {"model": self.train_model, + # "threshold": best_threshold, + "features": features}, + f"{self.path}/{pair}_rf_model.pkl" + ) + print(f"✅ Modèle sauvegardé sous {pair}_rf_model.pkl") + + # Génération de diagnostics pour multiclass + proba = self.train_model.predict(X_valid) # shape = (n_samples, n_classes) + 
preds = np.argmax(proba, axis=1) # labels prédits + + self.generate_diagnostics( + model=self.train_model, + X_valid=X_valid, + y_valid=y_valid, + df=df, + metadata=metadata + ) + print(f"Detected multiclass SHAP with {num_classes} classes") + + self.generate_shap_analysis(model=self.train_model, X_valid=X_valid, metadata=metadata) + + def generate_diagnostics(self, model, X_valid, y_valid, df, metadata): + + os.makedirs(self.path, exist_ok=True) + pair = metadata["pair"].replace("/", "_") + # ts = datetime.now().strftime("%Y%m%d_%H%M%S") + + def save_fig(name): + filepath = f"{self.path}/{pair}_{name}.png" + plt.savefig(filepath) + plt.close() + print(f"📊 Saved: {filepath}") + + # ========================= + # 🔥 PROBA & PREDICTIONS MULTICLASS + # ========================= + proba = model.predict(X_valid) # shape = (n_samples, n_classes) + preds = np.argmax(proba, axis=1) + + # ========================= + # 📊 PROBA DISTRIBUTION PAR CLASSE + # ========================= + plt.figure(figsize=(10, 5)) + num_classes = proba.shape[1] + for c in range(num_classes): + plt.hist(proba[:, c][y_valid == c], bins=50, alpha=0.5, label=f"Class {c}") + plt.title("Probability Distribution per Class") + plt.legend() + save_fig("proba_distribution") + + # ========================= + # 📈 METRICS MULTICLASS + # ========================= + f1 = f1_score(y_valid, preds, average='macro') + precision = precision_score(y_valid, preds, average='macro', zero_division=0) + recall = recall_score(y_valid, preds, average='macro', zero_division=0) + try: + roc = roc_auc_score(y_valid, proba, multi_class='ovr', average='macro') + except ValueError: + roc = None + + print("===== 📊 RESULTS =====") + print("F1:", f1) + print("Precision:", precision) + print("Recall:", recall) + if roc is not None: + print("ROC AUC:", roc) + + # ========================= + # 💰 EQUITY CURVE SIMPLIFIÉE + # ========================= + prices = df.loc[X_valid.index]["close"].values + returns = [] + for i in 
range(len(preds) - 1): + # Ex: utiliser uniquement classe cible 2 pour long + if preds[i] == 2: + r = (prices[i + 1] - prices[i]) / prices[i] + returns.append(r) + equity = np.cumsum(returns) + + plt.figure(figsize=(10, 5)) + plt.plot(equity) + plt.title("Equity Curve (Class 2 signals)") + save_fig("equity_curve") + + # ========================= + # 📊 FEATURE IMPORTANCE + # ========================= + importance = model.feature_importance() + feat_names = X_valid.columns + imp_df = pd.DataFrame({ + "feature": feat_names, + "importance": importance + }).sort_values(by="importance", ascending=False) + + plt.figure(figsize=(10, 8)) + plt.barh(imp_df["feature"][:20], imp_df["importance"][:20]) + plt.gca().invert_yaxis() + plt.title("Feature Importance") + save_fig("feature_importance") + + # ========================= + # 🔍 SHAP (sample pour perf) + # ========================= + try: + sample_size = min(1000, len(X_valid)) + X_sample = X_valid.sample(sample_size, random_state=42) + + explainer = shap.TreeExplainer(model) + shap_values = explainer.shap_values(X_sample) + + # shap_values pour multiclass est liste de matrices + if isinstance(shap_values, list): + for c, sv in enumerate(shap_values): + shap.summary_plot(sv, X_sample, show=False) + save_fig(f"shap_summary_class{c}") + else: + shap.summary_plot(shap_values, X_sample, show=False) + save_fig("shap_summary") + + except Exception as e: + print(f"⚠️ SHAP failed: {e}") + + # ========================= + # 📉 WIN / LOSS DISTRIBUTION + # ========================= + wins, losses = [], [] + for i in range(len(preds) - 1): + if preds[i] == 2: + r = (prices[i + 1] - prices[i]) / prices[i] + if r > 0: + wins.append(r) + else: + losses.append(r) + + plt.figure(figsize=(10, 5)) + plt.hist(wins, bins=50, alpha=0.5, label="Wins") + plt.hist(losses, bins=50, alpha=0.5, label="Losses") + plt.legend() + plt.title("Wins / Losses Distribution (Class 2)") + save_fig("wins_losses_distribution") + + + # def generate_diagnostics(self, 
model, X_valid, y_valid, df, best_threshold, metadata): + # + # import os + # import numpy as np + # import pandas as pd + # import matplotlib.pyplot as plt + # from sklearn.metrics import precision_score, recall_score + # import shap + # from datetime import datetime + # + # os.makedirs(self.path, exist_ok=True) + # + # pair = metadata["pair"].replace("/", "_") + # ts = datetime.now().strftime("%Y%m%d_%H%M%S") + # + # def save_fig(name): + # filepath = f"{self.path}/{pair}_{name}.png" + # plt.savefig(filepath) + # plt.close() + # print(f"📊 Saved: {filepath}") + # + # # ========================= + # # 🔥 PROBA DISTRIBUTION + # # ========================= + # proba = model.predict(X_valid) + # + # plt.figure(figsize=(10, 5)) + # plt.hist(proba[y_valid == 0], bins=50, alpha=0.5, label="Class 0") + # plt.hist(proba[y_valid == 1], bins=50, alpha=0.5, label="Class 1") + # plt.title("Probability Distribution") + # plt.legend() + # save_fig("proba_distribution") + # + # # ========================= + # # 📈 PRECISION / RECALL + # # ========================= + # thresholds = np.linspace(0.1, 0.9, 50) + # precisions, recalls = [], [] + # + # for t in thresholds: + # preds = (proba > t).astype(int) + # precisions.append(precision_score(y_valid, preds, zero_division=0)) + # recalls.append(recall_score(y_valid, preds, zero_division=0)) + # + # plt.figure(figsize=(10, 5)) + # plt.plot(thresholds, precisions, label="Precision") + # plt.plot(thresholds, recalls, label="Recall") + # plt.xlabel("Threshold") + # plt.title("Precision / Recall vs Threshold") + # plt.legend() + # save_fig("precision_recall_curve") + # + # # ========================= + # # 💰 EQUITY CURVE (simple) + # # ========================= + # prices = df.loc[X_valid.index]["close"].values + # + # returns = [] + # for i in range(len(proba) - 1): + # if proba[i] > best_threshold: + # r = (prices[i+1] - prices[i]) / prices[i] + # returns.append(r) + # + # equity = np.cumsum(returns) + # + # plt.figure(figsize=(10, 5)) + 
#     plt.plot(equity)
#     plt.title("Equity Curve")
#     save_fig("equity_curve")
#
#     # =========================
#     # 📊 FEATURE IMPORTANCE
#     # =========================
#     importance = model.feature_importance()
#     feat_names = X_valid.columns
#
#     imp_df = pd.DataFrame({
#         "feature": feat_names,
#         "importance": importance
#     }).sort_values(by="importance", ascending=False)
#
#     plt.figure(figsize=(10, 8))
#     plt.barh(imp_df["feature"][:20], imp_df["importance"][:20])
#     plt.gca().invert_yaxis()
#     plt.title("Feature Importance")
#     save_fig("feature_importance")
#
#     # =========================
#     # 🔍 SHAP (sampled for performance)
#     # =========================
#     try:
#         sample_size = min(1000, len(X_valid))
#         X_sample = X_valid.sample(sample_size, random_state=42)
#
#         explainer = shap.TreeExplainer(model)
#         shap_values = explainer.shap_values(X_sample)
#
#         shap.summary_plot(shap_values, X_sample, show=False)
#         save_fig("shap_summary")
#
#     except Exception as e:
#         print(f"⚠️ SHAP failed: {e}")
#
#     # =========================
#     # 📉 WIN / LOSS DISTRIBUTION
#     # =========================
#     wins, losses = [], []
#
#     for i in range(len(proba) - 1):
#         if proba[i] > best_threshold:
#             r = (prices[i+1] - prices[i]) / prices[i]
#             if r > 0:
#                 wins.append(r)
#             else:
#                 losses.append(r)
#
#     plt.figure(figsize=(10, 5))
#     plt.hist(wins, bins=50, alpha=0.5, label="Wins")
#     plt.hist(losses, bins=50, alpha=0.5, label="Losses")
#     plt.legend()
#     plt.title("Wins / Losses Distribution")
#     save_fig("wins_losses_distribution")

# NOTE(review): every definition below takes `self` and reads strategy state
# (self.path, self.model_indicators, ...). They appear to be methods of the
# strategy class whose header lies outside this chunk; the original
# indentation was lost, so indent them one level when splicing them back
# into the class body.


def select_features_pipeline(self, df):
    """Reduce ``self.model_indicators`` to a short list of useful features.

    Steps: low-variance filter, correlation filter, random-forest importance
    (top 40), then the time-series ranking from ``stability_filter`` (top 25).

    Args:
        df: DataFrame containing every column of ``self.model_indicators``
            plus a ``target`` column.

    Returns:
        list[str]: names of the retained feature columns, ordered by the
        ranking of the last step.
    """
    # Only rows incomplete in the columns actually used are discarded; a NaN
    # in an unrelated column must not shrink (or empty) the training set.
    required = list(self.model_indicators) + ['target']
    df = df.dropna(subset=required)

    y = df['target']
    X = df[self.model_indicators]
    print("===== INITIAL FEATURES:", len(X.columns))

    # 1. variance
    X = X[self.remove_low_variance(X)]
    print("After variance:", len(X.columns))

    # 2. correlation
    X = X[self.remove_correlated_features(X)]
    print("After correlation:", len(X.columns))

    # 3. importance
    X = X[self.select_by_importance(X, y, top_n=40)]
    print("After importance:", len(X.columns))

    # 4. stability across time-ordered folds
    X = X[self.stability_filter(X, y)[:25]]

    # TODO(review): an optional SHAP-based filtering step was sketched here;
    # it needs a fitted model, which this method does not have.

    print("Final features:", len(X.columns))
    return X.columns.tolist()


def remove_correlated_features(self, df, threshold=0.95):
    """Drop the later column of every highly correlated pair.

    Args:
        df: DataFrame of candidate features.
        threshold: absolute correlation above which a column is dropped.

    Returns:
        list[str]: the columns of ``df`` that are kept, in original order.
    """
    corr = df.corr().abs()

    # Keep only the strict upper triangle so each pair is examined once and a
    # column is compared only with the columns that precede it.
    upper_mask = np.triu(np.ones(corr.shape, dtype=bool), k=1)
    upper = corr.where(upper_mask)

    # NaN entries (masked cells) compare as False and never trigger a drop.
    too_correlated = (upper > threshold).any(axis=0).to_numpy()
    dropped = set(upper.columns[too_correlated])

    return [col for col in df.columns if col not in dropped]


def remove_low_variance(self, X, threshold=1e-6):
    """Return the columns of ``X`` that pass a ``VarianceThreshold`` filter.

    Args:
        X: DataFrame of candidate features.
        threshold: value handed to ``VarianceThreshold``.

    Returns:
        list[str]: names of the columns reported by ``get_support()``.
    """
    selector = VarianceThreshold(threshold)
    selector.fit(X)
    return X.columns[selector.get_support()].tolist()


def select_by_importance(self, X, y, top_n=30):
    """Rank features with a random forest and keep the ``top_n`` best.

    Args:
        X: feature DataFrame.
        y: target labels aligned with ``X``.
        top_n: number of features to keep.

    Returns:
        list[str]: the ``top_n`` feature names, most important first.
    """
    forest = RandomForestClassifier(
        n_estimators=200,
        max_depth=6,
        n_jobs=-1,
        random_state=42,
    )
    forest.fit(X, y)

    ranking = pd.Series(forest.feature_importances_, index=X.columns)
    ranking = ranking.sort_values(ascending=False)
    return ranking.head(top_n).index.tolist()


def stability_filter(self, X, y, splits=3):
    """Rank features by mean random-forest importance over time-ordered folds.

    Args:
        X: feature DataFrame, rows in chronological order.
        y: target labels aligned with ``X``.
        splits: number of ``TimeSeriesSplit`` folds.

    Returns:
        list[str]: all columns of ``X`` sorted by decreasing mean importance.
    """
    from sklearn.model_selection import TimeSeriesSplit

    folds = TimeSeriesSplit(n_splits=splits)
    scores = {col: [] for col in X.columns}

    for train_idx, _ in folds.split(X):
        # Seeded so that the ranking is reproducible from one run to the
        # next, like the forest used in select_by_importance.
        forest = RandomForestClassifier(
            n_estimators=100,
            max_depth=5,
            n_jobs=-1,
            random_state=42,
        )
        forest.fit(X.iloc[train_idx], y.iloc[train_idx])

        for col, value in zip(X.columns, forest.feature_importances_):
            scores[col].append(value)

    # mean importance per feature
    mean_importance = {col: np.mean(vals) for col, vals in scores.items()}
    return sorted(mean_importance, key=mean_importance.get, reverse=True)


# def transformData(self, df: pd.DataFrame) -> pd.DataFrame:
#     """
#     Feature selection + automatic scaling based on relative variance
#     """
#     # ---- Step 1: feature selection (simplified example) ----
#     # Plug the current variance / correlation / importance pipeline in here
#     selected_features = df.columns.tolist()  # replace with the real filtering
#     df_selected = df[selected_features].copy()
#
#     # ---- Step 2: automatic scaling ----
#     epsilon = 1e-8
#     variance_relative = (df_selected.std() ** 2) / (df_selected.mean().abs() + epsilon)
#     threshold = 1.0
#
#     self.features_to_scale = variance_relative[variance_relative > threshold].index.tolist()
#     self.features_no_scale = variance_relative[variance_relative <= threshold].index.tolist()
#
#     # Apply StandardScaler only to the features that need normalising
#     self.scaler = StandardScaler()
#     df_selected[self.features_to_scale] = self.scaler.fit_transform(df_selected[self.features_to_scale])
#     df_selected[self.features_no_scale] = df_selected[self.features_no_scale]
#
#     # ---- Optional: debug print ----
#     print("Features scalées :", self.features_to_scale)
#     print("Features non-scalées :", self.features_no_scale)
#
#     return df_selected
#
# def transform_new_data(self, df_new: pd.DataFrame) -> pd.DataFrame:
#     """
#     Apply the scaling to new data with the already fitted scaler
#     """
#     df_new_scaled = df_new.copy()
#     if self.scaler is not None:
#         df_new_scaled[self.features_to_scale] = self.scaler.transform(df_new_scaled[self.features_to_scale])
#     return df_new_scaled


def generate_shap_analysis_class(self, model, X_valid, metadata):
    """Write per-class SHAP summary plots and importance CSVs to ``self.path``.

    Args:
        model: fitted tree model accepted by ``shap.TreeExplainer``.
        X_valid: validation features; at most 1000 rows are sampled.
        metadata: dict whose ``"pair"`` entry is used in the file names.
    """
    os.makedirs(self.path, exist_ok=True)
    pair = metadata["pair"].replace("/", "_")

    def save_fig(name):
        filepath = f"{self.path}/{pair}_{name}.png"
        plt.savefig(filepath)
        plt.close()
        print(f"📊 Saved: {filepath}")

    # Sample for performance
    sample_size = min(1000, len(X_valid))
    X_sample = X_valid.sample(sample_size, random_state=42)

    # SHAP computation
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_sample)

    print("SHAP shape:", getattr(shap_values, "shape", None))
    print("SHAP type:", type(shap_values))

    # Normalise the output to one (samples, features) array per class
    if isinstance(shap_values, list):
        # older format: one array per class
        shap_list = shap_values
    elif len(shap_values.shape) == 3:
        # newer format: (samples, features, classes)
        shap_list = [shap_values[:, :, k] for k in range(shap_values.shape[2])]
    else:
        # plain binary case
        shap_list = [shap_values]

    # One summary plot and one importance table per class
    for k, class_values in enumerate(shap_list):
        shap.summary_plot(class_values, X_sample, max_display=20, show=False)
        save_fig(f"shap_summary_class_{k}")

        mean_abs = np.mean(np.abs(class_values), axis=0)  # (n_features,)
        table = pd.DataFrame({
            "feature": X_sample.columns,
            "importance": mean_abs,
        }).sort_values(by="importance", ascending=False)
        table.to_csv(f"{self.path}/{pair}_shap_importance_class_{k}.csv", index=False)

    # TODO(review): a global (all classes) SHAP importance export was sketched
    # here but never finished.


def trainModel3(self, df, metadata):
    """Train, evaluate and persist a 3-class LightGBM model.

    The target is ``percent24`` shifted 12 rows into the future and bucketed:
    class 0 below -0.0025, class 1 between -0.0025 and 0.0025, class 2 above
    0.0025. Hyper-parameters are tuned with Optuna (10 trials, macro F1 on
    the chronological validation split).

    Side effects: adds a ``target`` column to the caller's ``df``, changes
    pandas display options, sets ``self.train_model``, ``self.features`` and
    ``self.df``, writes the model file and the diagnostic outputs.

    Args:
        df: DataFrame with a ``percent24`` column and the candidate features.
        metadata: dict whose ``'pair'`` entry names the traded pair.
    """
    pair = self.getShortName(metadata['pair'])
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option("display.width", 200)
    os.makedirs(self.path, exist_ok=True)

    # Build the multiclass target
    class_labels = [0, 1, 2]
    df['target'] = pd.cut(
        df['percent24'].shift(-12),
        bins=[-np.inf, -0.0025, 0.0025, np.inf],
        labels=class_labels,
    )

    # Drop the rows left without target by the shift; the copy keeps the
    # later column assignment off a slice of the caller's frame.
    df = df.dropna(subset=['target']).copy()

    # Chronological 80/20 split position, shared by selection and training
    split = int(len(df) * 0.8)

    # Feature selection only sees the training rows; using the validation
    # rows here would leak them into the model and inflate the metrics.
    features = self.select_features_pipeline_for_class(df.iloc[:split])
    df['target'] = df['target'].astype(int)

    # percent24 is the source of the target and must not be a feature
    features = [name for name in features if name != 'percent24']

    X = df[features]
    y = df['target']

    print("DF shape:", df.shape)
    print("Columns:", features)
    print("Target distribution:")
    print(y.value_counts(normalize=True))

    X_train, X_valid = X.iloc[:split], X.iloc[split:]
    y_train, y_valid = y.iloc[:split], y.iloc[split:]

    # SMOTE on the training part only.
    # NOTE(review): SMOTE rejects NaN values; this assumes the selected
    # features are already NaN-free - confirm against the caller.
    X_train_res, y_train_res = SMOTE(random_state=42).fit_resample(X_train, y_train)

    # The label set is fixed by the bins above. Counting the labels present
    # in the training data would give a wrong num_class whenever a class is
    # absent, and LightGBM requires labels in [0, num_class).
    num_classes = len(class_labels)

    fixed_params = {
        "objective": "multiclass",
        "metric": "multi_logloss",
        "num_class": num_classes,
        "boosting_type": "gbdt",
        "verbose": -1,
        "seed": 42,
    }

    def objective(trial):
        params = {
            **fixed_params,
            "num_leaves": trial.suggest_int("num_leaves", 16, 128),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "learning_rate": trial.suggest_float("learning_rate", 0.005, 0.1, log=True),
            "feature_fraction": trial.suggest_float("feature_fraction", 0.6, 1.0),
            "bagging_fraction": trial.suggest_float("bagging_fraction", 0.6, 1.0),
            "bagging_freq": trial.suggest_int("bagging_freq", 1, 10),
            "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
            "lambda_l1": trial.suggest_float("lambda_l1", 1e-4, 10, log=True),
            "lambda_l2": trial.suggest_float("lambda_l2", 1e-4, 10, log=True),
        }

        booster = lgb.train(
            params,
            lgb.Dataset(X_train_res, y_train_res),
            num_boost_round=1000,
            valid_sets=[lgb.Dataset(X_valid, y_valid)],
            callbacks=[lgb.early_stopping(50), lgb.log_evaluation(0)],
        )

        # Remember where early stopping landed so the final model can be
        # trained for the same number of rounds.
        trial.set_user_attr("best_iteration", booster.best_iteration)

        proba = booster.predict(X_valid)  # (n_samples, n_classes)
        return f1_score(y_valid, np.argmax(proba, axis=1), average='macro')

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=10)

    # Final model: tuned parameters and the round count of the best trial
    # (the score that was optimised was measured at that iteration).
    final_params = {**study.best_params, **fixed_params}
    boost_rounds = study.best_trial.user_attrs.get("best_iteration") or 1000

    self.train_model = lgb.train(
        final_params,
        lgb.Dataset(X_train_res, y_train_res),
        num_boost_round=boost_rounds,
    )

    self.features = features
    self.df = df

    # Multiclass evaluation on the validation split
    proba = self.train_model.predict(X_valid)  # (n_samples, n_classes)
    preds = np.argmax(proba, axis=1)

    print("===== 📊 RESULTS =====")
    print("F1:", f1_score(y_valid, preds, average='macro'))
    print("Precision:", precision_score(y_valid, preds, average='macro', zero_division=0))
    print("Recall:", recall_score(y_valid, preds, average='macro', zero_division=0))

    try:
        roc = roc_auc_score(y_valid, proba, multi_class='ovr', average='macro')
        print("ROC AUC:", roc)
    except ValueError:
        print("ROC AUC cannot be computed (check y_valid and number of classes)")

    joblib.dump(
        {"model": self.train_model, "features": features},
        f"{self.path}/{pair}_rf_model.pkl"
    )
    print(f"✅ Modèle sauvegardé sous {pair}_rf_model.pkl")

    self.generate_diagnostics(
        model=self.train_model,
        X_valid=X_valid,
        y_valid=y_valid,
        df=df,
        metadata=metadata
    )

    self.generate_shap_analysis_class(model=self.train_model, X_valid=X_valid, metadata=metadata)

    self.extract_buy_rules_class(self.train_model, X_valid, y_valid)


def select_features_pipeline_for_class(self, df):
    """Select features for the multiclass model.

    Steps: variance filter, correlation filter (0.90), then LightGBM
    importance with the mean importance as a dynamic threshold.

    Args:
        df: DataFrame with a ``target`` column and the candidate features
            returned by ``self.listUsableColumns``.

    Returns:
        list[str]: selected feature names, most important first. When no
        feature exceeds the mean importance, every ranked feature is
        returned so callers never receive an empty list.
    """
    # The label itself must never be offered as a feature.
    features = [col for col in self.listUsableColumns(df) if col != 'target']
    X = df[features]
    y = df['target']

    print(f"Initial features: {len(features)}")

    # 1. variance
    X = X.loc[:, X.var() > 1e-6]
    print(f"After variance: {X.shape[1]}")

    # 2. correlation
    X = X[self.remove_correlated_features(X, threshold=0.90)]
    print(f"After correlation: {X.shape[1]}")

    # 3. LightGBM importance
    model = lgb.LGBMClassifier(
        objective='multiclass',
        num_class=len(y.unique()),
        n_estimators=200,
        random_state=42
    )
    model.fit(X, y)

    importance = pd.Series(
        model.feature_importances_,
        index=X.columns
    ).sort_values(ascending=False)

    print("Top 10 features:")
    print(importance.head(10))

    # Dynamic threshold: keep what is above the average importance
    selected = importance[importance > importance.mean()].index.tolist()
    if not selected:
        # All importances equal (or a single feature left): nothing is
        # strictly above the mean, so keep the whole ranking.
        selected = importance.index.tolist()

    print(f"After importance: {len(selected)}")

    return selected


def extract_buy_rules_class(self, model, X_valid, y_valid):
    """Summarise the feature values of the rows predicted as class 2 (BUY).

    Args:
        model: fitted tree model; ``predict`` may return labels or a
            (samples, classes) probability matrix.
        X_valid: validation features.
        y_valid: validation labels (not used; kept for interface stability).

    Returns:
        pandas.DataFrame: columns ``feature``, ``mean``, ``q25``, ``q75`` for
        the ten features with the largest mean absolute SHAP value on the
        BUY rows; empty when no row is predicted as BUY.

    Raises:
        ValueError: if the SHAP output has an unexpected format.
    """
    buy_class = 2
    X_sample = X_valid.copy()

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_sample)

    # SHAP values of the BUY class
    if isinstance(shap_values, list):
        shap_class = shap_values[buy_class]
    elif len(shap_values.shape) == 3:
        shap_class = shap_values[:, :, buy_class]
    else:
        raise ValueError("SHAP format inconnu")

    # A LightGBM Booster returns class probabilities, not labels; comparing
    # that matrix with 2 would never select a row.
    raw = np.asarray(model.predict(X_sample))
    preds = raw.argmax(axis=1) if raw.ndim == 2 else raw
    buy_idx = np.flatnonzero(preds == buy_class)

    print(f"BUY samples: {len(buy_idx)}")

    if len(buy_idx) == 0:
        # Nothing to describe; avoid NaN statistics on an empty selection.
        rules_df = pd.DataFrame(columns=["feature", "mean", "q25", "q75"])
        print("\n===== BUY RULES =====")
        print(rules_df)
        return rules_df

    X_buy = X_sample.iloc[buy_idx]
    shap_buy = shap_class[buy_idx]

    # Top features by mean absolute SHAP value
    importance = pd.Series(np.mean(np.abs(shap_buy), axis=0), index=X_sample.columns)
    top_features = importance.sort_values(ascending=False).head(10).index.tolist()

    print("Top BUY features:")
    print(top_features)

    # Rule extraction: central value and inter-quartile range per feature
    rules_df = pd.DataFrame([
        {
            "feature": feat,
            "mean": X_buy[feat].mean(),
            "q25": X_buy[feat].quantile(0.25),
            "q75": X_buy[feat].quantile(0.75),
        }
        for feat in top_features
    ])

    print("\n===== BUY RULES =====")
    print(rules_df)

    return rules_df