From 496c4d7827bfb6e50f024ba3d8ff3cf1949636ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Delacotte?= Date: Tue, 2 Dec 2025 19:47:22 +0100 Subject: [PATCH] FrictradeLearning --- FrictradeLearning.py | 325 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 277 insertions(+), 48 deletions(-) diff --git a/FrictradeLearning.py b/FrictradeLearning.py index 3a39bb5..237cd25 100644 --- a/FrictradeLearning.py +++ b/FrictradeLearning.py @@ -68,6 +68,15 @@ from sklearn.model_selection import train_test_split from sklearn.metrics import f1_score from xgboost import XGBClassifier +from sklearn.model_selection import train_test_split +from sklearn.linear_model import LogisticRegression +from sklearn.calibration import CalibratedClassifierCV +from sklearn.metrics import brier_score_loss, roc_auc_score + +from sklearn.preprocessing import StandardScaler +from sklearn.pipeline import Pipeline + + logger = logging.getLogger(__name__) # Couleurs ANSI de base @@ -82,7 +91,7 @@ RESET = "\033[0m" class FrictradeLearning(IStrategy): startup_candle_count = 180 - + train_model = None model_indicators = [] DEFAULT_PARAMS = { "rsi_buy": 30, @@ -97,6 +106,19 @@ class FrictradeLearning(IStrategy): "minimal_roi": {"0": 0.10} } + dca_levels = { + 0: 0.00, + -2: 0.05, + -4: 0.07, + -6: 0.10, + -8: 0.12, + -10: 0.15, + -12: 0.18, + -14: 0.22, + -16: 0.26, + -18: 0.30, + } + # ROI table: minimal_roi = { "0": 10 @@ -359,7 +381,7 @@ class FrictradeLearning(IStrategy): self.printLog( f"| {'Date':<16} | {'Action':<10} |{'Pair':<5}| {'Trade Type':<18} |{'Rate':>8} | {'Dispo':>6} | {'Profit':>8} " f"| {'Pct':>6} | {'max_touch':>11} | {'last_lost':>12} | {'last_max':>7}| {'last_min':>7}|{'Buys':>5}| {'Stake':>5} |" - f"{'rsi':>6}" #|Distmax|s201d|s5_1d|s5_2d|s51h|s52h|smt1h|smt2h|tdc1d|tdc1h" + f"{'rsi':>6}|{'mlprob':>6}" #|Distmax|s201d|s5_1d|s5_2d|s51h|s52h|smt1h|smt2h|tdc1d|tdc1h" ) self.printLineLog() df = pd.DataFrame.from_dict(self.pairs, orient='index') @@ -410,8 +432,8 @@ class FrictradeLearning(IStrategy): f"|{color}{profit or '-':>10}{RESET}| {pct_max or '-':>6} | {round(self.pairs[pair]['max_touch'], 2) or '-':>11} | {last_lost or '-':>12} " f"| {last_max or '-':>7} | {last_min or '-':>7} |{total_counts or '-':>5}|{stake or '-':>7}" f"{round(last_candle['max_rsi_24'], 1) or '-' :>6}|{round(last_candle['rsi_1h'], 1) or '-' :>6}|{round(last_candle['rsi_1d'], 1) or '-' :>6}|" - f"{round(last_candle['rtp_1h'] * 100, 0) or '-' :>6}|{round(last_candle['rtp_1d'] * 100, 0) or '-' :>6}|" - + # f"{round(last_candle['rtp_1h'] * 100, 0) or '-' :>6}|{round(last_candle['rtp_1d'] * 100, 0) or '-' :>6}|" + f"{round(last_candle['ml_prob'], 1) or '-' :>6}|" ) def printLineLog(self): @@ -445,6 +467,10 @@ class FrictradeLearning(IStrategy): dataframe['sma5'] = dataframe['mid'].ewm(span=5, adjust=False).mean() #dataframe["mid"].rolling(window=5).mean() dataframe['sma5_deriv1'] = 1000 * (dataframe['sma5'] - dataframe['sma5'].shift(1)) / dataframe['sma5'].shift(1) + dataframe['sma12'] = dataframe['mid'].ewm(span=12, adjust=False).mean() + dataframe['sma12_deriv1'] = 1000 * (dataframe['sma12'] - dataframe['sma12'].shift(1)) / dataframe[ + 'sma12'].shift(1) + dataframe['sma24'] = dataframe['mid'].ewm(span=24, adjust=False).mean() dataframe['sma24_deriv1'] = 1000 * (dataframe['sma24'] - dataframe['sma24'].shift(1)) / dataframe['sma24'].shift(1) @@ -477,16 +503,35 @@ class FrictradeLearning(IStrategy): dataframe['min180'] = talib.MIN(dataframe['mid'], timeperiod=180) dataframe['max180'] = talib.MAX(dataframe['mid'], timeperiod=180) dataframe['pct180'] = ((dataframe["mid"] - dataframe['min180'] ) / (dataframe['max180'] - dataframe['min180'] )) - dataframe = self.rsi_trend_probability(dataframe, short=60, long=360) # ################### INFORMATIVE 1h informative = self.dp.get_pair_dataframe(pair=metadata['pair'], timeframe='1h') informative['mid'] = informative['open'] + (informative['close'] - informative['open']) / 2 - # informative = self.populate1hIndicators(df=informative, metadata=metadata) + # Calcul MACD + macd, macdsignal, macdhist = talib.MACD( + informative['close'], + fastperiod=12, + slowperiod=26, + signalperiod=9 + ) + informative['macd'] = macd + informative['macdsignal'] = macdsignal + informative['macdhist'] = macdhist + + informative['rsi'] = talib.RSI(informative['mid'], timeperiod=14) + informative['sma24'] = informative['mid'].ewm(span=24, adjust=False).mean() + informative['sma24_deriv1'] = 1000 * (informative['sma24'] - informative['sma24'].shift(1)) / informative['sma24'].shift(1) + + informative['sma60'] = informative['mid'].ewm(span=60, adjust=False).mean() + informative['sma60_deriv1'] = 1000 * (informative['sma60'] - informative['sma60'].shift(1)) / informative['sma60'].shift(1) informative['rsi'] = talib.RSI(informative['mid'], timeperiod=14) self.calculeDerivees(informative, 'rsi', ema_period=12) - informative = self.rsi_trend_probability(informative) + # informative = self.rsi_trend_probability(informative) + + probas = self.calculModelInformative(informative) + + # informative = self.populate1hIndicators(df=informative, metadata=metadata) # informative = self.calculateRegression(informative, 'mid', lookback=15) dataframe = merge_informative_pair(dataframe, informative, '1m', '1h', ffill=True) @@ -494,7 +539,7 @@ class FrictradeLearning(IStrategy): informative = self.dp.get_pair_dataframe(pair=metadata['pair'], timeframe='1d') informative['mid'] = informative['open'] + (informative['close'] - informative['open']) / 2 informative['rsi'] = talib.RSI(informative['mid'], timeperiod=5) - informative = self.rsi_trend_probability(informative) + # informative = self.rsi_trend_probability(informative) # informative = self.calculateRegression(informative, 'mid', lookback=15) dataframe = merge_informative_pair(dataframe, informative, '1m', '1d', ffill=True) @@ -545,7 +590,6 @@ class FrictradeLearning(IStrategy): ) dataframe["bb_width"] = (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / dataframe["sma24"] - # Calcul MACD macd, macdsignal, macdhist = talib.MACD( dataframe['close'], @@ -637,10 +681,15 @@ class FrictradeLearning(IStrategy): ).on_balance_volume() self.calculeDerivees(dataframe, 'obv', ema_period=1) - dataframe['obv5'] = ta.volume.OnBalanceVolumeIndicator( - close=dataframe['sma5'], volume=dataframe['volume'].rolling(5).sum() + dataframe['obv12'] = ta.volume.OnBalanceVolumeIndicator( + close=dataframe['sma12'], volume=dataframe['volume'].rolling(12).sum() ).on_balance_volume() - self.calculeDerivees(dataframe, 'obv5', ema_period=5) + + dataframe['obv24'] = ta.volume.OnBalanceVolumeIndicator( + close=dataframe['sma24'], volume=dataframe['volume'].rolling(24).sum() + ).on_balance_volume() + + # self.calculeDerivees(dataframe, 'obv5', ema_period=5) # --- Volatilité récente (écart-type des rendements) --- dataframe['vol_24'] = dataframe['percent'].rolling(24).std() @@ -674,13 +723,23 @@ class FrictradeLearning(IStrategy): self.trainModel(dataframe, metadata) short_pair = self.getShortName(pair) + path=f"user_data/plots/{short_pair}/" - self.model = joblib.load(f"{short_pair}_rf_model.pkl") + self.model = joblib.load(f"{path}/{short_pair}_rf_model.pkl") # Préparer les features pour la prédiction features = dataframe[self.model_indicators].fillna(0) # Prédiction : probabilité que le prix monte + + # Affichage des colonnes intérressantes dans le model + features_pruned, kept_features = self.prune_features( + model=self.model, + dataframe=dataframe, + feature_columns=self.model_indicators, + importance_threshold=0.005 # enlever features < % importance + ) + probs = self.model.predict_proba(features)[:, 1] # Sauvegarder la probabilité pour l’analyse @@ -765,7 +824,7 @@ class FrictradeLearning(IStrategy): # Buy = prediction > threshold dataframe["buy"] = 0 - dataframe.loc[dataframe["ml_prob"] > threshold, ['enter_long', 'enter_tag']] = (1, f"future") + dataframe.loc[dataframe["ml_prob"] > 0.5, ['enter_long', 'enter_tag']] = (1, f"future") dataframe['test'] = np.where(dataframe['enter_long'] == 1, dataframe['close'] * 1.003, np.nan) return dataframe @@ -883,6 +942,9 @@ class FrictradeLearning(IStrategy): def adjust_stake_amount(self, pair: str, last_candle: DataFrame): + if self.pairs[pair]['first_amount'] > 0: + return self.pairs[pair]['first_amount'] + ath = max(self.pairs[pair]['last_max'], self.get_last_ath_before_candle(last_candle)) ath_dist = 100 * (ath - last_candle["mid"]) / ath @@ -967,7 +1029,39 @@ class FrictradeLearning(IStrategy): # stake_amount = last_amount * current_rate * 0.5 # return stake_amount - condition = last_candle['hapercent'] > 0 and last_candle['sma24_deriv1'] > 0 + + ########################### ALGO ATH + # # --- 1. Calcul ATH local de la paire --- + # ath = max(self.pairs[pair]['last_max'], self.get_last_ath_before_candle(last_candle)) + # + # # --- 2. Distance ATH - current --- + # dd = (current_rate - ath) / ath * 100 # dd <= 0 + # + # if dd > -1: # pas de renfort si drawdown trop faible + # return None + # + # # --- 3. DCA dynamique (modèle exponentiel) --- + # a = 0.015 + # b = 0.12 + # + # pct = a * (math.exp(b * (-dd)) - 1) # proportion du wallet libre + # + # # Clamp de sécurité + # pct = min(max(pct, 0), 0.35) # max 35% d’un coup + # + # if pct <= 0: + # return None + # + # # --- 4. Stake en fonction du wallet libre --- + # stake_amount = self.wallets.get_available_stake_amount() * pct + # + # if stake_amount < self.min_stake_amount: + # return None + + # FIN ########################## ALGO ATH + + condition = last_candle['hapercent'] > 0 and last_candle['sma24_deriv1'] > 0 \ + and last_candle['close'] < self.pairs[pair]['first_buy'] and last_candle['ml_prob'] > 0.65 limit_buy = 40 # or (last_candle['close'] <= last_candle['min180'] and hours > 3) if (decline >= dca_threshold) and condition: @@ -977,9 +1071,7 @@ class FrictradeLearning(IStrategy): self.pairs[pair]['previous_profit'] = profit return None - max_amount = self.config.get('stake_amount') * 2.5 - stake_amount = min(min(max_amount, self.wallets.get_available_stake_amount()), - self.adjust_stake_amount(pair, last_candle)) + stake_amount = min(self.wallets.get_available_stake_amount(), self.adjust_stake_amount(pair, last_candle) / 2) # print(f"profit={profit} previous={self.pairs[pair]['previous_profit']} count_of_buys={trade.nr_of_successful_entries}") if stake_amount > 0: self.pairs[pair]['previous_profit'] = profit @@ -1143,7 +1235,7 @@ class FrictradeLearning(IStrategy): stake=0 ) - if last_candle['ml_prob'] > 0.5: + if last_candle['ml_prob'] > 0.65: return None # if last_candle['sma24_deriv1'] > 0 : #and minutes < 180 and baisse < 30: # and last_candle['sma5_deriv1'] > -0.15: # if (minutes < 180): @@ -1416,19 +1508,34 @@ class FrictradeLearning(IStrategy): # study.optimize(objective, n_trials=50) def objective(trial): - self.train_model = XGBClassifier( - n_estimators=trial.suggest_int("n_estimators", 200, 800), - max_depth=trial.suggest_int("max_depth", 3, 10), - learning_rate=trial.suggest_float("learning_rate", 0.005, 0.3, log=True), - subsample=trial.suggest_float("subsample", 0.6, 1.0), - colsample_bytree=trial.suggest_float("colsample_bytree", 0.6, 1.0), + # local_model = XGBClassifier( + # n_estimators=300, # nombre d'arbres plus raisonnable + # learning_rate=0.01, # un peu plus rapide que 0.006, mais stable + # max_depth=4, # capture plus de patterns que 3, sans overfitting excessif + # subsample=0.7, # utilise 70% des lignes pour chaque arbre → réduit overfitting + # colsample_bytree=0.8, # 80% des features par arbre + # gamma=0.01, # gain minimal pour un split → régularisation + # reg_alpha=0.01, # L1 régularisation des feuilles + # reg_lambda=1, # L2 régularisation des feuilles + # n_jobs=-1, # utilise tous les cœurs CPU pour accélérer + # random_state=42, # reproductibilité + # missing=float('nan'), # valeur manquante reconnue + # eval_metric='logloss' # métrique pour classification binaire + # ) + + local_model = XGBClassifier( + n_estimators=300, #trial.suggest_int("n_estimators", 300, 500), + max_depth=trial.suggest_int("max_depth", 1, 3), + learning_rate=0.01, #trial.suggest_float("learning_rate", 0.005, 0.3, log=True), + subsample=0.7, #trial.suggest_float("subsample", 0.6, 1.0), + colsample_bytree=0.8, #trial.suggest_float("colsample_bytree", 0.6, 1.0), scale_pos_weight=1, objective="binary:logistic", eval_metric="logloss", n_jobs=-1 ) - self.train_model.fit( + local_model.fit( X_train, y_train, eval_set=[(X_valid, y_valid)], @@ -1436,14 +1543,63 @@ class FrictradeLearning(IStrategy): verbose=False ) - proba = self.train_model.predict_proba(X_valid)[:, 1] + proba = local_model.predict_proba(X_valid)[:, 1] thresholds = np.linspace(0.1, 0.9, 50) best_f1 = max(f1_score(y_valid, (proba > t)) for t in thresholds) return best_f1 study = optuna.create_study(direction="maximize") - study.optimize(objective, n_trials=50) + study.optimize(objective, n_trials=20) + # SHAP + # Reconstruction du modèle final avec les meilleurs hyperparamètres + # Récupération des meilleurs paramètres trouvés + best_params = study.best_params + + best_model = XGBClassifier(**best_params) + best_model.fit(X_train, y_train) + self.train_model = best_model + + # === SHAP plots === + # Calcul SHAP + explainer = shap.TreeExplainer(self.train_model) + shap_values = explainer(X_train) + + # On choisit une observation pour le graphique waterfall + # Explication du modèle de prédiction pour la première ligne de X_valid.” + i = 0 + + # Extraction des valeurs + shap_val = shap_values[i].values + feature_names = X_train.columns + feature_values = X_train.iloc[i] + + # Tri par importance absolue + # order = np.argsort(np.abs(shap_val))[::-1] + k = 10 + order = np.argsort(np.abs(shap_val))[::-1][:k] + + # ---- Création figure sans l'afficher ---- + plt.ioff() # Désactive l'affichage interactif + + shap.plots.waterfall( + shap.Explanation( + values=shap_val[order], + base_values=shap_values.base_values[i], + data=feature_values.values[order], + feature_names=feature_names[order] + ), + show=False # IMPORTANT : n'affiche pas dans Jupyter / console + ) + + # Sauvegarde du graphique sur disque + output_path = f"{path}/shap_waterfall.png" + plt.savefig(output_path, dpi=200, bbox_inches='tight') + plt.close() # ferme la figure proprement + + print(f"Graphique SHAP enregistré : {output_path}") + + # FIN SHAP # ---- après avoir exécuté la study ------ print("Best value (F1):", study.best_value) @@ -1540,9 +1696,16 @@ class FrictradeLearning(IStrategy): force_plot = shap.force_plot(explainer.expected_value, shap_values[0, :], X_valid.iloc[0, :]) shap.save_html(f"{path}/shap_force_plot.html", force_plot) - PartialDependenceDisplay.from_estimator(self.train_model, X_valid, selected_features, kind='average') - plt.figure(figsize=(24, 24)) - plt.savefig(f"{path}/PartialDependenceDisplay.png", bbox_inches='tight') + fig, ax = plt.subplots(figsize=(24, 48)) + PartialDependenceDisplay.from_estimator( + self.train_model, + X_valid, + selected_features, + kind="average", + ax=ax + ) + fig.savefig(f"{path}/PartialDependenceDisplay.png", bbox_inches="tight") + plt.close(fig) best_f1 = 0 best_t = 0.5 @@ -1562,9 +1725,10 @@ class FrictradeLearning(IStrategy): print(f"Accuracy: {acc:.3f}") # 7️⃣ Sauvegarde du modèle - joblib.dump(self.train_model, f"{pair}_rf_model.pkl") + joblib.dump(self.train_model, f"{path}/{pair}_rf_model.pkl") print(f"✅ Modèle sauvegardé sous {pair}_rf_model.pkl") + # X = dataframe des features (après shift/rolling/indicators) # y = target binaire ou décimale # model = ton modèle entraîné (RandomForestClassifier ou Regressor) @@ -1666,8 +1830,7 @@ class FrictradeLearning(IStrategy): Analyse complète d'un modèle ML supervisé (classification binaire). Affiche performances, importance des features, matrices, seuils, etc. """ - output_dir = f"user_data/plots/{pair}/" - os.makedirs(output_dir, exist_ok=True) + os.makedirs(self.path, exist_ok=True) # ---- Prédictions ---- preds = model.predict(X_valid) @@ -1704,7 +1867,7 @@ class FrictradeLearning(IStrategy): for j in range(2): plt.text(j, i, cm[i, j], ha="center", va="center", color="black") # plt.show() - plt.savefig(os.path.join(output_dir, "Matrice de confusion.png"), bbox_inches="tight") + plt.savefig(os.path.join(self.path, "Matrice de confusion.png"), bbox_inches="tight") plt.close() # ---- Importance des features ---- @@ -1727,7 +1890,7 @@ class FrictradeLearning(IStrategy): plt.title("Importance des features") # plt.show() - plt.savefig(os.path.join(output_dir, "Importance des features.png"), bbox_inches="tight") + plt.savefig(os.path.join(self.path, "Importance des features.png"), bbox_inches="tight") plt.close() # ---- Arbre de décision (extrait) ---- @@ -1753,7 +1916,7 @@ class FrictradeLearning(IStrategy): plt.title("Courbe ROC") plt.legend() # plt.show() - plt.savefig(os.path.join(output_dir, "Courbe ROC.png"), bbox_inches="tight") + plt.savefig(os.path.join(self.path, "Courbe ROC.png"), bbox_inches="tight") plt.close() # # ---- Interprétation SHAP (optionnelle) ---- @@ -1782,7 +1945,7 @@ class FrictradeLearning(IStrategy): # # plt.figure(figsize=(12, 4)) # shap.summary_plot(shap_values_to_plot, X_to_plot, show=False) - # plt.savefig(os.path.join(output_dir, "shap_summary.png"), bbox_inches="tight") + # plt.savefig(os.path.join(self.path, "shap_summary.png"), bbox_inches="tight") # plt.close() # except ImportError: # print("\n(SHAP non installé — `pip install shap` pour activer l’analyse SHAP.)") @@ -1791,7 +1954,7 @@ class FrictradeLearning(IStrategy): # Trace ou enregistre le graphique self.plot_threshold_analysis(y_valid, y_proba, step=0.05, - save_path=f"{output_dir}/threshold_analysis.png") + save_path=f"{self.path}/threshold_analysis.png") # y_valid : vraies classes (0 / 1) # y_proba : probabilités de la classe 1 prédites par ton modèle @@ -1820,7 +1983,7 @@ class FrictradeLearning(IStrategy): plt.ylabel("Score") plt.grid(True, alpha=0.3) plt.legend() - plt.savefig(f"{output_dir}/seuil_de_probabilite.png", bbox_inches='tight') + plt.savefig(f"{self.path}/seuil_de_probabilite.png", bbox_inches='tight') # plt.show() print(f"✅ Meilleur F1 : {f1s[best_idx]:.3f} au seuil {seuils[best_idx]:.2f}") @@ -1837,11 +2000,8 @@ class FrictradeLearning(IStrategy): """ # Le graphique généré affichera trois courbes : - # # 🔵 Precision — la fiabilité de tes signaux haussiers. - # # 🟢 Recall — la proportion de hausses que ton modèle détecte. - # # 🟣 F1-score — le compromis optimal entre les deux. thresholds = np.arange(0, 1.01, step) @@ -1887,22 +2047,23 @@ class FrictradeLearning(IStrategy): and not c.endswith("_state") and not c.endswith("_1d") # and not c.endswith("_1h") - # and not c.startswith("open") and not c.startswith("close") - # and not c.startswith("low") and not c.startswith("high") - # and not c.startswith("haopen") and not c.startswith("haclose") + and not c.startswith("open") and not c.startswith("close") + and not c.startswith("low") and not c.startswith("high") + and not c.startswith("haopen") and not c.startswith("haclose") # and not c.startswith("bb_lower") and not c.startswith("bb_upper") # and not c.startswith("bb_middle") and not c.endswith("_count") and not c.endswith("_class") and not c.endswith("_price") and not c.startswith('stop_buying') + and not c.startswith('target') and not c.startswith('lvl') ] # Étape 3 : remplacer inf et NaN par 0 dataframe[usable_cols] = dataframe[usable_cols].replace([np.inf, -np.inf], 0).fillna(0) print("Colonnes utilisables pour le modèle :") print(usable_cols) - self.model_indicators = usable_cols - return self.model_indicators + # self.model_indicators = usable_cols + return usable_cols def select_uncorrelated_features(self, df, target, top_n=20, corr_threshold=0.7): @@ -2049,3 +2210,71 @@ class FrictradeLearning(IStrategy): # self.calculateProbabilite2Index(dataframe, futur_cols=['futur_percent'], indic_1=f"{name}{suffixe}_deriv1", indic_2=f"{name}{suffixe}_deriv2") return dataframe + + def calculModelInformative(self, informative): + # préparation + # print(df) + df = informative.copy() + X = df[self.listUsableColumns(df)] + df['target'] = ((df["sma24"].shift(-13) - df["sma24"]) > 0).astype(int) + df['target'] = df['target'].fillna(0).astype(int) + y = df['target'] + + # train/test + X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.2) + + # Pipeline normalisé + Logistic Regresson + clf = Pipeline([ + ("scaler", StandardScaler()), + ("logreg", LogisticRegression(max_iter=5000)) + ]) + + # Calibration CV automatique + cal = CalibratedClassifierCV(clf, cv=3, method="isotonic") + + # Entraînement + cal.fit(X_train, y_train) + + # Probabilités calibrées + probas = cal.predict_proba(X_test)[:, 1] + # Injection propre des probabilités dans le dataframe original (aux bons index) + df.loc[X_test.index, 'ml_prob'] = probas + + print("Brier score:", brier_score_loss(y_test, probas)) + print("ROC AUC:", roc_auc_score(y_test, probas)) + + # joindre probabilités au df (dernières lignes correspondantes) + return probas + + + def prune_features(self, model, dataframe, feature_columns, importance_threshold=0.01): + """ + Supprime les features dont l'importance est inférieure au seuil. + + Args: + model: XGBClassifier déjà entraîné + dataframe: DataFrame contenant toutes les features + feature_columns: liste des colonnes/features utilisées pour la prédiction + importance_threshold: seuil minimal pour conserver une feature (en proportion de l'importance totale) + + Returns: + dataframe_pruned: dataframe avec uniquement les features conservées + kept_features: liste des features conservées + """ + booster = model.get_booster() + + # Récupérer importance des features selon 'gain' + importance = booster.get_score(importance_type='gain') + + # Normaliser pour que la somme soit 1 + total_gain = sum(importance.values()) + normalized_importance = {k: v / total_gain for k, v in importance.items()} + + # Features à garder + kept_features = [f for f in feature_columns if normalized_importance.get(f, 0) >= importance_threshold] + + dataframe_pruned = dataframe[kept_features].fillna(0) + + print(f"⚡ Features conservées ({len(kept_features)} / {len(feature_columns)}): {kept_features}") + + return dataframe_pruned, kept_features