FrictradeLearning

2025-12-02 19:47:22 +01:00
parent c66b9c4a8b
commit 496c4d7827
1 changed files with 277 additions and 48 deletions
--- a/FrictradeLearning.py
+++ b/FrictradeLearning.py
@@ -68,6 +68,15 @@ from sklearn.model_selection import train_test_split
 from sklearn.metrics import f1_score
 from xgboost import XGBClassifier
 from sklearn.model_selection import train_test_split
 from sklearn.linear_model import LogisticRegression
 from sklearn.calibration import CalibratedClassifierCV
 from sklearn.metrics import brier_score_loss, roc_auc_score
 from sklearn.preprocessing import StandardScaler
 from sklearn.pipeline import Pipeline
 logger = logging.getLogger(__name__)
 # Couleurs ANSI de base
@@ -82,7 +91,7 @@ RESET = "\033[0m"
 class FrictradeLearning(IStrategy):
    startup_candle_count = 180
-
+    train_model = None
    model_indicators = []
    DEFAULT_PARAMS = {
        "rsi_buy": 30,
@@ -97,6 +106,19 @@ class FrictradeLearning(IStrategy):
        "minimal_roi": {"0": 0.10}
    }
    dca_levels = {
        0: 0.00,
        -2: 0.05,
        -4: 0.07,
        -6: 0.10,
        -8: 0.12,
        -10: 0.15,
        -12: 0.18,
        -14: 0.22,
        -16: 0.26,
        -18: 0.30,
    }
    # ROI table:
    minimal_roi = {
        "0": 10
@@ -359,7 +381,7 @@ class FrictradeLearning(IStrategy):
            self.printLog(
                f"| {'Date':<16} | {'Action':<10} |{'Pair':<5}| {'Trade Type':<18} |{'Rate':>8} | {'Dispo':>6} | {'Profit':>8} "
                f"| {'Pct':>6} | {'max_touch':>11} | {'last_lost':>12} | {'last_max':>7}| {'last_min':>7}|{'Buys':>5}| {'Stake':>5} |"
-                f"{'rsi':>6}" #|Distmax|s201d|s5_1d|s5_2d|s51h|s52h|smt1h|smt2h|tdc1d|tdc1h"
+                f"{'rsi':>6}|{'mlprob':>6}" #|Distmax|s201d|s5_1d|s5_2d|s51h|s52h|smt1h|smt2h|tdc1d|tdc1h"
            )
            self.printLineLog()
            df = pd.DataFrame.from_dict(self.pairs, orient='index')
@@ -410,8 +432,8 @@ class FrictradeLearning(IStrategy):
            f"|{color}{profit or '-':>10}{RESET}| {pct_max or '-':>6} | {round(self.pairs[pair]['max_touch'], 2) or '-':>11} | {last_lost or '-':>12} "
            f"| {last_max or '-':>7} | {last_min or '-':>7} |{total_counts or '-':>5}|{stake or '-':>7}"
            f"{round(last_candle['max_rsi_24'], 1) or '-' :>6}|{round(last_candle['rsi_1h'], 1) or '-' :>6}|{round(last_candle['rsi_1d'], 1) or '-' :>6}|"
-            f"{round(last_candle['rtp_1h'] * 100, 0) or '-' :>6}|{round(last_candle['rtp_1d'] * 100, 0) or '-' :>6}|"
+            # f"{round(last_candle['rtp_1h'] * 100, 0) or '-' :>6}|{round(last_candle['rtp_1d'] * 100, 0) or '-' :>6}|"
-
+            f"{round(last_candle['ml_prob'], 1) or '-' :>6}|"
        )
    def printLineLog(self):
@@ -445,6 +467,10 @@ class FrictradeLearning(IStrategy):
        dataframe['sma5'] = dataframe['mid'].ewm(span=5, adjust=False).mean() #dataframe["mid"].rolling(window=5).mean()
        dataframe['sma5_deriv1'] = 1000 * (dataframe['sma5'] - dataframe['sma5'].shift(1)) / dataframe['sma5'].shift(1)
        dataframe['sma12'] = dataframe['mid'].ewm(span=12, adjust=False).mean()
        dataframe['sma12_deriv1'] = 1000 * (dataframe['sma12'] - dataframe['sma12'].shift(1)) / dataframe[
            'sma12'].shift(1)
        dataframe['sma24'] = dataframe['mid'].ewm(span=24, adjust=False).mean()
        dataframe['sma24_deriv1'] = 1000 * (dataframe['sma24'] - dataframe['sma24'].shift(1)) / dataframe['sma24'].shift(1)
@@ -477,16 +503,35 @@ class FrictradeLearning(IStrategy):
        dataframe['min180'] = talib.MIN(dataframe['mid'], timeperiod=180)
        dataframe['max180'] = talib.MAX(dataframe['mid'], timeperiod=180)
        dataframe['pct180'] = ((dataframe["mid"] - dataframe['min180'] ) / (dataframe['max180'] - dataframe['min180'] ))
        dataframe = self.rsi_trend_probability(dataframe, short=60, long=360)
        # ################### INFORMATIVE 1h
        informative = self.dp.get_pair_dataframe(pair=metadata['pair'], timeframe='1h')
        informative['mid'] = informative['open'] + (informative['close'] - informative['open']) / 2
-        # informative = self.populate1hIndicators(df=informative, metadata=metadata)
+        # Calcul MACD
        macd, macdsignal, macdhist = talib.MACD(
            informative['close'],
            fastperiod=12,
            slowperiod=26,
            signalperiod=9
        )
        informative['macd'] = macd
        informative['macdsignal'] = macdsignal
        informative['macdhist'] = macdhist
        informative['rsi'] = talib.RSI(informative['mid'], timeperiod=14)
        informative['sma24'] = informative['mid'].ewm(span=24, adjust=False).mean()
        informative['sma24_deriv1'] = 1000 * (informative['sma24'] - informative['sma24'].shift(1)) / informative['sma24'].shift(1)
        informative['sma60'] = informative['mid'].ewm(span=60, adjust=False).mean()
        informative['sma60_deriv1'] = 1000 * (informative['sma60'] - informative['sma60'].shift(1)) / informative['sma60'].shift(1)
        informative['rsi'] = talib.RSI(informative['mid'], timeperiod=14)
        self.calculeDerivees(informative, 'rsi', ema_period=12)
-        informative = self.rsi_trend_probability(informative)
+        # informative = self.rsi_trend_probability(informative)
        probas = self.calculModelInformative(informative)
        # informative = self.populate1hIndicators(df=informative, metadata=metadata)
        # informative = self.calculateRegression(informative, 'mid', lookback=15)
        dataframe = merge_informative_pair(dataframe, informative, '1m', '1h', ffill=True)
@@ -494,7 +539,7 @@ class FrictradeLearning(IStrategy):
        informative = self.dp.get_pair_dataframe(pair=metadata['pair'], timeframe='1d')
        informative['mid'] = informative['open'] + (informative['close'] - informative['open']) / 2
        informative['rsi'] = talib.RSI(informative['mid'], timeperiod=5)
-        informative = self.rsi_trend_probability(informative)
+        # informative = self.rsi_trend_probability(informative)
        # informative = self.calculateRegression(informative, 'mid', lookback=15)
        dataframe = merge_informative_pair(dataframe, informative, '1m', '1d', ffill=True)
@@ -545,7 +590,6 @@ class FrictradeLearning(IStrategy):
        )
        dataframe["bb_width"] = (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / dataframe["sma24"]
        # Calcul MACD
        macd, macdsignal, macdhist = talib.MACD(
            dataframe['close'],
@@ -637,10 +681,15 @@ class FrictradeLearning(IStrategy):
        ).on_balance_volume()
        self.calculeDerivees(dataframe, 'obv', ema_period=1)
-        dataframe['obv5'] = ta.volume.OnBalanceVolumeIndicator(
+        dataframe['obv12'] = ta.volume.OnBalanceVolumeIndicator(
-            close=dataframe['sma5'], volume=dataframe['volume'].rolling(5).sum()
+            close=dataframe['sma12'], volume=dataframe['volume'].rolling(12).sum()
        ).on_balance_volume()
-        self.calculeDerivees(dataframe, 'obv5', ema_period=5)
+
        dataframe['obv24'] = ta.volume.OnBalanceVolumeIndicator(
            close=dataframe['sma24'], volume=dataframe['volume'].rolling(24).sum()
        ).on_balance_volume()
        # self.calculeDerivees(dataframe, 'obv5', ema_period=5)
        # --- Volatilité récente (écart-type des rendements) ---
        dataframe['vol_24'] = dataframe['percent'].rolling(24).std()
@@ -674,13 +723,23 @@ class FrictradeLearning(IStrategy):
            self.trainModel(dataframe, metadata)
        short_pair = self.getShortName(pair)
        path=f"user_data/plots/{short_pair}/"
-        self.model = joblib.load(f"{short_pair}_rf_model.pkl")
+        self.model = joblib.load(f"{path}/{short_pair}_rf_model.pkl")
        # Préparer les features pour la prédiction
        features = dataframe[self.model_indicators].fillna(0)
        # Prédiction : probabilité que le prix monte
        # Affichage des colonnes intéressantes dans le modèle
        features_pruned, kept_features = self.prune_features(
            model=self.model,
            dataframe=dataframe,
            feature_columns=self.model_indicators,
            importance_threshold=0.005  # enlever features < % importance
        )
        probs = self.model.predict_proba(features)[:, 1]
        # Sauvegarder la probabilité pour l’analyse
@@ -765,7 +824,7 @@ class FrictradeLearning(IStrategy):
        # Buy = prediction > threshold
        dataframe["buy"] = 0
-        dataframe.loc[dataframe["ml_prob"] > threshold, ['enter_long', 'enter_tag']] = (1, f"future")
+        dataframe.loc[dataframe["ml_prob"] > 0.5, ['enter_long', 'enter_tag']] = (1, f"future")
        dataframe['test'] = np.where(dataframe['enter_long'] == 1, dataframe['close'] * 1.003, np.nan)
        return dataframe
@@ -883,6 +942,9 @@ class FrictradeLearning(IStrategy):
    def adjust_stake_amount(self, pair: str, last_candle: DataFrame):
        if self.pairs[pair]['first_amount'] > 0:
            return self.pairs[pair]['first_amount']
        ath = max(self.pairs[pair]['last_max'], self.get_last_ath_before_candle(last_candle))
        ath_dist = 100 * (ath - last_candle["mid"]) / ath
@@ -967,7 +1029,39 @@ class FrictradeLearning(IStrategy):
        #     stake_amount = last_amount * current_rate * 0.5
        #     return stake_amount
-        condition = last_candle['hapercent'] > 0 and last_candle['sma24_deriv1'] > 0
+
        ########################### ALGO ATH
        # # --- 1. Calcul ATH local de la paire ---
        # ath = max(self.pairs[pair]['last_max'], self.get_last_ath_before_candle(last_candle))
        #
        # # --- 2. Distance ATH - current ---
        # dd = (current_rate - ath) / ath * 100  # dd <= 0
        #
        # if dd > -1:  # pas de renfort si drawdown trop faible
        #     return None
        #
        # # --- 3. DCA dynamique (modèle exponentiel) ---
        # a = 0.015
        # b = 0.12
        #
        # pct = a * (math.exp(b * (-dd)) - 1)  # proportion du wallet libre
        #
        # # Clamp de sécurité
        # pct = min(max(pct, 0), 0.35)  # max 35% d’un coup
        #
        # if pct <= 0:
        #     return None
        #
        # # --- 4. Stake en fonction du wallet libre ---
        # stake_amount = self.wallets.get_available_stake_amount() * pct
        #
        # if stake_amount < self.min_stake_amount:
        #     return None
        # FIN ########################## ALGO ATH
        condition = last_candle['hapercent'] > 0 and last_candle['sma24_deriv1'] > 0 \
                    and last_candle['close'] < self.pairs[pair]['first_buy'] and last_candle['ml_prob'] > 0.65
        limit_buy = 40
        # or (last_candle['close'] <= last_candle['min180'] and hours > 3)
        if (decline >= dca_threshold) and condition:
@@ -977,9 +1071,7 @@ class FrictradeLearning(IStrategy):
                    self.pairs[pair]['previous_profit'] = profit
                    return None
-                max_amount = self.config.get('stake_amount') * 2.5
+                stake_amount = min(self.wallets.get_available_stake_amount(), self.adjust_stake_amount(pair, last_candle) / 2)
                stake_amount = min(min(max_amount, self.wallets.get_available_stake_amount()),
                                   self.adjust_stake_amount(pair, last_candle))
                # print(f"profit={profit} previous={self.pairs[pair]['previous_profit']} count_of_buys={trade.nr_of_successful_entries}")
                if stake_amount > 0:
                    self.pairs[pair]['previous_profit'] = profit
@@ -1143,7 +1235,7 @@ class FrictradeLearning(IStrategy):
                stake=0
            )
-        if last_candle['ml_prob'] > 0.5:
+        if last_candle['ml_prob'] > 0.65:
            return None
        # if last_candle['sma24_deriv1'] > 0 : #and minutes < 180 and baisse < 30: # and last_candle['sma5_deriv1'] > -0.15:
        #     if (minutes < 180):
@@ -1416,19 +1508,34 @@ class FrictradeLearning(IStrategy):
        # study.optimize(objective, n_trials=50)
        def objective(trial):
-            self.train_model = XGBClassifier(
+            # local_model = XGBClassifier(
-                n_estimators=trial.suggest_int("n_estimators", 200, 800),
+            #     n_estimators=300,  # nombre d'arbres plus raisonnable
-                max_depth=trial.suggest_int("max_depth", 3, 10),
+            #     learning_rate=0.01,  # un peu plus rapide que 0.006, mais stable
-                learning_rate=trial.suggest_float("learning_rate", 0.005, 0.3, log=True),
+            #     max_depth=4,  # capture plus de patterns que 3, sans overfitting excessif
-                subsample=trial.suggest_float("subsample", 0.6, 1.0),
+            #     subsample=0.7,  # utilise 70% des lignes pour chaque arbre → réduit overfitting
-                colsample_bytree=trial.suggest_float("colsample_bytree", 0.6, 1.0),
+            #     colsample_bytree=0.8,  # 80% des features par arbre
            #     gamma=0.01,  # gain minimal pour un split → régularisation
            #     reg_alpha=0.01,  # L1 régularisation des feuilles
            #     reg_lambda=1,  # L2 régularisation des feuilles
            #     n_jobs=-1,  # utilise tous les cœurs CPU pour accélérer
            #     random_state=42,  # reproductibilité
            #     missing=float('nan'),  # valeur manquante reconnue
            #     eval_metric='logloss'  # métrique pour classification binaire
            # )
            local_model = XGBClassifier(
                n_estimators=300, #trial.suggest_int("n_estimators", 300, 500),
                max_depth=trial.suggest_int("max_depth", 1, 3),
                learning_rate=0.01, #trial.suggest_float("learning_rate", 0.005, 0.3, log=True),
                subsample=0.7, #trial.suggest_float("subsample", 0.6, 1.0),
                colsample_bytree=0.8, #trial.suggest_float("colsample_bytree", 0.6, 1.0),
                scale_pos_weight=1,
                objective="binary:logistic",
                eval_metric="logloss",
                n_jobs=-1
            )
-            self.train_model.fit(
+            local_model.fit(
                X_train,
                y_train,
                eval_set=[(X_valid, y_valid)],
@@ -1436,14 +1543,63 @@ class FrictradeLearning(IStrategy):
                verbose=False
            )
-            proba = self.train_model.predict_proba(X_valid)[:, 1]
+            proba = local_model.predict_proba(X_valid)[:, 1]
            thresholds = np.linspace(0.1, 0.9, 50)
            best_f1 = max(f1_score(y_valid, (proba > t)) for t in thresholds)
            return best_f1
        study = optuna.create_study(direction="maximize")
-        study.optimize(objective, n_trials=50)
+        study.optimize(objective, n_trials=20)
        # SHAP
        # Reconstruction du modèle final avec les meilleurs hyperparamètres
        # Récupération des meilleurs paramètres trouvés
        best_params = study.best_params
        best_model = XGBClassifier(**best_params)
        best_model.fit(X_train, y_train)
        self.train_model = best_model
        # === SHAP plots ===
        # Calcul SHAP
        explainer = shap.TreeExplainer(self.train_model)
        shap_values = explainer(X_train)
        # On choisit une observation pour le graphique waterfall
        # Explication du modèle de prédiction pour la première ligne de X_train.
        i = 0
        # Extraction des valeurs
        shap_val = shap_values[i].values
        feature_names = X_train.columns
        feature_values = X_train.iloc[i]
        # Tri par importance absolue
        # order = np.argsort(np.abs(shap_val))[::-1]
        k = 10
        order = np.argsort(np.abs(shap_val))[::-1][:k]
        # ---- Création figure sans l'afficher ----
        plt.ioff()  # Désactive l'affichage interactif
        shap.plots.waterfall(
            shap.Explanation(
                values=shap_val[order],
                base_values=shap_values.base_values[i],
                data=feature_values.values[order],
                feature_names=feature_names[order]
            ),
            show=False  # IMPORTANT : n'affiche pas dans Jupyter / console
        )
        # Sauvegarde du graphique sur disque
        output_path = f"{path}/shap_waterfall.png"
        plt.savefig(output_path, dpi=200, bbox_inches='tight')
        plt.close()  # ferme la figure proprement
        print(f"Graphique SHAP enregistré : {output_path}")
        # FIN SHAP
        # ---- après avoir exécuté la study ------
        print("Best value (F1):", study.best_value)
@@ -1540,9 +1696,16 @@ class FrictradeLearning(IStrategy):
        force_plot = shap.force_plot(explainer.expected_value, shap_values[0, :], X_valid.iloc[0, :])
        shap.save_html(f"{path}/shap_force_plot.html", force_plot)
-        PartialDependenceDisplay.from_estimator(self.train_model, X_valid, selected_features, kind='average')
+        fig, ax = plt.subplots(figsize=(24, 48))
-        plt.figure(figsize=(24, 24))
+        PartialDependenceDisplay.from_estimator(
-        plt.savefig(f"{path}/PartialDependenceDisplay.png", bbox_inches='tight')
+            self.train_model,
            X_valid,
            selected_features,
            kind="average",
            ax=ax
        )
        fig.savefig(f"{path}/PartialDependenceDisplay.png", bbox_inches="tight")
        plt.close(fig)
        best_f1 = 0
        best_t = 0.5
@@ -1562,9 +1725,10 @@ class FrictradeLearning(IStrategy):
        print(f"Accuracy: {acc:.3f}")
        # 7️⃣ Sauvegarde du modèle
-        joblib.dump(self.train_model, f"{pair}_rf_model.pkl")
+        joblib.dump(self.train_model, f"{path}/{pair}_rf_model.pkl")
        print(f"✅ Modèle sauvegardé sous {pair}_rf_model.pkl")
        # X = dataframe des features (après shift/rolling/indicators)
        # y = target binaire ou décimale
        # model = ton modèle entraîné (RandomForestClassifier ou Regressor)
@@ -1666,8 +1830,7 @@ class FrictradeLearning(IStrategy):
        Analyse complète d'un modèle ML supervisé (classification binaire).
        Affiche performances, importance des features, matrices, seuils, etc.
        """
-        output_dir = f"user_data/plots/{pair}/"
+        os.makedirs(self.path, exist_ok=True)
        os.makedirs(output_dir, exist_ok=True)
        # ---- Prédictions ----
        preds = model.predict(X_valid)
@@ -1704,7 +1867,7 @@ class FrictradeLearning(IStrategy):
            for j in range(2):
                plt.text(j, i, cm[i, j], ha="center", va="center", color="black")
        # plt.show()
-        plt.savefig(os.path.join(output_dir, "Matrice de confusion.png"), bbox_inches="tight")
+        plt.savefig(os.path.join(self.path, "Matrice de confusion.png"), bbox_inches="tight")
        plt.close()
        # ---- Importance des features ----
@@ -1727,7 +1890,7 @@ class FrictradeLearning(IStrategy):
            plt.title("Importance des features")
            # plt.show()
-            plt.savefig(os.path.join(output_dir, "Importance des features.png"), bbox_inches="tight")
+            plt.savefig(os.path.join(self.path, "Importance des features.png"), bbox_inches="tight")
            plt.close()
        # ---- Arbre de décision (extrait) ----
@@ -1753,7 +1916,7 @@ class FrictradeLearning(IStrategy):
        plt.title("Courbe ROC")
        plt.legend()
        # plt.show()
-        plt.savefig(os.path.join(output_dir, "Courbe ROC.png"), bbox_inches="tight")
+        plt.savefig(os.path.join(self.path, "Courbe ROC.png"), bbox_inches="tight")
        plt.close()
        # # ---- Interprétation SHAP (optionnelle) ----
@@ -1782,7 +1945,7 @@ class FrictradeLearning(IStrategy):
        #
        #     plt.figure(figsize=(12, 4))
        #     shap.summary_plot(shap_values_to_plot, X_to_plot, show=False)
-        #     plt.savefig(os.path.join(output_dir, "shap_summary.png"), bbox_inches="tight")
+        #     plt.savefig(os.path.join(self.path, "shap_summary.png"), bbox_inches="tight")
        #     plt.close()
        # except ImportError:
        #     print("\n(SHAP non installé — `pip install shap` pour activer l’analyse SHAP.)")
@@ -1791,7 +1954,7 @@ class FrictradeLearning(IStrategy):
        # Trace ou enregistre le graphique
        self.plot_threshold_analysis(y_valid, y_proba, step=0.05,
-                                save_path=f"{output_dir}/threshold_analysis.png")
+                                save_path=f"{self.path}/threshold_analysis.png")
        # y_valid : vraies classes (0 / 1)
        # y_proba : probabilités de la classe 1 prédites par ton modèle
@@ -1820,7 +1983,7 @@ class FrictradeLearning(IStrategy):
        plt.ylabel("Score")
        plt.grid(True, alpha=0.3)
        plt.legend()
-        plt.savefig(f"{output_dir}/seuil_de_probabilite.png", bbox_inches='tight')
+        plt.savefig(f"{self.path}/seuil_de_probabilite.png", bbox_inches='tight')
        # plt.show()
        print(f"✅ Meilleur F1 : {f1s[best_idx]:.3f} au seuil {seuils[best_idx]:.2f}")
@@ -1837,11 +2000,8 @@ class FrictradeLearning(IStrategy):
        """
        # Le graphique généré affichera trois courbes :
        #
        # 🔵 Precision — la fiabilité de tes signaux haussiers.
        #
        # 🟢 Recall — la proportion de hausses que ton modèle détecte.
        #
        # 🟣 F1-score — le compromis optimal entre les deux.
        thresholds = np.arange(0, 1.01, step)
@@ -1887,22 +2047,23 @@ class FrictradeLearning(IStrategy):
                       and not c.endswith("_state")
                       and not c.endswith("_1d")
                       # and not c.endswith("_1h")
-                            # and not c.startswith("open") and not c.startswith("close")
+                       and not c.startswith("open") and not c.startswith("close")
-                            # and not c.startswith("low") and not c.startswith("high")
+                       and not c.startswith("low") and not c.startswith("high")
-                            # and not c.startswith("haopen") and not c.startswith("haclose")
+                       and not c.startswith("haopen") and not c.startswith("haclose")
                            # and not c.startswith("bb_lower") and not c.startswith("bb_upper")
                            # and not c.startswith("bb_middle")
                        and not c.endswith("_count")
                        and not c.endswith("_class") and not c.endswith("_price")
                        and not c.startswith('stop_buying')
                        and not c.startswith('target')
                        and not c.startswith('lvl')
                       ]
        # Étape 3 : remplacer inf et NaN par 0
        dataframe[usable_cols] = dataframe[usable_cols].replace([np.inf, -np.inf], 0).fillna(0)
        print("Colonnes utilisables pour le modèle :")
        print(usable_cols)
-        self.model_indicators = usable_cols
+        # self.model_indicators = usable_cols
-        return self.model_indicators
+        return usable_cols
    def select_uncorrelated_features(self, df, target, top_n=20, corr_threshold=0.7):
@@ -2049,3 +2210,71 @@ class FrictradeLearning(IStrategy):
        #     self.calculateProbabilite2Index(dataframe, futur_cols=['futur_percent'], indic_1=f"{name}{suffixe}_deriv1", indic_2=f"{name}{suffixe}_deriv2")
        return dataframe
    def calculModelInformative(self, informative):
        """
        Fit a calibrated logistic regression on the informative dataframe and
        return calibrated probabilities for the chronological hold-out rows.

        The binary target is 1 when ``sma24`` is higher 13 rows later than on
        the current row. The split is not shuffled: the first 80% of the rows
        train the model and the last 20% are scored. Brier score and ROC AUC
        are printed as diagnostics only.

        Args:
            informative: DataFrame with the indicator columns (must contain
                ``sma24``). All work is done on a copy, so the caller's frame
                is left untouched and the probabilities are NOT written back
                into it; use the return value.

        Returns:
            Array of calibrated class-1 probabilities, one per hold-out row.
        """
        horizon = 13  # look-ahead, in rows, used to build the target

        df = informative.copy()
        # listUsableColumns also replaces inf/NaN by 0 in the columns it selects
        # (in place on the copy), so X is free of NaN for the scaler/regression.
        X = df[self.listUsableColumns(df)]

        future_delta = df["sma24"].shift(-horizon) - df["sma24"]
        # Rows without a known future compare as False and are labelled 0,
        # exactly as before; they are only excluded from the metrics below.
        y = (future_delta > 0).astype(int)
        has_future = future_delta.notna()

        # Chronological train/test split; has_future is split alongside so the
        # mask stays aligned with the hold-out rows.
        X_train, X_test, y_train, y_test, _, known_test = train_test_split(
            X, y, has_future, shuffle=False, test_size=0.2
        )

        # Standardised features + logistic regression pipeline
        clf = Pipeline([
            ("scaler", StandardScaler()),
            ("logreg", LogisticRegression(max_iter=5000))
        ])

        # Cross-validated isotonic calibration
        cal = CalibratedClassifierCV(clf, cv=3, method="isotonic")
        cal.fit(X_train, y_train)

        # Calibrated probabilities of the positive class
        probas = cal.predict_proba(X_test)[:, 1]

        # Diagnostics: score only the rows whose future value is really known,
        # otherwise the trailing rows are counted as spurious negatives.
        known = known_test.to_numpy()
        y_eval = y_test.to_numpy()[known]
        p_eval = probas[known]
        if len(y_eval) == 0:
            print("Brier score: n/a (no hold-out row with a known future)")
            print("ROC AUC: n/a (no hold-out row with a known future)")
        else:
            print("Brier score:", brier_score_loss(y_eval, p_eval))
            if len(np.unique(y_eval)) > 1:
                print("ROC AUC:", roc_auc_score(y_eval, p_eval))
            else:
                # roc_auc_score raises ValueError when only one class is present
                print("ROC AUC: n/a (single class in hold-out set)")

        return probas
    def prune_features(self, model, dataframe, feature_columns, importance_threshold=0.01):
        """
        Drop the features whose share of the model's total gain is below a threshold.

        Args:
            model: fitted model exposing ``get_booster()`` (e.g. an XGBClassifier)
            dataframe: DataFrame containing every column listed in ``feature_columns``
            feature_columns: names of the features used for prediction
            importance_threshold: minimum share of the total gain (0..1) a
                feature needs in order to be kept

        Returns:
            dataframe_pruned: ``dataframe`` restricted to the kept features, NaN filled with 0
            kept_features: kept feature names, in the order of ``feature_columns``
        """
        # Per-feature importance measured as 'gain'; features absent from the
        # returned mapping are treated as having zero importance.
        gains = model.get_booster().get_score(importance_type='gain')

        # Normalise so the shares sum to 1. When the total gain is 0 every share
        # is 0, which avoids a ZeroDivisionError.
        total_gain = sum(gains.values())
        if total_gain > 0:
            gain_share = {name: gain / total_gain for name, gain in gains.items()}
        else:
            gain_share = {}

        kept_features = [
            f for f in feature_columns
            if gain_share.get(f, 0) >= importance_threshold
        ]
        dataframe_pruned = dataframe[kept_features].fillna(0)

        print(f"⚡ Features conservées ({len(kept_features)} / {len(feature_columns)}): {kept_features}")

        return dataframe_pruned, kept_features