LGBMRegressor calcul multi pairs et sauvegarde graph

2025-11-12 23:08:39 +01:00
parent a1f5bc1e41
commit a7135599bc
2 changed files with 146 additions and 39 deletions
--- a/Zeus_LGBMRegressor.py
+++ b/Zeus_LGBMRegressor.py
@@ -290,7 +290,7 @@ class Zeus_LGBMRegressor(IStrategy):
    indicators = {'sma5', 'sma12', 'sma24', 'sma60'}
    indicators_percent = {'percent', 'percent3', 'percent12', 'percent24', 'percent_1h', 'percent3_1h', 'percent12_1h', 'percent24_1h'}

-    mises = IntParameter(1, 50, default=5, space='buy', optimize=False, load=False)
+    mises = IntParameter(1, 50, default=5, space='buy', optimize=True, load=True)

    ml_prob_buy = DecimalParameter(-0.5, 0.5, default=0.0, decimals=2, space='buy', optimize=True, load=True)
    ml_prob_sell = DecimalParameter(-0.5, 0.5, default=0.0, decimals=2, space='sell', optimize=True, load=True)
@@ -908,10 +908,11 @@ class Zeus_LGBMRegressor(IStrategy):
        dataframe["ms-0"] = dataframe["mid_smooth_24_deriv1"]
        # dataframe["ms+10"] = dataframe["mid_smooth_24"].shift(-11)

-        if False and self.dp.runmode.value in ('backtest'):
+        if self.dp.runmode.value in ('backtest'):
            self.trainModel(dataframe, metadata)

-        self.model = joblib.load('rf_model.pkl')
+        short_pair = self.getShortName(pair)
+        self.model = joblib.load(f"{short_pair}_rf_model.pkl")

        # Préparer les features pour la prédiction
        features = dataframe[self.model_indicators].fillna(0)
@@ -928,9 +929,12 @@ class Zeus_LGBMRegressor(IStrategy):
        return dataframe

    def trainModel(self, dataframe: DataFrame, metadata: dict):
+        pair = self.getShortName(metadata['pair'])
        pd.set_option('display.max_rows', None)
        pd.set_option('display.max_columns', None)
        pd.set_option("display.width", 200)
+        path=f"user_data/plots/{pair}/"
+        os.makedirs(path, exist_ok=True)

        # # Étape 1 : sélectionner numériques
        # numeric_cols = dataframe.select_dtypes(include=['int64', 'float64']).columns
@@ -1010,7 +1014,7 @@ class Zeus_LGBMRegressor(IStrategy):
        plt.yticks(rotation=0)

        # --- Sauvegarde ---
-        output_path = "/home/souti/freqtrade/user_data/plots/Matrice_de_correlation_temperature.png"
+        output_path = f"{path}/Matrice_de_correlation_temperature.png"
        plt.savefig(output_path, bbox_inches="tight", dpi=150)
        plt.close(fig)

@@ -1050,7 +1054,7 @@ class Zeus_LGBMRegressor(IStrategy):
        # )
        # train_model.fit(X_train, y_train)

-        train_model, selected_features = self.optuna(X_train, X_test, y_train, y_test)
+        train_model, selected_features = self.optuna(path, X_train, X_test, y_train, y_test)
        print("Features retenues :", list(selected_features))

        # # 2️⃣ Sélection des features AVANT calibration
@@ -1082,10 +1086,10 @@ class Zeus_LGBMRegressor(IStrategy):
        # print(f"Accuracy: {acc:.3f}")

        # 7️⃣ Sauvegarde du modèle
-        joblib.dump(train_model, 'rf_model.pkl')
-        print("✅ Modèle sauvegardé sous rf_model.pkl")
+        joblib.dump(train_model, f"{pair}_rf_model.pkl")
+        print(f"✅ Modèle sauvegardé sous {pair}_rf_model.pkl")

-        self.analyze_model(train_model, X_train, X_test, y_train, y_test)
+        self.analyze_model(pair, train_model, X_train, X_test, y_train, y_test)

    def inspect_model(self, model):
        """
@@ -1159,12 +1163,12 @@ class Zeus_LGBMRegressor(IStrategy):

        print("\n===== ✅ FIN DE L’INSPECTION =====")

-    def analyze_model(self, model, X_train, X_test, y_train, y_test):
+    def analyze_model(self, pair, model, X_train, X_test, y_train, y_test):
        """
        Analyse complète d'un modèle ML supervisé (classification binaire).
        Affiche performances, importance des features, matrices, seuils, etc.
        """
-        output_dir = "user_data/plots"
+        output_dir = f"user_data/plots/{pair}/"
        os.makedirs(output_dir, exist_ok=True)

        # ---- Importance des features ----
@@ -1206,8 +1210,7 @@ class Zeus_LGBMRegressor(IStrategy):
        print(f"RMSE: {rmse:.5f} | R²: {r2:.3f}")

        # --- Création du dossier de sortie ---
-        plot_dir = "/home/souti/freqtrade/user_data/plots"
-        os.makedirs(plot_dir, exist_ok=True)
+        os.makedirs(output_dir, exist_ok=True)

        # --- Graphique prédiction vs réel ---
        plt.figure(figsize=(8, 8))
@@ -1225,36 +1228,41 @@ class Zeus_LGBMRegressor(IStrategy):
        plt.legend()

        # --- Sauvegarde ---
-        plot_path = os.path.join(plot_dir, "LightGBM_regression_pred_vs_real.png")
+        plot_path = os.path.join(output_dir, "LightGBM_regression_pred_vs_real.png")
        plt.savefig(plot_path, bbox_inches="tight", dpi=200)
        plt.close()

+        self.plot_pred_vs_real_filtered(model, X_test, y_test, preds, output_dir)
+
+
        print(f"✅ Graphique sauvegardé : {plot_path}")

-        # save_dir = "/home/souti/freqtrade/user_data/plots/"
-        # os.makedirs(save_dir, exist_ok=True)
+        # ax = lgb.plot_tree(model, tree_index=0, figsize=(30, 20), show_info=["split_gain", "internal_value", "internal_count"])
+        # plt.title("Arbre de décision n°0")
+        # plt.savefig(os.path.join(output_dir, "lgbm_tree_0.png"), bbox_inches="tight")
+        # plt.close()

-        ax = lgb.plot_tree(model, tree_index=0, figsize=(30, 20),
-                           show_info=["split_gain", "internal_value", "internal_count"])
-        plt.title("Arbre de décision n°0")
-        plt.savefig(os.path.join(plot_dir, "lgbm_tree_0.png"), bbox_inches="tight")
-        plt.close()
+        for i in range(5):
+            ax = lgb.plot_tree(model, tree_index=i, figsize=(20, 12))
+            plt.title(f"Arbre {i}")
+            plt.savefig(os.path.join(output_dir, f"lgbm_tree_{i}.png"), bbox_inches="tight")
+            plt.close()

        ax = lgb.plot_tree(model, figsize=(40, 20))
        plt.title("Vue globale du modèle LGBM")
-        plt.savefig(os.path.join(plot_dir, "lgbm_all_trees.png"), bbox_inches="tight")
+        plt.savefig(os.path.join(output_dir, "lgbm_all_trees.png"), bbox_inches="tight")
        plt.close()
        # X_test = np.linspace(0, 10, 1000).reshape(-1, 1)
        y_pred = model.predict(X_test)

-        self.graphFonctionApprise(X_test, y_test, y_pred)
-        self.graphFonctionAppriseFeature(X_test, y_test, y_pred)
+        self.graphFonctionApprise(output_dir, X_test, y_test, y_pred)
+        self.graphFonctionAppriseFeature(output_dir, X_test, y_test, y_pred)

        # ==============================================================================

        ax = lgb.plot_importance(model, max_num_features=30, figsize=(12, 6))
        plt.title("Importance des features - LGBM")
-        plt.savefig(os.path.join(plot_dir, "lgbm_feature_importance.png"), bbox_inches="tight")
+        plt.savefig(os.path.join(output_dir, "lgbm_feature_importance.png"), bbox_inches="tight")
        plt.close()

        corr = X_train.corr() * 100  # en pourcentage
@@ -1262,7 +1270,7 @@ class Zeus_LGBMRegressor(IStrategy):
        plt.figure(figsize=(20, 16))
        sns.heatmap(corr, cmap="coolwarm", center=0, annot=False, fmt=".1f", cbar_kws={'label': 'Corrélation (%)'})
        plt.title("Matrice de corrélation (%)")
-        plt.savefig(os.path.join(plot_dir, "correlation_matrix.png"), bbox_inches="tight")
+        plt.savefig(os.path.join(output_dir, "correlation_matrix.png"), bbox_inches="tight")
        plt.close()

        plt.figure(figsize=(10, 6))
@@ -1270,11 +1278,53 @@ class Zeus_LGBMRegressor(IStrategy):
        plt.xlabel("Valeurs réelles")
        plt.ylabel("Prédictions du modèle")
        plt.title("Comparaison y_test vs y_pred")
-        plt.savefig(os.path.join(plot_dir, "ytest_vs_ypred.png"), bbox_inches="tight")
+        plt.savefig(os.path.join(output_dir, "ytest_vs_ypred.png"), bbox_inches="tight")
        plt.close()

        print("\n===== ✅ FIN DE L’ANALYSE =====")

+    def plot_pred_vs_real_filtered(self, model, X_test, y_test, preds, output_dir, top_n=5):
+        """
+        Save a predicted-vs-actual scatter plot restricted to non-outlier rows.
+
+        The ``top_n`` most important features (ranked by
+        ``model.feature_importances_``) are z-scored over ``X_test``. A test row
+        is kept only when every one of those features lies strictly within
+        3 standard deviations of its mean; the remaining (actual, predicted)
+        pairs are plotted against the ideal ``y = x`` line.
+
+        Args:
+            model: fitted estimator exposing ``feature_importances_``, with one
+                entry per column of ``X_test``.
+            X_test: pandas DataFrame of test features.
+            y_test: actual target values, positionally aligned with ``X_test``.
+            preds: model predictions, positionally aligned with ``X_test``.
+            output_dir: directory in which the PNG is written.
+            top_n: number of most-important features used for the outlier filter.
+        """
+        # --- 1. Rank the features by importance and keep the top N ---
+        importance_df = pd.DataFrame({
+            "feature": X_test.columns,
+            "importance": model.feature_importances_
+        }).sort_values(by="importance", ascending=False)
+
+        top_features = importance_df.head(top_n)["feature"].tolist()
+        print(f"Top {top_n} features: {top_features}")
+
+        # --- 2. Build a mask that drops rows where any top feature is an outlier ---
+        X_top = X_test[top_features]
+        spread = X_top.std()
+        # A constant column has std == 0 (NaN with a single row); dividing by it
+        # would turn every z-score into NaN/inf and silently discard all rows,
+        # so such columns are left out of the outlier test.
+        usable = spread[spread > 0].index
+        z_scores = (X_top[usable] - X_top[usable].mean()) / spread[usable]
+        # Plain NumPy mask: selection below is positional, so it does not depend
+        # on y_test / preds carrying the same index labels as X_test.
+        mask = (z_scores.abs() < 3).all(axis=1).to_numpy()
+        y_filtered = np.asarray(y_test)[mask]
+        preds_filtered = np.asarray(preds)[mask]
+
+        # --- 3. Plot ---
+        plt.figure(figsize=(8, 8))
+        plt.scatter(y_filtered, preds_filtered, alpha=0.4, s=15, c='blue', label=f"Top {top_n} features")
+        plt.xlabel("Valeurs réelles", fontsize=12)
+        plt.ylabel("Valeurs prédites", fontsize=12)
+        plt.title(f"LightGBM Régression — Prédiction vs Réel (filtré sur top {top_n} features)", fontsize=14)
+        # The ideal line needs at least one point to define its end points.
+        if y_filtered.size:
+            lower, upper = y_filtered.min(), y_filtered.max()
+            plt.plot(
+                [lower, upper],
+                [lower, upper],
+                'r--',
+                linewidth=1,
+                label="Ligne idéale"
+            )
+        plt.legend()
+        plt.grid(True)
+
+        # os.path.join avoids the doubled "/" when output_dir ends with a slash.
+        out_path = os.path.join(output_dir, f"lgbm_pred_vs_real_top{top_n}.png")
+        plt.savefig(out_path, bbox_inches="tight")
+        plt.close()
+
+
    def plot_threshold_analysis(self, y_true, y_proba, step=0.05, save_path=None):
        """
        Affiche la précision, le rappel et le F1-score selon le seuil de décision.
@@ -1708,7 +1758,7 @@ class Zeus_LGBMRegressor(IStrategy):
    def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
        dataframe.loc[
            (
-                (dataframe['ml_prob'] > self.ml_prob_buy.value)
+                (dataframe['ml_prob'] > dataframe['sma24_deriv1'])
            ), ['enter_long', 'enter_tag']] = (1, f"ml_prob")

        dataframe['test'] = np.where(dataframe['enter_long'] == 1, dataframe['close'] * 1.01, np.nan)
@@ -1864,7 +1914,7 @@ class Zeus_LGBMRegressor(IStrategy):
    def populate_sell_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
        dataframe.loc[
            (
-                (dataframe['ml_prob'] < self.ml_prob_sell.value)
+                (dataframe['ml_prob'] < dataframe['sma24_deriv1'])
            ), ['exit_long', 'exit_tag']] = (1, f"ml_prob")

        return dataframe
@@ -1872,6 +1922,7 @@ class Zeus_LGBMRegressor(IStrategy):
    def adjust_trade_position(self, trade: Trade, current_time: datetime,
                              current_rate: float, current_profit: float, min_stake: float,
                              max_stake: float, **kwargs):
+        return None
        # ne rien faire si ordre deja en cours
        if trade.has_open_orders:
            # print("skip open orders")
@@ -3078,7 +3129,7 @@ class Zeus_LGBMRegressor(IStrategy):

        return selected_corr

-    def graphFonctionApprise(self, X_test, y_test, y_pred):
+    def graphFonctionApprise(self, path, X_test, y_test, y_pred):
        # Exemple : trier les valeurs de X_test et les prédictions
        x_sorted = np.argsort(X_test.iloc[:, 0])
        x = X_test.iloc[:, 0].iloc[x_sorted]
@@ -3095,33 +3146,47 @@ class Zeus_LGBMRegressor(IStrategy):
        plt.legend()
        plt.grid(True)

-        out_path = "/home/souti/freqtrade/user_data/plots/lgbm_function.png"
+        out_path = f"{path}/lgbm_function.png"
        plt.savefig(out_path, bbox_inches="tight")
        plt.close()

        print(f"Graphique sauvegardé : {out_path}")

-    def graphFonctionAppriseFeature(self,  X_test, y_test, y_pred):
-        plt.figure(figsize=(14, 8))
+    import numpy as np
+    import seaborn as sns
+    import matplotlib.pyplot as plt

+    def graphFonctionAppriseFeature(self, path, X_test, y_test, y_pred):
+        plt.figure(figsize=(14, 8))
        colors = sns.color_palette("coolwarm", n_colors=X_test.shape[1])

-        for i, col in enumerate(X_test.columns):
-            plt.plot(X_test[col], y_pred, '.', color=colors[i], alpha=0.4, label=col)
+        # Conversion en DataFrame pour manip plus simple
+        df = X_test.copy()
+        df["y_pred"] = y_pred

-        plt.title("Fonction apprise par LGBMRegressor (par feature)")
+        # --- filtrage sur y_pred (ou sur chaque feature si tu veux)
+        mean = df["y_pred"].mean()
+        std = df["y_pred"].std()
+
+        df = df[(df["y_pred"] >= mean - 2 * std) & (df["y_pred"] <= mean + 2 * std)]
+
+        # --- tracé
+        for i, col in enumerate(X_test.columns):
+            plt.plot(df[col], df["y_pred"], '.', color=colors[i], alpha=0.4, label=col)
+
+        plt.title("Fonction apprise par LGBMRegressor (filtrée à ±2σ)")
        plt.xlabel("Valeur feature")
        plt.ylabel("Valeur prédite")
-        plt.legend(loc="best")
+        plt.legend(loc="right")
        plt.grid(True)

-        out_path = "/home/souti/freqtrade/user_data/plots/lgbm_features.png"
+        out_path = f"{path}/lgbm_features.png"
        plt.savefig(out_path, bbox_inches="tight")
        plt.close()

        print(f"Graphique sauvegardé : {out_path}")

-    def optuna(self, X_train, X_test, y_train, y_test):
+    def optuna(self, path, X_train, X_test, y_train, y_test):
        # Suppose que X_train, y_train sont déjà définis
        # ou sinon :
        # X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)
@@ -3161,7 +3226,7 @@ class Zeus_LGBMRegressor(IStrategy):
        print(f"Meilleur RMSE : {study.best_value:.4f}")

        # 🔹 Sauvegarder les résultats
-        optuna_path = "/home/souti/freqtrade/user_data/plots/optuna_lgbm_results.txt"
+        optuna_path = f"{path}/optuna_lgbm_results.txt"
        with open(optuna_path, "w") as f:
            f.write(f"Best params:\n{study.best_params}\n")
            f.write(f"Best RMSE: {study.best_value:.4f}\n")
--- a/tools/sklearn/sma_regression.py
+++ b/tools/sklearn/sma_regression.py
@@ -0,0 +1,42 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn.linear_model import LinearRegression
+
+# Example: a dataframe with a single synthetic "sma24" column
+# (noisy sine wave around 200).
+n = 100
+df = pd.DataFrame({
+    "sma24": np.sin(np.linspace(0, 6*np.pi, n)) * 50 + 200 + np.random.randn(n)*2
+})
+
+# --- Parameters
+lookback = 30        # number of candles used to fit the regression
+future_steps = 10    # number of future candles to estimate
+
+# --- Prepare the data
+X = np.arange(lookback).reshape(-1, 1)                      # 0 .. 29
+y = df["sma24"].iloc[-lookback:].values                     # most recent values
+
+# --- Fit the regression
+model = LinearRegression()
+model.fit(X, y)
+
+# --- Predict the future values
+X_future = np.arange(lookback, lookback + future_steps).reshape(-1, 1)
+y_future = model.predict(X_future)
+
+# --- Build the full regression curve (fit over the lookback window + forecast).
+# The fitted values are used for the historical part so that the dashed line
+# really is the regression; concatenating the raw history instead would just
+# redraw the blue curve and jump at the forecast boundary.
+y_fit = model.predict(X)
+predicted_full = np.concatenate([y_fit, y_future])
+
+# --- Display
+plt.figure(figsize=(10,5))
+plt.plot(df.index[-lookback:], y, label="Historique (sma24)", color="blue")
+plt.plot(
+    np.arange(df.index[-1]-lookback+1, df.index[-1]+future_steps+1),
+    predicted_full,
+    label="Régression + prévision", color="orange", linestyle="--"
+)
+# Vertical marker at the last observed candle (start of the forecast).
+plt.axvline(df.index[-1], color="gray", linestyle=":")
+plt.legend()
+plt.title(f"Projection de SMA24 sur {future_steps} bougies futures")
+plt.show()