LGBMClassifier ajout des corrélations mid_smooth_24

2025-11-11 17:00:52 +01:00
parent 3b3cf5976a
commit 3ca1c2d9c1
5 changed files with 484 additions and 707 deletions
--- a/Zeus_8_3_2_B_4_2.py
+++ b/Zeus_8_3_2_B_4_2.py
@@ -84,7 +84,6 @@ def normalize(df):

 class Zeus_8_3_2_B_4_2(IStrategy):
    # Machine Learning
-    model = joblib.load('rf_model.pkl')
    # model_indicators = [
    #     'rsi', 'rsi_deriv1', 'rsi_deriv2', "max_rsi_12",
    #     "bb_percent",
@@ -100,24 +99,29 @@ class Zeus_8_3_2_B_4_2(IStrategy):
    #     'rsi_1h', 'rsi_deriv1_1h', 'rsi_deriv2_1h', "max_rsi_12_1h",
    # ]

-    model_indicators = ['open', 'high', 'low', 'close', 'volume', 'haopen', 'haclose', 'hapercent', 'mid',
-     'percent', 'percent3', 'percent12', 'percent24', 'sma5', 'sma5_dist', 'sma5_deriv1',
-     'sma5_deriv2', 'sma5_state', 'sma12', 'sma12_dist', 'sma12_deriv1', 'sma12_deriv2',
-     'sma12_state', 'sma24', 'sma24_dist', 'sma24_deriv1', 'sma24_deriv2', 'sma24_state', 'sma48',
-     'sma48_dist', 'sma48_deriv1', 'sma48_deriv2', 'sma48_state', 'sma60', 'sma60_dist',
-     'sma60_deriv1', 'sma60_deriv2', 'sma60_state', 'mid_smooth_3', 'mid_smooth_3_dist',
-     'mid_smooth_3_deriv1', 'mid_smooth_3_deriv2', 'mid_smooth_3_state', 'mid_smooth_5',
-     'mid_smooth_5_dist', 'mid_smooth_5_deriv1', 'mid_smooth_5_deriv2', 'mid_smooth_5_state',
-     'mid_smooth_12', 'mid_smooth_12_dist', 'mid_smooth_12_deriv1', 'mid_smooth_12_deriv2',
-     'mid_smooth_12_state', 'mid_smooth_24', 'mid_smooth_24_dist', 'mid_smooth_24_deriv1',
-     'mid_smooth_24_deriv2', 'mid_smooth_24_state', 'rsi', 'max_rsi_12', 'max_rsi_24', 'rsi_dist',
-     'rsi_deriv1', 'rsi_deriv2', 'rsi_state', 'max12', 'max60', 'min60', 'min_max_60',
-     'bb_lowerband', 'bb_middleband', 'bb_upperband', 'bb_percent', 'bb_width', 'macd',
-     'macdsignal', 'macdhist', 'sma_20', 'sma_100', 'slope', 'slope_smooth', 'atr', 'atr_norm',
-     'adx', 'obv', 'ret', 'vol_24', 'down_count', 'up_count', 'down_pct', 'up_pct',
-     'rsi_slope', 'adx_change', 'volatility_ratio', 'rsi_diff', 'slope_ratio', 'volume_sma_deriv',
-     'volume_dist', 'volume_deriv1', 'volume_deriv2', 'volume_state', 'slope_norm', 'trend_class',
-     'mid_smooth']
+    model_indicators = [
+     # 'hapercent',
+     # 'percent', 'percent3', 'percent12', 'percent24',
+     # 'sma5_dist', 'sma5_deriv1', 'sma5_deriv2',
+     # 'sma12_dist', 'sma12_deriv1', 'sma12_deriv2',
+     # 'sma24_dist', 'sma24_deriv1', 'sma24_deriv2',
+     # 'sma48_dist', 'sma48_deriv1', 'sma48_deriv2',
+     # 'sma60_dist', 'sma60_deriv1', 'sma60_deriv2',
+     # 'mid_smooth_3_deriv1', 'mid_smooth_3_deriv2',
+     # 'mid_smooth_5_dist', 'mid_smooth_5_deriv1', 'mid_smooth_5_deriv2',
+     # 'mid_smooth_12_dist', 'mid_smooth_12_deriv1', 'mid_smooth_12_deriv2',
+     # 'mid_smooth_24_dist', 'mid_smooth_24_deriv1', 'mid_smooth_24_deriv2',
+     # 'rsi', 'max_rsi_12', 'max_rsi_24', 'rsi_dist',
+     # 'rsi_deriv1', 'rsi_deriv2', 'min_max_60',
+     # 'bb_percent', 'bb_width', 'macd',
+     # 'macdsignal', 'macdhist', 'slope', 'slope_smooth', 'atr', 'atr_norm',
+     # 'adx',
+     # 'obv', 'obv_deriv1', 'obv_deriv2',
+     # 'obv5', 'obv5_deriv1', 'obv5_deriv2',
+     # 'vol_24', 'down_count', 'up_count', 'down_pct', 'up_pct',
+     # 'rsi_slope', 'adx_change', 'volatility_ratio', 'rsi_diff', 'slope_ratio', 'volume_sma_deriv',
+     # 'volume_dist', 'volume_deriv1', 'volume_deriv2', 'slope_norm',
+     ]

    levels = [1, 2, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
    # startup_candle_count = 12 * 24 * 5
@@ -1083,40 +1087,48 @@ class Zeus_8_3_2_B_4_2(IStrategy):
        if self.dp.runmode.value in ('backtest'):
            self.trainModel(dataframe, metadata)

-        # Préparer les features pour la prédiction
-        features = dataframe[self.model_indicators].fillna(0)
+        short_pair = self.getShortName(pair)
+        if short_pair == 'BTC':
+            self.model = joblib.load(f"{short_pair}_rf_model.pkl")

-        # Prédiction : probabilité que le prix monte
-        probs = self.model.predict_proba(features)[:, 1]
+            # Préparer les features pour la prédiction
+            features = dataframe[self.model_indicators].fillna(0)

-        # Sauvegarder la probabilité pour l’analyse
-        dataframe['ml_prob'] = probs
+            # Prédiction : probabilité que le prix monte
+            probs = self.model.predict_proba(features)[:, 1]

-        self.inspect_model(self.model)
+            # Sauvegarder la probabilité pour l’analyse
+            dataframe['ml_prob'] = probs
+
+            self.inspect_model(self.model)

        return dataframe

    def trainModel(self, dataframe: DataFrame, metadata: dict):
+        pair = self.getShortName(metadata['pair'])
        pd.set_option('display.max_rows', None)
        pd.set_option('display.max_columns', None)
        pd.set_option("display.width", 200)
+        path=f"user_data/plots/{pair}/"
+        os.makedirs(path, exist_ok=True)

-        # Étape 1 : sélectionner numériques
-        numeric_cols = dataframe.select_dtypes(include=['int64', 'float64']).columns
-
-        # Étape 2 : enlever constantes
-        usable_cols = [c for c in numeric_cols if dataframe[c].nunique() > 1
-                       and (not c.endswith("_state") and not c.endswith("_1h") and not c.endswith("_1d")
-                            and not c.endswith("_class") and not c.endswith("_price")
-                            and not c.startswith('stop_buying'))]
-
-        # Étape 3 : remplacer inf et NaN par 0
-        dataframe[usable_cols] = dataframe[usable_cols].replace([np.inf, -np.inf], 0).fillna(0)
-
-        print("Colonnes utilisables pour le modèle :")
-        print(usable_cols)
-
-        self.model_indicators = usable_cols
+        # # Étape 1 : sélectionner numériques
+        # numeric_cols = dataframe.select_dtypes(include=['int64', 'float64']).columns
+        #
+        # # Étape 2 : enlever constantes
+        # usable_cols = [c for c in numeric_cols if dataframe[c].nunique() > 1
+        #                and (not c.endswith("_state") and not c.endswith("_1h") and not c.endswith("_1d")
+        #                     and not c.endswith("_class") and not c.endswith("_price")
+        #                     and not c.startswith('stop_buying'))]
+        #
+        # # Étape 3 : remplacer inf et NaN par 0
+        # dataframe[usable_cols] = dataframe[usable_cols].replace([np.inf, -np.inf], 0).fillna(0)
+        #
+        # print("Colonnes utilisables pour le modèle :")
+        # print(usable_cols)
+        #
+        # self.model_indicators = usable_cols
+        #
        df = dataframe[self.model_indicators].copy()

        # Corrélations des colonnes
@@ -1126,7 +1138,7 @@ class Zeus_8_3_2_B_4_2(IStrategy):

        # 3️⃣ Créer la cible : 1 si le prix monte dans les prochaines bougies
        # df['target'] = (df['sma24'].shift(-24) > df['sma24']).astype(int)
-        df['target'] = (df['sma24'].shift(-25).rolling(24).max() > df['sma24'] * 1.003).astype(int)
+        df['target'] = (df['percent12'].shift(-13) > 0.0015).astype(int)
        df['target'] = df['target'].fillna(0).astype(int)

        # Corrélations triées par importance avec une colonne cible
@@ -1178,7 +1190,7 @@ class Zeus_8_3_2_B_4_2(IStrategy):
        plt.yticks(rotation=0)

        # --- Sauvegarde ---
-        output_path = "/home/souti/freqtrade/user_data/plots/Matrice_de_correlation_temperature.png"
+        output_path = f"{path}/Matrice_de_correlation_temperature.png"
        plt.savefig(output_path, bbox_inches="tight", dpi=150)
        plt.close(fig)

@@ -1194,6 +1206,7 @@ class Zeus_8_3_2_B_4_2(IStrategy):

        X = df[self.model_indicators]
        y = df['target']  # ta colonne cible binaire ou numérique
+        print("===== 🎯 FEATURES SCORES =====")
        print(self.feature_auc_scores(X, y))

        # 4️⃣ Split train/test
@@ -1211,29 +1224,30 @@ class Zeus_8_3_2_B_4_2(IStrategy):

        # 5️⃣ Entraînement du modèle
        # train_model = RandomForestClassifier(n_estimators=200, random_state=42)
-        # train_model = RandomForestClassifier(
-        #     n_estimators=300,
-        #     max_depth=12,
-        #     # min_samples_split=4,
-        #     # min_samples_leaf=2,
-        #     # max_features='sqrt',
-        #     # random_state=42,
-        #     # n_jobs=-1,
-        #     class_weight='balanced'
-        # )
-        # 1️⃣ Entraîne ton modèle LGBM normal
-        train_model = LGBMClassifier(
-            n_estimators=800,
-            learning_rate=0.02,
-            max_depth=10,
-            num_leaves=31,
-            subsample=0.8,
-            colsample_bytree=0.8,
-            reg_alpha=0.2,
-            reg_lambda=0.4,
-            class_weight='balanced',
-            random_state=42,
+        train_model = RandomForestClassifier(
+            n_estimators=300,
+            max_depth=12,
+            # min_samples_split=4,
+            # min_samples_leaf=2,
+            # max_features='sqrt',
+            # random_state=42,
+            # n_jobs=-1,
+            class_weight='balanced'
        )
+        # 1️⃣ Entraîne ton modèle LGBM normal
+        # train_model = LGBMClassifier(
+        #     n_estimators=800,
+        #     learning_rate=0.02,
+        #     max_depth=10,
+        #     num_leaves=31,
+        #     subsample=0.8,
+        #     colsample_bytree=0.8,
+        #     reg_alpha=0.2,
+        #     reg_lambda=0.4,
+        #     class_weight='balanced',
+        #     random_state=42,
+        # )
+
        train_model.fit(X_train, y_train)

        # 2️⃣ Sélection des features AVANT calibration
@@ -1246,7 +1260,7 @@ class Zeus_8_3_2_B_4_2(IStrategy):
        calibrated.fit(X_train[selected_features], y_train)
        print(calibrated)

-        # # calibration
+        # # # calibration
        # train_model = CalibratedClassifierCV(train_model, method='sigmoid', cv=5)
        # # Sélection
        # sfm = SelectFromModel(train_model, threshold="median")
@@ -1262,14 +1276,13 @@ class Zeus_8_3_2_B_4_2(IStrategy):
        print("\nRapport de classification :\n", classification_report(y_test, y_pred))
        print("\nMatrice de confusion :\n", confusion_matrix(y_test, y_pred))

-        # Importances
-        importances = pd.DataFrame({
-            "feature": train_model.feature_name_,
-            "importance": train_model.feature_importances_
-        }).sort_values("importance", ascending=False)
-        print("\n===== 🔍 IMPORTANCE DES FEATURES =====")
-
-        print(importances)
+        # # Importances
+        # importances = pd.DataFrame({
+        #     "feature": train_model.feature_name_,
+        #     "importance": train_model.feature_importances_
+        # }).sort_values("importance", ascending=False)
+        # print("\n===== 🔍 IMPORTANCE DES FEATURES =====")
+        # print(importances)

        best_f1 = 0
        best_t = 0.5
@@ -1289,7 +1302,7 @@ class Zeus_8_3_2_B_4_2(IStrategy):
        print(f"Accuracy: {acc:.3f}")

        # 7️⃣ Sauvegarde du modèle
-        joblib.dump(train_model, 'rf_model.pkl')
+        joblib.dump(train_model, f"{pair}_rf_model.pkl")
        print("✅ Modèle sauvegardé sous rf_model.pkl")

        # X = dataframe des features (après shift/rolling/indicators)
@@ -1314,7 +1327,7 @@ class Zeus_8_3_2_B_4_2(IStrategy):
        # plt.ylabel("Score")
        # plt.show()

-        self.analyze_model(train_model, X_train, X_test, y_train, y_test)
+        self.analyze_model(pair, train_model, X_train, X_test, y_train, y_test)

    def inspect_model(self, model):
        """
@@ -1388,12 +1401,12 @@ class Zeus_8_3_2_B_4_2(IStrategy):

        print("\n===== ✅ FIN DE L’INSPECTION =====")

-    def analyze_model(self, model, X_train, X_test, y_train, y_test):
+    def analyze_model(self, pair, model, X_train, X_test, y_train, y_test):
        """
        Analyse complète d'un modèle ML supervisé (classification binaire).
        Affiche performances, importance des features, matrices, seuils, etc.
        """
-        output_dir = "user_data/plots"
+        output_dir = f"user_data/plots/{pair}/"
        os.makedirs(output_dir, exist_ok=True)

        # ---- Prédictions ----
@@ -1518,7 +1531,7 @@ class Zeus_8_3_2_B_4_2(IStrategy):

        # Trace ou enregistre le graphique
        self.plot_threshold_analysis(y_test, y_proba, step=0.05,
-                                save_path="/home/souti/freqtrade/user_data/plots/threshold_analysis.png")
+                                save_path=f"{output_dir}/threshold_analysis.png")

        # y_test : vraies classes (0 / 1)
        # y_proba : probabilités de la classe 1 prédites par ton modèle
@@ -1547,7 +1560,7 @@ class Zeus_8_3_2_B_4_2(IStrategy):
        plt.ylabel("Score")
        plt.grid(True, alpha=0.3)
        plt.legend()
-        plt.savefig("/home/souti/freqtrade/user_data/plots/seuil_de_probabilite.png", bbox_inches='tight')
+        plt.savefig(f"{output_dir}/seuil_de_probabilite.png", bbox_inches='tight')
        # plt.show()

        print(f"✅ Meilleur F1 : {f1s[best_idx]:.3f} au seuil {seuils[best_idx]:.2f}")
@@ -1616,6 +1629,7 @@ class Zeus_8_3_2_B_4_2(IStrategy):


    def populateDataframe(self, dataframe, timeframe='5m'):
+        dataframe = dataframe.copy()
        heikinashi = qtpylib.heikinashi(dataframe)
        dataframe['haopen'] = heikinashi['open']
        dataframe['haclose'] = heikinashi['close']
@@ -1667,7 +1681,7 @@ class Zeus_8_3_2_B_4_2(IStrategy):
            (dataframe["close"] - dataframe["bb_lowerband"]) /
            (dataframe["bb_upperband"] - dataframe["bb_lowerband"])
        )
-        dataframe["bb_width"] = (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / dataframe["sma5"]
+        dataframe["bb_width"] = (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / dataframe["sma24"]

        # dataframe["bb_width"] = (
        #     (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / dataframe["bb_middleband"]
@@ -1762,6 +1776,12 @@ class Zeus_8_3_2_B_4_2(IStrategy):
        dataframe['obv'] = ta.volume.OnBalanceVolumeIndicator(
            close=dataframe['close'], volume=dataframe['volume']
        ).on_balance_volume()
+        self.calculeDerivees(dataframe, 'obv', timeframe=timeframe, ema_period=1)
+
+        dataframe['obv5'] = ta.volume.OnBalanceVolumeIndicator(
+            close=dataframe['sma5'], volume=dataframe['volume'].rolling(5).sum()
+        ).on_balance_volume()
+        self.calculeDerivees(dataframe, 'obv5', timeframe=timeframe, ema_period=5)

        # --- Volatilité récente (écart-type des rendements) ---
        dataframe['vol_24'] = dataframe['percent'].rolling(24).std()
@@ -1797,7 +1817,7 @@ class Zeus_8_3_2_B_4_2(IStrategy):
        aucs = {}
        for col in X.columns:
            try:
-                aucs[col] = roc_auc_score(y, X[col].fillna(method='ffill').fillna(0))
+                aucs[col] = roc_auc_score(y, X[col].ffill().fillna(0))
            except Exception:
                aucs[col] = np.nan
        return pd.Series(aucs).sort_values(ascending=False)
--- a/Zeus_LGBMRegressor.json
+++ b/Zeus_LGBMRegressor.json
@@ -0,0 +1,36 @@
+{
+  "strategy_name": "Zeus_LGBMRegressor",
+  "params": {
+    "roi": {
+      "0": 0.564,
+      "567": 0.273,
+      "2814": 0.12,
+      "7675": 0
+    },
+    "stoploss": {
+      "stoploss": -1.0
+    },
+    "trailing": {
+      "trailing_stop": true,
+      "trailing_stop_positive": 0.15,
+      "trailing_stop_positive_offset": 0.2,
+      "trailing_only_offset_is_reached": true
+    },
+    "max_open_trades": {
+      "max_open_trades": 80
+    },
+    "buy": {
+      "mises": 5,
+      "mise_factor_buy": 0.02,
+      "ml_prob_buy": -0.27,
+      "pct": 0.037,
+      "pct_inc": 0.0016
+    },
+    "sell": {
+      "ml_prob_sell": -0.05
+    },
+    "protection": {}
+  },
+  "ft_stratparam_v": 1,
+  "export_time": "2025-11-11 15:44:21.251297+00:00"
+}
--- a/Zeus_LGBMRegressor.md
+++ b/Zeus_LGBMRegressor.md
@@ -0,0 +1,106 @@
+🌟 Paramètres principaux de LGBMRegressor
+1️⃣ objective='regression'
+
+But : indique le type de problème à résoudre.
+
+Ici, tu veux prédire une valeur continue (par ex. un rendement futur, un prix, etc.).
+
+Autres valeurs possibles :
+
+'binary' → pour classification 0/1
+
+'multiclass' → pour plusieurs classes
+
+'regression_l1' → pour des valeurs continues mais avec perte L1 (moins sensible aux outliers)
+
+📘 En résumé : ici LightGBM cherche à minimiser l’erreur entre la valeur prédite et la valeur réelle.
+
+2️⃣ metric='rmse'
+
+But : indique la métrique utilisée pour évaluer la qualité du modèle.
+
+'rmse' = Root Mean Squared Error (racine de la moyenne des carrés des erreurs)
+→ pénalise fortement les grosses erreurs.
+
+'mae' (Mean Absolute Error) est une alternative plus robuste (moins sensible aux outliers).
+
+Tu peux aussi utiliser plusieurs métriques : metric=['rmse', 'mae'].
+
+3️⃣ n_estimators=300
+
+But : nombre d’arbres de décision à construire.
+
+Chaque arbre apprend à corriger les erreurs du précédent → c’est le boosting.
+
+Plus ce nombre est grand :
+
+Meilleure précision potentielle
+
+Mais risque de surapprentissage et de lenteur
+
+Typiquement, on le combine avec un petit learning_rate (comme ici 0.05).
+
+4️⃣ learning_rate=0.05
+
+But : contrôle l’intensité avec laquelle chaque nouvel arbre corrige les erreurs.
+
+Si learning_rate ↓, il faut plus d’arbres (n_estimators ↑) pour converger.
+
+Typiquement :
+
+0.1 = standard
+
+0.05 = prudent (meilleure généralisation)
+
+0.01 = très lent mais précis
+
+⚖️ Ce paramètre agit comme un “frein” sur l’apprentissage.
+
+5️⃣ max_depth=7
+
+But : profondeur maximale des arbres.
+
+Plus les arbres sont profonds :
+
+→ plus le modèle capture des relations complexes
+
+→ mais risque de surapprentissage
+
+Valeurs typiques :
+
+3 à 8 pour éviter le surapprentissage
+
+-1 = pas de limite
+
+6️⃣ subsample=0.8
+
+But : fraction de l’échantillon d’entraînement utilisée pour chaque arbre.
+
+Exemple :
+
+0.8 = chaque arbre est entraîné sur 80 % des lignes (tirées aléatoirement).
+
+Permet :
+
+de réduire le surapprentissage
+
+d’accélérer l’entraînement
+
+Si tu veux des résultats très stables → monte à 1.0
+Si tu veux plus de diversité entre les arbres → garde entre 0.7 et 0.9.
+
+7️⃣ colsample_bytree=0.8
+
+But : fraction de colonnes (features) utilisées pour chaque arbre.
+
+Comme subsample, mais pour les variables.
+
+Aide à la régularisation : chaque arbre ne voit pas toutes les colonnes → modèle plus robuste.
+
+Typiquement entre 0.6 et 1.0.
+
+8️⃣ random_state=42
+
+But : fixe la graine aléatoire.
+
+Permet d’obtenir des résultats reproductibles d’une exécution à l’autre.
--- a/Zeus_LGBMRegressor.py
+++ b/Zeus_LGBMRegressor.py
--- a/tools/sklearn/Sinus.py
+++ b/tools/sklearn/Sinus.py
@@ -0,0 +1,32 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from lightgbm import LGBMRegressor
+
+# === Données non linéaires ===
+np.random.seed(0)
+X = np.linspace(0, 10, 200).reshape(-1, 1)
+y = np.sin(X).ravel() + np.random.normal(0, 0.1, X.shape[0])  # sinusoïde + bruit
+
+# === Entraînement du modèle ===
+model = LGBMRegressor(
+    n_estimators=300,   # nombre d’arbres
+    learning_rate=0.05, # taux d’apprentissage (plus petit = plus lisse)
+    max_depth=5         # profondeur des arbres (plus grand = plus complexe)
+)
+model.fit(X, y)
+
+# === Prédiction ===
+X_test = np.linspace(0, 10, 500).reshape(-1, 1)
+y_pred = model.predict(X_test)
+
+# === Visualisation ===
+plt.figure(figsize=(10, 5))
+plt.scatter(X, y, color="lightgray", label="Données réelles (sin + bruit)", s=20)
+plt.plot(X_test, np.sin(X_test), color="green", linestyle="--", label="sin(x) réel")
+plt.plot(X_test, y_pred, color="red", label="Prédiction LGBM")
+plt.title("Approximation non linéaire avec LGBMRegressor")
+plt.xlabel("x")
+plt.ylabel("y")
+plt.legend()
+plt.grid(True)
+plt.show()