diff --git a/Zeus_8_3_2_B_4_2.py b/Zeus_8_3_2_B_4_2.py index 6df0452..6e76ef5 100644 --- a/Zeus_8_3_2_B_4_2.py +++ b/Zeus_8_3_2_B_4_2.py @@ -55,8 +55,13 @@ from sklearn.tree import export_text import inspect from sklearn.feature_selection import mutual_info_classif from sklearn.inspection import permutation_importance - +from lightgbm import LGBMClassifier +from sklearn.calibration import CalibratedClassifierCV +from sklearn.feature_selection import SelectFromModel from tabulate import tabulate +from sklearn.model_selection import GridSearchCV +from sklearn.feature_selection import VarianceThreshold +import seaborn as sns # Couleurs ANSI de base RED = "\033[31m" @@ -80,19 +85,39 @@ def normalize(df): class Zeus_8_3_2_B_4_2(IStrategy): # Machine Learning model = joblib.load('rf_model.pkl') - model_indicators = [ - 'rsi', 'rsi_deriv1', "max_rsi_12", - "bb_percent", - 'vol_24', - 'percent3', - 'sma5_dist', 'sma5_deriv1', - 'sma24_dist', 'sma24_deriv1', - 'sma60_dist', 'sma60_deriv1', - 'down_count', 'up_count', - 'down_pct', 'slope_norm', - 'min_max_60', - 'rsi_slope', 'adx_change', 'volatility_ratio' - ] + # model_indicators = [ + # 'rsi', 'rsi_deriv1', 'rsi_deriv2', "max_rsi_12", + # "bb_percent", + # 'vol_24', + # 'percent3', + # 'sma5_dist', 'sma5_deriv1', 'sma5_deriv2', + # 'sma24_dist', 'sma24_deriv1', 'sma24_deriv2', + # 'sma60_dist', 'sma60_deriv1', 'sma60_deriv2', + # 'down_pct', 'slope_norm', + # 'min_max_60', + # 'rsi_slope', 'adx_change', 'volatility_ratio', + # 'slope_ratio', 'bb_width', + # 'rsi_1h', 'rsi_deriv1_1h', 'rsi_deriv2_1h', "max_rsi_12_1h", + # ] + + model_indicators = ['open', 'high', 'low', 'close', 'volume', 'haopen', 'haclose', 'hapercent', 'mid', + 'percent', 'percent3', 'percent12', 'percent24', 'sma5', 'sma5_dist', 'sma5_deriv1', + 'sma5_deriv2', 'sma5_state', 'sma12', 'sma12_dist', 'sma12_deriv1', 'sma12_deriv2', + 'sma12_state', 'sma24', 'sma24_dist', 'sma24_deriv1', 'sma24_deriv2', 'sma24_state', 'sma48', + 'sma48_dist', 
'sma48_deriv1', 'sma48_deriv2', 'sma48_state', 'sma60', 'sma60_dist', + 'sma60_deriv1', 'sma60_deriv2', 'sma60_state', 'mid_smooth_3', 'mid_smooth_3_dist', + 'mid_smooth_3_deriv1', 'mid_smooth_3_deriv2', 'mid_smooth_3_state', 'mid_smooth_5', + 'mid_smooth_5_dist', 'mid_smooth_5_deriv1', 'mid_smooth_5_deriv2', 'mid_smooth_5_state', + 'mid_smooth_12', 'mid_smooth_12_dist', 'mid_smooth_12_deriv1', 'mid_smooth_12_deriv2', + 'mid_smooth_12_state', 'mid_smooth_24', 'mid_smooth_24_dist', 'mid_smooth_24_deriv1', + 'mid_smooth_24_deriv2', 'mid_smooth_24_state', 'rsi', 'max_rsi_12', 'max_rsi_24', 'rsi_dist', + 'rsi_deriv1', 'rsi_deriv2', 'rsi_state', 'max12', 'max60', 'min60', 'min_max_60', + 'bb_lowerband', 'bb_middleband', 'bb_upperband', 'bb_percent', 'bb_width', 'macd', + 'macdsignal', 'macdhist', 'sma_20', 'sma_100', 'slope', 'slope_smooth', 'atr', 'atr_norm', + 'adx', 'obv', 'ret', 'vol_24', 'down_count', 'up_count', 'down_pct', 'up_pct', + 'rsi_slope', 'adx_change', 'volatility_ratio', 'rsi_diff', 'slope_ratio', 'volume_sma_deriv', + 'volume_dist', 'volume_deriv1', 'volume_deriv2', 'volume_state', 'slope_norm', 'trend_class', + 'mid_smooth'] levels = [1, 2, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20] # startup_candle_count = 12 * 24 * 5 @@ -1072,12 +1097,98 @@ class Zeus_8_3_2_B_4_2(IStrategy): return dataframe def trainModel(self, dataframe: DataFrame, metadata: dict): - df = dataframe.copy() + pd.set_option('display.max_rows', None) + pd.set_option('display.max_columns', None) + pd.set_option("display.width", 200) + + # Étape 1 : sélectionner numériques + numeric_cols = dataframe.select_dtypes(include=['int64', 'float64']).columns + + # Étape 2 : enlever constantes + usable_cols = [c for c in numeric_cols if dataframe[c].nunique() > 1 + and (not c.endswith("_state") and not c.endswith("_1h") and not c.endswith("_1d") + and not c.endswith("_class") and not c.endswith("_price") + and not c.startswith('stop_buying'))] + + # Étape 3 : remplacer inf et NaN par 0 + 
dataframe[usable_cols] = dataframe[usable_cols].replace([np.inf, -np.inf], 0).fillna(0) + + print("Colonnes utilisables pour le modèle :") + print(usable_cols) + + self.model_indicators = usable_cols + df = dataframe[self.model_indicators].copy() + + # Corrélations des colonnes + corr = df.corr(numeric_only=True) + print("Corrélation des colonnes") + print(corr) + # 3️⃣ Créer la cible : 1 si le prix monte dans les prochaines bougies # df['target'] = (df['sma24'].shift(-24) > df['sma24']).astype(int) - df['target'] = (df['sma5'].shift(-12).rolling(12).max() > df['sma5'] * 1.00025).astype(int) + df['target'] = (df['sma24'].shift(-25).rolling(24).max() > df['sma24'] * 1.003).astype(int) df['target'] = df['target'].fillna(0).astype(int) + # Corrélations triées par importance avec une colonne cible + target_corr = df.corr(numeric_only=True)["target"].sort_values(ascending=False) + print("Corrélations triées par importance avec une colonne cible") + print(target_corr) + + # Corrélations triées par importance avec une colonne cible + corr = df.corr(numeric_only=True) + corr_unstacked = ( + corr.unstack() + .reset_index() + .rename(columns={"level_0": "col1", "level_1": "col2", 0: "corr"}) + ) + # Supprimer les doublons col1/col2 inversés et soi-même + corr_unstacked = corr_unstacked[corr_unstacked["col1"] < corr_unstacked["col2"]] + + # Trier par valeur absolue de corrélation + corr_sorted = corr_unstacked.reindex(corr_unstacked["corr"].abs().sort_values(ascending=False).index) + print("Trier par valeur absolue de corrélation") + print(corr_sorted.head(20)) + + # --- Calcul de la corrélation --- + corr = df.corr(numeric_only=True) # évite les colonnes non numériques + corr = corr * 100 # passage en pourcentage + + # --- Masque pour n’afficher que le triangle supérieur (optionnel) --- + mask = np.triu(np.ones_like(corr, dtype=bool)) + + # --- Création de la figure --- + fig, ax = plt.subplots(figsize=(96, 36)) + + # --- Heatmap avec un effet “température” --- + 
sns.heatmap( + corr, + mask=mask, + cmap="coolwarm", # palette bleu → rouge + center=0, # 0 au centre + annot=True, # affiche les valeurs dans chaque case + fmt=".0f", # format entier (pas de décimale) + cbar_kws={"label": "Corrélation (%)"}, # légende à droite + linewidths=0.5, # petites lignes entre les cases + ax=ax + ) + + # --- Personnalisation --- + ax.set_title("Matrice de corrélation (en %)", fontsize=20, pad=20) + plt.xticks(rotation=45, ha="right") + plt.yticks(rotation=0) + + # --- Sauvegarde --- + output_path = "/home/souti/freqtrade/user_data/plots/Matrice_de_correlation_temperature.png" + plt.savefig(output_path, bbox_inches="tight", dpi=150) + plt.close(fig) + + print(f"✅ Matrice enregistrée : {output_path}") + + # Exemple d'utilisation : + selected_corr = self.select_uncorrelated_features(df, target="target", top_n=30, corr_threshold=0.7) + print("===== 🎯 FEATURES SÉLECTIONNÉES =====") + print(selected_corr) + # Nettoyage df = df.dropna() @@ -1088,22 +1199,90 @@ class Zeus_8_3_2_B_4_2(IStrategy): # 4️⃣ Split train/test X = df[self.model_indicators] y = df['target'] + # Séparation temporelle (train = 80 %, valid = 20 %) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False) + # Nettoyage des valeurs invalides + + selector = VarianceThreshold(threshold=0.0001) + selector.fit(X_train) + selected = X_train.columns[selector.get_support()] + print("Colonnes conservées :", list(selected)) + # 5️⃣ Entraînement du modèle # train_model = RandomForestClassifier(n_estimators=200, random_state=42) - train_model = RandomForestClassifier( - n_estimators=300, - max_depth=12, - # min_samples_split=4, - # min_samples_leaf=2, - # max_features='sqrt', - # random_state=42, - # n_jobs=-1, - class_weight='balanced' + # train_model = RandomForestClassifier( + # n_estimators=300, + # max_depth=12, + # # min_samples_split=4, + # # min_samples_leaf=2, + # # max_features='sqrt', + # # random_state=42, + # # n_jobs=-1, + # 
class_weight='balanced' + # ) + # 1️⃣ Entraîne ton modèle LGBM normal + train_model = LGBMClassifier( + n_estimators=800, + learning_rate=0.02, + max_depth=10, + num_leaves=31, + subsample=0.8, + colsample_bytree=0.8, + reg_alpha=0.2, + reg_lambda=0.4, + class_weight='balanced', + random_state=42, ) train_model.fit(X_train, y_train) + # 2️⃣ Sélection des features AVANT calibration + sfm = SelectFromModel(train_model, threshold="median", prefit=True) + selected_features = X_train.columns[sfm.get_support()] + print(selected_features) + + # 3️⃣ Calibration ensuite (facultative) + calibrated = CalibratedClassifierCV(train_model, method='sigmoid', cv=5) + calibrated.fit(X_train[selected_features], y_train) + print(calibrated) + + # # calibration + # train_model = CalibratedClassifierCV(train_model, method='sigmoid', cv=5) + # # Sélection + # sfm = SelectFromModel(train_model, threshold="median") + # sfm.fit(X_train, y_train) + # selected_features = X_train.columns[sfm.get_support()] + # print(selected_features) + + train_model.fit(X_train, y_train) + y_pred = train_model.predict(X_test) + y_proba = train_model.predict_proba(X_test)[:, 1] + # print(classification_report(y_test, y_pred)) + # print(confusion_matrix(y_test, y_pred)) + print("\nRapport de classification :\n", classification_report(y_test, y_pred)) + print("\nMatrice de confusion :\n", confusion_matrix(y_test, y_pred)) + + # Importances + importances = pd.DataFrame({ + "feature": train_model.feature_name_, + "importance": train_model.feature_importances_ + }).sort_values("importance", ascending=False) + print("\n===== 🔍 IMPORTANCE DES FEATURES =====") + + print(importances) + + best_f1 = 0 + best_t = 0.5 + for t in [0.3, 0.4, 0.5, 0.6, 0.7]: + y_pred_thresh = (y_proba > t).astype(int) + score = f1_score(y_test, y_pred_thresh) + print(f"Seuil {t:.1f} → F1: {score:.3f}") + if score > best_f1: + best_f1 = score + best_t = t + + print(f"✅ Meilleur seuil trouvé: {best_t} avec F1={best_f1:.3f}") + # 6️⃣ Évaluer la 
précision (facultatif) preds = train_model.predict(X_test) acc = accuracy_score(y_test, preds) @@ -1263,7 +1442,16 @@ class Zeus_8_3_2_B_4_2(IStrategy): "importance": model.feature_importances_ }).sort_values(by="importance", ascending=False) print(importance) - importance.plot.bar(x="feature", y="importance", legend=False, figsize=(6, 3)) + + # Crée une figure plus grande + fig, ax = plt.subplots(figsize=(24, 8)) # largeur=24 pouces, hauteur=8 pouces + + # Trace le bar plot sur cet axe + importance.plot.bar(x="feature", y="importance", legend=False, ax=ax) + + # Tourner les labels pour plus de lisibilité + ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right') + plt.title("Importance des features") # plt.show() plt.savefig(os.path.join(output_dir, "Importance des features.png"), bbox_inches="tight") @@ -1439,8 +1627,8 @@ class Zeus_8_3_2_B_4_2(IStrategy): dataframe["percent12"] = dataframe['close'].pct_change(12) dataframe["percent24"] = dataframe['close'].pct_change(24) - if self.dp.runmode.value in ('backtest'): - dataframe['futur_percent'] = 100 * (dataframe['close'].shift(-1) - dataframe['close']) / dataframe['close'] + # if self.dp.runmode.value in ('backtest'): + # dataframe['futur_percent'] = 100 * (dataframe['close'].shift(-1) - dataframe['close']) / dataframe['close'] dataframe['sma5'] = dataframe['mid'].ewm(span=5, adjust=False).mean() #dataframe["mid"].rolling(window=5).mean() self.calculeDerivees(dataframe, 'sma5', timeframe=timeframe, ema_period=5) @@ -1479,9 +1667,11 @@ class Zeus_8_3_2_B_4_2(IStrategy): (dataframe["close"] - dataframe["bb_lowerband"]) / (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) ) - dataframe["bb_width"] = ( - (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / dataframe["bb_middleband"] - ) + dataframe["bb_width"] = (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / dataframe["sma5"] + + # dataframe["bb_width"] = ( + # (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / 
def getParamValue(self, pair, trend, space, param):
    """Return one stored parameter value for a pair.

    The pair is first converted with ``self.getShortName`` and the value is
    then read from
    ``self.parameters[pair][trend][0]['content']['params'][space][param]``.
    """
    pair = self.getShortName(pair)
    return self.parameters[pair][trend][0]['content']['params'][space][param]


def select_uncorrelated_features(self, df, target, top_n=20, corr_threshold=0.7):
    """Pick the features most correlated with ``target`` that are not redundant.

    The numeric columns of ``df`` are ranked by the absolute value of their
    correlation with ``target``; the ``top_n`` best are kept as candidates.
    Candidates are then accepted greedily, best first, and a candidate is
    rejected when its absolute correlation with an already accepted feature
    exceeds ``corr_threshold``.

    Features whose correlation with ``target`` is NaN (e.g. constant columns)
    are ignored: they carry no measurable signal, and because NaN never
    compares greater than the threshold they would otherwise always be
    accepted.

    Args:
        df: DataFrame holding the candidate features and the target column.
        target: Name of the target column in ``df``.
        top_n: Maximum number of candidates taken from the ranking.
        corr_threshold: Maximum absolute pairwise correlation tolerated
            between two selected features.

    Returns:
        DataFrame with columns ``feature`` and ``corr_with_target`` (signed
        correlation), sorted by decreasing absolute correlation.

    Raises:
        KeyError: If ``target`` is not among the numeric columns of ``df``.
    """
    # 1) Correlation matrix, computed once and reused for every lookup below.
    corr = df.corr(numeric_only=True)

    # 2) Rank features by |corr with target|, excluding the target itself
    #    and any feature whose correlation is undefined (NaN).
    ranking = (
        corr[target]
        .drop(target)
        .abs()
        .dropna()
        .sort_values(ascending=False)
    )
    candidates = ranking.head(top_n).index.tolist()

    # 3) Greedy filter: keep a candidate only if it is not too correlated
    #    with a feature that has already been kept.
    selected = []
    for feat in candidates:
        redundant = any(
            abs(corr.loc[feat, kept]) > corr_threshold for kept in selected
        )
        if not redundant:
            selected.append(feat)

    # 4) Report the signed correlation of each kept feature with the target.
    selected_corr = pd.DataFrame({
        "feature": selected,
        "corr_with_target": [corr.loc[feat, target] for feat in selected],
    })
    return selected_corr.sort_values(
        by="corr_with_target", key=np.abs, ascending=False
    )