From 3ca1c2d9c18f8d177ad2d171bc62111bbbdb4504 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Delacotte?= Date: Tue, 11 Nov 2025 17:00:52 +0100 Subject: [PATCH] =?UTF-8?q?LGBMClassifier=20ajout=20des=20corr=C3=A9lation?= =?UTF-8?q?s=20mid=5Fsmooth=5F24?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Zeus_8_3_2_B_4_2.py | 186 +++++---- Zeus_LGBMRegressor.json | 36 ++ Zeus_LGBMRegressor.md | 106 +++++ Zeus_LGBMRegressor.py | 831 ++++++++++------------------------------ tools/sklearn/Sinus.py | 32 ++ 5 files changed, 484 insertions(+), 707 deletions(-) create mode 100644 Zeus_LGBMRegressor.json create mode 100644 Zeus_LGBMRegressor.md create mode 100644 tools/sklearn/Sinus.py diff --git a/Zeus_8_3_2_B_4_2.py b/Zeus_8_3_2_B_4_2.py index 6e76ef5..ed58fe4 100644 --- a/Zeus_8_3_2_B_4_2.py +++ b/Zeus_8_3_2_B_4_2.py @@ -84,7 +84,6 @@ def normalize(df): class Zeus_8_3_2_B_4_2(IStrategy): # Machine Learning - model = joblib.load('rf_model.pkl') # model_indicators = [ # 'rsi', 'rsi_deriv1', 'rsi_deriv2', "max_rsi_12", # "bb_percent", @@ -100,24 +99,29 @@ class Zeus_8_3_2_B_4_2(IStrategy): # 'rsi_1h', 'rsi_deriv1_1h', 'rsi_deriv2_1h', "max_rsi_12_1h", # ] - model_indicators = ['open', 'high', 'low', 'close', 'volume', 'haopen', 'haclose', 'hapercent', 'mid', - 'percent', 'percent3', 'percent12', 'percent24', 'sma5', 'sma5_dist', 'sma5_deriv1', - 'sma5_deriv2', 'sma5_state', 'sma12', 'sma12_dist', 'sma12_deriv1', 'sma12_deriv2', - 'sma12_state', 'sma24', 'sma24_dist', 'sma24_deriv1', 'sma24_deriv2', 'sma24_state', 'sma48', - 'sma48_dist', 'sma48_deriv1', 'sma48_deriv2', 'sma48_state', 'sma60', 'sma60_dist', - 'sma60_deriv1', 'sma60_deriv2', 'sma60_state', 'mid_smooth_3', 'mid_smooth_3_dist', - 'mid_smooth_3_deriv1', 'mid_smooth_3_deriv2', 'mid_smooth_3_state', 'mid_smooth_5', - 'mid_smooth_5_dist', 'mid_smooth_5_deriv1', 'mid_smooth_5_deriv2', 'mid_smooth_5_state', - 'mid_smooth_12', 'mid_smooth_12_dist', 
'mid_smooth_12_deriv1', 'mid_smooth_12_deriv2', - 'mid_smooth_12_state', 'mid_smooth_24', 'mid_smooth_24_dist', 'mid_smooth_24_deriv1', - 'mid_smooth_24_deriv2', 'mid_smooth_24_state', 'rsi', 'max_rsi_12', 'max_rsi_24', 'rsi_dist', - 'rsi_deriv1', 'rsi_deriv2', 'rsi_state', 'max12', 'max60', 'min60', 'min_max_60', - 'bb_lowerband', 'bb_middleband', 'bb_upperband', 'bb_percent', 'bb_width', 'macd', - 'macdsignal', 'macdhist', 'sma_20', 'sma_100', 'slope', 'slope_smooth', 'atr', 'atr_norm', - 'adx', 'obv', 'ret', 'vol_24', 'down_count', 'up_count', 'down_pct', 'up_pct', - 'rsi_slope', 'adx_change', 'volatility_ratio', 'rsi_diff', 'slope_ratio', 'volume_sma_deriv', - 'volume_dist', 'volume_deriv1', 'volume_deriv2', 'volume_state', 'slope_norm', 'trend_class', - 'mid_smooth'] + model_indicators = [ + # 'hapercent', + # 'percent', 'percent3', 'percent12', 'percent24', + # 'sma5_dist', 'sma5_deriv1', 'sma5_deriv2', + # 'sma12_dist', 'sma12_deriv1', 'sma12_deriv2', + # 'sma24_dist', 'sma24_deriv1', 'sma24_deriv2', + # 'sma48_dist', 'sma48_deriv1', 'sma48_deriv2', + # 'sma60_dist', 'sma60_deriv1', 'sma60_deriv2', + # 'mid_smooth_3_deriv1', 'mid_smooth_3_deriv2', + # 'mid_smooth_5_dist', 'mid_smooth_5_deriv1', 'mid_smooth_5_deriv2', + # 'mid_smooth_12_dist', 'mid_smooth_12_deriv1', 'mid_smooth_12_deriv2', + # 'mid_smooth_24_dist', 'mid_smooth_24_deriv1', 'mid_smooth_24_deriv2', + # 'rsi', 'max_rsi_12', 'max_rsi_24', 'rsi_dist', + # 'rsi_deriv1', 'rsi_deriv2', 'min_max_60', + # 'bb_percent', 'bb_width', 'macd', + # 'macdsignal', 'macdhist', 'slope', 'slope_smooth', 'atr', 'atr_norm', + # 'adx', + # 'obv', 'obv_deriv1', 'obv_deriv2', + # 'obv5', 'obv5_deriv1', 'obv5_deriv2', + # 'vol_24', 'down_count', 'up_count', 'down_pct', 'up_pct', + # 'rsi_slope', 'adx_change', 'volatility_ratio', 'rsi_diff', 'slope_ratio', 'volume_sma_deriv', + # 'volume_dist', 'volume_deriv1', 'volume_deriv2', 'slope_norm', + ] levels = [1, 2, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20] # startup_candle_count 
= 12 * 24 * 5 @@ -1083,40 +1087,48 @@ class Zeus_8_3_2_B_4_2(IStrategy): if self.dp.runmode.value in ('backtest'): self.trainModel(dataframe, metadata) - # Préparer les features pour la prédiction - features = dataframe[self.model_indicators].fillna(0) + short_pair = self.getShortName(pair) + if short_pair == 'BTC': + self.model = joblib.load(f"{short_pair}_rf_model.pkl") - # Prédiction : probabilité que le prix monte - probs = self.model.predict_proba(features)[:, 1] + # Préparer les features pour la prédiction + features = dataframe[self.model_indicators].fillna(0) - # Sauvegarder la probabilité pour l’analyse - dataframe['ml_prob'] = probs + # Prédiction : probabilité que le prix monte + probs = self.model.predict_proba(features)[:, 1] - self.inspect_model(self.model) + # Sauvegarder la probabilité pour l’analyse + dataframe['ml_prob'] = probs + + self.inspect_model(self.model) return dataframe def trainModel(self, dataframe: DataFrame, metadata: dict): + pair = self.getShortName(metadata['pair']) pd.set_option('display.max_rows', None) pd.set_option('display.max_columns', None) pd.set_option("display.width", 200) + path=f"user_data/plots/{pair}/" + os.makedirs(path, exist_ok=True) - # Étape 1 : sélectionner numériques - numeric_cols = dataframe.select_dtypes(include=['int64', 'float64']).columns - - # Étape 2 : enlever constantes - usable_cols = [c for c in numeric_cols if dataframe[c].nunique() > 1 - and (not c.endswith("_state") and not c.endswith("_1h") and not c.endswith("_1d") - and not c.endswith("_class") and not c.endswith("_price") - and not c.startswith('stop_buying'))] - - # Étape 3 : remplacer inf et NaN par 0 - dataframe[usable_cols] = dataframe[usable_cols].replace([np.inf, -np.inf], 0).fillna(0) - - print("Colonnes utilisables pour le modèle :") - print(usable_cols) - - self.model_indicators = usable_cols + # # Étape 1 : sélectionner numériques + # numeric_cols = dataframe.select_dtypes(include=['int64', 'float64']).columns + # + # # Étape 2 : 
enlever constantes + # usable_cols = [c for c in numeric_cols if dataframe[c].nunique() > 1 + # and (not c.endswith("_state") and not c.endswith("_1h") and not c.endswith("_1d") + # and not c.endswith("_class") and not c.endswith("_price") + # and not c.startswith('stop_buying'))] + # + # # Étape 3 : remplacer inf et NaN par 0 + # dataframe[usable_cols] = dataframe[usable_cols].replace([np.inf, -np.inf], 0).fillna(0) + # + # print("Colonnes utilisables pour le modèle :") + # print(usable_cols) + # + # self.model_indicators = usable_cols + # df = dataframe[self.model_indicators].copy() # Corrélations des colonnes @@ -1126,7 +1138,7 @@ class Zeus_8_3_2_B_4_2(IStrategy): # 3️⃣ Créer la cible : 1 si le prix monte dans les prochaines bougies # df['target'] = (df['sma24'].shift(-24) > df['sma24']).astype(int) - df['target'] = (df['sma24'].shift(-25).rolling(24).max() > df['sma24'] * 1.003).astype(int) + df['target'] = (df['percent12'].shift(-13) > 0.0015).astype(int) df['target'] = df['target'].fillna(0).astype(int) # Corrélations triées par importance avec une colonne cible @@ -1178,7 +1190,7 @@ class Zeus_8_3_2_B_4_2(IStrategy): plt.yticks(rotation=0) # --- Sauvegarde --- - output_path = "/home/souti/freqtrade/user_data/plots/Matrice_de_correlation_temperature.png" + output_path = f"{path}/Matrice_de_correlation_temperature.png" plt.savefig(output_path, bbox_inches="tight", dpi=150) plt.close(fig) @@ -1194,6 +1206,7 @@ class Zeus_8_3_2_B_4_2(IStrategy): X = df[self.model_indicators] y = df['target'] # ta colonne cible binaire ou numérique + print("===== 🎯 FEATURES SCORES =====") print(self.feature_auc_scores(X, y)) # 4️⃣ Split train/test @@ -1211,29 +1224,30 @@ class Zeus_8_3_2_B_4_2(IStrategy): # 5️⃣ Entraînement du modèle # train_model = RandomForestClassifier(n_estimators=200, random_state=42) - # train_model = RandomForestClassifier( - # n_estimators=300, - # max_depth=12, - # # min_samples_split=4, - # # min_samples_leaf=2, - # # max_features='sqrt', - # # 
random_state=42, - # # n_jobs=-1, - # class_weight='balanced' - # ) - # 1️⃣ Entraîne ton modèle LGBM normal - train_model = LGBMClassifier( - n_estimators=800, - learning_rate=0.02, - max_depth=10, - num_leaves=31, - subsample=0.8, - colsample_bytree=0.8, - reg_alpha=0.2, - reg_lambda=0.4, - class_weight='balanced', - random_state=42, + train_model = RandomForestClassifier( + n_estimators=300, + max_depth=12, + # min_samples_split=4, + # min_samples_leaf=2, + # max_features='sqrt', + # random_state=42, + # n_jobs=-1, + class_weight='balanced' ) + # 1️⃣ Entraîne ton modèle LGBM normal + # train_model = LGBMClassifier( + # n_estimators=800, + # learning_rate=0.02, + # max_depth=10, + # num_leaves=31, + # subsample=0.8, + # colsample_bytree=0.8, + # reg_alpha=0.2, + # reg_lambda=0.4, + # class_weight='balanced', + # random_state=42, + # ) + train_model.fit(X_train, y_train) # 2️⃣ Sélection des features AVANT calibration @@ -1246,7 +1260,7 @@ class Zeus_8_3_2_B_4_2(IStrategy): calibrated.fit(X_train[selected_features], y_train) print(calibrated) - # # calibration + # # # calibration # train_model = CalibratedClassifierCV(train_model, method='sigmoid', cv=5) # # Sélection # sfm = SelectFromModel(train_model, threshold="median") @@ -1262,14 +1276,13 @@ class Zeus_8_3_2_B_4_2(IStrategy): print("\nRapport de classification :\n", classification_report(y_test, y_pred)) print("\nMatrice de confusion :\n", confusion_matrix(y_test, y_pred)) - # Importances - importances = pd.DataFrame({ - "feature": train_model.feature_name_, - "importance": train_model.feature_importances_ - }).sort_values("importance", ascending=False) - print("\n===== 🔍 IMPORTANCE DES FEATURES =====") - - print(importances) + # # Importances + # importances = pd.DataFrame({ + # "feature": train_model.feature_name_, + # "importance": train_model.feature_importances_ + # }).sort_values("importance", ascending=False) + # print("\n===== 🔍 IMPORTANCE DES FEATURES =====") + # print(importances) best_f1 = 0 best_t 
= 0.5 @@ -1289,7 +1302,7 @@ class Zeus_8_3_2_B_4_2(IStrategy): print(f"Accuracy: {acc:.3f}") # 7️⃣ Sauvegarde du modèle - joblib.dump(train_model, 'rf_model.pkl') + joblib.dump(train_model, f"{pair}_rf_model.pkl") print("✅ Modèle sauvegardé sous rf_model.pkl") # X = dataframe des features (après shift/rolling/indicators) @@ -1314,7 +1327,7 @@ class Zeus_8_3_2_B_4_2(IStrategy): # plt.ylabel("Score") # plt.show() - self.analyze_model(train_model, X_train, X_test, y_train, y_test) + self.analyze_model(pair, train_model, X_train, X_test, y_train, y_test) def inspect_model(self, model): """ @@ -1388,12 +1401,12 @@ class Zeus_8_3_2_B_4_2(IStrategy): print("\n===== ✅ FIN DE L’INSPECTION =====") - def analyze_model(self, model, X_train, X_test, y_train, y_test): + def analyze_model(self, pair, model, X_train, X_test, y_train, y_test): """ Analyse complète d'un modèle ML supervisé (classification binaire). Affiche performances, importance des features, matrices, seuils, etc. """ - output_dir = "user_data/plots" + output_dir = f"user_data/plots/{pair}/" os.makedirs(output_dir, exist_ok=True) # ---- Prédictions ---- @@ -1518,7 +1531,7 @@ class Zeus_8_3_2_B_4_2(IStrategy): # Trace ou enregistre le graphique self.plot_threshold_analysis(y_test, y_proba, step=0.05, - save_path="/home/souti/freqtrade/user_data/plots/threshold_analysis.png") + save_path=f"{output_dir}/threshold_analysis.png") # y_test : vraies classes (0 / 1) # y_proba : probabilités de la classe 1 prédites par ton modèle @@ -1547,7 +1560,7 @@ class Zeus_8_3_2_B_4_2(IStrategy): plt.ylabel("Score") plt.grid(True, alpha=0.3) plt.legend() - plt.savefig("/home/souti/freqtrade/user_data/plots/seuil_de_probabilite.png", bbox_inches='tight') + plt.savefig(f"{output_dir}/seuil_de_probabilite.png", bbox_inches='tight') # plt.show() print(f"✅ Meilleur F1 : {f1s[best_idx]:.3f} au seuil {seuils[best_idx]:.2f}") @@ -1616,6 +1629,7 @@ class Zeus_8_3_2_B_4_2(IStrategy): def populateDataframe(self, dataframe, timeframe='5m'): + 
dataframe = dataframe.copy() heikinashi = qtpylib.heikinashi(dataframe) dataframe['haopen'] = heikinashi['open'] dataframe['haclose'] = heikinashi['close'] @@ -1667,7 +1681,7 @@ class Zeus_8_3_2_B_4_2(IStrategy): (dataframe["close"] - dataframe["bb_lowerband"]) / (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) ) - dataframe["bb_width"] = (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / dataframe["sma5"] + dataframe["bb_width"] = (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / dataframe["sma24"] # dataframe["bb_width"] = ( # (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / dataframe["bb_middleband"] @@ -1762,6 +1776,12 @@ class Zeus_8_3_2_B_4_2(IStrategy): dataframe['obv'] = ta.volume.OnBalanceVolumeIndicator( close=dataframe['close'], volume=dataframe['volume'] ).on_balance_volume() + self.calculeDerivees(dataframe, 'obv', timeframe=timeframe, ema_period=1) + + dataframe['obv5'] = ta.volume.OnBalanceVolumeIndicator( + close=dataframe['sma5'], volume=dataframe['volume'].rolling(5).sum() + ).on_balance_volume() + self.calculeDerivees(dataframe, 'obv5', timeframe=timeframe, ema_period=5) # --- Volatilité récente (écart-type des rendements) --- dataframe['vol_24'] = dataframe['percent'].rolling(24).std() @@ -1797,7 +1817,7 @@ class Zeus_8_3_2_B_4_2(IStrategy): aucs = {} for col in X.columns: try: - aucs[col] = roc_auc_score(y, X[col].fillna(method='ffill').fillna(0)) + aucs[col] = roc_auc_score(y, X[col].ffill().fillna(0)) except Exception: aucs[col] = np.nan return pd.Series(aucs).sort_values(ascending=False) diff --git a/Zeus_LGBMRegressor.json b/Zeus_LGBMRegressor.json new file mode 100644 index 0000000..f627a33 --- /dev/null +++ b/Zeus_LGBMRegressor.json @@ -0,0 +1,36 @@ +{ + "strategy_name": "Zeus_LGBMRegressor", + "params": { + "roi": { + "0": 0.564, + "567": 0.273, + "2814": 0.12, + "7675": 0 + }, + "stoploss": { + "stoploss": -1.0 + }, + "trailing": { + "trailing_stop": true, + "trailing_stop_positive": 0.15, + 
"trailing_stop_positive_offset": 0.2, + "trailing_only_offset_is_reached": true + }, + "max_open_trades": { + "max_open_trades": 80 + }, + "buy": { + "mises": 5, + "mise_factor_buy": 0.02, + "ml_prob_buy": -0.27, + "pct": 0.037, + "pct_inc": 0.0016 + }, + "sell": { + "ml_prob_sell": -0.05 + }, + "protection": {} + }, + "ft_stratparam_v": 1, + "export_time": "2025-11-11 15:44:21.251297+00:00" +} \ No newline at end of file diff --git a/Zeus_LGBMRegressor.md b/Zeus_LGBMRegressor.md new file mode 100644 index 0000000..405d3c5 --- /dev/null +++ b/Zeus_LGBMRegressor.md @@ -0,0 +1,106 @@ +🌟 Paramètres principaux de LGBMRegressor +1️⃣ objective='regression' + +But : indique le type de problème à résoudre. + +Ici, tu veux prédire une valeur continue (par ex. un rendement futur, un prix, etc.). + +Autres valeurs possibles : + +'binary' → pour classification 0/1 + +'multiclass' → pour plusieurs classes + +'regression_l1' → pour des valeurs continues mais avec perte L1 (moins sensible aux outliers) + +📘 En résumé : ici LightGBM cherche à minimiser l’erreur entre la valeur prédite et la valeur réelle. + +2️⃣ metric='rmse' + +But : indique la métrique utilisée pour évaluer la qualité du modèle. + +'rmse' = Root Mean Squared Error (racine de la moyenne des carrés des erreurs) +→ pénalise fortement les grosses erreurs. + +'mae' (Mean Absolute Error) est une alternative plus robuste (moins sensible aux outliers). + +Tu peux aussi utiliser plusieurs métriques : metric=['rmse', 'mae']. + +3️⃣ n_estimators=300 + +But : nombre d’arbres de décision à construire. + +Chaque arbre apprend à corriger les erreurs du précédent → c’est le boosting. + +Plus ce nombre est grand : + +Meilleure précision potentielle + +Mais risque de surapprentissage et de lenteur + +Typiquement, on le combine avec un petit learning_rate (comme ici 0.05). + +4️⃣ learning_rate=0.05 + +But : contrôle l’intensité avec laquelle chaque nouvel arbre corrige les erreurs. 
+ +Si learning_rate ↓, il faut plus d’arbres (n_estimators ↑) pour converger. + +Typiquement : + +0.1 = standard + +0.05 = prudent (meilleure généralisation) + +0.01 = très lent mais précis + +⚖️ Ce paramètre agit comme un “frein” sur l’apprentissage. + +5️⃣ max_depth=7 + +But : profondeur maximale des arbres. + +Plus les arbres sont profonds : + +→ plus le modèle capture des relations complexes + +→ mais risque de surapprentissage + +Valeurs typiques : + +3 à 8 pour éviter le surapprentissage + +-1 = pas de limite + +6️⃣ subsample=0.8 + +But : fraction de l’échantillon d’entraînement utilisée pour chaque arbre. + +Exemple : + +0.8 = chaque arbre est entraîné sur 80 % des lignes (tirées aléatoirement). + +Permet : + +de réduire le surapprentissage + +d’accélérer l’entraînement + +Si tu veux des résultats très stables → monte à 1.0 +Si tu veux plus de diversité entre les arbres → garde entre 0.7 et 0.9. + +7️⃣ colsample_bytree=0.8 + +But : fraction de colonnes (features) utilisées pour chaque arbre. + +Comme subsample, mais pour les variables. + +Aide à la régularisation : chaque arbre ne voit pas toutes les colonnes → modèle plus robuste. + +Typiquement entre 0.6 et 1.0. + +8️⃣ random_state=42 + +But : fixe la graine aléatoire. 
+ +Permet d’obtenir de \ No newline at end of file diff --git a/Zeus_LGBMRegressor.py b/Zeus_LGBMRegressor.py index fd8e386..846f9c7 100644 --- a/Zeus_LGBMRegressor.py +++ b/Zeus_LGBMRegressor.py @@ -17,8 +17,6 @@ from pandas import DataFrame from typing import Optional, Union, Tuple import math import logging -import configparser -from technical import pivots_points from pathlib import Path # -------------------------------- @@ -27,19 +25,12 @@ from pathlib import Path import ta import talib.abstract as talib import freqtrade.vendor.qtpylib.indicators as qtpylib -import requests from datetime import timezone, timedelta -from scipy.signal import savgol_filter -from ta.trend import SMAIndicator, EMAIndicator, MACD, ADXIndicator -from collections import Counter logger = logging.getLogger(__name__) # Machine Learning -from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor from sklearn.model_selection import train_test_split -from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error -from sklearn.metrics import accuracy_score import joblib import matplotlib.pyplot as plt from sklearn.metrics import ( @@ -53,17 +44,17 @@ from sklearn.metrics import ( ) from sklearn.tree import export_text import inspect -from sklearn.feature_selection import mutual_info_classif -from sklearn.inspection import permutation_importance -from lightgbm import LGBMClassifier -from sklearn.calibration import CalibratedClassifierCV from sklearn.feature_selection import SelectFromModel from tabulate import tabulate -from sklearn.model_selection import GridSearchCV from sklearn.feature_selection import VarianceThreshold import seaborn as sns -from lightgbm import LGBMRegressor import lightgbm as lgb +from sklearn.model_selection import cross_val_score +import optuna.visualization as vis +import optuna +from lightgbm import LGBMRegressor +from sklearn.metrics import mean_squared_error +from sklearn.model_selection import train_test_split # Couleurs ANSI de 
base RED = "\033[31m" @@ -74,6 +65,8 @@ MAGENTA = "\033[35m" CYAN = "\033[36m" RESET = "\033[0m" +import warnings +warnings.filterwarnings("ignore", message="No further splits with positive gain") def pprint_df(dframe): print(tabulate(dframe, headers='keys', tablefmt='psql', showindex=False)) @@ -85,24 +78,27 @@ def normalize(df): class Zeus_LGBMRegressor(IStrategy): + startup_candle_count = 24 + # Machine Learning - # model_indicators = [ - # 'rsi', 'rsi_deriv1', 'rsi_deriv2', "max_rsi_12", - # "bb_percent", - # 'vol_24', - # 'percent3', - # 'sma5_dist', 'sma5_deriv1', 'sma5_deriv2', - # 'sma24_dist', 'sma24_deriv1', 'sma24_deriv2', - # 'sma60_dist', 'sma60_deriv1', 'sma60_deriv2', - # 'down_pct', 'slope_norm', - # 'min_max_60', - # 'rsi_slope', 'adx_change', 'volatility_ratio', - # 'slope_ratio', 'bb_width', - # 'rsi_1h', 'rsi_deriv1_1h', 'rsi_deriv2_1h', "max_rsi_12_1h", - # ] + model_indicators = [ + "ms-10", "ms-5", "ms-4", "ms-3", "ms-2", "ms-1", "ms-0", + 'rsi', 'rsi_deriv1', 'rsi_deriv2', "max_rsi_12", + "bb_percent", + 'vol_24', + 'percent3', + 'sma5_dist', 'sma5_deriv1', 'sma5_deriv2', + 'sma24_dist', 'sma24_deriv1', 'sma24_deriv2', + 'sma60_dist', 'sma60_deriv1', 'sma60_deriv2', + 'down_pct', 'slope_norm', + 'min_max_60', + 'rsi_slope', 'adx_change', 'volatility_ratio', + 'slope_ratio', 'bb_width', + 'rsi_1h', 'rsi_deriv1_1h', 'rsi_deriv2_1h', "max_rsi_12_1h", + ] model = None - model_indicators = ["ms-10", "ms-5", "ms-4", "ms-3", "ms-2", "ms-1", "ms-0"] + # model_indicators = ["ms-10", "ms-5", "ms-4", "ms-3", "ms-2", "ms-1", "ms-0"] # model_indicators = ['open', 'high', 'close', 'haclose', 'percent', 'sma5', 'sma12', 'sma24', 'sma24_deriv1', 'sma24_deriv2', 'sma48', 'sma48_deriv1', 'sma48_deriv2', 'sma60', 'sma60_dist', 'sma60_deriv1', # 'sma60_deriv2', 'mid_smooth_3_deriv2', 'mid_smooth_12_deriv1', 'mid_smooth_12_deriv2', 'mid_smooth_24', 'mid_smooth_24_deriv1', 'mid_smooth_24_deriv2', 'max_rsi_12', 'max_rsi_24', 'max12', # 'max60', 'min60', 
'min_max_60', 'bb_lowerband', 'bb_upperband', 'bb_width', 'macd', 'macdsignal', 'macdhist', 'sma_20', 'sma_100', 'atr', 'atr_norm', 'adx', 'obv', 'vol_24', 'adx_change', @@ -289,21 +285,6 @@ class Zeus_LGBMRegressor(IStrategy): trades = list() max_profit_pairs = {} - # # sma24_deriv1_1d_stop_protection = DecimalParameter(-0.2, 0.2, default=0.05, decimals=2, space='protection', - # # optimize=True, load=True) - # sma5_deriv1_1d_stop_protection = DecimalParameter(-5, 0, default=0.5, decimals=1, space='protection', - # optimize=True, load=True) - # sma5_deriv2_1d_stop_protection = DecimalParameter(-5, 0, default=0.5, decimals=1, space='protection', optimize=True, - # load=True) - # - # # sma24_deriv1_1d_start_protection = DecimalParameter(-0.2, 0.2, default=0.05, decimals=2, space='protection', - # # optimize=True, load=True) - # sma5_deriv1_1d_restart_protection = DecimalParameter(0, 5, default=0.5, decimals=1, space='protection', - # optimize=True, load=True) - # sma5_deriv2_1d_restart_protection = DecimalParameter(0, 5, default=0.5, decimals=1, space='protection', - # optimize=True, - # load=True) - # mise_factor_buy = DecimalParameter(0.01, 0.1, default=0.05, decimals=2, space='buy', optimize=True, load=True) indicators = {'sma5', 'sma12', 'sma24', 'sma60'} @@ -311,54 +292,16 @@ class Zeus_LGBMRegressor(IStrategy): mises = IntParameter(1, 50, default=5, space='buy', optimize=False, load=False) + ml_prob_buy = DecimalParameter(-0.5, 0.5, default=0.0, decimals=2, space='buy', optimize=True, load=True) + ml_prob_sell = DecimalParameter(-0.5, 0.5, default=0.0, decimals=2, space='sell', optimize=True, load=True) + pct = DecimalParameter(0.005, 0.05, default=0.012, decimals=3, space='buy', optimize=True, load=True) pct_inc = DecimalParameter(0.0001, 0.003, default=0.0022, decimals=4, space='buy', optimize=True, load=True) - indic_5m_slope_sup_buy = CategoricalParameter(indicators, default="sma60", space='buy') - # indic_deriv_5m_slop_sup_buy = 
CategoricalParameter(indicators, default="sma12", space='buy', optimize=True, load=True) - # deriv_5m_slope_sup_buy = DecimalParameter(-0.1, 0.5, default=0, decimals=2, space='buy', optimize=True, load=True) - - indic_5m_slope_inf_buy = CategoricalParameter(indicators, default="sma60", space='buy') - # indic_deriv_5m_slop_inf_buy = CategoricalParameter(indicators, default="sma12", space='buy', optimize=True, load=True) - # deriv_5m_slope_inf_buy = DecimalParameter(-0.1, 0.5, default=0, decimals=2, space='buy', optimize=True, load=True) - - - # indic_deriv1_5m = DecimalParameter(-2, 2, default=0, decimals=2, space='buy', optimize=True, load=True) - # indic_deriv2_5m = DecimalParameter(-2, 2, default=0, decimals=2, space='buy', optimize=True, load=True) - - # indic_1h = CategoricalParameter(indicators, default="sma60", space='buy') - # indic_deriv1_1h = DecimalParameter(-5, 5, default=0, decimals=1, space='buy', optimize=True, load=True) - # indic_deriv2_1h = DecimalParameter(-10, 10, default=0, decimals=1, space='buy', optimize=True, load=True) - - # indic_1d_p = CategoricalParameter(indicators, default="sma60", space='protection') - # indic_deriv1_1d_p_stop = DecimalParameter(-2, 2, default=0, decimals=1, space='protection', optimize=True, load=True) - # indic_deriv2_1d_p_stop = DecimalParameter(-4, 4, default=0, decimals=1, space='protection', optimize=True, load=True) - # indic_deriv1_1d_p_start = DecimalParameter(-2, 2, default=0, decimals=1, space='protection', optimize=True, load=True) - # indic_deriv2_1d_p_start = DecimalParameter(-4, 4, default=0, decimals=1, space='protection', optimize=True, load=True) - - - indic_5m_slope_sup_sell = CategoricalParameter(indicators, default="sma60", space='sell') - indic_deriv_5m_slope_sup_sell = CategoricalParameter(indicators, default="sma60", space='sell') - deriv_5m_slope_sup_sell = DecimalParameter(-0.1, 0.5, default=0, decimals=2, space='sell', optimize=True, load=True) - - indic_5m_slope_inf_sell = 
CategoricalParameter(indicators, default="sma60", space='sell') - indic_deriv_5m_slope_inf_sell = CategoricalParameter(indicators, default="sma60", space='sell') - deriv_5m_slope_inf_sell = DecimalParameter(-0.1, 0.5, default=0, decimals=2, space='sell', optimize=True, load=True) - - deriv1_buy_protect = DecimalParameter(-0.3, 0.1, default=-0.1, decimals=2, space='protection', optimize=True, load=True) - rsi_buy_protect = IntParameter(50, 90, default=70, space='protection', optimize=True, load=True) - indic_5m_slope_sup = CategoricalParameter(indicators, default="sma60", space='protection') - indic_1h_slope_sup = CategoricalParameter(indicators, default="sma5", space='protection') - - # indic_percent_sell = CategoricalParameter(indicators_percent, default="sma60", space='sell') - - # percent_5m_sell = DecimalParameter(-0.1, -0.0, default=0, decimals=2, space='sell', optimize=True, load=True) - - # indic_deriv1_5m_sell = DecimalParameter(-2, 2, default=0, decimals=2, space='sell', optimize=True, load=True) - # indic_deriv2_5m_sell = DecimalParameter(-2, 2, default=0, decimals=2, space='sell', optimize=True, load=True) - - # indic_deriv1_1h_sell = DecimalParameter(-5, 5, default=0, decimals=1, space='sell', optimize=True, load=True) - # indic_deriv2_1h_sell = DecimalParameter(-10, 10, default=0, decimals=1, space='sell', optimize=True, load=True) + # deriv1_buy_protect = DecimalParameter(-0.3, 0.1, default=-0.1, decimals=2, space='protection', optimize=True, load=True) + # rsi_buy_protect = IntParameter(50, 90, default=70, space='protection', optimize=True, load=True) + # indic_5m_slope_sup = CategoricalParameter(indicators, default="sma60", space='protection') + # indic_1h_slope_sup = CategoricalParameter(indicators, default="sma5", space='protection') labels = ['B3', 'B2', 'B1', 'N0', 'H1', 'H2', 'H3'] index_labels = ['B3', 'B2', 'B1', 'N0', 'H1', 'H2', 'H3'] @@ -384,18 +327,6 @@ class Zeus_LGBMRegressor(IStrategy): # Extraction de la matrice numérique 
sma5_derive1_2_numeric_matrice = sma5_derive1_2_matrice_df.reindex(index=ordered_labels, columns=ordered_labels).values - - # paliers = {} - - # ========================================================================= - # Parameters hyperopt - - # buy_mid_smooth_3_deriv1 = DecimalParameter(-0.1, 0.1, decimals=2, default=-0.06, space='buy') - # buy_mid_smooth_24_deriv1 = DecimalParameter(-0.6, 0, decimals=2, default=-0.03, space='buy') - # buy_horizon_predict_1h = IntParameter(1, 6, default=2, space='buy') - - # buy_level_predict_1h = IntParameter(2, 5, default=4, space='buy') - should_enter_trade_count = 0 def confirm_trade_entry(self, pair: str, order_type: str, amount: float, rate: float, time_in_force: str, @@ -542,7 +473,6 @@ class Zeus_LGBMRegressor(IStrategy): before_last_candle = dataframe.iloc[-2].squeeze() before_last_candle_2 = dataframe.iloc[-3].squeeze() before_last_candle_12 = dataframe.iloc[-13].squeeze() - before_last_candle_24 = dataframe.iloc[-25].squeeze() expected_profit = self.expectedProfit(pair, last_candle) # print(f"current_time={current_time} current_profit={current_profit} expected_profit={expected_profit}") @@ -661,30 +591,6 @@ class Zeus_LGBMRegressor(IStrategy): return str(count_of_buys) + '_' + 'Rsi75_' + pair_name + '_' + str(self.pairs[pair]['has_gain']) - # if last_candle['mid_smooth_1h_deriv1'] < 0 and profit > expected_profit: - # self.pairs[pair]['force_sell'] = False - # self.pairs[pair]['force_buy'] = (self.pairs[pair]['count_of_buys'] - self.pairs[pair]['has_gain'] > 5) - # return str(count_of_buys) + '_' + 'Drv3_' + pair_name + '_' + str(self.pairs[pair]['has_gain']) - - # if 4 <= count_of_buys <= 6: - # if ((before_last_candle_2['mid_smooth_12_deriv1'] <= before_last_candle['mid_smooth_12_deriv1']) - # & (before_last_candle['mid_smooth_12_deriv1'] >= last_candle['mid_smooth_12_deriv1'])) \ - # and (current_profit > expected_profit): - # return 'Drv13_' + pair_name + '_' + str(count_of_buys) - # - # if 7 <= count_of_buys: - 
# if ((before_last_candle_24['sma24_deriv1_1h'] <= before_last_candle_12['sma24_deriv1_1h']) - # & (before_last_candle_12['sma24_deriv1_1h'] >= last_candle['sma24_deriv1_1h'])) \ - # and (current_profit > expected_profit): - # return 'Drv24_' + pair_name + '_' + str(count_of_buys) - - # if (baisse > mx) & (current_profit > expected_profit): - # self.trades = list() - # return 'mx_' + str(count_of_buys) - # if (last_candle['percent12'] <= -0.01) & (current_profit >= expected_profit): - # self.trades = list() - # return 'pft_' + str(count_of_buys) - self.pairs[pair]['max_touch'] = max(last_candle['close'], self.pairs[pair]['max_touch']) def getShortName(self, pair): @@ -921,25 +827,9 @@ class Zeus_LGBMRegressor(IStrategy): dataframe = self.populateDataframe(dataframe, timeframe='5m') - # dataframe = self.calculateRegression(dataframe, column='mid_smooth', window=24, degree=4, future_offset=12) - # dataframe = self.calculateRegression(dataframe, column='mid_smooth_24', window=24, degree=4, future_offset=12) - ################### INFORMATIVE 1h informative = self.dp.get_pair_dataframe(pair=metadata['pair'], timeframe="1h") informative = self.populateDataframe(informative, timeframe='1h') - # informative['target_value'] = informative['sma5'].shift(-6).rolling(5).max() - informative['sma5'] * 1.005 - # if self.dp.runmode.value in ('backtest'): - # self.trainModel(informative, metadata) - # - # # Préparer les features pour la prédiction - # features = informative[self.model_indicators].fillna(0) - # - # # Prédiction : probabilité que le prix monte - # probs = self.model.predict_proba(features)[:, 1] - # - # # Sauvegarder la probabilité pour l’analyse - # informative['ml_prob'] = probs - dataframe = merge_informative_pair(dataframe, informative, self.timeframe, "1h", ffill=True) ################### INFORMATIVE 1d @@ -949,10 +839,6 @@ class Zeus_LGBMRegressor(IStrategy): dataframe['last_price'] = dataframe['close'] dataframe['first_price'] = dataframe['close'] - # 
dataframe['mid_price'] = (dataframe['last_price'] + dataframe['first_price']) / 2 - # dataframe['close01'] = dataframe.iloc[-1]['close'] * 1.01 - # dataframe['limit'] = dataframe['close'] - count_buys = 0 if self.dp: if self.dp.runmode.value in ('live', 'dry_run'): self.getOpenTrades() @@ -995,81 +881,34 @@ class Zeus_LGBMRegressor(IStrategy): dataframe['mid_smooth_5h'] dataframe["mid_smooth_5h_deriv2"] = 100 * dataframe["mid_smooth_5h_deriv1"].diff().rolling(window=60).mean() - # =============================== - # Lissage des valeurs Journalières - horizon_d = 12 * 5 * 24 - # dataframe['rsi_1h'] = dataframe['rsi_1h'].rolling(12).mean() - # dataframe['rsi_deriv1_1h'] = dataframe['rsi_deriv1_1h'].rolling(12).mean() - # dataframe['rsi_deriv2_1h'] = dataframe['rsi_deriv2_1h'].rolling(12).mean() - - # dataframe['mid_smooth_1d'] = dataframe['mid_smooth_1d'].rolling(window=horizon_d * 5).mean() - # dataframe["mid_smooth_deriv1_1d"] = dataframe["mid_smooth_1d"].rolling(horizon_d).mean().diff() / horizon_d - # dataframe["mid_smooth_deriv2_1d"] = horizon_d * dataframe["mid_smooth_deriv1_1d"].rolling(horizon_d).mean().diff() + # indic_5m_protect = self.indic_5m_slope_sup.value + # indic_1h_protect = self.indic_1h_slope_sup.value + '_1h' # - # dataframe['sma5_1d'] = dataframe['sma5_1d'].rolling(window=horizon_d).mean() - # dataframe['sma5_deriv1_1d'] = dataframe['sma5_deriv1_1d'].rolling(window=horizon_d).mean() - # dataframe['sma24_1d'] = dataframe['sma24_1d'].rolling(window=horizon_d).mean() - # dataframe['sma24_deriv1_1d'] = dataframe['sma24_deriv1_1d'].rolling(window=horizon_d).mean() - # dataframe = self.calculateRegression(dataframe, column='mid_smooth_1d', window=24, degree=4, future_offset=12) + # dataframe['stop_buying_deb'] = ((dataframe['max_rsi_12_1d'] > self.rsi_buy_protect.value) | (dataframe['sma24_deriv1_1h'] < self.deriv1_buy_protect.value)) & (qtpylib.crossed_below(dataframe[indic_5m_protect], dataframe[indic_1h_protect])) + # dataframe['stop_buying_end'] = 
(dataframe[indic_1h_protect].shift(24) > dataframe[indic_1h_protect].shift(12)) & (dataframe[indic_1h_protect].shift(12) < dataframe[indic_1h_protect]) - # dataframe['percent_with_previous_day'] = 100 * (dataframe['close'] - dataframe['close_1d']) / dataframe['close'] - # dataframe['percent_with_max_hour'] = 100 * (dataframe['close'] - dataframe['max12_1h']) / dataframe['close'] + # latched = np.zeros(len(dataframe), dtype=bool) # - # horizon_h = 24 * 5 - # dataframe['futur_percent_1h'] = 100 * ((dataframe['mid_smooth_1h'].shift(-12) - dataframe['mid_smooth_1h']) / dataframe['mid_smooth_1h']).rolling(horizon_h).mean() - # dataframe['futur_percent_3h'] = 100 * ((dataframe['close'].shift(-36) - dataframe['close']) / dataframe['close']).rolling(horizon_h).mean() - # dataframe['futur_percent_5h'] = 100 * ((dataframe['mid_smooth_1h'].shift(-60) - dataframe['mid_smooth_1h']) / dataframe['mid_smooth_1h']).rolling(horizon_h).mean() - # dataframe['futur_percent_12h'] = 100 * ((dataframe['mid_smooth_1h'].shift(-144) - dataframe['mid_smooth_1h']) / dataframe['mid_smooth_1h']).rolling(horizon_h).mean() + # for i in range(1, len(dataframe)): + # if dataframe['stop_buying_deb'].iloc[i]: + # latched[i] = True + # elif dataframe['stop_buying_end'].iloc[i]: + # latched[i] = False + # else: + # latched[i] = latched[i - 1] # - # dataframe['futur_percent_1d'] = 100 * (dataframe['close'].shift(-1) - dataframe['close']) / dataframe['close'] - # dataframe['futur_percent_3d'] = 100 * (dataframe['close'].shift(-3) - dataframe['close']) / dataframe['close'] - # - # self.calculateProbabilite2Index(dataframe, ['futur_percent_1d'], 'sma24_deriv1_1h', 'sma5_1d') + # dataframe['stop_buying'] = latched - # if self.dp.runmode.value in ('backtest'): - # print("##################") - # print("# STAT DAY vs HOUR") - # print("##################") - # self.calculateProbabilite2Index(dataframe, futur_cols=['futur_percent_1d'], indic_1='sma5_deriv1_1d', - # indic_2='sma5_deriv2_1d') + dataframe["ms-10"] 
= dataframe["mid_smooth_24_deriv1"].shift(10) + dataframe["ms-5"] = dataframe["mid_smooth_24_deriv1"].shift(5) + dataframe["ms-4"] = dataframe["mid_smooth_24_deriv1"].shift(4) + dataframe["ms-3"] = dataframe["mid_smooth_24_deriv1"].shift(3) + dataframe["ms-2"] = dataframe["mid_smooth_24_deriv1"].shift(2) + dataframe["ms-1"] = dataframe["mid_smooth_24_deriv1"].shift(1) + dataframe["ms-0"] = dataframe["mid_smooth_24_deriv1"] + # dataframe["ms+10"] = dataframe["mid_smooth_24"].shift(-11) - # dataframe['proba_hausse'] = dataframe.apply(lambda row: self.getProbaHausseEmaVolume(row), axis=1) - - # dataframe['futur_percent_3'] = 100 * ((dataframe['sma5'].shift(-1) - dataframe['sma5']) / dataframe['sma5']) - # futur_cols = ['futur_percent_3'] - # indic_1 = 'mid_smooth_1h_deriv1' - # indic_2 = 'mid_smooth_1h_deriv2' - # self.calculateProbabilite2Index(dataframe, futur_cols, indic_1, indic_2) - - # dataframe = dataframe.resample('sma12_1h').ffill() - # dataframe = dataframe.resample('sma24_1h').ffill() - - # mises = IntParameter(1, 50, default=5, space='buy', optimize=False, load=False) - # - # pct = DecimalParameter(0.005, 0.05, default=0.012, decimals=3, space='buy', optimize=True, load=True) - # pct_inc = DecimalParameter(0.0001, 0.003, default=0.0022, decimals=4, space='buy', optimize=True, load=True) - # - # indic_5m_slope_sup = CategoricalParameter(indicators, default="sma60", space='buy') - - indic_5m_protect = self.indic_5m_slope_sup.value - indic_1h_protect = self.indic_1h_slope_sup.value + '_1h' - - dataframe['stop_buying_deb'] = ((dataframe['max_rsi_12_1d'] > self.rsi_buy_protect.value) | (dataframe['sma24_deriv1_1h'] < self.deriv1_buy_protect.value)) & (qtpylib.crossed_below(dataframe[indic_5m_protect], dataframe[indic_1h_protect])) - dataframe['stop_buying_end'] = (dataframe[indic_1h_protect].shift(24) > dataframe[indic_1h_protect].shift(12)) & (dataframe[indic_1h_protect].shift(12) < dataframe[indic_1h_protect]) - - latched = np.zeros(len(dataframe), 
dtype=bool) - - for i in range(1, len(dataframe)): - if dataframe['stop_buying_deb'].iloc[i]: - latched[i] = True - elif dataframe['stop_buying_end'].iloc[i]: - latched[i] = False - else: - latched[i] = latched[i - 1] - - dataframe['stop_buying'] = latched - - if self.dp.runmode.value in ('backtest'): + if False and self.dp.runmode.value in ('backtest'): self.trainModel(dataframe, metadata) self.model = joblib.load('rf_model.pkl') @@ -1110,14 +949,6 @@ class Zeus_LGBMRegressor(IStrategy): # # self.model_indicators = usable_cols - dataframe["ms-10"] = dataframe["mid_smooth_24_deriv1"].shift(10) - dataframe["ms-5"] = dataframe["mid_smooth_24_deriv1"].shift(5) - dataframe["ms-4"] = dataframe["mid_smooth_24_deriv1"].shift(4) - dataframe["ms-3"] = dataframe["mid_smooth_24_deriv1"].shift(3) - dataframe["ms-2"] = dataframe["mid_smooth_24_deriv1"].shift(2) - dataframe["ms-1"] = dataframe["mid_smooth_24_deriv1"].shift(1) - dataframe["ms-0"] = dataframe["mid_smooth_24_deriv1"] - # dataframe["ms+10"] = dataframe["mid_smooth_24"].shift(-11) df = dataframe[self.model_indicators].copy() # Corrélations des colonnes @@ -1158,7 +989,7 @@ class Zeus_LGBMRegressor(IStrategy): mask = np.triu(np.ones_like(corr, dtype=bool)) # --- Création de la figure --- - fig, ax = plt.subplots(figsize=(10,6)) #96, 36)) + fig, ax = plt.subplots(figsize=(20,12)) #96, 36)) # --- Heatmap avec un effet “température” --- sns.heatmap( @@ -1185,12 +1016,6 @@ class Zeus_LGBMRegressor(IStrategy): print(f"✅ Matrice enregistrée : {output_path}") - # # Exemple d'utilisation : - # selected_corr = self.select_uncorrelated_features(df, target="target", top_n=30, corr_threshold=0.7) - # print("===== 🎯 FEATURES SÉLECTIONNÉES =====") - # print(selected_corr) - - # Nettoyage df = df.dropna() @@ -1211,72 +1036,29 @@ class Zeus_LGBMRegressor(IStrategy): selected = X_train.columns[selector.get_support()] print("Colonnes conservées :", list(selected)) - # 5️⃣ Entraînement du modèle - # train_model = 
RandomForestClassifier(n_estimators=200, random_state=42) - # train_model = RandomForestClassifier( - # n_estimators=300, - # max_depth=12, - # # min_samples_split=4, - # # min_samples_leaf=2, - # # max_features='sqrt', - # # random_state=42, - # # n_jobs=-1, - # # n_jobs=-1, - # class_weight='balanced' - # ) # 1️⃣ Entraîne ton modèle LGBM normal - train_model = LGBMRegressor( - objective='regression', - metric='rmse', # tu peux aussi tester 'mae' - n_estimators=300, - learning_rate=0.05, - max_depth=7, - subsample=0.8, - colsample_bytree=0.8, - random_state=42 - ) - - # train_model = LGBMClassifier( - # n_estimators=800, - # learning_rate=0.02, - # max_depth=10, - # num_leaves=31, + # train_model = LGBMRegressor( + # objective='regression', + # metric='rmse', # tu peux aussi tester 'mae' + # n_estimators=300, + # learning_rate=0.05, + # max_depth=7, # subsample=0.8, # colsample_bytree=0.8, - # reg_alpha=0.2, - # reg_lambda=0.4, - # class_weight='balanced', - # random_state=42, + # random_state=42 # ) + # train_model.fit(X_train, y_train) - train_model.fit(X_train, y_train) + train_model, selected_features = self.optuna(X_train, X_test, y_train, y_test) + print("Features retenues :", list(selected_features)) - # 2️⃣ Sélection des features AVANT calibration - sfm = SelectFromModel(train_model, threshold="median", prefit=True) - selected_features = X_train.columns[sfm.get_support()] - print(selected_features) - - # 3️⃣ Calibration ensuite (facultative) - # calibrated = CalibratedClassifierCV(train_model, method='sigmoid', cv=5) - # calibrated.fit(X_train[selected_features], y_train) - # print(calibrated) - - # # calibration - # train_model = CalibratedClassifierCV(train_model, method='sigmoid', cv=5) - # # Sélection - # sfm = SelectFromModel(train_model, threshold="median") - # sfm.fit(X_train, y_train) + # # 2️⃣ Sélection des features AVANT calibration + # sfm = SelectFromModel(train_model, threshold="median", prefit=True) # selected_features = 
X_train.columns[sfm.get_support()] # print(selected_features) train_model.fit(X_train, y_train) - # y_pred = train_model.predict(X_test) - # y_proba = train_model.predict_proba(X_test)[:, 1] - # print(classification_report(y_test, y_pred)) - # print(confusion_matrix(y_test, y_pred)) - # print("\nRapport de classification :\n", classification_report(y_test, y_pred)) - # print("\nMatrice de confusion :\n", confusion_matrix(y_test, y_pred)) # Importances importances = pd.DataFrame({ @@ -1287,18 +1069,6 @@ class Zeus_LGBMRegressor(IStrategy): print(importances) - # best_f1 = 0 - # best_t = 0.5 - # for t in [0.3, 0.4, 0.5, 0.6, 0.7]: - # y_pred_thresh = (y_proba > t).astype(int) - # score = f1_score(y_test, y_pred_thresh) - # print(f"Seuil {t:.1f} → F1: {score:.3f}") - # if score > best_f1: - # best_f1 = score - # best_t = t - # - # print(f"✅ Meilleur seuil trouvé: {best_t} avec F1={best_f1:.3f}") - # 6️⃣ Évaluer la précision (facultatif) preds = train_model.predict(X_test) @@ -1315,28 +1085,6 @@ class Zeus_LGBMRegressor(IStrategy): joblib.dump(train_model, 'rf_model.pkl') print("✅ Modèle sauvegardé sous rf_model.pkl") - # X = dataframe des features (après shift/rolling/indicators) - # y = target binaire ou décimale - # model = ton modèle entraîné (RandomForestClassifier ou Regressor) - - # # --- 1️⃣ Mutual Information (MI) --- - # mi_scores = mutual_info_classif(X.fillna(0), y) - # mi_series = pd.Series(mi_scores, index=X.columns, name='MI') - # - # # --- 2️⃣ Permutation Importance (PI) --- - # pi_result = permutation_importance(train_model, X, y, n_repeats=10, random_state=42, n_jobs=-1) - # pi_series = pd.Series(pi_result.importances_mean, index=X.columns, name='PI') - # - # # --- 3️⃣ Combinaison dans un seul dataframe --- - # importance_df = pd.concat([mi_series, pi_series], axis=1) - # importance_df = importance_df.sort_values(by='PI', ascending=False) # tri par importance modèle - # print(importance_df) - # - # importance_df.plot(kind='bar', figsize=(10, 5)) - # 
plt.title("Mutual Info vs Permutation Importance") - # plt.ylabel("Score") - # plt.show() - self.analyze_model(train_model, X_train, X_test, y_train, y_test) def inspect_model(self, model): @@ -1419,44 +1167,6 @@ class Zeus_LGBMRegressor(IStrategy): output_dir = "user_data/plots" os.makedirs(output_dir, exist_ok=True) - # ---- Prédictions ---- - probs = model.predict(X_test) - # probs = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else preds - - # # ---- Performances globales ---- - # print("===== 📊 ÉVALUATION DU MODÈLE =====") - # print("Colonnes du modèle :", model.feature_names_in_) - # print("Colonnes X_test :", list(X_test.columns)) - # print(f"Accuracy: {accuracy_score(y_test, preds):.3f}") - # print(f"ROC AUC : {roc_auc_score(y_test, probs):.3f}") - # - # print("TN (True Negative) / FP (False Positive)") - # print("FN (False Negative) / TP (True Positive)") - # print("\nRapport de classification :\n", classification_report(y_test, preds)) - - # | Élément | Valeur | Signification | - # | ------------------- | ------ | ----------------------------------------------------------- | - # | TN (True Negative) | 983 | Modèle a correctement prédit 0 (pas d’achat) | - # | FP (False Positive) | 43 | Modèle a prédit 1 alors que c’était 0 (faux signal d’achat) | - # | FN (False Negative) | 108 | Modèle a prédit 0 alors que c’était 1 (manqué un achat) | - # | TP (True Positive) | 19 | Modèle a correctement prédit 1 (bon signal d’achat) | - - # ---- Matrice de confusion ---- - # cm = confusion_matrix(y_test, preds) - # print("Matrice de confusion :\n", cm) - # - # plt.figure(figsize=(4, 4)) - # plt.imshow(cm, cmap="Blues") - # plt.title("Matrice de confusion") - # plt.xlabel("Prédit") - # plt.ylabel("Réel") - # for i in range(2): - # for j in range(2): - # plt.text(j, i, cm[i, j], ha="center", va="center", color="black") - # # plt.show() - # plt.savefig(os.path.join(output_dir, "Matrice de confusion.png"), bbox_inches="tight") - # plt.close() - # 
---- Importance des features ---- if hasattr(model, "feature_importances_"): print("\n===== 🔍 IMPORTANCE DES FEATURES =====") @@ -1485,96 +1195,6 @@ class Zeus_LGBMRegressor(IStrategy): print("\n===== 🌳 EXTRAIT D’UN ARBRE =====") print(export_text(model.estimators_[0], feature_names=list(X_train.columns))[:800]) - # # ---- Précision selon le seuil ---- - # thresholds = np.linspace(0.1, 0.9, 9) - # print("\n===== ⚙️ PERFORMANCE SELON SEUIL =====") - # for t in thresholds: - # preds_t = (probs > t).astype(int) - # acc = accuracy_score(y_test, preds_t) - # print(f"Seuil {t:.1f} → précision {acc:.3f}") - - # # ---- ROC Curve ---- - # fpr, tpr, _ = roc_curve(y_test, probs) - # plt.figure(figsize=(5, 4)) - # plt.plot(fpr, tpr, label="ROC curve") - # plt.plot([0, 1], [0, 1], linestyle="--", color="gray") - # plt.xlabel("Taux de faux positifs") - # plt.ylabel("Taux de vrais positifs") - # plt.title("Courbe ROC") - # plt.legend() - # # plt.show() - # plt.savefig(os.path.join(output_dir, "Courbe ROC.png"), bbox_inches="tight") - # plt.close() - - # # ---- Interprétation SHAP (optionnelle) ---- - # try: - # import shap - # - # print("\n===== 💡 ANALYSE SHAP =====") - # explainer = shap.TreeExplainer(model) - # shap_values = explainer.shap_values(X_test) - # # shap.summary_plot(shap_values[1], X_test) - # # Vérifie le type de sortie de shap_values - # if isinstance(shap_values, list): - # # Cas des modèles de classification (plusieurs classes) - # shap_values_to_plot = shap_values[0] if len(shap_values) == 1 else shap_values[1] - # else: - # shap_values_to_plot = shap_values - # - # # Ajustement des dimensions au besoin - # if shap_values_to_plot.shape[1] != X_test.shape[1]: - # print(f"⚠️ Mismatch dimensions SHAP ({shap_values_to_plot.shape[1]}) vs X_test ({X_test.shape[1]})") - # min_dim = min(shap_values_to_plot.shape[1], X_test.shape[1]) - # shap_values_to_plot = shap_values_to_plot[:, :min_dim] - # X_to_plot = X_test.iloc[:, :min_dim] - # else: - # X_to_plot = X_test - # - 
# plt.figure(figsize=(12, 4)) - # shap.summary_plot(shap_values_to_plot, X_to_plot, show=False) - # plt.savefig(os.path.join(output_dir, "shap_summary.png"), bbox_inches="tight") - # plt.close() - # except ImportError: - # print("\n(SHAP non installé — `pip install shap` pour activer l’analyse SHAP.)") - - # y_proba = model.predict_proba(X_test)[:, 1] - y_proba = model.predict(X_test) - - # Trace ou enregistre le graphique - # self.plot_threshold_analysis(y_test, y_proba, step=0.05, save_path="/home/souti/freqtrade/user_data/plots/threshold_analysis.png") - - # y_test : vraies classes (0 / 1) - # y_proba : probabilités de la classe 1 prédites par ton modèle - # Exemple : y_proba = model.predict_proba(X_test)[:, 1] - - # seuils = np.arange(0.0, 1.01, 0.05) - # precisions, recalls, f1s = [], [], [] - # - # for seuil in seuils: - # y_pred = (y_proba >= seuil).astype(int) - # precisions.append(precision_score(y_test, y_pred)) - # recalls.append(recall_score(y_test, y_pred)) - # f1s.append(f1_score(y_test, y_pred)) - # - # plt.figure(figsize=(10, 6)) - # plt.plot(seuils, precisions, label='Précision', marker='o') - # plt.plot(seuils, recalls, label='Rappel', marker='o') - # plt.plot(seuils, f1s, label='F1-score', marker='o') - # - # # Ajoute un point pour le meilleur F1 - # best_idx = np.argmax(f1s) - # plt.scatter(seuils[best_idx], f1s[best_idx], color='red', s=80, label=f'Max F1 ({seuils[best_idx]:.2f})') - # - # plt.title("Performance du modèle selon le seuil de probabilité") - # plt.xlabel("Seuil de probabilité (classe 1)") - # plt.ylabel("Score") - # plt.grid(True, alpha=0.3) - # plt.legend() - # plt.savefig("/home/souti/freqtrade/user_data/plots/seuil_de_probabilite.png", bbox_inches='tight') - # plt.show() - - # print(f"✅ Meilleur F1 : {f1s[best_idx]:.3f} au seuil {seuils[best_idx]:.2f}") - # --- Après l'entraînement du modèle --- preds = model.predict(X_test) @@ -1627,21 +1247,16 @@ class Zeus_LGBMRegressor(IStrategy): # X_test = np.linspace(0, 10, 
1000).reshape(-1, 1) y_pred = model.predict(X_test) - plt.figure(figsize=(10, 5)) - plt.plot(X_test, y_pred, color="red", label="modèle LGBM") - plt.title("Fonction apprise par LGBMRegressor") - plt.xlabel("x") - plt.ylabel("y") - plt.legend() - plt.savefig(os.path.join(plot_dir, "lgbm_function_curve.png"), bbox_inches="tight") - plt.close() + self.graphFonctionApprise(X_test, y_test, y_pred) + self.graphFonctionAppriseFeature(X_test, y_test, y_pred) + + # ============================================================================== ax = lgb.plot_importance(model, max_num_features=30, figsize=(12, 6)) plt.title("Importance des features - LGBM") plt.savefig(os.path.join(plot_dir, "lgbm_feature_importance.png"), bbox_inches="tight") plt.close() - corr = X_train.corr() * 100 # en pourcentage plt.figure(figsize=(20, 16)) @@ -2091,117 +1706,10 @@ class Zeus_LGBMRegressor(IStrategy): return self.trades def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame: - pair = metadata['pair'] - # trend = self.getTrend(dataframe) - # # params = self.loadParamsFor(pair, trend) - # - # indic_5m = self.getParamValue(pair, trend, 'buy', 'indic_5m') - # indic_deriv1_5m = self.getParamValue( pair, trend, 'buy', 'indic_deriv1_5m') - # indic_deriv2_5m = self.getParamValue( pair, trend, 'buy', 'indic_deriv2_5m') - - # dataframe.loc[ - # ( - # (dataframe['mid_smooth_3'].shift(1) < dataframe['mid_smooth_3']) - # & (dataframe['hapercent'] > 0) - # & ((dataframe['max_rsi_24_1h'] < 70) | (dataframe['close'] < dataframe['close_1d'])) - # & (dataframe['open'] <= dataframe['bb_middleband']) - # & (dataframe[f"{self.indic_5m.value}_deriv1"] >= self.indic_deriv1_5m.value) - # & (dataframe[f"{self.indic_5m.value}_deriv2"] >= self.indic_deriv2_5m.value) - # # & (dataframe[f"{indic_1h}_deriv1"] >= self.indic_deriv1_1h.value) - # # & (dataframe[f"{indic_1h}_deriv2"] >= self.indic_deriv2_1h.value) - # ), ['enter_long', 'enter_tag']] = (1, 'smth') - - # dataframe.loc[ - # ( - # 
(dataframe['sma24_deriv2'].shift(1) < 0) - # & (dataframe['sma24_deriv2'] > 0) - # & ((dataframe['max_rsi_24_1h'] < 70) | (dataframe['close'] < dataframe['close_1d'])) - # & (dataframe[f"{self.indic_5m.value}_deriv1"] >= self.indic_deriv1_5m.value) - # & (dataframe[f"{self.indic_5m.value}_deriv2"] >= self.indic_deriv2_5m.value) - # # & (dataframe[f"{indic_1h}_deriv1"] >= self.indic_deriv1_1h.value) - # # & (dataframe[f"{indic_1h}_deriv2"] >= self.indic_deriv2_1h.value) - # & (dataframe[f"sma60_deriv1"] >= -0.2) - # ), ['enter_long', 'enter_tag']] = (1, 'invert') - # - # dataframe.loc[ - # ( - # (dataframe['sma24_deriv1'] > 0) - # & (dataframe['sma60_deriv1'].shift(1) < 0) - # & (dataframe['sma60_deriv1'] > 0) - # & ((dataframe['max_rsi_24_1h'] < 70) | (dataframe['close'] < dataframe['close_1d'])) - # & (dataframe[f"{self.indic_5m.value}_deriv1"] >= self.indic_deriv1_5m.value) - # & (dataframe[f"{self.indic_5m.value}_deriv2"] >= self.indic_deriv2_5m.value) - # # & (dataframe[f"{indic_1h}_deriv1"] >= self.indic_deriv1_1h.value) - # # & (dataframe[f"{indic_1h}_deriv2"] >= self.indic_deriv2_1h.value) - # & (dataframe[f"sma60_deriv1"] >= -0.2) - # ), ['enter_long', 'enter_tag']] = (1, 'raise') - # - # dataframe.loc[ - # ( - # (dataframe['sma60_deriv1'].shift(1) < 0) - # & (dataframe['sma24_deriv2'] > 0) - # & ((dataframe['max_rsi_24_1h'] < 70) | (dataframe['close'] < dataframe['close_1d'])) - # & (dataframe[f"{self.indic_5m.value}_deriv1"] >= self.indic_deriv1_5m.value) - # & (dataframe[f"{self.indic_5m.value}_deriv2"] >= self.indic_deriv2_5m.value) - # # & (dataframe[f"{indic_1h}_deriv1"] >= self.indic_deriv1_1h.value) - # # & (dataframe[f"{indic_1h}_deriv2"] >= self.indic_deriv2_1h.value) - # & (dataframe[f"sma60_deriv1"] >= -0.2) - # ), ['enter_long', 'enter_tag']] = (1, 'stg_inv') - # - # dataframe.loc[ - # ( - # (dataframe['mid_smooth_24'].shift(24) >= dataframe['mid_smooth_24'].shift(12)) - # & (dataframe['mid_smooth_24'].shift(12) <= dataframe['mid_smooth_24']) - 
# & ((dataframe['max_rsi_24_1h'] < 70) | (dataframe['close'] < dataframe['close_1d'])) - # & (dataframe[f"{self.indic_5m.value}_deriv1"] >= self.indic_deriv1_5m.value) - # & (dataframe[f"{self.indic_5m.value}_deriv2"] >= self.indic_deriv2_5m.value) - # # & (dataframe[f"{indic_1h}_deriv1"] >= self.indic_deriv1_1h.value) - # & (dataframe[f"sma60_deriv1"] >= -0.2) - # ), ['enter_long', 'enter_tag']] = (1, 'smth3_inv') - dataframe.loc[ ( - (dataframe['percent3'] < -0.03) - & (dataframe['percent'] > 0) - ), ['enter_long', 'enter_tag']] = (1, 'pct3') - - dataframe.loc[ - ( - (dataframe["sma24"].shift(2) >= dataframe["sma24"].shift(1)) - & (dataframe["sma24"].shift(1) <= dataframe["sma24"]) - & (dataframe['percent3_1h'] < -0.03) - ), ['enter_long', 'enter_tag']] = (1, 'pct3_1h') - - dataframe.loc[ - ( - (dataframe[f"{self.indic_5m_slope_sup_buy.value}"].shift(2) >= dataframe[f"{self.indic_5m_slope_sup_buy.value}"].shift(1)) - & (dataframe[f"{self.indic_5m_slope_sup_buy.value}"].shift(1) <= dataframe[f"{self.indic_5m_slope_sup_buy.value}"]) - & (dataframe['slope_norm_1d'] < dataframe['slope_norm_1h']) - & (dataframe['stop_buying'] == False) - # & (dataframe[f"{self.indic_deriv_5m_buy.value}_deriv1"] > self.deriv_5m_buy.value) - # & (dataframe[f"sma60_deriv1"] >= -0.2) - # & (dataframe[f"hapercent"] >= -0.001) - ), ['enter_long', 'enter_tag']] = (1, f"{self.indic_5m_slope_sup.value}_sup") - - dataframe.loc[ - ( - (dataframe[f"{self.indic_5m_slope_inf_buy.value}"].shift(2) >= dataframe[f"{self.indic_5m_slope_inf_buy.value}"].shift(1)) - & (dataframe[f"{self.indic_5m_slope_inf_buy.value}"].shift(1) <= dataframe[f"{self.indic_5m_slope_inf_buy.value}"]) - & (dataframe['slope_norm_1d'] > dataframe['slope_norm_1h']) - & (dataframe['stop_buying'] == False) - # & (dataframe[f"{self.indic_deriv_5m_buy.value}_deriv1"] > self.deriv_5m_buy.value) - # & (dataframe[f"sma60_deriv1"] >= -0.2) - # & (dataframe[f"hapercent"] >= -0.001) - ), ['enter_long', 'enter_tag']] = (1, 
f"{self.indic_5m_slope_inf_buy.value}_inf") - - dataframe.loc[ - ( - (dataframe['stop_buying'] == False) - & (dataframe['stop_buying'].shift(1) == True) - # & (dataframe[f"{self.indic_deriv_5m_buy.value}_deriv1"] > self.deriv_5m_buy.value) - # & (dataframe[f"sma60_deriv1"] >= -0.2) - # & (dataframe[f"hapercent"] >= -0.001) - ), ['enter_long', 'enter_tag']] = (1, f"end") + (dataframe['ml_prob'] > self.ml_prob_buy.value) + ), ['enter_long', 'enter_tag']] = (1, f"ml_prob") dataframe['test'] = np.where(dataframe['enter_long'] == 1, dataframe['close'] * 1.01, np.nan) @@ -2354,42 +1862,10 @@ class Zeus_LGBMRegressor(IStrategy): print(line) def populate_sell_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame: - # dataframe.loc[ - # ( - # (dataframe['mid_smooth_deriv1'] == 0) - # & (dataframe['mid_smooth_deriv1'].shift(1) > 0) - # ), ['sell', 'exit_long']] = (1, 'sell_sma5_pct_1h') - - # {indic_5m_sell} {indic_deriv1_5m_sell} {indic_deriv2_5m_sell} dataframe.loc[ ( - (dataframe[f"{self.indic_5m_slope_sup_sell.value}"].shift(2) < dataframe[f"{self.indic_5m_slope_sup_sell.value}"].shift(1)) - & (dataframe[f"{self.indic_5m_slope_sup_sell.value}"].shift(1) > dataframe[f"{self.indic_5m_slope_sup_sell.value}"]) - & (dataframe[f"{self.indic_deriv_5m_slope_sup_sell.value}_deriv1"] < self.deriv_5m_slope_sup_sell.value) - & (dataframe['slope_norm_1d'] > dataframe['slope_norm_1h']) - ), ['exit_long', 'exit_tag']] = (1, f"{self.indic_5m_slope_sup_sell.value}_sup") - - dataframe.loc[ - ( - (dataframe[f"{self.indic_5m_slope_inf_sell.value}"].shift(2) < dataframe[f"{self.indic_5m_slope_inf_sell.value}"].shift(1)) - & (dataframe[f"{self.indic_5m_slope_inf_sell.value}"].shift(1) > dataframe[f"{self.indic_5m_slope_inf_sell.value}"]) - & (dataframe[f"{self.indic_deriv_5m_slope_inf_sell.value}_deriv1"] < self.deriv_5m_slope_inf_sell.value) - & (dataframe['slope_norm_1d'] < dataframe['slope_norm_1h']) - ), ['exit_long', 'exit_tag']] = (1, 
f"{self.indic_5m_slope_inf_sell.value}_inf") - - dataframe.loc[ - ( - (dataframe['stop_buying'] == True) - & (dataframe['stop_buying'].shift(1) == False) - # & (dataframe[f"{self.indic_deriv_5m_buy.value}_deriv1"] > self.deriv_5m_buy.value) - # & (dataframe[f"sma60_deriv1"] >= -0.2) - # & (dataframe[f"hapercent"] >= -0.001) - ), ['enter_long', 'enter_tag']] = (1, f"start") - - # dataframe.loc[ - # ( - # (dataframe[f"{self.indic_percent_sell.value}"] < self.percent_5m_sell.value) - # ), ['exit_long', 'exit_tag']] = (1, f"{self.indic_percent_sell.value}") + (dataframe['ml_prob'] < self.ml_prob_sell.value) + ), ['exit_long', 'exit_tag']] = (1, f"ml_prob") return dataframe @@ -2406,10 +1882,6 @@ class Zeus_LGBMRegressor(IStrategy): dataframe, _ = self.dp.get_analyzed_dataframe(trade.pair, self.timeframe) last_candle = dataframe.iloc[-1].squeeze() before_last_candle = dataframe.iloc[-2].squeeze() - before_last_candle_12 = dataframe.iloc[-13].squeeze() - before_last_candle_24 = dataframe.iloc[-25].squeeze() - last_candle_3 = dataframe.iloc[-4].squeeze() - last_candle_previous_1h = dataframe.iloc[-13].squeeze() # prépare les données current_time = current_time.astimezone(timezone.utc) open_date = trade.open_date.astimezone(timezone.utc) @@ -2459,7 +1931,7 @@ class Zeus_LGBMRegressor(IStrategy): if not self.should_enter_trade(pair, last_candle, current_time): return None - condition = (last_candle['enter_long'] and last_candle['sma60_deriv1'] > 0 and last_candle['hapercent'] > 0 and last_candle['stop_buying'] == False) \ + condition = (last_candle['enter_long'] and last_candle['sma60_deriv1'] > 0 and last_candle['hapercent'] > 0) \ or last_candle['enter_tag'] == 'pct3' \ or last_candle['enter_tag'] == 'pct3_1h' @@ -2533,7 +2005,7 @@ class Zeus_LGBMRegressor(IStrategy): # and last_candle['sma60_deriv1'] > 0 and last_candle['max_rsi_12_1h'] < 75 and last_candle['rsi_1d'] < 58 - and last_candle['stop_buying'] == False + # and last_candle['stop_buying'] == False # and 
last_candle['mid_smooth_5_deriv1_1d'] > 0 and self.wallets.get_available_stake_amount() > 0 ): @@ -3297,24 +2769,6 @@ class Zeus_LGBMRegressor(IStrategy): return True - # if last_candle['sma5_deriv1_1h'] < -0.02: - # return False - # - # if last_candle['mid_smooth_1h_deriv2'] < -2 or last_candle['sma5_deriv2_1h'] < -2: - # return False - # - # if last_candle['sma5_deriv1_1h'] < 0.0 and last_candle['sma5_deriv2_1h'] < -0.0: - # return False - # - # if last_candle['mid_smooth_1h_deriv1'] < 0.0 and last_candle['mid_smooth_1h_deriv2'] < -0.0 and last_candle[ - # 'sma5_deriv2_1h'] < 0: - # return False - - # if pair.startswith('BTC'): - # return True # BTC toujours autorisé - - return True - # Filtrer les paires non-BTC non_btc_pairs = [p for p in self.pairs if not p.startswith('BTC')] @@ -3623,3 +3077,132 @@ class Zeus_LGBMRegressor(IStrategy): }).sort_values(by="corr_with_target", key=np.abs, ascending=False) return selected_corr + + def graphFonctionApprise(self, X_test, y_test, y_pred): + # Exemple : trier les valeurs de X_test et les prédictions + x_sorted = np.argsort(X_test.iloc[:, 0]) + x = X_test.iloc[:, 0].iloc[x_sorted] + y_true = y_test.iloc[x_sorted] + y_pred = y_pred[x_sorted] + + plt.figure(figsize=(12, 6)) + plt.plot(x, y_true, label="Réel", color="blue", alpha=0.7) + plt.plot(x, y_pred, label="Prédit (LGBM)", color="red", alpha=0.7) + + plt.title("Fonction apprise par LGBMRegressor") + plt.xlabel("Feature principale") + plt.ylabel("Valeur prédite") + plt.legend() + plt.grid(True) + + out_path = "/home/souti/freqtrade/user_data/plots/lgbm_function.png" + plt.savefig(out_path, bbox_inches="tight") + plt.close() + + print(f"Graphique sauvegardé : {out_path}") + + def graphFonctionAppriseFeature(self, X_test, y_test, y_pred): + plt.figure(figsize=(14, 8)) + + colors = sns.color_palette("coolwarm", n_colors=X_test.shape[1]) + + for i, col in enumerate(X_test.columns): + plt.plot(X_test[col], y_pred, '.', color=colors[i], alpha=0.4, label=col) + + 
plt.title("Fonction apprise par LGBMRegressor (par feature)") + plt.xlabel("Valeur feature") + plt.ylabel("Valeur prédite") + plt.legend(loc="best") + plt.grid(True) + + out_path = "/home/souti/freqtrade/user_data/plots/lgbm_features.png" + plt.savefig(out_path, bbox_inches="tight") + plt.close() + + print(f"Graphique sauvegardé : {out_path}") + + def optuna(self, X_train, X_test, y_train, y_test): + # Suppose que X_train, y_train sont déjà définis + # ou sinon : + # X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42) + + print("Description") + print(X_train.describe().T.sort_values("std")) + def objective(trial): + params = { + 'objective': 'regression', + 'metric': 'rmse', + 'n_estimators': trial.suggest_int('n_estimators', 100, 1000), + 'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.2, log=True), + 'max_depth': trial.suggest_int('max_depth', 3, 15), + 'num_leaves': trial.suggest_int('num_leaves', 20, 300), + 'subsample': trial.suggest_float('subsample', 0.5, 1.0), + 'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0), + 'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 10.0), + 'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 10.0), + 'random_state': 42, + } + + model = LGBMRegressor(**params) + model.fit(X_train, y_train) + + # On peut aussi valider sur un split interne + preds = model.predict(X_test) + rmse = np.sqrt(mean_squared_error(y_test, preds)) + return rmse + + # Crée une étude Optuna + study = optuna.create_study(direction="minimize") # on veut minimiser l'erreur + study.optimize(objective, n_trials=50, show_progress_bar=True) + + # 🔹 Afficher les meilleurs résultats + print("✅ Meilleurs hyperparamètres trouvés :") + print(study.best_params) + print(f"Meilleur RMSE : {study.best_value:.4f}") + + # 🔹 Sauvegarder les résultats + optuna_path = "/home/souti/freqtrade/user_data/plots/optuna_lgbm_results.txt" + with open(optuna_path, "w") as f: + f.write(f"Best 
params:\n{study.best_params}\n") + f.write(f"Best RMSE: {study.best_value:.4f}\n") + + print(f"Résultats sauvegardés dans : {optuna_path}") + + # 🔹 Créer le modèle final avec les meilleurs paramètres + print("🚀 Entraînement du modèle LightGBM...") + + # -- Appliquer le filtrage -- + X_train_filtered = self.filter_features(X_train, y_train) + best_model = LGBMRegressor(**study.best_params) + best_model.fit(X_train_filtered, y_train) + + # fig1 = vis.plot_optimization_history(study) + # fig1.write_image("/home/souti/freqtrade/user_data/plots/optuna_history.png") + # + # fig2 = vis.plot_param_importances(study) + # fig2.write_image("/home/souti/freqtrade/user_data/plots/optuna_importance.png") + + return best_model, X_train_filtered + + def filter_features(self, X: pd.DataFrame, y: pd.Series, corr_threshold: float = 0.95): + """Filtre les colonnes peu utiles ou redondantes""" + print("🔍 Filtrage automatique des features...") + + # 1️⃣ Supprimer les colonnes constantes + vt = VarianceThreshold(threshold=1e-5) + X_var = pd.DataFrame(vt.fit_transform(X), columns=X.columns[vt.get_support()]) + print(f" - {len(X.columns) - X_var.shape[1]} colonnes supprimées (variance faible)") + + # 2️⃣ Supprimer les colonnes très corrélées entre elles + corr = X_var.corr().abs() + upper = corr.where(np.triu(np.ones(corr.shape), k=1).astype(bool)) + drop_cols = [column for column in upper.columns if any(upper[column] > corr_threshold)] + X_corr = X_var.drop(columns=drop_cols, errors='ignore') + print(f" - {len(drop_cols)} colonnes supprimées (corrélation > {corr_threshold})") + + # 3️⃣ Facultatif : supprimer les colonnes entièrement NaN + X_clean = X_corr.dropna(axis=1, how='all') + + print(f"✅ {X_clean.shape[1]} colonnes conservées après filtrage.\n") + return X_clean + diff --git a/tools/sklearn/Sinus.py b/tools/sklearn/Sinus.py new file mode 100644 index 0000000..f6d0b73 --- /dev/null +++ b/tools/sklearn/Sinus.py @@ -0,0 +1,32 @@ +import numpy as np +import matplotlib.pyplot as plt 
+from lightgbm import LGBMRegressor + +# === Données non linéaires === +np.random.seed(0) +X = np.linspace(0, 10, 200).reshape(-1, 1) +y = np.sin(X).ravel() + np.random.normal(0, 0.1, X.shape[0]) # sinusoïde + bruit + +# === Entraînement du modèle === +model = LGBMRegressor( + n_estimators=300, # nombre d’arbres + learning_rate=0.05, # taux d’apprentissage (plus petit = plus lisse) + max_depth=5 # profondeur des arbres (plus grand = plus complexe) +) +model.fit(X, y) + +# === Prédiction === +X_test = np.linspace(0, 10, 500).reshape(-1, 1) +y_pred = model.predict(X_test) + +# === Visualisation === +plt.figure(figsize=(10, 5)) +plt.scatter(X, y, color="lightgray", label="Données réelles (sin + bruit)", s=20) +plt.plot(X_test, np.sin(X_test), color="green", linestyle="--", label="sin(x) réel") +plt.plot(X_test, y_pred, color="red", label="Prédiction LGBM") +plt.title("Approximation non linéaire avec LGBMRegressor") +plt.xlabel("x") +plt.ylabel("y") +plt.legend() +plt.grid(True) +plt.show()