LGBMRegressor calcul multi pairs et sauvegarde graph

This commit is contained in:
Jérôme Delacotte
2025-11-12 23:08:39 +01:00
parent a1f5bc1e41
commit a7135599bc
2 changed files with 146 additions and 39 deletions

View File

@@ -290,7 +290,7 @@ class Zeus_LGBMRegressor(IStrategy):
indicators = {'sma5', 'sma12', 'sma24', 'sma60'} indicators = {'sma5', 'sma12', 'sma24', 'sma60'}
indicators_percent = {'percent', 'percent3', 'percent12', 'percent24', 'percent_1h', 'percent3_1h', 'percent12_1h', 'percent24_1h'} indicators_percent = {'percent', 'percent3', 'percent12', 'percent24', 'percent_1h', 'percent3_1h', 'percent12_1h', 'percent24_1h'}
mises = IntParameter(1, 50, default=5, space='buy', optimize=False, load=False) mises = IntParameter(1, 50, default=5, space='buy', optimize=True, load=True)
ml_prob_buy = DecimalParameter(-0.5, 0.5, default=0.0, decimals=2, space='buy', optimize=True, load=True) ml_prob_buy = DecimalParameter(-0.5, 0.5, default=0.0, decimals=2, space='buy', optimize=True, load=True)
ml_prob_sell = DecimalParameter(-0.5, 0.5, default=0.0, decimals=2, space='sell', optimize=True, load=True) ml_prob_sell = DecimalParameter(-0.5, 0.5, default=0.0, decimals=2, space='sell', optimize=True, load=True)
@@ -908,10 +908,11 @@ class Zeus_LGBMRegressor(IStrategy):
dataframe["ms-0"] = dataframe["mid_smooth_24_deriv1"] dataframe["ms-0"] = dataframe["mid_smooth_24_deriv1"]
# dataframe["ms+10"] = dataframe["mid_smooth_24"].shift(-11) # dataframe["ms+10"] = dataframe["mid_smooth_24"].shift(-11)
if False and self.dp.runmode.value in ('backtest'): if self.dp.runmode.value in ('backtest'):
self.trainModel(dataframe, metadata) self.trainModel(dataframe, metadata)
self.model = joblib.load('rf_model.pkl') short_pair = self.getShortName(pair)
self.model = joblib.load(f"{short_pair}_rf_model.pkl")
# Préparer les features pour la prédiction # Préparer les features pour la prédiction
features = dataframe[self.model_indicators].fillna(0) features = dataframe[self.model_indicators].fillna(0)
@@ -928,9 +929,12 @@ class Zeus_LGBMRegressor(IStrategy):
return dataframe return dataframe
def trainModel(self, dataframe: DataFrame, metadata: dict): def trainModel(self, dataframe: DataFrame, metadata: dict):
pair = self.getShortName(metadata['pair'])
pd.set_option('display.max_rows', None) pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None) pd.set_option('display.max_columns', None)
pd.set_option("display.width", 200) pd.set_option("display.width", 200)
path=f"user_data/plots/{pair}/"
os.makedirs(path, exist_ok=True)
# # Étape 1 : sélectionner numériques # # Étape 1 : sélectionner numériques
# numeric_cols = dataframe.select_dtypes(include=['int64', 'float64']).columns # numeric_cols = dataframe.select_dtypes(include=['int64', 'float64']).columns
@@ -1010,7 +1014,7 @@ class Zeus_LGBMRegressor(IStrategy):
plt.yticks(rotation=0) plt.yticks(rotation=0)
# --- Sauvegarde --- # --- Sauvegarde ---
output_path = "/home/souti/freqtrade/user_data/plots/Matrice_de_correlation_temperature.png" output_path = f"{path}/Matrice_de_correlation_temperature.png"
plt.savefig(output_path, bbox_inches="tight", dpi=150) plt.savefig(output_path, bbox_inches="tight", dpi=150)
plt.close(fig) plt.close(fig)
@@ -1050,7 +1054,7 @@ class Zeus_LGBMRegressor(IStrategy):
# ) # )
# train_model.fit(X_train, y_train) # train_model.fit(X_train, y_train)
train_model, selected_features = self.optuna(X_train, X_test, y_train, y_test) train_model, selected_features = self.optuna(path, X_train, X_test, y_train, y_test)
print("Features retenues :", list(selected_features)) print("Features retenues :", list(selected_features))
# # 2⃣ Sélection des features AVANT calibration # # 2⃣ Sélection des features AVANT calibration
@@ -1082,10 +1086,10 @@ class Zeus_LGBMRegressor(IStrategy):
# print(f"Accuracy: {acc:.3f}") # print(f"Accuracy: {acc:.3f}")
# 7⃣ Sauvegarde du modèle # 7⃣ Sauvegarde du modèle
joblib.dump(train_model, 'rf_model.pkl') joblib.dump(train_model, f"{pair}_rf_model.pkl")
print("✅ Modèle sauvegardé sous rf_model.pkl") print(f"✅ Modèle sauvegardé sous {pair}_rf_model.pkl")
self.analyze_model(train_model, X_train, X_test, y_train, y_test) self.analyze_model(pair, train_model, X_train, X_test, y_train, y_test)
def inspect_model(self, model): def inspect_model(self, model):
""" """
@@ -1159,12 +1163,12 @@ class Zeus_LGBMRegressor(IStrategy):
print("\n===== ✅ FIN DE LINSPECTION =====") print("\n===== ✅ FIN DE LINSPECTION =====")
def analyze_model(self, model, X_train, X_test, y_train, y_test): def analyze_model(self, pair, model, X_train, X_test, y_train, y_test):
""" """
Analyse complète d'un modèle ML supervisé (classification binaire). Analyse complète d'un modèle ML supervisé (classification binaire).
Affiche performances, importance des features, matrices, seuils, etc. Affiche performances, importance des features, matrices, seuils, etc.
""" """
output_dir = "user_data/plots" output_dir = f"user_data/plots/{pair}/"
os.makedirs(output_dir, exist_ok=True) os.makedirs(output_dir, exist_ok=True)
# ---- Importance des features ---- # ---- Importance des features ----
@@ -1206,8 +1210,7 @@ class Zeus_LGBMRegressor(IStrategy):
print(f"RMSE: {rmse:.5f} | R²: {r2:.3f}") print(f"RMSE: {rmse:.5f} | R²: {r2:.3f}")
# --- Création du dossier de sortie --- # --- Création du dossier de sortie ---
plot_dir = "/home/souti/freqtrade/user_data/plots" os.makedirs(output_dir, exist_ok=True)
os.makedirs(plot_dir, exist_ok=True)
# --- Graphique prédiction vs réel --- # --- Graphique prédiction vs réel ---
plt.figure(figsize=(8, 8)) plt.figure(figsize=(8, 8))
@@ -1225,36 +1228,41 @@ class Zeus_LGBMRegressor(IStrategy):
plt.legend() plt.legend()
# --- Sauvegarde --- # --- Sauvegarde ---
plot_path = os.path.join(plot_dir, "LightGBM_regression_pred_vs_real.png") plot_path = os.path.join(output_dir, "LightGBM_regression_pred_vs_real.png")
plt.savefig(plot_path, bbox_inches="tight", dpi=200) plt.savefig(plot_path, bbox_inches="tight", dpi=200)
plt.close() plt.close()
self.plot_pred_vs_real_filtered(model, X_test, y_test, preds, output_dir)
print(f"✅ Graphique sauvegardé : {plot_path}") print(f"✅ Graphique sauvegardé : {plot_path}")
# save_dir = "/home/souti/freqtrade/user_data/plots/" # ax = lgb.plot_tree(model, tree_index=0, figsize=(30, 20), show_info=["split_gain", "internal_value", "internal_count"])
# os.makedirs(save_dir, exist_ok=True) # plt.title("Arbre de décision n°0")
# plt.savefig(os.path.join(output_dir, "lgbm_tree_0.png"), bbox_inches="tight")
# plt.close()
ax = lgb.plot_tree(model, tree_index=0, figsize=(30, 20), for i in range(5):
show_info=["split_gain", "internal_value", "internal_count"]) ax = lgb.plot_tree(model, tree_index=i, figsize=(20, 12))
plt.title("Arbre de décision n°0") plt.title(f"Arbre {i}")
plt.savefig(os.path.join(plot_dir, "lgbm_tree_0.png"), bbox_inches="tight") plt.savefig(os.path.join(output_dir, f"lgbm_tree_{i}.png"), bbox_inches="tight")
plt.close() plt.close()
ax = lgb.plot_tree(model, figsize=(40, 20)) ax = lgb.plot_tree(model, figsize=(40, 20))
plt.title("Vue globale du modèle LGBM") plt.title("Vue globale du modèle LGBM")
plt.savefig(os.path.join(plot_dir, "lgbm_all_trees.png"), bbox_inches="tight") plt.savefig(os.path.join(output_dir, "lgbm_all_trees.png"), bbox_inches="tight")
plt.close() plt.close()
# X_test = np.linspace(0, 10, 1000).reshape(-1, 1) # X_test = np.linspace(0, 10, 1000).reshape(-1, 1)
y_pred = model.predict(X_test) y_pred = model.predict(X_test)
self.graphFonctionApprise(X_test, y_test, y_pred) self.graphFonctionApprise(output_dir, X_test, y_test, y_pred)
self.graphFonctionAppriseFeature(X_test, y_test, y_pred) self.graphFonctionAppriseFeature(output_dir, X_test, y_test, y_pred)
# ============================================================================== # ==============================================================================
ax = lgb.plot_importance(model, max_num_features=30, figsize=(12, 6)) ax = lgb.plot_importance(model, max_num_features=30, figsize=(12, 6))
plt.title("Importance des features - LGBM") plt.title("Importance des features - LGBM")
plt.savefig(os.path.join(plot_dir, "lgbm_feature_importance.png"), bbox_inches="tight") plt.savefig(os.path.join(output_dir, "lgbm_feature_importance.png"), bbox_inches="tight")
plt.close() plt.close()
corr = X_train.corr() * 100 # en pourcentage corr = X_train.corr() * 100 # en pourcentage
@@ -1262,7 +1270,7 @@ class Zeus_LGBMRegressor(IStrategy):
plt.figure(figsize=(20, 16)) plt.figure(figsize=(20, 16))
sns.heatmap(corr, cmap="coolwarm", center=0, annot=False, fmt=".1f", cbar_kws={'label': 'Corrélation (%)'}) sns.heatmap(corr, cmap="coolwarm", center=0, annot=False, fmt=".1f", cbar_kws={'label': 'Corrélation (%)'})
plt.title("Matrice de corrélation (%)") plt.title("Matrice de corrélation (%)")
plt.savefig(os.path.join(plot_dir, "correlation_matrix.png"), bbox_inches="tight") plt.savefig(os.path.join(output_dir, "correlation_matrix.png"), bbox_inches="tight")
plt.close() plt.close()
plt.figure(figsize=(10, 6)) plt.figure(figsize=(10, 6))
@@ -1270,11 +1278,53 @@ class Zeus_LGBMRegressor(IStrategy):
plt.xlabel("Valeurs réelles") plt.xlabel("Valeurs réelles")
plt.ylabel("Prédictions du modèle") plt.ylabel("Prédictions du modèle")
plt.title("Comparaison y_test vs y_pred") plt.title("Comparaison y_test vs y_pred")
plt.savefig(os.path.join(plot_dir, "ytest_vs_ypred.png"), bbox_inches="tight") plt.savefig(os.path.join(output_dir, "ytest_vs_ypred.png"), bbox_inches="tight")
plt.close() plt.close()
print("\n===== ✅ FIN DE LANALYSE =====") print("\n===== ✅ FIN DE LANALYSE =====")
def plot_pred_vs_real_filtered(self, model, X_test, y_test, preds, output_dir, top_n=5):
"""
Affiche le graphique prédiction vs réel pour les N features les plus importantes.
"""
# --- 1⃣ Extraire les features les plus importantes ---
importance_df = pd.DataFrame({
"feature": X_test.columns,
"importance": model.feature_importances_
}).sort_values(by="importance", ascending=False)
top_features = importance_df.head(top_n)["feature"].tolist()
print(f"Top {top_n} features: {top_features}")
# --- 2⃣ Créer un masque pour ne garder que les lignes où au moins une des top features varie fortement ---
X_top = X_test[top_features]
# Optionnel : filtrer les points atypiques pour lisser le nuage
mask = np.all(np.abs((X_top - X_top.mean()) / X_top.std()) < 3, axis=1)
X_filtered = X_top[mask]
y_filtered = y_test[mask]
preds_filtered = preds[mask]
# --- 3⃣ Tracer ---
plt.figure(figsize=(8, 8))
plt.scatter(y_filtered, preds_filtered, alpha=0.4, s=15, c='blue', label=f"Top {top_n} features")
plt.xlabel("Valeurs réelles", fontsize=12)
plt.ylabel("Valeurs prédites", fontsize=12)
plt.title(f"LightGBM Régression — Prédiction vs Réel (filtré sur top {top_n} features)", fontsize=14)
plt.plot(
[y_filtered.min(), y_filtered.max()],
[y_filtered.min(), y_filtered.max()],
'r--',
linewidth=1,
label="Ligne idéale"
)
plt.legend()
plt.grid(True)
out_path = f"{output_dir}/lgbm_pred_vs_real_top{top_n}.png"
plt.savefig(out_path, bbox_inches="tight")
plt.close()
def plot_threshold_analysis(self, y_true, y_proba, step=0.05, save_path=None): def plot_threshold_analysis(self, y_true, y_proba, step=0.05, save_path=None):
""" """
Affiche la précision, le rappel et le F1-score selon le seuil de décision. Affiche la précision, le rappel et le F1-score selon le seuil de décision.
@@ -1708,7 +1758,7 @@ class Zeus_LGBMRegressor(IStrategy):
def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame: def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
dataframe.loc[ dataframe.loc[
( (
(dataframe['ml_prob'] > self.ml_prob_buy.value) (dataframe['ml_prob'] > dataframe['sma24_deriv1'])
), ['enter_long', 'enter_tag']] = (1, f"ml_prob") ), ['enter_long', 'enter_tag']] = (1, f"ml_prob")
dataframe['test'] = np.where(dataframe['enter_long'] == 1, dataframe['close'] * 1.01, np.nan) dataframe['test'] = np.where(dataframe['enter_long'] == 1, dataframe['close'] * 1.01, np.nan)
@@ -1864,7 +1914,7 @@ class Zeus_LGBMRegressor(IStrategy):
def populate_sell_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame: def populate_sell_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
dataframe.loc[ dataframe.loc[
( (
(dataframe['ml_prob'] < self.ml_prob_sell.value) (dataframe['ml_prob'] < dataframe['sma24_deriv1'])
), ['exit_long', 'exit_tag']] = (1, f"ml_prob") ), ['exit_long', 'exit_tag']] = (1, f"ml_prob")
return dataframe return dataframe
@@ -1872,6 +1922,7 @@ class Zeus_LGBMRegressor(IStrategy):
def adjust_trade_position(self, trade: Trade, current_time: datetime, def adjust_trade_position(self, trade: Trade, current_time: datetime,
current_rate: float, current_profit: float, min_stake: float, current_rate: float, current_profit: float, min_stake: float,
max_stake: float, **kwargs): max_stake: float, **kwargs):
return None
# ne rien faire si ordre deja en cours # ne rien faire si ordre deja en cours
if trade.has_open_orders: if trade.has_open_orders:
# print("skip open orders") # print("skip open orders")
@@ -3078,7 +3129,7 @@ class Zeus_LGBMRegressor(IStrategy):
return selected_corr return selected_corr
def graphFonctionApprise(self, X_test, y_test, y_pred): def graphFonctionApprise(self, path, X_test, y_test, y_pred):
# Exemple : trier les valeurs de X_test et les prédictions # Exemple : trier les valeurs de X_test et les prédictions
x_sorted = np.argsort(X_test.iloc[:, 0]) x_sorted = np.argsort(X_test.iloc[:, 0])
x = X_test.iloc[:, 0].iloc[x_sorted] x = X_test.iloc[:, 0].iloc[x_sorted]
@@ -3095,33 +3146,47 @@ class Zeus_LGBMRegressor(IStrategy):
plt.legend() plt.legend()
plt.grid(True) plt.grid(True)
out_path = "/home/souti/freqtrade/user_data/plots/lgbm_function.png" out_path = f"{path}/lgbm_function.png"
plt.savefig(out_path, bbox_inches="tight") plt.savefig(out_path, bbox_inches="tight")
plt.close() plt.close()
print(f"Graphique sauvegardé : {out_path}") print(f"Graphique sauvegardé : {out_path}")
def graphFonctionAppriseFeature(self, X_test, y_test, y_pred): import numpy as np
plt.figure(figsize=(14, 8)) import seaborn as sns
import matplotlib.pyplot as plt
def graphFonctionAppriseFeature(self, path, X_test, y_test, y_pred):
plt.figure(figsize=(14, 8))
colors = sns.color_palette("coolwarm", n_colors=X_test.shape[1]) colors = sns.color_palette("coolwarm", n_colors=X_test.shape[1])
for i, col in enumerate(X_test.columns): # Conversion en DataFrame pour manip plus simple
plt.plot(X_test[col], y_pred, '.', color=colors[i], alpha=0.4, label=col) df = X_test.copy()
df["y_pred"] = y_pred
plt.title("Fonction apprise par LGBMRegressor (par feature)") # --- filtrage sur y_pred (ou sur chaque feature si tu veux)
mean = df["y_pred"].mean()
std = df["y_pred"].std()
df = df[(df["y_pred"] >= mean - 2 * std) & (df["y_pred"] <= mean + 2 * std)]
# --- tracé
for i, col in enumerate(X_test.columns):
plt.plot(df[col], df["y_pred"], '.', color=colors[i], alpha=0.4, label=col)
plt.title("Fonction apprise par LGBMRegressor (filtrée à ±2σ)")
plt.xlabel("Valeur feature") plt.xlabel("Valeur feature")
plt.ylabel("Valeur prédite") plt.ylabel("Valeur prédite")
plt.legend(loc="best") plt.legend(loc="right")
plt.grid(True) plt.grid(True)
out_path = "/home/souti/freqtrade/user_data/plots/lgbm_features.png" out_path = f"{path}/lgbm_features.png"
plt.savefig(out_path, bbox_inches="tight") plt.savefig(out_path, bbox_inches="tight")
plt.close() plt.close()
print(f"Graphique sauvegardé : {out_path}") print(f"Graphique sauvegardé : {out_path}")
def optuna(self, X_train, X_test, y_train, y_test): def optuna(self, path, X_train, X_test, y_train, y_test):
# Suppose que X_train, y_train sont déjà définis # Suppose que X_train, y_train sont déjà définis
# ou sinon : # ou sinon :
# X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42) # X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)
@@ -3161,7 +3226,7 @@ class Zeus_LGBMRegressor(IStrategy):
print(f"Meilleur RMSE : {study.best_value:.4f}") print(f"Meilleur RMSE : {study.best_value:.4f}")
# 🔹 Sauvegarder les résultats # 🔹 Sauvegarder les résultats
optuna_path = "/home/souti/freqtrade/user_data/plots/optuna_lgbm_results.txt" optuna_path = f"{path}/optuna_lgbm_results.txt"
with open(optuna_path, "w") as f: with open(optuna_path, "w") as f:
f.write(f"Best params:\n{study.best_params}\n") f.write(f"Best params:\n{study.best_params}\n")
f.write(f"Best RMSE: {study.best_value:.4f}\n") f.write(f"Best RMSE: {study.best_value:.4f}\n")

View File

@@ -0,0 +1,42 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Demo: fit a linear regression on the last `lookback` candles of an SMA24
# series and extrapolate the trend `future_steps` candles into the future.

# --- Synthetic data: a noisy sine wave standing in for an SMA24 column.
n = 100
df = pd.DataFrame({
    "sma24": np.sin(np.linspace(0, 6 * np.pi, n)) * 50 + 200 + np.random.randn(n) * 2
})

# --- Parameters
lookback = 30      # number of candles used to fit the regression
future_steps = 10  # number of future candles to project

# --- Training data: relative time index 0..lookback-1 against the most
# recent `lookback` values of the series.
X = np.arange(lookback).reshape(-1, 1)
y = df["sma24"].iloc[-lookback:].values

# --- Fit the linear trend.
model = LinearRegression()
model.fit(X, y)

# --- Project the trend over the next `future_steps` candles.
X_future = np.arange(lookback, lookback + future_steps).reshape(-1, 1)
y_future = model.predict(X_future)

# --- Full regression curve: fitted line over the history + projection.
# Fix: the historical half previously re-used the raw values `y`, so the
# orange "regression" curve was just the blue series plotted twice; the
# in-sample fit must come from model.predict(X).
y_fit = model.predict(X)
predicted_full = np.concatenate([y_fit, y_future])

# --- Plot: raw history in blue, fitted trend + forecast in dashed orange.
last_idx = df.index[-1]
plt.figure(figsize=(10, 5))
plt.plot(df.index[-lookback:], y, label="Historique (sma24)", color="blue")
plt.plot(
    np.arange(last_idx - lookback + 1, last_idx + future_steps + 1),
    predicted_full,
    label="Régression + prévision", color="orange", linestyle="--",
)
# Vertical marker at the boundary between observed data and forecast.
plt.axvline(last_idx, color="gray", linestyle=":")
plt.legend()
plt.title(f"Projection de SMA24 sur {future_steps} bougies futures")
plt.show()