LGBMRegressor calcul multi pairs et sauvegarde graph

This commit is contained in:
Jérôme Delacotte
2025-11-12 23:08:39 +01:00
parent a1f5bc1e41
commit a7135599bc
2 changed files with 146 additions and 39 deletions

View File

@@ -290,7 +290,7 @@ class Zeus_LGBMRegressor(IStrategy):
indicators = {'sma5', 'sma12', 'sma24', 'sma60'}
indicators_percent = {'percent', 'percent3', 'percent12', 'percent24', 'percent_1h', 'percent3_1h', 'percent12_1h', 'percent24_1h'}
mises = IntParameter(1, 50, default=5, space='buy', optimize=False, load=False)
mises = IntParameter(1, 50, default=5, space='buy', optimize=True, load=True)
ml_prob_buy = DecimalParameter(-0.5, 0.5, default=0.0, decimals=2, space='buy', optimize=True, load=True)
ml_prob_sell = DecimalParameter(-0.5, 0.5, default=0.0, decimals=2, space='sell', optimize=True, load=True)
@@ -908,10 +908,11 @@ class Zeus_LGBMRegressor(IStrategy):
dataframe["ms-0"] = dataframe["mid_smooth_24_deriv1"]
# dataframe["ms+10"] = dataframe["mid_smooth_24"].shift(-11)
if False and self.dp.runmode.value in ('backtest'):
if self.dp.runmode.value in ('backtest'):
self.trainModel(dataframe, metadata)
self.model = joblib.load('rf_model.pkl')
short_pair = self.getShortName(pair)
self.model = joblib.load(f"{short_pair}_rf_model.pkl")
# Préparer les features pour la prédiction
features = dataframe[self.model_indicators].fillna(0)
@@ -928,9 +929,12 @@ class Zeus_LGBMRegressor(IStrategy):
return dataframe
def trainModel(self, dataframe: DataFrame, metadata: dict):
pair = self.getShortName(metadata['pair'])
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option("display.width", 200)
path=f"user_data/plots/{pair}/"
os.makedirs(path, exist_ok=True)
# # Étape 1 : sélectionner numériques
# numeric_cols = dataframe.select_dtypes(include=['int64', 'float64']).columns
@@ -1010,7 +1014,7 @@ class Zeus_LGBMRegressor(IStrategy):
plt.yticks(rotation=0)
# --- Sauvegarde ---
output_path = "/home/souti/freqtrade/user_data/plots/Matrice_de_correlation_temperature.png"
output_path = f"{path}/Matrice_de_correlation_temperature.png"
plt.savefig(output_path, bbox_inches="tight", dpi=150)
plt.close(fig)
@@ -1050,7 +1054,7 @@ class Zeus_LGBMRegressor(IStrategy):
# )
# train_model.fit(X_train, y_train)
train_model, selected_features = self.optuna(X_train, X_test, y_train, y_test)
train_model, selected_features = self.optuna(path, X_train, X_test, y_train, y_test)
print("Features retenues :", list(selected_features))
# # 2⃣ Sélection des features AVANT calibration
@@ -1082,10 +1086,10 @@ class Zeus_LGBMRegressor(IStrategy):
# print(f"Accuracy: {acc:.3f}")
# 7⃣ Sauvegarde du modèle
joblib.dump(train_model, 'rf_model.pkl')
print("✅ Modèle sauvegardé sous rf_model.pkl")
joblib.dump(train_model, f"{pair}_rf_model.pkl")
print(f"✅ Modèle sauvegardé sous {pair}_rf_model.pkl")
self.analyze_model(train_model, X_train, X_test, y_train, y_test)
self.analyze_model(pair, train_model, X_train, X_test, y_train, y_test)
def inspect_model(self, model):
"""
@@ -1159,12 +1163,12 @@ class Zeus_LGBMRegressor(IStrategy):
print("\n===== ✅ FIN DE LINSPECTION =====")
def analyze_model(self, model, X_train, X_test, y_train, y_test):
def analyze_model(self, pair, model, X_train, X_test, y_train, y_test):
"""
Analyse complète d'un modèle ML supervisé (classification binaire).
Affiche performances, importance des features, matrices, seuils, etc.
"""
output_dir = "user_data/plots"
output_dir = f"user_data/plots/{pair}/"
os.makedirs(output_dir, exist_ok=True)
# ---- Importance des features ----
@@ -1206,8 +1210,7 @@ class Zeus_LGBMRegressor(IStrategy):
print(f"RMSE: {rmse:.5f} | R²: {r2:.3f}")
# --- Création du dossier de sortie ---
plot_dir = "/home/souti/freqtrade/user_data/plots"
os.makedirs(plot_dir, exist_ok=True)
os.makedirs(output_dir, exist_ok=True)
# --- Graphique prédiction vs réel ---
plt.figure(figsize=(8, 8))
@@ -1225,36 +1228,41 @@ class Zeus_LGBMRegressor(IStrategy):
plt.legend()
# --- Sauvegarde ---
plot_path = os.path.join(plot_dir, "LightGBM_regression_pred_vs_real.png")
plot_path = os.path.join(output_dir, "LightGBM_regression_pred_vs_real.png")
plt.savefig(plot_path, bbox_inches="tight", dpi=200)
plt.close()
self.plot_pred_vs_real_filtered(model, X_test, y_test, preds, output_dir)
print(f"✅ Graphique sauvegardé : {plot_path}")
# save_dir = "/home/souti/freqtrade/user_data/plots/"
# os.makedirs(save_dir, exist_ok=True)
# ax = lgb.plot_tree(model, tree_index=0, figsize=(30, 20), show_info=["split_gain", "internal_value", "internal_count"])
# plt.title("Arbre de décision n°0")
# plt.savefig(os.path.join(output_dir, "lgbm_tree_0.png"), bbox_inches="tight")
# plt.close()
ax = lgb.plot_tree(model, tree_index=0, figsize=(30, 20),
show_info=["split_gain", "internal_value", "internal_count"])
plt.title("Arbre de décision n°0")
plt.savefig(os.path.join(plot_dir, "lgbm_tree_0.png"), bbox_inches="tight")
for i in range(5):
ax = lgb.plot_tree(model, tree_index=i, figsize=(20, 12))
plt.title(f"Arbre {i}")
plt.savefig(os.path.join(output_dir, f"lgbm_tree_{i}.png"), bbox_inches="tight")
plt.close()
ax = lgb.plot_tree(model, figsize=(40, 20))
plt.title("Vue globale du modèle LGBM")
plt.savefig(os.path.join(plot_dir, "lgbm_all_trees.png"), bbox_inches="tight")
plt.savefig(os.path.join(output_dir, "lgbm_all_trees.png"), bbox_inches="tight")
plt.close()
# X_test = np.linspace(0, 10, 1000).reshape(-1, 1)
y_pred = model.predict(X_test)
self.graphFonctionApprise(X_test, y_test, y_pred)
self.graphFonctionAppriseFeature(X_test, y_test, y_pred)
self.graphFonctionApprise(output_dir, X_test, y_test, y_pred)
self.graphFonctionAppriseFeature(output_dir, X_test, y_test, y_pred)
# ==============================================================================
ax = lgb.plot_importance(model, max_num_features=30, figsize=(12, 6))
plt.title("Importance des features - LGBM")
plt.savefig(os.path.join(plot_dir, "lgbm_feature_importance.png"), bbox_inches="tight")
plt.savefig(os.path.join(output_dir, "lgbm_feature_importance.png"), bbox_inches="tight")
plt.close()
corr = X_train.corr() * 100 # en pourcentage
@@ -1262,7 +1270,7 @@ class Zeus_LGBMRegressor(IStrategy):
plt.figure(figsize=(20, 16))
sns.heatmap(corr, cmap="coolwarm", center=0, annot=False, fmt=".1f", cbar_kws={'label': 'Corrélation (%)'})
plt.title("Matrice de corrélation (%)")
plt.savefig(os.path.join(plot_dir, "correlation_matrix.png"), bbox_inches="tight")
plt.savefig(os.path.join(output_dir, "correlation_matrix.png"), bbox_inches="tight")
plt.close()
plt.figure(figsize=(10, 6))
@@ -1270,11 +1278,53 @@ class Zeus_LGBMRegressor(IStrategy):
plt.xlabel("Valeurs réelles")
plt.ylabel("Prédictions du modèle")
plt.title("Comparaison y_test vs y_pred")
plt.savefig(os.path.join(plot_dir, "ytest_vs_ypred.png"), bbox_inches="tight")
plt.savefig(os.path.join(output_dir, "ytest_vs_ypred.png"), bbox_inches="tight")
plt.close()
print("\n===== ✅ FIN DE LANALYSE =====")
def plot_pred_vs_real_filtered(self, model, X_test, y_test, preds, output_dir, top_n=5):
    """
    Plot predicted vs. actual values, restricted to rows where the model's
    top-N most important features all lie within 3 standard deviations.

    Parameters
    ----------
    model : fitted estimator exposing ``feature_importances_`` (e.g. LGBM).
    X_test : pd.DataFrame of test features (columns must match training).
    y_test : actual target values aligned with ``X_test``.
    preds : array of model predictions aligned with ``X_test``.
    output_dir : directory where the PNG is written.
    top_n : number of most-important features used for the outlier filter.
    """
    # --- 1. Rank features by importance and keep the top N ---
    importance_df = pd.DataFrame({
        "feature": X_test.columns,
        "importance": model.feature_importances_,
    }).sort_values(by="importance", ascending=False)
    top_features = importance_df.head(top_n)["feature"].tolist()
    print(f"Top {top_n} features: {top_features}")

    # --- 2. Keep only rows where every top feature is within 3 sigma ---
    X_top = X_test[top_features]
    # Guard against zero-variance features: a std of 0 turns the z-score
    # into NaN, which previously made the mask silently drop every row.
    std = X_top.std().replace(0, 1)
    mask = np.all(np.abs((X_top - X_top.mean()) / std) < 3, axis=1)
    y_filtered = y_test[mask]
    preds_filtered = preds[mask]

    # --- 3. Scatter plot with the ideal y = x reference line ---
    plt.figure(figsize=(8, 8))
    plt.scatter(y_filtered, preds_filtered, alpha=0.4, s=15, c='blue', label=f"Top {top_n} features")
    plt.xlabel("Valeurs réelles", fontsize=12)
    plt.ylabel("Valeurs prédites", fontsize=12)
    plt.title(f"LightGBM Régression — Prédiction vs Réel (filtré sur top {top_n} features)", fontsize=14)
    plt.plot(
        [y_filtered.min(), y_filtered.max()],
        [y_filtered.min(), y_filtered.max()],
        'r--',
        linewidth=1,
        label="Ligne idéale"
    )
    plt.legend()
    plt.grid(True)
    # os.path.join is safer than f-string path concatenation (handles a
    # trailing slash in output_dir cleanly).
    out_path = os.path.join(output_dir, f"lgbm_pred_vs_real_top{top_n}.png")
    plt.savefig(out_path, bbox_inches="tight")
    plt.close()
def plot_threshold_analysis(self, y_true, y_proba, step=0.05, save_path=None):
"""
Affiche la précision, le rappel et le F1-score selon le seuil de décision.
@@ -1708,7 +1758,7 @@ class Zeus_LGBMRegressor(IStrategy):
def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
dataframe.loc[
(
(dataframe['ml_prob'] > self.ml_prob_buy.value)
(dataframe['ml_prob'] > dataframe['sma24_deriv1'])
), ['enter_long', 'enter_tag']] = (1, f"ml_prob")
dataframe['test'] = np.where(dataframe['enter_long'] == 1, dataframe['close'] * 1.01, np.nan)
@@ -1864,7 +1914,7 @@ class Zeus_LGBMRegressor(IStrategy):
def populate_sell_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
dataframe.loc[
(
(dataframe['ml_prob'] < self.ml_prob_sell.value)
(dataframe['ml_prob'] < dataframe['sma24_deriv1'])
), ['exit_long', 'exit_tag']] = (1, f"ml_prob")
return dataframe
@@ -1872,6 +1922,7 @@ class Zeus_LGBMRegressor(IStrategy):
def adjust_trade_position(self, trade: Trade, current_time: datetime,
current_rate: float, current_profit: float, min_stake: float,
max_stake: float, **kwargs):
return None
# ne rien faire si ordre deja en cours
if trade.has_open_orders:
# print("skip open orders")
@@ -3078,7 +3129,7 @@ class Zeus_LGBMRegressor(IStrategy):
return selected_corr
def graphFonctionApprise(self, X_test, y_test, y_pred):
def graphFonctionApprise(self, path, X_test, y_test, y_pred):
# Exemple : trier les valeurs de X_test et les prédictions
x_sorted = np.argsort(X_test.iloc[:, 0])
x = X_test.iloc[:, 0].iloc[x_sorted]
@@ -3095,33 +3146,47 @@ class Zeus_LGBMRegressor(IStrategy):
plt.legend()
plt.grid(True)
out_path = "/home/souti/freqtrade/user_data/plots/lgbm_function.png"
out_path = f"{path}/lgbm_function.png"
plt.savefig(out_path, bbox_inches="tight")
plt.close()
print(f"Graphique sauvegardé : {out_path}")
def graphFonctionAppriseFeature(self, X_test, y_test, y_pred):
plt.figure(figsize=(14, 8))
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
def graphFonctionAppriseFeature(self, path, X_test, y_test, y_pred):
plt.figure(figsize=(14, 8))
colors = sns.color_palette("coolwarm", n_colors=X_test.shape[1])
for i, col in enumerate(X_test.columns):
plt.plot(X_test[col], y_pred, '.', color=colors[i], alpha=0.4, label=col)
# Conversion en DataFrame pour manip plus simple
df = X_test.copy()
df["y_pred"] = y_pred
plt.title("Fonction apprise par LGBMRegressor (par feature)")
# --- filtrage sur y_pred (ou sur chaque feature si tu veux)
mean = df["y_pred"].mean()
std = df["y_pred"].std()
df = df[(df["y_pred"] >= mean - 2 * std) & (df["y_pred"] <= mean + 2 * std)]
# --- tracé
for i, col in enumerate(X_test.columns):
plt.plot(df[col], df["y_pred"], '.', color=colors[i], alpha=0.4, label=col)
plt.title("Fonction apprise par LGBMRegressor (filtrée à ±2σ)")
plt.xlabel("Valeur feature")
plt.ylabel("Valeur prédite")
plt.legend(loc="best")
plt.legend(loc="right")
plt.grid(True)
out_path = "/home/souti/freqtrade/user_data/plots/lgbm_features.png"
out_path = f"{path}/lgbm_features.png"
plt.savefig(out_path, bbox_inches="tight")
plt.close()
print(f"Graphique sauvegardé : {out_path}")
def optuna(self, X_train, X_test, y_train, y_test):
def optuna(self, path, X_train, X_test, y_train, y_test):
# Suppose que X_train, y_train sont déjà définis
# ou sinon :
# X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)
@@ -3161,7 +3226,7 @@ class Zeus_LGBMRegressor(IStrategy):
print(f"Meilleur RMSE : {study.best_value:.4f}")
# 🔹 Sauvegarder les résultats
optuna_path = "/home/souti/freqtrade/user_data/plots/optuna_lgbm_results.txt"
optuna_path = f"{path}/optuna_lgbm_results.txt"
with open(optuna_path, "w") as f:
f.write(f"Best params:\n{study.best_params}\n")
f.write(f"Best RMSE: {study.best_value:.4f}\n")

View File

@@ -0,0 +1,42 @@
"""Demo: fit a linear trend on the last `lookback` values of an SMA series
and project it `future_steps` candles into the future.

Uses np.polyfit (degree 1) instead of sklearn's LinearRegression: for a
single feature both compute the same ordinary-least-squares line, so the
scikit-learn dependency is unnecessary.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Example: dataframe with a synthetic sma24 column (noisy sine wave)
n = 100
df = pd.DataFrame({
    "sma24": np.sin(np.linspace(0, 6 * np.pi, n)) * 50 + 200 + np.random.randn(n) * 2
})

# --- parameters
lookback = 30      # number of candles used for the fit
future_steps = 10  # how many future candles to estimate

# --- prepare the data
x = np.arange(lookback)                    # 0 .. lookback-1
y = df["sma24"].iloc[-lookback:].values    # most recent values

# --- fit a degree-1 polynomial (ordinary least squares line)
coeffs = np.polyfit(x, y, deg=1)

# --- predict the future values
x_future = np.arange(lookback, lookback + future_steps)
y_future = np.polyval(coeffs, x_future)

# --- rebuild the full curve (history + forecast)
predicted_full = np.concatenate([y, y_future])

# --- plot
plt.figure(figsize=(10, 5))
plt.plot(df.index[-lookback:], y, label="Historique (sma24)", color="blue")
plt.plot(
    np.arange(df.index[-1] - lookback + 1, df.index[-1] + future_steps + 1),
    predicted_full,
    label="Régression + prévision", color="orange", linestyle="--"
)
plt.axvline(df.index[-1], color="gray", linestyle=":")
plt.legend()
plt.title(f"Projection de SMA24 sur {future_steps} bougies futures")
plt.show()