def trainModel(self, dataframe: DataFrame, metadata: dict) -> None:
    """Train a RandomForestRegressor on this pair's indicators and persist it.

    The target is the (scaled) future move of ``sma24`` over the next 24
    candles, so the model is a regressor predicting magnitude, not a
    classifier.  The fitted model is saved to ``rf_model.pkl`` and basic
    out-of-sample regression metrics are printed.

    Parameters
    ----------
    dataframe : DataFrame
        Candle dataframe; must contain ``sma24`` and every column listed in
        ``self.model_indicators``.
    metadata : dict
        Freqtrade pair metadata (unused here, kept for interface symmetry).
    """
    df = dataframe.copy()

    # Target: scaled move of sma24 over the next 24 candles.
    # NOTE(review): the 1000x factor looks like a readability scale for the
    # printed metrics — confirm before comparing runs with different scales.
    df['target'] = 1000 * (df['sma24'].shift(-24) - df['sma24'])

    # Drop rows made NaN by the shift (the last 24 candles) and by warmup.
    df = df.dropna()

    X = df[self.model_indicators]
    y = df['target']
    # shuffle=False keeps chronological order: the test set is strictly in
    # the future of the train set, avoiding look-ahead leakage.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )

    train_model = RandomForestRegressor(
        n_estimators=300,
        max_depth=None,
        random_state=42,
        n_jobs=-1,
    )
    train_model.fit(X_train, y_train)

    # Persist the model so populate_indicators can reload it next run.
    joblib.dump(train_model, 'rf_model.pkl')
    print("✅ Modèle sauvegardé sous rf_model.pkl")

    # Single prediction pass (the original predicted X_test twice).
    y_pred = train_model.predict(X_test)

    print("R² :", r2_score(y_test, y_pred))
    # FIX: the original printed the *MSE* under the label "RMSE" (the
    # squared=False argument had been commented out).  Take the square root
    # explicitly — portable across scikit-learn versions.
    print("RMSE :", mean_squared_error(y_test, y_pred) ** 0.5)
    print("MAE :", mean_absolute_error(y_test, y_pred))
+ """ + + print("===== 🔍 INFORMATIONS DU MODÈLE =====") + + # Type de modèle + print(f"Type : {type(model).__name__}") + print(f"Module : {model.__class__.__module__}") + + # Hyperparamètres + if hasattr(model, "get_params"): + params = model.get_params() + print(f"\n===== ⚙️ HYPERPARAMÈTRES ({len(params)}) =====") + for k, v in params.items(): + print(f"{k}: {v}") + + # Nombre d’estimateurs + if hasattr(model, "n_estimators"): + print(f"\nNombre d’estimateurs : {model.n_estimators}") + + # Importance des features + if hasattr(model, "feature_importances_"): + print("\n===== 📊 IMPORTANCE DES FEATURES =====") + + # Correction ici : + feature_names = getattr(model, "feature_names_in_", None) + if isinstance(feature_names, np.ndarray): + feature_names = feature_names.tolist() + elif feature_names is None: + feature_names = [f"feature_{i}" for i in range(len(model.feature_importances_))] + + fi = pd.DataFrame({ + "feature": feature_names, + "importance": model.feature_importances_ + }).sort_values(by="importance", ascending=False) + + print(fi) + + # Coefficients (modèles linéaires) + if hasattr(model, "coef_"): + print("\n===== ➗ COEFFICIENTS =====") + coef = np.array(model.coef_) + if coef.ndim == 1: + for i, c in enumerate(coef): + print(f"Feature {i}: {c:.6f}") + else: + print(coef) + + # Intercept + if hasattr(model, "intercept_"): + print("\nIntercept :", model.intercept_) + + # Classes connues + if hasattr(model, "classes_"): + print("\n===== 🎯 CLASSES =====") + print(model.classes_) + + # Scores internes + for attr in ["best_score_", "best_iteration_", "best_ntree_limit", "score_"]: + if hasattr(model, attr): + print(f"\n{attr} = {getattr(model, attr)}") + + # Méthodes disponibles + print("\n===== 🧩 MÉTHODES DISPONIBLES =====") + methods = [m for m, _ in inspect.getmembers(model, predicate=inspect.ismethod)] + print(", ".join(methods[:15]) + ("..." 
if len(methods) > 15 else "")) + + print("\n===== ✅ FIN DE L’INSPECTION =====") + + def analyze_model(self, model, X_train, X_test, y_train, y_test): + """ + Analyse complète d'un modèle ML supervisé (classification binaire). + Affiche performances, importance des features, matrices, seuils, etc. + """ + output_dir = "user_data/plots" + os.makedirs(output_dir, exist_ok=True) + + # ---- Prédictions ---- + preds = model.predict(X_test) + probs = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else preds + + # ---- Performances globales ---- + print("===== 📊 ÉVALUATION DU MODÈLE =====") + print("Colonnes du modèle :", model.feature_names_in_) + print("Colonnes X_test :", list(X_test.columns)) + print(f"Accuracy: {accuracy_score(y_test, preds):.3f}") + print(f"ROC AUC : {roc_auc_score(y_test, probs):.3f}") + + print("TN (True Negative) / FP (False Positive)") + print("FN (False Negative) / TP (True Positive)") + print("\nRapport de classification :\n", classification_report(y_test, preds)) + + # | Élément | Valeur | Signification | + # | ------------------- | ------ | ----------------------------------------------------------- | + # | TN (True Negative) | 983 | Modèle a correctement prédit 0 (pas d’achat) | + # | FP (False Positive) | 43 | Modèle a prédit 1 alors que c’était 0 (faux signal d’achat) | + # | FN (False Negative) | 108 | Modèle a prédit 0 alors que c’était 1 (manqué un achat) | + # | TP (True Positive) | 19 | Modèle a correctement prédit 1 (bon signal d’achat) | + + # ---- Matrice de confusion ---- + cm = confusion_matrix(y_test, preds) + print("Matrice de confusion :\n", cm) + + plt.figure(figsize=(4, 4)) + plt.imshow(cm, cmap="Blues") + plt.title("Matrice de confusion") + plt.xlabel("Prédit") + plt.ylabel("Réel") + for i in range(2): + for j in range(2): + plt.text(j, i, cm[i, j], ha="center", va="center", color="black") + # plt.show() + plt.savefig(os.path.join(output_dir, "Matrice de confusion.png"), bbox_inches="tight") + 
plt.close() + + # ---- Importance des features ---- + if hasattr(model, "feature_importances_"): + print("\n===== 🔍 IMPORTANCE DES FEATURES =====") + importance = pd.DataFrame({ + "feature": X_train.columns, + "importance": model.feature_importances_ + }).sort_values(by="importance", ascending=False) + print(importance) + importance.plot.bar(x="feature", y="importance", legend=False, figsize=(6, 3)) + plt.title("Importance des features") + # plt.show() + plt.savefig(os.path.join(output_dir, "Importance des features.png"), bbox_inches="tight") + plt.close() + + # ---- Arbre de décision (extrait) ---- + if hasattr(model, "estimators_"): + print("\n===== 🌳 EXTRAIT D’UN ARBRE =====") + print(export_text(model.estimators_[0], feature_names=list(X_train.columns))[:800]) + + # ---- Précision selon le seuil ---- + thresholds = np.linspace(0.1, 0.9, 9) + print("\n===== ⚙️ PERFORMANCE SELON SEUIL =====") + for t in thresholds: + preds_t = (probs > t).astype(int) + acc = accuracy_score(y_test, preds_t) + print(f"Seuil {t:.1f} → précision {acc:.3f}") + + # ---- ROC Curve ---- + fpr, tpr, _ = roc_curve(y_test, probs) + plt.figure(figsize=(5, 4)) + plt.plot(fpr, tpr, label="ROC curve") + plt.plot([0, 1], [0, 1], linestyle="--", color="gray") + plt.xlabel("Taux de faux positifs") + plt.ylabel("Taux de vrais positifs") + plt.title("Courbe ROC") + plt.legend() + # plt.show() + plt.savefig(os.path.join(output_dir, "Courbe ROC.png"), bbox_inches="tight") + plt.close() + + # ---- Interprétation SHAP (optionnelle) ---- + try: + import shap + + print("\n===== 💡 ANALYSE SHAP =====") + explainer = shap.TreeExplainer(model) + shap_values = explainer.shap_values(X_test) + # shap.summary_plot(shap_values[1], X_test) + # Vérifie le type de sortie de shap_values + if isinstance(shap_values, list): + # Cas des modèles de classification (plusieurs classes) + shap_values_to_plot = shap_values[0] if len(shap_values) == 1 else shap_values[1] + else: + shap_values_to_plot = shap_values + + # 
Ajustement des dimensions au besoin + if shap_values_to_plot.shape[1] != X_test.shape[1]: + print(f"⚠️ Mismatch dimensions SHAP ({shap_values_to_plot.shape[1]}) vs X_test ({X_test.shape[1]})") + min_dim = min(shap_values_to_plot.shape[1], X_test.shape[1]) + shap_values_to_plot = shap_values_to_plot[:, :min_dim] + X_to_plot = X_test.iloc[:, :min_dim] + else: + X_to_plot = X_test + + plt.figure(figsize=(12, 10)) + shap.summary_plot(shap_values_to_plot, X_to_plot, show=False) + plt.savefig(os.path.join(output_dir, "shap_summary.png"), bbox_inches="tight") + plt.close() + except ImportError: + print("\n(SHAP non installé — `pip install shap` pour activer l’analyse SHAP.)") + + print("\n===== ✅ FIN DE L’ANALYSE =====") + def populateDataframe(self, dataframe, timeframe='5m'): heikinashi = qtpylib.heikinashi(dataframe) dataframe['haopen'] = heikinashi['open'] @@ -1130,6 +1404,7 @@ class Zeus_8_3_2_B_4_2(IStrategy): # dataframe['atr'] = tr.rolling(window=self.DEFAULT_PARAMS['atr_period']).mean() dataframe['volume_sma_deriv'] = dataframe['volume'] * dataframe['sma5_deriv1'] / (dataframe['volume'].rolling(5).mean()) + self.calculeDerivees(dataframe, 'volume', timeframe=timeframe, ema_period=12) self.setTrends(dataframe) @@ -1253,13 +1528,13 @@ class Zeus_8_3_2_B_4_2(IStrategy): eps_d1_series = eps_d1_series.fillna(global_eps_d1).replace(0, global_eps_d1) eps_d2_series = eps_d2_series.fillna(global_eps_d2).replace(0, global_eps_d2) - if verbose and self.dp.runmode.value in ('backtest'): - stats = dataframe[[d1_col, d2_col]].agg(['min', 'max']).T - stats['abs_max'] = dataframe[[d1_col, d2_col]].abs().max(axis=0) - print(f"---- Derivatives stats {timeframe}----") - print(stats) - print(f"rolling window = {window}, coef = {coef}, ema_period = {ema_period}") - print("---------------------------") + # if verbose and self.dp.runmode.value in ('backtest'): + # stats = dataframe[[d1_col, d2_col]].agg(['min', 'max']).T + # stats['abs_max'] = dataframe[[d1_col, 
"""Toy end-to-end RandomForestRegressor example.

Fits a regressor on five hand-written candles and scores it.  NOTE: the
metrics below are computed in-sample (the model predicts the very rows it
was trained on), so they are optimistic by construction — this file is a
smoke test for the API, not an evaluation.
"""
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error
import pandas as pd


def main() -> None:
    """Build the toy dataset, fit the model and print in-sample metrics."""
    # Example data.
    df = pd.DataFrame({
        'sma5': [1, 2, 3, 4, 5],
        'sma24': [2, 2, 2, 3, 4],
        'close': [100, 102, 101, 105, 108],
    })
    # Relative gain of the next candle; the last row has no future candle
    # and is therefore NaN.
    df['future_gain'] = (df['close'].shift(-1) - df['close']) / df['close']

    # Drop the last (NaN-target) row from both features and target.
    X = df[['sma5', 'sma24']][:-1]
    y = df['future_gain'][:-1]

    model = RandomForestRegressor(n_estimators=200, random_state=42)
    model.fit(X, y)
    y_pred = model.predict(X)  # in-sample predictions

    print("R²:", r2_score(y, y_pred))
    print("MAE:", mean_absolute_error(y, y_pred))
    print("Prédictions :", y_pred)


# FIX: the original ran everything at import time; guard the entry point so
# importing this module no longer trains a model as a side effect.
if __name__ == "__main__":
    main()