RandomForestRegressor
This commit is contained in:
@@ -35,6 +35,23 @@ from collections import Counter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Machine Learning
|
||||
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
|
||||
from sklearn.metrics import accuracy_score
|
||||
import joblib
|
||||
import matplotlib.pyplot as plt
|
||||
from sklearn.metrics import (
|
||||
classification_report,
|
||||
confusion_matrix,
|
||||
accuracy_score,
|
||||
roc_auc_score,
|
||||
roc_curve,
|
||||
)
|
||||
from sklearn.tree import export_text
|
||||
import inspect
|
||||
|
||||
|
||||
from tabulate import tabulate
|
||||
|
||||
@@ -58,6 +75,10 @@ def normalize(df):
|
||||
|
||||
|
||||
class Zeus_8_3_2_B_4_2(IStrategy):
|
||||
# Machine Learning
|
||||
model = joblib.load('rf_model.pkl')
|
||||
model_indicators = ['rsi_deriv1', "max_rsi_12", "mid_smooth_5_deriv1", "volume_deriv1"]
|
||||
|
||||
levels = [1, 2, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
|
||||
# startup_candle_count = 12 * 24 * 5
|
||||
|
||||
@@ -1009,8 +1030,261 @@ class Zeus_8_3_2_B_4_2(IStrategy):
|
||||
|
||||
dataframe['stop_buying'] = latched
|
||||
|
||||
self.trainModel(dataframe, metadata)
|
||||
|
||||
# Préparer les features pour la prédiction
|
||||
features = dataframe[self.model_indicators].fillna(0)
|
||||
|
||||
# Prédiction : probabilité que le prix monte
|
||||
# probs = self.model.predict_proba(features)[:, 1]
|
||||
|
||||
# Sauvegarder la probabilité pour l’analyse
|
||||
# dataframe['ml_prob'] = probs
|
||||
|
||||
# self.inspect_model(self.model)
|
||||
|
||||
return dataframe
|
||||
|
||||
def trainModel(self, dataframe: DataFrame, metadata: dict):
    """Fit a RandomForest regressor on this pair's indicators and persist it.

    The model learns to predict the scaled forward move of ``sma24`` over the
    next 24 candles from the features listed in ``self.model_indicators``.
    The fitted model is saved to ``rf_model.pkl`` (the file loaded at class
    level into ``self.model``) and hold-out metrics are printed.

    :param dataframe: indicator-populated OHLCV dataframe; must contain
        ``sma24`` and every column in ``self.model_indicators``.
    :param metadata: freqtrade pair metadata (currently unused, kept for the
        standard strategy-callback signature).
    """
    df = dataframe.copy()

    # Regression target: scaled change of sma24 over the next 24 candles.
    # (This is a continuous value, not a 0/1 class label.)
    df['target'] = (1000 * (df['sma24'].shift(-24) - df['sma24']))

    # Drop rows invalidated by the forward shift and indicator warm-up.
    df = df.dropna()

    # Chronological split — shuffle=False avoids look-ahead leakage on a
    # time series (train strictly precedes test).
    X = df[self.model_indicators]
    y = df['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    # Fit the forest (fixed seed for reproducibility, all cores).
    train_model = RandomForestRegressor(
        n_estimators=300,
        max_depth=None,
        random_state=42,
        n_jobs=-1
    )
    train_model.fit(X_train, y_train)

    # Persist the model so populate_indicators can reload it next run.
    joblib.dump(train_model, 'rf_model.pkl')
    print("✅ Modèle sauvegardé sous rf_model.pkl")

    # Hold-out evaluation — predict once (previously X_test was predicted
    # twice and the first result discarded).
    y_pred = train_model.predict(X_test)

    print("R² :", r2_score(y_test, y_pred))
    # RMSE is the square root of MSE. The previous code printed the raw MSE
    # under the "RMSE" label because `squared=False` (removed in recent
    # scikit-learn versions) had been commented out.
    print("RMSE :", mean_squared_error(y_test, y_pred) ** 0.5)
    print("MAE :", mean_absolute_error(y_test, y_pred))

    # Optional deep-dive diagnostics (classification-oriented report):
    # self.analyze_model(train_model, X_train, X_test, y_train, y_test)
def inspect_model(self, model):
    """Print a structured report about an already-fitted ML model.

    Works with scikit-learn, xgboost, lightgbm, catboost, ... — every
    section is guarded by ``hasattr`` so arbitrary estimators are safe.
    Purely diagnostic: writes to stdout only, returns ``None``.

    :param model: any fitted (or even unfitted) estimator-like object.
    """
    print("===== 🔍 INFORMATIONS DU MODÈLE =====")

    # Identity: concrete class and the module it comes from.
    print(f"Type : {type(model).__name__}")
    print(f"Module : {model.__class__.__module__}")

    # Hyperparameters (scikit-learn style get_params contract).
    if hasattr(model, "get_params"):
        hyperparams = model.get_params()
        print(f"\n===== ⚙️ HYPERPARAMÈTRES ({len(hyperparams)}) =====")
        for name, value in hyperparams.items():
            print(f"{name}: {value}")

    # Ensemble size, when the estimator exposes it.
    if hasattr(model, "n_estimators"):
        print(f"\nNombre d’estimateurs : {model.n_estimators}")

    # Feature importances (tree ensembles).
    if hasattr(model, "feature_importances_"):
        print("\n===== 📊 IMPORTANCE DES FEATURES =====")

        # Prefer the names recorded at fit time; fall back to synthetic ones.
        names = getattr(model, "feature_names_in_", None)
        if isinstance(names, np.ndarray):
            names = names.tolist()
        elif names is None:
            names = [f"feature_{idx}" for idx in range(len(model.feature_importances_))]

        importance_table = pd.DataFrame({
            "feature": names,
            "importance": model.feature_importances_
        }).sort_values(by="importance", ascending=False)

        print(importance_table)

    # Coefficients (linear models).
    if hasattr(model, "coef_"):
        print("\n===== ➗ COEFFICIENTS =====")
        coefs = np.array(model.coef_)
        if coefs.ndim == 1:
            for idx, value in enumerate(coefs):
                print(f"Feature {idx}: {value:.6f}")
        else:
            print(coefs)

    # Intercept (linear models).
    if hasattr(model, "intercept_"):
        print("\nIntercept :", model.intercept_)

    # Known class labels (classifiers).
    if hasattr(model, "classes_"):
        print("\n===== 🎯 CLASSES =====")
        print(model.classes_)

    # Internal scores exposed by various boosting libraries.
    for score_attr in ["best_score_", "best_iteration_", "best_ntree_limit", "score_"]:
        if hasattr(model, score_attr):
            print(f"\n{score_attr} = {getattr(model, score_attr)}")

    # Bound methods available on the instance (first 15, ellipsis if more).
    print("\n===== 🧩 MÉTHODES DISPONIBLES =====")
    methods = [name for name, _ in inspect.getmembers(model, predicate=inspect.ismethod)]
    suffix = "..." if len(methods) > 15 else ""
    print(", ".join(methods[:15]) + suffix)

    print("\n===== ✅ FIN DE L’INSPECTION =====")
||||
def analyze_model(self, model, X_train, X_test, y_train, y_test):
    """Full diagnostic report for a supervised binary-classification model.

    Prints global metrics (accuracy, ROC AUC, classification report),
    confusion matrix, feature importances, an excerpt of one tree,
    per-threshold accuracy, the ROC curve, and an optional SHAP summary.
    Plots are saved as PNG files under ``user_data/plots``.

    NOTE(review): every metric here assumes a binary classifier
    (``predict_proba``, ROC, confusion matrix on 0/1 labels) — confirm
    before calling it with the regressor that `trainModel` now fits.

    :param model: fitted estimator exposing ``predict`` (and ideally
        ``predict_proba`` / ``feature_importances_`` / ``estimators_``).
    :param X_train: training features (used for column names / tree dump).
    :param X_test: hold-out features.
    :param y_train: training labels (unused here, kept for symmetry).
    :param y_test: hold-out labels, expected binary 0/1.
    """
    output_dir = "user_data/plots"
    # NOTE(review): relies on `os` being imported at module level — not
    # visible in this excerpt, confirm it exists at the top of the file.
    os.makedirs(output_dir, exist_ok=True)

    # ---- Predictions ----
    preds = model.predict(X_test)
    # Probability of the positive class when available; otherwise fall back
    # to the raw predictions (degrades ROC/threshold sections accordingly).
    probs = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else preds

    # ---- Global performance ----
    print("===== 📊 ÉVALUATION DU MODÈLE =====")
    print("Colonnes du modèle :", model.feature_names_in_)
    print("Colonnes X_test :", list(X_test.columns))
    print(f"Accuracy: {accuracy_score(y_test, preds):.3f}")
    print(f"ROC AUC : {roc_auc_score(y_test, probs):.3f}")

    print("TN (True Negative) / FP (False Positive)")
    print("FN (False Negative) / TP (True Positive)")
    print("\nRapport de classification :\n", classification_report(y_test, preds))

    # Example reading of a past confusion matrix (kept for reference):
    # | Element             | Value  | Meaning                                                     |
    # | ------------------- | ------ | ----------------------------------------------------------- |
    # | TN (True Negative)  | 983    | Model correctly predicted 0 (no buy)                        |
    # | FP (False Positive) | 43     | Model predicted 1 when it was 0 (false buy signal)          |
    # | FN (False Negative) | 108    | Model predicted 0 when it was 1 (missed a buy)              |
    # | TP (True Positive)  | 19     | Model correctly predicted 1 (good buy signal)               |

    # ---- Confusion matrix ----
    cm = confusion_matrix(y_test, preds)
    print("Matrice de confusion :\n", cm)

    plt.figure(figsize=(4, 4))
    plt.imshow(cm, cmap="Blues")
    plt.title("Matrice de confusion")
    plt.xlabel("Prédit")
    plt.ylabel("Réel")
    # Annotate each of the 2x2 cells with its count (binary assumption).
    for i in range(2):
        for j in range(2):
            plt.text(j, i, cm[i, j], ha="center", va="center", color="black")
    # plt.show()
    plt.savefig(os.path.join(output_dir, "Matrice de confusion.png"), bbox_inches="tight")
    plt.close()

    # ---- Feature importances ----
    if hasattr(model, "feature_importances_"):
        print("\n===== 🔍 IMPORTANCE DES FEATURES =====")
        importance = pd.DataFrame({
            "feature": X_train.columns,
            "importance": model.feature_importances_
        }).sort_values(by="importance", ascending=False)
        print(importance)
        importance.plot.bar(x="feature", y="importance", legend=False, figsize=(6, 3))
        plt.title("Importance des features")
        # plt.show()
        plt.savefig(os.path.join(output_dir, "Importance des features.png"), bbox_inches="tight")
        plt.close()

    # ---- Decision-tree excerpt (first 800 chars of the first estimator) ----
    if hasattr(model, "estimators_"):
        print("\n===== 🌳 EXTRAIT D’UN ARBRE =====")
        print(export_text(model.estimators_[0], feature_names=list(X_train.columns))[:800])

    # ---- Accuracy as a function of the decision threshold ----
    thresholds = np.linspace(0.1, 0.9, 9)
    print("\n===== ⚙️ PERFORMANCE SELON SEUIL =====")
    for t in thresholds:
        preds_t = (probs > t).astype(int)
        acc = accuracy_score(y_test, preds_t)
        print(f"Seuil {t:.1f} → précision {acc:.3f}")

    # ---- ROC curve ----
    fpr, tpr, _ = roc_curve(y_test, probs)
    plt.figure(figsize=(5, 4))
    plt.plot(fpr, tpr, label="ROC curve")
    plt.plot([0, 1], [0, 1], linestyle="--", color="gray")
    plt.xlabel("Taux de faux positifs")
    plt.ylabel("Taux de vrais positifs")
    plt.title("Courbe ROC")
    plt.legend()
    # plt.show()
    plt.savefig(os.path.join(output_dir, "Courbe ROC.png"), bbox_inches="tight")
    plt.close()

    # ---- SHAP interpretation (optional dependency) ----
    try:
        import shap

        print("\n===== 💡 ANALYSE SHAP =====")
        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X_test)
        # shap.summary_plot(shap_values[1], X_test)
        # shap_values is a list for multi-class classifiers, an array otherwise.
        if isinstance(shap_values, list):
            # Classification case: pick the positive class when two are present.
            shap_values_to_plot = shap_values[0] if len(shap_values) == 1 else shap_values[1]
        else:
            shap_values_to_plot = shap_values

        # Align dimensions defensively if SHAP columns disagree with X_test.
        if shap_values_to_plot.shape[1] != X_test.shape[1]:
            print(f"⚠️ Mismatch dimensions SHAP ({shap_values_to_plot.shape[1]}) vs X_test ({X_test.shape[1]})")
            min_dim = min(shap_values_to_plot.shape[1], X_test.shape[1])
            shap_values_to_plot = shap_values_to_plot[:, :min_dim]
            X_to_plot = X_test.iloc[:, :min_dim]
        else:
            X_to_plot = X_test

        plt.figure(figsize=(12, 10))
        shap.summary_plot(shap_values_to_plot, X_to_plot, show=False)
        plt.savefig(os.path.join(output_dir, "shap_summary.png"), bbox_inches="tight")
        plt.close()
    except ImportError:
        print("\n(SHAP non installé — `pip install shap` pour activer l’analyse SHAP.)")

    print("\n===== ✅ FIN DE L’ANALYSE =====")
||||
def populateDataframe(self, dataframe, timeframe='5m'):
|
||||
heikinashi = qtpylib.heikinashi(dataframe)
|
||||
dataframe['haopen'] = heikinashi['open']
|
||||
@@ -1130,6 +1404,7 @@ class Zeus_8_3_2_B_4_2(IStrategy):
|
||||
# dataframe['atr'] = tr.rolling(window=self.DEFAULT_PARAMS['atr_period']).mean()
|
||||
|
||||
dataframe['volume_sma_deriv'] = dataframe['volume'] * dataframe['sma5_deriv1'] / (dataframe['volume'].rolling(5).mean())
|
||||
self.calculeDerivees(dataframe, 'volume', timeframe=timeframe, ema_period=12)
|
||||
|
||||
self.setTrends(dataframe)
|
||||
|
||||
@@ -1253,13 +1528,13 @@ class Zeus_8_3_2_B_4_2(IStrategy):
|
||||
eps_d1_series = eps_d1_series.fillna(global_eps_d1).replace(0, global_eps_d1)
|
||||
eps_d2_series = eps_d2_series.fillna(global_eps_d2).replace(0, global_eps_d2)
|
||||
|
||||
if verbose and self.dp.runmode.value in ('backtest'):
|
||||
stats = dataframe[[d1_col, d2_col]].agg(['min', 'max']).T
|
||||
stats['abs_max'] = dataframe[[d1_col, d2_col]].abs().max(axis=0)
|
||||
print(f"---- Derivatives stats {timeframe}----")
|
||||
print(stats)
|
||||
print(f"rolling window = {window}, coef = {coef}, ema_period = {ema_period}")
|
||||
print("---------------------------")
|
||||
# if verbose and self.dp.runmode.value in ('backtest'):
|
||||
# stats = dataframe[[d1_col, d2_col]].agg(['min', 'max']).T
|
||||
# stats['abs_max'] = dataframe[[d1_col, d2_col]].abs().max(axis=0)
|
||||
# print(f"---- Derivatives stats {timeframe}----")
|
||||
# print(stats)
|
||||
# print(f"rolling window = {window}, coef = {coef}, ema_period = {ema_period}")
|
||||
# print("---------------------------")
|
||||
|
||||
# mapping tendency
|
||||
def tag_by_derivatives(row):
|
||||
@@ -2635,7 +2910,7 @@ class Zeus_8_3_2_B_4_2(IStrategy):
|
||||
|
||||
def __init__(self, config: dict) -> None:
    """Initialize the strategy and load its parameter tree from disk.

    :param config: freqtrade configuration dict, forwarded to the base class.
    """
    super().__init__(config)
    # Strategy parameters live in the per-strategy params directory.
    self.parameters = self.load_params_tree("user_data/strategies/params/")
||||
def setTrends(self, dataframe: DataFrame):
|
||||
SMOOTH_WIN=10
|
||||
|
||||
Reference in New Issue
Block a user