RandomForestRegressor
This commit is contained in:
@@ -35,6 +35,23 @@ from collections import Counter
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Machine Learning
|
||||||
|
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
|
||||||
|
from sklearn.metrics import accuracy_score
|
||||||
|
import joblib
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sklearn.metrics import (
|
||||||
|
classification_report,
|
||||||
|
confusion_matrix,
|
||||||
|
accuracy_score,
|
||||||
|
roc_auc_score,
|
||||||
|
roc_curve,
|
||||||
|
)
|
||||||
|
from sklearn.tree import export_text
|
||||||
|
import inspect
|
||||||
|
|
||||||
|
|
||||||
from tabulate import tabulate
|
from tabulate import tabulate
|
||||||
|
|
||||||
@@ -58,6 +75,10 @@ def normalize(df):
|
|||||||
|
|
||||||
|
|
||||||
class Zeus_8_3_2_B_4_2(IStrategy):
|
class Zeus_8_3_2_B_4_2(IStrategy):
|
||||||
|
# Machine Learning
|
||||||
|
model = joblib.load('rf_model.pkl')
|
||||||
|
model_indicators = ['rsi_deriv1', "max_rsi_12", "mid_smooth_5_deriv1", "volume_deriv1"]
|
||||||
|
|
||||||
levels = [1, 2, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
|
levels = [1, 2, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
|
||||||
# startup_candle_count = 12 * 24 * 5
|
# startup_candle_count = 12 * 24 * 5
|
||||||
|
|
||||||
@@ -1009,8 +1030,261 @@ class Zeus_8_3_2_B_4_2(IStrategy):
|
|||||||
|
|
||||||
dataframe['stop_buying'] = latched
|
dataframe['stop_buying'] = latched
|
||||||
|
|
||||||
|
self.trainModel(dataframe, metadata)
|
||||||
|
|
||||||
|
# Préparer les features pour la prédiction
|
||||||
|
features = dataframe[self.model_indicators].fillna(0)
|
||||||
|
|
||||||
|
# Prédiction : probabilité que le prix monte
|
||||||
|
# probs = self.model.predict_proba(features)[:, 1]
|
||||||
|
|
||||||
|
# Sauvegarder la probabilité pour l’analyse
|
||||||
|
# dataframe['ml_prob'] = probs
|
||||||
|
|
||||||
|
# self.inspect_model(self.model)
|
||||||
|
|
||||||
return dataframe
|
return dataframe
|
||||||
|
|
||||||
|
def trainModel(self, dataframe: DataFrame, metadata: dict):
    """
    Train a RandomForestRegressor on the strategy indicators and persist it.

    The regression target is the change of ``sma24`` over the next 24
    candles, multiplied by 1000. It is a continuous value, not a 0/1
    label. The features are the columns listed in ``self.model_indicators``.

    Args:
        dataframe: Indicator dataframe. Must contain ``sma24`` and every
            column named in ``self.model_indicators``.
        metadata: Pair metadata supplied by the caller (not used here).

    Returns:
        None. Side effects: writes ``rf_model.pkl`` in the current working
        directory and prints R², RMSE and MAE computed on the hold-out split.
    """
    horizon = 24  # number of candles to look ahead for the target

    # Keep only the columns the model needs. Dropping NaNs over the whole
    # strategy dataframe would discard rows (possibly all of them) because
    # of unrelated indicator columns.
    df = dataframe[list(self.model_indicators)].copy()
    df['target'] = 1000 * (dataframe['sma24'].shift(-horizon) - dataframe['sma24'])

    # Removes indicator warm-up rows and the last `horizon` rows, whose
    # future value is unknown.
    df = df.dropna()

    if len(df) < 2:
        # train_test_split cannot produce a non-empty train AND test set.
        logger.warning("trainModel: only %d complete rows, training skipped", len(df))
        return

    # Chronological split (no shuffling) so the test set is strictly in the
    # future of the training set.
    X = df[self.model_indicators]
    y = df['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    train_model = RandomForestRegressor(
        n_estimators=300,
        max_depth=None,
        random_state=42,
        n_jobs=-1
    )
    train_model.fit(X_train, y_train)

    # Persist the fitted model.
    joblib.dump(train_model, 'rf_model.pkl')
    print("✅ Modèle sauvegardé sous rf_model.pkl")

    # Evaluate once on the hold-out split.
    y_pred = train_model.predict(X_test)

    print("R² :", r2_score(y_test, y_pred))
    # Take the square root explicitly so the printed value really is the
    # RMSE, independent of the installed scikit-learn version.
    print("RMSE :", mean_squared_error(y_test, y_pred) ** 0.5)
    print("MAE :", mean_absolute_error(y_test, y_pred))
|
||||||
|
|
||||||
|
def inspect_model(self, model):
    """
    Print a human-readable summary of an already-fitted ML model.

    Each section is printed only when the model exposes the matching
    attribute (``get_params``, ``n_estimators``, ``feature_importances_``,
    ``coef_``, ``intercept_``, ``classes_``, a few score attributes), so
    objects that lack some of them are still handled.

    Args:
        model: The fitted estimator object to describe.

    Returns:
        None. The report is written to stdout.
    """
    print("===== 🔍 INFORMATIONS DU MODÈLE =====")

    # Model type and defining module
    print(f"Type : {type(model).__name__}")
    print(f"Module : {model.__class__.__module__}")

    # Hyper-parameters
    if hasattr(model, "get_params"):
        hyperparams = model.get_params()
        print(f"\n===== ⚙️ HYPERPARAMÈTRES ({len(hyperparams)}) =====")
        for param_name, param_value in hyperparams.items():
            print(f"{param_name}: {param_value}")

    # Number of estimators
    if hasattr(model, "n_estimators"):
        print(f"\nNombre d’estimateurs : {model.n_estimators}")

    # Feature importances, labelled with the fitted column names when the
    # model recorded them, otherwise with positional placeholders
    if hasattr(model, "feature_importances_"):
        print("\n===== 📊 IMPORTANCE DES FEATURES =====")

        importances = model.feature_importances_
        labels = getattr(model, "feature_names_in_", None)
        if labels is None:
            labels = [f"feature_{pos}" for pos in range(len(importances))]
        elif isinstance(labels, np.ndarray):
            labels = labels.tolist()

        table = pd.DataFrame({"feature": labels, "importance": importances})
        table = table.sort_values(by="importance", ascending=False)

        print(table)

    # Coefficients (linear models)
    if hasattr(model, "coef_"):
        print("\n===== ➗ COEFFICIENTS =====")
        weights = np.array(model.coef_)
        if weights.ndim != 1:
            print(weights)
        else:
            for position, weight in enumerate(weights):
                print(f"Feature {position}: {weight:.6f}")

    # Intercept
    if hasattr(model, "intercept_"):
        print("\nIntercept :", model.intercept_)

    # Known classes
    if hasattr(model, "classes_"):
        print("\n===== 🎯 CLASSES =====")
        print(model.classes_)

    # Internal scores
    score_attrs = ("best_score_", "best_iteration_", "best_ntree_limit", "score_")
    for score_attr in score_attrs:
        if hasattr(model, score_attr):
            print(f"\n{score_attr} = {getattr(model, score_attr)}")

    # Bound methods: at most the first 15 names, with an ellipsis if
    # there are more
    print("\n===== 🧩 MÉTHODES DISPONIBLES =====")
    bound_methods = inspect.getmembers(model, predicate=inspect.ismethod)
    method_names = [member_name for member_name, _ in bound_methods]
    suffix = "..." if len(method_names) > 15 else ""
    print(", ".join(method_names[:15]) + suffix)

    print("\n===== ✅ FIN DE L’INSPECTION =====")
|
||||||
|
|
||||||
|
def analyze_model(self, model, X_train, X_test, y_train, y_test):
    """
    Print and plot a diagnostic report for a fitted classifier.

    The metrics used (accuracy, ROC AUC, classification report, confusion
    matrix, ROC curve) are classification metrics, so ``y_test`` must hold
    class labels. The figures are written as PNG files under
    ``user_data/plots``.

    Args:
        model: Fitted estimator exposing ``predict`` (and ideally
            ``predict_proba``).
        X_train: Training features (DataFrame); only its column names are
            used here.
        X_test: Hold-out features (DataFrame).
        y_train: Training labels (unused, kept for interface compatibility).
        y_test: Hold-out labels.

    Returns:
        None. Output goes to stdout and to PNG files.
    """
    output_dir = "user_data/plots"
    os.makedirs(output_dir, exist_ok=True)

    # ---- Predictions ----
    preds = model.predict(X_test)
    # Fall back to the hard predictions when the model has no probabilities.
    probs = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else preds

    # ---- Global performance ----
    print("===== 📊 ÉVALUATION DU MODÈLE =====")
    # feature_names_in_ only exists when the model was fitted on named
    # columns; a diagnostic print must not crash when it is absent.
    print("Colonnes du modèle :", getattr(model, "feature_names_in_", None))
    print("Colonnes X_test :", list(X_test.columns))
    print(f"Accuracy: {accuracy_score(y_test, preds):.3f}")
    print(f"ROC AUC : {roc_auc_score(y_test, probs):.3f}")

    print("TN (True Negative) / FP (False Positive)")
    print("FN (False Negative) / TP (True Positive)")
    print("\nRapport de classification :\n", classification_report(y_test, preds))

    # Reading the 2x2 confusion matrix (example figures):
    # | Cell                | Value | Meaning                                          |
    # | ------------------- | ----- | ------------------------------------------------ |
    # | TN (True Negative)  | 983   | Correctly predicted 0 (no buy)                   |
    # | FP (False Positive) | 43    | Predicted 1 but it was 0 (false buy signal)      |
    # | FN (False Negative) | 108   | Predicted 0 but it was 1 (missed buy)            |
    # | TP (True Positive)  | 19    | Correctly predicted 1 (good buy signal)          |

    # ---- Confusion matrix ----
    cm = confusion_matrix(y_test, preds)
    print("Matrice de confusion :\n", cm)

    plt.figure(figsize=(4, 4))
    plt.imshow(cm, cmap="Blues")
    plt.title("Matrice de confusion")
    plt.xlabel("Prédit")
    plt.ylabel("Réel")
    # Annotate every cell of the matrix whatever its size: it is 1x1 when a
    # single class is present and larger than 2x2 for multi-class targets.
    for (i, j), count in np.ndenumerate(cm):
        plt.text(j, i, count, ha="center", va="center", color="black")
    plt.savefig(os.path.join(output_dir, "Matrice de confusion.png"), bbox_inches="tight")
    plt.close()

    # ---- Feature importances ----
    if hasattr(model, "feature_importances_"):
        print("\n===== 🔍 IMPORTANCE DES FEATURES =====")
        importance = pd.DataFrame({
            "feature": X_train.columns,
            "importance": model.feature_importances_
        }).sort_values(by="importance", ascending=False)
        print(importance)
        importance.plot.bar(x="feature", y="importance", legend=False, figsize=(6, 3))
        plt.title("Importance des features")
        plt.savefig(os.path.join(output_dir, "Importance des features.png"), bbox_inches="tight")
        plt.close()

    # ---- Decision tree excerpt (first 800 characters of the first tree) ----
    if hasattr(model, "estimators_"):
        print("\n===== 🌳 EXTRAIT D’UN ARBRE =====")
        print(export_text(model.estimators_[0], feature_names=list(X_train.columns))[:800])

    # ---- Accuracy as a function of the decision threshold ----
    thresholds = np.linspace(0.1, 0.9, 9)
    print("\n===== ⚙️ PERFORMANCE SELON SEUIL =====")
    for t in thresholds:
        preds_t = (probs > t).astype(int)
        acc = accuracy_score(y_test, preds_t)
        print(f"Seuil {t:.1f} → précision {acc:.3f}")

    # ---- ROC curve ----
    fpr, tpr, _ = roc_curve(y_test, probs)
    plt.figure(figsize=(5, 4))
    plt.plot(fpr, tpr, label="ROC curve")
    plt.plot([0, 1], [0, 1], linestyle="--", color="gray")
    plt.xlabel("Taux de faux positifs")
    plt.ylabel("Taux de vrais positifs")
    plt.title("Courbe ROC")
    plt.legend()
    plt.savefig(os.path.join(output_dir, "Courbe ROC.png"), bbox_inches="tight")
    plt.close()

    # ---- SHAP interpretation (optional dependency) ----
    try:
        import shap

        print("\n===== 💡 ANALYSE SHAP =====")
        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X_test)

        if isinstance(shap_values, list):
            # One array per class: plot the positive class when there is one.
            shap_values_to_plot = shap_values[0] if len(shap_values) == 1 else shap_values[1]
        elif getattr(shap_values, "ndim", 2) == 3:
            # NOTE(review): recent SHAP releases appear to return one array
            # shaped (samples, features, outputs) instead of a list —
            # confirm against the installed version.
            output_idx = 0 if shap_values.shape[2] == 1 else 1
            shap_values_to_plot = shap_values[:, :, output_idx]
        else:
            shap_values_to_plot = shap_values

        # Trim both sides to the common number of columns if they disagree.
        if shap_values_to_plot.shape[1] != X_test.shape[1]:
            print(f"⚠️ Mismatch dimensions SHAP ({shap_values_to_plot.shape[1]}) vs X_test ({X_test.shape[1]})")
            min_dim = min(shap_values_to_plot.shape[1], X_test.shape[1])
            shap_values_to_plot = shap_values_to_plot[:, :min_dim]
            X_to_plot = X_test.iloc[:, :min_dim]
        else:
            X_to_plot = X_test

        plt.figure(figsize=(12, 10))
        shap.summary_plot(shap_values_to_plot, X_to_plot, show=False)
        plt.savefig(os.path.join(output_dir, "shap_summary.png"), bbox_inches="tight")
        plt.close()
    except ImportError:
        print("\n(SHAP non installé — `pip install shap` pour activer l’analyse SHAP.)")

    print("\n===== ✅ FIN DE L’ANALYSE =====")
|
||||||
|
|
||||||
def populateDataframe(self, dataframe, timeframe='5m'):
|
def populateDataframe(self, dataframe, timeframe='5m'):
|
||||||
heikinashi = qtpylib.heikinashi(dataframe)
|
heikinashi = qtpylib.heikinashi(dataframe)
|
||||||
dataframe['haopen'] = heikinashi['open']
|
dataframe['haopen'] = heikinashi['open']
|
||||||
@@ -1130,6 +1404,7 @@ class Zeus_8_3_2_B_4_2(IStrategy):
|
|||||||
# dataframe['atr'] = tr.rolling(window=self.DEFAULT_PARAMS['atr_period']).mean()
|
# dataframe['atr'] = tr.rolling(window=self.DEFAULT_PARAMS['atr_period']).mean()
|
||||||
|
|
||||||
dataframe['volume_sma_deriv'] = dataframe['volume'] * dataframe['sma5_deriv1'] / (dataframe['volume'].rolling(5).mean())
|
dataframe['volume_sma_deriv'] = dataframe['volume'] * dataframe['sma5_deriv1'] / (dataframe['volume'].rolling(5).mean())
|
||||||
|
self.calculeDerivees(dataframe, 'volume', timeframe=timeframe, ema_period=12)
|
||||||
|
|
||||||
self.setTrends(dataframe)
|
self.setTrends(dataframe)
|
||||||
|
|
||||||
@@ -1253,13 +1528,13 @@ class Zeus_8_3_2_B_4_2(IStrategy):
|
|||||||
eps_d1_series = eps_d1_series.fillna(global_eps_d1).replace(0, global_eps_d1)
|
eps_d1_series = eps_d1_series.fillna(global_eps_d1).replace(0, global_eps_d1)
|
||||||
eps_d2_series = eps_d2_series.fillna(global_eps_d2).replace(0, global_eps_d2)
|
eps_d2_series = eps_d2_series.fillna(global_eps_d2).replace(0, global_eps_d2)
|
||||||
|
|
||||||
if verbose and self.dp.runmode.value in ('backtest'):
|
# if verbose and self.dp.runmode.value in ('backtest'):
|
||||||
stats = dataframe[[d1_col, d2_col]].agg(['min', 'max']).T
|
# stats = dataframe[[d1_col, d2_col]].agg(['min', 'max']).T
|
||||||
stats['abs_max'] = dataframe[[d1_col, d2_col]].abs().max(axis=0)
|
# stats['abs_max'] = dataframe[[d1_col, d2_col]].abs().max(axis=0)
|
||||||
print(f"---- Derivatives stats {timeframe}----")
|
# print(f"---- Derivatives stats {timeframe}----")
|
||||||
print(stats)
|
# print(stats)
|
||||||
print(f"rolling window = {window}, coef = {coef}, ema_period = {ema_period}")
|
# print(f"rolling window = {window}, coef = {coef}, ema_period = {ema_period}")
|
||||||
print("---------------------------")
|
# print("---------------------------")
|
||||||
|
|
||||||
# mapping tendency
|
# mapping tendency
|
||||||
def tag_by_derivatives(row):
|
def tag_by_derivatives(row):
|
||||||
@@ -2635,7 +2910,7 @@ class Zeus_8_3_2_B_4_2(IStrategy):
|
|||||||
|
|
||||||
def __init__(self, config: dict) -> None:
|
def __init__(self, config: dict) -> None:
|
||||||
super().__init__(config)
|
super().__init__(config)
|
||||||
self.parameters = self.load_params_tree("user_data/strategies/params/")
|
# self.parameters = self.load_params_tree("user_data/strategies/params/")
|
||||||
|
|
||||||
def setTrends(self, dataframe: DataFrame):
|
def setTrends(self, dataframe: DataFrame):
|
||||||
SMOOTH_WIN=10
|
SMOOTH_WIN=10
|
||||||
|
|||||||
22
tools/sklearn/RandomForestRegressor.py
Normal file
22
tools/sklearn/RandomForestRegressor.py
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
from sklearn.ensemble import RandomForestRegressor
|
||||||
|
from sklearn.metrics import r2_score, mean_absolute_error
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# Toy dataset: two moving-average features and a close price.
df = pd.DataFrame(
    {
        'sma5': [1, 2, 3, 4, 5],
        'sma24': [2, 2, 2, 3, 4],
        'close': [100, 102, 101, 105, 108],
    }
)

# Relative gain from each close to the next one; the last row has no
# successor and therefore gets NaN.
next_close = df['close'].shift(-1)
df['future_gain'] = (next_close - df['close']) / df['close']

# Drop the final row, whose target is undefined.
usable = df.iloc[:-1]
X = usable[['sma5', 'sma24']]
y = usable['future_gain']

# Fit and predict on the same rows: this demonstrates the API and is not
# an out-of-sample evaluation.
model = RandomForestRegressor(n_estimators=200, random_state=42).fit(X, y)
y_pred = model.predict(X)

for label, value in (
    ("R²:", r2_score(y, y_pred)),
    ("MAE:", mean_absolute_error(y, y_pred)),
    ("Prédictions :", y_pred),
):
    print(label, value)
|
||||||
Reference in New Issue
Block a user