LGBMClassifier ajout des corrélations
This commit is contained in:
@@ -55,8 +55,13 @@ from sklearn.tree import export_text
|
||||
import inspect
|
||||
from sklearn.feature_selection import mutual_info_classif
|
||||
from sklearn.inspection import permutation_importance
|
||||
|
||||
from lightgbm import LGBMClassifier
|
||||
from sklearn.calibration import CalibratedClassifierCV
|
||||
from sklearn.feature_selection import SelectFromModel
|
||||
from tabulate import tabulate
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from sklearn.feature_selection import VarianceThreshold
|
||||
import seaborn as sns
|
||||
|
||||
# Couleurs ANSI de base
|
||||
RED = "\033[31m"
|
||||
@@ -80,19 +85,39 @@ def normalize(df):
|
||||
class Zeus_8_3_2_B_4_2(IStrategy):
|
||||
# Machine Learning
|
||||
model = joblib.load('rf_model.pkl')
|
||||
model_indicators = [
|
||||
'rsi', 'rsi_deriv1', "max_rsi_12",
|
||||
"bb_percent",
|
||||
'vol_24',
|
||||
'percent3',
|
||||
'sma5_dist', 'sma5_deriv1',
|
||||
'sma24_dist', 'sma24_deriv1',
|
||||
'sma60_dist', 'sma60_deriv1',
|
||||
'down_count', 'up_count',
|
||||
'down_pct', 'slope_norm',
|
||||
'min_max_60',
|
||||
'rsi_slope', 'adx_change', 'volatility_ratio'
|
||||
]
|
||||
# model_indicators = [
|
||||
# 'rsi', 'rsi_deriv1', 'rsi_deriv2', "max_rsi_12",
|
||||
# "bb_percent",
|
||||
# 'vol_24',
|
||||
# 'percent3',
|
||||
# 'sma5_dist', 'sma5_deriv1', 'sma5_deriv2',
|
||||
# 'sma24_dist', 'sma24_deriv1', 'sma24_deriv2',
|
||||
# 'sma60_dist', 'sma60_deriv1', 'sma60_deriv2',
|
||||
# 'down_pct', 'slope_norm',
|
||||
# 'min_max_60',
|
||||
# 'rsi_slope', 'adx_change', 'volatility_ratio',
|
||||
# 'slope_ratio', 'bb_width',
|
||||
# 'rsi_1h', 'rsi_deriv1_1h', 'rsi_deriv2_1h', "max_rsi_12_1h",
|
||||
# ]
|
||||
|
||||
model_indicators = ['open', 'high', 'low', 'close', 'volume', 'haopen', 'haclose', 'hapercent', 'mid',
|
||||
'percent', 'percent3', 'percent12', 'percent24', 'sma5', 'sma5_dist', 'sma5_deriv1',
|
||||
'sma5_deriv2', 'sma5_state', 'sma12', 'sma12_dist', 'sma12_deriv1', 'sma12_deriv2',
|
||||
'sma12_state', 'sma24', 'sma24_dist', 'sma24_deriv1', 'sma24_deriv2', 'sma24_state', 'sma48',
|
||||
'sma48_dist', 'sma48_deriv1', 'sma48_deriv2', 'sma48_state', 'sma60', 'sma60_dist',
|
||||
'sma60_deriv1', 'sma60_deriv2', 'sma60_state', 'mid_smooth_3', 'mid_smooth_3_dist',
|
||||
'mid_smooth_3_deriv1', 'mid_smooth_3_deriv2', 'mid_smooth_3_state', 'mid_smooth_5',
|
||||
'mid_smooth_5_dist', 'mid_smooth_5_deriv1', 'mid_smooth_5_deriv2', 'mid_smooth_5_state',
|
||||
'mid_smooth_12', 'mid_smooth_12_dist', 'mid_smooth_12_deriv1', 'mid_smooth_12_deriv2',
|
||||
'mid_smooth_12_state', 'mid_smooth_24', 'mid_smooth_24_dist', 'mid_smooth_24_deriv1',
|
||||
'mid_smooth_24_deriv2', 'mid_smooth_24_state', 'rsi', 'max_rsi_12', 'max_rsi_24', 'rsi_dist',
|
||||
'rsi_deriv1', 'rsi_deriv2', 'rsi_state', 'max12', 'max60', 'min60', 'min_max_60',
|
||||
'bb_lowerband', 'bb_middleband', 'bb_upperband', 'bb_percent', 'bb_width', 'macd',
|
||||
'macdsignal', 'macdhist', 'sma_20', 'sma_100', 'slope', 'slope_smooth', 'atr', 'atr_norm',
|
||||
'adx', 'obv', 'ret', 'vol_24', 'down_count', 'up_count', 'down_pct', 'up_pct',
|
||||
'rsi_slope', 'adx_change', 'volatility_ratio', 'rsi_diff', 'slope_ratio', 'volume_sma_deriv',
|
||||
'volume_dist', 'volume_deriv1', 'volume_deriv2', 'volume_state', 'slope_norm', 'trend_class',
|
||||
'mid_smooth']
|
||||
|
||||
levels = [1, 2, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
|
||||
# startup_candle_count = 12 * 24 * 5
|
||||
@@ -1072,12 +1097,98 @@ class Zeus_8_3_2_B_4_2(IStrategy):
|
||||
return dataframe
|
||||
|
||||
def trainModel(self, dataframe: DataFrame, metadata: dict):
|
||||
df = dataframe.copy()
|
||||
pd.set_option('display.max_rows', None)
|
||||
pd.set_option('display.max_columns', None)
|
||||
pd.set_option("display.width", 200)
|
||||
|
||||
# Étape 1 : sélectionner numériques
|
||||
numeric_cols = dataframe.select_dtypes(include=['int64', 'float64']).columns
|
||||
|
||||
# Étape 2 : enlever constantes
|
||||
usable_cols = [c for c in numeric_cols if dataframe[c].nunique() > 1
|
||||
and (not c.endswith("_state") and not c.endswith("_1h") and not c.endswith("_1d")
|
||||
and not c.endswith("_class") and not c.endswith("_price")
|
||||
and not c.startswith('stop_buying'))]
|
||||
|
||||
# Étape 3 : remplacer inf et NaN par 0
|
||||
dataframe[usable_cols] = dataframe[usable_cols].replace([np.inf, -np.inf], 0).fillna(0)
|
||||
|
||||
print("Colonnes utilisables pour le modèle :")
|
||||
print(usable_cols)
|
||||
|
||||
self.model_indicators = usable_cols
|
||||
df = dataframe[self.model_indicators].copy()
|
||||
|
||||
# Corrélations des colonnes
|
||||
corr = df.corr(numeric_only=True)
|
||||
print("Corrélation des colonnes")
|
||||
print(corr)
|
||||
|
||||
# 3️⃣ Créer la cible : 1 si le prix monte dans les prochaines bougies
|
||||
# df['target'] = (df['sma24'].shift(-24) > df['sma24']).astype(int)
|
||||
df['target'] = (df['sma5'].shift(-12).rolling(12).max() > df['sma5'] * 1.00025).astype(int)
|
||||
df['target'] = (df['sma24'].shift(-25).rolling(24).max() > df['sma24'] * 1.003).astype(int)
|
||||
df['target'] = df['target'].fillna(0).astype(int)
|
||||
|
||||
# Corrélations triées par importance avec une colonne cible
|
||||
target_corr = df.corr(numeric_only=True)["target"].sort_values(ascending=False)
|
||||
print("Corrélations triées par importance avec une colonne cible")
|
||||
print(target_corr)
|
||||
|
||||
# Corrélations triées par importance avec une colonne cible
|
||||
corr = df.corr(numeric_only=True)
|
||||
corr_unstacked = (
|
||||
corr.unstack()
|
||||
.reset_index()
|
||||
.rename(columns={"level_0": "col1", "level_1": "col2", 0: "corr"})
|
||||
)
|
||||
# Supprimer les doublons col1/col2 inversés et soi-même
|
||||
corr_unstacked = corr_unstacked[corr_unstacked["col1"] < corr_unstacked["col2"]]
|
||||
|
||||
# Trier par valeur absolue de corrélation
|
||||
corr_sorted = corr_unstacked.reindex(corr_unstacked["corr"].abs().sort_values(ascending=False).index)
|
||||
print("Trier par valeur absolue de corrélation")
|
||||
print(corr_sorted.head(20))
|
||||
|
||||
# --- Calcul de la corrélation ---
|
||||
corr = df.corr(numeric_only=True) # évite les colonnes non numériques
|
||||
corr = corr * 100 # passage en pourcentage
|
||||
|
||||
# --- Masque pour n’afficher que le triangle supérieur (optionnel) ---
|
||||
mask = np.triu(np.ones_like(corr, dtype=bool))
|
||||
|
||||
# --- Création de la figure ---
|
||||
fig, ax = plt.subplots(figsize=(96, 36))
|
||||
|
||||
# --- Heatmap avec un effet “température” ---
|
||||
sns.heatmap(
|
||||
corr,
|
||||
mask=mask,
|
||||
cmap="coolwarm", # palette bleu → rouge
|
||||
center=0, # 0 au centre
|
||||
annot=True, # affiche les valeurs dans chaque case
|
||||
fmt=".0f", # format entier (pas de décimale)
|
||||
cbar_kws={"label": "Corrélation (%)"}, # légende à droite
|
||||
linewidths=0.5, # petites lignes entre les cases
|
||||
ax=ax
|
||||
)
|
||||
|
||||
# --- Personnalisation ---
|
||||
ax.set_title("Matrice de corrélation (en %)", fontsize=20, pad=20)
|
||||
plt.xticks(rotation=45, ha="right")
|
||||
plt.yticks(rotation=0)
|
||||
|
||||
# --- Sauvegarde ---
|
||||
output_path = "/home/souti/freqtrade/user_data/plots/Matrice_de_correlation_temperature.png"
|
||||
plt.savefig(output_path, bbox_inches="tight", dpi=150)
|
||||
plt.close(fig)
|
||||
|
||||
print(f"✅ Matrice enregistrée : {output_path}")
|
||||
|
||||
# Exemple d'utilisation :
|
||||
selected_corr = self.select_uncorrelated_features(df, target="target", top_n=30, corr_threshold=0.7)
|
||||
print("===== 🎯 FEATURES SÉLECTIONNÉES =====")
|
||||
print(selected_corr)
|
||||
|
||||
# Nettoyage
|
||||
df = df.dropna()
|
||||
|
||||
@@ -1088,22 +1199,90 @@ class Zeus_8_3_2_B_4_2(IStrategy):
|
||||
# 4️⃣ Split train/test
|
||||
X = df[self.model_indicators]
|
||||
y = df['target']
|
||||
# Séparation temporelle (train = 80 %, valid = 20 %)
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
|
||||
|
||||
# Nettoyage des valeurs invalides
|
||||
|
||||
selector = VarianceThreshold(threshold=0.0001)
|
||||
selector.fit(X_train)
|
||||
selected = X_train.columns[selector.get_support()]
|
||||
print("Colonnes conservées :", list(selected))
|
||||
|
||||
# 5️⃣ Entraînement du modèle
|
||||
# train_model = RandomForestClassifier(n_estimators=200, random_state=42)
|
||||
train_model = RandomForestClassifier(
|
||||
n_estimators=300,
|
||||
max_depth=12,
|
||||
# min_samples_split=4,
|
||||
# min_samples_leaf=2,
|
||||
# max_features='sqrt',
|
||||
# random_state=42,
|
||||
# n_jobs=-1,
|
||||
class_weight='balanced'
|
||||
# train_model = RandomForestClassifier(
|
||||
# n_estimators=300,
|
||||
# max_depth=12,
|
||||
# # min_samples_split=4,
|
||||
# # min_samples_leaf=2,
|
||||
# # max_features='sqrt',
|
||||
# # random_state=42,
|
||||
# # n_jobs=-1,
|
||||
# class_weight='balanced'
|
||||
# )
|
||||
# 1️⃣ Entraîne ton modèle LGBM normal
|
||||
train_model = LGBMClassifier(
|
||||
n_estimators=800,
|
||||
learning_rate=0.02,
|
||||
max_depth=10,
|
||||
num_leaves=31,
|
||||
subsample=0.8,
|
||||
colsample_bytree=0.8,
|
||||
reg_alpha=0.2,
|
||||
reg_lambda=0.4,
|
||||
class_weight='balanced',
|
||||
random_state=42,
|
||||
)
|
||||
train_model.fit(X_train, y_train)
|
||||
|
||||
# 2️⃣ Sélection des features AVANT calibration
|
||||
sfm = SelectFromModel(train_model, threshold="median", prefit=True)
|
||||
selected_features = X_train.columns[sfm.get_support()]
|
||||
print(selected_features)
|
||||
|
||||
# 3️⃣ Calibration ensuite (facultative)
|
||||
calibrated = CalibratedClassifierCV(train_model, method='sigmoid', cv=5)
|
||||
calibrated.fit(X_train[selected_features], y_train)
|
||||
print(calibrated)
|
||||
|
||||
# # calibration
|
||||
# train_model = CalibratedClassifierCV(train_model, method='sigmoid', cv=5)
|
||||
# # Sélection
|
||||
# sfm = SelectFromModel(train_model, threshold="median")
|
||||
# sfm.fit(X_train, y_train)
|
||||
# selected_features = X_train.columns[sfm.get_support()]
|
||||
# print(selected_features)
|
||||
|
||||
train_model.fit(X_train, y_train)
|
||||
y_pred = train_model.predict(X_test)
|
||||
y_proba = train_model.predict_proba(X_test)[:, 1]
|
||||
# print(classification_report(y_test, y_pred))
|
||||
# print(confusion_matrix(y_test, y_pred))
|
||||
print("\nRapport de classification :\n", classification_report(y_test, y_pred))
|
||||
print("\nMatrice de confusion :\n", confusion_matrix(y_test, y_pred))
|
||||
|
||||
# Importances
|
||||
importances = pd.DataFrame({
|
||||
"feature": train_model.feature_name_,
|
||||
"importance": train_model.feature_importances_
|
||||
}).sort_values("importance", ascending=False)
|
||||
print("\n===== 🔍 IMPORTANCE DES FEATURES =====")
|
||||
|
||||
print(importances)
|
||||
|
||||
best_f1 = 0
|
||||
best_t = 0.5
|
||||
for t in [0.3, 0.4, 0.5, 0.6, 0.7]:
|
||||
y_pred_thresh = (y_proba > t).astype(int)
|
||||
score = f1_score(y_test, y_pred_thresh)
|
||||
print(f"Seuil {t:.1f} → F1: {score:.3f}")
|
||||
if score > best_f1:
|
||||
best_f1 = score
|
||||
best_t = t
|
||||
|
||||
print(f"✅ Meilleur seuil trouvé: {best_t} avec F1={best_f1:.3f}")
|
||||
|
||||
# 6️⃣ Évaluer la précision (facultatif)
|
||||
preds = train_model.predict(X_test)
|
||||
acc = accuracy_score(y_test, preds)
|
||||
@@ -1263,7 +1442,16 @@ class Zeus_8_3_2_B_4_2(IStrategy):
|
||||
"importance": model.feature_importances_
|
||||
}).sort_values(by="importance", ascending=False)
|
||||
print(importance)
|
||||
importance.plot.bar(x="feature", y="importance", legend=False, figsize=(6, 3))
|
||||
|
||||
# Crée une figure plus grande
|
||||
fig, ax = plt.subplots(figsize=(24, 8)) # largeur=24 pouces, hauteur=8 pouces
|
||||
|
||||
# Trace le bar plot sur cet axe
|
||||
importance.plot.bar(x="feature", y="importance", legend=False, ax=ax)
|
||||
|
||||
# Tourner les labels pour plus de lisibilité
|
||||
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
|
||||
|
||||
plt.title("Importance des features")
|
||||
# plt.show()
|
||||
plt.savefig(os.path.join(output_dir, "Importance des features.png"), bbox_inches="tight")
|
||||
@@ -1439,8 +1627,8 @@ class Zeus_8_3_2_B_4_2(IStrategy):
|
||||
dataframe["percent12"] = dataframe['close'].pct_change(12)
|
||||
dataframe["percent24"] = dataframe['close'].pct_change(24)
|
||||
|
||||
if self.dp.runmode.value in ('backtest'):
|
||||
dataframe['futur_percent'] = 100 * (dataframe['close'].shift(-1) - dataframe['close']) / dataframe['close']
|
||||
# if self.dp.runmode.value in ('backtest'):
|
||||
# dataframe['futur_percent'] = 100 * (dataframe['close'].shift(-1) - dataframe['close']) / dataframe['close']
|
||||
|
||||
dataframe['sma5'] = dataframe['mid'].ewm(span=5, adjust=False).mean() #dataframe["mid"].rolling(window=5).mean()
|
||||
self.calculeDerivees(dataframe, 'sma5', timeframe=timeframe, ema_period=5)
|
||||
@@ -1479,9 +1667,11 @@ class Zeus_8_3_2_B_4_2(IStrategy):
|
||||
(dataframe["close"] - dataframe["bb_lowerband"]) /
|
||||
(dataframe["bb_upperband"] - dataframe["bb_lowerband"])
|
||||
)
|
||||
dataframe["bb_width"] = (
|
||||
(dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / dataframe["bb_middleband"]
|
||||
)
|
||||
dataframe["bb_width"] = (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / dataframe["sma5"]
|
||||
|
||||
# dataframe["bb_width"] = (
|
||||
# (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / dataframe["bb_middleband"]
|
||||
# )
|
||||
|
||||
# Calcul MACD
|
||||
macd, macdsignal, macdhist = talib.MACD(
|
||||
@@ -1574,8 +1764,7 @@ class Zeus_8_3_2_B_4_2(IStrategy):
|
||||
).on_balance_volume()
|
||||
|
||||
# --- Volatilité récente (écart-type des rendements) ---
|
||||
dataframe['ret'] = dataframe['close'].pct_change()
|
||||
dataframe['vol_24'] = dataframe['ret'].rolling(24).std()
|
||||
dataframe['vol_24'] = dataframe['percent'].rolling(24).std()
|
||||
|
||||
# Compter les baisses / hausses consécutives
|
||||
self.calculateDownAndUp(dataframe, limit=0.0001)
|
||||
@@ -1591,6 +1780,10 @@ class Zeus_8_3_2_B_4_2(IStrategy):
|
||||
dataframe['adx_change'] = dataframe['adx'] - dataframe['adx'].shift(12) # évolution de la tendance
|
||||
dataframe['volatility_ratio'] = dataframe['atr_norm'] / dataframe['bb_width']
|
||||
|
||||
dataframe["rsi_diff"] = dataframe["rsi"] - dataframe["rsi"].shift(3)
|
||||
dataframe["slope_ratio"] = dataframe["sma5_deriv1"] / (dataframe["sma60_deriv1"] + 1e-9)
|
||||
dataframe["divergence"] = (dataframe["rsi_deriv1"] * dataframe["sma5_deriv1"]) < 0
|
||||
|
||||
###########################
|
||||
|
||||
dataframe['volume_sma_deriv'] = dataframe['volume'] * dataframe['sma5_deriv1'] / (dataframe['volume'].rolling(5).mean())
|
||||
@@ -3294,3 +3487,35 @@ class Zeus_8_3_2_B_4_2(IStrategy):
|
||||
def getParamValue(self, pair, trend, space, param):
    """Return one optimized parameter value for a pair/trend combination.

    Resolves the pair to its short name, then walks the nested
    ``self.parameters`` structure down to the requested hyperopt
    ``space``/``param`` entry of the first stored result.
    """
    short_name = self.getShortName(pair)
    first_result = self.parameters[short_name][trend][0]
    return first_result['content']['params'][space][param]
|
||||
|
||||
|
||||
def select_uncorrelated_features(self, df, target, top_n=20, corr_threshold=0.7):
    """Pick features most correlated with *target*, avoiding redundancy.

    Ranks every numeric column by absolute correlation with the target,
    keeps at most ``top_n`` candidates, then greedily discards any
    candidate whose absolute correlation with an already-kept feature
    exceeds ``corr_threshold``.

    Returns a DataFrame with columns ``feature`` and ``corr_with_target``,
    sorted by absolute correlation with the target (descending).
    """
    # Full correlation matrix over numeric columns only.
    matrix = df.corr(numeric_only=True)

    # Rank by |correlation| with the target, excluding the target itself.
    ranked = matrix[target].abs().sort_values(ascending=False)
    candidates = ranked.drop(target).head(top_n).index.tolist()

    # Greedy pass: keep a candidate only if it is not too correlated
    # with anything already kept.
    kept = []
    for name in candidates:
        if not any(abs(matrix.loc[name, prev]) > corr_threshold for prev in kept):
            kept.append(name)

    # Report kept features with their signed correlation to the target.
    report = pd.DataFrame({
        "feature": kept,
        "corr_with_target": [matrix.loc[name, target] for name in kept],
    })
    return report.sort_values(by="corr_with_target", key=np.abs, ascending=False)
|
||||
|
||||
Reference in New Issue
Block a user