From dfe04a3f0cef7a362b76a228a88ba64353c860e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Delacotte?= Date: Sun, 16 Nov 2025 20:47:09 +0100 Subject: [PATCH] TensorFlow --- Zeus_TensorFlow.py | 3165 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 3165 insertions(+) create mode 100644 Zeus_TensorFlow.py diff --git a/Zeus_TensorFlow.py b/Zeus_TensorFlow.py new file mode 100644 index 0000000..8be7767 --- /dev/null +++ b/Zeus_TensorFlow.py @@ -0,0 +1,3165 @@ +# Zeus Strategy: First Generation of GodStra Strategy with maximum +# AVG/MID profit in USDT +# Author: @Mablue (Masoud Azizi) +# github: https://github.com/mablue/ +# IMPORTANT: INSTALL TA BEFOUR RUN(pip install ta) +# freqtrade hyperopt --hyperopt-loss SharpeHyperOptLoss --spaces buy sell roi --strategy Zeus +# --- Do not remove these libs --- +from datetime import timedelta, datetime +from freqtrade.persistence import Trade +from freqtrade.strategy import (BooleanParameter, CategoricalParameter, DecimalParameter, stoploss_from_open, + IntParameter, IStrategy, merge_informative_pair, informative, stoploss_from_absolute) +import pandas as pd +import numpy as np +import os +import json +from pandas import DataFrame +from typing import Optional, Union, Tuple +import math +import logging +from pathlib import Path + +# -------------------------------- + +# Add your lib to import here test git +import ta +import talib.abstract as talib +import freqtrade.vendor.qtpylib.indicators as qtpylib +from datetime import timezone, timedelta + +logger = logging.getLogger(__name__) + +# Machine Learning +from sklearn.model_selection import train_test_split +import joblib +import matplotlib.pyplot as plt +from sklearn.metrics import ( + classification_report, + confusion_matrix, + accuracy_score, + roc_auc_score, + roc_curve, + precision_score, recall_score, precision_recall_curve, + f1_score, mean_squared_error, r2_score +) +from sklearn.tree import export_text +import inspect +from 
sklearn.feature_selection import SelectFromModel +from tabulate import tabulate +from sklearn.feature_selection import VarianceThreshold +import seaborn as sns +import lightgbm as lgb +from sklearn.model_selection import cross_val_score +import optuna.visualization as vis +import optuna +from lightgbm import LGBMRegressor +from sklearn.metrics import mean_squared_error +from sklearn.model_selection import train_test_split +from sklearn.linear_model import LinearRegression, Ridge, HuberRegressor +from sklearn.preprocessing import StandardScaler, PolynomialFeatures +from sklearn.pipeline import make_pipeline +from sklearn.svm import SVR +from sklearn.ensemble import RandomForestRegressor +from sklearn.ensemble import GradientBoostingRegressor +from sklearn.preprocessing import StandardScaler +from sklearn.ensemble import HistGradientBoostingRegressor +from sklearn.impute import SimpleImputer +from sklearn.pipeline import Pipeline + +# Tensorflow +import pandas as pd +import numpy as np +import tensorflow as tf +from tensorflow.keras import layers, models +from tensorflow.keras.models import load_model +from keras.utils import plot_model +from keras.models import Sequential +from keras.layers import LSTM, Dense +from sklearn.preprocessing import MinMaxScaler +from tensorflow.keras.models import Sequential +from tensorflow.keras.layers import LSTM, Dense +from tensorflow.keras.optimizers import Adam + +os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # désactive complètement le GPU +os.environ["TF_XLA_FLAGS"] = "--tf_xla_enable_xla_devices=false" + + +# Couleurs ANSI de base +RED = "\033[31m" +GREEN = "\033[32m" +YELLOW = "\033[33m" +BLUE = "\033[34m" +MAGENTA = "\033[35m" +CYAN = "\033[36m" +RESET = "\033[0m" + +import warnings +warnings.filterwarnings( + "ignore", + message=r".*No further splits with positive gain.*" +) + +def pprint_df(dframe): + print(tabulate(dframe, headers='keys', tablefmt='psql', showindex=False)) + + +def normalize(df): + df = (df - df.min()) / 
(df.max() - df.min()) + return df + + +class Zeus_LGBMRegressor(IStrategy): + startup_candle_count = 24 * 12 + + # Machine Learning + model_indicators = [] + indicator_target = 'percent12' + + model = None + # Tensorflow + lookback = 60 + future_steps = 12 + + path = f"user_data/plots/" + + # ROI table: + minimal_roi = { + "0": 0.564, + "567": 0.273, + "2814": 0.12, + "7675": 0 + } + + # Stoploss: + stoploss = -1 # 0.256 + # Custom stoploss + use_custom_stoploss = False + + trailing_stop = True + trailing_stop_positive = 0.15 + trailing_stop_positive_offset = 0.20 + trailing_only_offset_is_reached = True + + # Buy hypers + timeframe = '5m' + + max_open_trades = 5 + max_amount = 40 + + parameters = {} + # DCA config + position_adjustment_enable = True + + plot_config = { + "main_plot": { + "sma24_1h": { + "color": "pink" + }, + "sma5_1d": { + "color": "blue" + }, + # "sma24": { + # "color": "yellow" + # }, + "sma60": { + "color": "green" + }, + "bb_lowerband": { + "color": "#da59a6"}, + "bb_upperband": { + "color": "#da59a6", + }, + # "sma12": { + # "color": "blue" + # }, + "mid_smooth_3_1h": { + "color": "blue" + } + }, + "subplots": { + "Rsi": { + "max_rsi_24": { + "color": "blue" + }, + "max_rsi_24_1h": { + "color": "pink" + }, + # "rsi_1h": { + # "color": "red" + # }, + # "rsi_1d": { + # "color": "blue" + # } + }, + "Rsi_deriv1": { + "sma24_deriv1_1h": { + "color": "pink" + }, + "sma24_deriv1": { + "color": "yellow" + }, + "sma5_deriv1_1d": { + "color": "blue" + }, + "sma60_deriv1": { + "color": "green" + } + }, + "Rsi_deriv2": { + "sma24_deriv2_1h": { + "color": "pink" + }, + "sma24_deriv2": { + "color": "yellow" + }, + "sma5_deriv2_1d": { + "color": "blue" + }, + "sma60_deriv2": { + "color": "green" + } + }, + 'Macd': { + "macd_rel_1d": { + "color": "cyan" + }, + "macdsignal_rel_1d": { + "color": "pink" + }, + "macdhist_rel_1d": { + "color": "yellow" + } + } + } + } + columns_logged = False + pairs = { + pair: { + "first_buy": 0, + "last_buy": 0.0, + 
"last_min": 999999999999999.5, + "last_max": 0, + "trade_info": {}, + "max_touch": 0.0, + "last_sell": 0.0, + 'count_of_buys': 0, + 'current_profit': 0, + 'expected_profit': 0, + 'previous_profit': 0, + "last_candle": {}, + "last_count_of_buys": 0, + 'base_stake_amount': 0, + 'stop_buy': False, + 'last_date': 0, + 'stop': False, + 'max_profit': 0, + 'total_amount': 0, + 'has_gain': 0, + 'force_sell': False, + 'force_buy': False + } + for pair in ["BTC/USDC", "ETH/USDC", "DOGE/USDC", "XRP/USDC", "SOL/USDC", + "BTC/USDT", "ETH/USDT", "DOGE/USDT", "XRP/USDT", "SOL/USDT"] + } + # 20 20 40 60 100 160 260 420 + # 50 50 100 300 500 + # fibo = [1, 1, 2, 3, 5, 8, 13, 21] + # my fibo + # 50 50 50 100 100 150 200 250 350 450 600 1050 + fibo = [1, 1, 1, 2, 2, 3, 4, 5, 7, 9, 12, 16, 21] + baisse = [1, 2, 3, 5, 7, 10, 14, 19, 26, 35, 47, 63, 84] + # Ma suite 1 1 1 2 2 3 4 5 7 9 12 16 21 + # Mise 50 50 50 100 100 150 200 250 350 450 600 800 1050 + # Somme Mises 50 100 150 250 350 500 700 950 1300 1750 2350 3150 4200 + # baisse 1 2 3 5 7 10 14 19 26 35 47 63 84 + + # factors = [1, 1.1, 1.25, 1.5, 2.0, 3] + # thresholds = [2, 5, 10, 20, 30, 50] + + factors = [0.5, 0.75, 1, 1.25, 1.5, 2] + thresholds = [0, 2, 5, 10, 30, 45] + + trades = list() + max_profit_pairs = {} + + mise_factor_buy = DecimalParameter(0.01, 0.1, default=0.05, decimals=2, space='buy', optimize=True, load=True) + + indicators = {'sma5', 'sma12', 'sma24', 'sma60'} + indicators_percent = {'percent', 'percent3', 'percent12', 'percent24', 'percent_1h', 'percent3_1h', 'percent12_1h', 'percent24_1h'} + + mises = IntParameter(1, 50, default=5, space='buy', optimize=True, load=True) + + ml_prob_buy = DecimalParameter(-0.5, 0.5, default=0.0, decimals=2, space='buy', optimize=True, load=True) + ml_prob_sell = DecimalParameter(-0.5, 0.5, default=0.0, decimals=2, space='sell', optimize=True, load=True) + + pct = DecimalParameter(0.005, 0.05, default=0.012, decimals=3, space='buy', optimize=True, load=True) + pct_inc = 
DecimalParameter(0.0001, 0.003, default=0.0022, decimals=4, space='buy', optimize=True, load=True) + + rsi_deb_protect = IntParameter(50, 90, default=70, space='protection', optimize=True, load=True) + rsi_end_protect = IntParameter(20, 60, default=55, space='protection', optimize=True, load=True) + + sma24_deriv1_deb_protect = DecimalParameter(-4, 4, default=-2, decimals=1, space='protection', optimize=True, load=True) + sma24_deriv1_end_protect = DecimalParameter(-4, 4, default=0, decimals=1, space='protection', optimize=True, load=True) + + # ========================================================================= + should_enter_trade_count = 0 + + def confirm_trade_entry(self, pair: str, order_type: str, amount: float, rate: float, time_in_force: str, + current_time: datetime, entry_tag: Optional[str], **kwargs) -> bool: + + minutes = 0 + if self.pairs[pair]['last_date'] != 0: + minutes = round(int((current_time - self.pairs[pair]['last_date']).total_seconds() / 60)) + + dataframe, _ = self.dp.get_analyzed_dataframe(pair, self.timeframe) + last_candle = dataframe.iloc[-1].squeeze() + last_candle_2 = dataframe.iloc[-2].squeeze() + last_candle_3 = dataframe.iloc[-3].squeeze() + + condition = True #(last_candle[f"{indic_5m}_deriv1"] >= indic_deriv1_5m) and (last_candle[f"{indic_5m}_deriv2"] >= indic_deriv2_5m) + + allow_to_buy = True #(condition and not self.pairs[pair]['stop']) | (entry_tag == 'force_entry') + + if allow_to_buy: + self.trades = list() + self.pairs[pair]['first_buy'] = rate + self.pairs[pair]['last_buy'] = rate + self.pairs[pair]['max_touch'] = last_candle['close'] + self.pairs[pair]['last_candle'] = last_candle + self.pairs[pair]['count_of_buys'] = 1 + self.pairs[pair]['current_profit'] = 0 + self.pairs[pair]['last_max'] = max(last_candle['close'], self.pairs[pair]['last_max']) + self.pairs[pair]['last_min'] = min(last_candle['close'], self.pairs[pair]['last_min']) + + dispo = round(self.wallets.get_available_stake_amount()) + 
def confirm_trade_exit(self, pair: str, trade: "Trade", order_type: str, amount: float, rate: float,
                       time_in_force: str,
                       exit_reason: str, current_time, **kwargs, ) -> bool:
    """Decide whether an exit order may go through and reset the pair's state.

    The exit is accepted when one of the following holds:

    * the trade has been open for more than 30 minutes and the last analyzed
      candle has a negative ``hapercent``;
    * the pair's ``force_sell`` flag is set;
    * ``exit_reason`` is ``'force_exit'`` or ``'stop_loss'``.

    When accepted, the sell is logged through ``self.log_trade`` and the
    per-pair bookkeeping in ``self.pairs[pair]`` is reset.

    :param pair: Pair being exited; must be a key of ``self.pairs``.
    :param trade: Trade object; ``open_date_utc``, ``calc_profit`` and
        ``nr_of_successful_entries`` are read from it.
    :param rate: Exit rate, stored as ``last_sell`` and used for the profit.
    :param exit_reason: Reason string supplied by the caller.
    :param current_time: Current time, compared with ``trade.open_date_utc``.
    :return: ``True`` when the exit is allowed, ``False`` otherwise.
    """
    dataframe, _ = self.dp.get_analyzed_dataframe(pair, self.timeframe)
    last_candle = dataframe.iloc[-1].squeeze()

    # total_seconds() keeps whole days; timedelta.seconds alone wraps around
    # every 24 hours and would make a long-running trade look freshly opened.
    minutes = int(round((current_time - trade.open_date_utc).total_seconds() / 60, 0))
    profit = trade.calc_profit(rate)
    state = self.pairs[pair]

    # Same precedence as "a and b or c or d", written out explicitly.
    aged_and_falling = minutes > 30 and last_candle['hapercent'] < 0
    allow_to_sell = bool(
        aged_and_falling
        or state['force_sell']
        or exit_reason in ('force_exit', 'stop_loss')
    )

    if allow_to_sell:
        self.trades = list()
        state['last_count_of_buys'] = trade.nr_of_successful_entries
        state['last_sell'] = rate
        state['last_candle'] = last_candle
        # max_profit is cleared before logging, as in the original ordering.
        state['max_profit'] = 0
        state['previous_profit'] = 0
        dispo = round(self.wallets.get_available_stake_amount())
        self.log_trade(
            last_candle=last_candle,
            date=current_time,
            action="🟥Sell " + str(minutes),
            pair=pair,
            trade_type=exit_reason,
            rate=last_candle['close'],
            dispo=dispo,
            profit=round(profit, 2)
        )
        state['force_sell'] = False
        state['has_gain'] = 0
        state['current_profit'] = 0
        state['total_amount'] = 0
        state['count_of_buys'] = 0
        state['max_touch'] = 0
        state['last_buy'] = 0
        state['last_date'] = current_time
        state['current_trade'] = None

    # force_exit / stop_loss are already part of allow_to_sell.
    return allow_to_sell


def custom_stake_amount(self, pair: str, current_time: datetime, current_rate: float,
                        proposed_stake: float, min_stake: float, max_stake: float,
                        **kwargs) -> float:
    """Return the stake computed by ``self.adjust_stake_amount`` for the last candle.

    The proposed, minimum and maximum stakes passed by the caller are not
    used; the value comes entirely from ``adjust_stake_amount(pair, candle)``
    where ``candle`` is the last row of the analyzed dataframe.
    """
    dataframe, _ = self.dp.get_analyzed_dataframe(pair=pair, timeframe=self.timeframe)
    current_candle = dataframe.iloc[-1].squeeze()
    return self.adjust_stake_amount(pair, current_candle)
max(self.pairs[pair]['max_profit'], profit) + max_profit = self.pairs[pair]['max_profit'] + baisse = 0 + if profit > 0: + baisse = 1 - (profit / max_profit) + mx = max_profit / 5 + self.pairs[pair]['count_of_buys'] = count_of_buys + self.pairs[pair]['current_profit'] = profit + + dispo = round(self.wallets.get_available_stake_amount()) + hours_since_first_buy = (current_time - trade.open_date_utc).seconds / 3600.0 + days_since_first_buy = (current_time - trade.open_date_utc).days + hours = (current_time - trade.date_last_filled_utc).total_seconds() / 3600.0 + + if hours % 4 == 0: + self.log_trade( + last_candle=last_candle, + date=current_time, + action="🔴 CURRENT" if self.pairs[pair]['stop'] or last_candle['stop_buying_1h'] else "🟢 CURRENT", + dispo=dispo, + pair=pair, + rate=last_candle['close'], + trade_type='', + profit=round(profit, 2), + buys=count_of_buys, + stake=0 + ) + + pair_name = self.getShortName(pair) + if last_candle['max_rsi_24'] > 85 and profit > max(5, expected_profit) and (last_candle['hapercent'] < 0) and last_candle['sma60_deriv1'] < 0.05: + self.pairs[pair]['force_sell'] = False + self.pairs[pair]['force_buy'] = False #(self.pairs[pair]['count_of_buys'] - self.pairs[pair]['has_gain'] > 3) + return str(count_of_buys) + '_' + 'Rsi85_' + pair_name + '_' + str(self.pairs[pair]['has_gain']) + + if self.pairs[pair]['force_sell']: + self.pairs[pair]['force_sell'] = False + self.pairs[pair]['force_buy'] = (self.pairs[pair]['count_of_buys'] - self.pairs[pair]['has_gain'] > 3) + return str(count_of_buys) + '_' + 'Frc_' + pair_name + '_' + str(self.pairs[pair]['has_gain']) + + if profit > 0 and baisse > 0.30: + self.pairs[pair]['force_sell'] = False + self.pairs[pair]['force_buy'] = (self.pairs[pair]['count_of_buys'] - self.pairs[pair]['has_gain'] > 3) + return str(count_of_buys) + '_' + 'B30_' + pair_name + '_' + str(self.pairs[pair]['has_gain']) + + # if max_profit > 0.5 * count_of_buys and baisse > 0.15: + # self.pairs[pair]['force_sell'] = False + # 
def getShortName(self, pair):
    """Return ``pair`` with a ``/USDT``, ``/USDC``, ``_USDC`` or ``_USDT`` quote suffix removed."""
    return pair.replace("/USDT", '').replace("/USDC", '').replace("_USDC", '').replace("_USDT", '')


def informative_pairs(self):
    """Return ``(pair, '1h')`` for every pair of the current whitelist.

    The previous body appended to a local list whose initialisation had been
    commented out, so it raised ``UnboundLocalError`` on every call.
    """
    return [(pair, '1h') for pair in self.dp.current_whitelist()]


from typing import List


def multi_step_interpolate(self, pct: float, thresholds: List[float], factors: List[float]) -> float:
    """Piecewise-linear interpolation of ``factors`` over ``thresholds``.

    :param pct: Value to look up.
    :param thresholds: Breakpoints; the loop assumes they are in increasing order.
    :param factors: Factor attached to each breakpoint (same length).
    :return: ``factors[0]`` at or below the first breakpoint, ``factors[-1]``
        at or above the last one, a linear blend of the two neighbouring
        factors in between.
    """
    if pct <= thresholds[0]:
        return factors[0]
    if pct >= thresholds[-1]:
        return factors[-1]

    for i in range(1, len(thresholds)):
        if pct <= thresholds[i]:
            # Linear interpolation between thresholds[i - 1] and thresholds[i].
            return factors[i - 1] + (pct - thresholds[i - 1]) * (factors[i] - factors[i - 1]) / (
                    thresholds[i] - thresholds[i - 1])

    # Fallback; not expected to be reached with ordered thresholds.
    return factors[-1]
df[df['count_of_buys'] > 0].drop(columns=colonnes_a_exclure) + # df_filtered = df_filtered["first_buy", "last_max", "max_touch", "last_sell","last_buy", 'count_of_buys', 'current_profit'] + + print(df_filtered) + + self.columns_logged += 1 + date = str(date)[:16] if date else "-" + limit = None + # if buys is not None: + # limit = round(last_rate * (1 - self.fibo[buys] / 100), 4) + + rsi = '' + rsi_pct = '' + # if last_candle is not None: + # if (not np.isnan(last_candle['rsi_1d'])) and (not np.isnan(last_candle['rsi_1h'])): + # rsi = str(int(last_candle['rsi_1d'])) + " " + str(int(last_candle['rsi_1h'])) + # if (not np.isnan(last_candle['rsi_pct_1d'])) and (not np.isnan(last_candle['rsi_pct_1h'])): + # rsi_pct = str(int(10000 * last_candle['bb_mid_pct_1d'])) + " " + str( + # int(last_candle['rsi_pct_1d'])) + " " + str(int(last_candle['rsi_pct_1h'])) + + # first_rate = self.percent_threshold.value + # last_rate = self.threshold.value + # action = self.color_line(action, action) + sma5_1d = '' + sma5_1h = '' + + sma5 = str(sma5_1d) + ' ' + str(sma5_1h) + + last_lost = self.getLastLost(last_candle, pair) + + if buys is None: + buys = '' + + max_touch = '' + pct_max = self.getPctFirstBuy(pair, last_candle) + + total_counts = str(buys) + '/' + str(sum(pair_data['count_of_buys'] for pair_data in self.pairs.values())) + + dist_max = '' + + color = GREEN if profit > 0 else RED + color_sma24 = GREEN if last_candle['sma24_deriv1_1h'] > 0 else RED + color_sma24_2 = GREEN if last_candle['sma24_deriv2_1h'] > 0 else RED + color_sma5 = GREEN if last_candle['mid_smooth_5_deriv1_1h'] > 0 else RED + color_sma5_2 = GREEN if last_candle['mid_smooth_5_deriv2_1h'] > 0 else RED + color_sma5_1h = GREEN if last_candle['sma60_deriv1'] > 0 else RED + color_sma5_2h = GREEN if last_candle['sma60_deriv2'] > 0 else RED + color_smooth_1h = GREEN if last_candle['mid_smooth_1h_deriv1'] > 0 else RED + color_smooth2_1h = GREEN if last_candle['mid_smooth_1h_deriv2'] > 0 else RED + + last_max = 
int(self.pairs[pair]['last_max']) if self.pairs[pair]['last_max'] > 1 else round( + self.pairs[pair]['last_max'], 3) + last_min = int(self.pairs[pair]['last_min']) if self.pairs[pair]['last_min'] > 1 else round( + self.pairs[pair]['last_min'], 3) + + profit = str(profit) + '/' + str(round(self.pairs[pair]['max_profit'], 2)) + + # 🟢 Dérivée 1 > 0 et dérivée 2 > 0: tendance haussière qui s’accélère. + # 🟡 Dérivée 1 > 0 et dérivée 2 < 0: tendance haussière qui ralentit → essoufflement potentiel. + # 🔴 Dérivée 1 < 0 et dérivée 2 < 0: tendance baissière qui s’accélère. + # 🟠 Dérivée 1 < 0 et dérivée 2 > 0: tendance baissière qui ralentit → possible bottom. + self.printLog( + f"| {date:<16} |{action:<10} | {pair[0:3]:<3} | {trade_type or '-':<18} |{rate or '-':>9}| {dispo or '-':>6} " + f"|{color}{profit or '-':>10}{RESET}| {pct_max or '-':>6} | {round(self.pairs[pair]['max_touch'], 2) or '-':>11} | {last_lost or '-':>12} " + f"| {last_max or '-':>7} | {last_min or '-':>7} |{total_counts or '-':>5}|{stake or '-':>7}" + # f"|{round(last_candle['mid_smooth_24_deriv1'],3) or '-':>6}|{round(last_candle['mid_smooth_1h_deriv1'],3) or '-':>6}|{round(last_candle['mid_smooth_deriv1_1d'],3) or '-' :>6}|" + # f"{round(last_candle['mid_smooth_24_deriv2'],3) or '-' :>6}|{round(last_candle['mid_smooth_1h_deriv2'],3) or '-':>6}|{round(last_candle['mid_smooth_deriv2_1d'],3) or '-':>6}|" + f"{round(last_candle['max_rsi_24'], 1) or '-' :>6}|" + f"{dist_max:>7}|{color_sma24}{round(last_candle['sma24_deriv1_1h'], 2):>5}{RESET}" + f"|{color_sma5}{round(last_candle['mid_smooth_5_deriv1_1h'], 2):>5}{RESET}|{color_sma5_2}{round(last_candle['mid_smooth_5_deriv2_1h'], 2):>5}{RESET}" + f"|{color_sma5_1h}{round(last_candle['sma60_deriv1'], 2):>5}{RESET}|{color_sma5_2h}{round(last_candle['sma60_deriv2'], 2):>5}{RESET}" + f"|{color_smooth_1h}{round(last_candle['mid_smooth_1h_deriv1'], 2):>5}{RESET}|{color_smooth2_1h}{round(last_candle['mid_smooth_1h_deriv2'], 2):>5}{RESET}" + ) + + def 
def getLastLost(self, last_candle, pair):
    """Return the relative gap between the candle close and the pair's ``max_touch``.

    The result is ``(close - max_touch) / max_touch`` rounded to 3 decimals.
    ``max_touch`` starts at 0 and is reset to 0 when a trade is closed; in
    that case ``0.0`` is returned instead of dividing by zero.
    """
    max_touch = self.pairs[pair]['max_touch']
    if not max_touch:
        return 0.0
    return round((last_candle['close'] - max_touch) / max_touch, 3)


def printLineLog(self):
    """Emit the horizontal separator line of the trade log table via ``printLog``."""
    # f"sum1h|sum1d|Tdc|Tdh|Tdd| drv1 |drv_1h|drv_1d|"
    self.printLog(
        f"+{'-' * 18}+{'-' * 12}+{'-' * 5}+{'-' * 20}+{'-' * 9}+{'-' * 8}+{'-' * 12}+{'-' * 8}+{'-' * 13}+{'-' * 14}+{'-' * 9}{'-' * 9}+{'-' * 5}+{'-' * 7}+"
        f"+{'-' * 6}+{'-' * 7}+{'-' * 5}+{'-' * 5}+{'-' * 5}+{'-' * 5}+{'-' * 5}+{'-' * 5}+"
    )


def printLog(self, str):
    """Write a log line, choosing the sink from the run mode.

    Nothing is written in hyperopt mode. In ``backtest`` and
    ``lookahead-analysis`` modes the text is printed to stdout; in every
    other mode it goes to the module logger at INFO level.

    :param str: Text to write (the name shadows the builtin; it is kept so
        existing callers are unaffected).
    """
    runmode = self.dp.runmode.value
    # Equality, not "in ('hyperopt')": the latter is a substring test on a str.
    if self.config.get('runmode') == 'hyperopt' or runmode == 'hyperopt':
        return
    if runmode in ('backtest', 'lookahead-analysis'):
        print(str)
    else:
        logger.info(str)


def add_tendency_column(self, dataframe: pd.DataFrame, name: str, suffixe: str = '', eps: float = 1e-3,
                        d1_lim_inf: float = -0.01, d1_lim_sup: float = 0.01) -> pd.DataFrame:
    """Add a ``{name}_tdc{suffixe}`` column tagging the trend from two derivative columns.

    Reads ``{name}{suffixe}_deriv1`` (d1) and ``{name}{suffixe}_deriv2`` (d2)
    and assigns, in this order of priority:

    * ``0``   when ``|d1| < eps`` (flat);
    * ``2``   when ``d1 > d1_lim_sup`` and ``d2 > eps``, else ``1``;
    * ``-2``  when ``d1 < d1_lim_inf`` and ``d2 < -eps``, else ``-1``;
    * ``'Mid'`` for everything else (including NaN d1).

    :param dataframe: Frame holding the two derivative columns; modified in place.
    :param name: Indicator base name.
    :param suffixe: Optional suffix, used to handle several indicators.
    :param eps: Threshold under which a value is treated as zero.
    :param d1_lim_inf: Lower bound on d1 below which the trend is falling.
    :param d1_lim_sup: Upper bound on d1 above which the trend is rising.
    :return: The same dataframe with the new column.
    """
    d1 = dataframe[f"{name}{suffixe}_deriv1"]
    d2 = dataframe[f"{name}{suffixe}_deriv2"]

    flat = d1.abs() < eps
    rising = d1 > d1_lim_sup
    falling = d1 < d1_lim_inf

    # Masks are written from lowest to highest priority so that later
    # assignments override earlier ones exactly like the if-chain did.
    tags = pd.Series('Mid', index=dataframe.index, dtype=object)
    tags.loc[falling] = -1
    tags.loc[falling & (d2 < -eps)] = -2
    tags.loc[rising] = 1
    tags.loc[rising & (d2 > eps)] = 2
    tags.loc[flat] = 0

    # Prints the name of the generated column.
    print(f"{name}_tdc{suffixe}")
    # infer_objects() yields an integer column when no 'Mid' tag is present.
    dataframe[f"{name}_tdc{suffixe}"] = tags.infer_objects()
    return dataframe
+ # self.model_indicators = self.listUsableColumns(informative) + # if self.dp.runmode.value in ('backtest'): + # self.trainTensorFlow(informative, future_steps = self.future_steps) + # + # self.predictTensorFlow(informative) + # + # if self.dp.runmode.value in ('backtest'): + # self.kerasGenerateGraphs(informative) + + informative['stop_buying_deb'] = ((informative['max_rsi_24'] > self.rsi_deb_protect.value) + & (informative['sma24_deriv1'] < self.sma24_deriv1_deb_protect.value) + ) + informative['stop_buying_end'] = ((informative['max_rsi_24'] < self.rsi_end_protect.value) + & (informative['sma24_deriv1'] > self.sma24_deriv1_end_protect.value) + ) + + latched = np.zeros(len(informative), dtype=bool) + + for i in range(1, len(informative)): + if informative['stop_buying_deb'].iloc[i]: + latched[i] = True + elif informative['stop_buying_end'].iloc[i]: + latched[i] = False + else: + latched[i] = latched[i - 1] + + informative['stop_buying'] = latched + + dataframe = merge_informative_pair(dataframe, informative, self.timeframe, "1h", ffill=True) + + # ################### INFORMATIVE 1d + # informative = self.dp.get_pair_dataframe(pair=metadata['pair'], timeframe="1d") + # informative = self.populateDataframe(informative, timeframe='1d') + # # informative = self.calculateRegression(informative, 'mid', lookback=15) + # dataframe = merge_informative_pair(dataframe, informative, self.timeframe, "1d", ffill=True) + + dataframe['last_price'] = dataframe['close'] + dataframe['first_price'] = dataframe['close'] + if self.dp: + if self.dp.runmode.value in ('live', 'dry_run'): + self.getOpenTrades() + + for trade in self.trades: + if trade.pair != pair: + continue + filled_buys = trade.select_filled_orders('buy') + count = 0 + amount = 0 + for buy in filled_buys: + if count == 0: + dataframe['first_price'] = buy.price + self.pairs[pair]['first_buy'] = buy.price + self.pairs[pair]['first_amount'] = buy.price * buy.filled + # dataframe['close01'] = buy.price * 1.01 + + # 
Order(id=2396, trade=1019, order_id=29870026652, side=buy, filled=0.00078, price=63921.01, + # status=closed, date=2024-08-26 02:20:11) + dataframe['last_price'] = buy.price + self.pairs[pair]['last_buy'] = buy.price + count = count + 1 + amount += buy.price * buy.filled + # dataframe['mid_price'] = (dataframe['last_price'] + dataframe['first_price']) / 2 + count_buys = count + # dataframe['limit'] = dataframe['last_price'] * (1 - self.baisse[count] / 100) + self.pairs[pair]['total_amount'] = amount + + # dataframe['mid_smooth_tag'] = qtpylib.crossed_below(dataframe['mid_smooth_24_deriv1'], dataframe['mid_smooth_deriv2_24']) + + # =============================== + # lissage des valeurs horaires + dataframe['mid_smooth_1h'] = dataframe['mid'].rolling(window=6).mean() + dataframe["mid_smooth_1h_deriv1"] = 100 * dataframe["mid_smooth_1h"].diff().rolling(window=6).mean() / \ + dataframe['mid_smooth_1h'] + dataframe["mid_smooth_1h_deriv2"] = 100 * dataframe["mid_smooth_1h_deriv1"].diff().rolling(window=6).mean() + + dataframe['mid_smooth_5h'] = talib.EMA(dataframe, timeperiod=60) # dataframe['mid'].rolling(window=60).mean() + dataframe["mid_smooth_5h_deriv1"] = 100 * dataframe["mid_smooth_5h"].diff().rolling(window=60).mean() / \ + dataframe['mid_smooth_5h'] + dataframe["mid_smooth_5h_deriv2"] = 100 * dataframe["mid_smooth_5h_deriv1"].diff().rolling(window=60).mean() + + dataframe = self.calculateRegression(dataframe, 'mid', lookback=10, future_steps=10, model_type="poly") + dataframe = self.calculateRegression(dataframe, 'sma24', lookback=12, future_steps=12) + + # dataframe["ms-10"] = dataframe[self.indicator_target].shift(10) + # dataframe["ms-5"] = dataframe[self.indicator_target].shift(5) + # dataframe["ms-4"] = dataframe[self.indicator_target].shift(4) + # dataframe["ms-3"] = dataframe[self.indicator_target].shift(3) + # dataframe["ms-2"] = dataframe[self.indicator_target].shift(2) + # dataframe["ms-1"] = dataframe[self.indicator_target].shift(1) + # 
dataframe["ms-0"] = dataframe[self.indicator_target] + # dataframe["ms+10"] = dataframe["mid_smooth_24"].shift(-11) + + self.model_indicators = self.listUsableColumns(dataframe) + + # # Quantile + # self.add_future_quantiles( + # dataframe, + # indic="mid", + # lookback=40, + # future_steps=5 + # ) + + # TENSOR FLOW + if self.dp.runmode.value in ('backtest'): + self.trainTensorFlow(dataframe, future_steps = self.future_steps) + + self.predictTensorFlow(dataframe) + + if self.dp.runmode.value in ('backtest'): + self.kerasGenerateGraphs(dataframe) + + # SKLEARN + # if self.dp.runmode.value in ('backtest'): + # self.trainModel(dataframe, metadata) + + # short_pair = self.getShortName(pair) + # self.model = joblib.load(f"{short_pair}_rf_model.pkl") + # + # # Préparer les features pour la prédiction + # features = dataframe[self.model_indicators].fillna(0) + # + # # Prédiction : probabilité que le prix monte + # # probs = self.model.predict_proba(features)[:, 1] + # probs = self.model.predict(features) + # + # # Sauvegarder la probabilité pour l’analyse + # dataframe['ml_prob'] = probs + # + # self.inspect_model(self.model) + + return dataframe + + def trainModel(self, dataframe: DataFrame, metadata: dict): + pair = self.getShortName(metadata['pair']) + pd.set_option('display.max_rows', None) + pd.set_option('display.max_columns', None) + pd.set_option("display.width", 200) + + os.makedirs(self.path, exist_ok=True) + + df = dataframe[self.model_indicators].copy() + + # Corrélations des colonnes + corr = df.corr(numeric_only=True) + print("Corrélation des colonnes") + print(corr) + + # 3️⃣ Créer la cible : 1 si le prix monte dans les prochaines bougies + # df['target'] = (df['sma24'].shift(-24) > df['sma24']).astype(int) + df['target'] = dataframe[self.indicator_target].shift(-24) # > df['sma24'] * 1.003).astype(int) + df['target'] = df['target'].fillna(0) #.astype(int) + + # Corrélations triées par importance avec une colonne cible + target_corr = 
df.corr(numeric_only=True)["target"].sort_values(ascending=False) + print("Corrélations triées par importance avec une colonne cible") + print(target_corr) + + # Corrélations triées par importance avec une colonne cible + corr = df.corr(numeric_only=True) + corr_unstacked = ( + corr.unstack() + .reset_index() + .rename(columns={"level_0": "col1", "level_1": "col2", 0: "corr"}) + ) + # Supprimer les doublons col1/col2 inversés et soi-même + corr_unstacked = corr_unstacked[corr_unstacked["col1"] < corr_unstacked["col2"]] + + # Trier par valeur absolue de corrélation + corr_sorted = corr_unstacked.reindex(corr_unstacked["corr"].abs().sort_values(ascending=False).index) + print("Trier par valeur absolue de corrélation") + print(corr_sorted.head(20)) + + # --- Calcul de la corrélation --- + corr = df.corr(numeric_only=True) # évite les colonnes non numériques + corr = corr * 100 # passage en pourcentage + + # --- Masque pour n’afficher que le triangle supérieur (optionnel) --- + mask = np.triu(np.ones_like(corr, dtype=bool)) + + # --- Création de la figure --- + fig, ax = plt.subplots(figsize=(96, 36)) + + # --- Heatmap avec un effet “température” --- + sns.heatmap( + corr, + mask=mask, + cmap="coolwarm", # palette bleu → rouge + center=0, # 0 au centre + annot=True, # affiche les valeurs dans chaque case + fmt=".0f", # format entier (pas de décimale) + cbar_kws={"label": "Corrélation (%)"}, # légende à droite + linewidths=0.5, # petites lignes entre les cases + ax=ax + ) + + # --- Personnalisation --- + ax.set_title("Matrice de corrélation (en %)", fontsize=20, pad=20) + plt.xticks(rotation=45, ha="right") + plt.yticks(rotation=0) + + # --- Sauvegarde --- + output_path = f"{self.path}/Matrice_de_correlation_temperature.png" + plt.savefig(output_path, bbox_inches="tight", dpi=150) + plt.close(fig) + + print(f"✅ Matrice enregistrée : {output_path}") + + # Nettoyage + df = df.dropna() + + X = df[self.model_indicators] + y = df['target'] # ta colonne cible binaire ou 
numérique + print(self.feature_auc_scores(X, y)) + + # 4️⃣ Split train/test + X = df[self.model_indicators] + y = df['target'] + # Séparation temporelle (train = 80 %, valid = 20 %) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False) + + print("NaN per column:") + print(X_train.isna().sum().sort_values(ascending=False).head(20)) + + # Nettoyage des valeurs invalides + + selector = VarianceThreshold(threshold=0.0001) + selector.fit(X_train) + selected = X_train.columns[selector.get_support()] + print("Colonnes conservées :", list(selected)) + + # 1️⃣ Entraîne ton modèle LGBM normal + + # train_model = LGBMRegressor( + # objective='regression', + # metric='rmse', # tu peux aussi tester 'mae' + # n_estimators=300, + # learning_rate=0.05, + # max_depth=7, + # subsample=0.8, + # colsample_bytree=0.8, + # random_state=42 + # ) + # train_model.fit(X_train, y_train) + + train_model, selected_features = self.optuna(self.path, X_train, X_test, y_train, y_test) + print("Features retenues :", list(selected_features)) + + # # 2️⃣ Sélection des features AVANT calibration + # sfm = SelectFromModel(train_model, threshold="median", prefit=True) + # selected_features = X_train.columns[sfm.get_support()] + # print(selected_features) + + train_model.fit(X_train, y_train) + + # Importances + importances = pd.DataFrame({ + "feature": train_model.feature_name_, + "importance": train_model.feature_importances_ + }).sort_values("importance", ascending=False) + print("\n===== 🔍 IMPORTANCE DES FEATURES =====") + + print(importances) + + # 6️⃣ Évaluer la précision (facultatif) + preds = train_model.predict(X_test) + + mse = mean_squared_error(y_test, preds) + rmse = np.sqrt(mse) + r2 = r2_score(y_test, preds) + + print(f"RMSE: {rmse:.5f} | R²: {r2:.3f}") + + # acc = accuracy_score(y_test, preds) + # print(f"Accuracy: {acc:.3f}") + + # 7️⃣ Sauvegarde du modèle + joblib.dump(train_model, f"{pair}_rf_model.pkl") + print(f"✅ Modèle sauvegardé sous 
{pair}_rf_model.pkl") + + # # Quantile + # dataframe = self.add_future_quantiles( + # df, + # indic="mid", + # lookback=40, + # future_steps=5 + # ) + + self.analyze_model(pair, train_model, X_train, X_test, y_train, y_test) + + def listUsableColumns(self, dataframe): + # Étape 1 : sélectionner numériques + numeric_cols = dataframe.select_dtypes(include=['int64', 'float64']).columns + # Étape 2 : enlever constantes + usable_cols = [c for c in numeric_cols if dataframe[c].nunique() > 1 + and not c.endswith("_state") and not c.endswith("_1d") + # and not c.endswith("_1h") + and not c.endswith("_count") + # and not c.startswith("open") and not c.startswith("close") + # and not c.startswith("low") and not c.startswith("high") + # and not c.startswith("haopen") and not c.startswith("haclose") + # and not c.startswith("bb_lower") and not c.startswith("bb_upper") + # and not c.startswith("bb_middle") + and not c.endswith("_class") and not c.endswith("_price") + and not c.startswith('stop_buying')] + # Étape 3 : remplacer inf et NaN par 0 + dataframe[usable_cols] = dataframe[usable_cols].replace([np.inf, -np.inf], 0).fillna(0) + print("Colonnes utilisables pour le modèle :") + print(usable_cols) + self.model_indicators = usable_cols + # self.model_indicators = [ + # 'volume', 'hapercent', 'mid', 'percent', 'percent3', 'percent12', + # 'percent24', + # 'sma5', 'sma5_dist', 'sma5_deriv1', 'sma5_deriv2', 'sma12', 'sma12_dist', + # 'sma12_deriv1', 'sma12_deriv2', 'sma24', 'sma24_dist', 'sma24_deriv1', 'sma24_deriv2', + # # 'sma48', 'sma48_dist', 'sma48_deriv1', 'sma48_deriv2', 'sma60', 'sma60_dist', + # # 'sma60_deriv1', 'sma60_deriv2', 'mid_smooth_3', 'mid_smooth_3_dist', + # # 'mid_smooth_3_deriv1', 'mid_smooth_3_deriv2', 'mid_smooth_5', 'mid_smooth_5_dist', + # # 'mid_smooth_5_deriv1', 'mid_smooth_5_deriv2', 'mid_smooth_12', 'mid_smooth_12_dist', + # # 'mid_smooth_12_deriv1', 'mid_smooth_12_deriv2', 'mid_smooth_24', 'mid_smooth_24_dist', + # # 'mid_smooth_24_deriv1', 
'mid_smooth_24_deriv2', 'rsi', 'max_rsi_12', 'max_rsi_24', + # 'rsi_dist', 'rsi_deriv1', 'rsi_deriv2', 'max12', 'min12', 'max60', 'min60', + # 'min_max_60', 'bb_percent', 'bb_width', 'macd', 'macdsignal', 'macdhist', 'slope', + # 'slope_smooth', 'atr', 'atr_norm', 'adx', 'obv', 'vol_24', + # # 'down_count', 'up_count', + # # 'down_pct', 'up_pct', 'rsi_slope', 'adx_change', 'volatility_ratio', 'rsi_diff', + # # 'slope_ratio', 'volume_sma_deriv', 'volume_dist', 'volume_deriv1', 'volume_deriv2', + # # 'slope_norm', 'mid_smooth_1h_deriv1', 'mid_smooth_1h_deriv2', 'mid_smooth_5h', + # # 'mid_smooth_5h_deriv1', 'mid_smooth_5h_deriv2', 'mid_future_pred_cons', + # # 'sma24_future_pred_cons' + # ] + return self.model_indicators + + def inspect_model(self, model): + """ + Affiche les informations d'un modèle ML déjà entraîné. + Compatible avec scikit-learn, xgboost, lightgbm, catboost... + """ + + print("===== 🔍 INFORMATIONS DU MODÈLE =====") + + # Type de modèle + print(f"Type : {type(model).__name__}") + print(f"Module : {model.__class__.__module__}") + + # Hyperparamètres + if hasattr(model, "get_params"): + params = model.get_params() + print(f"\n===== ⚙️ HYPERPARAMÈTRES ({len(params)}) =====") + for k, v in params.items(): + print(f"{k}: {v}") + + # Nombre d’estimateurs + if hasattr(model, "n_estimators"): + print(f"\nNombre d’estimateurs : {model.n_estimators}") + + # Importance des features + if hasattr(model, "feature_importances_"): + print("\n===== 📊 IMPORTANCE DES FEATURES =====") + + # Correction ici : + feature_names = getattr(model, "feature_names_in_", None) + if isinstance(feature_names, np.ndarray): + feature_names = feature_names.tolist() + elif feature_names is None: + feature_names = [f"feature_{i}" for i in range(len(model.feature_importances_))] + + fi = pd.DataFrame({ + "feature": feature_names, + "importance": model.feature_importances_ + }).sort_values(by="importance", ascending=False) + + print(fi) + + # Coefficients (modèles linéaires) + if 
hasattr(model, "coef_"): + print("\n===== ➗ COEFFICIENTS =====") + coef = np.array(model.coef_) + if coef.ndim == 1: + for i, c in enumerate(coef): + print(f"Feature {i}: {c:.6f}") + else: + print(coef) + + # Intercept + if hasattr(model, "intercept_"): + print("\nIntercept :", model.intercept_) + + # Classes connues + if hasattr(model, "classes_"): + print("\n===== 🎯 CLASSES =====") + print(model.classes_) + + # Scores internes + for attr in ["best_score_", "best_iteration_", "best_ntree_limit", "score_"]: + if hasattr(model, attr): + print(f"\n{attr} = {getattr(model, attr)}") + + # Méthodes disponibles + print("\n===== 🧩 MÉTHODES DISPONIBLES =====") + methods = [m for m, _ in inspect.getmembers(model, predicate=inspect.ismethod)] + print(", ".join(methods[:15]) + ("..." if len(methods) > 15 else "")) + + print("\n===== ✅ FIN DE L’INSPECTION =====") + + def analyze_model(self, pair, model, X_train, X_test, y_train, y_test): + """ + Analyse complète d'un modèle ML supervisé (classification binaire). + Affiche performances, importance des features, matrices, seuils, etc. 
+ """ + output_dir = f"user_data/plots/{pair}/" + os.makedirs(output_dir, exist_ok=True) + + # ---- Importance des features ---- + if hasattr(model, "feature_importances_"): + print("\n===== 🔍 IMPORTANCE DES FEATURES =====") + importance = pd.DataFrame({ + "feature": X_train.columns, + "importance": model.feature_importances_ + }).sort_values(by="importance", ascending=False) + print(importance) + top_n = 20 + importance = importance.head(top_n) + + # Crée une figure plus grande + fig, ax = plt.subplots(figsize=(24, 8)) # largeur=24 pouces, hauteur=8 pouces + + # Trace le bar plot sur cet axe + importance.plot.bar(x="feature", y="importance", legend=False, ax=ax) + + # Tourner les labels pour plus de lisibilité + ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right') + + plt.title("Importance des features") + # plt.show() + plt.savefig(os.path.join(output_dir, "Importance des features.png"), bbox_inches="tight") + plt.close() + + # ---- Arbre de décision (extrait) ---- + if hasattr(model, "estimators_"): + print("\n===== 🌳 EXTRAIT D’UN ARBRE =====") + print(export_text(model.estimators_[0], feature_names=list(X_train.columns))[:800]) + + # --- Après l'entraînement du modèle --- + preds = model.predict(X_test) + + # --- Évaluation --- + mse = mean_squared_error(y_test, preds) + rmse = np.sqrt(mse) + r2 = r2_score(y_test, preds) + + print(f"RMSE: {rmse:.5f} | R²: {r2:.3f}") + + # --- Création du dossier de sortie --- + os.makedirs(output_dir, exist_ok=True) + + # --- Graphique prédiction vs réel --- + plt.figure(figsize=(8, 8)) + plt.scatter(y_test, preds, alpha=0.4, s=15) + plt.xlabel("Valeurs réelles", fontsize=12) + plt.ylabel("Valeurs prédites", fontsize=12) + plt.title(f"LightGBM Régression — Prédiction vs Réel\nRMSE={rmse:.5f} | R²={r2:.3f}", fontsize=14) + plt.plot( + [y_test.min(), y_test.max()], + [y_test.min(), y_test.max()], + 'r--', + linewidth=1, + label="Ligne idéale" + ) + plt.legend() + + # --- Sauvegarde --- + plot_path = 
os.path.join(output_dir, "LightGBM_regression_pred_vs_real.png") + plt.savefig(plot_path, bbox_inches="tight", dpi=200) + plt.close() + + self.plot_pred_vs_real_filtered(model, X_test, y_test, preds, output_dir) + + + print(f"✅ Graphique sauvegardé : {plot_path}") + + # ax = lgb.plot_tree(model, tree_index=0, figsize=(30, 20), show_info=["split_gain", "internal_value", "internal_count"]) + # plt.title("Arbre de décision n°0") + # plt.savefig(os.path.join(output_dir, "lgbm_tree_0.png"), bbox_inches="tight") + # plt.close() + + for i in range(5): + ax = lgb.plot_tree(model, tree_index=i, figsize=(20, 12)) + plt.title(f"Arbre {i}") + plt.savefig(os.path.join(output_dir, f"lgbm_tree_{i}.png"), bbox_inches="tight") + plt.close() + + ax = lgb.plot_tree(model, figsize=(40, 20)) + plt.title("Vue globale du modèle LGBM") + plt.savefig(os.path.join(output_dir, "lgbm_all_trees.png"), bbox_inches="tight") + plt.close() + # X_test = np.linspace(0, 10, 1000).reshape(-1, 1) + y_pred = model.predict(X_test) + + self.graphFonctionApprise(output_dir, X_test, y_test, y_pred) + self.graphFonctionAppriseFeature(output_dir, X_test, y_test, y_pred) + + # ============================================================================== + + ax = lgb.plot_importance(model, max_num_features=30, figsize=(12, 6)) + plt.title("Importance des features - LGBM") + plt.savefig(os.path.join(output_dir, "lgbm_feature_importance.png"), bbox_inches="tight") + plt.close() + + corr = X_train.corr() * 100 # en pourcentage + + plt.figure(figsize=(20, 16)) + sns.heatmap(corr, cmap="coolwarm", center=0, annot=False, fmt=".1f", cbar_kws={'label': 'Corrélation (%)'}) + plt.title("Matrice de corrélation (%)") + plt.savefig(os.path.join(output_dir, "correlation_matrix.png"), bbox_inches="tight") + plt.close() + + plt.figure(figsize=(10, 6)) + plt.scatter(y_test, model.predict(X_test), alpha=0.5) + plt.xlabel("Valeurs réelles") + plt.ylabel("Prédictions du modèle") + plt.title("Comparaison y_test vs y_pred") + 
plt.savefig(os.path.join(output_dir, "ytest_vs_ypred.png"), bbox_inches="tight") + plt.close() + + print("\n===== ✅ FIN DE L’ANALYSE =====") + + def plot_pred_vs_real_filtered(self, model, X_test, y_test, preds, output_dir, top_n=5): + """ + Affiche le graphique prédiction vs réel pour les N features les plus importantes. + """ + # --- 1️⃣ Extraire les features les plus importantes --- + importance_df = pd.DataFrame({ + "feature": X_test.columns, + "importance": model.feature_importances_ + }).sort_values(by="importance", ascending=False) + + top_features = importance_df.head(top_n)["feature"].tolist() + print(f"Top {top_n} features: {top_features}") + + # --- 2️⃣ Créer un masque pour ne garder que les lignes où au moins une des top features varie fortement --- + X_top = X_test[top_features] + + # Optionnel : filtrer les points atypiques pour lisser le nuage + mask = np.all(np.abs((X_top - X_top.mean()) / X_top.std()) < 3, axis=1) + X_filtered = X_top[mask] + y_filtered = y_test[mask] + preds_filtered = preds[mask] + + # --- 3️⃣ Tracer --- + plt.figure(figsize=(8, 8)) + plt.scatter(y_filtered, preds_filtered, alpha=0.4, s=15, c='blue', label=f"Top {top_n} features") + plt.xlabel("Valeurs réelles", fontsize=12) + plt.ylabel("Valeurs prédites", fontsize=12) + plt.title(f"LightGBM Régression — Prédiction vs Réel (filtré sur top {top_n} features)", fontsize=14) + plt.plot( + [y_filtered.min(), y_filtered.max()], + [y_filtered.min(), y_filtered.max()], + 'r--', + linewidth=1, + label="Ligne idéale" + ) + plt.legend() + plt.grid(True) + + out_path = f"{output_dir}/lgbm_pred_vs_real_top{top_n}.png" + plt.savefig(out_path, bbox_inches="tight") + plt.close() + + def plot_threshold_analysis(self, y_true, y_proba, step=0.05, save_path=None): + """ + Affiche la précision, le rappel et le F1-score selon le seuil de décision. 
+ y_true : labels réels (0 ou 1) + y_proba : probabilités prédites (P(hausse)) + step : pas entre les seuils testés + save_path : si renseigné, enregistre l'image au lieu d'afficher + """ + + # Le graphique généré affichera trois courbes : + # + # 🔵 Precision — la fiabilité de tes signaux haussiers. + # 🟢 Recall — la proportion de hausses que ton modèle détecte. + # 🟣 F1-score — le compromis optimal entre les deux. + + thresholds = np.arange(0, 1.01, step) + precisions, recalls, f1s = [], [], [] + + for thr in thresholds: + preds = (y_proba >= thr).astype(int) + precisions.append(precision_score(y_true, preds)) + recalls.append(recall_score(y_true, preds)) + f1s.append(f1_score(y_true, preds)) + + plt.figure(figsize=(10, 6)) + plt.plot(thresholds, precisions, label="Precision", linewidth=2) + plt.plot(thresholds, recalls, label="Recall", linewidth=2) + plt.plot(thresholds, f1s, label="F1-score", linewidth=2, linestyle="--") + plt.axvline(0.5, color='gray', linestyle=':', label="Seuil 0.5") + plt.title("📊 Performance selon le seuil de probabilité", fontsize=14) + plt.xlabel("Seuil de décision (threshold)") + plt.ylabel("Score") + plt.legend() + plt.grid(True, alpha=0.3) + + if save_path: + plt.savefig(save_path, bbox_inches='tight') + print(f"✅ Graphique enregistré : {save_path}") + else: + plt.show() + + # # ============================= + # # Exemple d’utilisation : + # # ============================= + # if __name__ == "__main__": + # # Exemple : chargement d’un modèle et test + # import joblib + # + # model = joblib.load("/media/Home/home/souti/freqtrade/user_data/strategies/tools/sklearn/model.pkl") + # data = np.load("/media/Home/home/souti/freqtrade/user_data/strategies/tools/sklearn/test_data.npz") + # X_test, y_test = data["X"], data["y"] + # + # y_proba = model.predict_proba(X_test)[:, 1] + # + # # Trace ou enregistre le graphique + # plot_threshold_analysis(y_test, y_proba, step=0.05, + # 
save_path="/media/Home/home/souti/freqtrade/user_data/strategies/tools/sklearn/threshold_analysis.png") + + + def populateDataframe(self, dataframe, timeframe='5m'): + dataframe = dataframe.copy() + heikinashi = qtpylib.heikinashi(dataframe) + dataframe['haopen'] = heikinashi['open'] + dataframe['haclose'] = heikinashi['close'] + dataframe['hapercent'] = (dataframe['haclose'] - dataframe['haopen']) / dataframe['haclose'] + dataframe['mid'] = dataframe['haopen'] + (dataframe['haclose'] - dataframe['haopen']) / 2 + + dataframe["percent"] = dataframe['close'].pct_change() + dataframe["percent3"] = dataframe['close'].pct_change(3).rolling(3).mean() + dataframe["percent12"] = dataframe['close'].pct_change(12).rolling(12).mean() + dataframe["percent24"] = dataframe['close'].pct_change(24).rolling(24).mean() + + # if self.dp.runmode.value in ('backtest'): + # dataframe['futur_percent'] = 100 * (dataframe['close'].shift(-1) - dataframe['close']) / dataframe['close'] + + dataframe['sma5'] = dataframe['mid'].ewm(span=5, adjust=False).mean() #dataframe["mid"].rolling(window=5).mean() + self.calculeDerivees(dataframe, 'sma5', timeframe=timeframe, ema_period=5) + dataframe['sma12'] = dataframe['mid'].ewm(span=12, adjust=False).mean() #dataframe["mid"].rolling(window=12).mean() + self.calculeDerivees(dataframe, 'sma12', timeframe=timeframe, ema_period=12) + dataframe['sma24'] = dataframe['mid'].ewm(span=24, adjust=False).mean() #dataframe["mid"].rolling(window=24).mean() + self.calculeDerivees(dataframe, 'sma24', timeframe=timeframe, ema_period=24) + dataframe['sma48'] = dataframe['mid'].ewm(span=48, adjust=False).mean() #dataframe["mid"].rolling(window=48).mean() + self.calculeDerivees(dataframe, 'sma48', timeframe=timeframe, ema_period=48) + dataframe['sma60'] = dataframe['mid'].ewm(span=60, adjust=False).mean() #dataframe["mid"].rolling(window=60).mean() + self.calculeDerivees(dataframe, 'sma60', timeframe=timeframe, ema_period=60) + + dataframe = 
self.calculateDerivation(dataframe, window=3, suffixe="_3",timeframe=timeframe) + dataframe = self.calculateDerivation(dataframe, window=5, suffixe="_5",timeframe=timeframe) + dataframe = self.calculateDerivation(dataframe, window=12, suffixe="_12",timeframe=timeframe) + dataframe = self.calculateDerivation(dataframe, window=24, suffixe="_24", timeframe=timeframe) + # print(metadata['pair']) + dataframe['rsi'] = talib.RSI(dataframe['close'], timeperiod=14) + dataframe['max_rsi_12'] = talib.MAX(dataframe['rsi'], timeperiod=12) + dataframe['max_rsi_24'] = talib.MAX(dataframe['rsi'], timeperiod=24) + self.calculeDerivees(dataframe, 'rsi', timeframe=timeframe, ema_period=12) + dataframe['max12'] = talib.MAX(dataframe['close'], timeperiod=12) + dataframe['min12'] = talib.MIN(dataframe['close'], timeperiod=12) + dataframe['max60'] = talib.MAX(dataframe['close'], timeperiod=60) + dataframe['min60'] = talib.MIN(dataframe['close'], timeperiod=60) + dataframe['min_max_60'] = ((dataframe['max60'] - dataframe['close']) / dataframe['min60']) + # dataframe['min36'] = talib.MIN(dataframe['close'], timeperiod=36) + # dataframe['max36'] = talib.MAX(dataframe['close'], timeperiod=36) + # dataframe['pct36'] = 100 * (dataframe['max36'] - dataframe['min36']) / dataframe['min36'] + # dataframe['maxpct36'] = talib.MAX(dataframe['pct36'], timeperiod=36) + # Bollinger Bands + bollinger = qtpylib.bollinger_bands(qtpylib.typical_price(dataframe), window=20, stds=2) + dataframe['bb_lowerband'] = bollinger['lower'] + dataframe['bb_middleband'] = bollinger['mid'] + dataframe['bb_upperband'] = bollinger['upper'] + dataframe["bb_percent"] = ( + (dataframe["close"] - dataframe["bb_lowerband"]) / + (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) + ) + dataframe["bb_width"] = (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / dataframe["sma5"] + + # dataframe["bb_width"] = ( + # (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / dataframe["bb_middleband"] + # ) + + # Calcul 
MACD + macd, macdsignal, macdhist = talib.MACD( + dataframe['close'], + fastperiod=12, + slowperiod=26, + signalperiod=9 + ) + + # | Nom | Formule / définition | Signification | + # | ---------------------------- | ------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | + # | **MACD** (`macd`) | `EMA_fast - EMA_slow` (ex : 12-26 périodes) | Montre l’écart entre la moyenne courte et la moyenne longue.
- Positive → tendance haussière
- Négative → tendance baissière | + # | **Signal** (`macdsignal`) | `EMA_9(MACD)` | Sert de ligne de **signal de déclenchement**.
- Croisement du MACD au-dessus → signal d’achat
- Croisement du MACD en dessous → signal de vente | + # | **Histogramme** (`macdhist`) | `MACD - Signal` | Montre la **force et l’accélération** de la tendance.
- Positif et croissant → tendance haussière qui s’accélère
- Positif mais décroissant → ralentissement de la hausse
- Négatif et décroissant → baisse qui s’accélère
- Négatif mais croissant → ralentissement de la baisse | + + # Ajouter dans le dataframe + dataframe['macd'] = macd + dataframe['macdsignal'] = macdsignal + dataframe['macdhist'] = macdhist + + # Regarde dans le futur + # # --- Rendre relatif sur chaque série (-1 → 1) --- + # for col in ['macd', 'macdsignal', 'macdhist']: + # series = dataframe[col] + # valid = series[~np.isnan(series)] # ignorer NaN + # min_val = valid.min() + # max_val = valid.max() + # span = max_val - min_val if max_val != min_val else 1 + # dataframe[f'{col}_rel'] = 2 * ((series - min_val) / span) - 1 + # + # dataframe['tdc_macd'] = self.macd_tendance_int( + # dataframe, + # macd_col='macd_rel', + # signal_col='macdsignal_rel', + # hist_col='macdhist_rel' + # ) + + # --- pente brute --- + dataframe['slope'] = dataframe['sma24'].diff() + + # --- lissage EMA --- + dataframe['slope_smooth'] = dataframe['slope'].ewm(span=10, adjust=False).mean() + + # --- Volatilité normalisée --- + dataframe['atr'] = ta.volatility.AverageTrueRange( + high=dataframe['high'], low=dataframe['low'], close=dataframe['close'], window=14 + ).average_true_range() + dataframe['atr_norm'] = dataframe['atr'] / dataframe['close'] + + # --- Force de tendance --- + dataframe['adx'] = ta.trend.ADXIndicator( + high=dataframe['high'], low=dataframe['low'], close=dataframe['close'], window=14 + ).adx() + + # --- Volume directionnel (On Balance Volume) --- + dataframe['obv'] = ta.volume.OnBalanceVolumeIndicator( + close=dataframe['close'], volume=dataframe['volume'] + ).on_balance_volume() + + # --- Volatilité récente (écart-type des rendements) --- + dataframe['vol_24'] = dataframe['percent'].rolling(24).std() + + # Compter les baisses / hausses consécutives + self.calculateDownAndUp(dataframe, limit=0.0001) + + # df : ton dataframe OHLCV + indicateurs existants + # Assurez-vous que les colonnes suivantes existent : + # 'max_rsi_12', 'roc_24', 'bb_percent_1h' + + # --- Filtrage des NaN initiaux --- + # dataframe = 
dataframe.dropna() + + dataframe['rsi_slope'] = dataframe['rsi'].diff(3) / 3 # vitesse moyenne du RSI + dataframe['adx_change'] = dataframe['adx'] - dataframe['adx'].shift(12) # évolution de la tendance + dataframe['volatility_ratio'] = dataframe['atr_norm'] / dataframe['bb_width'] + + dataframe["rsi_diff"] = dataframe["rsi"] - dataframe["rsi"].shift(3) + dataframe["slope_ratio"] = dataframe["sma5_deriv1"] / (dataframe["sma60_deriv1"] + 1e-9) + dataframe["divergence"] = (dataframe["rsi_deriv1"] * dataframe["sma5_deriv1"]) < 0 + + ########################### + + dataframe['volume_sma_deriv'] = dataframe['volume'] * dataframe['sma5_deriv1'] / (dataframe['volume'].rolling(5).mean()) + self.calculeDerivees(dataframe, 'volume', timeframe=timeframe, ema_period=12) + + self.setTrends(dataframe) + + return dataframe + + def feature_auc_scores(self, X, y): + aucs = {} + for col in X.columns: + try: + aucs[col] = roc_auc_score(y, X[col].ffill().fillna(0)) + except Exception: + aucs[col] = np.nan + return pd.Series(aucs).sort_values(ascending=False) + + def macd_tendance_int(self, dataframe: pd.DataFrame, + macd_col='macd', + signal_col='macdsignal', + hist_col='macdhist', + eps=0.0) -> pd.Series: + """ + Renvoie la tendance MACD sous forme d'entiers. + 2 : Haussier + 1 : Ralentissement hausse + 0 : Neutre + -1 : Ralentissement baisse + -2 : Baissier + """ + + # | Nom | Formule / définition | Signification | + # | ---------------------------- | ------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | + # | **MACD** (`macd`) | `EMA_fast - EMA_slow` (ex : 12-26 périodes) | Montre l’écart entre la moyenne courte et la moyenne longue.
- Positive → tendance haussière
- Négative → tendance baissière | + # | **Signal** (`macdsignal`) | `EMA_9(MACD)` | Sert de ligne de **signal de déclenchement**.
- Croisement du MACD au-dessus → signal d’achat
- Croisement du MACD en dessous → signal de vente | + # | **Histogramme** (`macdhist`) | `MACD - Signal` | Montre la **force et l’accélération** de la tendance.
- Positif et croissant → tendance haussière qui s’accélère
- Positif mais décroissant → ralentissement de la hausse
- Négatif et décroissant → baisse qui s’accélère
- Négatif mais croissant → ralentissement de la baisse | + + # | Situation | MACD | Signal | Hist | Interprétation | + # | -------------------------- | ---------- | --------- | -------- | ------------------------------------------ | + # | MACD > 0, Hist croissant | au-dessus | croissant | Haussier | Momentum fort → tendance haussière | + # | MACD > 0, Hist décroissant | au-dessus | en baisse | Momentum | La hausse ralentit, prudence | + # | MACD < 0, Hist décroissant | en dessous | en baisse | Baissier | Momentum fort → tendance baissière | + # | MACD < 0, Hist croissant | en dessous | en hausse | Rebond ? | La baisse ralentit → possible retournement | + + # Créer une série de 0 par défaut + tendance = pd.Series(0, index=dataframe.index) + + # Cas MACD > signal + mask_up = dataframe[macd_col] > dataframe[signal_col] + eps + mask_up_hist_pos = mask_up & (dataframe[hist_col] > 0) + mask_up_hist_neg = mask_up & (dataframe[hist_col] <= 0) + + tendance[mask_up_hist_pos] = 2 # Haussier + tendance[mask_up_hist_neg] = 1 # Ralentissement hausse + + # Cas MACD < signal + mask_down = dataframe[macd_col] < dataframe[signal_col] - eps + mask_down_hist_neg = mask_down & (dataframe[hist_col] < 0) + mask_down_hist_pos = mask_down & (dataframe[hist_col] >= 0) + + tendance[mask_down_hist_neg] = -2 # Baissier + tendance[mask_down_hist_pos] = -1 # Ralentissement baisse + + # Les NaN deviennent neutre + tendance[dataframe[[macd_col, signal_col, hist_col]].isna().any(axis=1)] = 0 + + return tendance + + def calculateDownAndUp(self, dataframe, limit=0.0001): + dataframe['down'] = dataframe['hapercent'] <= limit + dataframe['up'] = dataframe['hapercent'] >= limit + dataframe['down_count'] = - dataframe['down'].astype(int) * ( + dataframe['down'].groupby((dataframe['down'] != dataframe['down'].shift()).cumsum()).cumcount() + 1) + dataframe['up_count'] = dataframe['up'].astype(int) * ( + dataframe['up'].groupby((dataframe['up'] != dataframe['up'].shift()).cumsum()).cumcount() + 1) + # Créer 
une colonne vide + dataframe['down_pct'] = self.calculateUpDownPct(dataframe, 'down_count') + dataframe['up_pct'] = self.calculateUpDownPct(dataframe, 'up_count') + + def calculateDerivation(self, dataframe, window=12, suffixe='', timeframe='5m'): + dataframe[f"mid_smooth{suffixe}"] = dataframe['mid'].rolling(window).mean() + dataframe = self.calculeDerivees(dataframe, f"mid_smooth{suffixe}", timeframe=timeframe, ema_period=window) + return dataframe + + def calculeDerivees( + self, + dataframe: pd.DataFrame, + name: str, + suffixe: str = '', + window: int = 100, + coef: float = 0.15, + ema_period: int = 10, + verbose: bool = True, + timeframe: str = '5m' + ) -> pd.DataFrame: + """ + Calcule deriv1/deriv2 (relative simple), applique EMA, calcule tendency + avec epsilon adaptatif basé sur rolling percentiles. + """ + + d1_col = f"{name}{suffixe}_deriv1" + d2_col = f"{name}{suffixe}_deriv2" + factor1 = 100 * (ema_period / 5) + factor2 = 10 * (ema_period / 5) + + dataframe[f"{name}{suffixe}_inv"] = (dataframe[f"{name}{suffixe}"].shift(2) >= dataframe[f"{name}{suffixe}"].shift(1)) \ + & (dataframe[f"{name}{suffixe}"].shift(1) <= dataframe[f"{name}{suffixe}"]) + # --- Distance à la moyenne mobile --- + dataframe[f"{name}{suffixe}_dist"] = (dataframe['close'] - dataframe[f"{name}{suffixe}"]) / dataframe[f"{name}{suffixe}"] + + # dérivée relative simple + dataframe[d1_col] = 1000 * (dataframe[name] - dataframe[name].shift(1)) / dataframe[name].shift(1) + dataframe[d2_col] = dataframe[d1_col] - dataframe[d1_col].shift(1) + return dataframe + + def getOpenTrades(self): + # if len(self.trades) == 0: + self.trades = Trade.get_open_trades() + return self.trades + + def calculateProbabilite2Index(self, df, futur_cols, indic_1, indic_2): + # # Définition des tranches pour les dérivées + # bins_deriv = [-np.inf, -0.05, -0.01, 0.01, 0.05, np.inf] + # labels = ['forte baisse', 'légère baisse', 'neutre', 'légère hausse', 'forte hausse'] + # + # # Ajout des colonnes bin 
(catégorisation) + # df[f"{indic_1}_bin"] = pd.cut(df['mid_smooth_1h_deriv1'], bins=bins_deriv, labels=labels) + # df[f"{indic_2}_bin"] = pd.cut(df['mid_smooth_deriv1_1d'], bins=bins_deriv, labels=labels) + # + # # Colonnes de prix futur à analyser + # futur_cols = ['futur_percent_1h', 'futur_percent_2h', 'futur_percent_3h', 'futur_percent_4h', 'futur_percent_5h'] + # + # # Calcul des moyennes et des effectifs + # grouped = df.groupby([f"{indic_2}_bin", f"{indic_1}_bin"])[futur_cols].agg(['mean', 'count']) + # + # pd.set_option('display.width', 200) # largeur max affichage + # pd.set_option('display.max_columns', None) + pd.set_option('display.max_columns', None) + pd.set_option('display.width', 300) # largeur max affichage + + # nettoyage + # series = df[f"{indic_2}"].dropna() + # unique_vals = df[f"{indic_2}"].nunique() + # print(unique_vals) + # print(df[f"{indic_2}"]) + n = len(self.labels) + + df[f"{indic_1}_bin"], bins_1h = pd.qcut(df[f"{indic_1}"], q=n, labels=self.labels, retbins=True, + duplicates='drop') + df[f"{indic_2}_bin"], bins_1d = pd.qcut(df[f"{indic_2}"], q=n, labels=self.labels, retbins=True, + duplicates='drop') + # Affichage formaté pour code Python + print(f"Bornes des quantiles pour {indic_1} : [{', '.join([f'{b:.4f}' for b in bins_1h])}]") + print(f"Bornes des quantiles pour {indic_2} : [{', '.join([f'{b:.4f}' for b in bins_1d])}]") + # Agrégation + grouped = df.groupby([f"{indic_2}_bin", f"{indic_1}_bin"], observed=True)[futur_cols].agg(['mean', 'count']) + # Affichage + with pd.option_context('display.max_rows', None, 'display.max_columns', None): + print(grouped.round(4)) + # Ajout des probabilités de hausse + for col in futur_cols: + df[f"{col}_is_up"] = df[col] > 0 + + # Calcul de la proba de hausse + proba_up = df.groupby([f"{indic_2}_bin", f"{indic_1}_bin"], observed=True)[f"{col}_is_up"].mean().unstack() + + print(f"\nProbabilité de hausse pour {col} (en %):") + with pd.option_context('display.max_rows', None, 'display.max_columns', 
None): + print((proba_up * 100).round(1)) + + # Affichage formaté des valeurs comme tableau Python + with pd.option_context('display.max_rows', None, 'display.max_columns', None): + df_formatted = (proba_up * 100).round(1) + + print("data = {") + for index, row in df_formatted.iterrows(): + row_values = ", ".join([f"{val:.1f}" for val in row]) + print(f"'{index}': [{row_values}], ") + print("}") + + data = {} + for index, row in df_formatted.iterrows(): + # on convertit proprement avec arrondi comme dans ton print, mais en données réelles + data[index] = [ + None if (isinstance(val, float) and math.isnan(val)) else val + for val in row + ] + + # Niveaux unicode pour les barres verticales (style sparkline) + # spark_chars = "▁▂▃▄▅▆▇█" + + # print(data.values()) + # # Collecte globale min/max + # all_values = [] + # for vals in data.values(): + # all_values.extend(v for v in vals if not (isinstance(v, float) and math.isnan(v))) + # + # global_min = min(all_values) if all_values else 0 + # global_max = max(all_values) if all_values else 1 + # global_span = (global_max - global_min) if global_max != global_min else 1 + # + # def sparkline_global(values): + # if all(isinstance(v, float) and math.isnan(v) for v in values): + # return "(no data)" + # out = "" + # for v in values: + # if isinstance(v, float) and math.isnan(v): + # out += " " + # else: + # idx = int((v - global_min) / global_span * (len(spark_chars) - 1)) + # out += spark_chars[idx] + # return out + # + # for key, values in data.items(): + # print(f"{key:>3} : {sparkline_global(values)}") + + # Palette ANSI 256 couleurs pour heatmap + def get_ansi_color(val): + """ + Échelle fixe 0→100 : + 0-20 : bleu (21) + 20-40 : cyan (51) + 40-60 : vert/jaune (46 / 226) + 60-80 : orange (208) + 80-100 : rouge (196) + """ + if val is None: + return "" + if val < 0: + val = 0 + elif val > 100: + val = 100 + + if val <= 20: + code = 21 + elif val <= 40: + code = 51 + elif val <= 60: + code = 226 + elif val <= 80: + code = 
def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
    """Flag long entries from the predicted mid-price column.

    A row gets ``enter_long = 1`` / ``enter_tag = "future"`` when:

    * either ``mid_future_pred_cons`` forms a local minimum (previous value
      lower than both its neighbours) while ``percent12`` is below -0.0005,
      or ``mid_future_pred_cons`` is below ``min12``;
    * and at least one of ``mid_smooth_12_deriv1`` / ``mid_smooth_5_deriv1``
      is positive.

    A helper column ``test`` (close * 1.01 on signal rows, NaN elsewhere) is
    added, and in backtest run mode the whole dataframe is dumped to a feather
    file under ``user_data/backtest_results``.

    :param dataframe: analyzed candles; mutated in place.
    :param metadata: freqtrade metadata dict; only ``metadata['pair']`` is read.
    :return: the same dataframe with the signal columns set.
    """
    pred = dataframe['mid_future_pred_cons']

    # Prediction went down then up again (local minimum one candle ago).
    local_bottom = (
        (pred.shift(2) > pred.shift(1))
        & (pred.shift(1) < pred)
        & (dataframe['percent12'] < -0.0005)
    )
    below_min = pred < dataframe['min12']
    slope_up = (dataframe['mid_smooth_12_deriv1'] > 0) | (dataframe['mid_smooth_5_deriv1'] > 0)

    dataframe.loc[(local_bottom | below_min) & slope_up, ['enter_long', 'enter_tag']] = (1, "future")

    dataframe['test'] = np.where(dataframe['enter_long'] == 1, dataframe['close'] * 1.01, np.nan)

    # The original tested `value in ('backtest')`, which is a substring test on
    # a plain string (the parentheses do not make a tuple). Compare explicitly.
    if self.dp.runmode.value == 'backtest':
        dataframe.to_feather(f"user_data/backtest_results/{metadata['pair'].replace('/', '_')}_df.feather")

    return dataframe
def adjust_trade_position(self, trade: Trade, current_time: datetime,
                          current_rate: float, current_profit: float, min_stake: float,
                          max_stake: float, **kwargs):
    """Decide whether to add to an open position (averaging down or pyramiding up).

    Two independent branches can return an extra stake:

    * "Loss" branch: fewer than 40 entries, the last candle carries an entry
      signal with ``stop_buying_1h`` false and ``hapercent`` positive, and the
      drawdown measure ``pct_max`` is below the per-entry limit ``lim``.
    * "Gain" branch: profit exceeds both the previously recorded profit and
      the expected profit, more than 6 hours have passed since the last fill,
      and ``max_rsi_12_1h`` is below 75.

    Both branches update the per-pair bookkeeping in ``self.pairs`` and log
    the action through ``self.log_trade``.

    :param trade: the open trade being evaluated.
    :param current_time: callback time; converted to UTC before use.
    :param current_rate: current price, used for profit and bookkeeping.
    :param current_profit: current profit ratio, used as ``pct_max`` on the first entry.
    :param min_stake: unused here.
    :param max_stake: unused here.
    :return: stake amount to add, ``0`` when less than 10 of stake currency is
        available, or ``None`` for no action.
    """
    # Do nothing while an order is already pending.
    if trade.has_open_orders:
        return None
    if self.wallets.get_available_stake_amount() < 10:
        return 0

    dataframe, _ = self.dp.get_analyzed_dataframe(trade.pair, self.timeframe)
    # This guard used to sit after `iloc[-1]` / `iloc[-2]`, where it could never trigger.
    if len(dataframe) < 1:
        return None
    last_candle = dataframe.iloc[-1].squeeze()

    current_time = current_time.astimezone(timezone.utc)
    dispo = round(self.wallets.get_available_stake_amount())
    hours = (current_time - trade.date_last_filled_utc).total_seconds() / 3600.0
    count_of_buys = trade.nr_of_successful_entries
    pair = trade.pair
    profit = trade.calc_profit(current_rate)
    last_lost = self.getLastLost(last_candle, pair)

    pct = self.pct.value
    if count_of_buys == 1:
        pct_max = current_profit
    elif self.pairs[pair]['last_buy']:
        pct_max = self.getPctLastBuy(pair, last_candle)
    else:
        pct_max = -pct

    # Threshold the drawdown must cross before another "Loss" entry is allowed.
    if self.getShortName(pair) == 'BTC' or count_of_buys <= 2:
        lim = -pct - (count_of_buys * self.pct_inc.value)
    else:
        lim = -0.05 - (count_of_buys * 0.0025)

    if not self.should_enter_trade(pair, last_candle, current_time):
        return None

    # NOTE(review): the original used `last_candle['enter_long'] and ...`; a NaN
    # in that column is truthy, so candles without a signal could pass. The rest
    # of this method already tests `== 1`, which is used here as well.
    condition = (
        last_candle['enter_long'] == 1
        and last_candle['stop_buying_1h'] == False
        and last_candle['hapercent'] > 0
    )

    limit_buy = 40
    if (count_of_buys < limit_buy) and condition and (pct_max < lim):
        try:
            if self.pairs[pair]['has_gain'] and profit > 0:
                self.pairs[pair]['force_sell'] = True
                return None

            max_amount = self.config.get('stake_amount') * 2.5
            stake_amount = min(min(max_amount, self.wallets.get_available_stake_amount()),
                               self.adjust_stake_amount(pair, last_candle) * abs(last_lost / self.mise_factor_buy.value))

            if stake_amount > 0:
                trade_type = "Loss " + (last_candle['enter_tag'] if last_candle['enter_long'] == 1 else '')
                self.pairs[pair]['count_of_buys'] += 1
                self.pairs[pair]['total_amount'] += stake_amount
                self.log_trade(
                    last_candle=last_candle,
                    date=current_time,
                    action="🟧 Loss -",
                    dispo=dispo,
                    pair=pair,
                    rate=current_rate,
                    trade_type=trade_type,
                    profit=round(profit, 1),
                    buys=trade.nr_of_successful_entries + 1,
                    stake=round(stake_amount, 2)
                )

                self.pairs[pair]['last_buy'] = current_rate
                self.pairs[pair]['max_touch'] = last_candle['close']
                self.pairs[pair]['last_candle'] = last_candle
                return stake_amount
            return None
        except Exception:
            # Best-effort callback: never let a bookkeeping error break the bot,
            # but keep the traceback instead of a bare print.
            logger.exception("adjust_trade_position (loss branch) failed for %s", pair)
            return None

    if (profit > self.pairs[pair]['previous_profit']
            and profit > self.pairs[pair]['expected_profit']
            and hours > 6
            and last_candle['max_rsi_12_1h'] < 75
            and self.wallets.get_available_stake_amount() > 0):
        try:
            self.pairs[pair]['previous_profit'] = profit
            stake_amount = min(self.wallets.get_available_stake_amount(), self.pairs[pair]['first_amount'])
            if stake_amount > 0:
                self.pairs[pair]['has_gain'] += 1
                self.pairs[pair]['count_of_buys'] += 1
                self.pairs[pair]['total_amount'] += stake_amount
                self.log_trade(
                    last_candle=last_candle,
                    date=current_time,
                    action="🟡 Gain +",
                    dispo=dispo,
                    pair=pair,
                    rate=current_rate,
                    trade_type=str(round(pct_max, 4)),
                    profit=round(profit, 1),
                    buys=trade.nr_of_successful_entries + 1,
                    stake=round(stake_amount, 2)
                )
                self.pairs[pair]['last_buy'] = current_rate
                self.pairs[pair]['max_touch'] = last_candle['close']
                self.pairs[pair]['last_candle'] = last_candle
                return stake_amount
            return None
        except Exception:
            logger.exception("adjust_trade_position (gain branch) failed for %s", pair)
            return None

    return None
@property
def protections(self):
    """Protection settings returned to freqtrade.

    Only a cooldown is active: a ``CooldownPeriod`` entry with
    ``stop_duration_candles`` set to 12.
    """
    cooldown = {
        "method": "CooldownPeriod",
        "stop_duration_candles": 12,
    }
    return [cooldown]
def polynomial_forecast(self, series: pd.Series, window: int = 20, degree: int = 2, steps=(12, 24, 36)):
    """Fit a polynomial on the last ``window`` values and extrapolate it.

    The polynomial is fitted against positions ``0 .. window-1``.

    :param series: pandas Series (e.g. ``dataframe['close']``).
    :param window: number of most recent values used for the fit.
    :param degree: polynomial degree (2 = quadratic).
    :param steps: horizons, in candles after the last fitted point, at which
        the polynomial is evaluated to build ``count``.
    :return: tuple ``(poly, x_future, y_future, count)`` where

        * ``poly`` is the fitted ``np.poly1d``;
        * ``x_future`` / ``y_future`` are the ``len(steps)`` positions
          immediately after the window and the polynomial evaluated there
          (NOTE(review): these are consecutive positions, not the ``steps``
          horizons; kept as-is for callers);
        * ``count`` is the number of horizons in ``steps`` whose prediction
          is strictly positive.
    :raises ValueError: if ``series`` is shorter than ``window``.
    """
    if len(series) < window:
        raise ValueError("La série est trop courte pour la fenêtre spécifiée.")

    recent_y = series.iloc[-window:].values
    x = np.arange(window)
    poly = np.poly1d(np.polyfit(x, recent_y, degree))

    x_future = np.arange(window, window + len(steps))
    y_future = poly(x_future)

    # Horizon h is evaluated at position (window - 1) + h, i.e. h candles after the last point.
    count = sum(1 for future_step in steps if poly(window - 1 + future_step) > 0)

    return poly, x_future, y_future, count
limit = 3 + + # if self.pairs[pair]['stop'] and last_candle['max_rsi_12_1h'] <= 60 and last_candle['trend_class_1h'] == -1: + # dispo = round(self.wallets.get_available_stake_amount()) + # self.pairs[pair]['stop'] = False + # self.log_trade( + # last_candle=last_candle, + # date=current_time, + # action="🟢RESTART", + # dispo=dispo, + # pair=pair, + # rate=last_candle['close'], + # trade_type='', + # profit=0, + # buys=self.pairs[pair]['count_of_buys'], + # stake=0 + # ) + + # 🟢 Dérivée 1 > 0 et dérivée 2 > 0: tendance haussière qui s’accélère. + # 🟡 Dérivée 1 > 0 et dérivée 2 < 0: tendance haussière qui ralentit → essoufflement potentiel. + # 🔴 Dérivée 1 < 0 et dérivée 2 < 0: tendance baissière qui s’accélère. + # 🟠 Dérivée 1 < 0 et dérivée 2 > 0: tendance baissière qui ralentit → possible bottom. + + # if not pair.startswith('BTC'): + dispo = round(self.wallets.get_available_stake_amount()) + + # if self.pairs[pair]['stop'] \ + # and last_candle[f"{self.indic_1d_p.value}_deriv1_1h"] >= self.indic_deriv1_1d_p_start.value \ + # and last_candle[f"{self.indic_1d_p.value}_deriv2_1h"] >= self.indic_deriv2_1d_p_start.value: + # self.pairs[pair]['stop'] = False + # self.log_trade( + # last_candle=last_candle, + # date=current_time, + # action="🟢RESTART", + # dispo=dispo, + # pair=pair, + # rate=last_candle['close'], + # trade_type='', + # profit=0, + # buys=self.pairs[pair]['count_of_buys'], + # stake=0 + # ) + # else: + # if self.pairs[pair]['stop'] == False \ + # and last_candle[f"{self.indic_1d_p.value}_deriv1_1h"] <= self.indic_deriv1_1d_p_stop.value \ + # and last_candle[f"{self.indic_1d_p.value}_deriv2_1h"] <= self.indic_deriv2_1d_p_stop.value: + # self.pairs[pair]['stop'] = True + # # if self.pairs[pair]['current_profit'] > 0: + # # self.pairs[pair]['force_sell'] = True + # self.log_trade( + # last_candle=last_candle, + # date=current_time, + # action="🔴STOP", + # dispo=dispo, + # pair=pair, + # rate=last_candle['close'], + # trade_type='', + # 
def select_uncorrelated_features(self, df, target, top_n=20, corr_threshold=0.7):
    """Pick the features most correlated with ``target`` while avoiding redundancy.

    The ``top_n`` numeric columns with the highest absolute correlation to
    ``target`` are scanned in decreasing order; a column is kept only if its
    absolute correlation with every column already kept does not exceed
    ``corr_threshold``.

    :return: DataFrame with columns ``feature`` and ``corr_with_target``
        (signed correlation), sorted by decreasing absolute correlation.
    """
    corr_matrix = df.corr(numeric_only=True)

    # Rank candidates by |corr| with the target, excluding the target itself.
    ranking = corr_matrix[target].abs().sort_values(ascending=False)
    candidates = ranking.drop(target).head(top_n).index.tolist()

    kept = []
    for name in candidates:
        redundant = any(abs(corr_matrix.loc[name, other]) > corr_threshold for other in kept)
        if redundant:
            continue
        kept.append(name)

    summary = pd.DataFrame({
        "feature": kept,
        "corr_with_target": [corr_matrix.loc[name, target] for name in kept],
    })
    return summary.sort_values(by="corr_with_target", key=np.abs, ascending=False)
df["y_pred"].std() + + df = df[(df["y_pred"] >= mean - 2 * std) & (df["y_pred"] <= mean + 2 * std)] + + # --- tracé + for i, col in enumerate(X_test.columns): + plt.plot(df[col], df["y_pred"], '.', color=colors[i], alpha=0.4, label=col) + + plt.title("Fonction apprise par LGBMRegressor (filtrée à ±2σ)") + plt.xlabel("Valeur feature") + plt.ylabel("Valeur prédite") + plt.legend(loc="right") + plt.grid(True) + + out_path = f"{self.path}/lgbm_features.png" + plt.savefig(out_path, bbox_inches="tight") + plt.close() + + print(f"Graphique sauvegardé : {out_path}") + + def optuna(self, path, X_train, X_test, y_train, y_test): + # Suppose que X_train, y_train sont déjà définis + # ou sinon : + # X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42) + + print("Description") + print(X_train.describe().T.sort_values("std")) + def objective(trial): + params = { + 'objective': 'regression', + 'metric': 'rmse', + 'n_estimators': trial.suggest_int('n_estimators', 100, 1000), + 'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.2, log=True), + 'max_depth': trial.suggest_int('max_depth', 3, 15), + 'num_leaves': trial.suggest_int('num_leaves', 20, 300), + 'subsample': trial.suggest_float('subsample', 0.5, 1.0), + 'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0), + 'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 10.0), + 'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 10.0), + 'random_state': 42, + } + + model = LGBMRegressor(**params) + model.fit(X_train, y_train) + + # On peut aussi valider sur un split interne + preds = model.predict(X_test) + rmse = np.sqrt(mean_squared_error(y_test, preds)) + return rmse + + # Crée une étude Optuna + study = optuna.create_study(direction="minimize") # on veut minimiser l'erreur + study.optimize(objective, n_trials=50, show_progress_bar=True) + + # 🔹 Afficher les meilleurs résultats + print("✅ Meilleurs hyperparamètres trouvés :") + print(study.best_params) + 
def filter_features(self, X: pd.DataFrame, y: pd.Series, corr_threshold: float = 0.95):
    """Drop near-constant, highly inter-correlated and all-NaN feature columns.

    Steps, in order:

    1. remove columns rejected by ``VarianceThreshold(threshold=1e-5)``;
    2. for every pair of remaining columns whose absolute correlation exceeds
       ``corr_threshold``, drop the later column of the pair;
    3. drop columns that contain only NaN.

    The original row index and dtypes of ``X`` are preserved, so the result
    stays aligned with ``y``. The previous version rebuilt the frame from the
    transformer's raw array, which reset the index to ``0..n-1``.

    :param X: feature matrix.
    :param y: target; not used by the filtering itself.
    :param corr_threshold: absolute correlation above which a column is redundant.
    :return: filtered view/copy of ``X`` with the surviving columns.
    """
    print("🔍 Filtrage automatique des features...")

    # 1) near-constant columns: fit only, then select on X to keep its index
    vt = VarianceThreshold(threshold=1e-5)
    vt.fit(X)
    X_var = X.loc[:, vt.get_support()]
    print(f" - {len(X.columns) - X_var.shape[1]} colonnes supprimées (variance faible)")

    # 2) redundant columns: inspect the strict upper triangle so each pair is seen once
    corr = X_var.corr().abs()
    upper = corr.where(np.triu(np.ones(corr.shape), k=1).astype(bool))
    drop_cols = [column for column in upper.columns if (upper[column] > corr_threshold).any()]
    X_corr = X_var.drop(columns=drop_cols, errors='ignore')
    print(f" - {len(drop_cols)} colonnes supprimées (corrélation > {corr_threshold})")

    # 3) columns that are entirely NaN
    X_clean = X_corr.dropna(axis=1, how='all')

    print(f"✅ {X_clean.shape[1]} colonnes conservées après filtrage.\n")
    return X_clean
def make_model(self, model_type="linear", degree=2, random_state=0):
    """Build an unfitted regressor for the rolling-regression helpers.

    :param model_type: one of ``"linear"``, ``"poly"``, ``"svr"``, ``"rf"``,
        ``"lgbm"`` (case-insensitive).
    :param degree: polynomial degree, used only by ``"poly"``.
    :param random_state: seed, used only by ``"rf"`` and ``"lgbm"``.
    :return: a scikit-learn compatible estimator or pipeline.
    :raises RuntimeError: ``"lgbm"`` requested but lightgbm cannot be imported.
    :raises ValueError: unknown ``model_type``.
    """
    model_type = model_type.lower()
    if model_type == "linear":
        return LinearRegression()
    if model_type == "poly":
        return make_pipeline(StandardScaler(), PolynomialFeatures(degree=degree, include_bias=False),
                             LinearRegression())
    if model_type == "svr":
        return make_pipeline(StandardScaler(), SVR(kernel="rbf", C=1.0, epsilon=0.1))
    if model_type == "rf":
        return RandomForestRegressor(n_estimators=100, random_state=random_state, n_jobs=1)
    if model_type == "lgbm":
        # Import lazily here: the `_HAS_LGBM` flag this branch used to read is
        # not assigned at module scope in the visible code, so the lookup
        # raised NameError instead of the intended RuntimeError.
        try:
            from lightgbm import LGBMRegressor
        except (ImportError, OSError) as err:
            raise RuntimeError("lightgbm n'est pas installé") from err
        return LGBMRegressor(n_estimators=100, random_state=random_state)
    raise ValueError(f"model_type inconnu: {model_type}")
def calculateRegression(
    self,
    df,
    indic,
    lookback=30,
    future_steps=5,
    model_type="linear",
    degree=2,
    weight_mode="exp",
    weight_strength=2,
    clip_k=2.0,
    blend_alpha=0.7,
):
    """Add a rolling-regression forecast column ``{indic}_future_pred_cons``.

    For each row ``i`` a model from ``self.make_model`` is fitted on the
    ``lookback`` values preceding ``i`` (positions ``0 .. lookback-1``):

    * rows ``lookback .. n-future_steps-1``: the fit is extrapolated to
      position ``lookback + future_steps - 1``; the change relative to
      ``values[i]`` is clipped to ``±clip_k`` times the window's standard
      deviation and blended with ``values[i]`` using ``blend_alpha``;
    * the last ``future_steps`` rows: the fit is extrapolated one position
      ahead (``lookback``) and blended with ``values[i-1]``, without clipping.

    Rows whose window contains NaN, or with fewer than ``lookback``
    predecessors, stay NaN. A series shorter than ``lookback`` yields an
    all-NaN column; the earlier version raised on it through an unused
    ``sliding_window_view`` call.

    :param df: dataframe holding ``indic``; mutated in place.
    :param weight_mode: ``"exp"`` weights recent points more
        (``linspace(0.1, 1) ** weight_strength``); anything else is unweighted.
    :return: ``df`` (same object) with the new column.
    """
    values = df[indic].values.astype(float)
    n = len(values)
    colname = f"{indic}_future_pred_cons"

    df[colname] = np.nan
    col_pos = df.columns.get_loc(colname)

    trainable_end = n - future_steps
    model = self.make_model(model_type=model_type, degree=degree)

    # Loop invariants: window positions, sample weights and prediction points.
    X_window = np.arange(lookback).reshape(-1, 1)
    weights = np.linspace(0.1, 1, lookback) ** weight_strength if weight_mode == "exp" else None
    horizon_x = np.array([[lookback + future_steps - 1]])
    next_x = np.array([[lookback]])

    # Rows for which the full horizon exists
    for i in range(lookback, trainable_end):
        window = values[i - lookback:i]
        if np.isnan(window).any():
            continue

        try:
            model.fit(X_window, window, sample_weight=weights)
        except (TypeError, ValueError):
            # Some estimators (e.g. pipelines) reject a bare sample_weight.
            model.fit(X_window, window)

        y_pred_value = model.predict(horizon_x)[0]
        pred_delta = y_pred_value - values[i]

        # Clip the predicted move to the local volatility
        local_std = np.std(window)
        max_change = clip_k * (local_std if local_std > 0 else 1e-9)
        pred_delta = np.clip(pred_delta, -max_change, max_change)

        final_pred_value = (
            blend_alpha * (values[i] + pred_delta)
            + (1 - blend_alpha) * values[i]
        )
        df.iloc[i, col_pos] = final_pred_value

    # Trailing rows [n - future_steps .. n - 1]: one-step local continuation
    for i in range(trainable_end, n):
        if i - lookback < 0:
            continue

        window = values[i - lookback:i]
        if np.isnan(window).any():
            continue

        try:
            model.fit(X_window, window)
        except Exception:
            # Best effort: leave this row NaN if the fit fails.
            continue

        y_pred_value = model.predict(next_x)[0]
        pred_delta = y_pred_value - values[i - 1]

        final_pred_value = (
            blend_alpha * (values[i - 1] + pred_delta)
            + (1 - blend_alpha) * values[i - 1]
        )
        df.iloc[i, col_pos] = final_pred_value

    return df
# calculateRegression(self, df, indic, lookback=30, future_steps=5):
#     """
#     Add an indicator {indic}_future_pred holding, for each candle n, the
#     value expected at n + future_steps according to a linear regression
#     fitted on the last `lookback` candles.
#     """
#     df = df.copy()
#     df[f"{indic}_future_pred"] = np.nan
#
#     values = df[indic].values
#     n = len(values)
#
#     model = LinearRegression()
#
#     for i in range(lookback, n - future_steps):
#         # Training window
#         X = np.arange(lookback).reshape(-1, 1)
#         y = values[i - lookback:i]
#
#         model.fit(X, y)
#
#         # Future prediction
#         next_X = np.array([[lookback + future_steps - 1]])
#         future_pred = model.predict(next_X)[0]
#
#         # Store the prediction at the current position (n)
#         df.iloc[i, df.columns.get_loc(f"{indic}_future_pred")] = future_pred
#
#     return df

def add_future_quantiles(self, dataframe, indic, lookback=30, future_steps=5, quantiles=(0.1, 0.5, 0.9)):
    """Fit quantile regressors on sliding windows of ``indic`` and plot the results.

    A working copy restricted to ``self.model_indicators`` is built, then:

    1. ``{self.indicator_target}_future`` is added: the target column shifted
       by a hard-coded 24 rows into the future, missing values filled with 0.
    2. For each quantile ``q`` a ``HistGradientBoostingRegressor`` is fitted on
       windows of ``lookback`` consecutive ``indic`` values to predict the
       value ``future_steps`` rows after the window; the in-sample predictions
       are written to ``{indic}_future_q{int(q * 100)}``.
    3. The quantile band is plotted through ``plot_future_quantiles_band``.
    4. For every column returned by ``self.listUsableColumns(dataframe)`` the
       10/25/50/75/90 % quantiles of the future target are plotted per bin
       (20 quantile bins) and saved as ``Distribution_<column>.png``.

    Args:
        dataframe: Input frame; must contain ``self.model_indicators``,
            ``self.indicator_target`` and ``indic``.
        indic: Name of the column to model.
        lookback: Window length used as regressor input.
        future_steps: Distance, in rows, between the end of a window and the
            value to predict.
        quantiles: Quantile levels to fit.

    Returns:
        The working copy with the added target, quantile and ``bin`` columns.
    """
    working_columns = self.listUsableColumns(dataframe)

    df = dataframe[self.model_indicators].copy()
    # The modelled column may not be one of the model indicators.
    if indic not in df.columns:
        df[indic] = dataframe[indic]
    n = len(df)
    target = self.indicator_target + "_future"

    df[target] = dataframe[self.indicator_target].shift(-24).fillna(0)

    quantile_columns = {q: f"{indic}_future_q{int(q * 100)}" for q in quantiles}
    for column in quantile_columns.values():
        df[column] = np.nan

    stop = n - future_steps
    if stop > lookback:
        series = df[indic].to_numpy(dtype=float)
        X = np.array([series[i - lookback:i] for i in range(lookback, stop)])
        # Value located `future_steps` rows after the end of each window.
        y = series[lookback + future_steps:n]

        X_imputed = SimpleImputer(strategy='median').fit_transform(X)
        # The regressor rejects NaN targets: fit on valid rows only, but still
        # predict for every window.
        valid = np.isfinite(y)

        if valid.any():
            for q, column in quantile_columns.items():
                model = HistGradientBoostingRegressor(loss='quantile', quantile=q, max_iter=100)
                model.fit(X_imputed[valid], y[valid])
                df.iloc[lookback:stop, df.columns.get_loc(column)] = model.predict(X_imputed)

        # Explicit upper bound: `[lookback:-future_steps]` is empty when
        # future_steps == 0. The band is looked up under `indic`, the name
        # used to create the quantile columns above.
        self.plot_future_quantiles_band(df.iloc[lookback:stop], indic=indic, quantiles=list(quantiles))

    distribution_quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]
    for indicator in working_columns:
        source = df if indicator in df.columns else dataframe
        df["bin"] = pd.qcut(source[indicator], q=20, duplicates="drop")
        stats = df.groupby("bin")[target].quantile(distribution_quantiles).unstack()

        fig, ax = plt.subplots(figsize=(10, 10))
        try:
            for q in stats.columns:
                plt.plot(stats.index.astype(str), stats[q], marker='o', label=f"Q{int(q * 100)}")

            plt.xticks(rotation=45)
            plt.xlabel(f"{indicator} bins")
            plt.ylabel(f"Quantiles of {target}")
            plt.title(f"Distribution quantile de {target} selon {indicator}")
            plt.legend()
            plt.grid(True)
            plt.tight_layout()
            output_path = f"{self.path}/Distribution_{indicator}.png"
            plt.savefig(output_path, bbox_inches="tight", dpi=150)
        finally:
            # Always release the figure, even if plotting or saving fails.
            plt.close(fig)

    return df


def plot_future_quantiles_band(self, df, indic, quantiles=(0.1, 0.5, 0.9), lookback=30, future_steps=5):
    """Plot ``indic`` together with its predicted quantile band and save it as PNG.

    Args:
        df: Frame holding ``indic`` and the ``{indic}_future_q<NN>`` columns.
        indic: Name of the actual-value column.
        quantiles: Quantile levels whose columns should be drawn; levels
            without a matching column are ignored.
        lookback: Unused; kept for signature compatibility.
        future_steps: Unused; kept for signature compatibility.

    Returns:
        None. Nothing is drawn when fewer than two quantile columns exist.
    """
    cols_q = [
        column
        for column in (f"{indic}_future_q{int(q * 100)}" for q in quantiles)
        if column in df.columns
    ]

    # Check before allocating the (very large) figure so nothing leaks.
    if len(cols_q) < 2:
        print("Au moins deux quantiles sont nécessaires pour afficher les bandes")
        return

    fig, ax = plt.subplots(figsize=(96, 30))
    try:
        ax.plot(df[indic], label=f"{indic} réel", color='black', linewidth=1.2)

        df_plot = df[cols_q]
        color = sns.color_palette("coolwarm", n_colors=1)[0]

        # Band between the first and the last available quantile columns.
        ax.fill_between(df.index,
                        df_plot.iloc[:, 0],
                        df_plot.iloc[:, -1],
                        color=color,
                        alpha=0.3,
                        label=f"Intervalle {quantiles[0] * 100}-{quantiles[-1] * 100}%")

        if len(cols_q) >= 3:
            # Prefer the real 50 % column; fall back to the second column.
            median_column = f"{indic}_future_q50"
            median = df[median_column] if median_column in cols_q else df_plot.iloc[:, 1]
            ax.plot(median, color=color, linestyle='--', linewidth=1, label="Quantile médian")

        ax.set_title(f"Prédiction futures valeurs de {indic} avec intervalle de quantiles")
        ax.set_xlabel("Index / Bougies")
        ax.set_ylabel(indic)
        ax.legend()
        ax.grid(True)

        output_path = f"{self.path}/Prédiction futures valeurs de {indic}.png"
        fig.savefig(output_path, bbox_inches="tight", dpi=150)
    finally:
        plt.close(fig)


def compute_quantile_confidence(self, df, indic, quantiles=(0.1, 0.5, 0.9)):
    """Score where the actual value sits inside the predicted quantile band.

    The score is ``(actual - median) / (high - low + 1e-9)`` clipped to
    ``[-1, 1]``: positive when the actual value is above the median quantile,
    negative when below. A chart of the band coloured by the score is saved
    as PNG.

    Args:
        df: Frame holding ``indic`` and the three quantile columns.
        indic: Name of the actual-value column.
        quantiles: At least three levels, read as (low, median, high).

    Returns:
        The clipped score, aligned on ``df``'s index.
    """
    col_low = f"{indic}_future_q{int(quantiles[0] * 100)}"
    col_med = f"{indic}_future_q{int(quantiles[1] * 100)}"
    col_high = f"{indic}_future_q{int(quantiles[2] * 100)}"

    # Band width (uncertainty); the epsilon avoids a division by zero.
    band_width = df[col_high] - df[col_low] + 1e-9

    # Normalised distance to the median, clipped to tame extreme values.
    score = np.clip((df[indic] - df[col_med]) / band_width, -1, 1)

    fig, ax = plt.subplots(figsize=(16, 6))
    try:
        ax.plot(df[indic], color='black', label='Valeur réelle')
        ax.fill_between(df.index,
                        df[col_low],
                        df[col_high],
                        alpha=0.3, color='blue', label='Intervalle 10%-90%')
        ax.plot(df[col_med], linestyle='--', color='blue', label='Médiane')

        # Colour each point with the score computed above (the frame has no
        # 'quantile_conf' column at this stage).
        points = ax.scatter(df.index, df[indic], c=score, cmap='coolwarm', s=20)
        fig.colorbar(points, ax=ax, label='Score de confiance')
        ax.set_title("Prédiction + score de confiance quantile")
        ax.legend()
        ax.grid(True)

        output_path = f"{self.path}/Prédiction score confiance de {indic}.png"
        fig.savefig(output_path, bbox_inches="tight", dpi=150)
    finally:
        plt.close(fig)

    return score


def kerasGenerateGraphs(self, dataframe):
    """Generate every diagnostic image for ``self.model``.

    Draws the architecture diagram, the prediction chart (using
    ``self.lookback``) and the per-layer weight heatmaps.
    """
    model = self.model
    self.kerasGenerateGraphModel(model)
    self.kerasGenerateGraphPredictions(model, dataframe, self.lookback)
    self.kerasGenerateGraphPoids(model)


def kerasGenerateGraphModel(self, model):
    """Save the architecture diagram of ``model`` as ``lstm_model.png``."""
    plot_model(
        model,
        to_file=f"{self.path}/lstm_model.png",
        show_shapes=True,
        show_layer_names=True
    )


def kerasGenerateGraphPredictions(self, model, dataframe, lookback):
    """Plot the target column against the model predictions and save the chart."""
    preds = self.tensorFlowGeneratePredictions(dataframe, lookback, model)

    fig, ax = plt.subplots(figsize=(36, 8))
    try:
        ax.plot(dataframe[self.indicator_target].values, label=self.indicator_target)
        ax.plot(preds, label="lstm_pred")
        ax.legend()
        fig.savefig(f"{self.path}/lstm_predictions.png")
    finally:
        plt.close(fig)


def kerasGenerateGraphPoids(self, model):
    """Dump the weights of every layer and draw a heatmap of its first 2-D matrix.

    For each layer the list returned by ``get_weights()`` is saved to
    ``layer_<i>_weights.npy`` as an object array; the first 2-D array found is
    rendered to ``layer<i>_weights.png``. Layers without any 2-D weight are
    reported and skipped, and the remaining layers are still processed.
    """
    for i, layer in enumerate(model.layers):
        weights = layer.get_weights()  # list of numpy arrays

        # Fill the object array slot by slot rather than letting NumPy infer
        # a shape from arrays that may share a leading dimension.
        packed = np.empty(len(weights), dtype=object)
        for slot, weight in enumerate(weights):
            packed[slot] = weight
        np.save(f"{self.path}/layer_{i}_weights.npy", packed)

        matrix = next(
            (w for w in weights if isinstance(w, np.ndarray) and w.ndim == 2),
            None,
        )
        if matrix is None:
            print(f"Aucune matrice 2D dans layer {i} (rien à afficher).")
            continue

        fig, ax = plt.subplots(figsize=(8, 6))
        try:
            sns.heatmap(matrix, cmap="viridis", ax=ax)
            ax.set_title(f"Poids 2D du layer {i}")
            fig.savefig(f"{self.path}/layer{i}_weights.png")
        finally:
            plt.close(fig)


# -------------------
# Training
# -------------------
def trainTensorFlow(self, dataframe, future_steps=1, lookback=50, epochs=40, batch_size=32):
    """Train the LSTM on ``dataframe`` and persist the model.

    The network is LSTM(64) -> Dense(32, relu) -> Dense(1), trained with MSE
    and Adam (learning rate 1e-4). The model is stored in ``self.model`` and
    saved to ``lstm_model.keras``.

    Raises:
        ValueError: When the cleaned frame is too short to build one window.
    """
    X_seq, y_seq = self.tensorFlowPrepareDataFrame(dataframe, future_steps, lookback)
    if X_seq.ndim != 3:
        raise ValueError(
            f"Pas assez de données pour entraîner le modèle "
            f"(lookback={lookback}, future_steps={future_steps})"
        )

    self.model = Sequential([
        LSTM(64, return_sequences=False, input_shape=(lookback, X_seq.shape[2])),
        Dense(32, activation="relu"),
        Dense(1)
    ])

    self.model.compile(loss='mse', optimizer=Adam(learning_rate=1e-4))
    self.model.fit(X_seq, y_seq, epochs=epochs, batch_size=batch_size, verbose=1)

    self.model.save(f"{self.path}/lstm_model.keras")
    # NOTE(review): only `data_max_` is written, which is not enough to
    # rebuild a MinMaxScaler (it also needs the minima) — confirm whether
    # anything reads these files.
    np.save(f"{self.path}/lstm_scaler_X.npy", self.scaler_X.data_max_)
    np.save(f"{self.path}/lstm_scaler_y.npy", self.scaler_y.data_max_)


def _fit_lstm_scalers(self, dataframe):
    """Clean ``dataframe``, fit ``self.scaler_X`` / ``self.scaler_y``, return the scaled arrays."""
    target = self.indicator_target
    feature_columns = list(self.model_indicators)

    # Work on a cleaned copy: infinities become NaN, then incomplete rows go.
    df = dataframe.replace([np.inf, -np.inf], np.nan)
    df = df.dropna(subset=feature_columns + [target])

    X_values = df[feature_columns].values
    y_values = df[target].values.reshape(-1, 1)

    # Constant columns are deliberately left untouched: MinMaxScaler maps a
    # zero-range feature to 0, whereas zeroing it before fitting made the
    # fitted scaler pass the raw constant through at prediction time.
    self.scaler_X = MinMaxScaler()
    X_scaled = self.scaler_X.fit_transform(X_values)
    self.scaler_y = MinMaxScaler()
    y_scaled = self.scaler_y.fit_transform(y_values)
    return X_scaled, y_scaled


def tensorFlowPrepareDataFrame(self, dataframe, future_steps, lookback):
    """Build the ``(X_seq, y_seq)`` training tensors from ``dataframe``.

    Rows containing NaN or infinite values in the features
    (``self.model_indicators``) or the target (``self.indicator_target``) are
    dropped, both are min-max scaled (the fitted scalers are stored on
    ``self``), and sliding windows are created: window ``i`` covers scaled
    rows ``i .. i + lookback - 1`` and is paired with the scaled target at
    row ``i + lookback + future_steps``.

    Returns:
        ``(X_seq, y_seq)`` as numpy arrays; both are empty when fewer than
        ``lookback + future_steps + 1`` clean rows are available.

    Raises:
        ValueError: If NaN or infinite values remain in the tensors.
    """
    X_scaled, y_scaled = self._fit_lstm_scalers(dataframe)

    n_windows = len(X_scaled) - lookback - future_steps
    X_seq = np.array([X_scaled[i:i + lookback] for i in range(n_windows)])
    y_seq = np.array([y_scaled[i + lookback + future_steps] for i in range(n_windows)])

    if np.isnan(X_seq).any() or np.isnan(y_seq).any():
        raise ValueError("X_seq ou y_seq contient encore des NaN")
    if np.isinf(X_seq).any() or np.isinf(y_seq).any():
        raise ValueError("X_seq ou y_seq contient encore des Inf")
    return X_seq, y_seq


# -------------------
# Prediction
# -------------------
def predictTensorFlow(self, dataframe, future_steps=1, lookback=50):
    """Add the ``lstm_pred`` column to ``dataframe`` and return it.

    The model is loaded from ``lstm_model.keras`` when ``self.model`` is not
    set. ``future_steps`` is accepted for signature compatibility only.
    """
    if getattr(self, "model", None) is None:
        self.model = load_model(f"{self.path}/lstm_model.keras", compile=False)

    # NOTE(review): as before, the scalers are refit on the frame being
    # predicted, so they match the training scalers only when the data is the
    # same — confirm this is intended. Only the scalers are needed here, so
    # the training windows are no longer built.
    self._fit_lstm_scalers(dataframe)

    dataframe["lstm_pred"] = self.tensorFlowGeneratePredictions(dataframe, lookback, self.model)
    return dataframe


def tensorFlowGeneratePredictions(self, dataframe, lookback, model):
    """Run ``model`` on every sliding window of ``dataframe``.

    The features (``self.model_indicators``) are scaled with
    ``self.scaler_X``; window ``i`` covers rows ``i .. i + lookback - 1`` and
    its prediction, mapped back through ``self.scaler_y``, is stored at
    position ``i + lookback``.

    Returns:
        A list with one entry per row of ``dataframe``; the first ``lookback``
        entries are NaN. All entries are NaN when no full window fits.
    """
    feature_columns = self.model_indicators
    # Infinities become NaN so the scaler sees missing values, not infinities.
    features = dataframe[feature_columns].replace([np.inf, -np.inf], np.nan).values

    preds = np.full(len(dataframe), np.nan)
    n_windows = len(features) - lookback
    if n_windows <= 0:
        # Not a single complete window: nothing to feed the model with.
        return preds.tolist()

    X_scaled = np.asarray(self.scaler_X.transform(features))
    X_seq = np.stack([X_scaled[i:i + lookback] for i in range(n_windows)])

    y_pred_scaled = np.asarray(model.predict(X_seq, verbose=0)).flatten()
    y_pred = self.scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()

    preds[lookback:lookback + len(y_pred)] = y_pred
    return preds.tolist()