From b070a572097cf068818ba4596f23eef9886edcee Mon Sep 17 00:00:00 2001 From: Hamed <147453648+cloner174@users.noreply.github.com> Date: Tue, 21 May 2024 23:59:46 +0330 Subject: [PATCH 01/38] simplify the script --- run.py | 334 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 180 insertions(+), 154 deletions(-) diff --git a/run.py b/run.py index a50bbdbf6..cf2b9472e 100644 --- a/run.py +++ b/run.py @@ -1,166 +1,192 @@ -import argparse +from utils.tools import dotdict import torch from experiments.exp_long_term_forecasting import Exp_Long_Term_Forecast from experiments.exp_long_term_forecasting_partial import Exp_Long_Term_Forecast_Partial import random import numpy as np -if __name__ == '__main__': - fix_seed = 2023 - random.seed(fix_seed) - torch.manual_seed(fix_seed) - np.random.seed(fix_seed) - - parser = argparse.ArgumentParser(description='iTransformer') - - # basic config - parser.add_argument('--is_training', type=int, required=True, default=1, help='status') - parser.add_argument('--model_id', type=str, required=True, default='test', help='model id') - parser.add_argument('--model', type=str, required=True, default='iTransformer', - help='model name, options: [iTransformer, iInformer, iReformer, iFlowformer, iFlashformer]') - - # data loader - parser.add_argument('--data', type=str, required=True, default='custom', help='dataset type') - parser.add_argument('--root_path', type=str, default='./data/electricity/', help='root path of the data file') - parser.add_argument('--data_path', type=str, default='electricity.csv', help='data csv file') - parser.add_argument('--features', type=str, default='M', - help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate') - parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task') - parser.add_argument('--freq', type=str, default='h', - help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h') - parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints') - - # forecasting task - parser.add_argument('--seq_len', type=int, default=96, help='input sequence length') - parser.add_argument('--label_len', type=int, default=48, help='start token length') # no longer needed in inverted Transformers - parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length') - - # model define - parser.add_argument('--enc_in', type=int, default=7, help='encoder input size') - parser.add_argument('--dec_in', type=int, default=7, help='decoder input size') - parser.add_argument('--c_out', type=int, default=7, help='output size') # applicable on arbitrary number of variates in inverted Transformers - parser.add_argument('--d_model', type=int, default=512, help='dimension of model') - parser.add_argument('--n_heads', type=int, default=8, help='num of heads') - parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers') - parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers') - parser.add_argument('--d_ff', type=int, default=2048, help='dimension of fcn') - parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average') - parser.add_argument('--factor', type=int, default=1, help='attn factor') - parser.add_argument('--distil', 
action='store_false', - help='whether to use distilling in encoder, using this argument means not using distilling', - default=True) - parser.add_argument('--dropout', type=float, default=0.1, help='dropout') - parser.add_argument('--embed', type=str, default='timeF', - help='time features encoding, options:[timeF, fixed, learned]') - parser.add_argument('--activation', type=str, default='gelu', help='activation') - parser.add_argument('--output_attention', action='store_true', help='whether to output attention in ecoder') - parser.add_argument('--do_predict', action='store_true', help='whether to predict unseen future data') - - # optimization - parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers') - parser.add_argument('--itr', type=int, default=1, help='experiments times') - parser.add_argument('--train_epochs', type=int, default=10, help='train epochs') - parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data') - parser.add_argument('--patience', type=int, default=3, help='early stopping patience') - parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate') - parser.add_argument('--des', type=str, default='test', help='exp description') - parser.add_argument('--loss', type=str, default='MSE', help='loss function') - parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate') - parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False) - - # GPU - parser.add_argument('--use_gpu', type=bool, default=True, help='use gpu') - parser.add_argument('--gpu', type=int, default=0, help='gpu') - parser.add_argument('--use_multi_gpu', action='store_true', help='use multiple gpus', default=False) - parser.add_argument('--devices', type=str, default='0,1,2,3', help='device ids of multile gpus') - - # iTransformer - parser.add_argument('--exp_name', type=str, required=False, default='MTSF', - help='experiemnt name, options:[MTSF, partial_train]') - parser.add_argument('--channel_independence', type=bool, default=False, help='whether to use channel_independence mechanism') - parser.add_argument('--inverse', action='store_true', help='inverse output data', default=False) - parser.add_argument('--class_strategy', type=str, default='projection', help='projection/average/cls_token') - parser.add_argument('--target_root_path', type=str, default='./data/electricity/', help='root path of the data file') - parser.add_argument('--target_data_path', type=str, default='electricity.csv', help='data file') - parser.add_argument('--efficient_training', type=bool, default=False, help='whether to use efficient_training (exp_name should be partial train)') # See Figure 8 of our paper for the detail - parser.add_argument('--use_norm', type=int, default=True, help='use norm and denorm') - parser.add_argument('--partial_start_index', type=int, default=0, help='the start index of variates for partial training, ' - 'you can select [partial_start_index, min(enc_in + partial_start_index, N)]') - - args = parser.parse_args() - args.use_gpu = True if torch.cuda.is_available() and args.use_gpu else False - - if args.use_gpu and args.use_multi_gpu: - args.devices = args.devices.replace(' ', '') - device_ids = args.devices.split(',') - args.device_ids = [int(id_) for id_ in device_ids] - args.gpu = args.device_ids[0] - - print('Args in experiment:') - print(args) - - if args.exp_name == 'partial_train': # See Figure 8 of our paper, 
for the detail - Exp = Exp_Long_Term_Forecast_Partial - else: # MTSF: multivariate time series forecasting - Exp = Exp_Long_Term_Forecast - - - if args.is_training: - for ii in range(args.itr): - # setting record of experiments - setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format( - args.model_id, - args.model, - args.data, - args.features, - args.seq_len, - args.label_len, - args.pred_len, - args.d_model, - args.n_heads, - args.e_layers, - args.d_layers, - args.d_ff, - args.factor, - args.embed, - args.distil, - args.des, - args.class_strategy, ii) - - exp = Exp(args) # set experiments - print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting)) - exp.train(setting) - - print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) - exp.test(setting) - - if args.do_predict: - print('>>>>>>>predicting : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) - exp.predict(setting, True) - - torch.cuda.empty_cache() + + +fix_seed = 2023 +random.seed(fix_seed) +torch.manual_seed(fix_seed) +np.random.seed(fix_seed) + + +arg = dotdict() + + +arg.is_training = 1 # help: status +arg.model_id = 'test' + +arg.model = 'iTransformer' # help: model name. options: iTransformer, iInformer, iReformer, iFlowformer, iFlashformer +arg.data = 'custom' # help: dataset type + +arg.root_path = 'input/train' # help: main directory path of the data file +arg.data_path = 'data.csv' # help: name of data csv file + +arg.target_root_path = 'input/test' +arg.target_data_path = 'data.csv' + + +arg.features = 'MS' # help: forecasting task , options: M ->multivariate predict multivariate , or +# S ->univariate predict univariate , or +# MS ->multivariate predict univariate + +arg.target = 'Close' # help: target feature in S or MS task + +arg.freq = 'b' # help: Freq for time features encoding. 
options: s ->secondly , t ->minutely, h:hourly +# d ->daily , w ->weekly, m ->monthly +# b ->business days +# also more detailed freq like 15min or 3h + +arg.checkpoints = './checkpoints/' # help: location to save model checkpoints + +arg.seq_len = 1*5*3 # help: input sequence length +arg.label_len = 1*1 # help: start token length +arg.pred_len = 1*3 # help: prediction sequence length + +arg.enc_in = 6 # help: encoder input size +arg.dec_in = 6 # help: decoder input size +arg.c_out = 1 # help: output size -> applicable on arbitrary number of variates in inverted Transformers +arg.d_model = 512 # help: dimension of model +arg.n_heads = 8 # help: num of heads +arg.e_layers = 8 # help: num of encoder layers +arg.d_layers = 8 # help: num of decoder layers +arg.d_ff = 2048 # help: dimension of fcn +arg.moving_avg = 25 # help: window size of moving average +arg.factor = 1 # help: attn factor +arg.distil = True # help: whether to use distilling in encoder, using this argument means not using distilling + +arg.dropout = 0.01 + +arg.embed = 'timeF' # help: time features encoding, options: timeF OR fixed OR learned +arg.activation = 'ReLU' # help: Name of activation Function + +#arg.output_attention = None # help: Whether to output attention in ecoder +#arg.do_predict = None # help: whether to predict unseen future data + +arg.num_workers = 10 # help: data loader num workers +arg.itr = 1 # help: How many times repeat experiments + +arg.train_epochs = 21 + +arg.batch_size = 16 + +arg.patience = 7 # help: early stopping patience + +arg.learning_rate = 0.00005 + +arg.des = 'test' # help: exp description + +arg.loss = 'MSE' # help: loss function + +arg.lradj = 'type1' # help: adjust learning rate +arg.use_amp = False # help: use automatic mixed precision training + +arg.use_gpu = True if torch.cuda.is_available() else False # help: whether to use gpu +arg.gpu = 0 # help: GPU +arg.use_multi_gpu = False +arg.devices = '0,1,2,3' + +arg.exp_name = 'MTSF' + +arg.channel_independence = False # help: whether to use channel_independence mechanism + +arg.inverse = True # help: inverse output data + +arg.class_strategy = 'projection' # help: options: projection/average/cls_token + + + + + +arg.efficient_training = False # help: whether to use efficient_training (exp_name should be partial_train) | See Figure 8 + +arg.use_norm = True # help: use norm and denorm | type=int + +arg.partial_start_index = 0 # help: the start index of variates for partial training, +# you can select [partial_start_index, min(enc_in + partial_start_index, N)] + +#if arg.use_gpu and arg.use_multi_gpu: +# arg.devices = arg.devices.replace(' ', '') +# device_ids = arg.devices.split(',') +# arg.device_ids = [int(id_) for id_ in device_ids] +# arg.gpu = arg.device_ids[0] + + +print('Args in experiment:') +print(arg) + + + +if input("Press Enter To Start :" ) == '' : + pass +else: + exit() + +if arg.exp_name == 'partial_train': # See Figure 8 of our paper, for the detail + Exp = Exp_Long_Term_Forecast_Partial +else: # MTSF: multivariate time series forecasting + Exp = Exp_Long_Term_Forecast + +if arg.is_training: + for ii in range(arg.itr): + # setting record of experiments + setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format( + arg.model_id, + arg.model, + arg.data, + arg.features, + arg.seq_len, + arg.label_len, + arg.pred_len, + arg.d_model, + arg.n_heads, + arg.e_layers, + arg.d_layers, + arg.d_ff, + arg.factor, + arg.embed, + arg.distil, + arg.des, + arg.class_strategy, ii) + + exp = Exp(arg) 
# set experiments + print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting)) + exp.train(setting) + + print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) + exp.test(setting) + + if arg.do_predict: + print('>>>>>>>predicting : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) + exp.predict(setting, True) + + torch.cuda.empty_cache() else: ii = 0 setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format( - args.model_id, - args.model, - args.data, - args.features, - args.seq_len, - args.label_len, - args.pred_len, - args.d_model, - args.n_heads, - args.e_layers, - args.d_layers, - args.d_ff, - args.factor, - args.embed, - args.distil, - args.des, - args.class_strategy, ii) - - exp = Exp(args) # set experiments + arg.model_id, + arg.model, + arg.data, + arg.features, + arg.seq_len, + arg.label_len, + arg.pred_len, + arg.d_model, + arg.n_heads, + arg.e_layers, + arg.d_layers, + arg.d_ff, + arg.factor, + arg.embed, + arg.distil, + arg.des, + arg.class_strategy, ii) + + exp = Exp(arg) # set experiments print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) exp.test(setting, test=1) torch.cuda.empty_cache() +#end# From c4aa75f56cb10b4d24fd9954ae9f95a0a329b52e Mon Sep 17 00:00:00 2001 From: Hamed <147453648+cloner174@users.noreply.github.com> Date: Wed, 22 May 2024 00:12:15 +0330 Subject: [PATCH 02/38] new Options kind_of_scaler -> 'Standard' or 'MinMax' dynamic name of date col -> name_of_col_with_date separately scale the data. save the scaler for further inverse --- data_provider/data_loader.py | 65 +++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 16 deletions(-) diff --git a/data_provider/data_loader.py b/data_provider/data_loader.py index c86471da7..55f1d67f3 100644 --- a/data_provider/data_loader.py +++ b/data_provider/data_loader.py @@ -1,9 +1,10 @@ import os import numpy as np import pandas as pd +import joblib import torch from torch.utils.data import Dataset, DataLoader -from sklearn.preprocessing import StandardScaler +from sklearn.preprocessing import StandardScaler, MinMaxScaler from utils.timefeatures import time_features import warnings @@ -189,9 +190,11 @@ def inverse_transform(self, data): class Dataset_Custom(Dataset): - def __init__(self, root_path, flag='train', size=None, - features='S', data_path='ETTh1.csv', - target='OT', scale=True, timeenc=0, freq='h'): + def __init__(self, + root_path, flag='train', size=None, + features='S', data_path='data.csv', + target='Close', scale=True, timeenc=0, freq='b', + test_size = 0.2, direct_data = None, name_of_col_with_dates = 'date'): # size [seq_len, label_len, pred_len] # info if size == None: @@ -212,25 +215,33 @@ def __init__(self, root_path, flag='train', size=None, self.scale = scale self.timeenc = timeenc self.freq = freq - + self.test_size = test_size + self.train_size = 0.90 - test_size + self.kind_of_scaler = kind_of_scaler + self.name_of_col_with_dates = name_of_col_with_dates self.root_path = root_path self.data_path = data_path self.__read_data__() def __read_data__(self): - self.scaler = StandardScaler() + df_raw = pd.read_csv(os.path.join(self.root_path, - self.data_path)) + self.data_path)) ''' df_raw.columns: ['date', ...(other features), target feature] ''' + cols = list(df_raw.columns) cols.remove(self.target) - cols.remove('date') - df_raw = df_raw[['date'] + cols + [self.target]] - num_train = int(len(df_raw) * 0.7) - num_test = int(len(df_raw) * 0.2) + 
cols.remove(self.name_of_col_with_dates) + df_raw = df_raw[[self.name_of_col_with_dates] + cols + [self.target]] + cols.insert(0, 'date') + cols.append(self.target) + df_raw = df_raw.set_axis(cols, axis=1) + + num_train = int(len(df_raw) * self.train_size) + num_test = int(len(df_raw) * self.test_size) num_vali = len(df_raw) - num_train - num_test border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len] border2s = [num_train, num_train + num_vali, len(df_raw)] @@ -242,14 +253,36 @@ def __read_data__(self): df_data = df_raw[cols_data] elif self.features == 'S': df_data = df_raw[[self.target]] - + if self.scale: - train_data = df_data[border1s[0]:border2s[0]] - self.scaler.fit(train_data.values) - data = self.scaler.transform(df_data.values) + col_scaled = [] + for col in df_data.columns: + col_data = df_data[[col]].values + if self.kind_of_scaler == 'MinMax': + if col == self.target: + self.scaler = MinMaxScaler() + else: + scaler = MinMaxScaler() + else: + if col == self.target: + self.scaler = StandardScaler() + else: + scaler = StandardScaler() + if col == self.target: + self.scaler.fit(col_data[border1s[0]:border2s[0]]) + joblib.dump(self.scaler, os.path.join(self.root_path, 'scaler.pkl')) + col_temp = self.scaler.transform(col_data) + else: + scaler.fit(col_data[border1s[0]:border2s[0]]) + col_temp = scaler.transform(col_data) + col_scaled.append(col_temp) + if len(col_scaled) == 1: + data = col_scaled[0] + else: + data = np.concatenate(col_scaled, axis = 1) else: data = df_data.values - + df_stamp = df_raw[['date']][border1:border2] df_stamp['date'] = pd.to_datetime(df_stamp.date) if self.timeenc == 0: From 3ae291aeff9483164d7c4e97d016f53fa1fd19ad Mon Sep 17 00:00:00 2001 From: Hamed <147453648+cloner174@users.noreply.github.com> Date: Wed, 22 May 2024 00:18:26 +0330 Subject: [PATCH 03/38] Update data_loader.py --- data_provider/data_loader.py | 57 +++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/data_provider/data_loader.py b/data_provider/data_loader.py index 55f1d67f3..cb60b9d01 100644 --- a/data_provider/data_loader.py +++ b/data_provider/data_loader.py @@ -194,13 +194,13 @@ def __init__(self, root_path, flag='train', size=None, features='S', data_path='data.csv', target='Close', scale=True, timeenc=0, freq='b', - test_size = 0.2, direct_data = None, name_of_col_with_dates = 'date'): + test_size = 0.2, direct_data = None, name_of_col_with_date = 'date'): # size [seq_len, label_len, pred_len] # info if size == None: - self.seq_len = 24 * 4 * 4 - self.label_len = 24 * 4 - self.pred_len = 24 * 4 + self.seq_len = 1 * 5 * 3 # Three week - work week ! only 5 days are alive! + self.label_len = 1 * 1 # Predict one day ahead + self.pred_len = 1 * 1 # Just for one time! 
else: self.seq_len = size[0] self.label_len = size[1] @@ -255,31 +255,40 @@ def __read_data__(self): df_data = df_raw[[self.target]] if self.scale: - col_scaled = [] - for col in df_data.columns: - col_data = df_data[[col]].values - if self.kind_of_scaler == 'MinMax': - if col == self.target: - self.scaler = MinMaxScaler() + if self.features == 'S' or self.features == 'MS': + col_scaled = [] + for col in df_data.columns: + col_data = df_data[[col]].values + if self.kind_of_scaler == 'MinMax': + if col == self.target: + self.scaler = MinMaxScaler() + else: + scaler = MinMaxScaler() else: - scaler = MinMaxScaler() - else: + if col == self.target: + self.scaler = StandardScaler() + else: + scaler = StandardScaler() if col == self.target: - self.scaler = StandardScaler() + self.scaler.fit(col_data[border1s[0]:border2s[0]]) + joblib.dump(self.scaler, os.path.join(self.root_path, 'scaler.pkl')) + col_temp = self.scaler.transform(col_data) else: - scaler = StandardScaler() - if col == self.target: - self.scaler.fit(col_data[border1s[0]:border2s[0]]) - joblib.dump(self.scaler, os.path.join(self.root_path, 'scaler.pkl')) - col_temp = self.scaler.transform(col_data) + scaler.fit(col_data[border1s[0]:border2s[0]]) + col_temp = scaler.transform(col_data) + col_scaled.append(col_temp) + if len(col_scaled) == 1: + data = col_scaled[0] else: - scaler.fit(col_data[border1s[0]:border2s[0]]) - col_temp = scaler.transform(col_data) - col_scaled.append(col_temp) - if len(col_scaled) == 1: - data = col_scaled[0] + data = np.concatenate(col_scaled, axis = 1) else: - data = np.concatenate(col_scaled, axis = 1) + if self.kind_of_scaler == 'MinMax': + self.scaler = MinMaxScaler() + else: + self.scaler = StandardScaler() + train_data = df_data[border1s[0]:border2s[0]] + self.scaler.fit(train_data.values) + data = self.scaler.transform(df_data.values) else: data = df_data.values From 0ec7b4fd534fde8e6bfd4b82682fa54325fdc2c4 Mon Sep 17 00:00:00 2001 From: Hamed <147453648+cloner174@users.noreply.github.com> Date: Wed, 22 May 2024 00:30:41 +0330 Subject: [PATCH 04/38] Update exp_long_term_forecasting.py --- experiments/exp_long_term_forecasting.py | 51 ++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/experiments/exp_long_term_forecasting.py b/experiments/exp_long_term_forecasting.py index 1df9760a9..0ece1a64d 100644 --- a/experiments/exp_long_term_forecasting.py +++ b/experiments/exp_long_term_forecasting.py @@ -16,6 +16,8 @@ class Exp_Long_Term_Forecast(Exp_Basic): def __init__(self, args): super(Exp_Long_Term_Forecast, self).__init__(args) + self.train_losses = [] + self.test_losses = [] def _build_model(self): model = self.model_dict[self.args.model].Model(self.args).float() @@ -29,11 +31,51 @@ def _get_data(self, flag): return data_set, data_loader def _select_optimizer(self): - model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) + if self.args.kind_of_optim == 'AdamW': + model_optim = optim.AdamW(self.model.parameters(), lr=self.args.learning_rate) + elif self.args.kind_of_optim == 'SparseAdam': + model_optim = optim.SparseAdam(self.model.parameters(), lr=self.args.learning_rate) + elif self.args.kind_of_optim == 'SGD': + model_optim = optim.SGD(self.model.parameters(), lr=self.args.learning_rate) + elif self.args.kind_of_optim == 'RMSprop': + model_optim = optim.RMSprop(self.model.parameters(), lr=self.args.learning_rate) + elif self.args.kind_of_optim == 'RAdam': + model_optim = optim.RAdam(self.model.parameters(), lr=self.args.learning_rate) + 
elif self.args.kind_of_optim == 'NAdam': + model_optim = optim.NAdam(self.model.parameters(), lr=self.args.learning_rate) + elif self.args.kind_of_optim == 'LBFGS': + model_optim = optim.LBFGS(self.model.parameters(), lr=self.args.learning_rate) + elif self.args.kind_of_optim == 'Adamax': + model_optim = optim.Adamax(self.model.parameters(), lr=self.args.learning_rate) + elif self.args.kind_of_optim == 'ASGD': + model_optim = optim.ASGD(self.model.parameters(), lr=self.args.learning_rate) + elif self.args.kind_of_optim == 'Adadelta': + model_optim = optim.Adadelta(self.model.parameters(), lr=self.args.learning_rate) + elif self.args.kind_of_optim == 'Adagrad': + model_optim = optim.Adagrad(self.model.parameters(), lr=self.args.learning_rate) + else: + model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) + return model_optim def _select_criterion(self): - criterion = nn.MSELoss() + if self.args.criter.lower() == 'wmape': + criterion = WeightedMeanAbsolutePercentageError() + elif self.args.criter.lower() == 'smape': + criterion = SymmetricMeanAbsolutePercentageError() + elif self.args.criter.lower() == 'mae': + criterion = nn.L1Loss() + elif self.args.criter.lower() == 'rmse': + criterion = RMSELoss() + elif self.args.criter.lower() == 'quantileloss': + criterion = QuantileLoss() + elif self.args.criter.lower() == 'huberloss': + criterion = HuberLoss() + elif self.args.criter.lower() == 'pinballloss': + criterion = PinballLoss() + else: + criterion = nn.MSELoss() # Default to Mean Squared Error + return criterion def vali(self, vali_data, vali_loader, criterion): @@ -168,7 +210,8 @@ def train(self, setting): train_loss = np.average(train_loss) vali_loss = self.vali(vali_data, vali_loader, criterion) test_loss = self.vali(test_data, test_loader, criterion) - + self.train_losses.append(train_loss) + self.test_losses.append(test_loss) print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( epoch + 1, train_steps, train_loss, vali_loss, test_loss)) early_stopping(vali_loss, self.model, path) @@ -328,4 +371,4 @@ def predict(self, setting, load=False): np.save(folder_path + 'real_prediction.npy', preds) - return \ No newline at end of file + return From ff1d90a86e3c80f7a7165e44360eb3039cb540a2 Mon Sep 17 00:00:00 2001 From: Hamed <147453648+cloner174@users.noreply.github.com> Date: Wed, 22 May 2024 00:36:00 +0330 Subject: [PATCH 05/38] Update data_factory.py scale name_of_col_with_date kind_of_scaler --- data_provider/data_factory.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/data_provider/data_factory.py b/data_provider/data_factory.py index 51a5fc87f..cb3cfde2d 100644 --- a/data_provider/data_factory.py +++ b/data_provider/data_factory.py @@ -43,6 +43,11 @@ def data_provider(args, flag): target=args.target, timeenc=timeenc, freq=freq, + test_size = args.test_size, + kind_of_scaler=args.kind_of_scaler, + name_of_col_with_date = args.name_of_col_with_date, + scale = args.scale, + ) print(flag, len(data_set)) data_loader = DataLoader( From e08ebc6516abab51bb4cb34e734bc99ebb10191b Mon Sep 17 00:00:00 2001 From: Hamed <147453648+cloner174@users.noreply.github.com> Date: Wed, 22 May 2024 00:40:43 +0330 Subject: [PATCH 06/38] Update data_loader.py --- data_provider/data_loader.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/data_provider/data_loader.py b/data_provider/data_loader.py index cb60b9d01..afd17105c 100644 --- a/data_provider/data_loader.py +++ b/data_provider/data_loader.py @@ -194,7 
+194,7 @@ def __init__(self, root_path, flag='train', size=None, features='S', data_path='data.csv', target='Close', scale=True, timeenc=0, freq='b', - test_size = 0.2, direct_data = None, name_of_col_with_date = 'date'): + test_size = 0.2, kind_of_scaler = 'Standard', name_of_col_with_date = 'date'): # size [seq_len, label_len, pred_len] # info if size == None: @@ -218,7 +218,7 @@ def __init__(self, self.test_size = test_size self.train_size = 0.90 - test_size self.kind_of_scaler = kind_of_scaler - self.name_of_col_with_dates = name_of_col_with_dates + self.name_of_col_with_date = name_of_col_with_date self.root_path = root_path self.data_path = data_path self.__read_data__() @@ -234,8 +234,8 @@ def __read_data__(self): cols = list(df_raw.columns) cols.remove(self.target) - cols.remove(self.name_of_col_with_dates) - df_raw = df_raw[[self.name_of_col_with_dates] + cols + [self.target]] + cols.remove(self.name_of_col_with_date) + df_raw = df_raw[[self.name_of_col_with_date] + cols + [self.target]] cols.insert(0, 'date') cols.append(self.target) df_raw = df_raw.set_axis(cols, axis=1) From 066c872759e75bf872e5226e163ea7401cf5b107 Mon Sep 17 00:00:00 2001 From: Hamed <147453648+cloner174@users.noreply.github.com> Date: Wed, 22 May 2024 02:45:18 +0330 Subject: [PATCH 07/38] Update data_loader.py --- data_provider/data_loader.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data_provider/data_loader.py b/data_provider/data_loader.py index afd17105c..45253f05c 100644 --- a/data_provider/data_loader.py +++ b/data_provider/data_loader.py @@ -194,7 +194,7 @@ def __init__(self, root_path, flag='train', size=None, features='S', data_path='data.csv', target='Close', scale=True, timeenc=0, freq='b', - test_size = 0.2, kind_of_scaler = 'Standard', name_of_col_with_date = 'date'): + test_size = 0.2, kind_of_scaler = None, name_of_col_with_date = None): # size [seq_len, label_len, pred_len] # info if size == None: @@ -217,8 +217,8 @@ def __init__(self, self.freq = freq self.test_size = test_size self.train_size = 0.90 - test_size - self.kind_of_scaler = kind_of_scaler - self.name_of_col_with_date = name_of_col_with_date + self.kind_of_scaler = kind_of_scaler if kind_of_scaler is not None else 'Standard' + self.name_of_col_with_date = name_of_col_with_date if name_of_col_with_dateis is not None else 'date' self.root_path = root_path self.data_path = data_path self.__read_data__() From 854d97c54db853adb59306e271e62a22cb6a2438 Mon Sep 17 00:00:00 2001 From: Hamed <147453648+cloner174@users.noreply.github.com> Date: Wed, 22 May 2024 02:46:35 +0330 Subject: [PATCH 08/38] Update data_loader.py --- data_provider/data_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_provider/data_loader.py b/data_provider/data_loader.py index 45253f05c..a1fe43719 100644 --- a/data_provider/data_loader.py +++ b/data_provider/data_loader.py @@ -299,7 +299,7 @@ def __read_data__(self): df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) - data_stamp = df_stamp.drop(['date'], 1).values + data_stamp = df_stamp.drop(['date'], axis = 1).values elif self.timeenc == 1: data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) data_stamp = data_stamp.transpose(1, 0) From 98e62b913830b1ec926f73b75aa532156b0ba0e8 Mon Sep 17 00:00:00 2001 From: Hamed <147453648+cloner174@users.noreply.github.com> Date: Wed, 22 May 
2024 16:25:16 +0330 Subject: [PATCH 09/38] add options data_loader.py --- data_provider/data_loader.py | 61 +++++++++++++++++++++++++++++------- 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/data_provider/data_loader.py b/data_provider/data_loader.py index a1fe43719..217d24cfc 100644 --- a/data_provider/data_loader.py +++ b/data_provider/data_loader.py @@ -473,21 +473,25 @@ def inverse_transform(self, data): class Dataset_Pred(Dataset): def __init__(self, root_path, flag='pred', size=None, - features='S', data_path='ETTh1.csv', - target='OT', scale=True, inverse=False, timeenc=0, freq='15min', cols=None): + features='S', data_path='data.csv', + target='Close', scale=True, inverse=True, timeenc=0, freq='b', cols=None, + test_size = None, kind_of_scaler = None, name_of_col_with_date = None): # size [seq_len, label_len, pred_len] # info if size == None: - self.seq_len = 24 * 4 * 4 - self.label_len = 24 * 4 - self.pred_len = 24 * 4 + self.seq_len = 1 * 5 * 6 + self.label_len = 1 * 1 + self.pred_len = 1 * 1 else: self.seq_len = size[0] self.label_len = size[1] self.pred_len = size[2] # init assert flag in ['pred'] - + + self.test_size = None + self.kind_of_scaler = kind_of_scaler if kind_of_scaler is not None else 'Standard' + self.name_of_col_with_date = name_of_col_with_date if name_of_col_with_date is not None else 'date' self.features = features self.target = target self.scale = scale @@ -512,8 +516,11 @@ def __read_data__(self): else: cols = list(df_raw.columns) cols.remove(self.target) - cols.remove('date') - df_raw = df_raw[['date'] + cols + [self.target]] + cols.remove(self.name_of_col_with_date) + df_raw = df_raw[[self.name_of_col_with_date] + cols + [self.target]] + cols.insert(0, 'date') + cols.append(self.target) + df_raw = df_raw.set_axis(cols, axis=1) border1 = len(df_raw) - self.seq_len border2 = len(df_raw) @@ -524,8 +531,40 @@ def __read_data__(self): df_data = df_raw[[self.target]] if self.scale: - self.scaler.fit(df_data.values) - data = self.scaler.transform(df_data.values) + if self.features == 'S' or self.features == 'MS': + col_scaled = [] + for col in df_data.columns: + col_data = df_data[[col]].values + if self.kind_of_scaler == 'MinMax': + if col == self.target: + self.scaler = MinMaxScaler() + else: + scaler = MinMaxScaler() + else: + if col == self.target: + self.scaler = StandardScaler() + else: + scaler = StandardScaler() + if col == self.target: + self.scaler.fit(col_data) + joblib.dump(self.scaler, os.path.join(self.root_path, 'scaler.pkl')) + col_temp = self.scaler.transform(col_data) + else: + scaler.fit(col_data) + col_temp = scaler.transform(col_data) + col_scaled.append(col_temp) + if len(col_scaled) == 1: + data = col_scaled[0] + else: + data = np.concatenate(col_scaled, axis = 1) + else: + if self.kind_of_scaler == 'MinMax': + self.scaler = MinMaxScaler() + else: + self.scaler = StandardScaler() + + self.scaler.fit(df_data.values) + data = self.scaler.transform(df_data.values) else: data = df_data.values @@ -542,7 +581,7 @@ def __read_data__(self): df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) df_stamp['minute'] = df_stamp.date.apply(lambda row: row.minute, 1) df_stamp['minute'] = df_stamp.minute.map(lambda x: x // 15) - data_stamp = df_stamp.drop(['date'], 1).values + data_stamp = df_stamp.drop(['date'], axis=1).values elif self.timeenc == 1: data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) data_stamp = data_stamp.transpose(1, 0) From d07c027c3af9983b86ff797dcef027a8a60c7716 Mon Sep 17 
00:00:00 2001 From: Hamed <147453648+cloner174@users.noreply.github.com> Date: Wed, 22 May 2024 16:29:50 +0330 Subject: [PATCH 10/38] add options exp_long_term_forecasting.py --- experiments/exp_long_term_forecasting.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/experiments/exp_long_term_forecasting.py b/experiments/exp_long_term_forecasting.py index 0ece1a64d..c217ab92e 100644 --- a/experiments/exp_long_term_forecasting.py +++ b/experiments/exp_long_term_forecasting.py @@ -16,6 +16,8 @@ class Exp_Long_Term_Forecast(Exp_Basic): def __init__(self, args): super(Exp_Long_Term_Forecast, self).__init__(args) + self.trues_during_training = [] + self.preds_during_training = [] self.train_losses = [] self.test_losses = [] @@ -188,6 +190,8 @@ def train(self, setting): outputs = outputs[:, -self.args.pred_len:, f_dim:] batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) loss = criterion(outputs, batch_y) + self.preds_during_training.append(outputs) + self.trues_during_training.append(batch_y) train_loss.append(loss.item()) if (i + 1) % 100 == 0: From 0c8255761019ee413d3b1de3ad96778229193c0b Mon Sep 17 00:00:00 2001 From: Hamed <147453648+cloner174@users.noreply.github.com> Date: Wed, 22 May 2024 16:36:55 +0330 Subject: [PATCH 11/38] Update LICENSE --- LICENSE | 1 + 1 file changed, 1 insertion(+) diff --git a/LICENSE b/LICENSE index 6f6856e74..f4ad474ad 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,7 @@ MIT License Copyright (c) 2022 THUML @ Tsinghua University +Copyright (c) 2024 cloner174 @ Hamed Hajipour Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From 5b57bec4fadac78a4b8aba03c64f19b71c11ffcf Mon Sep 17 00:00:00 2001 From: Hamed <147453648+cloner174@users.noreply.github.com> Date: Wed, 22 May 2024 16:38:14 +0330 Subject: [PATCH 12/38] Update LICENSE --- LICENSE | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/LICENSE b/LICENSE index f4ad474ad..7c00b002c 100644 --- a/LICENSE +++ b/LICENSE @@ -3,20 +3,8 @@ MIT License Copyright (c) 2022 THUML @ Tsinghua University Copyright (c) 2024 cloner174 @ Hamed Hajipour -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. From ce974dbf605403c36219a2db05735d9d2aeb8a7b Mon Sep 17 00:00:00 2001 From: Hamed <147453648+cloner174@users.noreply.github.com> Date: Wed, 22 May 2024 16:40:40 +0330 Subject: [PATCH 13/38] Update LICENSE --- LICENSE | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/LICENSE b/LICENSE index 7c00b002c..f4ad474ad 100644 --- a/LICENSE +++ b/LICENSE @@ -3,8 +3,20 @@ MIT License Copyright (c) 2022 THUML @ Tsinghua University Copyright (c) 2024 cloner174 @ Hamed Hajipour -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
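PATCH 02 and PATCH 09 above make Dataset_Custom and Dataset_Pred fit a dedicated scaler on the target column and dump it to <root_path>/scaler.pkl with joblib, precisely so that saved predictions can later be mapped back to the original scale. A minimal sketch of that round trip follows; it is not part of the patch series, and the `setting` string and file paths are illustrative — they mirror the defaults configured in PATCH 01 (root_path 'input/train', features='MS', c_out=1, so the last axis of the saved array is exactly the target column).

import os
import joblib
import numpy as np

root_path = 'input/train'   # arg.root_path from PATCH 01
setting = '...'             # the experiment string printed by run.py

# Predictions written by exp.predict(), shape (n_windows, pred_len, c_out).
# The file is named 'real_prediction.npy' at this point in the series;
# later patches rename it.
preds = np.load('./results/' + setting + '/real_prediction.npy')

# Target-column scaler saved by Dataset_Custom.__read_data__ during training.
scaler = joblib.load(os.path.join(root_path, 'scaler.pkl'))

# sklearn scalers expect 2-D input, so collapse the window dimension first,
# then restore the original shape after the inverse transform.
flat = preds.reshape(-1, preds.shape[-1])
preds_rescaled = scaler.inverse_transform(flat).reshape(preds.shape)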
From 1370d35a36bbd0f6ce363ac0f495479cb450f50f Mon Sep 17 00:00:00 2001 From: Hamed <147453648+cloner174@users.noreply.github.com> Date: Wed, 22 May 2024 16:53:01 +0330 Subject: [PATCH 14/38] add options run.py --- run.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/run.py b/run.py index cf2b9472e..821b0e195 100644 --- a/run.py +++ b/run.py @@ -15,6 +15,27 @@ arg = dotdict() +# NEW OPTIONS : # + +arg.test_size = None # default is 0.2 which makes the training 0.7 ! # +arg.kind_of_scaler = None # default is 'Standard'. Another Option is 'MinMax' (recommended) # +arg.name_of_col_with_date = None # default is 'date'. Name of your date column in your dataset # + +arg.kind_of_optim = 'default' # default is 'Adam'. + # other options : 'AdamW', 'SparseAdam', 'SGD', 'RMSprop', 'RAdam', 'NAdam' ,'LBFGS', + # 'Adamax' 'ASGD' 'Adadelta' 'Adagrad' + +arg.criter = 'default' # default is nn.MSELoss ( Mean Squared Error ) + # other options : 'wmape', 'smape', 'mae', 'rmse', 'quantileloss', 'huberloss', 'pinballloss' + +# NEW Accessories : # + +exp.trues_during_training +exp.preds_during_training +exp.train_losses +exp.test_losses + +##################### arg.is_training = 1 # help: status arg.model_id = 'test' From b2bbe59dd1006b2132e2945c3c64931bbee785b9 Mon Sep 17 00:00:00 2001 From: Hamed <147453648+cloner174@users.noreply.github.com> Date: Wed, 22 May 2024 20:08:34 +0330 Subject: [PATCH 15/38] fix data_loader.py --- data_provider/data_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_provider/data_loader.py b/data_provider/data_loader.py index 217d24cfc..af6864476 100644 --- a/data_provider/data_loader.py +++ b/data_provider/data_loader.py @@ -218,7 +218,7 @@ def __init__(self, self.test_size = test_size self.train_size = 0.90 - test_size self.kind_of_scaler = kind_of_scaler if kind_of_scaler is not None else 'Standard' - self.name_of_col_with_date = name_of_col_with_date if name_of_col_with_dateis is not None else 'date' + self.name_of_col_with_date = name_of_col_with_date if name_of_col_with_date is not None else 'date' self.root_path = root_path self.data_path = data_path self.__read_data__() From fd85cbd634072c132ae49387f01c0aae1d99b1bd Mon Sep 17 00:00:00 2001 From: Hamed <147453648+cloner174@users.noreply.github.com> Date: Thu, 23 May 2024 19:29:23 +0330 Subject: [PATCH 16/38] save the trues for predict function exp_long_term_forecasting.py --- experiments/exp_long_term_forecasting.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/experiments/exp_long_term_forecasting.py b/experiments/exp_long_term_forecasting.py index c217ab92e..f6b771b4f 100644 --- a/experiments/exp_long_term_forecasting.py +++ b/experiments/exp_long_term_forecasting.py @@ -335,7 +335,8 @@ def predict(self, setting, load=False): self.model.load_state_dict(torch.load(best_model_path)) preds = [] - + true_values = [] + self.model.eval() with torch.no_grad(): for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(pred_loader): @@ -344,6 +345,7 @@ def predict(self, setting, load=False): batch_x_mark = batch_x_mark.float().to(self.device) batch_y_mark = batch_y_mark.float().to(self.device) + true_values.append(batch_y.cpu().numpy()) # decoder input dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) @@ -368,11 +370,18 @@ def predict(self, setting, load=False): preds = np.array(preds) preds = preds.reshape(-1, 
preds.shape[-2], preds.shape[-1]) + true_values = np.concatenate(true_values, axis=0) # result save folder_path = './results/' + setting + '/' if not os.path.exists(folder_path): os.makedirs(folder_path) - np.save(folder_path + 'real_prediction.npy', preds) + pred_save_path = folder_path + 'Preds real_prediction.npy' + true_save_path = folder_path + 'true_values.npy' + np.save(folder_path + 'Preds real_prediction.npy', preds) + np.save(folder_path + 'true_values.npy', true_values) + print(f'''The Results of Prediction for The Next {self.args.pred_len} Days Are + Now Stored in {true_save_path} for The True values and + {pred_save_path} for the Predictions''') return From f618b60b0d0cb5d81ba5cac63c8c0bc5bcffc392 Mon Sep 17 00:00:00 2001 From: Hamed <147453648+cloner174@users.noreply.github.com> Date: Thu, 23 May 2024 19:57:38 +0330 Subject: [PATCH 17/38] Update exp_long_term_forecasting.py --- experiments/exp_long_term_forecasting.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/experiments/exp_long_term_forecasting.py b/experiments/exp_long_term_forecasting.py index f6b771b4f..b872e7d38 100644 --- a/experiments/exp_long_term_forecasting.py +++ b/experiments/exp_long_term_forecasting.py @@ -369,19 +369,19 @@ def predict(self, setting, load=False): preds = np.array(preds) preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1]) - - true_values = np.concatenate(true_values, axis=0) + true_values = np.array(true_values) + true_values = true_values.reshape(-1, true_values.shape[-2], true_values.shape[-1]) # result save folder_path = './results/' + setting + '/' if not os.path.exists(folder_path): os.makedirs(folder_path) pred_save_path = folder_path + 'Preds real_prediction.npy' - true_save_path = folder_path + 'true_values.npy' + true_save_path = folder_path + 'Trues real_prediction.npy' np.save(folder_path + 'Preds real_prediction.npy', preds) - np.save(folder_path + 'true_values.npy', true_values) + np.save(folder_path + 'Trues real_prediction.npy', true_values) print(f'''The Results of Prediction for The Next {self.args.pred_len} Days Are - Now Stored in {true_save_path} for The True values and - {pred_save_path} for the Predictions''') + Now Stored in {true_save_path} for The True values and + {pred_save_path} for the Predictions''') return From bf34f066654750d49a1e3f741972c56341f51de9 Mon Sep 17 00:00:00 2001 From: Hamed <147453648+cloner174@users.noreply.github.com> Date: Thu, 23 May 2024 20:12:19 +0330 Subject: [PATCH 18/38] Update exp_long_term_forecasting.py --- experiments/exp_long_term_forecasting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiments/exp_long_term_forecasting.py b/experiments/exp_long_term_forecasting.py index b872e7d38..d49cccb94 100644 --- a/experiments/exp_long_term_forecasting.py +++ b/experiments/exp_long_term_forecasting.py @@ -370,7 +370,7 @@ def predict(self, setting, load=False): preds = np.array(preds) preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1]) true_values = np.array(true_values) - true_values = true_values.reshape(-1, true_values.shape[-2], true_values.shape[-1]) + #true_values = true_values.reshape(-1, true_values.shape[-2], true_values.shape[-1]) # result save folder_path = './results/' + setting + '/' if not os.path.exists(folder_path): From 1b26f1db7c8a8f31681f816b32fc1b144dc087bf Mon Sep 17 00:00:00 2001 From: cloner174 Date: Fri, 24 May 2024 01:45:29 +0330 Subject: [PATCH 19/38] fix --- .gitignore | 5 +++- experiments/exp_long_term_forecasting.py | 13 +++++---- run.py | 
36 ++++++++++++++---------- 3 files changed, 32 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index 104809f88..fa84c6d66 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,10 @@ __pycache__/ *.py[cod] *$py.class - +test_results/ +checkpoints +results +result_long_term_forecast.txt # C extensions *.so diff --git a/experiments/exp_long_term_forecasting.py b/experiments/exp_long_term_forecasting.py index d49cccb94..567f27989 100644 --- a/experiments/exp_long_term_forecasting.py +++ b/experiments/exp_long_term_forecasting.py @@ -328,12 +328,12 @@ def test(self, setting, test=0): def predict(self, setting, load=False): pred_data, pred_loader = self._get_data(flag='pred') - + if load: path = os.path.join(self.args.checkpoints, setting) best_model_path = path + '/' + 'checkpoint.pth' self.model.load_state_dict(torch.load(best_model_path)) - + preds = [] true_values = [] @@ -344,8 +344,7 @@ def predict(self, setting, load=False): batch_y = batch_y.float() batch_x_mark = batch_x_mark.float().to(self.device) batch_y_mark = batch_y_mark.float().to(self.device) - - true_values.append(batch_y.cpu().numpy()) + # decoder input dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) @@ -362,15 +361,17 @@ def predict(self, setting, load=False): else: outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) outputs = outputs.detach().cpu().numpy() + batch_y = batch_y.detach().cpu().numpy() if pred_data.scale and self.args.inverse: shape = outputs.shape outputs = pred_data.inverse_transform(outputs.squeeze(0)).reshape(shape) preds.append(outputs) - + true_values.append(batch_y) + preds = np.array(preds) preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1]) true_values = np.array(true_values) - #true_values = true_values.reshape(-1, true_values.shape[-2], true_values.shape[-1]) + true_values = true_values.reshape(-1, true_values.shape[-2], true_values.shape[-1]) # result save folder_path = './results/' + setting + '/' if not os.path.exists(folder_path): diff --git a/run.py b/run.py index 821b0e195..e9f2691e6 100644 --- a/run.py +++ b/run.py @@ -17,23 +17,25 @@ # NEW OPTIONS : # -arg.test_size = None # default is 0.2 which makes the training 0.7 ! # -arg.kind_of_scaler = None # default is 'Standard'. Another Option is 'MinMax' (recommended) # -arg.name_of_col_with_date = None # default is 'date'. Name of your date column in your dataset # +arg.scale = True + +arg.test_size = 0.2 # default is 0.2 which makes the training 0.7 ! # +arg.kind_of_scaler = 'Standard' # default is 'Standard'. Another Option is 'MinMax' (recommended) # +arg.name_of_col_with_date = 'date' # default is 'date'. Name of your date column in your dataset # arg.kind_of_optim = 'default' # default is 'Adam'. 
- # other options : 'AdamW', 'SparseAdam', 'SGD', 'RMSprop', 'RAdam', 'NAdam' ,'LBFGS', - # 'Adamax' 'ASGD' 'Adadelta' 'Adagrad' +# other options : 'AdamW', 'SparseAdam', 'SGD', 'RMSprop', 'RAdam', 'NAdam' ,'LBFGS', +# 'Adamax' 'ASGD' 'Adadelta' 'Adagrad' arg.criter = 'default' # default is nn.MSELoss ( Mean Squared Error ) - # other options : 'wmape', 'smape', 'mae', 'rmse', 'quantileloss', 'huberloss', 'pinballloss' +# other options : 'wmape', 'smape', 'mae', 'rmse', 'quantileloss', 'huberloss', 'pinballloss' # NEW Accessories : # -exp.trues_during_training -exp.preds_during_training -exp.train_losses -exp.test_losses +#exp.trues_during_training +#exp.preds_during_training +#exp.train_losses +#exp.test_losses ##################### @@ -81,7 +83,7 @@ arg.dropout = 0.01 -arg.embed = 'timeF' # help: time features encoding, options: timeF OR fixed OR learned +arg.embed = 'learned' # help: time features encoding, options: timeF OR fixed OR learned arg.activation = 'ReLU' # help: Name of activation Function #arg.output_attention = None # help: Whether to output attention in ecoder @@ -90,11 +92,11 @@ arg.num_workers = 10 # help: data loader num workers arg.itr = 1 # help: How many times repeat experiments -arg.train_epochs = 21 +arg.train_epochs = 25 arg.batch_size = 16 -arg.patience = 7 # help: early stopping patience +arg.patience = 10 # help: early stopping patience arg.learning_rate = 0.00005 @@ -114,14 +116,13 @@ arg.channel_independence = False # help: whether to use channel_independence mechanism -arg.inverse = True # help: inverse output data +arg.inverse = False # help: inverse output data arg.class_strategy = 'projection' # help: options: projection/average/cls_token - arg.efficient_training = False # help: whether to use efficient_training (exp_name should be partial_train) | See Figure 8 arg.use_norm = True # help: use norm and denorm | type=int @@ -178,6 +179,10 @@ exp.train(setting) print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) + + train_losses = exp.train_losses##### --->>> Use These To Plot the Loss Values + test_losses = exp.test_losses#### --->>> Use These To Plot the Loss Values + exp.test(setting) if arg.do_predict: @@ -210,4 +215,5 @@ print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) exp.test(setting, test=1) torch.cuda.empty_cache() + #end# From 40fe7f7f0c6b2be70b8f6118e2595c223c982b66 Mon Sep 17 00:00:00 2001 From: cloner174 Date: Fri, 24 May 2024 01:46:50 +0330 Subject: [PATCH 20/38] add empty folder for results during train test val --- results/DONOTREMOVE | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 results/DONOTREMOVE diff --git a/results/DONOTREMOVE b/results/DONOTREMOVE new file mode 100644 index 000000000..e69de29bb From 45588e8ca5a0081c38594abd1e2770f0b4fb8e38 Mon Sep 17 00:00:00 2001 From: cloner174 Date: Fri, 24 May 2024 01:46:59 +0330 Subject: [PATCH 21/38] modify --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index fa84c6d66..ad800b3c8 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ __pycache__/ *$py.class test_results/ checkpoints -results +results/test_iTransformer_custom_MS_ft15_sl1_ll3_pl512_dm8_nh8_el8_dl1024_df1_fctimeF_ebTrue_dttest_projection_0 result_long_term_forecast.txt # C extensions *.so From 8c3b27ab08f1ea38e310480bb1a618b70367fb17 Mon Sep 17 00:00:00 2001 From: cloner174 Date: Fri, 24 May 2024 01:48:40 +0330 Subject: [PATCH 22/38] add empty folder for train test val results --- 
test_results/DONOTREMOVE | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 test_results/DONOTREMOVE diff --git a/test_results/DONOTREMOVE b/test_results/DONOTREMOVE new file mode 100644 index 000000000..e69de29bb From 73e22fff2db97fe7f561002941b1a10a79e0db7f Mon Sep 17 00:00:00 2001 From: cloner174 Date: Fri, 24 May 2024 01:48:46 +0330 Subject: [PATCH 23/38] modify --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index ad800b3c8..31f8251c9 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,8 @@ __pycache__/ *.py[cod] *$py.class -test_results/ + +test_results/test_iTransformer_custom_MS_ft15_sl1_ll3_pl512_dm8_nh8_el8_dl1024_df1_fctimeF_ebTrue_dttest_projection_0 checkpoints results/test_iTransformer_custom_MS_ft15_sl1_ll3_pl512_dm8_nh8_el8_dl1024_df1_fctimeF_ebTrue_dttest_projection_0 result_long_term_forecast.txt From 56ec65279005c9d4a200a9de356b9551994f465f Mon Sep 17 00:00:00 2001 From: cloner174 Date: Fri, 24 May 2024 01:52:52 +0330 Subject: [PATCH 24/38] fix --- data_provider/data_loader.py | 57 +++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/data_provider/data_loader.py b/data_provider/data_loader.py index af6864476..8090d56c5 100644 --- a/data_provider/data_loader.py +++ b/data_provider/data_loader.py @@ -191,10 +191,18 @@ def inverse_transform(self, data): class Dataset_Custom(Dataset): def __init__(self, - root_path, flag='train', size=None, - features='S', data_path='data.csv', - target='Close', scale=True, timeenc=0, freq='b', - test_size = 0.2, kind_of_scaler = None, name_of_col_with_date = None): + root_path, + flag='train', + size=None, + features='MS', + data_path='data.csv', + target='Close', + scale=True, + timeenc=0, + freq='b', + test_size = 0.2, + kind_of_scaler = None, + name_of_col_with_date = None): # size [seq_len, label_len, pred_len] # info if size == None: @@ -215,7 +223,7 @@ def __init__(self, self.scale = scale self.timeenc = timeenc self.freq = freq - self.test_size = test_size + self.test_size = test_size if test_size is not None else 0.2 self.train_size = 0.90 - test_size self.kind_of_scaler = kind_of_scaler if kind_of_scaler is not None else 'Standard' self.name_of_col_with_date = name_of_col_with_date if name_of_col_with_date is not None else 'date' @@ -330,8 +338,8 @@ def inverse_transform(self, data): class Dataset_PEMS(Dataset): def __init__(self, root_path, flag='train', size=None, - features='S', data_path='ETTh1.csv', - target='OT', scale=True, timeenc=0, freq='h'): + features='S', data_path='ETTh1.csv', + target='OT', scale=True, timeenc=0, freq='h'): # size [seq_len, label_len, pred_len] # info self.seq_len = size[0] @@ -473,15 +481,15 @@ def inverse_transform(self, data): class Dataset_Pred(Dataset): def __init__(self, root_path, flag='pred', size=None, - features='S', data_path='data.csv', - target='Close', scale=True, inverse=True, timeenc=0, freq='b', cols=None, - test_size = None, kind_of_scaler = None, name_of_col_with_date = None): + features='S', data_path='data.csv', + target='Close', scale=True, inverse=False, timeenc=0, freq='b', cols=None, + test_size = None, kind_of_scaler = None, name_of_col_with_date = None): # size [seq_len, label_len, pred_len] # info if size == None: self.seq_len = 1 * 5 * 6 self.label_len = 1 * 1 - self.pred_len = 1 * 1 + self.pred_len = 1 * 3 else: self.seq_len = size[0] self.label_len = size[1] @@ -489,7 +497,7 @@ def __init__(self, root_path, flag='pred', size=None, # init assert 
flag in ['pred'] - self.test_size = None + self.test_size = test_size self.kind_of_scaler = kind_of_scaler if kind_of_scaler is not None else 'Standard' self.name_of_col_with_date = name_of_col_with_date if name_of_col_with_date is not None else 'date' self.features = features @@ -502,11 +510,12 @@ def __init__(self, root_path, flag='pred', size=None, self.root_path = root_path self.data_path = data_path self.__read_data__() - + + def __read_data__(self): self.scaler = StandardScaler() df_raw = pd.read_csv(os.path.join(self.root_path, - self.data_path)) + self.data_path)) ''' df_raw.columns: ['date', ...(other features), target feature] ''' @@ -523,13 +532,13 @@ def __read_data__(self): df_raw = df_raw.set_axis(cols, axis=1) border1 = len(df_raw) - self.seq_len border2 = len(df_raw) - + if self.features == 'M' or self.features == 'MS': cols_data = df_raw.columns[1:] df_data = df_raw[cols_data] elif self.features == 'S': df_data = df_raw[[self.target]] - + if self.scale: if self.features == 'S' or self.features == 'MS': col_scaled = [] @@ -567,11 +576,11 @@ def __read_data__(self): data = self.scaler.transform(df_data.values) else: data = df_data.values - + tmp_stamp = df_raw[['date']][border1:border2] tmp_stamp['date'] = pd.to_datetime(tmp_stamp.date) pred_dates = pd.date_range(tmp_stamp.date.values[-1], periods=self.pred_len + 1, freq=self.freq) - + df_stamp = pd.DataFrame(columns=['date']) df_stamp.date = list(tmp_stamp.date.values) + list(pred_dates[1:]) if self.timeenc == 0: @@ -585,20 +594,20 @@ def __read_data__(self): elif self.timeenc == 1: data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) data_stamp = data_stamp.transpose(1, 0) - + self.data_x = data[border1:border2] if self.inverse: self.data_y = df_data.values[border1:border2] else: self.data_y = data[border1:border2] self.data_stamp = data_stamp - + def __getitem__(self, index): s_begin = index s_end = s_begin + self.seq_len r_begin = s_end - self.label_len r_end = r_begin + self.label_len + self.pred_len - + seq_x = self.data_x[s_begin:s_end] if self.inverse: seq_y = self.data_x[r_begin:r_begin + self.label_len] @@ -606,11 +615,11 @@ def __getitem__(self, index): seq_y = self.data_y[r_begin:r_begin + self.label_len] seq_x_mark = self.data_stamp[s_begin:s_end] seq_y_mark = self.data_stamp[r_begin:r_end] - + return seq_x, seq_y, seq_x_mark, seq_y_mark - + def __len__(self): return len(self.data_x) - self.seq_len + 1 - + def inverse_transform(self, data): return self.scaler.inverse_transform(data) From 0c30094855ddebd6332ebaf0fdc7aa6923acadd0 Mon Sep 17 00:00:00 2001 From: cloner174 Date: Fri, 24 May 2024 01:53:51 +0330 Subject: [PATCH 25/38] add empty input and 2 root for data --- input/test/DONOTREMOVE | 0 input/train/DONOTREMOVE | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 input/test/DONOTREMOVE create mode 100644 input/train/DONOTREMOVE diff --git a/input/test/DONOTREMOVE b/input/test/DONOTREMOVE new file mode 100644 index 000000000..e69de29bb diff --git a/input/train/DONOTREMOVE b/input/train/DONOTREMOVE new file mode 100644 index 000000000..e69de29bb From f97121a14ac31f2852b7eb5247b08e83e2b33b16 Mon Sep 17 00:00:00 2001 From: cloner174 Date: Fri, 24 May 2024 01:57:01 +0330 Subject: [PATCH 26/38] add other loss functions for Model --- utils/criter.py | 105 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 utils/criter.py diff --git a/utils/criter.py b/utils/criter.py new file mode 100644 index 000000000..0e16662c1 
--- /dev/null
+++ b/utils/criter.py
@@ -0,0 +1,105 @@
+# # in The Name of GOD # #
+#
+# # These are some simple and straightforward classes # #
+# to interact with our loss functions #
+# cloner174.org@gmail.com
+#
+import torch
+import torch.nn as nn
+
+
+
+class WeightedMeanAbsolutePercentageError(nn.Module):
+    
+    def __init__(self):
+        
+        super(WeightedMeanAbsolutePercentageError, self).__init__()
+    
+    
+    def forward(self, y_pred, y_true):
+        # WMAPE = sum(|y_true - y_pred|) / sum(|y_true|):
+        # absolute errors weighted by the overall scale of the series
+        absolute_errors = torch.abs(y_true - y_pred)
+        
+        return torch.sum(absolute_errors) / (torch.sum(torch.abs(y_true)) + 1e-8)
+
+
+
+class SymmetricMeanAbsolutePercentageError(nn.Module):
+    
+    def __init__(self):
+        
+        super(SymmetricMeanAbsolutePercentageError, self).__init__()
+    
+    def forward(self, y_pred, y_true):
+        
+        absolute_percentage_errors = torch.abs((y_true - y_pred) / ((torch.abs(y_true) + torch.abs(y_pred)) / 2 + 1e-8))
+        
+        return torch.mean(absolute_percentage_errors)
+
+
+
+class RMSELoss(nn.Module):
+    
+    def __init__(self):
+        
+        super(RMSELoss, self).__init__()
+    
+    
+    def forward(self, y_pred, y_true):
+        
+        return torch.sqrt(torch.mean((y_pred - y_true) ** 2))
+
+
+
+class QuantileLoss(nn.Module):
+    
+    def __init__(self, quantile=0.5):
+        
+        super(QuantileLoss, self).__init__()
+        self.quantile = quantile
+    
+    
+    def forward(self, y_pred, y_true):
+        
+        errors = y_true - y_pred
+        quantile_loss = torch.max((self.quantile - 1) * errors, self.quantile * errors)
+        
+        return torch.mean(quantile_loss)
+
+
+
+class HuberLoss(nn.Module):
+    
+    def __init__(self, delta=1.0):
+        
+        super(HuberLoss, self).__init__()
+        self.delta = delta
+    
+    
+    def forward(self, y_pred, y_true):
+        # clamp, not torch.min: the threshold is a float, and torch.min needs a tensor
+        errors = torch.abs(y_pred - y_true)
+        quadratic = torch.clamp(errors, max=self.delta)
+        linear = errors - quadratic
+        
+        return torch.mean(0.5 * quadratic ** 2 + self.delta * linear)
+
+
+
+class PinballLoss(nn.Module):
+    
+    def __init__(self, tau=0.5):
+        
+        super(PinballLoss, self).__init__()
+        self.tau = tau
+    
+    
+    def forward(self, y_pred, y_true):
+        
+        delta = y_pred - y_true
+        loss = torch.max((self.tau - 1) * delta, self.tau * delta)
+        
+        return torch.mean(loss)
+
+#
From 9ae3c0345d529cdaa06b6748390339ad0c8c0a8f Mon Sep 17 00:00:00 2001
From: cloner174 
Date: Fri, 24 May 2024 01:57:24 +0330
Subject: [PATCH 27/38] add loss functions access with arg.criter

---
 experiments/exp_long_term_forecasting.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/experiments/exp_long_term_forecasting.py b/experiments/exp_long_term_forecasting.py
index 567f27989..91069b8fe 100644
--- a/experiments/exp_long_term_forecasting.py
+++ b/experiments/exp_long_term_forecasting.py
@@ -2,6 +2,7 @@
 from experiments.exp_basic import Exp_Basic
 from utils.tools import EarlyStopping, adjust_learning_rate, visual
 from utils.metrics import metric
+from utils.criter import WeightedMeanAbsolutePercentageError, SymmetricMeanAbsolutePercentageError, RMSELoss, QuantileLoss, HuberLoss, PinballLoss
 import torch
 import torch.nn as nn
 from torch import optim
@@ -20,18 +21,17 @@ def __init__(self, args):
         self.preds_during_training = []
         self.train_losses = []
         self.test_losses = []
-
+    
     def _build_model(self):
         model = self.model_dict[self.args.model].Model(self.args).float()
-
         if self.args.use_multi_gpu and self.args.use_gpu:
             model = nn.DataParallel(model, device_ids=self.args.device_ids)
         return model
-
+    
     def _get_data(self, flag):
         data_set, data_loader = data_provider(self.args, flag)
         return data_set, data_loader
-
+    
     def _select_optimizer(self):
         if 
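# Aside: a quick sanity check for the criteria defined in utils/criter.py above.
# A minimal sketch, assuming y_pred and y_true are float tensors of equal shape
# (the sample tensors are illustrative only):
import torch
from utils.criter import HuberLoss, PinballLoss

y_true = torch.tensor([1.0, 2.0, 3.0])
y_pred = torch.tensor([1.5, 1.5, 2.0])
print(HuberLoss(delta=1.0)(y_pred, y_true))   # quadratic near zero, linear in the tails
print(PinballLoss(tau=0.9)(y_pred, y_true))   # tau > 0.5 penalizes under-prediction more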
self.args.kind_of_optim == 'AdamW': model_optim = optim.AdamW(self.model.parameters(), lr=self.args.learning_rate) From 6c450f4bc1d77c84dbe8aebf9aa309eb40c41073 Mon Sep 17 00:00:00 2001 From: cloner174 Date: Fri, 24 May 2024 17:33:00 +0330 Subject: [PATCH 28/38] add validate info --- .gitignore | 7 +- data_provider/data_factory.py | 6 +- experiments/exp_long_term_forecasting.py | 161 ++++++++++++++++++----- run.py | 4 +- 4 files changed, 136 insertions(+), 42 deletions(-) diff --git a/.gitignore b/.gitignore index 31f8251c9..95ad9a9a7 100644 --- a/.gitignore +++ b/.gitignore @@ -9,7 +9,10 @@ results/test_iTransformer_custom_MS_ft15_sl1_ll3_pl512_dm8_nh8_el8_dl1024_df1_fc result_long_term_forecast.txt # C extensions *.so - +/home/user/Documents/Python/Others/iTransformer/input/test/data.csv +/home/user/Documents/Python/Others/iTransformer/input/test/scaler.pkl +/home/user/Documents/Python/Others/iTransformer/input/train/data.csv +/home/user/Documents/Python/Others/iTransformer/input/train/scaler.pkl */.DS_Store # Distribution / packaging @@ -132,4 +135,4 @@ venv.bak/ dmypy.json # Pyre type checker -.pyre/ \ No newline at end of file +.pyre/ diff --git a/data_provider/data_factory.py b/data_provider/data_factory.py index cb3cfde2d..195b04d5e 100644 --- a/data_provider/data_factory.py +++ b/data_provider/data_factory.py @@ -33,10 +33,10 @@ def data_provider(args, flag): drop_last = True batch_size = args.batch_size # bsz for train and valid freq = args.freq - + data_set = Data( - root_path=args.root_path, - data_path=args.data_path, + root_path=args.root_path if flag != 'pred' else args.pred_root_path, + data_path=args.data_path if flag != 'pred' else args.pred_data_path, flag=flag, size=[args.seq_len, args.label_len, args.pred_len], features=args.features, diff --git a/experiments/exp_long_term_forecasting.py b/experiments/exp_long_term_forecasting.py index 91069b8fe..60b1f2c36 100644 --- a/experiments/exp_long_term_forecasting.py +++ b/experiments/exp_long_term_forecasting.py @@ -17,10 +17,11 @@ class Exp_Long_Term_Forecast(Exp_Basic): def __init__(self, args): super(Exp_Long_Term_Forecast, self).__init__(args) - self.trues_during_training = [] - self.preds_during_training = [] self.train_losses = [] self.test_losses = [] + self.vali_losses = [] + self.trues_during_vali = [] + self.preds_during_vali = [] def _build_model(self): model = self.model_dict[self.args.model].Model(self.args).float() @@ -82,19 +83,20 @@ def _select_criterion(self): def vali(self, vali_data, vali_loader, criterion): total_loss = [] + trues_during_vali = [] + preds_during_vali = [] self.model.eval() with torch.no_grad(): for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader): batch_x = batch_x.float().to(self.device) batch_y = batch_y.float() - + if 'PEMS' in self.args.data or 'Solar' in self.args.data: batch_x_mark = None batch_y_mark = None else: batch_x_mark = batch_x_mark.float().to(self.device) batch_y_mark = batch_y_mark.float().to(self.device) - # decoder input dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) @@ -113,48 +115,85 @@ def vali(self, vali_data, vali_loader, criterion): f_dim = -1 if self.args.features == 'MS' else 0 outputs = outputs[:, -self.args.pred_len:, f_dim:] batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) - + pred = outputs.detach().cpu() true = batch_y.detach().cpu() - + loss = criterion(pred, true) - + 
trues_during_vali.append(batch_y.detach().cpu().numpy()) + preds_during_vali.append(outputs.detach().cpu().numpy()) + total_loss.append(loss) + total_loss = np.average(total_loss) self.model.train() + try: + if len(self.trues_during_vali) == 0: + trues_during_vali = np.array(trues_during_vali) + preds_during_vali = np.array(preds_during_vali) + self.trues_during_vali = trues_during_vali.reshape(-1, trues_during_vali.shape[-2], trues_during_vali.shape[-1]) + self.preds_during_vali = preds_during_vali.reshape(-1, preds_during_vali.shape[-2], preds_during_vali.shape[-1]) + else: + shape_self_true = self.trues_during_vali.shape + shape_self_pred = self.preds_during_vali.shape + + trues_during_vali = np.array(trues_during_vali) + preds_during_vali = np.array(preds_during_vali) + trues_during_vali = trues_during_vali.reshape(-1, trues_during_vali.shape[-2], trues_during_vali.shape[-1]) + preds_during_vali = preds_during_vali.reshape(-1, preds_during_vali.shape[-2], preds_during_vali.shape[-1]) + shape_funv_true = trues_during_vali.shape + shape_funv_pred = preds_during_vali.shape + + self.trues_during_vali = self.trues_during_vali.flatten().tolist() + self.preds_during_vali = self.preds_during_vali.flatten().tolist() + trues_during_vali = trues_during_vali.flatten().tolist() + preds_during_vali = preds_during_vali.flatten().tolist() + trues_during_vali = self.trues_during_vali + trues_during_vali + preds_during_vali = self.preds_during_vali + preds_during_vali + + trues_during_vali = np.array(trues_during_vali) + preds_during_vali = np.array(preds_during_vali) + self.trues_during_vali = trues_during_vali.reshape(shape_funv_true[0]+shape_self_true[0], shape_self_true[1], shape_self_true[2]) + self.preds_during_vali = preds_during_vali.reshape(shape_self_pred[0]+shape_funv_pred[0], shape_self_pred[1],shape_self_pred[2]) + except: + pass return total_loss - + + def train(self, setting): train_data, train_loader = self._get_data(flag='train') vali_data, vali_loader = self._get_data(flag='val') test_data, test_loader = self._get_data(flag='test') - + + trues_during_training = [] + preds_during_training = [] + path = os.path.join(self.args.checkpoints, setting) if not os.path.exists(path): os.makedirs(path) - + time_now = time.time() - + train_steps = len(train_loader) early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) - + model_optim = self._select_optimizer() criterion = self._select_criterion() - + if self.args.use_amp: scaler = torch.cuda.amp.GradScaler() - + for epoch in range(self.args.train_epochs): iter_count = 0 train_loss = [] - + self.model.train() epoch_time = time.time() for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader): iter_count += 1 model_optim.zero_grad() batch_x = batch_x.float().to(self.device) - + batch_y = batch_y.float().to(self.device) if 'PEMS' in self.args.data or 'Solar' in self.args.data: batch_x_mark = None @@ -162,11 +201,11 @@ def train(self, setting): else: batch_x_mark = batch_x_mark.float().to(self.device) batch_y_mark = batch_y_mark.float().to(self.device) - + # decoder input dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) - + # encoder - decoder if self.args.use_amp: with torch.cuda.amp.autocast(): @@ -174,7 +213,7 @@ def train(self, setting): outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] else: outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) - + 
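# Aside: the flatten/reshape round-trip used to grow trues_during_vali and
# preds_during_vali above collapses into a single np.concatenate. A minimal,
# behavior-preserving sketch (append_batches is an illustrative helper, not
# part of the patch), assuming every batch shares the same (pred_len, n_vars) tail:
import numpy as np

def append_batches(history, new_batches):
    # history: (N, pred_len, n_vars) array, or an empty list on the first call
    stacked = np.concatenate([np.asarray(b) for b in new_batches], axis=0)
    if len(history) == 0:
        return stacked
    return np.concatenate([np.asarray(history), stacked], axis=0)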
f_dim = -1 if self.args.features == 'MS' else 0 outputs = outputs[:, -self.args.pred_len:, f_dim:] batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) @@ -185,15 +224,15 @@ def train(self, setting): outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] else: outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) - + f_dim = -1 if self.args.features == 'MS' else 0 outputs = outputs[:, -self.args.pred_len:, f_dim:] batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) loss = criterion(outputs, batch_y) - self.preds_during_training.append(outputs) - self.trues_during_training.append(batch_y) + preds_during_training.append(outputs.detach().cpu().numpy()) + trues_during_training.append(batch_y.detach().cpu().numpy()) train_loss.append(loss.item()) - + if (i + 1) % 100 == 0: print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) speed = (time.time() - time_now) / iter_count @@ -201,7 +240,7 @@ def train(self, setting): print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) iter_count = 0 time_now = time.time() - + if self.args.use_amp: scaler.scale(loss).backward() scaler.step(model_optim) @@ -209,29 +248,80 @@ def train(self, setting): else: loss.backward() model_optim.step() - + print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) train_loss = np.average(train_loss) vali_loss = self.vali(vali_data, vali_loader, criterion) test_loss = self.vali(test_data, test_loader, criterion) self.train_losses.append(train_loss) self.test_losses.append(test_loss) + self.vali_losses.append(vali_loss) print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( epoch + 1, train_steps, train_loss, vali_loss, test_loss)) early_stopping(vali_loss, self.model, path) if early_stopping.early_stop: print("Early stopping") break - + adjust_learning_rate(model_optim, epoch + 1, self.args) - + # get_cka(self.args, setting, self.model, train_loader, self.device, epoch) - + best_model_path = path + '/' + 'checkpoint.pth' self.model.load_state_dict(torch.load(best_model_path)) - + + preds_during_training = np.array(preds_during_training) + trues_during_training = np.array(trues_during_training) + print('\n') + print('train shape:', preds_during_training.shape, trues_during_training.shape) + preds_during_training = preds_during_training.reshape(-1, preds_during_training.shape[-2], preds_during_training.shape[-1]) + trues_during_training = trues_during_training.reshape(-1, trues_during_training.shape[-2], trues_during_training.shape[-1]) + print('train shape:', preds_during_training.shape, trues_during_training.shape) + # result save + folder_path = './results/' + setting + '/' + if not os.path.exists(folder_path): + os.makedirs(folder_path) + + mae, mse, rmse, mape, mspe = metric(preds_during_training, trues_during_training) + + print('Train mse:{},Train mae:{}'.format(mse, mae)) + print('Train rmse:{},Train mape:{}'.format(rmse, mape)) + print('\n') + time.sleep(2) + f = open("result_long_term_forecast.txt", 'a') + f.write(setting + " \n") + f.write('Train mse:{},Train mae:{}'.format(mse, mae)) + f.write('\n') + f.write('\n') + f.close() + + np.save(folder_path + 'metrics_during_training.npy', np.array([mae, mse, rmse, mape, mspe])) + np.save(folder_path + 'preds_during_training.npy', preds_during_training) + np.save(folder_path + 'trues_during_training.npy', trues_during_training) + try: + preds_during_vali = np.array(self.preds_during_vali) + 
trues_during_vali = np.array(self.trues_during_vali) + print('Validate shape:', (preds_during_vali.shape[0]//self.args.batch_size, self.args.batch_size, preds_during_vali.shape[1],preds_during_vali.shape[2]),(trues_during_vali.shape[0]//self.args.batch_size, self.args.batch_size, trues_during_vali.shape[1],trues_during_vali.shape[2])) + preds_during_vali = preds_during_vali.reshape(-1, preds_during_vali.shape[-2], preds_during_vali.shape[-1]) + trues_during_vali = trues_during_vali.reshape(-1, trues_during_vali.shape[-2], trues_during_vali.shape[-1]) + print('Validate shape:', preds_during_vali.shape, trues_during_vali.shape) + + mae, mse, rmse, mape, _ = metric(preds_during_vali, trues_during_vali) + print('Validate mse:{},Validate mae:{}'.format(mse, mae)) + print('Validate rmse:{},Validate mape:{}'.format(rmse, mape)) + print('\n') + time.sleep(2) + f = open("result_long_term_forecast.txt", 'a') + f.write("Validate Info:" + " \n") + f.write('mse:{}, mae:{}'.format(mse, mae)) + f.write('\n') + f.write('\n') + f.close() + except: + pass return self.model - + + def test(self, setting, test=0): test_data, test_loader = self._get_data(flag='test') if test: @@ -283,10 +373,10 @@ def test(self, setting, test=0): shape = outputs.shape outputs = test_data.inverse_transform(outputs.squeeze(0)).reshape(shape) batch_y = test_data.inverse_transform(batch_y.squeeze(0)).reshape(shape) - + pred = outputs true = batch_y - + preds.append(pred) trues.append(true) if i % 20 == 0: @@ -297,7 +387,7 @@ def test(self, setting, test=0): gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0) pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0) visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf')) - + preds = np.array(preds) trues = np.array(trues) print('test shape:', preds.shape, trues.shape) @@ -311,14 +401,15 @@ def test(self, setting, test=0): os.makedirs(folder_path) mae, mse, rmse, mape, mspe = metric(preds, trues) - print('mse:{}, mae:{}'.format(mse, mae)) + print('Test mse:{},Test mae:{}'.format(mse, mae)) + print('Test rmse:{},Test mape:{}'.format(rmse, mape)) f = open("result_long_term_forecast.txt", 'a') f.write(setting + " \n") f.write('mse:{}, mae:{}'.format(mse, mae)) f.write('\n') f.write('\n') f.close() - + np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe])) np.save(folder_path + 'pred.npy', preds) np.save(folder_path + 'true.npy', trues) diff --git a/run.py b/run.py index e9f2691e6..0aecb6901 100644 --- a/run.py +++ b/run.py @@ -48,8 +48,8 @@ arg.root_path = 'input/train' # help: main directory path of the data file arg.data_path = 'data.csv' # help: name of data csv file -arg.target_root_path = 'input/test' -arg.target_data_path = 'data.csv' +arg.pred_root_path = 'input/test' +arg.pred_data_path = 'data.csv' arg.features = 'MS' # help: forecasting task , options: M ->multivariate predict multivariate , or From 60abf1a450337f0e9c3505bf3b7c14bdd41fe826 Mon Sep 17 00:00:00 2001 From: cloner174 Date: Fri, 24 May 2024 19:23:40 +0330 Subject: [PATCH 29/38] predict like a pro --- .gitignore | 8 +- data_provider/data_factory.py | 70 ++-- data_provider/data_loader.py | 15 +- experiments/exp_long_term_forecasting.py | 34 +- input/test/data-pred.csv | 92 +++++ run.ipynb | 449 +++++++++++++++++++++++ run.py | 13 +- 7 files changed, 626 insertions(+), 55 deletions(-) create mode 100644 input/test/data-pred.csv create mode 100644 run.ipynb diff --git a/.gitignore b/.gitignore index 95ad9a9a7..4f3189218 100644 --- a/.gitignore +++ b/.gitignore @@ -9,10 +9,10 @@ 
results/test_iTransformer_custom_MS_ft15_sl1_ll3_pl512_dm8_nh8_el8_dl1024_df1_fc result_long_term_forecast.txt # C extensions *.so -/home/user/Documents/Python/Others/iTransformer/input/test/data.csv -/home/user/Documents/Python/Others/iTransformer/input/test/scaler.pkl -/home/user/Documents/Python/Others/iTransformer/input/train/data.csv -/home/user/Documents/Python/Others/iTransformer/input/train/scaler.pkl +input/test/data.csv +input/test/scaler.pkl +input/train/data.csv +input/train/scaler.pkl */.DS_Store # Distribution / packaging diff --git a/data_provider/data_factory.py b/data_provider/data_factory.py index 195b04d5e..6e5ecd545 100644 --- a/data_provider/data_factory.py +++ b/data_provider/data_factory.py @@ -16,44 +16,58 @@ def data_provider(args, flag): Data = data_dict[args.data] timeenc = 0 if args.embed != 'timeF' else 1 - - if flag == 'test': - shuffle_flag = False - drop_last = True - batch_size = 1 # bsz=1 for evaluation - freq = args.freq - elif flag == 'pred': + if flag == 'pred' : shuffle_flag = False drop_last = False batch_size = 1 freq = args.freq Data = Dataset_Pred + data_set = Data( + root_path=args.pred_root_path, + data_path=args.pred_data_path, + flag=flag, + size=[args.seq_len, args.label_len, args.pred_len], + features=args.features, + target=args.target, + timeenc=timeenc, + freq=freq, + kind_of_scaler=args.kind_of_scaler, + name_of_col_with_date = args.name_of_col_with_date, + scale = args.scale, + max_use_of_row = args.max_use_of_row if hasattr(args, 'max_use_of_row') else 'No Lim', + ) + print(flag, len(data_set)) else: - shuffle_flag = True - drop_last = True - batch_size = args.batch_size # bsz for train and valid - freq = args.freq - - data_set = Data( - root_path=args.root_path if flag != 'pred' else args.pred_root_path, - data_path=args.data_path if flag != 'pred' else args.pred_data_path, - flag=flag, - size=[args.seq_len, args.label_len, args.pred_len], - features=args.features, - target=args.target, - timeenc=timeenc, - freq=freq, - test_size = args.test_size, - kind_of_scaler=args.kind_of_scaler, - name_of_col_with_date = args.name_of_col_with_date, - scale = args.scale, - - ) - print(flag, len(data_set)) + if flag == 'test': + shuffle_flag = False + drop_last = True + batch_size = 1 # bsz=1 for evaluation + freq = args.freq + else: + shuffle_flag = True + drop_last = True + batch_size = args.batch_size # bsz for train and valid + freq = args.freq + data_set = Data( + root_path=args.root_path, + data_path=args.data_path, + flag=flag, + size=[args.seq_len, args.label_len, args.pred_len], + features=args.features, + target=args.target, + timeenc=timeenc, + freq=freq, + test_size = args.test_size, + kind_of_scaler=args.kind_of_scaler, + name_of_col_with_date = args.name_of_col_with_date, + scale = args.scale, + ) + print(flag, len(data_set)) data_loader = DataLoader( data_set, batch_size=batch_size, shuffle=shuffle_flag, num_workers=args.num_workers, drop_last=drop_last) + return data_set, data_loader diff --git a/data_provider/data_loader.py b/data_provider/data_loader.py index 8090d56c5..6bd99086d 100644 --- a/data_provider/data_loader.py +++ b/data_provider/data_loader.py @@ -481,9 +481,11 @@ def inverse_transform(self, data): class Dataset_Pred(Dataset): def __init__(self, root_path, flag='pred', size=None, - features='S', data_path='data.csv', + features='MS', data_path='data-pred.csv', target='Close', scale=True, inverse=False, timeenc=0, freq='b', cols=None, - test_size = None, kind_of_scaler = None, name_of_col_with_date = None): + 
max_use_of_row = 'No Lim',#It also can be 'All Except a Week' or 'All Except 3 Days' + kind_of_scaler = None, + name_of_col_with_date = None): # size [seq_len, label_len, pred_len] # info if size == None: @@ -497,7 +499,6 @@ def __init__(self, root_path, flag='pred', size=None, # init assert flag in ['pred'] - self.test_size = test_size self.kind_of_scaler = kind_of_scaler if kind_of_scaler is not None else 'Standard' self.name_of_col_with_date = name_of_col_with_date if name_of_col_with_date is not None else 'date' self.features = features @@ -509,6 +510,9 @@ def __init__(self, root_path, flag='pred', size=None, self.cols = cols self.root_path = root_path self.data_path = data_path + max_use_of_row = max_use_of_row if max_use_of_row is not None else 'No Lim' + self.max_use_of_row = 7 if max_use_of_row.lower() == 'all except a week' else 3 if max_use_of_row.lower() == 'all except 3 days' else 0 + self.__read_data__() @@ -530,8 +534,9 @@ def __read_data__(self): cols.insert(0, 'date') cols.append(self.target) df_raw = df_raw.set_axis(cols, axis=1) - border1 = len(df_raw) - self.seq_len - border2 = len(df_raw) + + border1 = len(df_raw) - self.max_use_of_row - self.seq_len + border2 = len(df_raw) - self.max_use_of_row if self.features == 'M' or self.features == 'MS': cols_data = df_raw.columns[1:] diff --git a/experiments/exp_long_term_forecasting.py b/experiments/exp_long_term_forecasting.py index 60b1f2c36..610856ddf 100644 --- a/experiments/exp_long_term_forecasting.py +++ b/experiments/exp_long_term_forecasting.py @@ -10,10 +10,12 @@ import time import warnings import numpy as np +import pickle warnings.filterwarnings('ignore') - +print("This is The enhanced version of Orginal code, Written in 2024") +time.sleep(1) class Exp_Long_Term_Forecast(Exp_Basic): def __init__(self, args): super(Exp_Long_Term_Forecast, self).__init__(args) @@ -22,6 +24,17 @@ def __init__(self, args): self.vali_losses = [] self.trues_during_vali = [] self.preds_during_vali = [] + if args.is_training != 0: + if os.path.exists('input'): + path_to_saved_args = 'input/args.pkl' + else: + path_to_saved_args = 'args.pkl' + self.path_to_saved_args = path_to_saved_args + with open(path_to_saved_args, 'wb') as f: + pickle.dump(args, f) + print("Args object saved to args.pkl") + print("It Can be further used by pickle.load()") + time.sleep(2) def _build_model(self): model = self.model_dict[self.args.model].Model(self.args).float() @@ -386,7 +399,8 @@ def test(self, setting, test=0): input = test_data.inverse_transform(input.squeeze(0)).reshape(shape) gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0) pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0) - visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf')) + if self.args.do_visual: + visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf')) preds = np.array(preds) trues = np.array(trues) @@ -426,7 +440,6 @@ def predict(self, setting, load=False): self.model.load_state_dict(torch.load(best_model_path)) preds = [] - true_values = [] self.model.eval() with torch.no_grad(): @@ -453,27 +466,22 @@ def predict(self, setting, load=False): outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) outputs = outputs.detach().cpu().numpy() batch_y = batch_y.detach().cpu().numpy() + self.batch_y = batch_y if pred_data.scale and self.args.inverse: shape = outputs.shape outputs = pred_data.inverse_transform(outputs.squeeze(0)).reshape(shape) preds.append(outputs) - true_values.append(batch_y) preds = np.array(preds) preds = preds.reshape(-1, 
preds.shape[-2], preds.shape[-1]) - true_values = np.array(true_values) - true_values = true_values.reshape(-1, true_values.shape[-2], true_values.shape[-1]) # result save folder_path = './results/' + setting + '/' if not os.path.exists(folder_path): os.makedirs(folder_path) - + pred_save_path = folder_path + 'Preds real_prediction.npy' - true_save_path = folder_path + 'Trues real_prediction.npy' np.save(folder_path + 'Preds real_prediction.npy', preds) - np.save(folder_path + 'Trues real_prediction.npy', true_values) - - print(f'''The Results of Prediction for The Next {self.args.pred_len} Days Are - Now Stored in {true_save_path} for The True values and - {pred_save_path} for the Predictions''') + + print(f'''The Results of Prediction for The Next {self.args.pred_len} Days Are Now Stored in + {pred_save_path}''') return diff --git a/input/test/data-pred.csv b/input/test/data-pred.csv new file mode 100644 index 000000000..8e4462c6b --- /dev/null +++ b/input/test/data-pred.csv @@ -0,0 +1,92 @@ +date,Open,High,Low,Final,Volume,Close +2023-06-14,24160,25450,23880,24660,1582455,25050 +2023-06-17,25230,25400,24540,24760,951221,25090 +2023-06-18,24850,25100,23530,24510,1452335,24580 +2023-06-19,23780,24590,23760,24440,835471,24180 +2023-06-20,23710,24480,23710,24400,722180,24120 +2023-06-21,24120,24510,23680,24300,1275763,23770 +2023-06-24,24440,24500,23490,24160,1430972,23500 +2023-06-25,24150,24990,23920,24230,1405600,23920 +2023-06-26,23920,24620,23840,24250,1285828,24270 +2023-06-27,24840,24840,23830,24240,417790,24500 +2023-06-28,24500,24500,23870,24240,166582,24210 +2023-07-01,24010,24300,23890,24220,316105,23950 +2023-07-02,23850,24300,23850,24210,225749,24000 +2023-07-03,24000,25380,24000,24610,1845067,25350 +2023-07-04,25410,25840,25190,25830,12050212,25840 +2023-07-05,25860,27120,25860,26910,8759567,27120 +2023-07-08,28250,28250,27160,28160,9653944,27950 +2023-07-09,28600,28600,26760,27290,4907045,26830 +2023-07-10,27470,28000,26810,27400,2509940,26850 +2023-07-11,27840,27840,26400,27250,2323055,26530 +2023-07-12,27350,27460,26330,27020,1714130,26920 +2023-07-15,26600,28200,26600,27540,3019085,27500 +2023-07-16,28000,28300,26320,27400,1505321,26680 +2023-07-17,27600,27600,26230,26850,2235544,26450 +2023-07-18,27000,27700,26700,27250,4308218,27460 +2023-07-19,28000,28450,27500,28000,3511742,27800 +2023-07-30,15570,15870,14730,15240,2994404,15580 +2023-07-31,15790,16000,15480,15770,12438565,16000 +2023-08-01,16550,16550,16420,16230,4997528,16550 +2023-08-07,17000,17040,16650,16760,7352642,16840 +2023-08-08,17100,17100,16200,16700,1317333,16350 +2023-08-09,16370,17000,16330,16710,1232807,16740 +2023-08-12,16960,16960,15900,16630,1124644,16330 +2023-08-13,16000,16840,15880,16600,1060048,16760 +2023-08-14,16800,17120,16390,16640,1826041,16660 +2023-08-15,17130,17130,16430,16630,775217,16540 +2023-08-16,16900,16900,16150,16590,749652,16300 +2023-08-19,16590,16590,15900,16530,814198,15960 +2023-08-20,16250,16280,15720,16460,1070796,16240 +2023-08-21,16200,17190,16200,16520,1075552,16680 +2023-08-22,16540,16600,15820,16500,443343,16200 +2023-08-23,16010,16500,15870,16400,1927499,16360 +2023-08-26,16360,16950,16100,16400,2618701,16200 +2023-08-27,16400,16660,15990,16340,5490421,16160 +2023-08-28,16300,16480,15690,16160,3013819,15690 +2023-08-29,16190,16200,15490,16010,2479892,15580 +2023-08-30,15970,16290,15310,15820,3059116,15680 +2023-09-02,15680,15810,15200,15700,1898880,15290 +2023-09-03,15250,16190,15200,15770,4649446,15880 +2023-09-04,16000,16110,15760,15810,2334294,15770 
+2023-09-05,16290,16290,15910,15920,2825830,15990
+2023-09-09,16030,16160,15600,15920,1086045,15690
+2023-09-10,16000,16170,15620,15900,1446115,15620
+2023-09-11,16140,16150,15390,15810,1813168,15540
+2023-09-12,15900,15930,15400,15790,1671081,15860
+2023-09-13,15900,16250,15840,16010,4936767,16090
+2023-09-17,15900,16250,15820,16000,1268601,15880
+2023-09-18,15710,16180,15710,15990,1996193,15800
+2023-09-19,15710,16180,15630,15980,246012,15980
+2023-09-20,16080,16280,15750,15980,670497,15890
+2023-09-23,15690,16000,15680,15970,668922,15750
+2023-09-25,15610,15900,15470,15880,1547997,15840
+2023-09-26,15960,16100,15350,15760,2340217,15400
+2023-09-27,15330,15800,15060,15500,3783233,15160
+2023-09-30,15480,15600,14880,15390,2216511,14960
+2023-10-01,15010,15010,14630,14950,4857796,14630
+2023-10-02,14630,15300,14610,15010,3015505,15230
+2023-10-04,15020,15250,14920,15010,749502,15070
+2023-10-07,15100,15100,14360,14960,1389248,14530
+2023-10-08,14460,14560,14220,14820,1827769,14290
+2023-10-09,14780,15050,14280,14790,1694643,14720
+2023-10-10,14980,14990,14480,14800,1409809,14720
+2023-10-15,14370,14970,14200,14760,1092343,14790
+2023-10-16,14300,14960,14300,14700,2492089,14680
+2023-10-17,14950,15200,14500,14800,2890626,15000
+2023-10-18,14520,15000,14340,14770,968760,14430
+2023-10-21,14470,14980,14310,14740,1128312,14420
+2023-10-22,14700,14810,14400,14720,460109,14400
+2023-10-23,14770,14770,14150,14650,1372361,14260
+2023-10-24,14550,14550,13990,14290,6379809,14010
+2023-10-25,14450,14450,13590,14170,2043651,13750
+2023-10-28,13590,13630,13470,14030,1656182,13470
+2023-10-29,13370,14100,13370,14000,1064123,13750
+2023-10-30,13540,14170,13540,13990,1025542,13740
+2023-10-31,13820,13990,13520,13970,720875,13710
+2023-11-01,14160,14420,14010,14300,8419279,14300
+2023-11-04,14500,15000,14320,14690,6333267,14640
+2023-11-05,14890,15000,14350,14680,2392003,14550
+2023-11-06,14220,14650,14140,14600,1470757,14210
+2023-11-07,14530,14800,14200,14580,1910602,14490
+2023-11-08,14600,14600,14230,14550,748934,14300
diff --git a/run.ipynb b/run.ipynb
new file mode 100644
index 000000000..157bf4b7e
--- /dev/null
+++ b/run.ipynb
@@ -0,0 +1,449 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Here we try to cheat on the model, by asking it to predict based on info we are hiding from it!\n",
+    "\n",
+    "#let's load our data ( assume its name is data.csv ) and create a copy of some of its rows!\n",
+    "\n",
+    "import pandas as pd\n",
+    "\n",
+    "df = pd.read_csv('input/train/data.csv')\n",
+    "\n",
+    "df.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_pred = df.iloc[3600:,:]\n",
+    "df = df.iloc[:3600,:]\n",
+    "df_pred.shape, df.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.to_csv('input/train/data-rowcut.csv', index=False)\n",
+    "df_pred.to_csv('input/test/data-pred.csv', index=False)\n",
+    "print(\"Now we have two datasets, one of which is never seen by the model during train and test\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# First, let us assume that you used the enhanced version, which saved the args to a pickle file:\n",
+    "# We load the same args that were used when the model was trained:\n",
+    "# It typically saves them in the input folder:\n",
+    "import pickle\n",
+    "\n",
+    "with open('input/args.pkl', 'rb') as f:\n",
+    "    loaded_arg = pickle.load(f)\n",
+    "\n",
+    "print(\"Args object loaded from args.pkl\")\n",
+    "print(loaded_arg)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Just a few changes!\n",
+    "loaded_arg.test_size = None\n",
+    "loaded_arg.max_use_of_row = 'All Except 3 Days'\n",
+    "loaded_arg.pred_data_path = 'data-pred.csv'\n",
+    "loaded_arg.is_training = 0\n",
+    "print(loaded_arg)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# We should use the Model class:\n",
+    "# let us import the same modules as when we trained the model:\n",
+    "from utils.tools import dotdict\n",
+    "import torch\n",
+    "from experiments.exp_long_term_forecasting import Exp_Long_Term_Forecast\n",
+    "from experiments.exp_long_term_forecasting_partial import Exp_Long_Term_Forecast_Partial\n",
+    "import random\n",
+    "import numpy as np\n",
+    "\n",
+    "if loaded_arg.exp_name == 'partial_train':\n",
+    "    Exp = Exp_Long_Term_Forecast_Partial\n",
+    "else:\n",
+    "    Exp = Exp_Long_Term_Forecast"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "exp = Exp(loaded_arg)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#setting is the same as the name of the folder where our model is saved:\n",
+    "setting = 'test_iTransformer_custom_MS_ft15_sl1_ll3_pl2_dm1_nh1_el1_dl2_df1_fctimeF_ebTrue_dttest_projection_0'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "exp.predict(setting, load=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "predicted_path = os.path.join('results', setting, 'Preds real_prediction.npy')\n",
+    "predicted_values = np.load(predicted_path)\n",
+    "predicted_close = predicted_values[0,:,-1].reshape(-1,1)\n",
+    "predicted_close"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#These are the last 3 days of the pred dataset:\n",
+    "import pandas as pd\n",
+    "df = pd.read_csv('input/test/data-pred.csv')\n",
+    "df['Close'].tail(4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import joblib\n",
+    "scaler = joblib.load('input/test/scaler.pkl')\n",
+    "inversed_close = scaler.inverse_transform(predicted_close)\n",
+    "[round(any_) for any_ in inversed_close.reshape(-1).tolist()]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# During prediction the model only receives the last 1 row\n",
+    "# Since, via the max_use_of_row argument, we told it not to touch the last 3 rows,\n",
+    "# it has picked up the fourth row from the end\n",
+    "#To be sure about the scale, we inverse-transform it so the two match\n",
+    "\n",
+    "scaler.inverse_transform(exp.batch_y[0,:,-1].reshape(-1,1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# OR OR ##"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# OR, IF YOU DO NOT HAVE YOUR ARGS: LET'S TRAIN FROM SCRATCH! 
#" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define the arg object\n", + "arg = dotdict()\n", + "\n", + "arg.root_path = 'input/train'\n", + "arg.data_path = 'data-rowcut.csv'\n", + "arg.pred_root_path = 'input/test'\n", + "arg.pred_data_path = 'data-pred.csv'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils.tools import dotdict\n", + "import torch\n", + "from experiments.exp_long_term_forecasting import Exp_Long_Term_Forecast\n", + "from experiments.exp_long_term_forecasting_partial import Exp_Long_Term_Forecast_Partial\n", + "import random\n", + "import numpy as np\n", + "\n", + "# Setting the seed\n", + "fix_seed = 2023\n", + "random.seed(fix_seed)\n", + "torch.manual_seed(fix_seed)\n", + "np.random.seed(fix_seed)\n", + "\n", + "\n", + "# NEW OPTIONS : #\n", + "arg.scale = True\n", + "arg.test_size = 0.2\n", + "arg.kind_of_scaler = 'Standard'\n", + "arg.name_of_col_with_date = 'date'\n", + "arg.kind_of_optim = 'default'\n", + "arg.criter = 'default'\n", + "arg.do_visual = False\n", + "arg.max_use_of_row = 'All Except 3 Days'#It also can be 'All Except a Week' or 'No Lim'\n", + "# # #\n", + "\n", + "arg.is_training = 1\n", + "arg.model_id = 'test'\n", + "arg.model = 'iTransformer'\n", + "arg.data = 'custom'\n", + "arg.features = 'MS'\n", + "arg.target = 'Close'\n", + "arg.freq = 'b'\n", + "arg.checkpoints = './checkpoints/'\n", + "arg.seq_len = 1*5*3\n", + "arg.label_len = 1*1\n", + "arg.pred_len = 1*3\n", + "arg.enc_in = 6\n", + "arg.dec_in = 6\n", + "arg.c_out = 1\n", + "arg.d_model = 2\n", + "arg.n_heads = 1\n", + "arg.e_layers = 1\n", + "arg.d_layers = 1\n", + "arg.d_ff = 2\n", + "arg.moving_avg = 25\n", + "arg.factor = 1\n", + "arg.distil = True\n", + "arg.dropout = 0.01\n", + "arg.embed = 'timeF'\n", + "arg.activation = 'ReLU'\n", + "arg.num_workers = 1\n", + "arg.itr = 1\n", + "arg.train_epochs = 3\n", + "arg.batch_size = 64\n", + "arg.patience = 10\n", + "arg.learning_rate = 0.9\n", + "arg.des = 'test'\n", + "arg.loss = 'MSE'\n", + "arg.lradj = 'type1'\n", + "arg.use_amp = False\n", + "arg.use_gpu = True if torch.cuda.is_available() else False\n", + "arg.gpu = 0\n", + "arg.use_multi_gpu = False\n", + "arg.devices = '0,1,2,3'\n", + "arg.exp_name = 'MTSF'\n", + "arg.channel_independence = False\n", + "arg.inverse = False\n", + "arg.class_strategy = 'projection'\n", + "arg.efficient_training = False\n", + "arg.use_norm = True\n", + "arg.partial_start_index = 0\n", + "\n", + "print('Args in experiment:')\n", + "print(arg)\n", + "\n", + "if arg.exp_name == 'partial_train':\n", + " Exp = Exp_Long_Term_Forecast_Partial\n", + "else:\n", + " Exp = Exp_Long_Term_Forecast\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if arg.is_training:\n", + " for ii in range(arg.itr):\n", + " # setting record of experiments\n", + " setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format(\n", + " arg.model_id,\n", + " arg.model,\n", + " arg.data,\n", + " arg.features,\n", + " arg.seq_len,\n", + " arg.label_len,\n", + " arg.pred_len,\n", + " arg.d_model,\n", + " arg.n_heads,\n", + " arg.e_layers,\n", + " arg.d_layers,\n", + " arg.d_ff,\n", + " arg.factor,\n", + " arg.embed,\n", + " arg.distil,\n", + " arg.des,\n", + " arg.class_strategy, 
ii)\n",
+    "    \n",
+    "    exp = Exp(arg) # set experiments\n",
+    "    print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))\n",
+    "    exp.train(setting)\n",
+    "    \n",
+    "    print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))\n",
+    "    \n",
+    "    train_losses = exp.train_losses##### --->>> Use These To Plot the Loss Values\n",
+    "    test_losses = exp.test_losses#### --->>> Use These To Plot the Loss Values\n",
+    "    \n",
+    "    exp.test(setting)\n",
+    "    \n",
+    "    if arg.do_predict:\n",
+    "        print('>>>>>>>predicting : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))\n",
+    "        exp.predict(setting, True)\n",
+    "    \n",
+    "    torch.cuda.empty_cache()\n",
+    "else:\n",
+    "    ii = 0\n",
+    "    setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format(\n",
+    "    arg.model_id,\n",
+    "    arg.model,\n",
+    "    arg.data,\n",
+    "    arg.features,\n",
+    "    arg.seq_len,\n",
+    "    arg.label_len,\n",
+    "    arg.pred_len,\n",
+    "    arg.d_model,\n",
+    "    arg.n_heads,\n",
+    "    arg.e_layers,\n",
+    "    arg.d_layers,\n",
+    "    arg.d_ff,\n",
+    "    arg.factor,\n",
+    "    arg.embed,\n",
+    "    arg.distil,\n",
+    "    arg.des,\n",
+    "    arg.class_strategy, ii)\n",
+    "    \n",
+    "    exp = Exp(arg) # set experiments\n",
+    "    print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))\n",
+    "    exp.test(setting, test=1)\n",
+    "    torch.cuda.empty_cache()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Go straight to prediction, since everything was already set up during training\n",
+    "\n",
+    "exp.predict(setting,True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "predicted_path = os.path.join('results', setting, 'Preds real_prediction.npy')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "predicted_path = os.path.join('results', setting, 'Preds real_prediction.npy')\n",
+    "predicted_values = np.load(predicted_path)\n",
+    "predicted_close = predicted_values[0,:,-1].reshape(-1,1)\n",
+    "predicted_close\n",
+    "#These are the last 3 days of the pred dataset:\n",
+    "import pandas as pd\n",
+    "df = pd.read_csv('input/test/data-pred.csv')\n",
+    "df['Close'].tail(4)\n",
+    "import joblib\n",
+    "scaler = joblib.load('input/test/scaler.pkl')\n",
+    "inversed_close = scaler.inverse_transform(predicted_close)\n",
+    "[round(any_) for any_ in inversed_close.reshape(-1).tolist()]\n",
+    "# During prediction the model only receives the last 1 row\n",
+    "# Since, via the max_use_of_row argument, we told it not to touch the last 3 rows,\n",
+    "# it has picked up the fourth row from the end\n",
+    "#To be sure about the scale, we inverse-transform it so the two match\n",
+    "\n",
+    "scaler.inverse_transform(exp.batch_y[0,:,-1].reshape(-1,1))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/run.py b/run.py
index 0aecb6901..0a9539b3f 100644
--- a/run.py
+++ b/run.py
@@ -24,16 +24,19 @@
 arg.name_of_col_with_date = 'date' # 
default is 'date'. Name of your date column in your dataset # arg.kind_of_optim = 'default' # default is 'Adam'. -# other options : 'AdamW', 'SparseAdam', 'SGD', 'RMSprop', 'RAdam', 'NAdam' ,'LBFGS', -# 'Adamax' 'ASGD' 'Adadelta' 'Adagrad' + #other options : 'AdamW', 'SparseAdam', 'SGD', 'RMSprop', 'RAdam', 'NAdam' ,'LBFGS', + # 'Adamax' 'ASGD' 'Adadelta' 'Adagrad' arg.criter = 'default' # default is nn.MSELoss ( Mean Squared Error ) -# other options : 'wmape', 'smape', 'mae', 'rmse', 'quantileloss', 'huberloss', 'pinballloss' + # other options : 'wmape', 'smape', 'mae', 'rmse', 'quantileloss', 'huberloss', 'pinballloss' + +arg.do_visual = False +arg.max_use_of_row = 'No Lim'#This is for prediction, Other options are: 'All Except a Week' or 'All Except 3 Days' # NEW Accessories : # -#exp.trues_during_training -#exp.preds_during_training +#exp.path_to_saved_args +#exp.vali_losses #exp.train_losses #exp.test_losses From 3db20433c504c208a6667339897515e0637ece5e Mon Sep 17 00:00:00 2001 From: cloner174 Date: Fri, 24 May 2024 21:23:17 +0330 Subject: [PATCH 30/38] add save_args function --- experiments/exp_long_term_forecasting.py | 16 ++--- utils/save_args.py | 92 ++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 10 deletions(-) create mode 100644 utils/save_args.py diff --git a/experiments/exp_long_term_forecasting.py b/experiments/exp_long_term_forecasting.py index 610856ddf..29e8be3e4 100644 --- a/experiments/exp_long_term_forecasting.py +++ b/experiments/exp_long_term_forecasting.py @@ -10,7 +10,7 @@ import time import warnings import numpy as np -import pickle +from utils.save_args import SaveArgs warnings.filterwarnings('ignore') @@ -25,16 +25,12 @@ def __init__(self, args): self.trues_during_vali = [] self.preds_during_vali = [] if args.is_training != 0: - if os.path.exists('input'): - path_to_saved_args = 'input/args.pkl' - else: - path_to_saved_args = 'args.pkl' - self.path_to_saved_args = path_to_saved_args - with open(path_to_saved_args, 'wb') as f: - pickle.dump(args, f) - print("Args object saved to args.pkl") - print("It Can be further used by pickle.load()") + try: + SaveArgs(args=args, path='input') + except: + print("Fail To Save The Args. 
Continue ..")
             time.sleep(2)
+    
 
     def _build_model(self):
         model = self.model_dict[self.args.model].Model(self.args).float()

diff --git a/utils/save_args.py b/utils/save_args.py
new file mode 100644
index 000000000..e5dd40cf3
--- /dev/null
+++ b/utils/save_args.py
@@ -0,0 +1,92 @@
+# cloner174
+# enhanced version files
+
+class DotDict:
+    
+    def __init__(self, dictionary):
+        self.__dict__.update(dictionary)
+    
+    def __getattr__(self, attr):
+        try:
+            return self.__dict__[attr]
+        except KeyError:
+            raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'")
+    
+    def __setattr__(self, key, value):
+        self.__dict__[key] = value
+    
+    def __str__(self):
+        return str(self.__dict__)
+
+
+class SaveArgs:
+    
+    def __init__(self, args, path) :
+        
+        if not isinstance(args, dict):
+            raise TypeError("This class only supports a dictionary as input!")
+        self.args = args
+        self.path = path
+        self.arg_creator = DotDict
+        
+        self.__start__()
+    
+    def __start__(self) :
+        
+        temp = {}
+        
+        for any_key, any_val in self.args.items() :
+            
+            temp[any_key] = any_val
+        
+        self.__modify__(temp)
+    
+    def __modify__(self, dict):
+        
+        try:
+            
+            arg_new = self.arg_creator(dict)
+        except:
+            print("Fail to Save Args")
+            return
+        
+        self.__save__(arg_new)
+    
+    def __path_checker__(self):
+        
+        try:
+            import os
+            if os.path.exists(self.path):
+                pass
+            else:
+                try:
+                    os.makedirs(self.path)
+                except:
+                    self.path = 'Args.pkl'
+                    return True
+            self.path = os.path.join(self.path, 'Args.pkl')
+            return True
+        except:
+            return False
+    
+    def __save__(self, arg):
+        
+        try:
+            import pickle
+            if self.__path_checker__():
+                with open(self.path, 'wb') as file :
+                    pickle.dump(arg, file)
+            else:
+                print("Fail to Save Args")
+                return
+        except:
+            print("Fail to Save Args")
+            return
+    
+    def __repr__(self) -> str:
+        return f"""
+        Args Object Saved to {self.path}
+        It Can be further used by pickle.load()
+        """
+
+#end#
\ No newline at end of file

From 9fcf3d39e247ccd49ba9fda54091b526267403eb Mon Sep 17 00:00:00 2001
From: cloner174 
Date: Fri, 24 May 2024 21:23:41 +0330
Subject: [PATCH 31/38] add save_args function

---
 input/test/data-pred.csv | 92 ----------------------------------------
 1 file changed, 92 deletions(-)
 delete mode 100644 input/test/data-pred.csv

diff --git a/input/test/data-pred.csv b/input/test/data-pred.csv
deleted file mode 100644
index 8e4462c6b..000000000
--- a/input/test/data-pred.csv
+++ /dev/null
@@ -1,92 +0,0 @@
-date,Open,High,Low,Final,Volume,Close
-2023-06-14,24160,25450,23880,24660,1582455,25050
-2023-06-17,25230,25400,24540,24760,951221,25090
-2023-06-18,24850,25100,23530,24510,1452335,24580
-2023-06-19,23780,24590,23760,24440,835471,24180
-2023-06-20,23710,24480,23710,24400,722180,24120
-2023-06-21,24120,24510,23680,24300,1275763,23770
-2023-06-24,24440,24500,23490,24160,1430972,23500
-2023-06-25,24150,24990,23920,24230,1405600,23920
-2023-06-26,23920,24620,23840,24250,1285828,24270
-2023-06-27,24840,24840,23830,24240,417790,24500
-2023-06-28,24500,24500,23870,24240,166582,24210
-2023-07-01,24010,24300,23890,24220,316105,23950
-2023-07-02,23850,24300,23850,24210,225749,24000
-2023-07-03,24000,25380,24000,24610,1845067,25350
-2023-07-04,25410,25840,25190,25830,12050212,25840
-2023-07-05,25860,27120,25860,26910,8759567,27120
-2023-07-08,28250,28250,27160,28160,9653944,27950
-2023-07-09,28600,28600,26760,27290,4907045,26830
-2023-07-10,27470,28000,26810,27400,2509940,26850
-2023-07-11,27840,27840,26400,27250,2323055,26530
-2023-07-12,27350,27460,26330,27020,1714130,26920
-2023-07-15,26600,28200,26600,27540,3019085,27500
-2023-07-16,28000,28300,26320,27400,1505321,26680 -2023-07-17,27600,27600,26230,26850,2235544,26450 -2023-07-18,27000,27700,26700,27250,4308218,27460 -2023-07-19,28000,28450,27500,28000,3511742,27800 -2023-07-30,15570,15870,14730,15240,2994404,15580 -2023-07-31,15790,16000,15480,15770,12438565,16000 -2023-08-01,16550,16550,16420,16230,4997528,16550 -2023-08-07,17000,17040,16650,16760,7352642,16840 -2023-08-08,17100,17100,16200,16700,1317333,16350 -2023-08-09,16370,17000,16330,16710,1232807,16740 -2023-08-12,16960,16960,15900,16630,1124644,16330 -2023-08-13,16000,16840,15880,16600,1060048,16760 -2023-08-14,16800,17120,16390,16640,1826041,16660 -2023-08-15,17130,17130,16430,16630,775217,16540 -2023-08-16,16900,16900,16150,16590,749652,16300 -2023-08-19,16590,16590,15900,16530,814198,15960 -2023-08-20,16250,16280,15720,16460,1070796,16240 -2023-08-21,16200,17190,16200,16520,1075552,16680 -2023-08-22,16540,16600,15820,16500,443343,16200 -2023-08-23,16010,16500,15870,16400,1927499,16360 -2023-08-26,16360,16950,16100,16400,2618701,16200 -2023-08-27,16400,16660,15990,16340,5490421,16160 -2023-08-28,16300,16480,15690,16160,3013819,15690 -2023-08-29,16190,16200,15490,16010,2479892,15580 -2023-08-30,15970,16290,15310,15820,3059116,15680 -2023-09-02,15680,15810,15200,15700,1898880,15290 -2023-09-03,15250,16190,15200,15770,4649446,15880 -2023-09-04,16000,16110,15760,15810,2334294,15770 -2023-09-05,16290,16290,15910,15920,2825830,15990 -2023-09-09,16030,16160,15600,15920,1086045,15690 -2023-09-10,16000,16170,15620,15900,1446115,15620 -2023-09-11,16140,16150,15390,15810,1813168,15540 -2023-09-12,15900,15930,15400,15790,1671081,15860 -2023-09-13,15900,16250,15840,16010,4936767,16090 -2023-09-17,15900,16250,15820,16000,1268601,15880 -2023-09-18,15710,16180,15710,15990,1996193,15800 -2023-09-19,15710,16180,15630,15980,246012,15980 -2023-09-20,16080,16280,15750,15980,670497,15890 -2023-09-23,15690,16000,15680,15970,668922,15750 -2023-09-25,15610,15900,15470,15880,1547997,15840 -2023-09-26,15960,16100,15350,15760,2340217,15400 -2023-09-27,15330,15800,15060,15500,3783233,15160 -2023-09-30,15480,15600,14880,15390,2216511,14960 -2023-10-01,15010,15010,14630,14950,4857796,14630 -2023-10-02,14630,15300,14610,15010,3015505,15230 -2023-10-04,15020,15250,14920,15010,749502,15070 -2023-10-07,15100,15100,14360,14960,1389248,14530 -2023-10-08,14460,14560,14220,14820,1827769,14290 -2023-10-09,14780,15050,14280,14790,1694643,14720 -2023-10-10,14980,14990,14480,14800,1409809,14720 -2023-10-15,14370,14970,14200,14760,1092343,14790 -2023-10-16,14300,14960,14300,14700,2492089,14680 -2023-10-17,14950,15200,14500,14800,2890626,15000 -2023-10-18,14520,15000,14340,14770,968760,14430 -2023-10-21,14470,14980,14310,14740,1128312,14420 -2023-10-22,14700,14810,14400,14720,460109,14400 -2023-10-23,14770,14770,14150,14650,1372361,14260 -2023-10-24,14550,14550,13990,14290,6379809,14010 -2023-10-25,14450,14450,13590,14170,2043651,13750 -2023-10-28,13590,13630,13470,14030,1656182,13470 -2023-10-29,13370,14100,13370,14000,1064123,13750 -2023-10-30,13540,14170,13540,13990,1025542,13740 -2023-10-31,13820,13990,13520,13970,720875,13710 -2023-11-01,14160,14420,14010,14300,8419279,14300 -2023-11-04,14500,15000,14320,14690,6333267,14640 -2023-11-05,14890,15000,14350,14680,2392003,14550 -2023-11-06,14220,14650,14140,14600,1470757,14210 -2023-11-07,14530,14800,14200,14580,1910602,14490 -2023-11-08,14600,14600,14230,14550,748934,14300 From 2f1f7666fc2915c435cadcdc0e3a1e7f7dbb3934 Mon Sep 17 00:00:00 2001 From: cloner174 Date: Fri, 24 May 2024 
21:30:42 +0330 Subject: [PATCH 32/38] add save_args class --- utils/save_args.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/save_args.py b/utils/save_args.py index e5dd40cf3..1c75a13ca 100644 --- a/utils/save_args.py +++ b/utils/save_args.py @@ -82,11 +82,11 @@ def __save__(self, arg): except: print("Fail to Save Args") return + print(f"Args Object Saved to {self.path}") + print("It Can be further used by pickle.load()") + def __repr__(self) -> str: - return f""" - Args Object Saved to {self.path} - It Can be further used by pickle.load() - """ + return "cloner174 in github 2024" #end# \ No newline at end of file From ed5c051671cfbc9876f78dfccb9c3dd1db01089e Mon Sep 17 00:00:00 2001 From: cloner174 Date: Fri, 24 May 2024 22:03:21 +0330 Subject: [PATCH 33/38] dynamic save args --- utils/save_args.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utils/save_args.py b/utils/save_args.py index 1c75a13ca..2019f1894 100644 --- a/utils/save_args.py +++ b/utils/save_args.py @@ -10,7 +10,8 @@ def __getattr__(self, attr): try: return self.__dict__[attr] except KeyError: - raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'") + self.__setattr__(attr, False) + return self.__dict__[attr] def __setattr__(self, key, value): self.__dict__[key] = value From d38989dd76f61e81e0446897b87fc29a11b6953e Mon Sep 17 00:00:00 2001 From: cloner174 Date: Fri, 24 May 2024 22:03:46 +0330 Subject: [PATCH 34/38] modified: .gitignore new file: checkpoints/DONOTREMOVE --- .gitignore | 2 +- checkpoints/DONOTREMOVE | 0 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 checkpoints/DONOTREMOVE diff --git a/.gitignore b/.gitignore index 4f3189218..dc57879be 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ __pycache__/ *$py.class test_results/test_iTransformer_custom_MS_ft15_sl1_ll3_pl512_dm8_nh8_el8_dl1024_df1_fctimeF_ebTrue_dttest_projection_0 -checkpoints + results/test_iTransformer_custom_MS_ft15_sl1_ll3_pl512_dm8_nh8_el8_dl1024_df1_fctimeF_ebTrue_dttest_projection_0 result_long_term_forecast.txt # C extensions diff --git a/checkpoints/DONOTREMOVE b/checkpoints/DONOTREMOVE new file mode 100644 index 000000000..e69de29bb From 995832b8f58897c2bab86fc1585a605e8512fc53 Mon Sep 17 00:00:00 2001 From: cloner174 Date: Sun, 9 Jun 2024 21:58:47 +0330 Subject: [PATCH 35/38] fixed --- utils/save_args.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/utils/save_args.py b/utils/save_args.py index 2019f1894..98990bdbd 100644 --- a/utils/save_args.py +++ b/utils/save_args.py @@ -1,5 +1,6 @@ # cloner174 # enhanced version files +import os class DotDict: @@ -45,10 +46,9 @@ def __start__(self) : def __modify__(self, dict): try: - arg_new = self.arg_creator(dict) except: - print("Fail to Save Args") + print("Fail to Save Args - arg_creator") return self.__save__(arg_new) @@ -56,15 +56,7 @@ def __modify__(self, dict): def __path_checker__(self): try: - import os - if os.path.exists(self.path): - pass - else: - try: - os.makedirs(self.path) - except: - self.path = 'Args.pkl' - return True + os.makedirs(self.path, exist_ok=True) self.path = os.path.join(self.path, 'Args.pkl') return True except: @@ -81,7 +73,7 @@ def __save__(self, arg): print("Fail to Save Args") return except: - print("Fail to Save Args") + print("Fail to Save Args - __save__") return print(f"Args Object Saved to {self.path}") print("It Can be further used by pickle.load()") From 6d95bb3a25daff3f4784a29addbb6a9926cd0e62 Mon Sep 17 
00:00:00 2001 From: cloner174 Date: Tue, 11 Jun 2024 15:45:53 +0330 Subject: [PATCH 36/38] new updates --- data_provider/data_factory.py | 14 ++++---- data_provider/data_loader.py | 42 ++++++++++++++++++++---- experiments/exp_long_term_forecasting.py | 2 +- 3 files changed, 43 insertions(+), 15 deletions(-) diff --git a/data_provider/data_factory.py b/data_provider/data_factory.py index 6e5ecd545..b55e7c4f5 100644 --- a/data_provider/data_factory.py +++ b/data_provider/data_factory.py @@ -31,9 +31,9 @@ def data_provider(args, flag): target=args.target, timeenc=timeenc, freq=freq, - kind_of_scaler=args.kind_of_scaler, - name_of_col_with_date = args.name_of_col_with_date, - scale = args.scale, + kind_of_scaler=args.kind_of_scaler if hasattr(args, 'kind_of_scaler') else 'standard', + name_of_col_with_date = args.name_of_col_with_date if hasattr(args, 'name_of_col_with_date') else 'date', + scale = args.scale if hasattr(args, 'scale') else True, max_use_of_row = args.max_use_of_row if hasattr(args, 'max_use_of_row') else 'No Lim', ) print(flag, len(data_set)) @@ -57,10 +57,10 @@ def data_provider(args, flag): target=args.target, timeenc=timeenc, freq=freq, - test_size = args.test_size, - kind_of_scaler=args.kind_of_scaler, - name_of_col_with_date = args.name_of_col_with_date, - scale = args.scale, + test_size = args.test_size if hasattr(args, 'test_size') else 0.2, + kind_of_scaler= args.kind_of_scaler if hasattr(args, 'kind_of_scaler') else 'standard', + name_of_col_with_date = args.name_of_col_with_date if hasattr(args, 'name_of_col_with_date') else 'date', + scale = args.scale if hasattr(args, 'scale') else True, ) print(flag, len(data_set)) data_loader = DataLoader( diff --git a/data_provider/data_loader.py b/data_provider/data_loader.py index 6bd99086d..0885d6400 100644 --- a/data_provider/data_loader.py +++ b/data_provider/data_loader.py @@ -7,6 +7,7 @@ from sklearn.preprocessing import StandardScaler, MinMaxScaler from utils.timefeatures import time_features import warnings +import time warnings.filterwarnings('ignore') @@ -229,13 +230,20 @@ def __init__(self, self.name_of_col_with_date = name_of_col_with_date if name_of_col_with_date is not None else 'date' self.root_path = root_path self.data_path = data_path + + self.scaler_path = os.path.join('./input', 'Scalers') + os.makedirs(self.scaler_path, exist_ok=True) + self.__read_data__() def __read_data__(self): - df_raw = pd.read_csv(os.path.join(self.root_path, - self.data_path)) - + if self.root_path == 'None': + df_raw = pd.read_csv(self.data_path) + else: + df_raw = pd.read_csv(os.path.join(self.root_path, + self.data_path)) + ''' df_raw.columns: ['date', ...(other features), target feature] ''' @@ -262,6 +270,13 @@ def __read_data__(self): elif self.features == 'S': df_data = df_raw[[self.target]] + file_path = self.scaler_path + 'scaler.pkl' + if os.path.exists(file_path): + base, ext = os.path.splitext(file_path) + timestamp = time.strftime("%Y%m%d_%H%M%S") + file_path = f"{base}_{timestamp}{ext}" + self.scaler_path = file_path + if self.scale: if self.features == 'S' or self.features == 'MS': col_scaled = [] @@ -279,7 +294,7 @@ def __read_data__(self): scaler = StandardScaler() if col == self.target: self.scaler.fit(col_data[border1s[0]:border2s[0]]) - joblib.dump(self.scaler, os.path.join(self.root_path, 'scaler.pkl')) + joblib.dump(self.scaler, self.scaler_path) col_temp = self.scaler.transform(col_data) else: scaler.fit(col_data[border1s[0]:border2s[0]]) @@ -513,13 +528,19 @@ def __init__(self, root_path, flag='pred', 
size=None,
         max_use_of_row = max_use_of_row if max_use_of_row is not None else 'No Lim'
         self.max_use_of_row = 7 if max_use_of_row.lower() == 'all except a week' else 3 if max_use_of_row.lower() == 'all except 3 days' else 0
+
+        self.scaler_path = os.path.join('./input', 'Scalers')
+        os.makedirs(self.scaler_path, exist_ok=True)
+
         self.__read_data__()
 
     def __read_data__(self):
         self.scaler = StandardScaler()
-        df_raw = pd.read_csv(os.path.join(self.root_path,
-                                          self.data_path))
+        if self.root_path == 'None':
+            df_raw = pd.read_csv(self.data_path)
+        else:
+            df_raw = pd.read_csv(os.path.join(self.root_path,
+                                              self.data_path))
         '''
         df_raw.columns: ['date', ...(other features), target feature]
         '''
@@ -544,6 +565,13 @@ def __read_data__(self):
         elif self.features == 'S':
             df_data = df_raw[[self.target]]
 
+        file_path = self.scaler_path + 'scaler.pkl'
+        if os.path.exists(file_path):
+            base, ext = os.path.splitext(file_path)
+            timestamp = time.strftime("%Y%m%d_%H%M%S")
+            file_path = f"{base}_{timestamp}{ext}"
+        self.scaler_path = file_path
         if self.scale:
             if self.features == 'S' or self.features == 'MS':
                 col_scaled = []
@@ -561,7 +589,7 @@ def __read_data__(self):
                     scaler = StandardScaler()
                     if col == self.target:
                         self.scaler.fit(col_data)
-                        joblib.dump(self.scaler, os.path.join(self.root_path, 'scaler.pkl'))
+                        joblib.dump(self.scaler, self.scaler_path)
                         col_temp = self.scaler.transform(col_data)
                     else:
                         scaler.fit(col_data)
diff --git a/experiments/exp_long_term_forecasting.py b/experiments/exp_long_term_forecasting.py
index 29e8be3e4..4915937d7 100644
--- a/experiments/exp_long_term_forecasting.py
+++ b/experiments/exp_long_term_forecasting.py
@@ -29,7 +29,7 @@ def __init__(self, args):
             SaveArgs(args=args, path='input')
         except:
             print("Fail To Save The Args. Continue ..")
-            time.sleep(2)
+            time.sleep(1)
 
     def _build_model(self):

From ec7ebef2ed1af1a9d217a50f59ff1c226ed22bff Mon Sep 17 00:00:00 2001
From: cloner174
Date: Tue, 11 Jun 2024 19:00:49 +0330
Subject: [PATCH 37/38] added prediction function

---
 experiments/after_train.py               | 202 ++++++
 experiments/exp_long_term_forecasting.py |   2 +-
 experiments/pre_train.py                 |  92 +++
 input/{test => pred}/DONOTREMOVE         |   0
 run.ipynb                                | 775 ++++++++++++++++++-----
 utils/save_args.py                       |  85 ---
 utils/tools.py                           |   8 +-
 7 files changed, 929 insertions(+), 235 deletions(-)
 create mode 100644 experiments/after_train.py
 create mode 100644 experiments/pre_train.py
 rename input/{test => pred}/DONOTREMOVE (100%)
 delete mode 100644 utils/save_args.py

diff --git a/experiments/after_train.py b/experiments/after_train.py
new file mode 100644
index 000000000..e4bf930a9
--- /dev/null
+++ b/experiments/after_train.py
@@ -0,0 +1,202 @@
+import os
+import time
+import torch
+import tempfile
+import numpy as np
+import pandas as pd
+from datetime import timedelta
+from .exp_long_term_forecasting import Exp_Long_Term_Forecast
+from .exp_long_term_forecasting_partial import Exp_Long_Term_Forecast_Partial
+from .pre_train import SaveArgs, load_args
+
+
+def predict(args, model,
+            predict_root = None, predict_data = None,
+            days_to_predict = 1, retrain = False, new_data = None):  # model = setting or actual model
+    """
+    Use the model to predict future days.
+    Arguments:
+        args: Object | str. The model setup; either a DotDict object or the path to its saved file (args.json).
+        model: str | Object. Either the setting / folder name that holds 'checkpoint.pth', or the actual model object.
+        days_to_predict: int. How many days should be predicted.
+        predict_data: the name of the prediction data inside the pred folder. If None, the current name in args is used.
+
+        retrain: bool, optional. If True and new_data is not None, the setting and args are changed so the current model is retrained on the new data.
+        new_data: str. The name of the new data inside the root path from args. If None while retrain is True, the current root path and data name from args are used to retrain the model.
+        Will raise an error if no data is available.
+    """
+
+    if isinstance(args, str):
+        try:
+            arg = load_args(args)
+        except Exception as e:
+            raise AssertionError(f"Fail to read the args file, reason -> {e}")
+    else:
+        try:
+            args_path = SaveArgs(args=args, path='', temporary=True)
+            args_path = args_path.path
+            arg = load_args(args_path)
+            os.unlink(args_path)
+        except Exception as e:
+            raise AssertionError(f"Fail to read the args file, reason -> {e}")
+
+
+    if retrain and new_data is not None:
+        arg.data_path = new_data
+
+    if predict_data is not None:
+        if predict_root is not None:
+            arg.pred_root_path = predict_root
+        arg.pred_data_path = predict_data
+
+    if isinstance(model, Exp_Long_Term_Forecast) or isinstance(model, Exp_Long_Term_Forecast_Partial):
+        model.args = arg
+        exp = model
+    elif isinstance(model, str):
+        if arg.exp_name == 'partial_train':
+            Exp = Exp_Long_Term_Forecast_Partial
+        else:
+            Exp = Exp_Long_Term_Forecast
+        exp = Exp(arg)
+        try:
+            path = os.path.join(arg.checkpoints, model)
+            path = path + '/' + 'checkpoint.pth'
+            exp.model.load_state_dict(torch.load(path))
+        except Exception as e:
+            try:
+                exp.model.load_state_dict(torch.load(model))
+            except:
+                raise AssertionError(f"There was an error loading your model with the provided path. The assumed path is {model} and the error was: {e}")
+    else:
+        raise TypeError("The model argument can either be a str (the path or folder name of the model's checkpoint.pth) or an actual experiment model from this repo.")
+
+    if retrain:
+        timestamp = time.strftime("%Y%m%d_%H%M%S")
+        for ii in range(arg.itr):
+            setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}_{}'.format(
+                arg.model_id,
+                arg.model,
+                arg.data,
+                arg.features,
+                arg.seq_len,
+                arg.label_len,
+                arg.pred_len,
+                arg.d_model,
+                arg.n_heads,
+                arg.e_layers,
+                arg.d_layers,
+                arg.d_ff,
+                arg.factor,
+                arg.embed,
+                arg.distil,
+                arg.des,
+                arg.class_strategy, ii, timestamp)
+            print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
+            exp.train(setting)
+
+    try:
+        df_temp = pd.read_csv(os.path.join(arg.pred_root_path, arg.pred_data_path))
+    except:
+        print('Please enter the path to your prediction data via the input arguments predict_root and predict_data,')
+        print('where predict_root is the folder that contains your csv file and predict_data is the name of the csv file, with .csv at the end')
+        return 0
+    end_at_first = df_temp.shape[0] - 1
+    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+        temp_path = f"{temp_file.name}.csv"
+        df_temp.to_csv(temp_path, index= False)
+        temp_file.seek(0)
+    del df_temp
+
+    folder_path = 'results/Prediction Results/'
+    os.makedirs(folder_path, exist_ok=True)
+    file_path = folder_path + 'prediction.csv'
+
+    if os.path.exists(file_path):
+        base, ext = os.path.splitext(file_path)
+        timestamp = time.strftime("%Y%m%d_%H%M%S")
+        file_path = f"{base}_{timestamp}{ext}"
+
+    for jj in range(days_to_predict):
+        if jj == 0:
+            pass
+        else:
+            arg.pred_root_path = 'None'
+            arg.pred_data_path = temp_path
+        exp.args = arg
+        pred_data, pred_loader = exp._get_data(flag='pred')
+        preds = []
+        exp.model.eval()
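+        # The rollout below is autoregressive: each pass predicts one day, the
+        # predicted row is appended to a temporary CSV, and that CSV is re-read
+        # as the model input for the next day, so errors can compound as the
+        # horizon grows.
+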
with torch.no_grad(): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(pred_loader): + batch_x = batch_x.float().to(exp.device) + batch_y = batch_y.float() + batch_x_mark = batch_x_mark.float().to(exp.device) + batch_y_mark = batch_y_mark.float().to(exp.device) + dec_inp = torch.zeros_like(batch_y[:, -exp.args.pred_len:, :]).float() + dec_inp = torch.cat([batch_y[:, :exp.args.label_len, :], dec_inp], dim=1).float().to(exp.device) + if exp.args.use_amp: + with torch.cuda.amp.autocast(): + if exp.args.output_attention: + outputs = exp.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + else: + outputs = exp.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + else: + if exp.args.output_attention: + outputs = exp.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + else: + outputs = exp.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + outputs = outputs.detach().cpu().numpy() + if pred_data.scale: + shape = outputs.shape + outputs = pred_data.inverse_transform(outputs.squeeze(0)).reshape(shape) + preds.append(outputs) + preds = np.array(preds) + preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1]) + #preds = [round(any_) for any_ in preds.reshape(-1).tolist()] + preds = list(preds[0,0,:]) + data = pd.read_csv(temp_path) + cols = list(data.columns) + date_name = arg.name_of_col_with_date if hasattr(arg, 'name_of_col_with_date') else 'date' + target = arg.target + data[date_name] = pd.to_datetime(data[date_name]) + last_day = data.loc[data.shape[0]-1,date_name] + next_day = last_day + timedelta(days=1) + date_index = cols.index(date_name) + cols.pop(date_index) + temp = {} + for i in range(len(cols)): + col = cols[i] + if col == target : + if arg.features == 'MS' or arg.features == 'S' : + temp[col] = preds[-1] + else: + temp[col] = preds[i] + else: + if arg.features == 'S': + temp[col] = data.loc[end_at_first, col] + else: + temp[col] = preds[i] + temp = pd.DataFrame(temp, index=[data.shape[0]], dtype=int) + temp.insert(loc = date_index, column=date_name, value=next_day) + data = pd.concat([data, temp]) + if days_to_predict > 1: + data.to_csv(temp_path, index = False) + #if use_predict_on_prediction and retrain: + # if arg.data == 'custom': + # arg.root_path = 'None' + # arg.data_path = temp_path + # exp.args = arg + # exp.train(setting) + # else: + # print("sorry can not be done") + + + if arg.features == 'S' or arg.features == 'MS': + data = pd.concat( [data.loc[end_at_first:,date_name], data.loc[end_at_first:,target]],axis=1) + else: + data = data.loc[end_at_first:,:] + data.to_csv(file_path, index = False) + os.unlink(temp_path) + print(f'''The Results of Prediction for The Next {days_to_predict} Days Are Now Stored in + {file_path}''') + return True \ No newline at end of file diff --git a/experiments/exp_long_term_forecasting.py b/experiments/exp_long_term_forecasting.py index 4915937d7..bb41d5a22 100644 --- a/experiments/exp_long_term_forecasting.py +++ b/experiments/exp_long_term_forecasting.py @@ -10,7 +10,7 @@ import time import warnings import numpy as np -from utils.save_args import SaveArgs +from .pre_train import SaveArgs warnings.filterwarnings('ignore') diff --git a/experiments/pre_train.py b/experiments/pre_train.py new file mode 100644 index 000000000..61df5b092 --- /dev/null +++ b/experiments/pre_train.py @@ -0,0 +1,92 @@ +import os +import json +import time +import tempfile + +class DotDict: + + def __init__(self, dictionary): + self.__dict__.update(dictionary) + + def __getattr__(self, attr): + try: + return self.__dict__[attr] + 
except KeyError:
+            self.__setattr__(attr, False)
+            return self.__dict__[attr]
+
+    def __setattr__(self, key, value):
+        self.__dict__[key] = value
+
+    def __str__(self):
+        return str(self.__dict__)
+
+
+def load_args(path):
+
+    with open(path, 'r') as f:
+        loaded_args = json.load(f)
+
+    return DotDict(loaded_args)
+
+
+
+class SaveArgs:
+
+    def __init__(self, args, path, temporary = False) :
+
+        if not isinstance(args, dict):
+            raise TypeError("This class only supports a dictionary as input!")
+        self.args = args
+        self.path = path
+        self.temporary = temporary
+
+        self.__start__()
+
+
+    def __start__(self) :
+
+        temp = {}
+        for any_key, any_val in self.args.items() :
+
+            temp[any_key] = any_val
+
+        self.__save__(temp)
+
+
+    def __path_checker__(self):
+
+        if self.temporary:
+            with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+                file_path = temp_file.name + '.json'
+
+            self.path = file_path
+            return
+        else:
+            os.makedirs( self.path, exist_ok=True)
+            file_path = os.path.join(self.path, 'args.json')
+            if os.path.exists(file_path):
+                base, ext = os.path.splitext(file_path)
+                timestamp = time.strftime("%Y%m%d_%H%M%S")
+                file_path = f"{base}_{timestamp}{ext}"
+
+            self.path = file_path
+
+
+    def __save__(self, arg):
+
+        try:
+            self.__path_checker__()
+            with open(self.path, 'w') as file :
+                json.dump(arg, file)
+        except:
+            print("Fail to Save Args - continuing..")
+            return
+        if self.temporary:
+            pass
+        else:
+            print(f"Args Object Saved to {self.path}")
+            #print("It Can be further used by pickle.load()")
+
+    def __repr__(self) -> str:
+        return "cloner174 in github 2024"
\ No newline at end of file
diff --git a/input/test/DONOTREMOVE b/input/pred/DONOTREMOVE
similarity index 100%
rename from input/test/DONOTREMOVE
rename to input/pred/DONOTREMOVE
diff --git a/run.ipynb b/run.ipynb
index 157bf4b7e..64848784f 100644
--- a/run.ipynb
+++ b/run.ipynb
@@ -6,26 +6,123 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#Here we are trying to cheat on model, By assking it predict base on info we are hiding from it!\n",
-    "\n",
-    "#let's call our data ( Assume its name is data.csv ) and create a copy of some of its rows!\n",
-    "\n",
-    "import pandas as pd\n",
-    "\n",
-    "df = pd.read_csv('input/train/data.csv')\n",
-    "\n",
-    "df.shape"
+    "%pip install reformer_pytorch"
   ]
  },
 {
  "cell_type": "code",
-  "execution_count": null,
+  "execution_count": 13,
  "metadata": {},
-  "outputs": [],
+  "outputs": [
+   {
+    "data": {
+     "text/html": [
+      "<!-- HTML table rendering elided; identical to the text/plain output below -->\n"
+     ],
+     "text/plain": [
+      "         date   Open   High    Low  Final  Volume  Close\n",
+      "0  2001-03-25  26000  26000  24885  24885    4520  24885\n",
+      "1  2001-03-26  24885  24885  24885  24885      45  24885\n",
+      "2  2001-04-08  25000  25000  25000  25000    2000  25000\n",
+      "3  2001-04-10  25003  25003  25003  25003      70  25003\n",
+      "4  2001-04-15  25010  25010  25010  25010     120  25010"
+     ]
+    },
+    "execution_count": 13,
+    "metadata": {},
+    "output_type": "execute_result"
+   }
+  ],
  "source": [
+   "# Read the data in so that we can split it between two folders!\n",
+   "\n",
+   "# Assume it is named data.csv; after checking it, we copy it into the two folders!\n",
+   "\n",
+   "import pandas as pd\n",
+   "\n",
+   "df = pd.read_csv('input/train/data.csv')\n",
+   "\n",
+   "df.head()"
  ]
 },
 {
  "cell_type": "code",
  "execution_count": null,
  "metadata": {},
  "outputs": [],
  "source": [
-   "df_pred = df.iloc[3600:,:]\n",
-   "df = df.iloc[:3600,:]\n",
-   "df_pred.shape, df.shape"
+   "df.to_csv('input/train/data.csv', index=False)\n",
+   "df.to_csv('input/test/data.csv', index=False)\n",
+   "\n",
+   "print(\"Now we have two datasets, identical to each other, but one of them will be used during pred\")"
  ]
 },
 {
  "cell_type": "code",
  "execution_count": null,
  "metadata": {},
  "outputs": [],
  "source": [
-   "# First let us assume that you are used the enhanced version which saved args to a pickle file:\n",
-   "# We are loading the args same to when trained the model:\n",
+   "# First, assume you trained your model yesterday, it is ready, and it is saved on a drive or somewhere else.\n",
+   "# Better not to waste another 18 hours on retraining, don't you think? :) Meaning what? Let's see.\n",
+   "# While training the model we saw a message. It looked like this:\n",
+   "# Args Object Saved to input/args.json\n",
+   "# And another one:\n",
+   "# test_iTransformer_custom_MS_ft5_sl1_ll1_pl1_dm1_nh1_el1_dl1_df1_fctimeF_ebTrue_dttest_projection_0\n",
+   "# The first is the args, i.e. the model setup; the second is the checkpoint (state dict), which holds all of those 18 hours we talked about.\n",
+   "# First we locate these two.\n",
+   "# Then, if we want, we can also retrain the model on new data -- not from scratch, but continuing where it left off. That helps with cost, with energy,\n",
+   "# and with keeping our sanity.\n",
    "# It typically saves it in the input folder:\n",
+   "# As clumsily written above: it is usually saved in the input folder,\n",
+   "# and if there is more than one file there, take the one whose date and time\n",
+   "# match what was printed under your training run.\n",
+   "# Likewise, the name of the folder the model (its checkpoint) was saved in is what the function accepts as the model.\n",
+   "\n",
-   "import pickle\n",
    "\n",
-   "with open('input/args.pkl', 'rb') as f:\n",
-   "    loaded_arg = pickle.load(f)\n",
-   "\n",
-   "print(\"Args object loaded from args.pkl\")\n",
-   "print(loaded_arg)"
+   "# The load_args function takes the path of the args.json file and hands back the same args you built when training the model.\n",
+   "# Note that you can also load the file yourself if you want;\n",
+   "# here is how, shown as comments:\n",
+   "\n",
+   "#from experiments.pre_train import load_args\n",
+   "#\n",
+   "# path_to_saved_args = '****************'# put that file name in place of the asterisks; mine, for example, was: input/args_20240611_174533.json\n",
+   "#loaded_arg = load_args(path_to_saved_args)\n",
+   "#print(f\"Args object loaded from {path_to_saved_args}\")\n",
+   "#print(loaded_arg)"
  ]
 },
 {
  "cell_type": "code",
  "execution_count": null,
  "metadata": {},
  "outputs": [],
  "source": [
-   "# We should use Model class :\n",
-   "# let us import same as when we trained model:\n",
-   "from utils.tools import dotdict\n",
-   "import torch\n",
-   "from experiments.exp_long_term_forecasting import Exp_Long_Term_Forecast\n",
-   "from experiments.exp_long_term_forecasting_partial import Exp_Long_Term_Forecast_Partial\n",
-   "import random\n",
-   "import numpy as np\n",
+   "# If you run the cell above, you can then make changes like these:\n",
    "\n",
-   "if loaded_arg.exp_name == 'partial_train':\n",
-   "    Exp = Exp_Long_Term_Forecast_Partial\n",
-   "else:\n",
-   "    Exp = Exp_Long_Term_Forecast"
+   "#loaded_arg.test_size = None\n",
+   "#loaded_arg.max_use_of_row = 'No Lim'\n",
+   "#loaded_arg.pred_data_path = 'data.csv'\n",
+   "#loaded_arg.is_training = 0 #because it is predicting! :)\n",
+   "#print(loaded_arg)"
  ]
 },
 {
  "cell_type": "code",
-  "execution_count": null,
+  "execution_count": 4,
  "metadata": {},
  "outputs": [],
  "source": [
-   "exp = Exp(loaded_arg)"
+   "# This is the name of the folder that holds the checkpoint;\n",
+   "# it is written under the training run you gave the model.\n",
+   "\n",
+   "checkpoints_folder_name = 'test_iTransformer_custom_MS_ft5_sl1_ll1_pl1_dm1_nh1_el1_dl1_df1_fctimeF_ebTrue_dttest_projection_0'"
  ]
 },
 {
  "cell_type": "code",
-  "execution_count": null,
+  "execution_count": 5,
  "metadata": {},
  "outputs": [],
  "source": [
-   "#setting is same as the name of the folder that is saved our model:\n",
-   "setting = 'test_iTransformer_custom_MS_ft15_sl1_ll3_pl2_dm1_nh1_el1_dl2_df1_fctimeF_ebTrue_dttest_projection_0'"
+   "# Put that file name in place of the asterisks; mine, for example, was: input/args_20240611_182910.json\n",
+   "\n",
+   "path_to_saved_args = '****************'\n",
+   "path_to_saved_args = 'input/args_20240611_182910.json'"
  ]
 },
 {
  "cell_type": "code",
-  "execution_count": null,
+  "execution_count": 7,
  "metadata": {},
  "outputs": [],
  "source": [
-   "exp.predict(setting, load=True)"
+   "from experiments.after_train import predict"
  ]
 },
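+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "# A minimal sketch (illustrative cell, not a recorded run) of how the DotDict\n",
+   "# that load_args returns behaves: any option that was never set comes back as\n",
+   "# False instead of raising AttributeError. That is what lets older args.json\n",
+   "# files work with newer options -- but it also means a typo like arg.seq_lenn\n",
+   "# silently reads as False, so double-check option names.\n",
+   "from experiments.pre_train import DotDict\n",
+   "\n",
+   "d = DotDict({'seq_len': 5})\n",
+   "print(d.seq_len)         # 5\n",
+   "print(d.kind_of_scaler)  # False: missing keys fall back instead of raising\n"
+  ]
+ },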
{ "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Use CPU\n", + "Fail To Save The Args. Continue ..\n", + "pred 1\n", + "pred 1\n", + "The Results of Prediction for The Next 2 Days Are Now Stored in \n", + " results/Prediction Results/prediction_20240611_183102.csv\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "import os\n", - "predicted_path = os.path.join('results', setting, 'Preds real_prediction.npy')\n", - "predicted_values = np.load(predicted_path)\n", - "predicted_close = predicted_values[0,:,-1].reshape(-1,1)\n", - "predicted_close" + "predict(args= path_to_saved_args, model= checkpoints_folder_name , days_to_predict = 2, retrain= False)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
<!-- HTML table rendering elided; identical to the text/plain output below -->\n"
+     ],
+     "text/plain": [
+      "        date  Close\n",
+      "0 2023-11-08  14300\n",
+      "1 2023-11-09  14346\n",
+      "2 2023-11-10  14293"
+     ]
+    },
+    "execution_count": 9,
+    "metadata": {},
+    "output_type": "execute_result"
+   }
+  ],
+  "source": [
+   "import pandas as pd\n",
+   "\n",
+   "pd.read_csv('results/Prediction Results/prediction_20240611_183102.csv')# this is the same path that was printed just above"
+  ]
+ },
 {
  "cell_type": "code",
  "execution_count": null,
  "metadata": {},
  "outputs": [],
  "source": [
-   "# During prediction, the model only receives the very last row;\n",
-   "# since, via the max use of row argument, we told it not to touch the last 3,\n",
-   "# it has therefore taken the fourth row from the end.\n",
-   "# To be sure about the scale, we inverse-transform it so the two match.\n",
-   "\n",
-   "scaler.inverse_transform(exp.batch_y[0,:,-1].reshape(-1,1))"
+   "# OR OR ##"
  ]
 },
 {
  "cell_type": "code",
  "execution_count": null,
  "metadata": {},
  "outputs": [],
  "source": [
-   "# OR OR ##"
+   "# If you don't have these and want to train the model first and then predict, read on"
  ]
 },
 {
  "cell_type": "code",
-  "execution_count": null,
-  "metadata": {},
-  "outputs": [],
-  "source": []
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
+  "execution_count": 11,
  "metadata": {},
  "outputs": [],
  "source": [
   "# Define the arg object\n",
-   "arg = dotdict()\n",
+   "from utils.tools import dotdict\n",
+   "\n",
+   "arg = dotdict() # $$ this is the famous args object, with its trailing s dropped, poor thing\n",
   "\n",
   "arg.root_path = 'input/train'\n",
-   "arg.data_path = 'data-rowcut.csv'\n",
-   "arg.pred_root_path = 'input/test'\n",
-   "arg.pred_data_path = 'data-pred.csv'"
+   "arg.data_path = 'data.csv'\n",
+   "arg.pred_root_path = 'input/pred'\n",
+   "arg.pred_data_path = 'data.csv'"
  ]
 },
 {
  "cell_type": "code",
-  "execution_count": null,
+  "execution_count": 12,
  "metadata": {},
-  "outputs": [],
+  "outputs": [
+   {
+    "name": "stdout",
+    "output_type": "stream",
+    "text": [
+     "Args in experiment:\n",
+     "{'root_path': 'input/train', 'data_path': 'data.csv', 'pred_root_path': 'input/pred', 'pred_data_path': 'data.csv', 'scale': True, 'test_size': 0.2, 'kind_of_scaler': 'MinMax', 'name_of_col_with_date': 'date', 'kind_of_optim': 'default', 'criter': 'default', 'do_visual': False, 'max_use_of_row': 'No Lim', 'is_training': 1, 'model_id': 'test', 'model': 'iTransformer', 'data': 'custom', 'features': 'MS', 'target': 'Close', 'freq': 'b', 'checkpoints': './checkpoints/', 'seq_len': 5, 'label_len': 1, 'pred_len': 1, 'enc_in': 6, 'dec_in': 6, 'c_out': 1, 'd_model': 1, 'n_heads': 1, 'e_layers': 1, 'd_layers': 1, 'd_ff': 1, 'moving_avg': 25, 'factor': 1, 'distil': True, 'dropout': 0.1, 'embed': 'timeF', 'activation': 'ReLU', 'num_workers': 1, 'itr': 1, 'train_epochs': 2, 'batch_size': 16, 'patience': 2, 'learning_rate': 0.9, 'des': 'test', 'loss': 'MSE', 'lradj': 'type1', 'use_amp': False, 'use_gpu': False, 'gpu': 0, 'use_multi_gpu': False, 'devices': '0,1,2,3', 'exp_name': 'MTSF', 'channel_independence': False, 'inverse': False, 'class_strategy': 'projection', 'efficient_training': False, 'use_norm': True, 'partial_start_index': 0}\n"
+    ]
+   }
+  ],
  "source": [
   "from utils.tools import dotdict\n",
"import torch\n", "from experiments.exp_long_term_forecasting import Exp_Long_Term_Forecast\n", "from experiments.exp_long_term_forecasting_partial import Exp_Long_Term_Forecast_Partial\n", @@ -234,12 +395,12 @@ "# NEW OPTIONS : #\n", "arg.scale = True\n", "arg.test_size = 0.2\n", - "arg.kind_of_scaler = 'Standard'\n", + "arg.kind_of_scaler = 'MinMax'\n", "arg.name_of_col_with_date = 'date'\n", "arg.kind_of_optim = 'default'\n", "arg.criter = 'default'\n", "arg.do_visual = False\n", - "arg.max_use_of_row = 'All Except 3 Days'#It also can be 'All Except a Week' or 'No Lim'\n", + "arg.max_use_of_row = 'No Lim'#It also can be 'All Except a Week' or 'All Except 3 Days'\n", "# # #\n", "\n", "arg.is_training = 1\n", @@ -250,28 +411,28 @@ "arg.target = 'Close'\n", "arg.freq = 'b'\n", "arg.checkpoints = './checkpoints/'\n", - "arg.seq_len = 1*5*3\n", + "arg.seq_len = 1*5*1\n", "arg.label_len = 1*1\n", - "arg.pred_len = 1*3\n", + "arg.pred_len = 1*1\n", "arg.enc_in = 6\n", "arg.dec_in = 6\n", "arg.c_out = 1\n", - "arg.d_model = 2\n", + "arg.d_model = 1\n", "arg.n_heads = 1\n", "arg.e_layers = 1\n", "arg.d_layers = 1\n", - "arg.d_ff = 2\n", + "arg.d_ff = 1\n", "arg.moving_avg = 25\n", "arg.factor = 1\n", "arg.distil = True\n", - "arg.dropout = 0.01\n", + "arg.dropout = 0.1\n", "arg.embed = 'timeF'\n", "arg.activation = 'ReLU'\n", "arg.num_workers = 1\n", "arg.itr = 1\n", - "arg.train_epochs = 3\n", - "arg.batch_size = 64\n", - "arg.patience = 10\n", + "arg.train_epochs = 2\n", + "arg.batch_size = 16\n", + "arg.patience = 2\n", "arg.learning_rate = 0.9\n", "arg.des = 'test'\n", "arg.loss = 'MSE'\n", @@ -300,9 +461,67 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Use CPU\n", + "Args Object Saved to input/args_20240611_182910.json\n", + ">>>>>>>start training : test_iTransformer_custom_MS_ft5_sl1_ll1_pl1_dm1_nh1_el1_dl1_df1_fctimeF_ebTrue_dttest_projection_0>>>>>>>>>>>>>>>>>>>>>>>>>>\n", + "train 2578\n", + "val 370\n", + "test 738\n", + "\titers: 100, epoch: 1 | loss: 0.0002855\n", + "\tspeed: 0.0182s/iter; left time: 4.0624s\n", + "Epoch: 1 cost time: 2.5937039852142334\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[W NNPACK.cpp:64] Could not initialize NNPACK! Reason: Unsupported hardware.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 1, Steps: 161 | Train Loss: 0.0045442 Vali Loss: 0.0050075 Test Loss: 0.0024899\n", + "Validation loss decreased (inf --> 0.005007). 
Saving model ...\n",
+     "Updating learning rate to 0.9\n",
+     "\titers: 100, epoch: 2 | loss: 0.0002948\n",
+     "\tspeed: 0.0697s/iter; left time: 4.3202s\n",
+     "Epoch: 2 cost time: 2.6332802772521973\n",
+     "Epoch: 2, Steps: 161 | Train Loss: 0.0012735 Vali Loss: 0.0055475 Test Loss: 0.0031544\n",
+     "EarlyStopping counter: 1 out of 2\n",
+     "Updating learning rate to 0.45\n",
+     "\n",
+     "\n",
+     "train shape: (322, 16, 1, 1) (322, 16, 1, 1)\n",
+     "train shape: (5152, 1, 1) (5152, 1, 1)\n",
+     "Train mse:0.002908848924562335,Train mae:0.01775975525379181\n",
+     "Train rmse:0.05393374711275101,Train mape:inf\n",
+     "\n",
+     "\n",
+     "Validate shape: (138, 16, 1, 1) (138, 16, 1, 1)\n",
+     "Validate shape: (2212, 1, 1) (2212, 1, 1)\n",
+     "Validate mse:0.003639126092588629,Validate mae:0.028315382694680695\n",
+     "Validate rmse:0.06032516964409324,Validate mape:0.049521959696378764\n",
+     "\n",
+     "\n",
+     ">>>>>>>testing : test_iTransformer_custom_MS_ft5_sl1_ll1_pl1_dm1_nh1_el1_dl1_df1_fctimeF_ebTrue_dttest_projection_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n",
+     "test 738\n",
+     "test shape: (738, 1, 1, 1) (738, 1, 1, 1)\n",
+     "test shape: (738, 1, 1) (738, 1, 1)\n",
+     "Test mse:0.002489923033863306,Test mae:0.022032689303159714\n",
+     "Test rmse:0.04989912733435631,Test mape:0.04616886377334595\n"
+    ]
+   }
+  ],
  "source": [
   "if arg.is_training:\n",
   "    for ii in range(arg.itr):\n",
@@ -371,10 +590,47 @@
 },
 {
  "cell_type": "code",
-  "execution_count": null,
+  "execution_count": 10,
  "metadata": {},
  "outputs": [],
-  "source": []
+  "source": [
+   "# Go straight to prediction, since everything was already set during training\n",
+   "\n",
+   "from experiments.after_train import predict"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": 11,
+  "metadata": {},
+  "outputs": [
+   {
+    "name": "stdout",
+    "output_type": "stream",
+    "text": [
+     "pred 1\n",
+     "pred 1\n",
+     "pred 1\n",
+     "pred 1\n",
+     "pred 1\n",
+     "The Results of Prediction for The Next 5 Days Are Now Stored in \n",
+     "             results/Prediction Results/prediction_20240611_175838.csv\n"
+    ]
+   },
+   {
+    "data": {
+     "text/plain": [
+      "True"
+     ]
+    },
+    "execution_count": 11,
+    "metadata": {},
+    "output_type": "execute_result"
+   }
+  ],
+  "source": [
+   "predict(args= arg, model=exp, days_to_predict=5)# this arg is the one I flagged a bit further up with the $$ comment"
+  ]
 },
 {
  "cell_type": "code",
  "execution_count": null,
  "metadata": {},
  "outputs": [],
  "source": [
-   "# Go straight to prediction, since everything was already set during training\n",
-   "\n",
-   "exp.predict(setting,True)"
+   "predicted_path = 'results/Prediction Results/prediction_20240611_175838.csv'"
  ]
 },
 {
  "cell_type": "code",
-  "execution_count": null,
+  "execution_count": 12,
  "metadata": {},
-  "outputs": [],
+  "outputs": [
+   {
+    "data": {
+     "text/html": [
+      "<!-- HTML table rendering elided; identical to the text/plain output below -->\n"
+     ],
+     "text/plain": [
+      "        date  Close\n",
+      "0 2023-11-08  14300\n",
+      "1 2023-11-09  14346\n",
+      "2 2023-11-10  14293\n",
+      "3 2023-11-11  14247\n",
+      "4 2023-11-12  14255\n",
+      "5 2023-11-13  14212"
+     ]
+    },
+    "execution_count": 12,
+    "metadata": {},
+    "output_type": "execute_result"
+   }
+  ],
  "source": [
   "import pandas as pd\n",
   "pd.read_csv(predicted_path)"
  ]
 },
+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "Remember how I said above that you can load the args yourself?\n",
+   "\n",
+   "Here is an example of what that is good for.\n",
+   "\n",
+   "Suppose you trained the model in MS mode, i.e. the run above.\n",
+   "\n",
+   "Now I cheat: I tell the args it is plain M, and then swap the model's args for this one,\n",
+   "\n",
+   "and we see it gets fooled and reveals all of its predictions for every column.\n",
+   "\n",
+   "This is only possible between the MS and S modes."
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": 14,
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "arg.features = 'M'"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": 15,
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "exp.args = arg"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": 16,
+  "metadata": {},
+  "outputs": [
+   {
+    "name": "stdout",
+    "output_type": "stream",
+    "text": [
+     "pred 1\n",
+     "pred 1\n",
+     "pred 1\n",
+     "pred 1\n",
+     "pred 1\n",
+     "The Results of Prediction for The Next 5 Days Are Now Stored in \n",
+     "             results/Prediction Results/prediction_20240611_183425.csv\n"
+    ]
+   },
+   {
+    "data": {
+     "text/plain": [
+      "True"
+     ]
+    },
+    "execution_count": 16,
+    "metadata": {},
+    "output_type": "execute_result"
+   }
+  ],
+  "source": [
+   "predict(args=arg, model=exp, days_to_predict=5)"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": 17,
+  "metadata": {},
+  "outputs": [
+   {
+    "data": {
+     "text/html": [
+      "
<!-- HTML table rendering elided; identical to the text/plain output below -->\n
" + ], + "text/plain": [ + " date Open High Low Final Volume Close\n", + "0 2023-11-08 14600 14600 14230 14550 748934 14300\n", + "1 2023-11-09 14446 14716 14172 14544 1912825 14346\n", + "2 2023-11-10 14435 14664 14143 14515 1500316 14293\n", + "3 2023-11-11 14363 14607 14105 14483 1364399 14247\n", + "4 2023-11-12 14398 14598 14097 14460 1341792 14255\n", + "5 2023-11-13 14372 14560 14076 14436 1246474 14212" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.read_csv('results/Prediction Results/prediction_20240611_183425.csv')" ] } ], @@ -441,7 +930,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/utils/save_args.py b/utils/save_args.py deleted file mode 100644 index 98990bdbd..000000000 --- a/utils/save_args.py +++ /dev/null @@ -1,85 +0,0 @@ -# cloner174 -# enhanced version files -import os - -class DotDict: - - def __init__(self, dictionary): - self.__dict__.update(dictionary) - - def __getattr__(self, attr): - try: - return self.__dict__[attr] - except KeyError: - self.__setattr__(attr, False) - return self.__dict__[attr] - - def __setattr__(self, key, value): - self.__dict__[key] = value - - def __str__(self): - return str(self.__dict__) - - -class SaveArgs: - - def __init__(self, args, path) : - - if not isinstance(args, dict): - raise TypeError("THis CLass ONly SUpports DIctionary AS AN INput!") - self.args = args - self.path = path - self.arg_creator = DotDict - - self.__start__() - - def __start__(self) : - - temp = {} - - for any_key, any_val in self.args.items() : - - temp[any_key] = any_val - - self.__modify__(temp) - - def __modify__(self, dict): - - try: - arg_new = self.arg_creator(dict) - except: - print("Fail to Save Args - arg_creator") - return - - self.__save__(arg_new) - - def __path_checker__(self): - - try: - os.makedirs(self.path, exist_ok=True) - self.path = os.path.join(self.path, 'Args.pkl') - return True - except: - return False - - def __save__(self, arg): - - try: - import pickle - if self.__path_checker__(): - with open(self.path, 'wb') as file : - pickle.dump(arg, file) - else: - print("Fail to Save Args") - return - except: - print("Fail to Save Args - __save__") - return - print(f"Args Object Saved to {self.path}") - print("It Can be further used by pickle.load()") - - - def __repr__(self) -> str: - return "cloner174 in github 2024" - -#end# \ No newline at end of file diff --git a/utils/tools.py b/utils/tools.py index c3b922ec7..e0c32d25e 100644 --- a/utils/tools.py +++ b/utils/tools.py @@ -1,10 +1,6 @@ -import os - -import numpy as np import torch +import numpy as np import matplotlib.pyplot as plt -import pandas as pd - plt.switch_backend('agg') @@ -112,4 +108,4 @@ def adjustment(gt, pred): def cal_accuracy(y_pred, y_true): - return np.mean(y_pred == y_true) + return np.mean(y_pred == y_true) \ No newline at end of file From 8cbed3db6543ffff75dc2aef85998486b50d2645 Mon Sep 17 00:00:00 2001 From: Hamed <147453648+cloner174@users.noreply.github.com> Date: Tue, 11 Jun 2024 19:12:20 +0330 Subject: [PATCH 38/38] add prediction fynction after_train.py --- experiments/after_train.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experiments/after_train.py b/experiments/after_train.py index e4bf930a9..baa4cbc9b 100644 --- a/experiments/after_train.py +++ b/experiments/after_train.py @@ -129,7 +129,7 @@ def predict(args, model, with torch.no_grad(): for i, 
(batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(pred_loader): batch_x = batch_x.float().to(exp.device) - batch_y = batch_y.float() + batch_y = batch_y.float().to(exp.device) batch_x_mark = batch_x_mark.float().to(exp.device) batch_y_mark = batch_y_mark.float().to(exp.device) dec_inp = torch.zeros_like(batch_y[:, -exp.args.pred_len:, :]).float() @@ -199,4 +199,4 @@ def predict(args, model, os.unlink(temp_path) print(f'''The Results of Prediction for The Next {days_to_predict} Days Are Now Stored in {file_path}''') - return True \ No newline at end of file + return True
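
With patches 37 and 38 applied, a finished run can be rolled forward without retraining. A minimal usage sketch, assuming an args.json that SaveArgs wrote during training and the checkpoint folder name printed at train time (both strings below are illustrative placeholders, not files shipped with the repo):

    from experiments.after_train import predict

    # args: an in-memory args object, or the path to the args.json saved during training
    # model: a live experiment object, or the name of its checkpoint folder
    predict(args='input/args.json',
            model='test_iTransformer_custom_MS_ft5_sl1_ll1_pl1_dm1_nh1_el1_dl1_df1_fctimeF_ebTrue_dttest_projection_0',
            days_to_predict=3)

The forecasts are written to results/Prediction Results/prediction.csv, with a timestamp appended to the file name if one already exists.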