@@ -44,13 +44,42 @@ def csv_files_to_frame(list, directory, filename):
4444 return df
4545
4646
47- def construct_log_returns (in_filename , out_filename ):
def json_file_to_frame(input_filename, output_filename):
    """
    Load a JSON file into a pandas dataframe and save a CSV copy of it.

    The JSON file is expected to hold one list of values per entity:
        {
            "Entity1" : [Values],
            "Entity2" : [Values],
            ...
            "EntityN" : [Values]
        }

    input_filename: JSON file to read (handed to pd.read_json).
    output_filename: destination of the CSV copy; the dataframe index
        is not written.

    Returns the dataframe that was read, for further processing.
    """
    frame = pd.read_json(path_or_buf=input_filename)

    # Keep a CSV copy next to returning the frame, so later steps can
    # either reload it from disk or use the in-memory result directly.
    frame.to_csv(path_or_buf=output_filename, index=False)
    return frame
69+
70+
71+ def construct_log_returns (in_filename , out_filename , drop_columns = None ):
4872 """
4973 Load a dataframe with level data from file
74+ Drop a list of columns that are not to be processed
5075 Store to file
5176
5277 """
53- level_data = pd .read_csv (in_filename ).drop (columns = ['Date' ])
78+ if drop_columns :
79+ level_data = pd .read_csv (in_filename ).drop (columns = drop_columns )
80+ else :
81+ level_data = pd .read_csv (in_filename )
82+
5483 log_return_data = pd .DataFrame ()
5584
5685 for column in level_data :
@@ -67,9 +96,11 @@ def normalize_log_returns(in_filename, out_filename):
6796 Store to file
6897
6998 """
99+ mean_vals , std_vals = [], []
70100 log_return_data = pd .read_csv (in_filename )
71101 data = log_return_data .values
72102 cols = list (log_return_data )
103+ print ('Entity Names: ' , cols )
73104 scaled_data = np .asarray (data )
74105 for ts in range (data .shape [1 ]):
75106 mean = data [:, ts ].mean ()
@@ -78,3 +109,9 @@ def normalize_log_returns(in_filename, out_filename):
78109
79110 scaled_returns = pd .DataFrame (scaled_data , columns = cols )
80111 scaled_returns .to_csv (out_filename , index = False )
112+
113+ for ts in range (data .shape [1 ]):
114+ mean_vals .append (scaled_data [:, ts ].mean ())
115+ std_vals .append (scaled_data [:, ts ].std ())
116+
117+ return mean_vals , std_vals
0 commit comments