
Commit 5e80851

0.1.1 Cleaned up PairwiseCorrelation, matrix_print
0.1.2 Added JSON based examples
1 parent c75a18e commit 5e80851

7 files changed: +3117 −309 lines


CHANGELOG.rst

Lines changed: 6 additions & 0 deletions
@@ -3,6 +3,12 @@ ChangeLog
 
 PLEASE NOTE THAT THE API IS STILL UNSTABLE AS MORE USE CASES / FEATURES ARE ADDED REGULARLY
 
+v0.1.2 (26-03-2019)
+-------------------
+
+* Added example matrix_from_json_data
+* Cleaned up PairwiseCorrelation, matrix_print
+
 
 v0.1.0 (5-03-2019)
 -------------------

correlationMatrix/utils/preprocessing.py

Lines changed: 39 additions & 2 deletions
@@ -44,13 +44,42 @@ def csv_files_to_frame(list, directory, filename):
     return df
 
 
-def construct_log_returns(in_filename, out_filename):
+def json_file_to_frame(input_filename, output_filename):
+    """
+    Given a file name with json data in the format
+    {
+    "Entity1" : [Values],
+    "Entity2" : [Values],
+    ...
+    "EntityN" : [Values]
+    }
+    Convert the data to a pandas dataframe for further processing
+
+    """
+
+    entity_data = pd.read_json(input_filename)
+    # select_data = entity_data.drop(columns=['High', 'Low', 'Open', 'Close', 'Volume'])
+    # index_data = select_data.set_index('Date')
+    # rename_data = index_data.rename(columns={"Adj Close": entry_name})
+    # df = pd.concat([df, rename_data], axis=1, sort=False)
+    #
+    entity_data.to_csv(output_filename, index=False)
+
+    return entity_data
+
+
+def construct_log_returns(in_filename, out_filename, drop_columns=None):
     """
     Load a dataframe with level data from file
+    Drop a list of columns that are not to be processed
     Store to file
 
     """
-    level_data = pd.read_csv(in_filename).drop(columns=['Date'])
+    if drop_columns:
+        level_data = pd.read_csv(in_filename).drop(columns=drop_columns)
+    else:
+        level_data = pd.read_csv(in_filename)
+
     log_return_data = pd.DataFrame()
 
     for column in level_data:
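For orientation, here is a minimal usage sketch of the two functions above. It assumes the module path correlationMatrix.utils.preprocessing (matching the file path in this commit) and illustrative file names; the dropped 'Date' column is an assumption, not part of the commit:

# Hypothetical usage sketch: file names and the dropped column are assumptions.
from correlationMatrix.utils.preprocessing import (
    construct_log_returns,
    json_file_to_frame,
)

# Convert {"Entity1": [...], ..., "EntityN": [...]} JSON data to a frame
# and persist a CSV copy of the level data.
entity_frame = json_file_to_frame('entity_data.json', 'entity_levels.csv')

# Build log-returns; drop_columns replaces the previously hard-coded
# drop of the 'Date' column, so the caller names what to exclude.
construct_log_returns('entity_levels.csv', 'log_returns.csv',
                      drop_columns=['Date'])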
@@ -67,9 +96,11 @@ def normalize_log_returns(in_filename, out_filename):
     Store to file
 
     """
+    mean_vals, std_vals = [], []
     log_return_data = pd.read_csv(in_filename)
     data = log_return_data.values
     cols = list(log_return_data)
+    print('Entity Names: ', cols)
     scaled_data = np.asarray(data)
     for ts in range(data.shape[1]):
         mean = data[:, ts].mean()
@@ -78,3 +109,9 @@ def normalize_log_returns(in_filename, out_filename):
 
     scaled_returns = pd.DataFrame(scaled_data, columns=cols)
     scaled_returns.to_csv(out_filename, index=False)
+
+    for ts in range(data.shape[1]):
+        mean_vals.append(scaled_data[:, ts].mean())
+        std_vals.append(scaled_data[:, ts].std())
+
+    return mean_vals, std_vals
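With this second change, normalize_log_returns also returns the per-column means and standard deviations computed on the scaled data. A minimal sketch under the same assumptions (illustrative file names, assumed module path):

from correlationMatrix.utils.preprocessing import normalize_log_returns

# Scale the log-returns and capture the post-scaling statistics that the
# function now returns; if the scaling is to zero mean / unit variance,
# the returned means should be near 0 and the stds near 1.
mean_vals, std_vals = normalize_log_returns('log_returns.csv',
                                            'scaled_returns.csv')
print(mean_vals, std_vals)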
