-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathEDA.py
More file actions
21 lines (16 loc) · 695 Bytes
/
EDA.py
File metadata and controls
21 lines (16 loc) · 695 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Exploratory data analysis of the training set: preview the rows, measure
# how often `location` is missing, and inspect the balance of `target`.

# Forward slash instead of "data\Train.csv": "\T" is an invalid escape
# sequence, and "/" is accepted as a path separator on every platform.
train_df = pd.read_csv("data/Train.csv")

# Temporarily lift the pandas display limits so the preview is not truncated;
# the previous options are restored when the `with` block exits.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(train_df.head())

# Explore location: share of rows whose `location` is missing.
# NOTE: despite the label this is a fraction in [0, 1], not a 0-100 value.
percentage_NA_location = train_df['location'].isna().mean()
print(f'Percentage of missing location: {percentage_NA_location}')

# Explore sample: number of rows per `target` value.
sample_distribution = train_df['target'].value_counts()
print(sample_distribution)

# Share of rows labelled 1 among rows labelled 0 or 1. `.get(label, 0)` keeps
# this working when one of the two classes does not occur in the data, where
# plain `sample_distribution[label]` would raise KeyError.
positive_count = sample_distribution.get(1, 0)
negative_count = sample_distribution.get(0, 0)
print(positive_count / (negative_count + positive_count))

# Pass the column by keyword: recent seaborn releases accept only `data`
# positionally, so a bare Series would not be interpreted as the x variable.
sns.countplot(x='target', data=train_df)
plt.show()