Skip to content

Commit 5cbd7da

Browse files
committed
added tests for multivariate
1 parent 6c29cd8 commit 5cbd7da

File tree

1 file changed

+122
-0
lines changed

1 file changed

+122
-0
lines changed
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
import tempfile
from pathlib import Path

import pandas as pd

from fslite.fs.fdataframe import FSDataFrame
from fslite.fs.multivariate import FSMultivariate
from fslite.utils.datasets import get_tnbc_data_path
6+
7+
8+
# test multivariate_filter method with 'm_corr' method in strict mode
9+
def test_multivariate_filter_corr_strict_mode():
    """
    Test FSMultivariate.select_features with the 'm_corr' method in strict mode.

    Loads the TNBC dataset, applies the selector with a threshold of 0.75 and
    checks the feature counts before and after selection. The filtered data
    is then exported to CSV inside a temporary directory so the test leaves
    no artifact behind in the working directory.

    :return: None
    """
    # import tsv as pandas DataFrame
    df = pd.read_csv(get_tnbc_data_path(), sep="\t")

    # create FSDataFrame instance
    fs_df = FSDataFrame(df=df, sample_col="Sample", label_col="label")

    # create FSMultivariate instance
    fs_multivariate = FSMultivariate(
        fs_method="m_corr",
        selection_mode="strict",
        selection_threshold=0.75,
    )

    fsdf_filtered = fs_multivariate.select_features(fs_df)

    # the input must be left untouched by the selection
    assert fs_df.count_features() == 500
    assert fsdf_filtered.count_features() == 239

    # Export the filtered DataFrame as Pandas DataFrame. Write into a
    # temporary directory: every test in this module uses the same file name,
    # so writing to the current directory would clobber sibling outputs.
    df_filtered = fsdf_filtered.to_pandas()
    with tempfile.TemporaryDirectory() as tmp_dir:
        out_path = Path(tmp_dir) / "filtered_tnbc_data.csv"
        df_filtered.to_csv(out_path, index=False)
        assert out_path.is_file()
34+
35+
36+
# test multivariate_filter method with 'm_corr' method in approximate mode
37+
def test_multivariate_filter_corr_approximate_mode():
    """
    Test FSMultivariate.select_features with the 'm_corr' method in approximate mode.

    Loads the TNBC dataset, applies the selector with a threshold of 0.75 and
    checks that the number of selected features falls within the accepted
    range. The filtered data is then exported to CSV inside a temporary
    directory so the test leaves no artifact behind in the working directory.

    :return: None
    """
    # import tsv as pandas DataFrame
    df = pd.read_csv(get_tnbc_data_path(), sep="\t")

    # create FSDataFrame instance
    fs_df = FSDataFrame(df=df, sample_col="Sample", label_col="label")

    # create FSMultivariate instance
    fs_multivariate = FSMultivariate(
        fs_method="m_corr",
        selection_mode="approximate",
        selection_threshold=0.75,
    )

    fsdf_filtered = fs_multivariate.select_features(fs_df)

    # the input must be left untouched by the selection
    assert fs_df.count_features() == 500

    # test if number of features selected is within the expected range [240-260]
    assert 240 <= fsdf_filtered.count_features() <= 260

    # Export the filtered DataFrame as Pandas DataFrame. Write into a
    # temporary directory: every test in this module uses the same file name,
    # so writing to the current directory would clobber sibling outputs.
    df_filtered = fsdf_filtered.to_pandas()
    with tempfile.TemporaryDirectory() as tmp_dir:
        out_path = Path(tmp_dir) / "filtered_tnbc_data.csv"
        df_filtered.to_csv(out_path, index=False)
        assert out_path.is_file()
64+
65+
66+
# test multivariate_filter method with 'variance' method in percentile mode
67+
def test_multivariate_filter_variance_percentile_mode():
    """
    Test FSMultivariate.select_features with the 'variance' method in percentile mode.

    Loads the TNBC dataset, applies the selector with a threshold of 0.2 and
    checks the feature counts before and after selection. The filtered data
    is then exported to CSV inside a temporary directory so the test leaves
    no artifact behind in the working directory.

    :return: None
    """
    # import tsv as pandas DataFrame
    df = pd.read_csv(get_tnbc_data_path(), sep="\t")

    # create FSDataFrame instance
    fs_df = FSDataFrame(df=df, sample_col="Sample", label_col="label")

    # create FSMultivariate instance
    fs_multivariate = FSMultivariate(
        fs_method="variance",
        selection_mode="percentile",
        selection_threshold=0.2,
    )

    fsdf_filtered = fs_multivariate.select_features(fs_df)

    # the input must be left untouched by the selection
    assert fs_df.count_features() == 500
    assert fsdf_filtered.count_features() == 400

    # Export the filtered DataFrame as Pandas DataFrame. Write into a
    # temporary directory: every test in this module uses the same file name,
    # so writing to the current directory would clobber sibling outputs.
    df_filtered = fsdf_filtered.to_pandas()
    with tempfile.TemporaryDirectory() as tmp_dir:
        out_path = Path(tmp_dir) / "filtered_tnbc_data.csv"
        df_filtered.to_csv(out_path, index=False)
        assert out_path.is_file()
92+
93+
94+
# test multivariate_filter method with 'variance' method in k_best mode
95+
def test_multivariate_filter_variance_k_best_mode():
    """
    Test FSMultivariate.select_features with the 'variance' method in k_best mode.

    Loads the TNBC dataset, applies the selector with a threshold of
    68100000.0 and checks the feature counts before and after selection. The
    filtered data is then exported to CSV inside a temporary directory so the
    test leaves no artifact behind in the working directory.

    :return: None
    """
    # import tsv as pandas DataFrame
    df = pd.read_csv(get_tnbc_data_path(), sep="\t")

    # create FSDataFrame instance
    fs_df = FSDataFrame(df=df, sample_col="Sample", label_col="label")

    # create FSMultivariate instance
    fs_multivariate = FSMultivariate(
        fs_method="variance",
        selection_mode="k_best",
        # TODO: check this value (should be normalized variance?)
        selection_threshold=68100000.0,
    )

    fsdf_filtered = fs_multivariate.select_features(fs_df)

    # the input must be left untouched by the selection
    assert fs_df.count_features() == 500
    assert fsdf_filtered.count_features() == 87

    # Export the filtered DataFrame as Pandas DataFrame. Write into a
    # temporary directory: every test in this module uses the same file name,
    # so writing to the current directory would clobber sibling outputs.
    df_filtered = fsdf_filtered.to_pandas()
    with tempfile.TemporaryDirectory() as tmp_dir:
        out_path = Path(tmp_dir) / "filtered_tnbc_data.csv"
        df_filtered.to_csv(out_path, index=False)
        assert out_path.is_file()
122+

0 commit comments

Comments
 (0)