Skip to content

Commit ff54b85

Browse files
Merge branch 'main' of github.com:kalininalab/DataSAIL
2 parents f464acd + 9a570d4 commit ff54b85

File tree

2 files changed

+34
-13
lines changed

2 files changed

+34
-13
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# DataSAIL: Data Splitting Against Information Leaking
22

3-
![testing](https://github.com/kalininalab/glyles/actions/workflows/test.yaml/badge.svg)
3+
![testing](https://github.com/kalininalab/datasail/actions/workflows/test.yaml/badge.svg)
44
[![docs-image](https://readthedocs.org/projects/glyles/badge/?version=latest)](https://datasail.readthedocs.io/en/latest/index.html)
55
[![codecov](https://codecov.io/gh/kalininalab/DataSAIL/branch/main/graph/badge.svg)](https://codecov.io/gh/kalininalab/DataSAIL)
66
[![anaconda](https://anaconda.org/kalininalab/datasail/badges/version.svg)](https://anaconda.org/kalininalab/datasail)

datasail/routine.py

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from datasail.reader.read import read_data
1111
from datasail.reader.utils import DataSet
1212
from datasail.report import report
13-
from datasail.settings import LOGGER, KW_INTER, KW_TECHNIQUES, KW_EPSILON, KW_RUNS, KW_SPLITS, KW_NAMES, \
13+
from datasail.settings import DIM_1, LOGGER, KW_INTER, KW_TECHNIQUES, KW_EPSILON, KW_RUNS, KW_SPLITS, KW_NAMES, \
1414
KW_MAX_SEC, KW_SOLVER, KW_LOGDIR, NOT_ASSIGNED, KW_OUTDIR, MODE_E, MODE_F, DIM_2, SRC_CL, KW_DELTA, \
1515
KW_E_CLUSTERS, KW_F_CLUSTERS, KW_CC, CDHIT, INSTALLED, FOLDSEEK, TMALIGN, CDHIT_EST, DIAMOND, MMSEQS, MASH, TEC_R, TEC_I1, TEC_C1, TEC_I2, TEC_C2, MODE_E, MODE_F, KW_LINKAGE, KW_OVERFLOW
1616
from datasail.solver.overflow import check_dataset
@@ -29,6 +29,15 @@ def list_cluster_algos():
2929
print("\t", name, sep="")
3030

3131

32+
def tech2oneD(tech: str) -> Tuple[str, str]:
    """
    Translate a two-dimensional technique into its pair of one-dimensional technique names.

    The two returned names are built by appending MODE_E and MODE_F (in that order) to the
    one-dimensional technique constant that corresponds to the given two-dimensional one:
    TEC_I2 maps onto TEC_I1 and TEC_C2 maps onto TEC_C1.

    Args:
        tech: Name of the technique to translate; has to equal either TEC_I2 or TEC_C2.

    Returns:
        A tuple holding the MODE_E-suffixed name first and the MODE_F-suffixed name second.

    Raises:
        ValueError: If the given technique is neither TEC_I2 nor TEC_C2.
    """
    # typing.Tuple is used for the annotation to stay consistent with the other signatures
    # in this module and to remain importable on interpreters without builtin generics.
    if tech == TEC_I2:
        return TEC_I1 + MODE_E, TEC_I1 + MODE_F
    if tech == TEC_C2:
        return TEC_C1 + MODE_E, TEC_C1 + MODE_F
    raise ValueError(f"Technique {tech} is not a two-dimensional technique.")
39+
40+
3241
def datasail_main(**kwargs) -> Optional[Tuple[Dict, Dict, Dict]]:
3342
"""
3443
Main routine of DataSAIL. Here the parsed input is aggregated into structures and then split and saved.
@@ -123,18 +132,30 @@ def datasail_main(**kwargs) -> Optional[Tuple[Dict, Dict, Dict]]:
123132
)
124133
# integrate pre_maps into the split maps
125134
for run in range(kwargs[KW_RUNS]):
126-
for technique in kwargs[KW_TECHNIQUES]:
127-
for map_, pre_map in [(e_name_split_map, pre_e_name_split_map),
128-
(f_name_split_map, pre_f_name_split_map),
129-
(e_cluster_split_map, pre_e_cluster_split_map),
130-
(f_cluster_split_map, pre_f_cluster_split_map)]:
131-
if technique not in pre_map:
135+
for map_, pre_map in [(e_name_split_map, pre_e_name_split_map),
136+
(f_name_split_map, pre_f_name_split_map),
137+
(e_cluster_split_map, pre_e_cluster_split_map),
138+
(f_cluster_split_map, pre_f_cluster_split_map)]:
139+
for technique in kwargs[KW_TECHNIQUES]:
140+
if technique == "R":
132141
continue
133-
if technique not in map_:
134-
map_[technique] = []
135-
if run >= len(map_[technique]):
136-
map_[technique].append({})
137-
map_[technique][run].update(pre_map[technique])
142+
if technique[1] == DIM_1:
143+
if technique not in pre_map:
144+
continue
145+
if technique not in map_:
146+
map_[technique] = []
147+
if run >= len(map_[technique]):
148+
map_[technique].append({})
149+
map_[technique][run].update(pre_map[technique])
150+
else:
151+
for one_d_tech in tech2oneD(technique):
152+
if one_d_tech not in pre_map:
153+
continue
154+
if technique not in map_:
155+
map_[technique] = []
156+
if run >= len(map_[technique]):
157+
map_[technique].append({})
158+
map_[technique][run].update(pre_map[one_d_tech])
138159

139160
LOGGER.info("Store results")
140161

0 commit comments

Comments
 (0)