Skip to content

Commit 3d2f347

Browse files
authored
Treat Series input like dict for chromsizes and midpoints
1 parent baf3344 commit 3d2f347

File tree

2 files changed

+47
-29
lines changed

2 files changed

+47
-29
lines changed

bioframe/extras.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,15 @@ def make_chromarms(
2828
2929
Parameters
3030
----------
31-
chromsizes : pandas.Dataframe or pandas.Series
32-
If pandas.Series, a map from chromosomes to lengths in bp.
31+
chromsizes : pandas.DataFrame or dict-like
32+
If dict or pandas.Series, a map from chromosomes to lengths in bp.
3333
If pandas.DataFrame, a dataframe with columns defined by cols_chroms.
3434
If cols_chroms is a triplet (e.g. 'chrom','start','end'), then
3535
values in chromsizes[cols_chroms[1]].values must all be zero.
3636
3737
midpoints : pandas.DataFrame or dict-like
3838
Mapping of chromosomes to midpoint (aka centromere) locations.
39-
If pandas.Series, a map from chromosomes to midpoints in bp.
39+
If dict or pandas.Series, a map from chromosomes to midpoints in bp.
4040
If pandas.DataFrame, a dataframe with columns defined by cols_mids.
4141
4242
cols_chroms : (str, str) or (str, str, str)
@@ -59,9 +59,13 @@ def make_chromarms(
5959
elif len(cols_chroms) == 3:
6060
ck1, sk1, ek1 = cols_chroms
6161

62-
if isinstance(chromsizes, pd.Series):
62+
if isinstance(chromsizes, (pd.Series, dict)):
63+
chromsizes = dict(chromsizes)
6364
df_chroms = (
64-
pd.DataFrame(chromsizes).reset_index().rename(columns={"index": ck1})
65+
pd.DataFrame({
66+
ck1: list(chromsizes.keys()),
67+
"length": list(chromsizes.values()),
68+
})
6569
)
6670
elif isinstance(chromsizes, pd.DataFrame):
6771
df_chroms = chromsizes.copy()
@@ -83,7 +87,8 @@ def make_chromarms(
8387
raise ValueError("invalid number of cols_chroms")
8488

8589
ck2, sk2 = cols_mids
86-
if isinstance(midpoints, dict):
90+
if isinstance(midpoints, (pd.Series, dict)):
91+
midpoints = dict(midpoints)
8792
df_mids = pd.DataFrame.from_dict(midpoints, orient="index", columns=[sk2])
8893
df_mids.reset_index(inplace=True)
8994
df_mids.rename(columns={"index": ck2}, inplace=True)

tests/test_extras.py

+36-23
Original file line numberDiff line numberDiff line change
@@ -12,46 +12,59 @@
1212
def test_make_chromarms():
1313

1414
### test the case where columns have different names
15-
df1 = pd.DataFrame(
15+
df = pd.DataFrame(
1616
[["chrX", 0, 8]],
1717
columns=["chromosome", "lo", "hi"],
1818
)
19-
20-
df2 = pd.DataFrame([["chrX", 4]], columns=["chromosome", "loc"])
21-
22-
df_result = pd.DataFrame(
19+
mids = pd.DataFrame([["chrX", 4]], columns=["chromosome", "loc"])
20+
arms = pd.DataFrame(
2321
[
2422
["chrX", 0, 4, "chrX_p"],
2523
["chrX", 4, 8, "chrX_q"],
2624
],
27-
columns=["chromosome", "lo", "hi", "name"],
25+
columns=["chrom", "start", "end", "name"],
2826
)
27+
arms = arms.astype({"start": pd.Int64Dtype(), "end": pd.Int64Dtype()})
2928

3029
# test passing 3 columns
30+
result = bioframe.make_chromarms(
31+
df,
32+
mids,
33+
cols_chroms=["chromosome", "lo", "hi"],
34+
cols_mids=["chromosome", "loc"],
35+
)
3136
pd.testing.assert_frame_equal(
32-
df_result.astype({"lo": pd.Int64Dtype(), "hi": pd.Int64Dtype()}),
33-
bioframe.make_chromarms(
34-
df1,
35-
df2,
36-
cols_chroms=["chromosome", "lo", "hi"],
37-
cols_mids=["chromosome", "loc"],
38-
),
37+
result,
38+
arms.rename(columns={"chrom": "chromosome", "start": "lo", "end": "hi"})
3939
)
4040

4141
# test passing 2 columns
42+
result = bioframe.make_chromarms(
43+
df,
44+
mids,
45+
cols_chroms=["chromosome", "hi"],
46+
cols_mids=["chromosome", "loc"],
47+
)
4248
pd.testing.assert_frame_equal(
43-
df_result.astype({"lo": pd.Int64Dtype(), "hi": pd.Int64Dtype()}).rename(
44-
columns={"lo": "start", "hi": "end"}
45-
),
46-
bioframe.make_chromarms(
47-
df1,
48-
df2,
49-
cols_chroms=["chromosome", "hi"],
50-
cols_mids=["chromosome", "loc"],
51-
),
49+
result,
50+
arms.rename(columns={"chrom": "chromosome"}),
5251
)
5352

54-
# todo: test for passing pd.series !
53+
# test for passing Series or dict
54+
result = bioframe.make_chromarms(pd.Series({"chrX": 8}), mids, cols_mids=["chromosome", "loc"])
55+
pd.testing.assert_frame_equal(arms, result)
56+
57+
result = bioframe.make_chromarms(pd.Series({"chrX": 8}), pd.Series({"chrX": 4}))
58+
pd.testing.assert_frame_equal(arms, result)
59+
60+
result = bioframe.make_chromarms({"chrX": 8}, mids, cols_mids=["chromosome", "loc"])
61+
pd.testing.assert_frame_equal(arms, result)
62+
63+
result = bioframe.make_chromarms({"chrX": 8}, pd.Series({"chrX": 4}))
64+
pd.testing.assert_frame_equal(arms, result)
65+
66+
result = bioframe.make_chromarms({"chrX": 8}, {"chrX": 4})
67+
pd.testing.assert_frame_equal(arms, result)
5568

5669

5770
def test_binnify():

0 commit comments

Comments
 (0)