Skip to content

Commit 5e60e26

Browse files
Broke out the __repr__ code into a function for clarity; reintroduced the check for internal splits in __init__
1 parent 5a64679 commit 5e60e26

File tree

1 file changed

+130
-85
lines changed

1 file changed

+130
-85
lines changed

src/pydna/dseq.py

Lines changed: 130 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,87 @@
3030
from pydna.utils import rc as _rc
3131
from pydna.utils import flatten as _flatten
3232
from pydna.utils import cuts_overlap as _cuts_overlap
33-
from pydna.utils import bp_dict
34-
from pydna.utils import to_watson_table
35-
from pydna.utils import to_crick_table
36-
from pydna.utils import to_full_sequence
37-
from pydna.utils import to_5tail_table
38-
from pydna.utils import to_3tail_table
33+
from pydna.alphabet import bp_dict
34+
from pydna.alphabet import annealing_dict
35+
36+
# from pydna.utils import bp_dict
37+
# from pydna.utils import annealing_dict
38+
from pydna.alphabet import dscode_to_watson_table
39+
from pydna.alphabet import dscode_to_crick_table
40+
from pydna.alphabet import dscode_to_to_full_sequence_table
41+
from pydna.alphabet import dscode_to_watson_tail_table
42+
from pydna.alphabet import dscode_to_crick_tail_table
43+
from pydna.alphabet import placeholder1
44+
from pydna.alphabet import placeholder2
45+
from pydna.alphabet import interval
3946
from pydna.common_sub_strings import common_sub_strings as _common_sub_strings
4047
from pydna.common_sub_strings import terminal_overlap as _terminal_overlap
4148
from pydna.types import DseqType, EnzymesType, CutSiteType
4249

50+
length_limit_for_repr = (
51+
30 # Sequences longer than this get a truncated representation.
52+
)
53+
54+
55+
def representation(data=b"", length_limit_for_repr=30):
    """
    Two line string representation of a sequence of symbols.

    The first line is ``data`` translated with ``dscode_to_watson_table``
    and the second line is the same symbols translated with
    ``dscode_to_crick_table``. Trailing whitespace is stripped from each
    line.

    Parameters
    ----------
    data : bytes, optional
        Sequence of symbols to render. The default is b"".
    length_limit_for_repr : int, optional
        When ``len(data)`` exceeds this limit, every segment (a
        single-stranded run at either end, or the middle part) that is
        longer than ten symbols is abbreviated to its first four and last
        four symbols around a two-symbol filler. The default is 30.

    Returns
    -------
    _pretty_str
        The two lines joined by a newline.

    """

    def _abbreviate(segment, filler):
        # Regex groups that did not participate in the match are None;
        # those, and segments of ten symbols or fewer, pass through as is.
        if segment and len(segment) > 10:
            return segment[:4] + filler * 2 + segment[-4:]
        return segment

    # First alternative: optional leading runs of [PEXIpexi] and [QFZJqfzj],
    # a middle part that begins and ends with one of GATCUON (either case),
    # then optional trailing runs of the same two symbol classes.
    # Second alternative: data starting with a run made up solely of the
    # [PEXIpexiQFZJqfzj] symbols (no middle part found).
    m = _re.match(
        b"([PEXIpexi]*)([QFZJqfzj]*)"
        b"(?=[GATCUONgatcuon])(.*)(?<=[GATCUONgatcuon])"
        b"([PEXIpexi]*)([QFZJqfzj]*)"
        b"|([PEXIpexiQFZJqfzj]+)",
        data,
    )
    result = m.groups() if m else (b"",) * 6
    sticky_left5, sticky_left3, middle, sticky_right5, sticky_right3, single = result

    if len(data) > length_limit_for_repr:
        # NOTE(review): placeholder1, placeholder2 and interval are imported
        # from pydna.alphabet; presumably single-byte filler symbols (the
        # code this replaced used b"11", b"22" and b"..") -- confirm.
        sticky_left5 = _abbreviate(sticky_left5, placeholder2)
        sticky_left3 = _abbreviate(sticky_left3, placeholder1)
        middle = _abbreviate(middle, interval)
        sticky_right5 = _abbreviate(sticky_right5, placeholder2)
        sticky_right3 = _abbreviate(sticky_right3, placeholder1)

    # Concatenate every segment instead of picking the first non-empty one
    # per end. If both kinds of single-stranded run are present at the same
    # end, choosing with `or` would silently drop one of them from the
    # output. When at most one of them is present per end, the result is
    # identical to choosing with `or`.
    r = b"".join(
        segment or b""
        for segment in (
            sticky_left5,
            sticky_left3,
            middle,
            sticky_right5,
            sticky_right3,
            single,
        )
    )

    return _pretty_str(
        f"{r.translate(dscode_to_watson_table).decode().rstrip()}\n"
        f"{r.translate(dscode_to_crick_table).decode().rstrip()}"
    )
113+
43114

44115
class CircularString(str):
45116
"""
@@ -429,8 +500,6 @@ class Dseq(_Seq):
429500
430501
"""
431502

432-
trunc = 30
433-
434503
def __init__(
435504
self,
436505
watson: _Union[str, bytes],
@@ -490,6 +559,8 @@ def __init__(
490559
sense = f"{sense:<{max_len}}" # pad on right side to max_len
491560
antisense = f"{antisense:<{max_len}}" # pad on right side to max_len
492561

562+
assert len(sense) == len(antisense)
563+
493564
data = bytearray()
494565

495566
for w, c in zip(sense, antisense):
@@ -501,11 +572,22 @@ def __init__(
501572
self._data = bytes(data)
502573

503574
self.circular = circular
504-
# self.watson = _pretty_str(watson)
505-
# self.crick = _pretty_str(crick)
506-
self.length = len(self._data)
507-
# self.ovhg = ovhg
508575
self.pos = pos
576+
test_data = self._data
577+
if circular:
578+
test_data += self._data[0:1]
579+
msg = ""
580+
counter = 0
581+
for mobj in _re.finditer(
582+
b"(.{0,3})([PEXIpexi][QFZJqfzj]|[QFZJqfzj][PEXIpexi])(.{0,3})", test_data
583+
):
584+
chunk = mobj.group()
585+
msg += f"[{mobj.start()}:{mobj.end()}]\n{representation(chunk)}\n\n"
586+
counter += 1
587+
if counter:
588+
raise ValueError(
589+
f"Molecule is internally split in {counter} location(s):\n\n{msg}".strip()
590+
)
509591

510592
@classmethod
511593
def quick(cls, data: bytes, *args, circular=False, pos=0, **kwargs):
@@ -516,7 +598,6 @@ def quick(cls, data: bytes, *args, circular=False, pos=0, **kwargs):
516598
"""
517599
obj = cls.__new__(cls)
518600
obj.circular = circular
519-
obj.length = len(data)
520601
obj.pos = pos
521602
obj._data = data
522603
return obj
@@ -526,13 +607,11 @@ def from_string(
526607
cls,
527608
dna: str,
528609
*args,
529-
# linear=True,
530610
circular=False,
531611
**kwargs,
532612
):
533-
obj = cls.__new__(cls) # Does not call __init__
613+
obj = cls.__new__(cls)
534614
obj.circular = circular
535-
obj.length = len(dna)
536615
obj.pos = 0
537616
obj._data = dna.encode("ASCII")
538617
return obj
@@ -558,7 +637,6 @@ def from_representation(cls, dsdna: str, *args, **kwargs):
558637
print(f"Base mismatch in representation {err}")
559638
raise
560639
obj._data = bytes(data)
561-
obj.length = len(data)
562640
return obj
563641

564642
@classmethod
@@ -637,7 +715,7 @@ def watson(self):
637715
DESCRIPTION.
638716
639717
"""
640-
return self._data.translate(to_watson_table).strip().decode("ascii")
718+
return self._data.translate(dscode_to_watson_table).strip().decode("ascii")
641719

642720
@property
643721
def crick(self):
@@ -650,7 +728,7 @@ def crick(self):
650728
DESCRIPTION.
651729
652730
"""
653-
return self._data.translate(to_crick_table).strip().decode("ascii")[::-1]
731+
return self._data.translate(dscode_to_crick_table).strip().decode("ascii")[::-1]
654732

655733
@property
656734
def ovhg(self):
@@ -680,7 +758,7 @@ def to_blunt_string(self):
680758
DESCRIPTION.
681759
682760
"""
683-
return self._data.translate(to_full_sequence).decode("ascii")
761+
return self._data.translate(dscode_to_to_full_sequence_table).decode("ascii")
684762

685763
__str__ = to_blunt_string
686764

@@ -798,52 +876,10 @@ def __eq__(self, other: DseqType) -> bool:
798876
def __repr__(self):
799877

800878
header = f"{self.__class__.__name__}({({False: '-', True: 'o'}[self.circular])}{len(self)})"
801-
# m = _re.match(
802-
# b"([PEXIpexi]*)([QFZJqfzj]*)(?=[GATCUOgatcuo])(.*)(?<=[GATCUOgatcuo])([PEXIpexi]*)([QFZJqfzj]*)|([PEXIpexiQFZJqfzj]+)",
803-
# self._data,
804-
# )
805-
m = _re.match(
806-
b"([PEXIpexi]*)([QFZJqfzj]*)(?=[GATCUONgatcuon])(.*)(?<=[GATCUONgatcuon])([PEXIpexi]*)([QFZJqfzj]*)|([PEXIpexiQFZJqfzj]+)",
807-
self._data,
808-
)
809-
result = m.groups() if m else (b"",) * 6
810-
sticky_left5, sticky_left3, middle, sticky_right5, sticky_right3, single = (
811-
result
812-
)
813-
if len(self) > self.trunc:
814-
sticky_left5 = (
815-
sticky_left5[:4] + b"22" + sticky_left5[-4:]
816-
if sticky_left5 and len(sticky_left5) > 10
817-
else sticky_left5
818-
)
819-
sticky_left3 = (
820-
sticky_left3[:4] + b"11" + sticky_left3[-4:]
821-
if sticky_left3 and len(sticky_left3) > 10
822-
else sticky_left3
823-
)
824-
middle = (
825-
middle[:4] + b".." + middle[-4:]
826-
if middle and len(middle) > 10
827-
else middle
828-
)
829-
sticky_right5 = (
830-
sticky_right5[:4] + b"22" + sticky_right5[-4:]
831-
if sticky_right5 and len(sticky_right5) > 10
832-
else sticky_right5
833-
)
834-
sticky_right3 = (
835-
sticky_right3[:4] + b"11" + sticky_right3[-4:]
836-
if sticky_right3 and len(sticky_right3) > 10
837-
else sticky_right3
838-
)
839-
r = (
840-
(sticky_left5 or sticky_left3 or b"")
841-
+ (middle or b"")
842-
+ (sticky_right5 or sticky_right3 or single or b"")
843-
)
844-
return _pretty_str(
845-
f"{header}\n{r.translate(to_watson_table).decode().rstrip()}\n{r.translate(to_crick_table).decode().rstrip()}"
846-
)
879+
880+
rpr = representation(self._data)
881+
882+
return _pretty_str(f"{header}\n{rpr}")
847883

848884
def reverse_complement(self) -> "Dseq":
849885
"""Dseq object where watson and crick have switched places.
@@ -940,7 +976,7 @@ def looped(self: DseqType) -> DseqType:
940976

941977
junction = b"".join(
942978
[
943-
bp_dict.get((bytes([w]), bytes([c])), b"-")
979+
annealing_dict.get((bytes([w]), bytes([c])), b"-")
944980
for w, c in zip(sticky_left_just, sticky_right_just)
945981
]
946982
)
@@ -1125,10 +1161,10 @@ def _add(self: DseqType, other: DseqType, perfectmatch) -> DseqType:
11251161
assert len(sticky_self_just) == len(sticky_other_just)
11261162

11271163
if perfectmatch:
1128-
1164+
# breakpoint()
11291165
junction = b"".join(
11301166
[
1131-
bp_dict.get((bytes([w]), bytes([c])), b"-")
1167+
annealing_dict.get((bytes([w]), bytes([c])), b"-")
11321168
for w, c in zip(sticky_self_just, sticky_other_just)
11331169
]
11341170
)
@@ -1139,8 +1175,9 @@ def _add(self: DseqType, other: DseqType, perfectmatch) -> DseqType:
11391175
else:
11401176

11411177
result = _terminal_overlap(
1142-
sticky_self.translate(to_full_sequence).decode("ascii"),
1143-
sticky_other.translate(to_full_sequence).decode("ascii") + "@",
1178+
sticky_self.translate(dscode_to_to_full_sequence_table).decode("ascii"),
1179+
sticky_other.translate(dscode_to_to_full_sequence_table).decode("ascii")
1180+
+ "@",
11441181
limit=1,
11451182
)
11461183

@@ -1154,7 +1191,7 @@ def _add(self: DseqType, other: DseqType, perfectmatch) -> DseqType:
11541191

11551192
junction = b"".join(
11561193
[
1157-
bp_dict.get((bytes([w]), bytes([c])), b"-")
1194+
annealing_dict.get((bytes([w]), bytes([c])), b"-")
11581195
for w, c in zip(sticky_self, sticky_other)
11591196
]
11601197
)
@@ -1530,7 +1567,7 @@ def cas9(self, RNA: str) -> _Tuple[slice, ...]:
15301567
cuts = [0]
15311568
for m in _re.finditer(bRNA, self._data):
15321569
cuts.append(m.start() + 17)
1533-
cuts.append(self.length)
1570+
cuts.append(len(self))
15341571
slices = tuple(slice(x, y, 1) for x, y in zip(cuts, cuts[1:]))
15351572
return slices
15361573

@@ -1869,7 +1906,7 @@ def cast_to_ds_right(self):
18691906
"""
18701907

18711908
def replace(m):
1872-
return m.group(1).translate(to_full_sequence)
1909+
return m.group(1).translate(dscode_to_to_full_sequence_table)
18731910

18741911
# Not using f-strings below to avoid bytes/string conversion
18751912
return self.__class__(
@@ -1900,7 +1937,7 @@ def cast_to_ds_left(self):
19001937
"""
19011938

19021939
def replace(m):
1903-
return m.group(1).translate(to_full_sequence)
1940+
return m.group(1).translate(dscode_to_to_full_sequence_table)
19041941

19051942
# Not using f-strings below to avoid bytes/string conversion
19061943
return self.__class__(
@@ -1970,14 +2007,14 @@ def _shed_ss_dna(self, length):
19702007

19712008
for x, y in watsonnicks:
19722009
stuffer = new[x:y]
1973-
ss = Dseq.quick(stuffer.translate(to_5tail_table))
1974-
new = new[:x] + stuffer.translate(to_3tail_table) + new[y:]
2010+
ss = Dseq.quick(stuffer.translate(dscode_to_watson_tail_table))
2011+
new = new[:x] + stuffer.translate(dscode_to_crick_tail_table) + new[y:]
19752012
watsonstrands.append((x, y, ss))
19762013

19772014
for x, y in cricknicks:
19782015
stuffer = new[x:y]
1979-
ss = Dseq.quick(stuffer.translate(to_3tail_table))
1980-
new = new[:x] + stuffer.translate(to_5tail_table) + new[y:]
2016+
ss = Dseq.quick(stuffer.translate(dscode_to_crick_tail_table))
2017+
new = new[:x] + stuffer.translate(dscode_to_watson_tail_table) + new[y:]
19812018
crickstrands.append((x, y, ss))
19822019

19832020
ordered_strands = sorted(watsonstrands + crickstrands)
@@ -1986,8 +2023,8 @@ def _shed_ss_dna(self, length):
19862023

19872024
for x, y, ss in ordered_strands:
19882025
seq = (
1989-
ss._data[::-1].translate(to_watson_table).strip()
1990-
or ss._data.translate(to_crick_table).strip()
2026+
ss._data[::-1].translate(dscode_to_watson_table).strip()
2027+
or ss._data.translate(dscode_to_crick_table).strip()
19912028
)
19922029
strands.append(_Seq(seq))
19932030

@@ -2050,7 +2087,7 @@ def apply_cut(self, left_cut: CutSiteType, right_cut: CutSiteType) -> "Dseq":
20502087
GttCTTAA
20512088
20522089
"""
2053-
if _cuts_overlap(left_cut, right_cut, self.length):
2090+
if _cuts_overlap(left_cut, right_cut, len(self)):
20542091
raise ValueError("Cuts by {} {} overlap.".format(left_cut[1], right_cut[1]))
20552092

20562093
if left_cut:
@@ -2062,12 +2099,20 @@ def apply_cut(self, left_cut: CutSiteType, right_cut: CutSiteType) -> "Dseq":
20622099
(right_watson_cut, right_overhang), _ = right_cut
20632100
else:
20642101
(right_watson_cut, right_overhang), _ = (
2065-
(self.length, 0),
2102+
(len(self), 0),
20662103
None,
20672104
)
20682105

2069-
table1 = to_5tail_table if left_overhang > 0 else to_3tail_table
2070-
table2 = to_5tail_table if right_overhang < 0 else to_3tail_table
2106+
table1 = (
2107+
dscode_to_watson_tail_table
2108+
if left_overhang > 0
2109+
else dscode_to_crick_tail_table
2110+
)
2111+
table2 = (
2112+
dscode_to_watson_tail_table
2113+
if right_overhang < 0
2114+
else dscode_to_crick_tail_table
2115+
)
20712116

20722117
left_stck_begin = min(left_watson_cut, left_watson_cut - left_overhang)
20732118
left_stck_end = left_stck_begin + abs(left_overhang)

0 commit comments

Comments
 (0)