30
30
from pydna .utils import rc as _rc
31
31
from pydna .utils import flatten as _flatten
32
32
from pydna .utils import cuts_overlap as _cuts_overlap
33
- from pydna .utils import bp_dict
34
- from pydna .utils import to_watson_table
35
- from pydna .utils import to_crick_table
36
- from pydna .utils import to_full_sequence
37
- from pydna .utils import to_5tail_table
38
- from pydna .utils import to_3tail_table
33
+ from pydna .alphabet import bp_dict
34
+ from pydna .alphabet import annealing_dict
35
+
36
+ # from pydna.utils import bp_dict
37
+ # from pydna.utils import annealing_dict
38
+ from pydna .alphabet import dscode_to_watson_table
39
+ from pydna .alphabet import dscode_to_crick_table
40
+ from pydna .alphabet import dscode_to_to_full_sequence_table
41
+ from pydna .alphabet import dscode_to_watson_tail_table
42
+ from pydna .alphabet import dscode_to_crick_tail_table
43
+ from pydna .alphabet import placeholder1
44
+ from pydna .alphabet import placeholder2
45
+ from pydna .alphabet import interval
39
46
from pydna .common_sub_strings import common_sub_strings as _common_sub_strings
40
47
from pydna .common_sub_strings import terminal_overlap as _terminal_overlap
41
48
from pydna .types import DseqType , EnzymesType , CutSiteType
42
49
50
length_limit_for_repr = 30  # Sequences longer than this get a truncated representation.


def _shorten_segment(segment, filler):
    """Return ``segment`` cut to its first and last four symbols around two ``filler``.

    Segments that are ``None``, empty, or at most ten symbols long are
    returned unchanged.
    """
    if segment and len(segment) > 10:
        return segment[:4] + filler * 2 + segment[-4:]
    return segment


def representation(data=b"", length_limit_for_repr=length_limit_for_repr):
    """
    Two line string representation of a sequence of symbols.

    ``data`` is split with a regular expression into up to five segments:
    an optional leading run of ``PEXIpexi`` symbols, an optional leading run
    of ``QFZJqfzj`` symbols, a middle part that starts and ends with one of
    ``GATCUONgatcuon``, and the corresponding optional trailing runs.
    Data consisting only of ``PEXIpexiQFZJqfzj`` symbols is captured as one
    single segment instead.

    If ``data`` is longer than ``length_limit_for_repr``, every one of the
    five segments that is longer than ten symbols is shortened to its first
    four and last four symbols separated by two filler symbols
    (``placeholder2`` for ``PEXIpexi`` runs, ``placeholder1`` for
    ``QFZJqfzj`` runs and ``interval`` for the middle part). The
    single-segment form is never shortened.

    Parameters
    ----------
    data : bytes, optional
        The symbols to represent. The default is b"".
    length_limit_for_repr : int, optional
        Data longer than this is shown in truncated form. The default is the
        value the module-level ``length_limit_for_repr`` has when this
        function is defined (30).

    Returns
    -------
    _pretty_str
        Two lines separated by a newline: ``data`` translated with
        ``dscode_to_watson_table`` and with ``dscode_to_crick_table``,
        each with trailing whitespace removed.

    """
    m = _re.match(
        b"([PEXIpexi]*)([QFZJqfzj]*)(?=[GATCUONgatcuon])(.*)(?<=[GATCUONgatcuon])([PEXIpexi]*)([QFZJqfzj]*)|([PEXIpexiQFZJqfzj]+)",
        data,
    )
    # Groups belonging to the alternative that did not match are None;
    # no match at all (e.g. empty data) yields six empty segments.
    result = m.groups() if m else (b"",) * 6
    sticky_left5, sticky_left3, middle, sticky_right5, sticky_right3, single = result

    if len(data) > length_limit_for_repr:
        sticky_left5 = _shorten_segment(sticky_left5, placeholder2)
        sticky_left3 = _shorten_segment(sticky_left3, placeholder1)
        middle = _shorten_segment(middle, interval)
        sticky_right5 = _shorten_segment(sticky_right5, placeholder2)
        sticky_right3 = _shorten_segment(sticky_right3, placeholder1)

    # NOTE: at each end only the first non-empty run is kept, so if both a
    # PEXIpexi run and a QFZJqfzj run were captured on the same side the
    # second one is not shown.
    r = (
        (sticky_left5 or sticky_left3 or b"")
        + (middle or b"")
        + (sticky_right5 or sticky_right3 or single or b"")
    )

    return _pretty_str(
        f"{r.translate(dscode_to_watson_table).decode().rstrip()}\n"
        f"{r.translate(dscode_to_crick_table).decode().rstrip()}"
    )
113
+
43
114
44
115
class CircularString (str ):
45
116
"""
@@ -429,8 +500,6 @@ class Dseq(_Seq):
429
500
430
501
"""
431
502
432
- trunc = 30
433
-
434
503
def __init__ (
435
504
self ,
436
505
watson : _Union [str , bytes ],
@@ -490,6 +559,8 @@ def __init__(
490
559
sense = f"{ sense :<{max_len }} " # pad on right side to max_len
491
560
antisense = f"{ antisense :<{max_len }} " # pad on right side to max_len
492
561
562
+ assert len (sense ) == len (antisense )
563
+
493
564
data = bytearray ()
494
565
495
566
for w , c in zip (sense , antisense ):
@@ -501,11 +572,22 @@ def __init__(
501
572
self ._data = bytes (data )
502
573
503
574
self .circular = circular
504
- # self.watson = _pretty_str(watson)
505
- # self.crick = _pretty_str(crick)
506
- self .length = len (self ._data )
507
- # self.ovhg = ovhg
508
575
self .pos = pos
576
+ test_data = self ._data
577
+ if circular :
578
+ test_data += self ._data [0 :1 ]
579
+ msg = ""
580
+ counter = 0
581
+ for mobj in _re .finditer (
582
+ b"(.{0,3})([PEXIpexi][QFZJqfzj]|[QFZJqfzj][PEXIpexi])(.{0,3})" , test_data
583
+ ):
584
+ chunk = mobj .group ()
585
+ msg += f"[{ mobj .start ()} :{ mobj .end ()} ]\n { representation (chunk )} \n \n "
586
+ counter += 1
587
+ if counter :
588
+ raise ValueError (
589
+ f"Molecule is internally split in { counter } location(s):\n \n { msg } " .strip ()
590
+ )
509
591
510
592
@classmethod
511
593
def quick (cls , data : bytes , * args , circular = False , pos = 0 , ** kwargs ):
@@ -516,7 +598,6 @@ def quick(cls, data: bytes, *args, circular=False, pos=0, **kwargs):
516
598
"""
517
599
obj = cls .__new__ (cls )
518
600
obj .circular = circular
519
- obj .length = len (data )
520
601
obj .pos = pos
521
602
obj ._data = data
522
603
return obj
@@ -526,13 +607,11 @@ def from_string(
526
607
cls ,
527
608
dna : str ,
528
609
* args ,
529
- # linear=True,
530
610
circular = False ,
531
611
** kwargs ,
532
612
):
533
- obj = cls .__new__ (cls ) # Does not call __init__
613
+ obj = cls .__new__ (cls )
534
614
obj .circular = circular
535
- obj .length = len (dna )
536
615
obj .pos = 0
537
616
obj ._data = dna .encode ("ASCII" )
538
617
return obj
@@ -558,7 +637,6 @@ def from_representation(cls, dsdna: str, *args, **kwargs):
558
637
print (f"Base mismatch in representation { err } " )
559
638
raise
560
639
obj ._data = bytes (data )
561
- obj .length = len (data )
562
640
return obj
563
641
564
642
@classmethod
@@ -637,7 +715,7 @@ def watson(self):
637
715
DESCRIPTION.
638
716
639
717
"""
640
- return self ._data .translate (to_watson_table ).strip ().decode ("ascii" )
718
+ return self ._data .translate (dscode_to_watson_table ).strip ().decode ("ascii" )
641
719
642
720
@property
643
721
def crick (self ):
@@ -650,7 +728,7 @@ def crick(self):
650
728
DESCRIPTION.
651
729
652
730
"""
653
- return self ._data .translate (to_crick_table ).strip ().decode ("ascii" )[::- 1 ]
731
+ return self ._data .translate (dscode_to_crick_table ).strip ().decode ("ascii" )[::- 1 ]
654
732
655
733
@property
656
734
def ovhg (self ):
@@ -680,7 +758,7 @@ def to_blunt_string(self):
680
758
DESCRIPTION.
681
759
682
760
"""
683
- return self ._data .translate (to_full_sequence ).decode ("ascii" )
761
+ return self ._data .translate (dscode_to_to_full_sequence_table ).decode ("ascii" )
684
762
685
763
__str__ = to_blunt_string
686
764
@@ -798,52 +876,10 @@ def __eq__(self, other: DseqType) -> bool:
798
876
def __repr__(self):
    """Return a header line followed by the two line representation of the data.

    The header has the form ``ClassName(<topology><length>)`` where the
    topology marker is "-" when ``self.circular`` is False and "o" when it
    is True. The remaining lines come from ``representation(self._data)``.
    """
    topology = {False: "-", True: "o"}[self.circular]
    header = f"{self.__class__.__name__}({topology}{len(self)})"
    return _pretty_str(f"{header}\n{representation(self._data)}")
847
883
848
884
def reverse_complement (self ) -> "Dseq" :
849
885
"""Dseq object where watson and crick have switched places.
@@ -940,7 +976,7 @@ def looped(self: DseqType) -> DseqType:
940
976
941
977
junction = b"" .join (
942
978
[
943
- bp_dict .get ((bytes ([w ]), bytes ([c ])), b"-" )
979
+ annealing_dict .get ((bytes ([w ]), bytes ([c ])), b"-" )
944
980
for w , c in zip (sticky_left_just , sticky_right_just )
945
981
]
946
982
)
@@ -1125,10 +1161,10 @@ def _add(self: DseqType, other: DseqType, perfectmatch) -> DseqType:
1125
1161
assert len (sticky_self_just ) == len (sticky_other_just )
1126
1162
1127
1163
if perfectmatch :
1128
-
1164
+ # breakpoint()
1129
1165
junction = b"" .join (
1130
1166
[
1131
- bp_dict .get ((bytes ([w ]), bytes ([c ])), b"-" )
1167
+ annealing_dict .get ((bytes ([w ]), bytes ([c ])), b"-" )
1132
1168
for w , c in zip (sticky_self_just , sticky_other_just )
1133
1169
]
1134
1170
)
@@ -1139,8 +1175,9 @@ def _add(self: DseqType, other: DseqType, perfectmatch) -> DseqType:
1139
1175
else :
1140
1176
1141
1177
result = _terminal_overlap (
1142
- sticky_self .translate (to_full_sequence ).decode ("ascii" ),
1143
- sticky_other .translate (to_full_sequence ).decode ("ascii" ) + "@" ,
1178
+ sticky_self .translate (dscode_to_to_full_sequence_table ).decode ("ascii" ),
1179
+ sticky_other .translate (dscode_to_to_full_sequence_table ).decode ("ascii" )
1180
+ + "@" ,
1144
1181
limit = 1 ,
1145
1182
)
1146
1183
@@ -1154,7 +1191,7 @@ def _add(self: DseqType, other: DseqType, perfectmatch) -> DseqType:
1154
1191
1155
1192
junction = b"" .join (
1156
1193
[
1157
- bp_dict .get ((bytes ([w ]), bytes ([c ])), b"-" )
1194
+ annealing_dict .get ((bytes ([w ]), bytes ([c ])), b"-" )
1158
1195
for w , c in zip (sticky_self , sticky_other )
1159
1196
]
1160
1197
)
@@ -1530,7 +1567,7 @@ def cas9(self, RNA: str) -> _Tuple[slice, ...]:
1530
1567
cuts = [0 ]
1531
1568
for m in _re .finditer (bRNA , self ._data ):
1532
1569
cuts .append (m .start () + 17 )
1533
- cuts .append (self . length )
1570
+ cuts .append (len ( self ) )
1534
1571
slices = tuple (slice (x , y , 1 ) for x , y in zip (cuts , cuts [1 :]))
1535
1572
return slices
1536
1573
@@ -1869,7 +1906,7 @@ def cast_to_ds_right(self):
1869
1906
"""
1870
1907
1871
1908
def replace (m ):
1872
- return m .group (1 ).translate (to_full_sequence )
1909
+ return m .group (1 ).translate (dscode_to_to_full_sequence_table )
1873
1910
1874
1911
# Not using f-strings below to avoid bytes/string conversion
1875
1912
return self .__class__ (
@@ -1900,7 +1937,7 @@ def cast_to_ds_left(self):
1900
1937
"""
1901
1938
1902
1939
def replace (m ):
1903
- return m .group (1 ).translate (to_full_sequence )
1940
+ return m .group (1 ).translate (dscode_to_to_full_sequence_table )
1904
1941
1905
1942
# Not using f-strings below to avoid bytes/string conversion
1906
1943
return self .__class__ (
@@ -1970,14 +2007,14 @@ def _shed_ss_dna(self, length):
1970
2007
1971
2008
for x , y in watsonnicks :
1972
2009
stuffer = new [x :y ]
1973
- ss = Dseq .quick (stuffer .translate (to_5tail_table ))
1974
- new = new [:x ] + stuffer .translate (to_3tail_table ) + new [y :]
2010
+ ss = Dseq .quick (stuffer .translate (dscode_to_watson_tail_table ))
2011
+ new = new [:x ] + stuffer .translate (dscode_to_crick_tail_table ) + new [y :]
1975
2012
watsonstrands .append ((x , y , ss ))
1976
2013
1977
2014
for x , y in cricknicks :
1978
2015
stuffer = new [x :y ]
1979
- ss = Dseq .quick (stuffer .translate (to_3tail_table ))
1980
- new = new [:x ] + stuffer .translate (to_5tail_table ) + new [y :]
2016
+ ss = Dseq .quick (stuffer .translate (dscode_to_crick_tail_table ))
2017
+ new = new [:x ] + stuffer .translate (dscode_to_watson_tail_table ) + new [y :]
1981
2018
crickstrands .append ((x , y , ss ))
1982
2019
1983
2020
ordered_strands = sorted (watsonstrands + crickstrands )
@@ -1986,8 +2023,8 @@ def _shed_ss_dna(self, length):
1986
2023
1987
2024
for x , y , ss in ordered_strands :
1988
2025
seq = (
1989
- ss ._data [::- 1 ].translate (to_watson_table ).strip ()
1990
- or ss ._data .translate (to_crick_table ).strip ()
2026
+ ss ._data [::- 1 ].translate (dscode_to_watson_table ).strip ()
2027
+ or ss ._data .translate (dscode_to_crick_table ).strip ()
1991
2028
)
1992
2029
strands .append (_Seq (seq ))
1993
2030
@@ -2050,7 +2087,7 @@ def apply_cut(self, left_cut: CutSiteType, right_cut: CutSiteType) -> "Dseq":
2050
2087
GttCTTAA
2051
2088
2052
2089
"""
2053
- if _cuts_overlap (left_cut , right_cut , self . length ):
2090
+ if _cuts_overlap (left_cut , right_cut , len ( self ) ):
2054
2091
raise ValueError ("Cuts by {} {} overlap." .format (left_cut [1 ], right_cut [1 ]))
2055
2092
2056
2093
if left_cut :
@@ -2062,12 +2099,20 @@ def apply_cut(self, left_cut: CutSiteType, right_cut: CutSiteType) -> "Dseq":
2062
2099
(right_watson_cut , right_overhang ), _ = right_cut
2063
2100
else :
2064
2101
(right_watson_cut , right_overhang ), _ = (
2065
- (self . length , 0 ),
2102
+ (len ( self ) , 0 ),
2066
2103
None ,
2067
2104
)
2068
2105
2069
- table1 = to_5tail_table if left_overhang > 0 else to_3tail_table
2070
- table2 = to_5tail_table if right_overhang < 0 else to_3tail_table
2106
+ table1 = (
2107
+ dscode_to_watson_tail_table
2108
+ if left_overhang > 0
2109
+ else dscode_to_crick_tail_table
2110
+ )
2111
+ table2 = (
2112
+ dscode_to_watson_tail_table
2113
+ if right_overhang < 0
2114
+ else dscode_to_crick_tail_table
2115
+ )
2071
2116
2072
2117
left_stck_begin = min (left_watson_cut , left_watson_cut - left_overhang )
2073
2118
left_stck_end = left_stck_begin + abs (left_overhang )
0 commit comments