From fcd4d76ca5a350c1df75766d4a9bf159c237364b Mon Sep 17 00:00:00 2001 From: Rocco Moretti Date: Wed, 1 Oct 2025 11:43:56 -0500 Subject: [PATCH 1/5] feat(io): Enable input from FASTA files Add the ability for RF3 to load in from FASTA files. The file format is inspired by Boltz's FASTA input format, but slightly more flexible. (You should be able to input a protein FASTA as-is and have it work, albeit without MSA.) It's written with an eye to be flexible for additional sequence file input formats, as desire dictactes. The FASTA input is basically just syntactic sugar around the JSON input format, with a reduced feature set. --- src/modelhub/utils/inference.py | 67 ++++++++++++++++++++++++++++++++- src/modelhub/utils/io.py | 22 +++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/src/modelhub/utils/inference.py b/src/modelhub/utils/inference.py index dc7b9fa..51f8c5d 100644 --- a/src/modelhub/utils/inference.py +++ b/src/modelhub/utils/inference.py @@ -19,8 +19,10 @@ from modelhub.utils.io import ( CIF_LIKE_EXTENSIONS, DICTIONARY_LIKE_EXTENSIONS, + SEQUENCE_LIKE_EXTENSIONS, create_example_id_extractor, find_files_with_extension, + parse_generalized_fasta, ) @@ -87,6 +89,66 @@ def _spoof_cif_from_dictionary(item: dict, temp_dir: PathLike) -> Path: return Path(save_path) +def _spoof_cif_from_sequences(sequencepath: PathLike, temp_dir: PathLike) -> Path: + """Parses and unpacks a sequence file to create a CIF file from its components. + + Creates only one CIF file per sequence file (potentially containing multiple sequences). + + The label of the sequence (roughly) follows the Boltz convention + + >CHAIN_ID|ENTITY_TYPE|MSA_PATH + + Where ENTITY_TYPE is in [protein, dna, rna, smiles, ccd, path] (last is non-Boltz) + + However, both the CHAIN_ID and MSA_PATH are optional (if present the latter must have "a3m" in the name). 
+ If the header does not follow the format, then it's assumed that it's a polymeric (protein) sequence + + Args: + sequencepath (Path): The path to a sequence file. + temp_dir (Path): Path to the temporary directory for storing CIF files. + + Returns: + Path: The path to the created CIF file, saved in the temporary directory. + + """ + seqs: list[ tuple[str, str] ] = parse_generalized_fasta(sequencepath); + + components = [] + + for label, value in seqs: + entry = {} + cif_or_pdb_file = False + + header_parts = label.split("|") + for hp in header_parts: + if "a3m" in hp: + entry["msa_path"] = hp + break + if "ccd" in header_parts: + entry["ccd_code"] = value + elif "path" in header_parts: + entry["path"] = value + if '.pdb' in value.lower() or '.cif' in value.lower(): + cif_or_pdb_file = True + elif "smiles" in header_parts: + entry["smiles"] = value + elif "protein" in header_parts or "rna" in header_parts or "dna" in header_parts: + entry["seq"] = value + else: + logging.warning(f"Header for entry `{label}` in `{sequencepath}` omits an entity designation: assuming polymeric") + entry["seq"] = value + + if len(header_parts) > 1 and len(header_parts[0]) == 1: + if cif_or_pdb_file: + logging.warning("Cannot reset chain_id for PDB or CIF in sequence file header -- chain letter is specified by structure file.") + else: + entry["chain_id"] = header_parts[0] + + components.append(entry) + + item = {"name":sequencepath.stem, "components":components} + + return _spoof_cif_from_dictionary(item, temp_dir) def build_file_paths_for_prediction( input: PathLike | list[PathLike], @@ -125,7 +187,7 @@ def build_file_paths_for_prediction( if Path(_path).is_dir(): paths_to_raw_input_files.extend( find_files_with_extension( - _path, DICTIONARY_LIKE_EXTENSIONS | CIF_LIKE_EXTENSIONS + _path, DICTIONARY_LIKE_EXTENSIONS | CIF_LIKE_EXTENSIONS | SEQUENCE_LIKE_EXTENSIONS ) ) else: @@ -156,6 +218,9 @@ def build_file_paths_for_prediction( elif _path.name.endswith(tuple(CIF_LIKE_EXTENSIONS)): 
# Directly use CIF-like files paths_to_cif_like_files.append(_path) + elif _path.name.endswith(tuple(SEQUENCE_LIKE_EXTENSIONS)): + # Spoof CIF files from sequence-like formats + paths_to_cif_like_files.append( _spoof_cif_from_sequences(_path, temp_dir) ) else: raise ValueError( f"Unsupported file extension: {_path.suffix} (path: {_path}; paths: {paths_to_raw_input_files})." diff --git a/src/modelhub/utils/io.py b/src/modelhub/utils/io.py index d0e219b..57c7304 100644 --- a/src/modelhub/utils/io.py +++ b/src/modelhub/utils/io.py @@ -15,6 +15,7 @@ DICTIONARY_LIKE_EXTENSIONS = {".json", ".yaml", ".yml", ".pkl"} CIF_LIKE_EXTENSIONS = {".cif", ".pdb", ".bcif", ".cif.gz", ".pdb.gz", ".bcif.gz"} +SEQUENCE_LIKE_EXTENSIONS = {".fas", ".fasta"} def build_stack_from_atom_array_and_batched_coords( @@ -205,3 +206,24 @@ def extract_example_id_from_path(file_path: PathLike, extensions: set | list) -> """Extract example_id from file path with specified extensions.""" extractor = create_example_id_extractor(extensions) return extractor(file_path) + +def parse_generalized_fasta(file_path: PathLike) -> list[ tuple[str, str] ]: + """A robust FASTA parser, where the sequence & label components can be arbitrary strings, not limited to a specific alphabet.""" + parsed: list[ tuple[str, str] ] = [] + current_header: str | None = None + current_body: list[str] = [] + with open(file_path, "r") as f: + for line in f: + line = line.strip() + if len(line) == 0: continue + if line[0] == ">": + if current_header is not None: + parsed.append( (current_header, ''.join(current_body) ) ) + current_header = line[1:] + current_body = [] + else: + current_body.append(line) + if current_header is not None: + parsed.append( (current_header, ''.join(current_body) ) ) + + return parsed From 21c6f967b453cdaeff62f6913e715f7695ac40d4 Mon Sep 17 00:00:00 2001 From: Rocco Moretti Date: Wed, 1 Oct 2025 12:37:01 -0500 Subject: [PATCH 2/5] docs: add documentation and example for FASTA file loading --- 
docs/rf3/examples/2FLD_from_fasta.fasta | 14 +++++++ docs/rf3/examples/msas/2FLD_CD.a3m.gz | Bin 0 -> 21158 bytes src/modelhub/inference_engines/README.md | 50 +++++++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 docs/rf3/examples/2FLD_from_fasta.fasta create mode 100644 docs/rf3/examples/msas/2FLD_CD.a3m.gz diff --git a/docs/rf3/examples/2FLD_from_fasta.fasta b/docs/rf3/examples/2FLD_from_fasta.fasta new file mode 100644 index 0000000..ebe7707 --- /dev/null +++ b/docs/rf3/examples/2FLD_from_fasta.fasta @@ -0,0 +1,14 @@ +>2FLD_1|Chain A[auth C]|5'-D(*GP*CP*AP*GP*AP*AP*GP*GP*TP*CP*GP*TP*GP*AP*GP*AP*CP*CP*GP*TP*TP*CP*CP*G)-3'| +GCAGAAGGTCGTGAGACCGTTCCG +>B|dna +CGGAACGGTCTCACGACCTTCTGC +>2FLD_3|Chains C[auth A], D[auth B]|DNA ENDONUCLEASE I-MSOI|Monomastix sp. (141716)|docs/rf3/examples/msas/2FLD_CD.a3m.gz +TLQPTEAAYIAGFLDGDGSIYALLIPRPDYKDIKYQVSLAISFIQRKDKFPYLQDIYDQLGKRGNLRKDRGDGIADYRIIGSTHLSIILPDLVPYLRIKKKQANRILHIINLYPQAQKNPSKFLDLVKIVDDVQNLNKRADELKSTNYDRLLEEFLKAGKIESSP +>D|protein|docs/rf3/examples/msas/2FLD_CD.a3m.gz +TLQPTEAAYIAGFLDGDGSIYALLIPRPDYKDIKYQVSLAISFIQRKDKFPYLQDIYDQLGKRGNLRKDRGDGIADYRIIGSTHLSIILPDLVPYLRIKKKQANRILHIINLYPQAQKNPSKFLDLVKIVDDVQNLNKRADELKSTNYDRLLEEFLKAGKIESSP +>E|ccd +NA +>ccd +CA +>smiles +[Ca+2] diff --git a/docs/rf3/examples/msas/2FLD_CD.a3m.gz b/docs/rf3/examples/msas/2FLD_CD.a3m.gz new file mode 100644 index 0000000000000000000000000000000000000000..aaea1c0e5c3e6220ebdeb4d921cb4dc8666847ef GIT binary patch literal 21158 zcmV((K;XY0iwFpySlws<12RTTL@r@7Z2;U`Yi}gCmHn*wFD2BoA727+y2vV;Y@+xW zDz#r0LE>N+_AcVs@bAyLWL5P;t+C{IgLs8xJfo3%>h!tyoO3U!KU8J)DQG(d&e$y3 zAapap zdL8klPAa7=GfwJJN~7n=GONtw75+6Hm5hBKtYw`c?sl%^Fd`<59c|X0vbpf5WWH13=clYoL97sLNC03qW@O^xPb30BsY{G67wG z_f}gv&97m)w_(cLww!-+m<2wrVcLT~>*}d$2;c6wdupG%`oN2(y(SXBqIGwB;Nw2% zS2RZH2_K7uPkIpP#fk8_Ib?jC>0X@%e@kN+CPw(8o<^;&@adTF*?D42DKlz3zXv`Y zdn*$@HT8T_4EU^1_-tTeRlRc99f-$I z#W^U>mA_y-$+NaPV-$rRfYC8LwV18KJFj}7=NG)$MieRzvvfu8T;EI2fiVt(v&e_2 
zmDY+d>V%PNra3Tv_tW3-38&;7CawWRGsPn+L-yLtUMVo$$EPKx_$OTP28q+t!K3LR)U|4<(2)Lp01v>0Csa7Fr2F6h5NUtP0+ZFEQ#D zVU^NDaT@TKs&EE00wQ=Bl%cW{^e(9h&lr_+s7x&Z@l;sUgyOcJtQX+$#PP3qiS9Q& z@VVmg#~qFJc+_9M07F#~#y!gNx!!jPW3wPj(n`PUx=(n8DFl5}ssgYg>e9^W2b3Ct z^mq{BF$Cc`9v3{CSrAC#adavGB-Lu<-f)m8b)&PQ(wA_=IS zCK46TB)ag+AjQGY?6?4I(ShxAz3Vm-y95w5+8*h5ATiirC7bn1B7_1Y;=RWM7ZU%l zl`8sZlGbpiMu8_63NVHu3_(+!5{Us1i8__WqY=TL1wNrOKI%n@%HUQ{I)LZNCapOs z&3YU0*F>UmUiwE6A4n?iSYM-7OOl+Xp;|4EEnqeTZjqdd=mf3%L}dC%IO7GUnGCvJ z6+(>;4;#cB19-H~z2E^!D!3d9^kZ~=QTPF}Wap^CvYPY=l0wF*0C>?Hm2rA>pr62= z=J6C>NOKrVI0Nb-cpjo-QTJZsWzozJt)oOU?#LrEPNHwaxx3&jcTaV9e5y%!(uYoj z$}6CXfX)f1g$f2?6Hxvbs3V~LEMEgP;-3M^-vXKlt;G8XZFcqVkI<|>ww`wj$hNGZ zK4n)WknN^Eq_AWpO~Uv{eUNulKO0*l@qwuOWFp?MMSQr1`1nP9%$4}S6cogVCliU; zL}H)BM;fI$EYbJq=p;V&R(#Yii;q2i{%nAxq(s*oN|r>)T zUmPSl5}(yUqL%@vRWNW6q2&u`RJ!O-BtCEuP;!QYFyuMOd~`;_00o((B2To8{)}|N z)p_#4lK3zo$Uy+zLa;@4koVw&L!PS@AMjl8)eEX_WpZDEzc3+_X~L&3;;N%3{)+Iz z37-y;dXb)$Y7}pXRaB!4wV8Uc54jfu58lKX1*m$|=t$za)~H)*)GG#(C0<6KOpN^) zSwM{j!Z$y(j|ZaD^Xk}@Nmc-}L+Zh=ly&Ol97`_}qs>;Qk9yHP)|@wb@omlda4)_O zXEu>)#hEl`#aVBfvs*lbU>b_RdV%RL((0@?S1{RIFy~crkjsB-$$22Hket%jD5v9! 
zvAyQ9+AtE0u#c)qp$^q0mps6d+3a#p?g(JoeQZ5>1rl#8R1kvcoWsoM$N;1f z!H&_5%ACQDv^-#ljHErYoWf{Orxf0l<9$?^ExAiW7SLK~tHV^|&$5@oSmX^8|Ni5T z-~ad*Ofy_*!Ojd5f9Pn(kp7_#O0SV_mA*F7bJMYM$F& zvue*$v-J`Mj=<+93=aO9P7`4zE?J4o^W?NxrA7E*f}()+Q4~JHSY3f;?FX_-4`YbR zM~<3J_)yUa->KJUlxX}DrvA}0ybluE3Y3X>m#W z$R)pq@3d!s{Bl_F?VGgP*#Td57tEMdgzwUfv}m2IiJ(gJQKn{;X)6FiX+x)sLb|X;uZ;)369x~4a`o~3{IT84301~- zsIKre+v=)I8_LBTxlj0R#D)qKQxghQ&lj=5Tc5;6Ti|nPbp>U)tgcS9^?=xT79k^f z1F6&MDr$KGdPNrCMvL^)(+2XyY3qTZO&jz6)m8PfWNHB-vL<0}p#l_XolH_l4o)y+>xIY0lz6`%3`!7@<} zl2VR;3+H1x$AM<$v~6wZ^rNcwE1g}p4IgQ7PO~zV&uf2Fu7{>FJ&0R6QL+MbT7!Np z=6inxohVsllH{n2RDn*AuO&$i`6JO8GM(iAn3cmqXOQ8)zWwRv@9_ygfqg#9hd{q{ z^U!Y~tNIEgSw&8*`@A*Zx1aFxQ|MV*Ug0 zMEKHqR&Nk_5al?lX2ICjUD~+b_hqw~V@ulV`HlAYyf&wVO&izSpd)k8QF;-2 zW#dYH2`;H*c_NY#kyPkr?MusYUn-hDg8A6m{Jfj3->p0%?_4mFW9-)ekbXx2pirFO zY7d=$avQS#5<{`{Bk{`x0DgBCrJbHRX78x?Gn`#1Vhw9%I0mV+L%S|tr$Q}oC>A*@zHa~2)+zz{uh z>);3X-Twrn0}?IN5)sPoP-0l7&LKub+_Gk!sp9USQH5SL~fDBT+aW%z9d%5o_6bm&%)w9nZ<-kzp6`55+W z71!FolAkdTlyeeUy~qz(mpqGXfS;)BCnm`cTe7U3qvi5me$qhcot%_~bOuI7VQ2b$ zukoijWbtuL%m|$*H;*KTP~ouUge%lACyw=azQprDQ<5? 
zWE&wKfo|oI@|{_4Mc@k}oFr{tbvh6M`!I`?3x4ReV-m3`OhByd+2<5HsM|XcQAN%{ zBzp|9I#yv z9xf@VD-6%fo>MlTPU~=qq8zzA@qkpQjO~%<>FUHODu&oJd~Cg3kF7hf{erQr>$_sD zFKaAH>a+prL$LXn`3B@RU5b28;8ThXImJ?e+r%W@;VIOVK@yWN>ZnF3$K`$toYIXn zL2HV}R&!wF1*O<@`6GoM?mn;~NTFwL>is6IbWU(6o4bx&wmj`VCG`;04aRwMklc}x zp|zq&9m!B?wDvv?IePr3`gX_E1l7| zn4mWz@x}224szUJR#!0J;~?zDLHw12eD(Ow-#W{yYKn58<1Bq%(m=o5$3p1YV!DBw(1OJ49MeD6 zmZh&{|HpPk2|c}_++94Ryl;~<;_#nW zr*|Nij{Ow!9xoJp()`NPyX%bJEhrJ|y63t*emvHh$*^B{zlHsBQzD|X*Abn4;7q;e z`_CMblVM`6{3^`$E0YMcXGkzrnnd1Auk%HF=3l2h@q^mabj>M>-GUL*)&*m=g`Otw zDCsl=xs+LVd)+nLhzj#Ki()1lF*4Z*0*rHrp|iGylVM;K?3U0&%Mc7AF%KeAu|VN< z&VvZq2!bdgjk5Urz%WNj=n?FOGLOCN2cnS!QqSPnBq$-X#t`q z2W6F)nR$^M=Nw&f5mwMz5#zoN>8wDG$SDR!==G55)VTqv+orRS7&x63%+MRf;=Bz!We!It$U05;MTXvHb=W^04@q(kbeT6D_mo%Lk9G+HX_Q28 z(KnYQ7Y0oV0(9KNCV`Bd81x*u6={y5d5W!i9VIm}Xq@ePkV)q}(&n@De3=(Xop#ft zKLwqN^t8c^7)X0HFVWl}nvd^*SRVK48(psu>nn)eCD82_=UC1q`_t>=0tVM3gH(CM zN@sRpz^NC+(HDAhNnt1GCG4C}lv)KwtRd343!1hFh!=$UEJq?vpIoFP zpFt-YAI~1IG1lk3G`p2X&~|)2)H$@4Hxm}+fZZq5%=AML`o0qxjaWVfy2RrW=z`*b zi^-hNhJPOfA}c+D%LkJ)?RN$>qVg3fX3+9-D_iGQ zxL2FL`dsa+1!R>j@U$H-Xce-7Y;)C8t)i2Ha(AKCE@}foT|%o|$4HB$L!s55Wnpx( zM|nL)T5b@)g*J3diMQqqP=K|X9tRc!*6e|V9~|@g+h_`$;|7R7p<0- z|L#Rgq=CZut2lnjaa<23B7JHMhCC}O9WX<)>1L#Nh^m-D1D$5$6U)y=Y+*c_K0E;X z`f$?iM?l;!(WZXxszZW!QJZ?39x>m0i>!PrHkN)8WeUsADo>9(iH*m&CC)GnllM8J zb}sL8d}>4|!HP>40LT|?EobzJe?nW!_?jYVQxx6Ha zat}K#$2(Ce;PbRD4L*E`69J!;0vvoW)|d++PSZNXnotGA8q%6xtI!V(Cn$KRc<9Zb zF12AZ2WhcG#sI;H(>kE^PfhFA)f!AVL)@P3Aim_%IPPMl-H4`u*G$e>BnAiSB6STa zQP%|>e^-Oc3Wyr}{!^;c((F>#2$$%cqV zIrAPyCjZ0dL&kX8-eP<<`K!C+?_=^0WpYT(g{2j~wVeDfaLAba8{~!JHu*O>`FB(D z2L-ywBv5`L`KRt``3f5)h==t8;$c>`oS8QA-avG$NC1jN02If938T*A^9uq=!_3HU z7$1df1_l`XpKvVD)Jm*cdSmdyBZO)8p+J2^BmjL8Z`sDDgK3PS5a?3tBl3Ug%m&#I~Tx5N6FM_W9oj#e$ zouXAzmEwmlzyJRAcMDujWPP%0@n)aWCsS_jUu^mLl#p)Yw(F5=lJ6%;F5Ns#k}Rys zoFr8&9#IhC@6xA;QGu=xZBCLQ6^{t4LPZ`>sG%!e;sHvgnnM(vLn`Scibqx=(U^-Q zX+GFRW+O*@-er4kb_(@Mo!56s=o21c+kjU8-Srg#72((e-U7R=*_Ox*{!oB 
zkH=33yxEi}k7?^8cUO;qeBRG$v$?hkD=yebJ20xJH;aT8-)v!P34lll@y&{c@&Ip; z@5eWbdKrERKcu_?7?BIvM6Od4`QKlD`t{eJk$-hZnN>{CR&j$FDaqQ#{91X&UkUzE ziY|EWP)O7L@{{(`xd&hi3#8|l?4TzU%iD>e&-9h{jQ%O*P1kBtd{{)7kVdY4%q7-P zOfI;T0dYX}dCC}%`vpc4fz6mNBl+PXh%;|wO2wp8R*@1+7<+?iU&SSDna9e363KC_ zOqmigAWWxhk|_}*swq*o_&4y$8WFZ#xel3!13_mxK+|iajg8I-@xko;&MkF;|?gs=n z*C0w?L+W=71elmBU6DK5rUsYSAsCrdmoRR5*kZ!S&iJ%;h7x3(hC0$;ha~(gMI|fO)XJ50Qy~OZ^) zO+6YS$Y5kyRj#bUmwy4HuJn(;0NVA+9MVV^B=;qqpAJK~BBn|(4fWI4AAk6vcCP4< zA7xp_vk7?UvH?k`>>su=R{yUbG|LwX6zg~KzP#*}XXPBl+3OSU3cQ>n-t`SBh_3lJ za1h7y8h>6nH@P`b^n&9jNr8D2H3GU_bd(F_v=Tza?-99nY7R7s6i^acpcsL3gZJR0 zKq0C~3MR3~hbreywfV>Au*(onONevRJkMU^*u%d!_9&A6YB~03bpa-X^rd=(qI#wB z*uy0K6~`VjxafZ3qJ}i~VC5V@@I2sx)q_kFzH%b!Tv5Lw^7PH5zxoIvfED#I91_HB zTqGPJ9w!oe&TAm&M|?nye4xJJ15M%snuG^@0A--4A|D9k10wSQV_hA{%cYHnS~4Fn zi4T1J^3#_$e85LO018k&#bMXw`EogBTW-@CjD;=p%ysi=yV-u)?zl`)%qMB`t&f-( z2V{7#xj-?E52_1zY|c2-YlP4%tOf$_T&1sHfBWsX-~agf^*{gq`4^Oxy3<`f)XgRM z%2ql~3c&ePS`MjIU$Ao^*^mTnbhYd4;1oQ1#ZM4lkGK1;U#%`cB|d~0ulM^C{4%}X zElcV3d41=gyNR{C(m6k?2a9}zhYnibI%sZf(LvijAmc>`wWIcf4jRDPJzHC3?U&HU zCPUs4ZP}m{D+CyP3_OidO3uC#Yo8pn{)mGfUpGZ}-DHS}u`|R)UO6OH-EklDN)vC? 
zR1=+@^UByq4{78R)%Mizuqnd4(pFxvztN>qnu!zqog&tT7D8S@5<(F2G?+_yrH509 zj@w?oNoxC2K37V!TX1{@H`rw@*8aQg_C9y!Zox}Ro#EM0fttGo)2Ki#M+K}(mfZq3 zj|u>hi%h&x!8HOZ|ON{XO0obC8(c+b*sLL9azr} ztb8BhrrN4zlg+q29TpIGdDN4)wNCkf0$3ihNa<-t8M6ptr>Z)S!k_;E=F1R3{E#mZ z46RB^h&U2iRpe#}E4F7d9V`H5Ms5Y!Ap?LJgNu|Pb(xcTN9td`eEm@yR||X;puI@7 z$de+}S->MLJ<`4Jk^PNFy3&fLFmCJhXA#Dzv)(Q+-U?gR z=}ZI{weAJk%LM||m2D`75X}fTwNe6NxR|Ex@RTdnS~aUaobKa>6Ay^M@rH(hwd<4U z9KYteB{)1thbRq5qSn;yJZZ*FczF%qZ|C&*$c%@43FB~Y#o~<>Mdau%vK%eE|oTghURzUP}+57EPlVD$n(9Q6=Iyo8a&0-B9jQ$8V8-kmu+_>>yV{^|17|~h z6t-lHqQTe2#-w*}e+L#{A`62LdJ(hdGx{WWBdorC2HCL)_nhu+L$dcCTy8swch)gO z#uKNDnmGy09O^{#wq`EU4#<+r#mwPcFR2ixDpm2y2LJ=38@U`Z0Su^HWIhhm%R)uE zszh!JaO*y{1wcIc${XGe5))c*N5Ac?J`vzoDB5b zu`6`HibSVSAOQs^c~#fFY^p0>sEzHqK@EO2UL4d)Rwe5wgcsMh+$^zWe5f%)_O=2tj`(WZnwa zN5N+Sn)Bm-{_`&s%c2YBVoc*N0Vu4EtQ>`SiSm3^;gjP^=9$GNYbIp1bxXe8)1?~nz;k}~xf-$;*gDop#iA=_-iqX6^3vR2`PaTtd9r)#lnOf_ME_D`dk8*{)C_8)^npgtIA#OPTlcviYVvBh||2v6aRWv6?s!6->4#4B&$M3dG1&ww`>Sasz?;c z*62!5^6}SHk$_kWdCm}DHVcUFxTEtz-`q}k+fsHpcetcXK{zbg>yC`PwKpPVx+Cu; z0kB?PZd-9m&`2JGMcgG_?O|%XVL~+1{@^!PKl=0MJ^U=fW3P3G;_)!T%N&QDoR{=Y z5gHgf?kEmC2!&x7^nj9QS^=$#E1+O(4MAK(jk(a)H0_p*LHv(^|_v5z+#Zf^dkMFH-6__^{gCh4w9*<#!i zNZA5$)R=N5dzB>)!X}oH~9#u24aC#hSZTypQ%6LuHuBX_LGl4fvw{Jq+uD8Nd*SgLJAa1QV zpZVmGuX9#++!l-~Np}}2x{x*IUe&#*(?$!Ae3j9P1>({aP8%MX;V#{*?tq6{ldkXT zy5maaPe>U(Wq9#qiH1X#&+wY_Zu5OdORcBM|1Cy3Ro$v}26|)m4c9 zD^ye3hh#^GvR|Vjyz(G_Je!DWBvw%~XJrrKTCpHAnzpS;08DHS|C9>t+N_wNYkCSv18Zwxx_6rs-W z^hQNB#HQmKIee*4n(T1(oIN&&*nbha6|~A$+caJfOE+Z z9U(df9IMsifWrklEfjBIj}Rq7G*1K-1^ykFsRXBKj5Z>wlkvu%gm~I*{@&|-hPXN1 zL7c9aQNbG_qVO$lODSEjh17Q@cHyqpQ%9f18`jtwOzf%#*EfL1?Q1b~VCIeE%$P@j z%0M}*uJHY%$S@k98@v;GC#B8;s9{8h%wDmfiqG>&A;5XJ9X7AK3^D3-Mu>|}W7f{G zAZ`#$r~~l#&6~okKrD0frWuX*CjS_2Y=p=TKdP5L%^Gzt%8LK8!F!nR27R#YU!E?u z+GdFHY~l#L`9xJU62i&$lXkb!i$d()C3wsJ<0(9_i@Izn6!M5vH6m&MK=->QKe2 zL54rP$vse6_3lk_WMe}ayso%q7GG;~JN=J0O03RJxRe}Gf0($M3>LrANGqB#mA#MH z*bwDXNcI7U+j=cRIA(~Odx#GN4v!qN9S2!$@H|Cb(#9Ae!*3 
zWywC{*c9i@z?f?Qn3plh#s^*)Oag~#)Ch0*0L1D%*mj>G#@kOL#Q6tByrjj*lsVw= zZ;~Um){eTq6*5QD7GrX(Yk~Y9lOr;Rh{-X{Z`7C^QH@lbZE#<{j4Y1F+Ib4<(wP7H zu?f0Do~aOZE|SWXfybBmGqDDXKmHxI`Zln>y3;m++#U}Lkh5Bj%~N7 znp`q9;x1`4&be`jhrDtEk)um;vxS#0WwERtWz(*BNRS;_)FqyLmG|~!601z_^+Sie z5OuD$-i?uUzkoQCId1iyj-R%hL|DUnY_8HQAn(~+dazbKCafIdSb=KBC2kFaDC#qx8oGNm1g*ru)f{mS|Lxx)qoKyKcrz&I_ zkC#5~#_=Z#>@&R6egSV@z}n@Xo-m77Z<3>Frf%c3ESL^H_)y0it-u#+4z};cy`+K) z4!l3x2A+d)XIis3 zr1yM(0T-Z}l8OTT;Vb%?X(ddis<7Ct>0tq2ZMu6H==squi&Z>gdZjyZM+tudqf*j( z+;c)-ZFyzS37>G&^m|Tx;C`g;1QjG`UNvT+uP7y9{R(a{InpB1vl(Qb1EbU6UtR6& z0u!R#GlPc;7IVK+5~f${B>P?WCTJs{9@24JbNMp1I74kn7$29ZyC zJG#zF@*oS{ptm^LG!T4k3e<;cAvK9j1L!BWK`H^N;kpFY3RSm-i4bG}HA3sy8{iRE z_A^WXh0VA7Q*Yvde(`3v-S?-sTK8?N7_ccne|&Uo(kj? z5BL-to4;i`^T4Pz}2?Upx64R8jN}OU#5u@VWA82T%;MPZ}R_<&`Pz5Zc zFX`=i=^3JZ4suHCYz0kD@@Qv3?aG^W$yggrePCZhMH>o5a( zwTa|a-&Vywn#j$p-o%k-k6&$A5h8q#VS()o{a7>lF{{0+uvwVf1DL~MF+`mP{zGR1 zqAV|G=tomh0t8LS(nKT@1xF9XT{000(UujW=>h}&n7b&Y*W?bQsPBDOibJl zyt-Zd)wTT?1G)o?PJEB<2ptd#@R;?o;BMVBTp8U_jOY&MJ2^*pC`NZ!bMoaUg*{ig z`TRGfUQ=|37=_!L_w*^$lxV#`r#o(rYJcd>+%D&rbu|#NR)ct%RH?dAjvxx+vQ`(< ztl>Qxr_ZNZC`W)=9j94&N_m_O?=fv(9VX9DvObm$0c1P02q5WETAg^lo=%W zfib)V^w@2eLSq)fRxY>m>pEwD?pXBFDmkw`3^x^PT?g&tmK_hbnCX&&copOX&7vvt z#up}%vg;1StIR=7)_cT6rt;QdlfsEtySfhfZGh`Q4x)o^74*Mb-Yb2_>eJY?`@QI2 zAJ<5d4fOGNLv^%v zU=~u&R?wpprg=2rv;b{YxvG{_d_=|e4w)VvBoi>Lg$A`u%}uo(pmnkBVz6=jW|<6$ zPO={oojwb|V6r_QOsa7(*)$#m-05KQVKTKd=UD)~N6C%2p)pAzQh=@N1ZK`U$I5%` zy`i{ypN~C{9lOmR%Ix}^dYGFy5%=5;@9}i`9%Fsw+9uEFJ)-wYSwJfK*=yOP3});+ zmOjniW6OOZ2$zU3K?j~ih>|YD2~bPQo;qPlmkQ~z5iWiF{wT}ZqDLv}=FaA;Ip%-v zddJF5$%&QMGgf)k%9gCGeQu(a@gQ?7wV=yNMaI<#nKg03?521RdY!f82nj+Giho>9 z)BMsX1XHd0kA3f0r+0Nb&L;Q4v+g%TiFKu8CvEK5sj*|17Q)HivrdPTcIen)ijJNL zMGG`uxr>fHh7^ZUW$A*hEB&41eA}^Or(V-r-mH%4>6r93g3n6nY427iC`ir(xZiOW zJkSg%3usp5RK$_5n7}syod{ZfkG|)zhx!lih=1)w7?o3lI#}r^@;`PUP&E+(luC|z8 zWu@y)gziknCJxdFi?*hOXx2T~=$>2do};$1R9gwvNzuBpbU@I%29p+qc=bz&0m7@GiLohv! 
zD{9o%@m&-#Et1k3HopzuivKqKoMS2b0oYqCWu zvCIVGUsO`nJeZ8}|4A2=F0)9$2_nO{6M$^@uGZvKH?h;PeY4vpzt-t+S34c^%1SMq zJ;w>==ybT@HB#It1mLU}oeuR&oem`A@60lqc-+lRTn;(Ta(7QcE*2Ulp5-`bu-aRl z4it|z&ay&i>=>Dp{ao$oP7{-AC3m_vakZJ4xC-0M;wMLBcR7;${r9(rXd-W@+cPq0+_e*$IlBm<ZKmPmIuo8M}@U#h|yCVueH#`iUlV9X-=L!}=s$$`)Nmk@8vy!ED`; zI(KgmdG)7zx9Lq>PfUD8*zohp@!BLng-O7Tp{wCF*rCO3xhR+!Vj* zSHgzd6&o~GH_@ezCa#BMJc=fVe$_2D(yU(<*+?lCfQEh*m5aK#!A2VURdgRH*%{bK zn%PJ{zESO1Au^Kf?HL>C&IQ?Sj5ya%4fo#v|HQKp3UjdF2>V90=*>x-sW) z&kXUunR^@_>3EwLK$Fuc*DfHFd3x@f&SZ^HNPW<+N!h7N`iJ7bco{!`?XD(mz8#+9 za{e5X<1p{Q@NsvTH&^Fr9R1)65%8YX+(!2h_$pT`M-eWo7Sn=5d2>o*;1HnYVwOXJ zqc!4X^RC&56@x1bdkTszRrfbj{8BFE!p;fmiQotHQB!O^#Va$x7Ib)!_{+y zx=bG&i?cC6i`Obe*dj`eW*f)0<7R9i zD-3;sFLUFA%;ub=QBs%^vBCxrJ=OTrjcsKQp<`w_9anAw;4?tzSWq~Dtkt95$EI-4s@s+f%ptthm9R-v8cPG)C z$e`BaelT$|lVi0qBmh-ACzkt&p-)}kJr+FbU`!GnI`6T^m}CKRd_zkmk+4Kf}ZP z4Eq*N5>=sx{bfCzRont4v0hi@Tm&V--DEWti9X2Fvm_a`Jku7uvD6z+Vbw7*Sywgh zHfz24CcQ5lVg?qNfg?bP!7D*Yq58KEt93lZ?b#_t1|6cwejTK*Rb3fd72M%aEOwbu zR!UElesa~gMV?7-4Lu#G->H;~ zrJGIOgNHhPTuGAwha|NFvP!`LRrJ{Ca?08;V=(Oa0(kjE?}OiEPGvHcojv*bJ`=I& zTz35_GQYex@fLwcoIyU-h zi4CQ!4gJ@=&MX;6Pg1a|H{IoIzGGO@3&yhO+a^A^KPeFiau#S=E!W!{=^Tp%`ue&~ zkY$CrG^TS*i6CDhso;*0g8zF9xEbWu@T07Z7umFw%rcBmf}U(}PE){l?h zzd{nx^V&ULEI}EaK(@o1X?|*Dwv)czM=L2#?A{y>tNuAgIjc=5CgXGT*F6qDN6)=S zF>kLHu+sO$53*8BoT{R?EP1}_f|WuA9K94RtaL(Ww*ti79>=EDra#F2Z^t*A<$5r2 zoad|u6EC=}<3TpP$MJ)SjpeSD#A3no_NpJdR-ooYs(=Z^PsjLG_hRmLmx9bgCx80dp}KA&>OkMqlga1p1C|ykb6GFi~i3%#jzx2w9@t z`pU7@m2GDFwEJy;9JqArBV-@eOx_tZEtBc=SeVEFb_Kcwn zp~sX>^Z^LaU@C_#b!?(T8mOzXrjTh+JHKAI10-x$t)q#M=Ne4Bby06b|63O|9WLq= z2r#>-R2E{m@gKUV z$Z<5WfDTe{s_j3<$*m8&x2t3oO=QGM!Ys4(RH{65kkJM(txIJ&(+M)8h)!6$*C>jY zlZ9E0MyI}BP=E&^Qn?ltvKL(z zB)jfJA7{rh>*7UCcKb6EH-n_*4#*?c?6NWf9TXRkEgAGbk6*qkPdT8kWWVgC|2WBCMRfU%;cPar#KdvFbBtpF_*iG#OB4G+B>C7f=H_PXVA+Qf_!T=i zAUAffI*HGRuWlP(r$QJZi=p_8>NEp|lwuch^U~ijpPV&!*1O~W&^w>q#Jq9paflrd zB(2tBrApWkCDGfV+8x-=+%Vi)7p=m}3_CwlT|9Jv4CuHJ(~$wezp`X-o1<-I(F!h2 
z7PlzVr{}Tmj_Q%z*c7TdlKB>%MM~wX3ds7{o)94qv9j8|A&=T$(6~$plE~}02pc!F zp0~;k+8m3fb|=clg78zWirD6G4+T^XG~Gbp0o#66BpqJ|ndA&`0E1}&HYO=m(mgP| zDze5-54z#-JR(i8we8d)M84|y!lJL!@~{!vCYYHVnccBL#+q<5XK0o$(e2m;y}-x| z2)>6ANLm%n#_Lm5t~${<3at8Rp;`&lql6X$D0t16FJFIr#UdV^o;ufIBVTS)p)A;N zijI=b@8f@ix(bZ^J@4)gV}sU(*dCAZKw12;w{#anBzv6IKdL}DH(fN>Z~-C&OP*jA zJ_bvy*s;?J@Medr-|fL%hT_xXd+t3 zgNc_}-J#c=aTlkEe85>3cr4;_Xg(yaZpa4)-5E;{Ft9JR}rI_gr;!4;l zfO!D|3`)+0rF6k{X<;4qoG@d+_B;9RmonLAx>(2KO^~!x3c$7>1`f|5a(McI!N6i3 zVIOt8!RY||fGV`jB?X2`st(J)Y!IPk1#=31hO#FJB4IrMow_ge0Q-P~xe2fjN|V6} zrF74E=J3d?b}jZ!mHh7_8CX1BJuL0L#8 zNwgB;&x4f+n}fGq_KnE}R4^{{JMyR3YiJp3=yFn;xXC+34Xs%MOUF)#u3J}<&Ejcf4du(b#)v+1IihVSc8OY`!`{=V89_mxe2*hUkN>)*C-HcT2w z&Y)OhyFgxllDr?2)xJQh3ntp8yKEOUm`(a8q5WDz%nv#lw*-f43|xBDi)p3ifYiCvncRBI*!)+I?G z+P<7j)R)+%xmqSSoNTMJ!nJjj`TxJ>u5CANB!}*6@2@P{(Eai#x2hz$BtcRsQZ#P^ zOq>av4YK3GeE;&WN`134aXRf}8VHa$K;mOok%uhuusbqmURiUhaV#F*KYaLzj{61j zkEO^pnUfOc?E-UBKu0+T5++sIU;9-(Si)w4Je0dXwjOs%IJlCI6-`K@jhA?vAX%Mk zH$!ibs8)tn31VrObm)dfjoV_LrhaUsCi+6OHHVxRoj;pGPO}-#vW#z#B1?yz;}kllFInkAZrLdkWNB_nP<{evyvO1eG$jq--ZG zDL`O$Sy#nOmhLp2)vV_}B^IBT+=1Ti3PsVf&{`DLvS8mQ(QS(J^bd~Y!a6A*(9rYo z@!iAwfpY6!$pxiJys$6bq|v!7Shx2wxLy-2{YZA@UFZpRp{JO*XSnvzdaEz;4`7W7 zIQK9SSc-d~hkjCIgn;Ry1RVcLd5LC1#RKLPD7eUV2n3 zC4vd>dM}H7XfjrI0bQy468p_y%~t^!zfDC;PS9<0K@6~ilt}8vf}9U#GV?F z8?)%do*JaNMF;@0vWFiNn*sM~2#@*=&Ymq6t5bF#v7 zj^Lf0*0uSy#c7?aCE;d8({_EQ8?|lJt|teojhn3~KG091%*c|9IjuzLb49sgwHPle z>8K84#nybK)9N+x?GCk9!(lO}?S+XwtrlQt#az?iD;{T?{f)<|4<1Le$5~E2PW4(I z#}nz?XOV?Sd?2_9P;s^w(%TPBa6_T<#S6(~V4*Z4pH)ryvurDa5lsRany-b~t+1|C znx=&?PAwe)?g@q58C^*A$NlegA%AT#g&}z&llWu(VL?X|opfDD9zVd1{MG@HbeHTx z3Wt$zSx<7~1!v~OOAsEv9PiYeqlG$BwA4nOztS^`%{zuV@XN-;S(SpoFhOn5%M7S9 zpweHrZQX+XL8tx|POKs7B8)?CM=sX0F4nto*u}Cwznv}g&F>IJC^?#KO0qKbu^uV$ z60In0sed?>Lvl{M7D^XBKK}W>th6EOogPZcco!{47T?XV0=rgpit$Q2y?fxiE1My1 zb`7jJx>M8#X=;I{@d?n?oB=AHa^iQUzf2O&P*U>hLo>T97-e5|Yr2+T7hsEf)l z0&N+AAeXxkBgo~9GAyVpGlF`@jG&n4&V5oN-%O#(M`-mZ_1ACrAC<(LE~G^CMV-yh 
z6bu|U#>+wESoxNea*~*_Cg2Oq3?M>8%+HVF18;BOiL{|VnBGyE>*^EgA`S6&O1%0~ z6Ig6X)Kb`hqUtMo)R)5KmZo%QG?6*HM+fk%HwpAx90-Kk;+D4wDSoE^*66HOPqi_t z&MpVxo^%Ql(8k2(I&7@PuB8iJcdmx7iRa*##Nfl;4FAUGfCppd#syq~=6 zQE~tUi)@v6&s1iFBCwio5EzH0!@5OLVZRpy+cIYsih#uX@m%@!@v)>y6B&FEcs<|? zR+ocdiPX|F5J#xA-`KrqiO}E{<$@9Yja*c;DH z86z=)XzgY~0VMtc5vVwAW~GQ`A33HYB7o3A6jd3{qalWILIgA=6dXC^Rp|K>PZQ3W z_Tf~}u*DCw5rSiQ2#zr}F!J*(g~pO@6a&oG&^RG9Qo2+QrvMSaS5`+%XJCNC_6(pT zrt_Mi5xG?t5dY>(G6KyZ9?k&wCbYSgAD>kNY?*FH(O-A!&qpl^RW&)?#i5%hIAY^D zNDpOSHvTK#3O15$hoWMA!O`?uXZ3C@x%lxKR&?Jc)7@QSL8f@VOVXG@ItY~PPebc( z9Nt3&>u^QDaJjlb{}P2nigpmm+ z@vI(jxQghptMY}?-+o4M`z-yC)fdu_v8_hU=`n^vF_Ly|grkKd}f{8{gzL2<$sP0gBed2LSsb#G* zJOyCcri2D6otRz-I`^XzUp^Plz4_!p;K@xGaxzK&6-{>xlVhf}CCaaBF|x}za+nrT zM2@PZZ)R-F^0PzT6@~M>ZYIp^$}4%47kth;dqhxH;5UKYCy|mJ6YEu7*RFR7A|k5E z#*2A9Z>L4`n8_?FCZqWE+{;tZ>q%gv-%wX|gSt+$lSMJeoEXx7+oDk8B8PdXHC-0U zjWGz5*Hc%5)AAkyygd_Tybes?|K$xCG4ySCB%100CT4x2b)94sZ}6l=u|tf7UgVRs zs>KVeJWs0xPHJ!5>R{5USN}ZIpjASW#zno+oir*&bi1(QoAk|wz;`cjo4(S`bR~d{x zWa>SrVF2T1T}2Q(8;?#cF?Cx_?%Ekjmizi#fpI$oj|@U3bo(@i$MRbx9bdUL)~w8 zXyBR3MNU(JRcL0Oxtw0DV6GEc-CBr9eGd>roKKS#CmY}K=$GxpYP)6T>!|sOhg=JU zIkZMoB+N*El62rWE9UN&og~A``}@ymKEt}R>tJ~~OtU6AQGxIIC4gk9L+MB+;}|J{nkf4gGQUK9Wz1x7vHi{S51Gv318{FnYCx| z`B>p{6JrJz2a`ySYrE0WK;U}*3r$!*RTw%^rZF3ArR}DJhH|5d`k+iR=^aa&^p~}J zJW+2Cn>TOWO<-P@vC=)@2Um3NM5!H?h!UnZ6#2*AOu%5-IVNCGS5@Ued7q7WeVt9% zlmI0TEN#W|oPOjb%c{cb<3-=~<@nyXMM2-b!AD#iQ$8u<3-LvM?+C%l)Ad286Emw| zE=Tn@VsLByt*UcVf2%GDXIBU4-Ud@d-X)0agl}}?*gNQK%#L%&wMIPgLh01ltwT)H zI>c1#VJN3DKbUYrLnN7S_`Vj6>mW@0xT-~L5t6%1Zrs3_Fxpr}8qa~7*)PgA@{s1i zF{{wXLD^7=33bT}q-s&oVf_2G@;?xubZXg#2A~%n1Kgicp14nA3WzpiY};anOdKcv zbWF3M8q`j;Ce2Vpy_Kvg>|?$Bb>f;B(@@PShP{w3vI15b5Jqt@L!?TTxIefhEU~L^`IrXQkA-}pvu$xg*@ME>K zd{D*I$^nbYw&*+?&}=*?^taX}ku*w}2r{LS!&p>vUT-UG8bX~*4iq#ON{Ml~DZ~7} ztNa9LI_q0vH>Um3r_A5D9bCy(cQ@enGH-3Y&b8SR0b!A%Ag_b^93}6i#*G{@;&XIO zC$eQBhQ7vG{BHYP3Q>h`%QQ9c;Bk)OS&swVL62gG@3>#`IMdYsb3IPbC2hA0SB&Wq 
z#$+|22Z;#W%Gf<>4D1Wpn2fkbnCS3mF+zvs@y`#RdWmNYbj-Do0qQWH^7HJtF5sbL z*c??1o!>;UEB=GYh;-io>7fnc!dxm_1wPF3dOFW1?VY`8?=19wN}#&3q)^cNA|35* zMAymzqPLO&kGzkE0fLxE?7BMFziB?NnjafwTR{?^l7CSQRq^{gOyfAvfh83E7u0cI z`Ezc_D4xQOXn?zSu3i{3N_Vr)+%wfl=~5!)=qM)SnF%1mP@So-Tc|{$a|X9jmkRpG z{Z9e7eXeTkk%ewSSsi9{L{4wQJzPlG@p8e9j*MdgqN> zCF?0YwxPQ(kdtTSB#WPNbXVn%?aJxMh-gr3>oFgAj%FJ5!!Y%eBo~Aj@{>^h>;LYB JK2 +
+Folding from a FASTA file + +For convenience, RF3 supports input from FASTA files. +Not all input features are supported, and for more complex use cases (e.g. covalent connectivity), the use of JSON or CIF input formats is recommended. + +The input format is (roughly) compatible with the Boltz FASTA format: +``` +>CHAIN_ID|ENTITY_TYPE|MSA_PATH +SEQUENCE +``` +Where ENTITY_TYPE is one of `protein`, `dna`, `rna`, `ccd`, `smiles` or `path`. + +- Each FASTA represents a single combined prediction. +- The output files take their names from the name of the FASTA file. +- All fields are optional. If ENTITY_TYPE is not present, it defaults to polymeric (protein/dna/rna). +- Each entry type is handled in the same way as its corresponding entry in the JSON-style input, including support for inline modified residues with the `(PBF)`-style CCD code designation syntax. +- If present, CHAIN_ID must be a single character. +- If present, MSA_PATH must include ".a3m" in its name. + + +📝 **Example FASTA configuration** (full example found at `docs/rf3/examples/2FLD_from_fasta.fasta`): + +``` +>2FLD_1|Chain A[auth C]|5'-D(*GP*CP*AP*GP*AP*AP*GP*GP*TP*CP*GP*TP*GP*AP*GP*AP*CP*CP*GP*TP*TP*CP*CP*G)-3'| +GCAGAAGGTCGTGAGACCGTTCCG +>B|dna +CGGAACGGTCTCACGACCTTCTGC +>2FLD_3|Chains C[auth A], D[auth B]|DNA ENDONUCLEASE I-MSOI|Monomastix sp. (141716)|docs/rf3/examples/msas/2FLD_CD.a3m.gz +TLQPTEAAYIAGFLDGDGSIYALLIPRPDYKDIKYQVSLAISFIQRKDKFPYLQDIYDQLGKRGNLRKDRGDGIADYRIIGSTHLSIILPDLVPYLRIKKKQANRILHIINLYPQAQKNPSKFLDLVKIVDDVQNLNKRADELKSTNYDRLLEEFLKAGKIESSP +>D|protein|docs/rf3/examples/msas/2FLD_CD.a3m.gz +TLQPTEAAYIAGFLDGDGSIYALLIPRPDYKDIKYQVSLAISFIQRKDKFPYLQDIYDQLGKRGNLRKDRGDGIADYRIIGSTHLSIILPDLVPYLRIKKKQANRILHIINLYPQAQKNPSKFLDLVKIVDDVQNLNKRADELKSTNYDRLLEEFLKAGKIESSP +>E|ccd +NA +>ccd +CA +>smiles +[Ca+2] +``` + +Note that chain A has the same header as in the RCSB-provided FASTA file, and chain C has the RCSB-provided header plus the appended MSA file path. 
The intent is that purely polymeric predictions should work with minimal pre-processing of header lines. + +🚀 **Run the example:** + +```bash +rf3 fold inputs='docs/rf3/examples/2FLD_from_fasta.fasta' +``` + +
+ #### Templating a Polymer (Protein / DNA / RNA) From 4f8599e76ff2cc2219c12fcacd9d27bb76ac97e8 Mon Sep 17 00:00:00 2001 From: Rocco Moretti Date: Wed, 1 Oct 2025 12:57:56 -0500 Subject: [PATCH 3/5] tests: Add tests for FASTA loading --- tests/data/5vht_from_fasta.fasta | 8 ++++++++ tests/test_inference_pipelines.py | 2 ++ 2 files changed, 10 insertions(+) create mode 100644 tests/data/5vht_from_fasta.fasta diff --git a/tests/data/5vht_from_fasta.fasta b/tests/data/5vht_from_fasta.fasta new file mode 100644 index 0000000..75bbd91 --- /dev/null +++ b/tests/data/5vht_from_fasta.fasta @@ -0,0 +1,8 @@ +>Arbitrary header|with|various|multiple|bars +MTSENPLLALREKISALDEKLLALFAERRELAVEVGKAKLLSHRPVRDIDRERDLLERLITLGKAHHLDAH(PBF)ITRTFQLGIEYSVLTQQALLEHHHHHH +>B|protein|tests/data/msas/5vht_A.a3m +MTSENPLLALREKISALDEKLLALFAERRELAVEVGKAKLLSHRPVRDIDRERDLLERLITLGKAHHLDAH(PBF)ITRTFQLGIEYSVLTQQALLEHHHHHH +>ccd +MG +>D|smiles +OCCO diff --git a/tests/test_inference_pipelines.py b/tests/test_inference_pipelines.py index e79130b..ba5ab80 100644 --- a/tests/test_inference_pipelines.py +++ b/tests/test_inference_pipelines.py @@ -21,6 +21,7 @@ [ "data/nested_examples", "data/multiple_examples_from_json.json", + "data/5vht_from_fasta.fasta", ], ) def test_build_file_paths_for_prediction(file_path: PathLike, tmp_path: Path): @@ -33,6 +34,7 @@ def test_build_file_paths_for_prediction(file_path: PathLike, tmp_path: Path): # Iterate over the returned paths and parse them, ensuring the the outputs are reasonable for path in paths: output = parse(path) + print("PATH:", path) assert output is not None assert len(output["assemblies"]["1"][0]) > 0 From 8fa2c2ea43236dba8b225bc66a5e3c61cae9f555 Mon Sep 17 00:00:00 2001 From: Rocco Moretti Date: Wed, 1 Oct 2025 13:05:55 -0500 Subject: [PATCH 4/5] (fix): remove spuriously added print statement. 
--- tests/test_inference_pipelines.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_inference_pipelines.py b/tests/test_inference_pipelines.py index ba5ab80..9cb342d 100644 --- a/tests/test_inference_pipelines.py +++ b/tests/test_inference_pipelines.py @@ -34,7 +34,6 @@ def test_build_file_paths_for_prediction(file_path: PathLike, tmp_path: Path): # Iterate over the returned paths and parse them, ensuring the the outputs are reasonable for path in paths: output = parse(path) - print("PATH:", path) assert output is not None assert len(output["assemblies"]["1"][0]) > 0 From 2a708f5cb23785f59e49f8f92bd6299097ec6985 Mon Sep 17 00:00:00 2001 From: Rocco Moretti Date: Wed, 1 Oct 2025 13:08:19 -0500 Subject: [PATCH 5/5] (fix): remove spurious null character from end of a3m file --- docs/rf3/examples/msas/2FLD_CD.a3m.gz | Bin 21158 -> 21159 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/docs/rf3/examples/msas/2FLD_CD.a3m.gz b/docs/rf3/examples/msas/2FLD_CD.a3m.gz index aaea1c0e5c3e6220ebdeb4d921cb4dc8666847ef..f9d582368581eb7ff9d036d55628ca41cbcb70bb 100644 GIT binary patch delta 3932 zcmV-i52Ns=r2(g<0T3UH2ng|Q-Dm&j$JXOc2?tlwv7!k{wDA&e z6C|sX?Ph=I4HDJL&?-SJ4U-Ptu&8lc%+u75jnqV6h_>dC^P=-dG*pUWfV#921G(RN zTSC(g>m%#2{a;mer4(xE>S`qBn_Vyh@-j+vgU+C^j?FW}o0u;rF+D8jGYLxS8(Xfr zn7k5q-?ns=WvV0TFRnTZZm{nM`F!{J`p$)t?9+dp58n-N=iJSMzln_Q1)kJnoK%PR zf;)TviYIk{zb9%tqitivQQgV1DzG7|6(eS{KtyXHCo)9Y54pg~PHZMyB*fb$;&8ep zOa4~XJ1Au6o#CT^(w(NWn)TeL#NzXkJJ8!*p(t7wT8pAu7VP^Zx=nGO{=t!4 zSSRHJ8hSoIet7seP;T8Txu7(O7xtx_G&+|B>-Js-*K4AsAIYw~3q8Ru^b`~K4A&l7 zZ}mm~0jyC0=N={kOVLnu=;=YD(L`YbU*vywd-PjjL$n$tz|svK@q;PqvX){Rb=BuQ zIEU8kw>Y*YwG?9=8Hk{==}qAK3^80G`j8yhAKX0aj}I;-*kr)6+lpq)@Qy&4q{K{e zNk}MD!Ap;7r9?2{UGHU)4^76(E}$!QUt+%*tobGYajJ1t6H zq?hy>h+KY7R(Q@4ytC7~Hovwwt&_DR+^lHYuJ3fCwvF2LXldcQN z;|I8r-#Q?Y?vhq&oZyx`27cnQMem*bt9bF@%Lik8}_^H+LCv3bW(2Y%U@ zIIB_+7$&Fi^_<@ zt7bjL%f|5ufEg-7F!av6gHr!`br-4wJ^D*DIFS3WDbAt(E&W`O#=NE2LhqC 
zxaDm^il6DfG&-x*Q*F$uv&%uaC!K->v@x-{4jXH+Yw3d5ovYz%;yL&wG5D}I!@u!4 zU_x>*Sz8MeD5S0u?_|@GND=Ua>fXt%91x`UzW0r z0E?&UQcI$*^zT7zxWW_HTQP#@F1U4e>54Hp!gx~XN&eR5h^E`_UC!>z<+S8-`d7Q0 zD`O;-T|xIP4*p*bPz>VhVy8M zVVn>FO$h}@4tW)N{>0OSbEbVb6*O$|18s!h7#@OSj17$ZJWHXmq#MNmvo$nM2#u63 zmBT4O1n`yB5z`qM;IKUdD2eI3WoSfh)dj@AIg^Y)bBKpCz`Y4=Zso^k)c{+j+fnq_ zo%-`pi$Z@@O-^@l=q3t|*mw@oL)n*&|BAPQjbz)Qs90ZcG`-eYy&Fp|e!PVh-S^3K zcb8a@DW30=G-i+v0ww#?(E1yP_YlE4ToEu_t}f8OL?Mx)9fWu(;~><^)*@LqC_^n0 z3l@LO=ji}Up_FeRLha)K@yS&@s|OseB6{qqe4&5zx1UkmK1)Aj^+NhFw$-RPJ;qQd zM$)d0&~z{bu?WK3+x4)Xg5;5jb!)`2Mq$`aVV2)`)#(^)Pv9Jk;hSGodIS= zFwv;R7ZSG-)g212PdrX3wXAi9rvNP5l+Zw>6VnSp=YCY;<#X}en@=7Dp4@~XCzIq~ z(R6>uFga#gTcZ5B79+ciBZp}bMdYYj`ew$)EI&KcT~RpC>t@2tuDp^*dBx|vvquDF z1%4CQeG)0zF|l6Nb?thWAR?lgY`mD)^LAP^kD1J}Vls+f&%Hbqy`BU%`VDneH>m40 zJ6RNi%!wiWr!5LKE^?TMTGM5r+!%u}c|Cu1B{(hbA;8-+QO4`Q^!;Dnkr6}RhDV~Q z9$;eDCtBA@M)3wuS`<6PSm;GQNvm4Cz{>NqO5mjS#;pz}t$Op%GYwiLBxyWLae|vL z@fjb4+H79s0^w0v=UYB?Jx@IvcqnmftL4-051+q&CC5{$v0r+kt67GrRzQcQ!!RSM#-h&zjFmBdW1hKR6=+qKZx7FmXouOp8ug?`2 zw?pvAAXGxPPjh%Izg6PlN}s1~N@Fck)Zo^3$&jh4e<?I{dRwc2A-*0fl`L&zV=EUo+K%x;CY$go@qs{edvFEPxj;i zN{rrF>2Hydr^ zmThPNdf_p^{Tbzn`!s*1fM_$uwk>AJ#Bt(J$21$NLG4s)(hNn^Tgj@zKGw@$C$5Px z4b`k-*bC_*D`2GoVRSd)!HDt3qaT7F*aB7FF04sUPABJ10E!OS=TM>tNkV5BrEv96 zu_IcL;+px{DqrGw9%GDIY0uUf6Jm1CMI8Lu&%VK2byMG07z=;O?8X*R?$k7KgSl$D zzR*L_h+iML!Q85~N_pB~-m6*&w5ONor+l`V?3X?;7`+6nCWfE@$~9}q_YG48J*~SD zU3qhLIH~P4vNl(?Z8BCCQjjpI@_q~9OVKtp{qOm!J8HiFbAD3qk}UF9gj}Fn5jux< zIXv6An8U{l57vLyQI=DG$r|#jiv+tFB?UiLOUnmUOsyQSsBDYQvjNSA0Z%js{`vyo4Z4ejc zQrRl-VU~Z_(|JB=@9a%`XQB5~0@alzg@WD}>1b~wx>gPly_E!bVhXS>4FMM4a6F)}7QlE$gf$8jb}D%MZSQe*+VyoFh@>j>YJCM;OT=*`1NZ zdq9K&FJ1j>W?8FAuCh`NpT9o*`r(f+|NX469EX*Hx<%D_uOwxc*jD!Q!Nz&8h(5Al zQz(Bb2WiS+$;vAyz&-Vx!;%`VSaG7fql7BM@Q@f54wa(Lx@H<)rOfq15F!al?|g$0 zgZBr7$^K@-zoKW*Jv>`6#tN$y4IRJ~vqB$0y9^k_!J^iG#LdjYI|mySOfSx{1&vN5 zLL`PkeM+*zH36ZhaC9Xl_5mr~WtY^>-r_)W{s_=J@7yX`PwBA@-F<`J68Q#QvIf?0EIiE>TQs3Be)y3qM 
zxcj!HqbyS$Nq=$GS#X1WKgj31&)0V@lw_ZO?tJ)efIH`I9{f#YbT9Cv9^<4sycgWr z`&T@v`};jn+Zk;eBaZ4$mQ{fbS*;i`lLaDL3ptS?%6`ZNR(4`D*&-p{HW7!@Em`un zs@_2%L-!N{DLLC>pGPO}-#vW#z#B1?yz;}kllFInkAZrLdkWNB_nP<{evyvO1eG#> zwxnz)Eh#`?c3D@&OqT96oz<-8J|z~Pm)wEg?g~ZGvd~%-)v{pUC(&(+^Yjmn&QR^l}&E~-)D&73ekt;!2aOoS$}+RDZwTKmfcn~V}^GG$|NObl1oBD znF?NdR4XNd3GaF@i+pG@R(1hhsrwT9&0x(}0T{ncMNB}f4N>6oux|jv>HCU*rDw&m z;^_3mo*I!Gv*^U08nGiIE@Z+&|vaiGAd7 z!>gmJ>HUXC`X9P_M;QSW(dh^ef*jX3Z$fvpSNF=>M4|OWc>8h~Ei0k2IZ+aVd_At` z*yZG0ct1vUBnPgU>5a4C9a-al1T`?;12&_}7%f^X>aNMF;@0vWFiNn*sM~2#@*=&Y zmq6t5bF#v7j^Lf0*0uSy#c7?aCE;d8({_EQ8?|lJt|teojhn3~KG091%*c|9IjuzL zb49sgwHPle>8K84#nybK)9N+x?GCk9!(lO}?S+XwtrlQt#az?iD;{TmoBfT)sSh4U zv&UIZJx=vn9>){u+-H%6NPHl;2~csi7t-4gO>je@^Ti9vWMH8*BcD}G`Lk>*gAq*v z8Je$!*{!gyRGOxRFitHU0qzNf+!82b zg1|6AZP3dMs579_U$<@Dg8f0K{uNHFA?hNGLvKee*0V0wyK&gXvOd3^E%eRr5Jf0C znr%w5GWD?@De)4mC~c{KIFv(jPP`UM7d}4z`M#{QA?lqTO3HYD7cEB?-_5WByH<3H z@k%?rd*Hk)n;~v?4Xil2Q`84(YJsNl3DDJ?0Y95~@UB(YwUpGKm=olOg-{<}6RmIYM;smCOlwcM+B2&WEn)x`8N(-PM){AwJ$<8zAfIYoSkk ztgH$M%rhgXi^?#60&N+AAeXxkBgo~9GAyVpGlF`@jG&n4&V5oN-%O#(M`-mZ_1ACr zAC<(LE~G^CMV-yh6bu|U#>+wESoxNea*~*_Cg2Oq3?M>8%+HVF18;BOiL{|VnBGyE z>*^EgA`S6&O1%0~6Ig6X)Kb`hqUtMo)R)5KmZo%QG?6)fyhjJ{tTzetTO0_4+TxbC z2`PT2|JLZNR!_AttIjS5;huB~641uP<~nSw#jd3bUU#mBuZid2m&D-1-VFc7=YR>x z!DMYMOrVguO1z)E>``(61&eHzc+XU3gd(t-Zx9%VrNg>KQDMIq1luxa7K(tx`|(`) z_3^Q!NfQ}=d=Pj&;0solgJ6l&(lZc8sI=dkhJt;xEXjms!OIyVfGSJgn0;BwG6F1~ zs!J`2zS6%3vEd3&T(89lqPyVM*`+JS;0WVMp(pvO%Mnet-MgIKnagR(<@7IhIakI= zD7%90TgdP14msEx&rKO4F@b39W*6V93T;Z)GD#SgR*f@63Hjxjbc^7AZ(#*%In1I*UYI3YArx>OFQ z01?1fR!2-{V1UE+44@>Y^O~U%xm6bs|K?0G0?i>F&H(o&w7HcZpH%~FnQlkXUw7)y zM=c6}RW&)?#i5%hIAY^DNDpOSHvTK#3O15$hoWMA!O`?uXZ3C@x%lxKR&?Jc)7@QS zL8f@VOVXG@ItY~PPebc(9Nt3&>u^QDaJjlb{}P2nigpmm+@vI(jxQghptMY|^(%*hYar-R&kkuE`kFl*r&FL|QLNStd zZG@(SDTqZ7-rlZ<^%NwJOsr$uF$C#NSc|C8ZMe~@+EGs6X`1Rb&Q_<^5V58qqS9OEBPP3Cm zG02=4(tq2cP~#$pd8jpA7Rrq=2$R=;Q&)o1@*V=bJriZT4ou(wSrVF2T1T}2Q(8;?#cF?Cx_?%Ekjmizi#fpI$o 
zj|@U3bo(@i$MRbx9bdUL)~wGcWB_5%0*67fmLW`p1GV}tzfPbS>0NQNPQ0wL!3{O z6(<|t@#vTB#A>@`=If~WiHBSZggLZEQzXntf0A_II4kDvmYpQS%KQ7zXg%rMv@%OGMU_XJl6fzM?Hf^ zo30%*LH-xtvQk$~f4>P|>Aab>XYl!0;c^pW1{MdCNRDf}(a}KQdj1PdSU*)5I#H%E z8*HWRrh|rZql)^VOf%_!9ZQ<@m$iF5QEv~MH*eidU|yE7(mmh@S9I<~sU4Px5~ep4 z`N!T&z+l-qCSXukRpmf=pN)BaolV%303{AAZN>7Oe&i*~s>18zMc?)1_};ihLEpZ? zM_e3JJ}Ki1@kM^`2*JwJ^+Bf-Gpk@ONA)*iaBKaos&i9+t1bzDXIBU4-Ud@d-X)0a zgl}}?*gNQK%#L%&wMIPgLh01ltwT)HI>c1#VJN3DKbUYrLnN7S_`Vj6>mW@0xT-~L z5t6%1Zrs3_Fxpr}8qa~7*)PgA@{s1iF{{wXLD^7=33bT}q-s&oVf_2G@;?xubZXg# z2A~%n1Kgicp14ncV+x2iV{F@EhD;nM{&Y;Up&HaqwI%z6E~Qvrt1qm z6pi@xfg8-NTC0?&4d%V7g+P0HiGIpwo5_CZ1B1~^z-nR$3ZPuGhJ4>JRnXJA8_|_F zSBI0@P9tk`W!olWWg!I#lPd4G5WW;`Q`7&Rzqq63`#Mzq&}Un^98mW3{w=P{q{B0gKAE=sX+HY&sw+srv1^U%-^^jT**~;H{kX% zZ*9HKwb>E@VUeOBuY>v=CGVxijT|!Kb97B7vSlHEhQ7vG{BHYP3Q>h`%QQ9c;Bk)O zS&swVL62gG@3>#`IMdYsb3IPbC2hA0SB&Wq#$+|22Z;#W%Gf<>4D1Wpn2fkbnCS3m zF+zvs@y`#RdWmNYbj-Do0qQWH^7HJtF5sbL*c??1o!>;UEB=GYh;-io>7fnc!dxm_ z1wPDw@_IVYC+(fRY40rbeoCOavZPSZ`yw6fZA90~0iw5(0FS(nhXI0^N9?*f*S~2# zubLklWm`cKpOSx33{~;_JWS&_(19fs{TI}6U-@%x$S9t|j%a|pcdlL-GfH=}&fGKA zN$FA|<>)9T=N6`em>YZ4;Il!7HkTC zMdct(87x_OCF?0YwxPQ(kdtTSB#WPNbXVn% p?aJxMh-gr3>oFgAj%FJ5!!Y%eBo~Aj@{>^h>;LYBK2