@@ -105,3 +105,86 @@ pub fn decompress_sequence(compressed: &[u8], sequence_length: usize) -> io::Res
105
105
106
106
Ok ( sequence)
107
107
}
108
+
109
/// A stateless compressor for FASTA file content.
///
/// The type carries no data; it only groups the compression and decompression
/// routines. `Default` is derived so the type can be built wherever a
/// `Default` bound is required, in addition to an explicit constructor.
#[derive(Debug, Clone, Default)]
pub struct FASTACompressor;
111
+
112
+ /// Compressor methods for the FASTA file format.
113
+ impl FASTACompressor {
114
+ pub fn new ( ) -> Self {
115
+ FASTACompressor
116
+ }
117
+
118
+ /// Compresses a FASTA file content into a vector of bytes.
119
+ pub fn compress ( & self , content : & str ) -> Vec < u8 > {
120
+ let mut lines = content. lines ( ) ;
121
+ let header = lines. next ( ) . unwrap_or ( "" ) . to_string ( ) ;
122
+ let sequence: String = lines. map ( |line| line. trim ( ) ) . collect ( ) ;
123
+
124
+ let sequence_length = sequence. len ( ) as u32 ;
125
+ let compressed_data = compress_sequence ( & sequence) ;
126
+
127
+ let mut output = Vec :: new ( ) ;
128
+
129
+ // Write header length (4 bytes)
130
+ output. extend_from_slice ( & ( header. len ( ) as u32 ) . to_le_bytes ( ) ) ;
131
+
132
+ // Write header
133
+ output. extend_from_slice ( header. as_bytes ( ) ) ;
134
+
135
+ // Write sequence length (4 bytes)
136
+ output. extend_from_slice ( & sequence_length. to_le_bytes ( ) ) ;
137
+
138
+ // Write compressed data length (4 bytes)
139
+ output. extend_from_slice ( & ( compressed_data. len ( ) as u32 ) . to_le_bytes ( ) ) ;
140
+
141
+ // Write compressed data
142
+ output. extend_from_slice ( & compressed_data) ;
143
+
144
+ output
145
+ }
146
+
147
+ /// Decompresses a vector of bytes into a FASTA file content.
148
+ #[ cfg_attr( target_arch = "wasm32" , wasm_bindgen) ]
149
+ pub fn decompress ( & self , data : & [ u8 ] ) -> Result < String , String > {
150
+ if data. len ( ) < 12 {
151
+ return Err ( "File is too short" . to_string ( ) ) ;
152
+ }
153
+
154
+ let header_len = u32:: from_le_bytes ( data[ 0 ..4 ] . try_into ( ) . unwrap ( ) ) as usize ;
155
+
156
+ if data. len ( ) < 12 + header_len {
157
+ return Err ( "File is too short for header" . to_string ( ) ) ;
158
+ }
159
+
160
+ let header =
161
+ String :: from_utf8 ( data[ 4 ..4 + header_len] . to_vec ( ) ) . map_err ( |e| e. to_string ( ) ) ?;
162
+
163
+ let sequence_length =
164
+ u32:: from_le_bytes ( data[ 4 + header_len..8 + header_len] . try_into ( ) . unwrap ( ) ) as usize ;
165
+
166
+ let compressed_len =
167
+ u32:: from_le_bytes ( data[ 8 + header_len..12 + header_len] . try_into ( ) . unwrap ( ) ) as usize ;
168
+
169
+ if data. len ( ) < 12 + header_len + compressed_len {
170
+ return Err ( "File is too short for compressed data" . to_string ( ) ) ;
171
+ }
172
+
173
+ let compressed_data = & data[ 12 + header_len..12 + header_len + compressed_len] ;
174
+ let decompressed =
175
+ decompress_sequence ( compressed_data, sequence_length) . unwrap_or_default ( ) ;
176
+
177
+ let mut result = String :: with_capacity (
178
+ header. len ( ) + decompressed. len ( ) + ( decompressed. len ( ) / 60 ) * 2 ,
179
+ ) ;
180
+ result. push_str ( & header) ;
181
+ result. push ( '\n' ) ;
182
+
183
+ for chunk in decompressed. as_bytes ( ) . chunks ( 60 ) {
184
+ result. extend ( chunk. iter ( ) . map ( |& b| b as char ) ) ;
185
+ result. push ( '\n' ) ;
186
+ }
187
+
188
+ Ok ( result)
189
+ }
190
+ }
0 commit comments