@@ -105,3 +105,102 @@ pub fn decompress_sequence(compressed: &[u8], sequence_length: usize) -> io::Res
105
105
106
106
Ok ( sequence)
107
107
}
108
+
109
+ /// Compresses a FASTA file content into a vector of bytes.
110
+ ///
111
+ /// The FASTA file content is expected to have a header line followed by
112
+ /// the DNA sequence. The DNA sequence is compressed by representing each
113
+ /// base (A, C, T, G) with 2 bits. The compressed data starts with a 4-byte
114
+ /// (u32) integer representing the length of the original sequence.
115
+ ///
116
+ /// # Arguments
117
+ ///
118
+ /// * `content` - A string slice that holds the FASTA file content.
119
+ ///
120
+ /// # Returns
121
+ ///
122
+ /// A vector of bytes containing the compressed FASTA file content.
123
+ pub fn compress_fasta ( content : & str ) -> Vec < u8 > {
124
+ let mut lines = content. lines ( ) ;
125
+ let header = lines. next ( ) . unwrap_or ( "" ) . to_string ( ) ;
126
+ let sequence: String = lines. map ( |line| line. trim ( ) ) . collect ( ) ;
127
+
128
+ let sequence_length = sequence. len ( ) as u32 ;
129
+ let compressed_data = compress_sequence ( & sequence) ;
130
+
131
+ let mut output = Vec :: new ( ) ;
132
+
133
+ // Write header length (4 bytes)
134
+ output. extend_from_slice ( & ( header. len ( ) as u32 ) . to_le_bytes ( ) ) ;
135
+
136
+ // Write header
137
+ output. extend_from_slice ( header. as_bytes ( ) ) ;
138
+
139
+ // Write sequence length (4 bytes)
140
+ output. extend_from_slice ( & sequence_length. to_le_bytes ( ) ) ;
141
+
142
+ // Write compressed data length (4 bytes)
143
+ output. extend_from_slice ( & ( compressed_data. len ( ) as u32 ) . to_le_bytes ( ) ) ;
144
+
145
+ // Write compressed data
146
+ output. extend_from_slice ( & compressed_data) ;
147
+
148
+ output
149
+ }
150
+
151
+ /// Decompresses a vector of bytes into a FASTA file content.
152
+ ///
153
+ /// The compressed data starts with a 4-byte (u32) integer representing
154
+ /// the length of the header, followed by the header, the sequence length,
155
+ /// and the compressed sequence data. Each base (A, C, T, G) is represented
156
+ /// by 2 bits.
157
+ ///
158
+ /// # Arguments
159
+ ///
160
+ /// * `data` - A slice of bytes containing the compressed FASTA file content.
161
+ ///
162
+ /// # Returns
163
+ ///
164
+ /// A string containing the decompressed FASTA file content.
165
+ ///
166
+ /// # Errors
167
+ ///
168
+ /// Returns an error if the file is too short or if the file is missing
169
+ pub fn decompress_fasta ( data : & [ u8 ] ) -> Result < String , String > {
170
+ if data. len ( ) < 12 {
171
+ return Err ( "File is too short" . to_string ( ) ) ;
172
+ }
173
+
174
+ let header_len = u32:: from_le_bytes ( data[ 0 ..4 ] . try_into ( ) . unwrap ( ) ) as usize ;
175
+
176
+ if data. len ( ) < 12 + header_len {
177
+ return Err ( "File is too short for header" . to_string ( ) ) ;
178
+ }
179
+
180
+ let header = String :: from_utf8 ( data[ 4 ..4 + header_len] . to_vec ( ) ) . map_err ( |e| e. to_string ( ) ) ?;
181
+
182
+ let sequence_length =
183
+ u32:: from_le_bytes ( data[ 4 + header_len..8 + header_len] . try_into ( ) . unwrap ( ) ) as usize ;
184
+
185
+ let compressed_len =
186
+ u32:: from_le_bytes ( data[ 8 + header_len..12 + header_len] . try_into ( ) . unwrap ( ) ) as usize ;
187
+
188
+ if data. len ( ) < 12 + header_len + compressed_len {
189
+ return Err ( "File is too short for compressed data" . to_string ( ) ) ;
190
+ }
191
+
192
+ let compressed_data = & data[ 12 + header_len..12 + header_len + compressed_len] ;
193
+ let decompressed = decompress_sequence ( compressed_data, sequence_length) . unwrap_or_default ( ) ;
194
+
195
+ let mut result =
196
+ String :: with_capacity ( header. len ( ) + decompressed. len ( ) + ( decompressed. len ( ) / 60 ) * 2 ) ;
197
+ result. push_str ( & header) ;
198
+ result. push ( '\n' ) ;
199
+
200
+ for chunk in decompressed. as_bytes ( ) . chunks ( 60 ) {
201
+ result. extend ( chunk. iter ( ) . map ( |& b| b as char ) ) ;
202
+ result. push ( '\n' ) ;
203
+ }
204
+
205
+ Ok ( result)
206
+ }
0 commit comments