Skip to content

Commit 66901c9

Browse files
committed
Add logics for compressing fasta file
1 parent 97cd73f commit 66901c9

File tree

4 files changed

+1340114
-0
lines changed

4 files changed

+1340114
-0
lines changed

src/lib.rs

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,3 +105,86 @@ pub fn decompress_sequence(compressed: &[u8], sequence_length: usize) -> io::Res
105105

106106
Ok(sequence)
107107
}
108+
109+
/// A struct that represents a FASTA file compressor.
///
/// The type is a field-less unit struct, so every value is interchangeable.
/// `Default` is derived so that `FASTACompressor::default()` can be used
/// anywhere a zero-argument constructor is expected.
#[derive(Debug, Clone, Copy, Default)]
pub struct FASTACompressor;
111+
112+
/// Compressor methods for the FASTA file format.
113+
impl FASTACompressor {
114+
pub fn new() -> Self {
115+
FASTACompressor
116+
}
117+
118+
/// Compresses a FASTA file content into a vector of bytes.
119+
pub fn compress(&self, content: &str) -> Vec<u8> {
120+
let mut lines = content.lines();
121+
let header = lines.next().unwrap_or("").to_string();
122+
let sequence: String = lines.map(|line| line.trim()).collect();
123+
124+
let sequence_length = sequence.len() as u32;
125+
let compressed_data = compress_sequence(&sequence);
126+
127+
let mut output = Vec::new();
128+
129+
// Write header length (4 bytes)
130+
output.extend_from_slice(&(header.len() as u32).to_le_bytes());
131+
132+
// Write header
133+
output.extend_from_slice(header.as_bytes());
134+
135+
// Write sequence length (4 bytes)
136+
output.extend_from_slice(&sequence_length.to_le_bytes());
137+
138+
// Write compressed data length (4 bytes)
139+
output.extend_from_slice(&(compressed_data.len() as u32).to_le_bytes());
140+
141+
// Write compressed data
142+
output.extend_from_slice(&compressed_data);
143+
144+
output
145+
}
146+
147+
/// Decompresses a vector of bytes into a FASTA file content.
148+
#[cfg_attr(target_arch = "wasm32", wasm_bindgen)]
149+
pub fn decompress(&self, data: &[u8]) -> Result<String, String> {
150+
if data.len() < 12 {
151+
return Err("File is too short".to_string());
152+
}
153+
154+
let header_len = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize;
155+
156+
if data.len() < 12 + header_len {
157+
return Err("File is too short for header".to_string());
158+
}
159+
160+
let header =
161+
String::from_utf8(data[4..4 + header_len].to_vec()).map_err(|e| e.to_string())?;
162+
163+
let sequence_length =
164+
u32::from_le_bytes(data[4 + header_len..8 + header_len].try_into().unwrap()) as usize;
165+
166+
let compressed_len =
167+
u32::from_le_bytes(data[8 + header_len..12 + header_len].try_into().unwrap()) as usize;
168+
169+
if data.len() < 12 + header_len + compressed_len {
170+
return Err("File is too short for compressed data".to_string());
171+
}
172+
173+
let compressed_data = &data[12 + header_len..12 + header_len + compressed_len];
174+
let decompressed =
175+
decompress_sequence(compressed_data, sequence_length).unwrap_or_default();
176+
177+
let mut result = String::with_capacity(
178+
header.len() + decompressed.len() + (decompressed.len() / 60) * 2,
179+
);
180+
result.push_str(&header);
181+
result.push('\n');
182+
183+
for chunk in decompressed.as_bytes().chunks(60) {
184+
result.extend(chunk.iter().map(|&b| b as char));
185+
result.push('\n');
186+
}
187+
188+
Ok(result)
189+
}
190+
}

tests/compression.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@ use base_sequence_compression::{compress_sequence, decompress_sequence};
22

33
#[cfg(test)]
44
mod tests {
5+
use std::path::Path;
6+
7+
use base_sequence_compression::FASTACompressor;
8+
59
use super::*;
610

711
#[test]
@@ -105,4 +109,28 @@ mod tests {
105109

106110
assert_eq!("ACGT", decompressed);
107111
}
112+
113+
/// Round-trips the fixture at `tests/input/test.fasta` through
/// `FASTACompressor::compress` and `FASTACompressor::decompress` and checks
/// that the text comes back unchanged.
#[test]
fn test_fasta_file() {
    let fixture = Path::new("tests/input/test.fasta");
    let original = std::fs::read_to_string(fixture).unwrap();

    let codec = FASTACompressor::new();
    let packed = codec.compress(&original);
    let restored = codec.decompress(&packed).unwrap();

    assert_eq!(original, restored);
}
124+
125+
/// Round-trips the fixture at `tests/input/large.fasta` through
/// `FASTACompressor::compress` and `FASTACompressor::decompress` and checks
/// that the text comes back unchanged.
#[test]
fn test_large_fasta_file() {
    let fixture = Path::new("tests/input/large.fasta");
    let original = std::fs::read_to_string(fixture).unwrap();

    let codec = FASTACompressor::new();
    let packed = codec.compress(&original);
    let restored = codec.decompress(&packed).unwrap();

    assert_eq!(original, restored);
}
108136
}

0 commit comments

Comments
 (0)