Skip to content

Commit 7c443e1

Browse files
committed
bump popvcf
1 parent 8ef2db2 commit 7c443e1

File tree

2 files changed

+26
-9
lines changed

2 files changed

+26
-9
lines changed

include/graphtyper/utilities/options.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ class Options
3737
bool uncompressed_sample_names{false};
3838
char encoding{'v'}; // v VCF, p popVCF
3939
bool is_on_final_output{false}; // Set as true before writing out final output
40-
int bgzf_compression_level{9};
40+
int bgzf_compression_level{-1};
4141

4242
/****
4343
* FILTERING OPTIONS

include/popvcf/encode.hpp

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ class EncodeData
2929
/* Data fields from current line. */
3030
std::string contig{};
3131
int64_t pos{0};
32+
int32_t stored_alt{0};
3233
int32_t n_alt{-1};
3334
std::vector<std::string> unique_fields{};
3435
std::vector<uint32_t> field2uid{};
@@ -38,8 +39,11 @@ class EncodeData
3839
std::string next_contig{};
3940
int64_t next_pos{0};
4041

41-
inline void clear_line(std::string && next_contig, int64_t next_pos, int32_t next_n_alt)
42+
inline void clear_line(int64_t next_pos, int32_t next_n_alt)
4243
{
44+
next_n_alt += stored_alt;
45+
stored_alt = 0;
46+
4347
if (next_contig != contig || (next_pos / 10000) != (pos / 10000))
4448
{
4549
/// Previous line is not available, clear values
@@ -56,7 +60,7 @@ class EncodeData
5660
}
5761

5862
/// Clear data from this line for the next
59-
contig = std::move(next_contig);
63+
contig = next_contig;
6064
pos = next_pos;
6165
n_alt = next_n_alt;
6266
unique_fields.resize(0);
@@ -85,7 +89,6 @@ inline void encode_buffer(Tbuffer_out & buffer_out, Tbuffer_in & buffer_in, Enco
8589
set_input_size(buffer_in, ed);
8690
buffer_out.reserve(ENC_BUFFER_SIZE);
8791
std::size_t constexpr N_FIELDS_SITE_DATA{9}; // how many fields of the VCF contains site data
88-
std::string next_contig{};
8992
int64_t next_pos{0};
9093

9194
while (ed.i < ed.in_size)
@@ -104,7 +107,7 @@ inline void encode_buffer(Tbuffer_out & buffer_out, Tbuffer_in & buffer_in, Enco
104107
ed.header_line = buffer_in[ed.b] == '#'; // check if in header line
105108

106109
if (not ed.header_line)
107-
next_contig.assign(&buffer_in[ed.b], ed.i - ed.b);
110+
ed.next_contig.assign(&buffer_in[ed.b], ed.i - ed.b);
108111
}
109112
else if (not ed.header_line)
110113
{
@@ -115,7 +118,7 @@ inline void encode_buffer(Tbuffer_out & buffer_out, Tbuffer_in & buffer_in, Enco
115118
else if (ed.field == 4) /*ALT field*/
116119
{
117120
int32_t next_n_alt = std::count(&buffer_in[ed.b], &buffer_in[ed.i], ',');
118-
ed.clear_line(std::move(next_contig), next_pos, next_n_alt);
121+
ed.clear_line(next_pos, next_n_alt);
119122
}
120123
}
121124

@@ -213,9 +216,23 @@ inline void encode_buffer(Tbuffer_out & buffer_out, Tbuffer_in & buffer_in, Enco
213216
++ed.field;
214217
} // ends inner loop
215218

216-
// copy the remaining data to the beginning of the input buffer
217-
std::copy(&buffer_in[ed.b], &buffer_in[ed.i], &buffer_in[0]);
218-
ed.i = ed.i - ed.b;
219+
if (ed.field >= 3 && ed.field < N_FIELDS_SITE_DATA)
220+
{
221+
// write the data even if the field is not complete
222+
buffer_out.insert(buffer_out.end(), &buffer_in[ed.b], &buffer_in[ed.i]);
223+
224+
if (ed.field == 4) /*ALT field*/
225+
ed.stored_alt = std::count(&buffer_in[ed.b], &buffer_in[ed.i], ',');
226+
227+
ed.i = 0;
228+
}
229+
else
230+
{
231+
// copy the remaining data to the beginning of the input buffer
232+
std::copy(&buffer_in[ed.b], &buffer_in[ed.i], &buffer_in[0]);
233+
ed.i = ed.i - ed.b;
234+
}
235+
219236
ed.b = 0;
220237
ed.in_size = ed.i;
221238
resize_input_buffer(buffer_in, ed.i);

0 commit comments

Comments
 (0)