@@ -29,6 +29,7 @@ class EncodeData
29
29
/* Data fields from current line. */
30
30
std::string contig{};
31
31
int64_t pos{0 };
32
+ int32_t stored_alt{0 };
32
33
int32_t n_alt{-1 };
33
34
std::vector<std::string> unique_fields{};
34
35
std::vector<uint32_t > field2uid{};
@@ -38,8 +39,11 @@ class EncodeData
38
39
std::string next_contig{};
39
40
int64_t next_pos{0 };
40
41
41
- inline void clear_line (std::string && next_contig, int64_t next_pos, int32_t next_n_alt)
42
+ inline void clear_line (int64_t next_pos, int32_t next_n_alt)
42
43
{
44
+ next_n_alt += stored_alt;
45
+ stored_alt = 0 ;
46
+
43
47
if (next_contig != contig || (next_pos / 10000 ) != (pos / 10000 ))
44
48
{
45
49
/// Previous line is not available, clear values
@@ -56,7 +60,7 @@ class EncodeData
56
60
}
57
61
58
62
/// Clear data from this line to prepare for the next one
59
- contig = std::move ( next_contig) ;
63
+ contig = next_contig;
60
64
pos = next_pos;
61
65
n_alt = next_n_alt;
62
66
unique_fields.resize (0 );
@@ -85,7 +89,6 @@ inline void encode_buffer(Tbuffer_out & buffer_out, Tbuffer_in & buffer_in, Enco
85
89
set_input_size (buffer_in, ed);
86
90
buffer_out.reserve (ENC_BUFFER_SIZE);
87
91
std::size_t constexpr N_FIELDS_SITE_DATA{9 }; // how many fields of the VCF contains site data
88
- std::string next_contig{};
89
92
int64_t next_pos{0 };
90
93
91
94
while (ed.i < ed.in_size )
@@ -104,7 +107,7 @@ inline void encode_buffer(Tbuffer_out & buffer_out, Tbuffer_in & buffer_in, Enco
104
107
ed.header_line = buffer_in[ed.b ] == ' #' ; // check if in header line
105
108
106
109
if (not ed.header_line )
107
- next_contig.assign (&buffer_in[ed.b ], ed.i - ed.b );
110
+ ed. next_contig .assign (&buffer_in[ed.b ], ed.i - ed.b );
108
111
}
109
112
else if (not ed.header_line )
110
113
{
@@ -115,7 +118,7 @@ inline void encode_buffer(Tbuffer_out & buffer_out, Tbuffer_in & buffer_in, Enco
115
118
else if (ed.field == 4 ) /* ALT field*/
116
119
{
117
120
int32_t next_n_alt = std::count (&buffer_in[ed.b ], &buffer_in[ed.i ], ' ,' );
118
- ed.clear_line (std::move (next_contig), next_pos, next_n_alt);
121
+ ed.clear_line (next_pos, next_n_alt);
119
122
}
120
123
}
121
124
@@ -213,9 +216,23 @@ inline void encode_buffer(Tbuffer_out & buffer_out, Tbuffer_in & buffer_in, Enco
213
216
++ed.field ;
214
217
} // ends inner loop
215
218
216
- // copy the remaining data to the beginning of the input buffer
217
- std::copy (&buffer_in[ed.b ], &buffer_in[ed.i ], &buffer_in[0 ]);
218
- ed.i = ed.i - ed.b ;
219
+ if (ed.field >= 3 && ed.field < N_FIELDS_SITE_DATA)
220
+ {
221
+ // write the data even if the field is not complete
222
+ buffer_out.insert (buffer_out.end (), &buffer_in[ed.b ], &buffer_in[ed.i ]);
223
+
224
+ if (ed.field == 4 ) /* ALT field*/
225
+ ed.stored_alt = std::count (&buffer_in[ed.b ], &buffer_in[ed.i ], ' ,' );
226
+
227
+ ed.i = 0 ;
228
+ }
229
+ else
230
+ {
231
+ // copy the remaining data to the beginning of the input buffer
232
+ std::copy (&buffer_in[ed.b ], &buffer_in[ed.i ], &buffer_in[0 ]);
233
+ ed.i = ed.i - ed.b ;
234
+ }
235
+
219
236
ed.b = 0 ;
220
237
ed.in_size = ed.i ;
221
238
resize_input_buffer (buffer_in, ed.i );
0 commit comments