@@ -247,71 +247,146 @@ public void mergeNumericField(FieldInfo mergeFieldInfo, MergeState mergeState) t
247
247
}
248
248
}
249
249
250
+ @ Override
251
+ public void mergeBinaryField (FieldInfo mergeFieldInfo , MergeState mergeState ) throws IOException {
252
+ var result = compatibleWithOptimizedMerge (enableOptimizedMerge , mergeState , mergeFieldInfo );
253
+ if (result .supported ()) {
254
+ mergeBinaryField (result , mergeFieldInfo , mergeState );
255
+ } else {
256
+ super .mergeBinaryField (mergeFieldInfo , mergeState );
257
+ }
258
+ }
259
+
250
260
@ Override
251
261
public void addBinaryField (FieldInfo field , DocValuesProducer valuesProducer ) throws IOException {
252
262
meta .writeInt (field .number );
253
263
meta .writeByte (ES819TSDBDocValuesFormat .BINARY );
254
264
255
- BinaryDocValues values = valuesProducer .getBinary (field );
256
- long start = data .getFilePointer ();
257
- meta .writeLong (start ); // dataOffset
258
- int numDocsWithField = 0 ;
259
- int minLength = Integer .MAX_VALUE ;
260
- int maxLength = 0 ;
261
- for (int doc = values .nextDoc (); doc != DocIdSetIterator .NO_MORE_DOCS ; doc = values .nextDoc ()) {
262
- numDocsWithField ++;
263
- BytesRef v = values .binaryValue ();
264
- int length = v .length ;
265
- data .writeBytes (v .bytes , v .offset , v .length );
266
- minLength = Math .min (length , minLength );
267
- maxLength = Math .max (length , maxLength );
268
- }
269
- assert numDocsWithField <= maxDoc ;
270
- meta .writeLong (data .getFilePointer () - start ); // dataLength
271
-
272
- if (numDocsWithField == 0 ) {
273
- meta .writeLong (-2 ); // docsWithFieldOffset
274
- meta .writeLong (0L ); // docsWithFieldLength
275
- meta .writeShort ((short ) -1 ); // jumpTableEntryCount
276
- meta .writeByte ((byte ) -1 ); // denseRankPower
277
- } else if (numDocsWithField == maxDoc ) {
278
- meta .writeLong (-1 ); // docsWithFieldOffset
279
- meta .writeLong (0L ); // docsWithFieldLength
280
- meta .writeShort ((short ) -1 ); // jumpTableEntryCount
281
- meta .writeByte ((byte ) -1 ); // denseRankPower
282
- } else {
283
- long offset = data .getFilePointer ();
284
- meta .writeLong (offset ); // docsWithFieldOffset
285
- values = valuesProducer .getBinary (field );
286
- final short jumpTableEntryCount = IndexedDISI .writeBitSet (values , data , IndexedDISI .DEFAULT_DENSE_RANK_POWER );
287
- meta .writeLong (data .getFilePointer () - offset ); // docsWithFieldLength
288
- meta .writeShort (jumpTableEntryCount );
289
- meta .writeByte (IndexedDISI .DEFAULT_DENSE_RANK_POWER );
290
- }
265
+ if (valuesProducer instanceof TsdbDocValuesProducer tsdbValuesProducer && tsdbValuesProducer .mergeStats .supported ()) {
266
+ final int numDocsWithField = tsdbValuesProducer .mergeStats .sumNumDocsWithField ();
267
+ final int minLength = tsdbValuesProducer .mergeStats .minLength ();
268
+ final int maxLength = tsdbValuesProducer .mergeStats .maxLength ();
291
269
292
- meta .writeInt (numDocsWithField );
293
- meta .writeInt (minLength );
294
- meta .writeInt (maxLength );
295
- if (maxLength > minLength ) {
296
- start = data .getFilePointer ();
297
- meta .writeLong (start );
298
- meta .writeVInt (ES819TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT );
270
+ assert numDocsWithField <= maxDoc ;
299
271
300
- final DirectMonotonicWriter writer = DirectMonotonicWriter .getInstance (
301
- meta ,
302
- data ,
303
- numDocsWithField + 1 ,
304
- ES819TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT
305
- );
306
- long addr = 0 ;
307
- writer .add (addr );
308
- values = valuesProducer .getBinary (field );
272
+ BinaryDocValues values = valuesProducer .getBinary (field );
273
+ long start = data .getFilePointer ();
274
+ meta .writeLong (start ); // dataOffset
275
+
276
+ OffsetsAccumulator offsetsAccumulator = null ;
277
+ DISIAccumulator disiAccumulator = null ;
278
+ try {
279
+ if (numDocsWithField > 0 && numDocsWithField < maxDoc ) {
280
+ disiAccumulator = new DISIAccumulator (dir , context , data , IndexedDISI .DEFAULT_DENSE_RANK_POWER );
281
+ }
282
+
283
+ assert maxLength >= minLength ;
284
+ if (maxLength > minLength ) {
285
+ offsetsAccumulator = new OffsetsAccumulator (dir , context , data , numDocsWithField );
286
+ }
287
+
288
+ for (int doc = values .nextDoc (); doc != DocIdSetIterator .NO_MORE_DOCS ; doc = values .nextDoc ()) {
289
+ BytesRef v = values .binaryValue ();
290
+ data .writeBytes (v .bytes , v .offset , v .length );
291
+ if (disiAccumulator != null ) {
292
+ disiAccumulator .addDocId (doc );
293
+ }
294
+ if (offsetsAccumulator != null ) {
295
+ offsetsAccumulator .addDoc (v .length );
296
+ }
297
+ }
298
+ meta .writeLong (data .getFilePointer () - start ); // dataLength
299
+
300
+ if (numDocsWithField == 0 ) {
301
+ meta .writeLong (-2 ); // docsWithFieldOffset
302
+ meta .writeLong (0L ); // docsWithFieldLength
303
+ meta .writeShort ((short ) -1 ); // jumpTableEntryCount
304
+ meta .writeByte ((byte ) -1 ); // denseRankPower
305
+ } else if (numDocsWithField == maxDoc ) {
306
+ meta .writeLong (-1 ); // docsWithFieldOffset
307
+ meta .writeLong (0L ); // docsWithFieldLength
308
+ meta .writeShort ((short ) -1 ); // jumpTableEntryCount
309
+ meta .writeByte ((byte ) -1 ); // denseRankPower
310
+ } else {
311
+ long offset = data .getFilePointer ();
312
+ meta .writeLong (offset ); // docsWithFieldOffset
313
+ final short jumpTableEntryCount = disiAccumulator .build (data );
314
+ meta .writeLong (data .getFilePointer () - offset ); // docsWithFieldLength
315
+ meta .writeShort (jumpTableEntryCount );
316
+ meta .writeByte (IndexedDISI .DEFAULT_DENSE_RANK_POWER );
317
+ }
318
+
319
+ meta .writeInt (numDocsWithField );
320
+ meta .writeInt (minLength );
321
+ meta .writeInt (maxLength );
322
+ if (offsetsAccumulator != null ) {
323
+ offsetsAccumulator .build (meta , data );
324
+ }
325
+ } finally {
326
+ IOUtils .close (disiAccumulator , offsetsAccumulator );
327
+ }
328
+ } else {
329
+ BinaryDocValues values = valuesProducer .getBinary (field );
330
+ long start = data .getFilePointer ();
331
+ meta .writeLong (start ); // dataOffset
332
+ int numDocsWithField = 0 ;
333
+ int minLength = Integer .MAX_VALUE ;
334
+ int maxLength = 0 ;
309
335
for (int doc = values .nextDoc (); doc != DocIdSetIterator .NO_MORE_DOCS ; doc = values .nextDoc ()) {
310
- addr += values .binaryValue ().length ;
336
+ numDocsWithField ++;
337
+ BytesRef v = values .binaryValue ();
338
+ int length = v .length ;
339
+ data .writeBytes (v .bytes , v .offset , v .length );
340
+ minLength = Math .min (length , minLength );
341
+ maxLength = Math .max (length , maxLength );
342
+ }
343
+ assert numDocsWithField <= maxDoc ;
344
+ meta .writeLong (data .getFilePointer () - start ); // dataLength
345
+
346
+ if (numDocsWithField == 0 ) {
347
+ meta .writeLong (-2 ); // docsWithFieldOffset
348
+ meta .writeLong (0L ); // docsWithFieldLength
349
+ meta .writeShort ((short ) -1 ); // jumpTableEntryCount
350
+ meta .writeByte ((byte ) -1 ); // denseRankPower
351
+ } else if (numDocsWithField == maxDoc ) {
352
+ meta .writeLong (-1 ); // docsWithFieldOffset
353
+ meta .writeLong (0L ); // docsWithFieldLength
354
+ meta .writeShort ((short ) -1 ); // jumpTableEntryCount
355
+ meta .writeByte ((byte ) -1 ); // denseRankPower
356
+ } else {
357
+ long offset = data .getFilePointer ();
358
+ meta .writeLong (offset ); // docsWithFieldOffset
359
+ values = valuesProducer .getBinary (field );
360
+ final short jumpTableEntryCount = IndexedDISI .writeBitSet (values , data , IndexedDISI .DEFAULT_DENSE_RANK_POWER );
361
+ meta .writeLong (data .getFilePointer () - offset ); // docsWithFieldLength
362
+ meta .writeShort (jumpTableEntryCount );
363
+ meta .writeByte (IndexedDISI .DEFAULT_DENSE_RANK_POWER );
364
+ }
365
+
366
+ meta .writeInt (numDocsWithField );
367
+ meta .writeInt (minLength );
368
+ meta .writeInt (maxLength );
369
+ if (maxLength > minLength ) {
370
+ start = data .getFilePointer ();
371
+ meta .writeLong (start );
372
+ meta .writeVInt (ES819TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT );
373
+
374
+ final DirectMonotonicWriter writer = DirectMonotonicWriter .getInstance (
375
+ meta ,
376
+ data ,
377
+ numDocsWithField + 1 ,
378
+ ES819TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT
379
+ );
380
+ long addr = 0 ;
311
381
writer .add (addr );
382
+ values = valuesProducer .getBinary (field );
383
+ for (int doc = values .nextDoc (); doc != DocIdSetIterator .NO_MORE_DOCS ; doc = values .nextDoc ()) {
384
+ addr += values .binaryValue ().length ;
385
+ writer .add (addr );
386
+ }
387
+ writer .finish ();
388
+ meta .writeLong (data .getFilePointer () - start );
312
389
}
313
- writer .finish ();
314
- meta .writeLong (data .getFilePointer () - start );
315
390
}
316
391
}
317
392
0 commit comments