@@ -630,7 +630,9 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr
630
630
std::string medoids_file, std::string centroids_file, size_t build_pq_bytes, bool use_opq,
631
631
uint32_t num_threads, bool use_filters, const std::string &label_file,
632
632
const std::string &labels_to_medoids_file, const std::string &universal_label,
633
- const uint32_t Lf, uint32_t universal_label_num = 0 )
633
+ const uint32_t Lf, uint32_t universal_label_num = 0 ,
634
+ const char * seller_file_path = nullptr ,
635
+ uint32_t num_diverse_build = 1 )
634
636
{
635
637
size_t base_num, base_dim;
636
638
diskann::get_bin_metadata (base_file, base_num, base_dim);
@@ -643,10 +645,18 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr
643
645
diskann::cout << " Full index fits in RAM budget, should consume at most "
644
646
<< full_index_ram / (1024 * 1024 * 1024 ) << " GiBs, so building in one shot" << std::endl;
645
647
648
+ bool is_diverse_index = false ;
649
+ if (seller_file_path != nullptr && !std::string (seller_file_path).empty ())
650
+ {
651
+ is_diverse_index = true ;
652
+ }
646
653
diskann::IndexWriteParameters paras = diskann::IndexWriteParametersBuilder (L, R)
647
654
.with_filter_list_size (Lf)
648
655
.with_saturate_graph (!use_filters)
649
656
.with_num_threads (num_threads)
657
+ .with_diverse_index (is_diverse_index)
658
+ .with_seller_file (seller_file_path)
659
+ .with_num_diverse_build (num_diverse_build)
650
660
.build ();
651
661
using TagT = uint32_t ;
652
662
diskann::Index<T, TagT, LabelT> _index (compareMetric, base_dim, base_num,
@@ -1106,7 +1116,9 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const
1106
1116
diskann::Metric compareMetric, bool use_opq, const std::string &codebook_prefix, bool use_filters,
1107
1117
const std::string &label_file, const std::string &universal_label, const uint32_t filter_threshold,
1108
1118
const uint32_t Lf,
1109
- const char * reorderDataFilePath)
1119
+ const char * reorderDataFilePath,
1120
+ const char * sellerFilePath,
1121
+ uint32_t num_diverse_build)
1110
1122
{
1111
1123
std::stringstream parser;
1112
1124
parser << std::string (indexBuildParameters);
@@ -1194,6 +1206,7 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const
1194
1206
std::string mem_univ_label_file = mem_index_path + " _universal_label.txt" ;
1195
1207
std::string disk_univ_label_file = disk_index_path + " _universal_label.txt" ;
1196
1208
std::string disk_labels_int_map_file = disk_index_path + " _labels_map.txt" ;
1209
+ std::string disk_seller_file = disk_index_path + " _sellers.txt" ;
1197
1210
std::string dummy_remap_file = disk_index_path + " _dummy_remap.txt" ; // remap will be used if we break-up points of
1198
1211
// high label-density to create copies
1199
1212
@@ -1333,7 +1346,8 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const
1333
1346
diskann::build_merged_vamana_index<T, LabelT>(data_file_to_use.c_str (), diskann::Metric::L2, L, R, p_val,
1334
1347
indexing_ram_budget, mem_index_path, medoids_path, centroids_path,
1335
1348
build_pq_bytes, use_opq, num_threads, use_filters, labels_file_to_use,
1336
- labels_to_medoids_path, universal_label, Lf, universal_label_id);
1349
+ labels_to_medoids_path, universal_label, Lf, universal_label_id,
1350
+ sellerFilePath, num_diverse_build);
1337
1351
diskann::cout << timer.elapsed_seconds_for_step (" building merged vamana index" ) << std::endl;
1338
1352
1339
1353
timer.reset ();
@@ -1377,6 +1391,14 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const
1377
1391
std::remove (augmented_labels_file.c_str ());
1378
1392
std::remove (labels_file_to_use.c_str ());
1379
1393
}
1394
+
1395
+ std::string seller_mem_file = std::string (mem_index_path) + " _sellers.txt" ;
1396
+ if (file_exists (seller_mem_file))
1397
+ {
1398
+ copy_file (seller_mem_file, disk_seller_file);
1399
+ std::remove (seller_mem_file.c_str ());
1400
+ }
1401
+
1380
1402
if (created_temp_file_for_processed_data)
1381
1403
std::remove (prepped_base.c_str ());
1382
1404
std::remove (mem_index_path.c_str ());
@@ -1448,23 +1470,26 @@ template DISKANN_DLLEXPORT int build_disk_index<int8_t, uint32_t>(const char *da
1448
1470
const std::string &label_file,
1449
1471
const std::string &universal_label,
1450
1472
const uint32_t filter_threshold, const uint32_t Lf,
1451
- const char * reorderDataFilePath);
1473
+ const char * reorderDataFilePath, const char * sellerFilePath,
1474
+ uint32_t num_diverse_build);
1452
1475
// Explicit instantiation of build_disk_index for the template arguments <uint8_t, uint32_t>.
// The parameter list now ends with sellerFilePath and num_diverse_build.
// NOTE(review): DISKANN_DLLEXPORT is presumably the library's symbol-export macro -- confirm.
template DISKANN_DLLEXPORT int build_disk_index<uint8_t , uint32_t >(const char *dataFilePath, const char *indexFilePath,
1453
1476
const char *indexBuildParameters,
1454
1477
diskann::Metric compareMetric, bool use_opq,
1455
1478
const std::string &codebook_prefix, bool use_filters,
1456
1479
const std::string &label_file,
1457
1480
const std::string &universal_label,
1458
1481
const uint32_t filter_threshold, const uint32_t Lf,
1459
- const char * reorderDataFilePath);
1482
+ const char * reorderDataFilePath, const char * sellerFilePath,
1483
+ uint32_t num_diverse_build);
1460
1484
// Explicit instantiation of build_disk_index for the template arguments <float, uint32_t>.
// The parameter list now ends with sellerFilePath and num_diverse_build.
// NOTE(review): DISKANN_DLLEXPORT is presumably the library's symbol-export macro -- confirm.
template DISKANN_DLLEXPORT int build_disk_index<float , uint32_t >(const char *dataFilePath, const char *indexFilePath,
1461
1485
const char *indexBuildParameters,
1462
1486
diskann::Metric compareMetric, bool use_opq,
1463
1487
const std::string &codebook_prefix, bool use_filters,
1464
1488
const std::string &label_file,
1465
1489
const std::string &universal_label,
1466
1490
const uint32_t filter_threshold, const uint32_t Lf,
1467
- const char * reorderDataFilePath);
1491
+ const char * reorderDataFilePath, const char * sellerFilePath,
1492
+ uint32_t num_diverse_build);
1468
1493
// LabelT = uint16
1469
1494
template DISKANN_DLLEXPORT int build_disk_index<int8_t , uint16_t >(const char *dataFilePath, const char *indexFilePath,
1470
1495
const char *indexBuildParameters,
@@ -1473,23 +1498,26 @@ template DISKANN_DLLEXPORT int build_disk_index<int8_t, uint16_t>(const char *da
1473
1498
const std::string &label_file,
1474
1499
const std::string &universal_label,
1475
1500
const uint32_t filter_threshold, const uint32_t Lf,
1476
- const char * reorderDataFilePath);
1501
+ const char * reorderDataFilePath, const char * sellerFilePath,
1502
+ uint32_t num_diverse_build);
1477
1503
// Explicit instantiation of build_disk_index for the template arguments <uint8_t, uint16_t>.
// The parameter list now ends with sellerFilePath and num_diverse_build.
// NOTE(review): DISKANN_DLLEXPORT is presumably the library's symbol-export macro -- confirm.
template DISKANN_DLLEXPORT int build_disk_index<uint8_t , uint16_t >(const char *dataFilePath, const char *indexFilePath,
1478
1504
const char *indexBuildParameters,
1479
1505
diskann::Metric compareMetric, bool use_opq,
1480
1506
const std::string &codebook_prefix, bool use_filters,
1481
1507
const std::string &label_file,
1482
1508
const std::string &universal_label,
1483
1509
const uint32_t filter_threshold, const uint32_t Lf,
1484
- const char * reorderDataFilePath);
1510
+ const char * reorderDataFilePath, const char * sellerFilePath,
1511
+ uint32_t num_diverse_build);
1485
1512
// Explicit instantiation of build_disk_index for the template arguments <float, uint16_t>.
// The parameter list now ends with sellerFilePath and num_diverse_build.
// NOTE(review): DISKANN_DLLEXPORT is presumably the library's symbol-export macro -- confirm.
template DISKANN_DLLEXPORT int build_disk_index<float , uint16_t >(const char *dataFilePath, const char *indexFilePath,
1486
1513
const char *indexBuildParameters,
1487
1514
diskann::Metric compareMetric, bool use_opq,
1488
1515
const std::string &codebook_prefix, bool use_filters,
1489
1516
const std::string &label_file,
1490
1517
const std::string &universal_label,
1491
1518
const uint32_t filter_threshold, const uint32_t Lf,
1492
- const char * reorderDataFilePath);
1519
+ const char * reorderDataFilePath, const char * sellerFilePath,
1520
+ uint32_t num_diverse_build);
1493
1521
1494
1522
template DISKANN_DLLEXPORT int build_merged_vamana_index<int8_t , uint32_t >(
1495
1523
std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate,
0 commit comments