Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions fdb-record-layer-core/src/test/proto/test_records_text.proto
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,21 @@ message MultiDocument {
repeated string text = 2;
}

// A text document record that also carries a group value and two boolean flags.
message FlaggedDocument {
// Primary key of the record.
optional int64 doc_id = 1 [(field).primary_key = true];
// Document body; declared with an index of type "text".
optional string text = 2 [(field).index = { type: "text" }];
// NOTE(review): presumably the grouping column for grouped indexes defined in test metadata -- confirm.
optional int64 group = 3;
// NOTE(review): the two flags below are presumably predicates answered outside of the text index -- confirm against the tests.
optional bool is_seen = 4;
optional bool is_urgent = 5;
}

// Union message with one optional field per record type message; each field is named after
// its message with a leading underscore.
message RecordTypeUnion {
optional SimpleDocument _SimpleDocument = 1;
optional ComplexDocument _ComplexDocument = 2;
optional MapDocument _MapDocument = 3;
optional MultiDocument _MultiDocument = 4;
optional ManyFieldsDocument _ManyFieldsDocument = 5;
// Entry for the FlaggedDocument record type.
optional FlaggedDocument _FlaggedDocument = 6;
}

message NestedMapDocument {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
/*
* LuceneBitmapValueQuery.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2025 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.apple.foundationdb.record.lucene;

import com.apple.foundationdb.annotation.API;
import com.apple.foundationdb.annotation.SpotBugsSuppressWarnings;
import com.apple.foundationdb.record.EvaluationContext;
import com.apple.foundationdb.record.ExecuteProperties;
import com.apple.foundationdb.record.FunctionNames;
import com.apple.foundationdb.record.IndexEntry;
import com.apple.foundationdb.record.RecordCursor;
import com.apple.foundationdb.record.RecordCursorResult;
import com.apple.foundationdb.record.lucene.directory.FDBDirectory;
import com.apple.foundationdb.record.metadata.Index;
import com.apple.foundationdb.record.metadata.IndexAggregateFunctionCall;
import com.apple.foundationdb.record.metadata.Key;
import com.apple.foundationdb.record.metadata.RecordType;
import com.apple.foundationdb.record.metadata.expressions.FieldKeyExpression;
import com.apple.foundationdb.record.metadata.expressions.KeyExpression;
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStoreBase;
import com.apple.foundationdb.record.query.IndexQueryabilityFilter;
import com.apple.foundationdb.record.query.RecordQuery;
import com.apple.foundationdb.record.query.expressions.BaseField;
import com.apple.foundationdb.record.query.expressions.QueryComponent;
import com.apple.foundationdb.record.query.plan.RecordQueryPlanner;
import com.apple.foundationdb.record.query.plan.bitmap.ComposedBitmapIndexAggregate;
import com.apple.foundationdb.record.query.plan.cascades.explain.Attribute;
import com.apple.foundationdb.record.query.plan.planning.FilterSatisfiedMask;
import com.apple.foundationdb.record.query.plan.plans.QueryResult;
import com.apple.foundationdb.record.query.plan.plans.RecordQueryPlan;
import com.apple.foundationdb.tuple.Tuple;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.DocIdSetBuilder;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Optional;

/**
* Use BitmapValue indexes to get composite bitmaps to turn back into doc ids.
* Do this for any leftover predicates after pushing down into Lucene, that is, for fields not included in Lucene documents.
* Probably only performs acceptably when the record keys and document ids have mostly congruent segment distributions.
*/
@API(API.Status.EXPERIMENTAL)
public class LuceneBitmapValueQuery extends LuceneQueryClause {
@Nonnull
private final RecordQueryPlan plan;

/**
 * Create a clause of type {@link LuceneQueryType#QUERY} around an already planned query.
 *
 * @param plan the plan that is executed when the bound Lucene query is evaluated
 */
public LuceneBitmapValueQuery(@Nonnull RecordQueryPlan plan) {
    super(LuceneQueryType.QUERY);
    this.plan = plan;
}

@Nullable
public static LuceneBitmapValueQuery tryBuild(@Nonnull RecordQueryPlanner planner,
@Nonnull RecordType recordType,
@Nonnull FilterSatisfiedMask filterMask,
@Nonnull KeyExpression groupingKey) {
final KeyExpression primaryKey = recordType.getPrimaryKey();
if (!(primaryKey instanceof FieldKeyExpression)) {
return null;
}
final List<QueryComponent> filters = new ArrayList<>();
final List<FilterSatisfiedMask> candidates = new ArrayList<>();
for (FilterSatisfiedMask child : filterMask.getChildren()) {
if (!child.isSatisfied()) {
filters.add(child.getUnsatisfiedFilter());
candidates.add(child);
} else if (child.getFilter() instanceof BaseField && groupingKey instanceof FieldKeyExpression
&& ((BaseField)child.getFilter()).getFieldName().equals(((FieldKeyExpression)groupingKey).getFieldName())) {
filters.add(child.getFilter()); // Share group predicate.
}
}
if (filters.isEmpty()) {
return null;
}
KeyExpression predicateField = null;
for (QueryComponent filter : filters) {
if (filter instanceof BaseField) {
predicateField = Key.Expressions.field(((BaseField)filter).getFieldName());
break;
}
}
if (predicateField == null) {
return null;
}
final IndexAggregateFunctionCall aggregate = new IndexAggregateFunctionCall(FunctionNames.BITMAP_VALUE,
Key.Expressions.concat(groupingKey, primaryKey).group(1));
final QueryComponent filter = filters.size() > 1 ? com.apple.foundationdb.record.query.expressions.Query.and(filters) : filters.get(0);
// TODO: Add primary key range filters here; these will limit size (and so relevancy) of returned bitmaps.

Check warning on line 128 in fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBitmapValueQuery.java

View check run for this annotation

fdb.teamscale.io / Teamscale | Findings

fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBitmapValueQuery.java#L128

TODO: Add primary key range filters here; these will limit size (and so relevancy) of returned bitmaps. https://fdb.teamscale.io/findings/details/foundationdb-fdb-record-layer?t=FORK_MR%2F3812%2FMMcM%2Flucene-bitmap-value-index%3AHEAD&id=5733A500ADD0D0A9A33BC19EB0EC2A59
final RecordQuery recordQuery = RecordQuery.newBuilder()
.setRecordType(recordType.getName())
.setFilter(filter)
.setRequiredResults(Collections.singletonList(primaryKey))
.build();
final Optional<RecordQueryPlan> recordQueryPlan = ComposedBitmapIndexAggregate.tryPlan(planner, recordQuery, aggregate, IndexQueryabilityFilter.DEFAULT);
if (recordQueryPlan.isEmpty()) {
return null;
}
for (FilterSatisfiedMask candidate : candidates) {
candidate.setSatisfied(true);
}
return new LuceneBitmapValueQuery(recordQueryPlan.get());
}

/**
 * Bind this clause by wrapping the store, the plan and the evaluation context in a {@link BitmapValueQuery}.
 *
 * @param store the record store handed to the Lucene query
 * @param index the index being scanned; not used by this clause
 * @param context the evaluation context handed to the Lucene query
 * @return the bound query, tagged with this clause's query type
 */
@Override
public BoundQuery bind(@Nonnull FDBRecordStoreBase<?> store, @Nonnull Index index, @Nonnull EvaluationContext context) {
    return BoundQuery.ofLuceneQueryWithQueryType(new BitmapValueQuery(store, plan, context), getQueryType());
}

/**
 * Describe this clause for the planner graph: one detail line with a {@code plan} placeholder, and the
 * attribute that supplies the string form of the wrapped plan for that placeholder name.
 *
 * @param detailsBuilder receives the detail line
 * @param attributeMapBuilder receives the {@code plan} attribute
 */
@Override
public void getPlannerGraphDetails(@Nonnull ImmutableList.Builder<String> detailsBuilder, @Nonnull ImmutableMap.Builder<String, Attribute> attributeMapBuilder) {
    final Attribute planAttribute = Attribute.gml(plan.toString());
    attributeMapBuilder.put("plan", planAttribute);
    detailsBuilder.add("bitmap: {{plan}}");
}

/**
 * Compute the plan hash of this clause, which is exactly the plan hash of the wrapped plan.
 *
 * @param hashMode the hash mode forwarded to the wrapped plan
 * @return the wrapped plan's hash for {@code hashMode}
 */
@Override
public int planHash(@Nonnull PlanHashMode hashMode) {
    final int wrappedPlanHash = plan.planHash(hashMode);
    return wrappedPlanHash;
}

/**
* The actual Lucene {@link Query}.
*/
static class BitmapValueQuery extends Query {

Check warning on line 164 in fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBitmapValueQuery.java

View check run for this annotation

fdb.teamscale.io / Teamscale | Findings

fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBitmapValueQuery.java#L164

Reduce this class from 112 lines to the maximum allowed 25 or externalize it in a public class https://fdb.teamscale.io/findings/details/foundationdb-fdb-record-layer?t=FORK_MR%2F3812%2FMMcM%2Flucene-bitmap-value-index%3AHEAD&id=8AE0A6496790BF440A1185F187AA96B1
@Nonnull
private final FDBRecordStoreBase<?> store;
@Nonnull
private final RecordQueryPlan plan;
@Nonnull
private final EvaluationContext context;

/**
 * Capture everything needed to execute the bitmap plan later, when a scorer is requested.
 *
 * @param store the record store the plan is executed against
 * @param plan the plan producing the bitmap index entries
 * @param context the evaluation context used for plan execution
 */
BitmapValueQuery(@Nonnull FDBRecordStoreBase<?> store, @Nonnull RecordQueryPlan plan, @Nonnull EvaluationContext context) {
    this.plan = plan;
    this.context = context;
    this.store = store;
}

@Override
public Weight createWeight(final IndexSearcher searcher, final ScoreMode scoreMode, final float boost) throws IOException {
return new ConstantScoreWeight(this, boost) {
@SuppressWarnings("PMD.CloseResource")
@Override
public ScorerSupplier scorerSupplier(final LeafReaderContext context) throws IOException {
final LeafReader reader = context.reader();
final SegmentInfo segmentInfo = ((SegmentReader)reader).getSegmentInfo().info;
final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
final Weight weight = this;

return new ScorerSupplier() {
@Override
public Scorer get(long leadCost) throws IOException {
runQuery(segmentInfo, result);
DocIdSetIterator iterator = result.build().iterator();
return new ConstantScoreScorer(weight, score(), scoreMode, iterator);
}

@Override
public long cost() {
// TODO: Could run the query to get just popcounts and not do the actually docid mapping.

Check warning on line 199 in fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBitmapValueQuery.java

View check run for this annotation

fdb.teamscale.io / Teamscale | Findings

fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBitmapValueQuery.java#L199

TODO: Could run the query to get just popcounts and not do the actually docid mapping. https://fdb.teamscale.io/findings/details/foundationdb-fdb-record-layer?t=FORK_MR%2F3812%2FMMcM%2Flucene-bitmap-value-index%3AHEAD&id=C8168D11CF6EC94145EFFB00EFC51BAE
return -1;
}
};
}

@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
return scorerSupplier(context).get(Long.MAX_VALUE);
}

@Override
public boolean isCacheable(LeafReaderContext ctx) {
return false;
}
};
}

private void runQuery(@Nonnull SegmentInfo segmentInfo, @Nonnull DocIdSetBuilder docIdSetBuilder) {
final FDBDirectory directory = (FDBDirectory)segmentInfo.dir;
final LucenePrimaryKeySegmentIndex primaryKeySegmentIndex = Objects.requireNonNull(directory.getPrimaryKeySegmentIndex());

Check warning on line 219 in fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBitmapValueQuery.java

View check run for this annotation

fdb.teamscale.io / Teamscale | Findings

fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBitmapValueQuery.java#L219

Move the declaration of "primaryKeySegmentIndex" closer to the code that uses it https://fdb.teamscale.io/findings/details/foundationdb-fdb-record-layer?t=FORK_MR%2F3812%2FMMcM%2Flucene-bitmap-value-index%3AHEAD&id=92AB37FF459E78591F57098712ED8D5C
final String segmentName = segmentInfo.name;
final DirectoryReader directoryReader;
final long segmentId;
try {
segmentId = directory.primaryKeySegmentId(segmentName, false);

Check warning on line 224 in fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBitmapValueQuery.java

View check run for this annotation

fdb.teamscale.io / Teamscale | Findings

fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBitmapValueQuery.java#L224

The value assigned to variable `segmentId` is never read https://fdb.teamscale.io/findings/details/foundationdb-fdb-record-layer?t=FORK_MR%2F3812%2FMMcM%2Flucene-bitmap-value-index%3AHEAD&id=378361FD95D97140CB2415D5A7B0DD3F
directoryReader = DirectoryReader.open(directory);
} catch (IOException ex) {
throw LuceneExceptions.toRecordCoreException("segment info error", ex);
}
// TODO: Bind primary key range parameters from max per segment, as maintained by LucenePrimaryKeySegmentIndex.

Check warning on line 229 in fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBitmapValueQuery.java

View check run for this annotation

fdb.teamscale.io / Teamscale | Findings

fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBitmapValueQuery.java#L229

TODO: Bind primary key range parameters from max per segment, as maintained by LucenePrimaryKeySegmentIndex. https://fdb.teamscale.io/findings/details/foundationdb-fdb-record-layer?t=FORK_MR%2F3812%2FMMcM%2Flucene-bitmap-value-index%3AHEAD&id=DA2549D46E672A0327AD1A526F1811F4
try (RecordCursor<QueryResult> cursor = plan.executePlan(store, context, null, ExecuteProperties.SERIAL_EXECUTE)) {
while (true) {
final RecordCursorResult<QueryResult> nextResult = store.getContext().asyncToSync(LuceneEvents.Waits.WAIT_LUCENE_FIND_PRIMARY_KEY, cursor.onNext());
if (!nextResult.hasNext()) {
break;
}
final IndexEntry indexEntry = nextResult.get().getIndexEntry();
final long offset = indexEntry.getKey().getLong(indexEntry.getKeySize() - 1);
final byte[] bits = indexEntry.getValue().getBytes(0);
int nrecs = 0;
for (int i = 0; i < bits.length; i++) {
nrecs += Integer.bitCount(bits[i] & 0xFF);
}
if (nrecs > 0) {
// TODO: Bulk version of these loops and mapping lookup. Read multiple pkey entries or store a different index.

Check warning on line 244 in fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBitmapValueQuery.java

View check run for this annotation

fdb.teamscale.io / Teamscale | Findings

fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBitmapValueQuery.java#L244

TODO: Bulk version of these loops and mapping lookup. Read multiple pkey entries or store a different index. https://fdb.teamscale.io/findings/details/foundationdb-fdb-record-layer?t=FORK_MR%2F3812%2FMMcM%2Flucene-bitmap-value-index%3AHEAD&id=9B33B6A6EA35BD4513D61485D01B8C48
final DocIdSetBuilder.BulkAdder adder = docIdSetBuilder.grow(nrecs);
for (int i = 0; i < bits.length; i++) {
int b = bits[i] & 0xFF;
for (int j = 0; j < 8; j++) {
if ((b & (1 << j)) != 0) {
Tuple primaryKey = Tuple.from(offset + i * 8L + j);
final LucenePrimaryKeySegmentIndex.DocumentIndexEntry entry;
try {
entry = primaryKeySegmentIndex.findDocument(directoryReader, primaryKey);

Check failure on line 253 in fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBitmapValueQuery.java

View check run for this annotation

fdb.teamscale.io / Teamscale | Findings

fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBitmapValueQuery.java#L253

This method is deeply nested in multiple places [0, 1, 2]. Consider extracting helper methods or reducing the nesting by using early breaks or returns. [0] https://fdb.teamscale.io/findings/details/foundationdb-fdb-record-layer?t=FORK_MR%2F3812%2FMMcM%2Flucene-bitmap-value-index%3AHEAD&id=AB5E2ABCE2DA3CA430EF681FE6EA3456 [1] https://fdb.teamscale.io/findings/details/foundationdb-fdb-record-layer?t=FORK_MR%2F3812%2FMMcM%2Flucene-bitmap-value-index%3AHEAD&id=0328FE0C37FE60A9FB7CD1A4C46A6C33 [2] https://fdb.teamscale.io/findings/details/foundationdb-fdb-record-layer?t=FORK_MR%2F3812%2FMMcM%2Flucene-bitmap-value-index%3AHEAD&id=927D80EC82CC401ED7F3D3C2FC8368BC
} catch (IOException ex) {
throw LuceneExceptions.toRecordCoreException("segment docid mapping error", ex);
}
if (entry != null) {
adder.add(entry.docId);
}
}
}
}
}
}
}
}

@Override
public String toString(final String field) {

Check warning on line 269 in fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBitmapValueQuery.java

View check run for this annotation

fdb.teamscale.io / Teamscale | Findings

fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBitmapValueQuery.java#L269

Suspicious method `toString` https://fdb.teamscale.io/findings/details/foundationdb-fdb-record-layer?t=FORK_MR%2F3812%2FMMcM%2Flucene-bitmap-value-index%3AHEAD&id=0A0DAAB70C8A081F4ADA4D53C3D774E8
return "BITMAP-VALUE(" + plan + ")";
}

/**
 * Compare with another object: it must pass {@code sameClassAs} and then {@code equalsTo}.
 *
 * @param obj the object to compare with
 * @return {@code true} when both checks pass
 */
@Override
@SpotBugsSuppressWarnings("EQ_UNUSUAL")
public boolean equals(final Object obj) {
    if (!sameClassAs(obj)) {
        return false;
    }
    return equalsTo(getClass().cast(obj));
}

/**
 * Compare the wrapped plans only; the store and the evaluation context do not take part in equality.
 *
 * @param query the other query
 * @return {@code true} when both queries wrap equal plans
 */
private boolean equalsTo(BitmapValueQuery query) {
    final RecordQueryPlan otherPlan = query.plan;
    return Objects.equals(plan, otherPlan);
}

/**
 * Combine the class hash with the hash of the wrapped plan, matching the fields compared by {@code equalsTo}.
 *
 * @return the hash code of this query
 */
@Override
public int hashCode() {
    return 31 * classHash() + plan.hashCode();
}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -96,67 +96,78 @@
@Nonnull Index index, @Nonnull QueryComponent filter,
@Nullable KeyExpression sort, boolean sortReverse,
@Nullable KeyExpression commonPrimaryKey) {
final RecordMetaData metaData = getRecordMetaData();
final Collection<RecordType> recordTypes = metaData.recordTypesForIndex(index);
if (recordTypes.size() != 1) {
return null;
}

final FilterSatisfiedMask filterMask = FilterSatisfiedMask.of(filter);

final KeyExpression rootExp = index.getRootExpression();
final KeyExpression groupingKey;
final ScanComparisons groupingComparisons;

// Getting grouping information from the index key and query filter
if (rootExp instanceof GroupingKeyExpression) {
groupingKey = ((GroupingKeyExpression)rootExp).getGroupingSubKey();
final QueryToKeyMatcher.Match groupingMatch = new QueryToKeyMatcher(filter).matchesCoveringKey(groupingKey, filterMask);
if (!groupingMatch.getType().equals((QueryToKeyMatcher.MatchType.EQUALITY))) {
return null;
}
if (filterMask.allSatisfied()) {
// If filter is only group predicates, can skip trying to find non-trivial Lucene scan.
return null;
}
groupingComparisons = new ScanComparisons(groupingMatch.getEqualityComparisons(), Collections.emptySet());
} else {
groupingKey = null;
groupingComparisons = ScanComparisons.EMPTY;
}

LucenePlanState state = new LucenePlanState(index, groupingComparisons, filter);
state.documentFields = LuceneIndexExpressions.getDocumentFieldDerivations(index, metaData);

QueryComponent queryComponent = state.groupingComparisons.isEmpty() ? state.filter : filterMask.getUnsatisfiedFilter();
// Special scans like auto-complete cannot be combined with regular queries.
LuceneScanParameters scanParameters = getSpecialScan(state, filterMask, queryComponent);
if (scanParameters == null) {
// Scan by means of normal Lucene search API.
LuceneQueryClause query = getQueryForFilter(LuceneQueryType.QUERY, state, filter, new ArrayList<>(), filterMask);
if (query == null) {
return null;
}
if (!getSort(state, sort, sortReverse, commonPrimaryKey, groupingKey)) {
return null;
}
if (!filterMask.allSatisfied() && index.getBooleanOption("tryBitmapValueIndexes", false)) {
LuceneQueryClause bitmapQuery = LuceneBitmapValueQuery.tryBuild(this,
recordTypes.iterator().next(), filterMask, groupingKey);
if (bitmapQuery != null) {
if (query instanceof LuceneBooleanQuery && ((LuceneBooleanQuery)query).getOccur() == BooleanClause.Occur.MUST) {
((LuceneBooleanQuery)query).getChildren().add(bitmapQuery);
} else {
query = new LuceneBooleanQuery(query.getQueryType(), List.of(query, bitmapQuery), BooleanClause.Occur.MUST);
}
}
}
getStoredFields(state);
LuceneScanQueryParameters.LuceneQueryHighlightParameters highlightParameters = getHighlightParameters(queryComponent);
scanParameters = new LuceneScanQueryParameters(groupingComparisons, query,
state.sort, state.storedFields, state.storedFieldTypes, highlightParameters);
}

// Wrap in plan.
RecordQueryPlan plan = LuceneIndexQueryPlan.of(index.getName(), scanParameters,
resolveFetchIndexRecords(candidateScan.getPlanContext()), false,
state.planOrderingKey, state.storedFieldExpressions);
plan = addTypeFilterIfNeeded(candidateScan, plan, getPossibleTypes(index));
if (filterMask.allSatisfied()) {
filterMask.setSatisfied(true);
}
return new ScoredPlan(plan, filterMask.getUnsatisfiedFilters(), Collections.emptyList(),
computeSargedComparisons(plan), 11 - filterMask.getUnsatisfiedFilters().size(),
state.repeated, false, false, null);

Check warning on line 170 in fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LucenePlanner.java

View check run for this annotation

fdb.teamscale.io / Teamscale | Findings

fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LucenePlanner.java#L99-L170

This method is a bit lengthy [0]. Consider shortening it, e.g. by extracting code blocks into separate methods. [0] https://fdb.teamscale.io/findings/details/foundationdb-fdb-record-layer?t=FORK_MR%2F3812%2FMMcM%2Flucene-bitmap-value-index%3AHEAD&id=0BA6390BD6A9E6A1BA7E90618B362A74
}

private static LuceneScanQueryParameters.LuceneQueryHighlightParameters getHighlightParameters(@Nonnull QueryComponent queryComponent) {
Expand Down
Loading
Loading