@@ -6596,4 +6596,200 @@ void getExperimentItemsStats__withFeedbackScoresIsNotEmptyFilter() {
65966596 }
65976597 }
65986598
6599+ @ Nested
6600+ @ DisplayName ("OPIK-2469: Cross-Project Traces Duplicate Test" )
6601+ @ TestInstance (TestInstance .Lifecycle .PER_CLASS )
6602+ class CrossProjectTracesDuplicateTest {
6603+
6604+ @ Test
6605+ @ DisplayName ("Should return unique experiment items when trace has spans in multiple projects" )
6606+ void findDatasetItemsWithExperimentItems__whenTraceHasSpansInMultipleProjects__thenReturnUniqueItems () {
6607+
6608+ var workspaceName = UUID .randomUUID ().toString ();
6609+ var apiKey = UUID .randomUUID ().toString ();
6610+ var workspaceId = UUID .randomUUID ().toString ();
6611+
6612+ mockTargetWorkspace (apiKey , workspaceName , workspaceId );
6613+
6614+ // Create dataset
6615+ var dataset = factory .manufacturePojo (Dataset .class );
6616+ var datasetId = createAndAssert (dataset , apiKey , workspaceName );
6617+
6618+ // Create dataset item
6619+ var datasetItem = factory .manufacturePojo (DatasetItem .class );
6620+ var datasetItemBatch = DatasetItemBatch .builder ()
6621+ .datasetId (datasetId )
6622+ .items (List .of (datasetItem ))
6623+ .build ();
6624+ putAndAssert (datasetItemBatch , workspaceName , apiKey );
6625+
6626+ // Create Project A
6627+ var projectA = UUID .randomUUID ().toString ();
6628+
6629+ // Create trace in Project A with spans
6630+ var trace1 = factory .manufacturePojo (Trace .class ).toBuilder ()
6631+ .projectName (projectA )
6632+ .build ();
6633+ createAndAssert (trace1 , workspaceName , apiKey );
6634+
6635+ // Create span in Project A for trace1
6636+ var span1InProjectA = factory .manufacturePojo (Span .class ).toBuilder ()
6637+ .projectName (projectA )
6638+ .traceId (trace1 .id ())
6639+ .build ();
6640+ createSpan (span1InProjectA , apiKey , workspaceName );
6641+
6642+ // ROOT CAUSE SIMULATION: Insert spans directly into ClickHouse for the SAME trace in Project B
6643+ // This creates a cross-project trace scenario
6644+ insertSpansForTraceInDifferentProject (workspaceId , trace1 .id (), workspaceName , apiKey );
6645+
6646+ // Wait for ClickHouse to process the manually inserted span
6647+ try {
6648+ Thread .sleep (500 );
6649+ } catch (InterruptedException e ) {
6650+ Thread .currentThread ().interrupt ();
6651+ }
6652+
6653+ // Create another trace in Project A (no cross-project issue)
6654+ var trace2 = factory .manufacturePojo (Trace .class ).toBuilder ()
6655+ .projectName (projectA )
6656+ .build ();
6657+ createAndAssert (trace2 , workspaceName , apiKey );
6658+
6659+ // Create experiment items for both traces
6660+ var experimentId = GENERATOR .generate ();
6661+ var experimentItem1 = factory .manufacturePojo (ExperimentItem .class ).toBuilder ()
6662+ .experimentId (experimentId )
6663+ .datasetItemId (datasetItem .id ())
6664+ .traceId (trace1 .id ())
6665+ .input (trace1 .input ())
6666+ .output (trace1 .output ())
6667+ .build ();
6668+
6669+ var experimentItem2 = factory .manufacturePojo (ExperimentItem .class ).toBuilder ()
6670+ .experimentId (experimentId )
6671+ .datasetItemId (datasetItem .id ())
6672+ .traceId (trace2 .id ())
6673+ .input (trace2 .input ())
6674+ .output (trace2 .output ())
6675+ .build ();
6676+
6677+ var experimentItemsBatch = ExperimentItemsBatch .builder ()
6678+ .experimentItems (Set .of (experimentItem1 , experimentItem2 ))
6679+ .build ();
6680+ createAndAssert (experimentItemsBatch , apiKey , workspaceName );
6681+
6682+ // Query the endpoint
6683+ var result = datasetResourceClient .getDatasetItemsWithExperimentItems (
6684+ datasetId ,
6685+ List .of (experimentId ),
6686+ apiKey ,
6687+ workspaceName );
6688+
6689+ // Assert results
6690+ assertThat (result ).isNotNull ();
6691+ assertThat (result .content ()).hasSize (1 );
6692+
6693+ var datasetItemResult = result .content ().get (0 );
6694+ assertThat (datasetItemResult .id ()).isEqualTo (datasetItem .id ());
6695+
6696+ // CRITICAL ASSERTION: Should have exactly 2 unique experiment items (no duplicates)
6697+ // Without the fix, trace1 appears twice because it has spans in 2 projects
6698+ var experimentItems = datasetItemResult .experimentItems ();
6699+ assertThat (experimentItems ).isNotNull ();
6700+
6701+ // Count experiment items by their ID to detect duplicates
6702+ var experimentItemIds = experimentItems .stream ()
6703+ .map (ExperimentItem ::id )
6704+ .collect (Collectors .toList ());
6705+
6706+ var uniqueIds = new HashSet <>(experimentItemIds );
6707+
6708+ // THIS IS THE KEY ASSERTION - Verifies fix for OPIK-2469
6709+ assertThat (experimentItemIds )
6710+ .as ("Should not contain duplicate experiment item IDs - trace1 has spans in 2 projects but should appear once" )
6711+ .hasSameSizeAs (uniqueIds )
6712+ .as ("Should have exactly 2 unique experiment items" )
6713+ .hasSize (2 );
6714+
6715+ // Verify the correct experiment items are present
6716+ assertThat (uniqueIds ).containsExactlyInAnyOrder (experimentItem1 .id (), experimentItem2 .id ());
6717+
6718+ // Verify each experiment item appears only once
6719+ experimentItemIds .forEach (id -> {
6720+ long count = experimentItemIds .stream ().filter (i -> i .equals (id )).count ();
6721+ assertThat (count )
6722+ .as ("Experiment item '%s' should appear exactly once, but appears '%d' times" , id , count )
6723+ .isEqualTo (1 );
6724+ });
6725+ }
6726+
6727+ /**
6728+ * Simulates the production scenario where a trace has spans in multiple projects.
6729+ * This is the root cause of OPIK-2469: when GROUP BY includes project_id,
6730+ * the query returns multiple rows for the same trace_id, causing duplicates.
6731+ */
6732+ private void insertSpansForTraceInDifferentProject (String workspaceId , UUID traceId ,
6733+ String workspaceName , String apiKey ) {
6734+ try {
6735+ // Create Project B through the API (this ensures all related tables are properly populated)
6736+ var projectBName = UUID .randomUUID ().toString ();
6737+ var dummyTrace = factory .manufacturePojo (Trace .class ).toBuilder ()
6738+ .projectName (projectBName )
6739+ .build ();
6740+ createAndAssert (dummyTrace , workspaceName , apiKey );
6741+
6742+ // Now insert spans directly into ClickHouse for the original trace but in Project B
6743+ // This creates the cross-project trace scenario
6744+ try (var connection = CLICKHOUSE .createConnection ("?database=" + DATABASE_NAME )) {
6745+ var statement = connection .createStatement ();
6746+
6747+ // Get the project ID for Project B by using the dummy trace we just created
6748+ String getProjectIdSql = String .format (
6749+ "SELECT project_id FROM traces WHERE workspace_id = '%s' AND id = '%s' LIMIT 1" ,
6750+ workspaceId , dummyTrace .id ());
6751+
6752+ var resultSet = statement .executeQuery (getProjectIdSql );
6753+ String projectBId = null ;
6754+ if (resultSet .next ()) {
6755+ projectBId = resultSet .getString (1 );
6756+ }
6757+ resultSet .close ();
6758+
6759+ if (projectBId == null ) {
6760+ throw new RuntimeException ("Could not find Project B ID" );
6761+ }
6762+
6763+ // Insert spans into ClickHouse for the SAME trace (the original trace) but in Project B
6764+ // This creates the cross-project trace scenario
6765+ var spanId = GENERATOR .generate ();
6766+ var now = Instant .now ().getEpochSecond ();
6767+
6768+ String insertSpanSql = String .format (
6769+ """
6770+ INSERT INTO spans (
6771+ id, workspace_id, project_id, trace_id, parent_span_id,
6772+ type, name, start_time, end_time,
6773+ input, output, metadata, tags,
6774+ usage, total_estimated_cost,
6775+ created_at, last_updated_at, created_by, last_updated_by
6776+ ) VALUES (
6777+ '%s', '%s', '%s', '%s', '%s',
6778+ 'general', 'test-span-project-b', toDateTime64(%d, 9), toDateTime64(%d, 9),
6779+ map('key', 'value'), map('result', 'success'), map(), [],
6780+ map('tokens', 100), 0.05,
6781+ %d, %d, 'test-user', 'test-user'
6782+ )
6783+ """ ,
6784+ spanId , workspaceId , projectBId , traceId , GENERATOR .generate (),
6785+ now , now + 1 ,
6786+ now , now );
6787+
6788+ statement .execute (insertSpanSql );
6789+ }
6790+ } catch (Exception exception ) {
6791+ throw new RuntimeException ("Failed to insert cross-project spans" , exception );
6792+ }
6793+ }
6794+ }
65996795}
0 commit comments