Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
"@types/node": "^20.0.0",
"@vitest/coverage-v8": "^2.1.8",
"memfs": "^4.14.0",
"testcontainers": "^10.24.2",
"vitest": "^2.1.8"
},
"publishConfig": {
Expand Down
2 changes: 1 addition & 1 deletion packages/core/src/splitter/ast-splitter.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import Parser from 'tree-sitter'

// Language parsers
const JavaScript = require('tree-sitter-javascript')

import { CodeChunk, Splitter } from './index'

import { LangChainCodeSplitter } from './langchain-splitter'
const TypeScript = require('tree-sitter-typescript').typescript
const CSharp = require('tree-sitter-c-sharp')
Expand Down
369 changes: 369 additions & 0 deletions packages/core/test/integration/milvus-grpc.integration.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,369 @@
import type { StartedTestContainer } from 'testcontainers'
import type { VectorDocument } from '../../src/vectordb/types.js'
import { GenericContainer, Wait } from 'testcontainers'
import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it } from 'vitest'
import { MilvusVectorDatabase } from '../../src/vectordb/milvus-vectordb.js'

/**
 * Builds a VectorDocument fixture for the tests below.
 *
 * Every field except `id` has a default: a 1536-element dense vector filled
 * with 0.1, a small TypeScript chunk location, and a metadata object.
 * `overrides` is spread last, so any key it supplies replaces the default
 * wholesale (a supplied `metadata` object is NOT merged with the default one).
 *
 * @param overrides - Fields to override; `id` is required.
 * @returns The defaults combined with `overrides`.
 */
function createTestDocument(overrides: Partial<VectorDocument> & { id: string }): VectorDocument {
  const defaults = {
    vector: Array.from({ length: 1536 }, () => 0.1),
    content: 'test content',
    relativePath: 'src/test.ts',
    startLine: 1,
    endLine: 10,
    fileExtension: '.ts',
    metadata: {
      language: 'typescript',
      codebasePath: '/home/user/test-project',
      chunkIndex: 0,
    },
  }

  return { ...defaults, ...overrides }
}

/**
* Integration tests for Milvus gRPC client functionality.
*
* Tests use Testcontainers to automatically manage Milvus instances.
* Uses Milvus standalone with embedded etcd (no external etcd/minio needed).
* Docker is required; tests are automatically skipped if unavailable.
*/
describe('milvus gRPC Client Integration', () => {
let container: StartedTestContainer | undefined
let milvusDb: MilvusVectorDatabase
const testCollectionName = 'test_milvus_integration'
let skipTests = false

/**
 * Best-effort cleanup: drops `collectionName` when it exists.
 *
 * Errors from the existence check and from the drop itself are deliberately
 * swallowed so that cleanup never fails a test.
 *
 * @param collectionName - Name of the collection to remove.
 */
async function dropCollectionIfExists(collectionName: string): Promise<void> {
  // milvusDb is only assigned inside beforeEach. If client construction threw
  // there, accessing milvusDb.hasCollection would raise a synchronous TypeError
  // (which the .catch below cannot intercept) and fail the afterEach hook.
  if (!milvusDb) {
    return
  }

  const exists = await milvusDb.hasCollection(collectionName).catch(() => false)
  if (exists) {
    await milvusDb.dropCollection(collectionName).catch(() => {})
  }
}
Comment on lines +39 to +44
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

If new MilvusVectorDatabase() in beforeEach fails, milvusDb will be undefined. When afterEach runs, it will call dropCollectionIfExists, which will then throw a TypeError because it tries to access milvusDb.hasCollection. To make the cleanup more robust, you should add a check to ensure milvusDb is defined at the beginning of this function.

  async function dropCollectionIfExists(collectionName: string): Promise<void> {
    if (!milvusDb) {
      return
    }
    const exists = await milvusDb.hasCollection(collectionName).catch(() => false)
    if (exists) {
      await milvusDb.dropCollection(collectionName).catch(() => {})
    }
  }


// Starts one Milvus standalone container for the whole suite. Any failure while
// starting it is logged as a warning and flips skipTests instead of failing.
beforeAll(async () => {
  // Milvus standalone with embedded etcd (no external etcd/minio needed)
  // Reference: https://github.com/milvus-io/milvus/blob/master/scripts/standalone_embed.sh
  const milvusEnv = {
    ETCD_USE_EMBED: 'true',
    ETCD_DATA_DIR: '/var/lib/milvus/etcd',
    ETCD_CONFIG_PATH: '/milvus/configs/embedEtcd.yaml',
    COMMON_STORAGETYPE: 'local',
  }

  try {
    const milvusImage = new GenericContainer('milvusdb/milvus:v2.5.10')
      .withExposedPorts(19530, 9091)
      .withEnvironment(milvusEnv)
      .withCommand(['milvus', 'run', 'standalone'])
      // Consider the container started once /healthz on port 9091 answers 200.
      .withWaitStrategy(Wait.forHttp('/healthz', 9091).forStatusCode(200))

    container = await milvusImage.start()
  }
  catch (error) {
    console.warn('Docker not available, skipping Milvus tests:', (error as Error).message)
    skipTests = true
  }
}, 180000) // 3 minutes for Milvus startup

// Gives every test a fresh client pointed at the container's mapped gRPC port
// and removes any leftover test collection. Does nothing when the container
// is not running.
beforeEach(async () => {
  if (!skipTests && container) {
    const address = `${container.getHost()}:${container.getMappedPort(19530)}`
    milvusDb = new MilvusVectorDatabase({ address })

    await dropCollectionIfExists(testCollectionName)
  }
})

// Removes the shared test collection after each test, unless the container
// never started.
afterEach(async () => {
  const milvusRunning = !skipTests && container !== undefined
  if (milvusRunning) {
    await dropCollectionIfExists(testCollectionName)
  }
})

// Stops the Milvus container if one was started.
afterAll(async () => {
  if (container) {
    await container.stop()
  }
})

describe('collection Operations', () => {
it.skipIf(() => skipTests)('should list collections', async () => {
Copy link
Contributor

@cubic-dev-ai cubic-dev-ai bot Feb 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1: it.skipIf() expects a boolean, not a function. Passing () => skipTests is always truthy, causing tests to always be skipped. Use it.skipIf(skipTests) instead.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At packages/core/test/integration/milvus-grpc.integration.test.ts, line 92:

<comment>`it.skipIf()` expects a boolean, not a function. Passing `() => skipTests` is always truthy, causing tests to always be skipped. Use `it.skipIf(skipTests)` instead.</comment>

<file context>
@@ -0,0 +1,369 @@
+  })
+
+  describe('collection Operations', () => {
+    it.skipIf(() => skipTests)('should list collections', async () => {
+      expect(Array.isArray(await milvusDb.listCollections())).toBe(true)
+    })
</file context>
Fix with Cubic

expect(Array.isArray(await milvusDb.listCollections())).toBe(true)
})

// A fresh run starts without the test collection, so hasCollection must be false.
it('should check if collection exists', async (ctx) => {
  // Skip at run time: skipTests is only set in beforeAll, and passing a
  // function to it.skipIf() is always truthy, which skipped unconditionally.
  if (skipTests) {
    ctx.skip()
  }

  expect(await milvusDb.hasCollection(testCollectionName)).toBe(false)
})

it.skipIf(() => skipTests)('should create and drop collection', async () => {
await milvusDb.createHybridCollection(testCollectionName, 1536)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The vector dimension 1536 is a magic number that appears in multiple places (e.g., here, lines 114, 142, and in createTestDocument). The dimension 384 is also used for hybrid search tests. To improve readability and maintainability, consider defining these as named constants at the top of the file, for example: const DENSE_VECTOR_DIM = 1536; and const HYBRID_VECTOR_DIM = 384;.


expect(await milvusDb.hasCollection(testCollectionName)).toBe(true)
expect(await milvusDb.listCollections()).toContain(testCollectionName)

await milvusDb.dropCollection(testCollectionName)

expect(await milvusDb.hasCollection(testCollectionName)).toBe(false)
})
})

// Verifies that an inserted document can be queried back with its metadata field.
describe('document Insertion', () => {
  it('should insert documents with metadata', async (ctx) => {
    // Skip at run time: skipTests is only set in beforeAll, and passing a
    // function to it.skipIf() is always truthy, which skipped unconditionally.
    if (skipTests) {
      ctx.skip()
    }

    await milvusDb.createHybridCollection(testCollectionName, 1536)

    const testDoc = createTestDocument({
      id: 'chunk_1234567890abcdef',
      content: 'test content for indexing',
    })

    await milvusDb.insertHybrid(testCollectionName, [testDoc])

    const results = await milvusDb.query(testCollectionName, '', ['metadata'], 1)

    expect(results.length).toBe(1)
    expect(results[0].metadata).toBeDefined()
  })
})

describe('query with Metadata Extraction', () => {
// Seed rows for the metadata query tests: three TypeScript chunks that differ
// in path, end line, chunk index and the constant used to fill their vector.
const metadataTestDocs = [
  {
    id: 'chunk_0000000000000001',
    content: 'user authentication service',
    relativePath: 'src/auth.ts',
    endLine: 20,
    chunkIndex: 0,
    vectorFill: 0.1,
  },
  {
    id: 'chunk_0000000000000002',
    content: 'database connection handler',
    relativePath: 'src/db.ts',
    endLine: 15,
    chunkIndex: 1,
    vectorFill: 0.2,
  },
  {
    id: 'chunk_0000000000000003',
    content: 'api endpoint controller',
    relativePath: 'src/api.ts',
    endLine: 25,
    chunkIndex: 2,
    vectorFill: 0.3,
  },
]

// Creates the 1536-dimension collection and loads the three seed documents
// before each metadata test. Does nothing when the container is not running.
beforeEach(async () => {
  if (!skipTests && container) {
    await milvusDb.createHybridCollection(testCollectionName, 1536)

    const testDocs = metadataTestDocs.map(({ id, content, relativePath, endLine, chunkIndex, vectorFill }) =>
      createTestDocument({
        id,
        vector: Array.from({ length: 1536 }, () => vectorFill),
        content,
        relativePath,
        endLine,
        metadata: {
          language: 'typescript',
          codebasePath: '/home/user/my-project',
          chunkIndex,
        },
      }),
    )

    await milvusDb.insertHybrid(testCollectionName, testDocs)
  }
})

it.skipIf(() => skipTests)('should query and extract metadata.codebasePath correctly', async () => {
const results = await milvusDb.query(testCollectionName, '', ['metadata'], 3)

expect(results.length).toBeGreaterThan(0)

for (const result of results) {
expect(result.metadata).toBeDefined()
const metadata = typeof result.metadata === 'string'
? JSON.parse(result.metadata)
: result.metadata
Comment on lines +169 to +171
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This logic for parsing metadata is duplicated in a few places (here and at lines 361-363). To improve maintainability and reduce code duplication, consider extracting this into a small helper function.

expect(metadata.codebasePath).toBe('/home/user/my-project')
expect(metadata.language).toBe('typescript')
expect(typeof metadata.chunkIndex).toBe('number')
}
})

// String-typed output fields must come back as non-empty JavaScript strings.
it('should handle string values correctly', async (ctx) => {
  // Skip at run time: skipTests is only set in beforeAll, and passing a
  // function to it.skipIf() is always truthy, which skipped unconditionally.
  if (skipTests) {
    ctx.skip()
  }

  const results = await milvusDb.query(testCollectionName, '', ['content', 'relativePath', 'metadata'], 1)

  expect(results.length).toBe(1)
  const result = results[0]

  expect(typeof result.content).toBe('string')
  expect(result.content.length).toBeGreaterThan(0)
  expect(typeof result.relativePath).toBe('string')
  expect(result.relativePath).toContain('.ts')
})

// Integer-typed output fields must come back as JavaScript numbers.
it('should handle integer values correctly', async (ctx) => {
  // Skip at run time: skipTests is only set in beforeAll, and passing a
  // function to it.skipIf() is always truthy, which skipped unconditionally.
  if (skipTests) {
    ctx.skip()
  }

  const results = await milvusDb.query(testCollectionName, '', ['startLine', 'endLine'], 1)

  expect(results.length).toBe(1)
  const result = results[0]

  expect(typeof result.startLine).toBe('number')
  expect(result.startLine).toBeGreaterThanOrEqual(1)
  expect(typeof result.endLine).toBe('number')
  expect(result.endLine).toBeGreaterThan(result.startLine)
})

// Passing an empty outputFields list is expected to yield every document field.
it('should return all fields when outputFields is empty', async (ctx) => {
  // Skip at run time: skipTests is only set in beforeAll, and passing a
  // function to it.skipIf() is always truthy, which skipped unconditionally.
  if (skipTests) {
    ctx.skip()
  }

  const results = await milvusDb.query(testCollectionName, '', [], 1)

  expect(results.length).toBe(1)
  const result = results[0]

  expect(result.id).toBeTruthy()
  expect(result.content).toBeTruthy()
  expect(result.relativePath).toBeTruthy()
  expect(typeof result.startLine).toBe('number')
  expect(typeof result.endLine).toBe('number')
  expect(result.fileExtension).toBeTruthy()
  expect(result.metadata).toBeDefined()
})

// A filter expression on fileExtension must only return matching documents.
it('should filter by fileExtension correctly', async (ctx) => {
  // Skip at run time: skipTests is only set in beforeAll, and passing a
  // function to it.skipIf() is always truthy, which skipped unconditionally.
  if (skipTests) {
    ctx.skip()
  }

  const results = await milvusDb.query(testCollectionName, 'fileExtension == ".ts"', ['relativePath'], 10)

  expect(results.length).toBeGreaterThan(0)

  for (const result of results) {
    expect(result.relativePath).toContain('.ts')
  }
})

// Querying a collection with no rows must return an empty array.
it('should handle empty collections gracefully', async (ctx) => {
  // Skip at run time: skipTests is only set in beforeAll, and passing a
  // function to it.skipIf() is always truthy, which skipped unconditionally.
  if (skipTests) {
    ctx.skip()
  }

  const emptyCollection = 'test_empty_collection'
  await milvusDb.createHybridCollection(emptyCollection, 1536)

  try {
    const results = await milvusDb.query(emptyCollection, '', ['metadata'], 10)

    expect(Array.isArray(results)).toBe(true)
    expect(results.length).toBe(0)
  }
  finally {
    // This collection is not the shared one that afterEach cleans up, so drop it here.
    await milvusDb.dropCollection(emptyCollection)
  }
})
})

// Exercises hybridSearch with one dense leg ('vector') and one text leg
// ('sparse_vector') against three small Python-flavoured documents.
describe('hybrid Search with BM25', () => {
  // Dimension of the dense vectors used by this group's collection and queries.
  const hybridVectorDim = 384

  const hybridTestDocs = [
    { id: 'chunk_0000000000000001', content: 'function get_resolver() { return new URLResolver(); }', relativePath: 'urls/resolvers.py', startLine: 1, endLine: 5, vectorFill: 0.1 },
    { id: 'chunk_0000000000000002', content: 'def get_resolver(): return URLResolver()', relativePath: 'urls/base.py', startLine: 10, endLine: 15, vectorFill: 0.2 },
    { id: 'chunk_0000000000000003', content: 'class URLResolver: pass', relativePath: 'urls/resolver.py', startLine: 20, endLine: 25, vectorFill: 0.3 },
  ]

  // Builds a dense vector of the hybrid dimension with every component set to `fill`.
  const denseVectorOf = (fill: number): number[] =>
    Array.from({ length: hybridVectorDim }, () => fill)

  // Issues the two-leg hybrid search shared by all tests, asking for at most 3 results.
  const searchHybrid = (fill: number, text: string) =>
    milvusDb.hybridSearch(
      testCollectionName,
      [
        { data: denseVectorOf(fill), anns_field: 'vector', param: { nprobe: 10 }, limit: 10 },
        { data: text, anns_field: 'sparse_vector', param: { drop_ratio_search: 0.2 }, limit: 10 },
      ],
      { limit: 3 },
    )

  beforeEach(async () => {
    if (skipTests || !container) {
      return
    }

    await milvusDb.createHybridCollection(testCollectionName, hybridVectorDim)

    const testDocs = hybridTestDocs.map(d =>
      createTestDocument({
        id: d.id,
        vector: denseVectorOf(d.vectorFill),
        content: d.content,
        relativePath: d.relativePath,
        startLine: d.startLine,
        endLine: d.endLine,
        fileExtension: '.py',
        metadata: { codebasePath: '/test/django' },
      }),
    )

    await milvusDb.insertHybrid(testCollectionName, testDocs)
  })

  it('should perform hybrid search successfully', async (ctx) => {
    // Skip at run time: skipTests is only set in beforeAll, and passing a
    // function to it.skipIf() is always truthy, which skipped unconditionally.
    if (skipTests) {
      ctx.skip()
    }

    const results = await searchHybrid(0.15, 'get_resolver function')

    expect(results).toBeDefined()
    expect(Array.isArray(results)).toBe(true)
    expect(results.length).toBeGreaterThan(0)
    expect(results.length).toBeLessThanOrEqual(3)

    for (const result of results) {
      expect(result.document).toBeDefined()
      expect(result.document.content).toBeTruthy()
      expect(result.score).toBeGreaterThan(0)
    }
  })

  it('should handle query with empty sparse vector gracefully', async (ctx) => {
    // Run-time skip, for the same reason as in the first test of this group.
    if (skipTests) {
      ctx.skip()
    }

    // The text leg uses a term that appears in none of the seeded documents.
    const results = await searchHybrid(0.15, 'nonexistent_unknown_term_xyz')

    expect(results).toBeDefined()
    expect(Array.isArray(results)).toBe(true)
  })

  it('should handle BM25 model persistence across searches', async (ctx) => {
    // Run-time skip, for the same reason as in the first test of this group.
    if (skipTests) {
      ctx.skip()
    }

    const results1 = await searchHybrid(0.1, 'get_resolver')

    expect(results1).toBeDefined()
    expect(results1.length).toBeGreaterThan(0)

    // A second search on the same client must still return matches.
    const results2 = await searchHybrid(0.2, 'URLResolver class')

    expect(results2).toBeDefined()
    expect(results2.length).toBeGreaterThan(0)
  })
})

// Checks that metadata.codebasePath survives a round trip through Milvus.
describe('sync Integration', () => {
  it('should allow sync to extract codebasePath from Milvus collections', async (ctx) => {
    // Skip at run time: skipTests is only set in beforeAll, and passing a
    // function to it.skipIf() is always truthy, which skipped unconditionally.
    if (skipTests) {
      ctx.skip()
    }

    await milvusDb.createHybridCollection(testCollectionName, 1536)

    const testDoc = createTestDocument({
      id: 'chunk_bbbbbbbbbbbbbbbb',
      content: 'sync test',
      relativePath: 'src/sync.ts',
      endLine: 5,
      metadata: { language: 'typescript', codebasePath: '/home/user/sync-project', chunkIndex: 0 },
    })

    await milvusDb.insertHybrid(testCollectionName, [testDoc])

    const results = await milvusDb.query(testCollectionName, '', ['metadata'], 1)

    expect(results.length).toBe(1)
    expect(results[0].metadata).toBeDefined()

    // The metadata field may come back as a JSON string or as an object; accept both.
    const rawMetadata = results[0].metadata
    const metadata = typeof rawMetadata === 'string'
      ? JSON.parse(rawMetadata)
      : rawMetadata

    expect(metadata.codebasePath).toBe('/home/user/sync-project')
    expect(typeof metadata.codebasePath).toBe('string')
  })
})
})
3 changes: 3 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.