Skip to content

Commit de1328a

Browse files
committed
Update RAG config and argument placement
1 parent 9a6d667 commit de1328a

File tree

2 files changed

+49
-46
lines changed

2 files changed

+49
-46
lines changed

src/RAG/RAGDB.php

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -174,44 +174,44 @@ public static function createRAGIndexes(
174174
array $options = []
175175
): PromiseInterface {
176176
self::initializeIfNeeded();
177-
177+
178178
$defaultOptions = [
179179
'hnsw_m' => 16,
180180
'hnsw_ef_construction' => 64,
181181
'create_multiple_indexes' => true
182182
];
183-
183+
184184
$config = array_merge($defaultOptions, $options);
185-
185+
186186
return Async::async(function () use ($table, $vectorColumn, $config): bool {
187187
$ragTable = self::ragTable($table);
188-
188+
189189
// Create HNSW index for cosine similarity (most common for embeddings)
190190
await($ragTable->createVectorIndex(
191-
$vectorColumn,
192191
'hnsw',
193192
'vector_cosine_ops',
194-
['m' => $config['hnsw_m'], 'ef_construction' => $config['hnsw_ef_construction']]
193+
['m' => $config['hnsw_m'], 'ef_construction' => $config['hnsw_ef_construction']],
194+
$vectorColumn
195195
));
196-
196+
197197
if ($config['create_multiple_indexes']) {
198198
// Create additional indexes for different distance metrics
199199
await($ragTable->createVectorIndex(
200-
$vectorColumn,
201200
'hnsw',
202201
'vector_l2_ops',
203-
['m' => $config['hnsw_m'], 'ef_construction' => $config['hnsw_ef_construction']]
202+
['m' => $config['hnsw_m'], 'ef_construction' => $config['hnsw_ef_construction']],
203+
$vectorColumn
204204
));
205-
205+
206206
await($ragTable->createVectorIndex(
207-
$vectorColumn,
208207
'hnsw',
209208
'vector_ip_ops',
210-
['m' => $config['hnsw_m'], 'ef_construction' => $config['hnsw_ef_construction']]
209+
['m' => $config['hnsw_m'], 'ef_construction' => $config['hnsw_ef_construction']],
210+
$vectorColumn
211211
));
212212
}
213-
213+
214214
return true;
215215
})();
216216
}
217-
}
217+
}

src/RAG/RAGQueryBuilder.php

Lines changed: 35 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,12 @@ final public function __construct(string $table = '')
3232
*
3333
* @param array<string, mixed> $data The document data.
3434
* @param array<float> $embedding The embedding vector.
35-
* @param string $embeddingColumn The embedding column name.
35+
* @param string|null $embeddingColumn The embedding column name.
3636
* @return PromiseInterface<int> A promise that resolves to the number of affected rows.
3737
*/
3838
public function insertWithEmbedding(array $data, array $embedding, ?string $embeddingColumn = null): PromiseInterface
3939
{
40-
$embeddingColumn = $embeddingColumn ?? $this->ragConfig['default_vector_column'];
40+
$embeddingColumn ??= $this->ragConfig['default_vector_column'];
4141

4242
if (empty($embedding)) {
4343
return Promise::resolve(0);
@@ -52,17 +52,17 @@ public function insertWithEmbedding(array $data, array $embedding, ?string $embe
5252
*
5353
* @param array<string, mixed> $data The document data.
5454
* @param array<float> $embedding The embedding vector.
55-
* @param string $embeddingColumn The embedding column name.
5655
* @param string $idColumn The ID column name.
56+
* @param string|null $embeddingColumn The embedding column name.
5757
* @return PromiseInterface<mixed> A promise that resolves to the inserted ID.
5858
*/
5959
public function insertWithEmbeddingGetId(
6060
array $data,
6161
array $embedding,
62-
?string $embeddingColumn = null,
63-
string $idColumn = 'id'
62+
string $idColumn = 'id',
63+
?string $embeddingColumn = null
6464
): PromiseInterface {
65-
$embeddingColumn = $embeddingColumn ?? $this->ragConfig['default_vector_column'];
65+
$embeddingColumn ??= $this->ragConfig['default_vector_column'];
6666

6767
if (empty($embedding)) {
6868
return Promise::resolve(null);
@@ -76,12 +76,12 @@ public function insertWithEmbeddingGetId(
7676
* Batch insert documents with embeddings.
7777
*
7878
* @param array<array{data: array<string, mixed>, embedding: array<float>}> $documents Documents with embeddings.
79-
* @param string $embeddingColumn The embedding column name.
79+
* @param string|null $embeddingColumn The embedding column name.
8080
* @return PromiseInterface<int> A promise that resolves to the number of affected rows.
8181
*/
8282
public function insertBatchWithEmbeddings(array $documents, ?string $embeddingColumn = null): PromiseInterface
8383
{
84-
$embeddingColumn = $embeddingColumn ?? $this->ragConfig['default_vector_column'];
84+
$embeddingColumn ??= $this->ragConfig['default_vector_column'];
8585

8686
if (empty($documents)) {
8787
return Promise::resolve(0);
@@ -107,19 +107,22 @@ public function insertBatchWithEmbeddings(array $documents, ?string $embeddingCo
107107
*
108108
* @param array<float> $queryVector The query vector.
109109
* @param array<string, mixed> $filters Additional filters.
110-
* @param string $vectorColumn The vector column name.
111-
* @param float $threshold Similarity threshold.
112-
* @param int $limit Number of results to return.
110+
* @param string|null $vectorColumn The vector column name.
111+
* @param string|null $metadataColumn The metadata column name.
112+
* @param float|null $threshold Similarity threshold.
113+
* @param int|null $limit Number of results to return.
113114
* @return PromiseInterface<array<int, array<string, mixed>>> A promise that resolves to search results.
114115
*/
115116
public function performSemanticSearch(
116117
array $queryVector,
117118
array $filters = [],
118119
?string $vectorColumn = null,
120+
?string $metadataColumn = null,
119121
?float $threshold = null,
120122
?int $limit = null
121123
): PromiseInterface {
122-
$query = $this->semanticSearch($queryVector, $filters, $vectorColumn, $this->ragConfig['default_metadata_column'], $threshold, $limit);
124+
$metadataColumn ??= $this->ragConfig['default_metadata_column'];
125+
$query = $this->semanticSearch($queryVector, $filters, $vectorColumn, $metadataColumn, $threshold, $limit);
123126
return $query->get();
124127
}
125128

@@ -132,21 +135,21 @@ public function performSemanticSearch(
132135
* @param string $vectorColumn The vector column name.
133136
* @param float $textWeight Weight for text search.
134137
* @param float $vectorWeight Weight for vector search.
135-
* @param int $limit Number of results to return.
138+
* @param int|null $limit Number of results to return.
136139
* @return PromiseInterface<array<int, array<string, mixed>>> A promise that resolves to search results.
137140
*/
138141
public function performHybridSearch(
139142
string $textQuery,
140143
array $queryVector,
141-
?string $textColumn = null,
142-
?string $vectorColumn = null,
144+
string $textColumn,
145+
string $vectorColumn,
143146
float $textWeight = 0.3,
144147
float $vectorWeight = 0.7,
145148
?int $limit = null
146149
): PromiseInterface {
147-
$textColumn = $textColumn ?? $this->ragConfig['default_content_column'];
148-
$vectorColumn = $vectorColumn ?? $this->ragConfig['default_vector_column'];
149-
$limit = $limit ?? $this->ragConfig['default_search_limit'];
150+
$textColumn ??= $this->ragConfig['default_content_column'];
151+
$vectorColumn ??= $this->ragConfig['default_vector_column'];
152+
$limit ??= $this->ragConfig['default_search_limit'];
150153

151154
$query = $this->hybridSearch($textColumn, $vectorColumn, $textQuery, $queryVector, $textWeight, $vectorWeight, $limit);
152155
return $query->get();
@@ -158,7 +161,7 @@ public function performHybridSearch(
158161
* @param array<float> $queryVector The query vector.
159162
* @param array<string, mixed> $filters Additional filters.
160163
* @param int $topK Number of results to return.
161-
* @param float $threshold Similarity threshold.
164+
* @param float|null $threshold Similarity threshold.
162165
* @return PromiseInterface<array<int, array<string, mixed>>> A promise that resolves to context with citations.
163166
*/
164167
public function retrieveContextForRAG(
@@ -167,7 +170,7 @@ public function retrieveContextForRAG(
167170
int $topK = 5,
168171
?float $threshold = null
169172
): PromiseInterface {
170-
$threshold = $threshold ?? $this->ragConfig['default_similarity_threshold'];
173+
$threshold ??= $this->ragConfig['default_similarity_threshold'];
171174

172175
$query = $this->retrievalWithCitation($queryVector, ['title', 'source', 'url', 'author', 'created_at'], null, $topK)
173176
->semanticSearch($queryVector, $filters, null, null, $threshold, $topK);
@@ -185,12 +188,12 @@ public function retrieveContextForRAG(
185188
* @return PromiseInterface<int> A promise that resolves when index is created.
186189
*/
187190
public function createVectorIndex(
188-
?string $column = null,
189191
string $method = 'hnsw',
190192
string $operator = 'vector_cosine_ops',
191-
array $options = ['m' => 16, 'ef_construction' => 64]
193+
array $options = ['m' => 16, 'ef_construction' => 64],
194+
?string $column = null,
192195
): PromiseInterface {
193-
$column = $column ?? $this->ragConfig['default_vector_column'];
196+
$column ??= $this->ragConfig['default_vector_column'];
194197
$sql = $this->buildVectorIndexQuery($column, $method, $operator, $options);
195198
return AsyncPostgreSQL::execute($sql, []);
196199
}
@@ -204,11 +207,11 @@ public function createVectorIndex(
204207
* @return PromiseInterface<array<int, array<string, mixed>>> A promise that resolves to document chunks.
205208
*/
206209
public function chunkDocuments(
207-
?string $contentColumn = null,
208210
int $chunkSize = 1000,
209-
int $overlapSize = 200
211+
int $overlapSize = 200,
212+
?string $contentColumn = null,
210213
): PromiseInterface {
211-
$contentColumn = $contentColumn ?? $this->ragConfig['default_content_column'];
214+
$contentColumn ??= $this->ragConfig['default_content_column'];
212215
$sql = $this->buildChunkQuery($contentColumn, $chunkSize, $overlapSize);
213216
return AsyncPostgreSQL::query($sql, []);
214217
}
@@ -218,17 +221,17 @@ public function chunkDocuments(
218221
*
219222
* @param mixed $id The document ID.
220223
* @param array<float> $embedding The new embedding vector.
221-
* @param string $embeddingColumn The embedding column name.
222224
* @param string $idColumn The ID column name.
225+
* @param string|null $embeddingColumn The embedding column name.
223226
* @return PromiseInterface<int> A promise that resolves to the number of affected rows.
224227
*/
225228
public function updateEmbedding(
226229
mixed $id,
227230
array $embedding,
228-
?string $embeddingColumn = null,
229-
string $idColumn = 'id'
231+
string $idColumn = 'id',
232+
?string $embeddingColumn = null
230233
): PromiseInterface {
231-
$embeddingColumn = $embeddingColumn ?? $this->ragConfig['default_vector_column'];
234+
$embeddingColumn ??= $this->ragConfig['default_vector_column'];
232235

233236
if (empty($embedding)) {
234237
return Promise::resolve(0);
@@ -246,7 +249,7 @@ public function updateEmbedding(
246249
*/
247250
public function getVectorStatistics(?string $vectorColumn = null): PromiseInterface
248251
{
249-
$vectorColumn = $vectorColumn ?? $this->ragConfig['default_vector_column'];
252+
$vectorColumn ??= $this->ragConfig['default_vector_column'];
250253

251254
// @phpstan-ignore-next-line
252255
return Async::async(function () use ($vectorColumn): array {
@@ -282,7 +285,7 @@ public function analyzeVectorPerformance(
282285
array $sampleVector,
283286
?string $vectorColumn = null
284287
): PromiseInterface {
285-
$vectorColumn = $vectorColumn ?? $this->ragConfig['default_vector_column'];
288+
$vectorColumn ??= $this->ragConfig['default_vector_column'];
286289

287290
// @phpstan-ignore-next-line
288291
return Async::async(function () use ($sampleVector, $vectorColumn): array {

0 commit comments

Comments
 (0)