Skip to content

Commit 9bb04b2

Browse files
authored
improve search query (#262)
1 parent e9d9805 commit 9bb04b2

File tree

5 files changed

+19 -35 lines changed

5 files changed

+19 -35 lines changed

core/src/init.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ pub async fn cleanup_job(pool: &PgPool, job_name: &str) -> Result<(), VectorizeE
330330
// Delete pending PGMQ messages for this job
331331
// We search for messages where the job_name matches
332332
let delete_messages_query =
333-
"DELETE FROM pgmq.vectorize_jobs WHERE message->>'job_name' = $1".to_string();
333+
"DELETE FROM pgmq.q_vectorize_jobs WHERE message->>'job_name' = $1".to_string();
334334
match sqlx::query(&delete_messages_query)
335335
.bind(job_name)
336336
.execute(pool)

core/src/query.rs

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -731,8 +731,8 @@ pub fn hybrid_search_query(
731731
}
732732

733733
format!(
734-
"
735-
SELECT to_jsonb(t) as results
734+
"
735+
SELECT to_jsonb(t) as results
736736
FROM (
737737
SELECT {cols}, t.rrf_score, t.semantic_rank, t.fts_rank, t.similarity_score
738738
FROM (
@@ -742,21 +742,14 @@ pub fn hybrid_search_query(
742742
s.similarity_score,
743743
f.fts_rank,
744744
(
745-
CASE
746-
WHEN s.semantic_rank IS NOT NULL THEN {semantic_weight}::float/({rrf_k} + s.semantic_rank)
747-
ELSE 0
748-
END +
749-
CASE
750-
WHEN f.fts_rank IS NOT NULL THEN {fts_weight}::float/({rrf_k} + f.fts_rank)
751-
ELSE 0
752-
END
745+
COALESCE({semantic_weight}::float / ({rrf_k} + s.semantic_rank), 0) +
746+
COALESCE({fts_weight}::float / ({rrf_k} + f.fts_rank), 0)
753747
) as rrf_score
754748
FROM (
755749
SELECT
756750
{join_key},
757751
distance,
758752
ROW_NUMBER() OVER (ORDER BY distance) as semantic_rank,
759-
COUNT(*) OVER () as max_semantic_rank,
760753
1 - distance as similarity_score
761754
FROM (
762755
SELECT
@@ -770,17 +763,16 @@ pub fn hybrid_search_query(
770763
FULL OUTER JOIN (
771764
SELECT
772765
{join_key},
773-
ROW_NUMBER() OVER (ORDER BY ts_rank_cd(search_tokens, query) DESC) as fts_rank,
774-
COUNT(*) OVER () as max_fts_rank
775-
FROM vectorize._search_tokens_{job_name},
776-
to_tsquery('english',
766+
ROW_NUMBER() OVER (ORDER BY ts_rank_cd(search_tokens, query) DESC) as fts_rank
767+
FROM vectorize._search_tokens_{job_name},
768+
to_tsquery('english',
777769
NULLIF(
778770
replace(plainto_tsquery('english', $2)::text, ' & ', ' | '),
779771
''
780772
)
781773
) as query
782774
WHERE search_tokens @@ query
783-
ORDER BY ts_rank_cd(search_tokens, query) DESC
775+
ORDER BY ts_rank_cd(search_tokens, query) DESC
784776
LIMIT {window_size}
785777
) f ON s.{join_key} = f.{join_key}
786778
) t
@@ -789,7 +781,7 @@ pub fn hybrid_search_query(
789781
ORDER BY t.rrf_score DESC
790782
LIMIT {limit}
791783
) t"
792-
)
784+
)
793785
}
794786
#[cfg(test)]
795787
mod tests {

server/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ version = "0.1.0"
44
edition = "2024"
55
publish = false
66

7+
[[bin]]
8+
name = "vectorize-worker"
9+
path = "src/bin/worker.rs"
10+
711
[lib]
812
name = "vectorize_server"
913
path = "src/lib.rs"

server/Dockerfile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,15 @@ COPY Cargo.toml Cargo.lock ./
1515

1616
ENV SQLX_OFFLINE=1
1717
RUN cargo build --bin vectorize-server --release
18-
18+
RUN cargo build --bin vectorize-worker --release
19+
1920
FROM rust:1.90.0-slim-bookworm
2021

2122
RUN apt-get update && \
2223
apt-get install -y postgresql-client && apt-get clean && \
2324
rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
2425

2526
COPY --from=builder /build/target/release/vectorize-server /usr/local/bin/vectorize-server
27+
COPY --from=builder /build/target/release/vectorize-worker /usr/local/bin/vectorize-worker
2628

2729
CMD ["vectorize-server"]

server/tests/tests.rs

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1098,21 +1098,7 @@ async fn test_delete_job_with_pending_messages() {
10981098
let cfg = vectorize_core::config::Config::from_env();
10991099
let pool = sqlx::PgPool::connect(&cfg.database_url).await.unwrap();
11001100

1101-
let mut rng = rand::rng();
1102-
let test_num = rng.random_range(1..100000);
1103-
let table = format!("test_pending_msgs_{test_num}");
1104-
1105-
// Create table
1106-
sqlx::query(&format!(
1107-
"CREATE TABLE IF NOT EXISTS vectorize_test.{table} (
1108-
id SERIAL PRIMARY KEY,
1109-
content TEXT,
1110-
updated_at TIMESTAMPTZ DEFAULT NOW()
1111-
);"
1112-
))
1113-
.execute(&pool)
1114-
.await
1115-
.unwrap();
1101+
let table = common::create_test_table().await;
11161102

11171103
// Insert multiple rows
11181104
for i in 0..10 {
@@ -1125,7 +1111,7 @@ async fn test_delete_job_with_pending_messages() {
11251111
.unwrap();
11261112
}
11271113

1128-
let job_name = format!("test_pending_{test_num}");
1114+
let job_name = format!("test_pending_{}", table);
11291115

11301116
// Create a vectorize job
11311117
let payload = json!({

0 commit comments

Comments
 (0)