@@ -5,7 +5,7 @@ const Embeddings = struct {
5
5
parsed : std .json .Parsed (EmbedResponse ),
6
6
7
7
/// Embedding vectors in the `ubinary` encoding requested via
/// `embedding_types`; each inner slice holds the raw bytes of one vector.
const EmbeddingsObject = struct {
    /// One entry per embedded input, looked up by position in `get`.
    /// NOTE(review): presumably 128 bytes each (1024 bits) to match the
    /// `bit(1024)` column — confirm against the API response.
    ubinary: []const []const u8,
};
10
10
11
11
const EmbedResponse = struct {
@@ -16,8 +16,8 @@ const Embeddings = struct {
16
16
self .parsed .deinit ();
17
17
}
18
18
19
/// Returns the `ubinary` embedding stored at `index`, or `null` when
/// `index` is not a valid position in the parsed response.
///
/// The returned slice refers to data held by `self.parsed`; presumably it
/// must not be used after `deinit` releases that data.
pub fn get(self: *Embeddings, index: usize) ?[]const u8 {
    const rows = self.parsed.value.embeddings.ubinary;
    // Guard first so the indexing below is always in bounds.
    if (index >= rows.len) return null;
    return rows[index];
}
23
23
};
@@ -31,7 +31,7 @@ fn embed(allocator: std.mem.Allocator, texts: []const []const u8, inputType: []c
31
31
.texts = texts ,
32
32
.model = "embed-english-v3.0" ,
33
33
.input_type = inputType ,
34
- .embedding_types = [_ ][]const u8 {"float " },
34
+ .embedding_types = [_ ][]const u8 {"ubinary " },
35
35
};
36
36
37
37
var authorization = std .ArrayList (u8 ).init (allocator );
@@ -59,6 +59,14 @@ fn embed(allocator: std.mem.Allocator, texts: []const []const u8, inputType: []c
59
59
return Embeddings { .parsed = parsed };
60
60
}
61
61
62
/// Renders `data` as a string of '0'/'1' characters, eight per input byte,
/// with the most significant bit of each byte first (e.g. 0x05 becomes
/// "00000101"). An empty `data` yields an empty list.
///
/// The caller owns the returned list and must call `deinit` on it.
/// Returns an error if the output buffer cannot be allocated.
fn bitString(allocator: std.mem.Allocator, data: []const u8) !std.ArrayList(u8) {
    var buf = std.ArrayList(u8).init(allocator);
    // Without this, a failure part-way through would leak whatever had
    // already been appended.
    errdefer buf.deinit();

    // Every byte expands to exactly eight characters, so reserve once
    // instead of growing repeatedly inside the loop.
    try buf.ensureTotalCapacity(data.len * 8);

    for (data) |byte| {
        try buf.writer().print("{b:08}", .{byte});
    }
    return buf;
}
69
+
62
70
pub fn main () ! void {
63
71
const apiKey = std .posix .getenv ("CO_API_KEY" ) orelse {
64
72
std .debug .print ("Set CO_API_KEY\n " , .{});
@@ -80,20 +88,24 @@ pub fn main() !void {
80
88
81
89
_ = try conn .exec ("CREATE EXTENSION IF NOT EXISTS vector" , .{});
82
90
_ = try conn .exec ("DROP TABLE IF EXISTS documents" , .{});
83
- _ = try conn .exec ("CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding vector (1024))" , .{});
91
+ _ = try conn .exec ("CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding bit (1024))" , .{});
84
92
85
93
const documents = [_ ][]const u8 { "The dog is barking" , "The cat is purring" , "The bear is growling" };
86
94
var documentEmbeddings = try embed (allocator , & documents , "search_document" , apiKey );
87
95
defer documentEmbeddings .deinit ();
88
96
for (& documents , 0.. ) | content , i | {
89
- const params = .{ content , documentEmbeddings .get (i ) };
90
- _ = try conn .exec ("INSERT INTO documents (content, embedding) VALUES ($1, $2::float4[])" , params );
97
+ var bit = try bitString (allocator , documentEmbeddings .get (i ).? );
98
+ defer bit .deinit ();
99
+ const params = .{ content , bit .items };
100
+ _ = try conn .exec ("INSERT INTO documents (content, embedding) VALUES ($1, $2)" , params );
91
101
}
92
102
93
103
const query = "forest" ;
94
104
var queryEmbeddings = try embed (allocator , &[_ ][]const u8 {query }, "search_query" , apiKey );
95
105
defer queryEmbeddings .deinit ();
96
- var result = try conn .query ("SELECT content FROM documents ORDER BY embedding <=> $1::float4[]::vector LIMIT 5" , .{queryEmbeddings .get (0 )});
106
+ var queryBit = try bitString (allocator , queryEmbeddings .get (0 ).? );
107
+ defer queryBit .deinit ();
108
+ var result = try conn .query ("SELECT content FROM documents ORDER BY embedding <~> $1 LIMIT 5" , .{queryBit .items });
97
109
defer result .deinit ();
98
110
while (try result .next ()) | row | {
99
111
const content = row .get ([]const u8 , 0 );
0 commit comments