#!/usr/bin/env python3
"""
Performance testing script for the PostgreSQL backend.

Runs timed insert, search, update, and relationship operations and
generates a performance report.
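
Example invocation (the path is illustrative; adjust to where this script
lives in the repo):

    python scripts/test_performance.py --insert 500 --queries 50

Use --database-url or the DATABASE_URL environment variable to point at a
specific database.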
"""

import asyncio
import time
import random
import statistics
import json
from typing import List, Dict, Any
import numpy as np
import sys
from pathlib import Path

# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))

from app.storage.postgres_unified import PostgresUnifiedBackend
from app.services.memory_service_postgres import MemoryServicePostgres


class PerformanceTester:
    """Performance testing for the PostgreSQL backend"""

    def __init__(self, connection_string: str = None):
        self.backend = PostgresUnifiedBackend(connection_string)
        # Embeddings disabled: the tests below supply their own vectors
        self.service = MemoryServicePostgres(connection_string, enable_embeddings=False)
        self.results = {}

    async def setup(self):
        """Initialize connections"""
        await self.backend.initialize()
        await self.service.initialize()

        # Clean test data
        async with self.backend.acquire() as conn:
            await conn.execute("DELETE FROM memories WHERE container_id = 'perf_test'")

    async def teardown(self):
        """Clean up"""
        # Clean test data
        async with self.backend.acquire() as conn:
            await conn.execute("DELETE FROM memories WHERE container_id = 'perf_test'")

        await self.backend.close()
        await self.service.close()

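    # Each test below times individual operations with time.perf_counter()
    # and reports mean/median/p95/p99 latency in milliseconds; percentiles
    # are computed with np.percentile over the raw per-operation timings.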
    async def test_insert_performance(self, count: int = 1000):
        """Test insert performance"""
        print(f"\n📝 Testing INSERT performance ({count} memories)...")

        times = []

        for i in range(count):
            memory = {
                "content": f"Performance test memory {i}: " + "x" * random.randint(100, 500),
                "memory_type": random.choice(["knowledge", "experience", "task"]),
                "importance_score": random.random(),
                "tags": random.sample(["test", "perf", "benchmark", "data"], k=2),
                "metadata": {"index": i, "batch": "perf_test"},
                "container_id": "perf_test"
            }

            start = time.perf_counter()
            await self.backend.create_memory(memory)
            elapsed = time.perf_counter() - start
            times.append(elapsed * 1000)  # Convert to ms

            if (i + 1) % 100 == 0:
                print(f"  ✓ Inserted {i + 1}/{count} memories")

        self.results["insert"] = {
            "count": count,
            "mean_ms": statistics.mean(times),
            "median_ms": statistics.median(times),
            "p95_ms": np.percentile(times, 95),
            "p99_ms": np.percentile(times, 99),
            "total_s": sum(times) / 1000
        }

        print(f"  Mean: {self.results['insert']['mean_ms']:.2f}ms")
        print(f"  P99: {self.results['insert']['p99_ms']:.2f}ms")

    async def test_vector_search_performance(self, queries: int = 100):
        """Test vector search performance"""
        print(f"\n🔍 Testing VECTOR SEARCH performance ({queries} queries)...")

        # Generate random embeddings for test memories
        print("  Generating test embeddings...")
        memories = await self.backend.list_memories(limit=100, container_id="perf_test")

        for memory in memories[:50]:  # Add embeddings to first 50
            # Random 1536-dim vectors: this measures index/query latency, not
            # retrieval quality (1536 is assumed to match the embedding column)
            embedding = np.random.rand(1536).tolist()
            await self.backend.update_memory(memory["id"], {}, embedding)

        times = []

        for i in range(queries):
            # Random query embedding
            query_embedding = np.random.rand(1536).tolist()

            start = time.perf_counter()
            results = await self.backend.vector_search(
                embedding=query_embedding,
                limit=10,
                container_id="perf_test"
            )
            elapsed = time.perf_counter() - start
            times.append(elapsed * 1000)

            if (i + 1) % 20 == 0:
                print(f"  ✓ Completed {i + 1}/{queries} searches")

        self.results["vector_search"] = {
            "queries": queries,
            "mean_ms": statistics.mean(times),
            "median_ms": statistics.median(times),
            "p95_ms": np.percentile(times, 95),
            "p99_ms": np.percentile(times, 99)
        }

        print(f"  Mean: {self.results['vector_search']['mean_ms']:.2f}ms")
        print(f"  P99: {self.results['vector_search']['p99_ms']:.2f}ms")

    async def test_text_search_performance(self, queries: int = 100):
        """Test full-text search performance"""
        print(f"\n📖 Testing TEXT SEARCH performance ({queries} queries)...")

        search_terms = ["test", "memory", "performance", "data", "knowledge", "system"]
        times = []

        for i in range(queries):
            query = random.choice(search_terms)

            start = time.perf_counter()
            results = await self.backend.text_search(
                query=query,
                limit=10,
                container_id="perf_test"
            )
            elapsed = time.perf_counter() - start
            times.append(elapsed * 1000)

            if (i + 1) % 20 == 0:
                print(f"  ✓ Completed {i + 1}/{queries} searches")

        self.results["text_search"] = {
            "queries": queries,
            "mean_ms": statistics.mean(times),
            "median_ms": statistics.median(times),
            "p95_ms": np.percentile(times, 95),
            "p99_ms": np.percentile(times, 99)
        }

        print(f"  Mean: {self.results['text_search']['mean_ms']:.2f}ms")
        print(f"  P99: {self.results['text_search']['p99_ms']:.2f}ms")

    async def test_hybrid_search_performance(self, queries: int = 50):
        """Test hybrid (text + vector) search performance"""
        print(f"\n🔄 Testing HYBRID SEARCH performance ({queries} queries)...")

        search_terms = ["test", "memory", "performance", "data"]
        times = []

        for i in range(queries):
            query = random.choice(search_terms)
            embedding = np.random.rand(1536).tolist()

            start = time.perf_counter()
            results = await self.backend.hybrid_search(
                query=query,
                embedding=embedding,
                limit=10,
                container_id="perf_test"
            )
            elapsed = time.perf_counter() - start
            times.append(elapsed * 1000)

            if (i + 1) % 10 == 0:
                print(f"  ✓ Completed {i + 1}/{queries} searches")

        self.results["hybrid_search"] = {
            "queries": queries,
            "mean_ms": statistics.mean(times),
            "median_ms": statistics.median(times),
            "p95_ms": np.percentile(times, 95),
            "p99_ms": np.percentile(times, 99)
        }

        print(f"  Mean: {self.results['hybrid_search']['mean_ms']:.2f}ms")
        print(f"  P99: {self.results['hybrid_search']['p99_ms']:.2f}ms")

    async def test_update_performance(self, count: int = 100):
        """Test update performance"""
        print(f"\n✏️ Testing UPDATE performance ({count} updates)...")

        # Get memories to update
        memories = await self.backend.list_memories(limit=count, container_id="perf_test")

        if len(memories) < count:
            print(f"  ⚠️ Only {len(memories)} memories available for update")
            count = len(memories)

        if count == 0:
            # Guard: statistics.mean() raises on an empty list
            print("  ⚠️ No memories available; skipping update test")
            return

        times = []

        for i, memory in enumerate(memories[:count]):
            updates = {
                "importance_score": random.random(),
                "tags": random.sample(["updated", "test", "perf"], k=2)
            }

            start = time.perf_counter()
            await self.backend.update_memory(memory["id"], updates)
            elapsed = time.perf_counter() - start
            times.append(elapsed * 1000)

            if (i + 1) % 20 == 0:
                print(f"  ✓ Updated {i + 1}/{count} memories")

        self.results["update"] = {
            "count": count,
            "mean_ms": statistics.mean(times),
            "median_ms": statistics.median(times),
            "p95_ms": np.percentile(times, 95),
            "p99_ms": np.percentile(times, 99)
        }

        print(f"  Mean: {self.results['update']['mean_ms']:.2f}ms")
        print(f"  P99: {self.results['update']['p99_ms']:.2f}ms")

    async def test_relationship_performance(self, count: int = 100):
        """Test relationship creation performance"""
        print(f"\n🔗 Testing RELATIONSHIP performance ({count} relationships)...")

        # Get memories for relationships
        memories = await self.backend.list_memories(limit=50, container_id="perf_test")

        if len(memories) < 2:
            print("  ⚠️ Not enough memories for relationship testing")
            return

        times = []

        for i in range(count):
            source = random.choice(memories)
            target = random.choice([m for m in memories if m["id"] != source["id"]])

            start = time.perf_counter()
            await self.backend.create_relationship(
                source_id=source["id"],
                target_id=target["id"],
                relationship_type=random.choice(["related", "similar", "follows"]),
                strength=random.random()
            )
            elapsed = time.perf_counter() - start
            times.append(elapsed * 1000)

            if (i + 1) % 20 == 0:
                print(f"  ✓ Created {i + 1}/{count} relationships")

        self.results["relationships"] = {
            "count": count,
            "mean_ms": statistics.mean(times),
            "median_ms": statistics.median(times),
            "p95_ms": np.percentile(times, 95),
            "p99_ms": np.percentile(times, 99)
        }

        print(f"  Mean: {self.results['relationships']['mean_ms']:.2f}ms")
        print(f"  P99: {self.results['relationships']['p99_ms']:.2f}ms")

    async def test_statistics_performance(self):
        """Test statistics query performance"""
        print("\n📊 Testing STATISTICS query performance...")

        times = []

        for _ in range(10):
            start = time.perf_counter()
            stats = await self.backend.get_statistics()
            elapsed = time.perf_counter() - start
            times.append(elapsed * 1000)

        self.results["statistics"] = {
            "queries": 10,
            "mean_ms": statistics.mean(times),
            "median_ms": statistics.median(times),
            "memory_count": stats.get("total_memories", 0)
        }

        print(f"  Mean: {self.results['statistics']['mean_ms']:.2f}ms")
        print(f"  Total memories: {self.results['statistics']['memory_count']}")

    def generate_report(self):
        """Generate performance report"""
        print("\n" + "=" * 60)
        print("📈 PERFORMANCE TEST REPORT")
        print("=" * 60)

        # Summary table
        print("\n📊 Summary (all times in milliseconds):")
        print(f"{'Operation':<20} {'Mean':<10} {'Median':<10} {'P95':<10} {'P99':<10}")
        print("-" * 60)

        for op in ["insert", "vector_search", "text_search", "hybrid_search", "update"]:
            if op in self.results:
                r = self.results[op]
                print(f"{op:<20} {r['mean_ms']:<10.2f} {r['median_ms']:<10.2f} "
                      f"{r['p95_ms']:<10.2f} {r['p99_ms']:<10.2f}")

        # Throughput
        print("\n📈 Throughput:")
        if "insert" in self.results:
            throughput = self.results["insert"]["count"] / self.results["insert"]["total_s"]
            print(f"  Insert: {throughput:.1f} ops/sec")

        # Save results to file
        report_file = Path(__file__).parent.parent / "performance_report.json"
        with open(report_file, "w") as f:
            json.dump(self.results, f, indent=2)
        print(f"\n💾 Full report saved to: {report_file}")

        # Recommendations
        print("\n💡 Recommendations:")
        if "insert" in self.results and self.results["insert"]["p99_ms"] > 100:
            print("  ⚠️ Insert P99 > 100ms - Consider batch inserts")
        if "vector_search" in self.results and self.results["vector_search"]["p99_ms"] > 200:
            print("  ⚠️ Vector search P99 > 200ms - Check HNSW index parameters")
        if "text_search" in self.results and self.results["text_search"]["p99_ms"] > 100:
            print("  ⚠️ Text search P99 > 100ms - Check GIN index and vacuum status")

        print("\n✅ Performance testing complete!")


async def main():
    """Run performance tests"""
    import argparse
    import os

    parser = argparse.ArgumentParser(description="PostgreSQL performance testing")
    parser.add_argument("--insert", type=int, default=1000, help="Number of inserts")
    parser.add_argument("--queries", type=int, default=100, help="Number of queries")
    parser.add_argument("--skip-insert", action="store_true", help="Skip insert test")
    parser.add_argument("--database-url", type=str, help="Database URL")

    args = parser.parse_args()

    print("🚀 Second Brain PostgreSQL Performance Test")
    print("=" * 60)

    # Get database URL
    db_url = args.database_url or os.getenv("DATABASE_URL", "postgresql://localhost/second_brain")
    # Print only the part after '@' so credentials are not echoed
    print(f"📡 Database: {db_url.split('@')[-1]}")

    tester = PerformanceTester(db_url)

    try:
        await tester.setup()

        # Run tests
        if not args.skip_insert:
            await tester.test_insert_performance(args.insert)

        await tester.test_text_search_performance(args.queries)
        await tester.test_vector_search_performance(args.queries // 2)
        await tester.test_hybrid_search_performance(args.queries // 4)
        await tester.test_update_performance(args.queries)
        await tester.test_relationship_performance(args.queries)
        await tester.test_statistics_performance()

        # Generate report
        tester.generate_report()

    finally:
        await tester.teardown()


if __name__ == "__main__":
    asyncio.run(main())