@@ -46,18 +46,21 @@ def upgrade() -> None:
46
46
op .execute (
47
47
text (
48
48
f"""
49
- DO $$
50
- BEGIN
51
- IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{ DB_READONLY_USER } ') THEN
52
- EXECUTE format('CREATE USER %I WITH PASSWORD %L', '{ DB_READONLY_USER } ', '{ DB_READONLY_PASSWORD } ');
53
- -- Explicitly revoke all privileges including CONNECT
54
- EXECUTE format('REVOKE ALL ON DATABASE %I FROM %I', current_database(), '{ DB_READONLY_USER } ');
55
- -- Grant only the CONNECT privilege
56
- EXECUTE format('GRANT CONNECT ON DATABASE %I TO %I', current_database(), '{ DB_READONLY_USER } ');
57
- END IF;
58
- END
59
- $$;
60
- """
49
+ DO $$
50
+ BEGIN
51
+ -- Check if the read-only user already exists
52
+ IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{ DB_READONLY_USER } ') THEN
53
+ -- Create the read-only user with the specified password
54
+ EXECUTE format('CREATE USER %I WITH PASSWORD %L', '{ DB_READONLY_USER } ', '{ DB_READONLY_PASSWORD } ');
55
+ -- First revoke all privileges to ensure a clean slate
56
+ EXECUTE format('REVOKE ALL ON DATABASE %I FROM %I', current_database(), '{ DB_READONLY_USER } ');
57
+ -- Grant only the CONNECT privilege to allow the user to connect to the database
58
+ -- but not perform any operations without additional specific grants
59
+ EXECUTE format('GRANT CONNECT ON DATABASE %I TO %I', current_database(), '{ DB_READONLY_USER } ');
60
+ END IF;
61
+ END
62
+ $$;
63
+ """
61
64
)
62
65
)
63
66
@@ -448,28 +451,34 @@ def upgrade() -> None:
448
451
"ON kg_entity_extraction_staging USING GIN (clustering_name gin_trgm_ops)"
449
452
)
450
453
451
- if not MULTI_TENANT :
452
- # Create trigger to update clustering columns if entity w/ doc_id is created
453
- alphanum_pattern = r"[^a-z0-9]+"
454
- op . execute (
454
+ # Create trigger to update clustering columns if entity w/ doc_id is created
455
+ alphanum_pattern = r"[^a-z0-9]+"
456
+ op . execute (
457
+ text (
455
458
f"""
456
459
CREATE OR REPLACE FUNCTION update_kg_entity_clustering()
457
460
RETURNS TRIGGER AS $$
458
461
DECLARE
459
462
doc_semantic_id text;
460
463
cleaned_semantic_id text;
464
+ max_length integer := 1000; -- Limit length for performance
461
465
BEGIN
462
466
-- Get semantic_id from document
463
467
SELECT semantic_id INTO doc_semantic_id
464
468
FROM document
465
469
WHERE id = NEW.document_id;
466
470
467
- -- Clean the semantic_id with regex patterns
471
+ -- Clean the semantic_id with regex patterns and handle NULLs
468
472
cleaned_semantic_id = regexp_replace(
469
- lower(COALESCE(doc_semantic_id, NEW.name)),
473
+ lower(COALESCE(doc_semantic_id, NEW.name, '' )),
470
474
'{ alphanum_pattern } ', '', 'g'
471
475
);
472
476
477
+ -- Truncate if too long for performance
478
+ IF length(cleaned_semantic_id) > max_length THEN
479
+ cleaned_semantic_id = left(cleaned_semantic_id, max_length);
480
+ END IF;
481
+
473
482
-- Set clustering_name to cleaned version and generate trigrams
474
483
NEW.clustering_name = cleaned_semantic_id;
475
484
NEW.clustering_trigrams = show_trgm(cleaned_semantic_id);
@@ -478,50 +487,56 @@ def upgrade() -> None:
478
487
$$ LANGUAGE plpgsql;
479
488
"""
480
489
)
481
- op .execute (
490
+ )
491
+ op .execute (
492
+ text (
482
493
"""
483
494
CREATE OR REPLACE FUNCTION update_kg_entity_extraction_clustering()
484
495
RETURNS TRIGGER AS $$
485
496
DECLARE
486
497
doc_semantic_id text;
487
498
BEGIN
488
499
-- Get semantic_id from document
500
+ -- If no document is found, doc_semantic_id will be NULL and COALESCE will use NEW.name
489
501
SELECT semantic_id INTO doc_semantic_id
490
502
FROM document
491
503
WHERE id = NEW.document_id;
492
504
493
505
-- Set clustering_name to semantic_id
494
- NEW.clustering_name = lower(COALESCE(doc_semantic_id, NEW.name));
506
+ NEW.clustering_name = lower(COALESCE(doc_semantic_id, NEW.name, '' ));
495
507
RETURN NEW;
496
508
END;
497
509
$$ LANGUAGE plpgsql;
498
510
"""
499
511
)
500
- for table , function in (
501
- ("kg_entity" , "update_kg_entity_clustering" ),
502
- ("kg_entity_extraction_staging" , "update_kg_entity_extraction_clustering" ),
503
- ):
504
- trigger = f"{ function } _trigger"
505
- op .execute (f"DROP TRIGGER IF EXISTS { trigger } ON { table } " )
506
- op .execute (
507
- f"""
508
- CREATE TRIGGER { trigger }
509
- BEFORE INSERT
510
- ON { table }
511
- FOR EACH ROW
512
- EXECUTE FUNCTION { function } ();
513
- """
514
- )
515
-
516
- # Create trigger to update kg_entity clustering_name and its trigrams when document.clustering_name changes
512
+ )
513
+ for table , function in (
514
+ ("kg_entity" , "update_kg_entity_clustering" ),
515
+ ("kg_entity_extraction_staging" , "update_kg_entity_extraction_clustering" ),
516
+ ):
517
+ trigger = f"{ function } _trigger"
518
+ op .execute (f"DROP TRIGGER IF EXISTS { trigger } ON { table } " )
517
519
op .execute (
520
+ f"""
521
+ CREATE TRIGGER { trigger }
522
+ BEFORE INSERT
523
+ ON { table }
524
+ FOR EACH ROW
525
+ EXECUTE FUNCTION { function } ();
526
+ """
527
+ )
528
+
529
+ # Create trigger to update kg_entity clustering_name and its trigrams when document.semantic_id changes
530
+ op .execute (
531
+ text (
518
532
f"""
519
533
CREATE OR REPLACE FUNCTION update_kg_entity_clustering_from_doc()
520
534
RETURNS TRIGGER AS $$
521
535
DECLARE
522
536
cleaned_semantic_id text;
523
537
BEGIN
524
538
-- Clean the semantic_id with regex patterns
539
+ -- If semantic_id is NULL, COALESCE will use empty string
525
540
cleaned_semantic_id = regexp_replace(
526
541
lower(COALESCE(NEW.semantic_id, '')),
527
542
'{ alphanum_pattern } ', '', 'g'
@@ -538,11 +553,15 @@ def upgrade() -> None:
538
553
$$ LANGUAGE plpgsql;
539
554
"""
540
555
)
541
- op .execute (
556
+ )
557
+ op .execute (
558
+ text (
542
559
"""
543
560
CREATE OR REPLACE FUNCTION update_kg_entity_extraction_clustering_from_doc()
544
561
RETURNS TRIGGER AS $$
545
562
BEGIN
563
+ -- Update clustering name for all entities in staging referencing this document
564
+ -- If semantic_id is NULL, COALESCE will use empty string
546
565
UPDATE kg_entity_extraction_staging
547
566
SET
548
567
clustering_name = lower(COALESCE(NEW.semantic_id, ''))
@@ -552,21 +571,22 @@ def upgrade() -> None:
552
571
$$ LANGUAGE plpgsql;
553
572
"""
554
573
)
555
- for function in (
556
- "update_kg_entity_clustering_from_doc" ,
557
- "update_kg_entity_extraction_clustering_from_doc" ,
558
- ):
559
- trigger = f"{ function } _trigger"
560
- op .execute (f"DROP TRIGGER IF EXISTS { trigger } ON document" )
561
- op .execute (
562
- f"""
563
- CREATE TRIGGER { trigger }
564
- AFTER UPDATE OF semantic_id
565
- ON document
566
- FOR EACH ROW
567
- EXECUTE FUNCTION { function } ();
568
- """
569
- )
574
+ )
575
+ for function in (
576
+ "update_kg_entity_clustering_from_doc" ,
577
+ "update_kg_entity_extraction_clustering_from_doc" ,
578
+ ):
579
+ trigger = f"{ function } _trigger"
580
+ op .execute (f"DROP TRIGGER IF EXISTS { trigger } ON document" )
581
+ op .execute (
582
+ f"""
583
+ CREATE TRIGGER { trigger }
584
+ AFTER UPDATE OF semantic_id
585
+ ON document
586
+ FOR EACH ROW
587
+ EXECUTE FUNCTION { function } ();
588
+ """
589
+ )
570
590
571
591
572
592
def downgrade () -> None :
0 commit comments