@@ -451,29 +451,51 @@ def get_full_record_data_for_id_group(
451
451
452
452
453
453
def get_attribute_data (
454
- project_id : str , attribute_name : str
454
+ project_id : str ,
455
+ attribute_name : str ,
456
+ only_missing : bool = False ,
457
+ embedding_id : Optional [str ] = None ,
455
458
) -> Tuple [List [str ], List [str ]]:
456
459
project_id = prevent_sql_injection (project_id , isinstance (project_id , str ))
457
460
attribute_name = prevent_sql_injection (
458
461
attribute_name , isinstance (attribute_name , str )
459
462
)
463
+ if embedding_id :
464
+ embedding_id = prevent_sql_injection (
465
+ embedding_id , isinstance (embedding_id , str )
466
+ )
460
467
query = None
461
- order = __get_order_by (project_id )
468
+ order = __get_order_by (project_id , prefix = "r." )
469
+ join_extension , where_add = "" , ""
470
+ if only_missing :
471
+ if not embedding_id :
472
+ raise ValueError ("embedding_id must be provided if only_missing is True" )
473
+ join_extension , where_add = (
474
+ f"""
475
+ LEFT JOIN embedding_tensor et
476
+ ON et.project_id = r.project_id
477
+ AND et.record_id = r.id
478
+ AND et.project_id = '{ project_id } ' AND et.embedding_id = '{ embedding_id } '
479
+ """ ,
480
+ "AND et.id IS NULL" ,
481
+ )
462
482
if attribute .get_by_name (project_id , attribute_name ).data_type == "EMBEDDING_LIST" :
463
483
query = f"""
464
484
SELECT id::TEXT || '@' || sub_key id, att AS "{ attribute_name } "
465
485
FROM (
466
- SELECT id, value as att, ordinality - 1 as sub_key
467
- FROM record
468
- cross join json_array_elements_text((data::JSON->'{ attribute_name } ')) with ordinality
469
- WHERE project_id = '{ project_id } '
486
+ SELECT r.id, value as att, ordinality - 1 as sub_key
487
+ FROM record r
488
+ { join_extension }
489
+ cross join json_array_elements_text((r.data::JSON->'{ attribute_name } ')) with ordinality
490
+ WHERE r.project_id = '{ project_id } ' { where_add }
470
491
{ order }
471
492
)x """
472
493
else :
473
494
query = f"""
474
- SELECT id::TEXT, data::JSON->'{ attribute_name } ' AS "{ attribute_name } "
475
- FROM record
476
- WHERE project_id = '{ project_id } '
495
+ SELECT r.id::TEXT, r.data::JSON->'{ attribute_name } ' AS "{ attribute_name } "
496
+ FROM record r
497
+ { join_extension }
498
+ WHERE r.project_id = '{ project_id } ' { where_add }
477
499
{ order }
478
500
"""
479
501
result = general .execute_all (query )
@@ -485,6 +507,43 @@ def count(project_id: str) -> int:
485
507
return session .query (Record ).filter (Record .project_id == project_id ).count ()
486
508
487
509
510
def count_missing_delta(project_id: str, attribute_id: str) -> int:
    """Count the records of a project that have no value for an attribute.

    The attribute name is looked up from ``attribute_id`` inside the query,
    and a record is counted when ``data->>name`` evaluates to NULL for it.

    Args:
        project_id: Project whose records are inspected.
        attribute_id: Id of the attribute whose name is used as the data key.

    Returns:
        The number of matching records, or 0 when the query yields no row
        or a falsy count.
    """
    # Both ids are interpolated into the SQL text below, so they are passed
    # through prevent_sql_injection first.
    project_id = prevent_sql_injection(project_id, isinstance(project_id, str))
    attribute_id = prevent_sql_injection(attribute_id, isinstance(attribute_id, str))
    sql = f"""
    WITH n AS (
        SELECT NAME
        FROM attribute a
        WHERE id = '{attribute_id}'
    )
    SELECT COUNT(*)
    FROM record r, n
    WHERE r.project_id = '{project_id}'
        AND r.data->>n.name IS NULL
    """
    row = general.execute_first(sql)
    if row and row[0]:
        return row[0]
    return 0
528
+
529
+
530
def get_missing_delta_record_ids(project_id: str, attribute_id: str) -> List[str]:
    """Return the ids of a project's records that have no value for an attribute.

    The attribute name is looked up from ``attribute_id`` inside the query,
    and a record is selected when ``data->>name`` evaluates to NULL for it.

    Args:
        project_id: Project whose records are inspected.
        attribute_id: Id of the attribute whose name is used as the data key.

    Returns:
        The matching record ids, cast to text by the query.
    """
    # Both ids are interpolated into the SQL text below, so they are passed
    # through prevent_sql_injection first.
    project_id = prevent_sql_injection(project_id, isinstance(project_id, str))
    attribute_id = prevent_sql_injection(attribute_id, isinstance(attribute_id, str))
    sql = f"""
    WITH n AS (
        SELECT NAME
        FROM attribute a
        WHERE id = '{attribute_id}'
    )
    SELECT r.id::TEXT
    FROM record r, n
    WHERE r.project_id = '{project_id}'
        AND r.data->>n.name IS NULL
    """
    rows = general.execute_all(sql)
    # Each row carries a single column: the record id as text.
    return [row[0] for row in rows]
545
+
546
+
488
547
def count_attribute_list_entries (project_id : str , attribute_name : str ) -> int :
489
548
project_id = prevent_sql_injection (project_id , isinstance (project_id , str ))
490
549
attribute_name = prevent_sql_injection (
@@ -809,7 +868,7 @@ def get_tokenized_records_from_db(
809
868
)
810
869
811
870
812
- def __get_order_by (project_id : str , first_x : int = 3 ) -> str :
871
+ def __get_order_by (project_id : str , first_x : int = 3 , prefix : str = "" ) -> str :
813
872
query = f"""
814
873
SELECT name, data_type
815
874
FROM attribute a
@@ -823,7 +882,7 @@ def __get_order_by(project_id: str, first_x: int = 3) -> str:
823
882
for x in values :
824
883
if order != "" :
825
884
order += ", "
826
- tmp = f"data->>'{ x .name } '"
885
+ tmp = f"{ prefix } data->>'{ x .name } '"
827
886
828
887
r_id = attribute .get_running_id_name (project_id )
829
888
if x .data_type == "INTEGER" and x .name == r_id :
0 commit comments