From 299b31e339f1f3399927eb66be6bcd778f14fed9 Mon Sep 17 00:00:00 2001 From: Raunak Bhagat Date: Wed, 11 Jun 2025 11:15:14 -0700 Subject: [PATCH 1/5] Edit logic for default entity-types population --- backend/onyx/chat/process_message.py | 2 +- .../kg/setup/kg_default_entity_definitions.py | 150 +++++++++++++----- 2 files changed, 107 insertions(+), 45 deletions(-) diff --git a/backend/onyx/chat/process_message.py b/backend/onyx/chat/process_message.py index c97917e2a22..90a21b50d07 100644 --- a/backend/onyx/chat/process_message.py +++ b/backend/onyx/chat/process_message.py @@ -617,7 +617,7 @@ def stream_chat_message_objects( raise Exception("Vespa KG index reset done") elif new_msg_req.message == "kg_setup": - populate_default_entity_types() + populate_default_entity_types(db_session=db_session) raise Exception("KG setup done") try: diff --git a/backend/onyx/kg/setup/kg_default_entity_definitions.py b/backend/onyx/kg/setup/kg_default_entity_definitions.py index 639252c8192..1333983e227 100644 --- a/backend/onyx/kg/setup/kg_default_entity_definitions.py +++ b/backend/onyx/kg/setup/kg_default_entity_definitions.py @@ -1,10 +1,11 @@ -from typing import cast +from collections.abc import Generator + +from sqlalchemy.orm import Session from onyx.configs.constants import DocumentSource -from onyx.db.engine import get_session_with_current_tenant from onyx.db.entity_type import KGEntityType -from onyx.db.kg_config import get_kg_config_settings -from onyx.db.kg_config import validate_kg_settings +from onyx.db.models import KGConfig +from onyx.kg.models import KGConfigVars from onyx.kg.models import KGDefaultEntityDefinition from onyx.kg.models import KGGroundingType @@ -309,48 +310,109 @@ } -def populate_default_entity_types() -> None: - with get_session_with_current_tenant() as db_session: - kg_config_settings = get_kg_config_settings(db_session) - validate_kg_settings(kg_config_settings) +TEMPLATE = "---vendor_name---" + + +def sanitize_default( + id_name: str, definition: KGDefaultEntityDefinition, vendor_name: str +) -> KGEntityType: + """ + Sanitizes the "default" Entity Types by string-replacing all instances of `TEMPLATE` (e.g., "---vendor_name---") + with the vendor's name. + """ + + description = definition.description.replace( + TEMPLATE, + vendor_name, + ) + grounded_source_name = ( + definition.grounded_source_name.value + if definition.grounded_source_name + else None + ) + + return KGEntityType( + id_name=id_name, + description=description, + attributes=definition.attributes, + grounding=definition.grounding, + grounded_source_name=grounded_source_name, + active=False, + ) + + +def generate_non_existing_entity_types( + existing_entity_types: dict[str, KGEntityType], + vendor_name: str, +) -> Generator[KGEntityType]: + default_definitions = { + **KG_DEFAULT_PRIMARY_GROUNDED_ENTITIES, + **KG_DEFAULT_ACCOUNT_EMPLOYEE_ENTITIES, + } + + for default_entity_name, default_entity_type in default_definitions.items(): + if default_entity_name not in existing_entity_types: + yield sanitize_default( + id_name=default_entity_name, + definition=default_entity_type, + vendor_name=vendor_name, + ) + + +def get_vendor_name( + db_session: Session, +) -> str: + config = ( + db_session.query(KGConfig) + .filter(KGConfig.kg_variable_name == KGConfigVars.KG_VENDOR) + .first() + ) + if not config: + raise RuntimeError("Failed to find the vendor name") + + if len(config.kg_variable_values) != 1: + raise RuntimeError( + f"Expected vendor name to be a list of length 1, instead got {config.kg_variable_values=}" + ) + + [vendor_name] = config.kg_variable_values + + if not vendor_name: + raise ValueError( + f"Vendor name must be a non-empty string, instead got {vendor_name=}" + ) + + return vendor_name + + +def populate_default_entity_types( + db_session: Session, +) -> list[KGEntityType]: + """ + Populates the database with the *missing* Entity Types (if any are missing) into the database after sanitizing them first. + Returns the *entire* list of Entity Types. + + # Note + Sanitization of "default" Entity Types includes string-replacing all instances of `TEMPLATE` with the vendor's name. + """ + + vendor_name = get_vendor_name(db_session=db_session) + existing_entity_types = { + et.id_name: et for et in db_session.query(KGEntityType).all() + } - # Get all existing entity types - existing_entity_types = { - et.id_name for et in db_session.query(KGEntityType).all() - } + entity_types = [] - # Create an instance of the default definitions - default_definitions = [ - KG_DEFAULT_PRIMARY_GROUNDED_ENTITIES, - KG_DEFAULT_ACCOUNT_EMPLOYEE_ENTITIES, - ] + for non_existing_entity_type in generate_non_existing_entity_types( + existing_entity_types=existing_entity_types, + vendor_name=vendor_name, + ): + db_session.add(non_existing_entity_type) + entity_types.append(non_existing_entity_type) - # Iterate over all attributes in the default definitions - for default_definition in default_definitions: - for id_name, definition in default_definition.items(): - # Skip if this entity type already exists - if id_name in existing_entity_types: - continue + db_session.commit() - # Create new entity type - description = definition.description.replace( - "---vendor_name---", cast(str, kg_config_settings.KG_VENDOR) - ) - grounded_source_name = ( - definition.grounded_source_name.value - if definition.grounded_source_name - else None - ) - new_entity_type = KGEntityType( - id_name=id_name, - description=description, - attributes=definition.attributes, - grounding=definition.grounding, - grounded_source_name=grounded_source_name, - active=False, - ) + for existing_entity_type in existing_entity_types.values(): + entity_types.append(existing_entity_type) - # Add to session - db_session.add(new_entity_type) - existing_entity_types.add(id_name) - db_session.commit() + return entity_types From a5751d79216cd2df1748bbf74ad434e9580002ed Mon Sep 17 00:00:00 2001 From: Raunak Bhagat Date: Wed, 11 Jun 2025 17:56:25 -0700 Subject: [PATCH 2/5] Remove templatization in favour of function --- .../kg/setup/kg_default_entity_definitions.py | 604 ++++++++---------- 1 file changed, 282 insertions(+), 322 deletions(-) diff --git a/backend/onyx/kg/setup/kg_default_entity_definitions.py b/backend/onyx/kg/setup/kg_default_entity_definitions.py index 1333983e227..12894cc7191 100644 --- a/backend/onyx/kg/setup/kg_default_entity_definitions.py +++ b/backend/onyx/kg/setup/kg_default_entity_definitions.py @@ -10,352 +10,315 @@ from onyx.kg.models import KGGroundingType -KG_DEFAULT_PRIMARY_GROUNDED_ENTITIES = { - "LINEAR": KGDefaultEntityDefinition( - description="A formal Linear ticket about a product issue or improvement request.", - attributes={ - "metadata_attributes": { - "team": "", - "state": "", - "priority": "", - "created_at": "", - "completed_at": "", +def get_default_entity_types(vendor_name: str) -> dict[str, KGDefaultEntityDefinition]: + return { + "LINEAR": KGDefaultEntityDefinition( + description="A formal Linear ticket about a product issue or improvement request.", + attributes={ + "metadata_attributes": { + "team": "", + "state": "", + "priority": "", + "created_at": "", + "completed_at": "", + }, + "entity_filter_attributes": {}, + "classification_attributes": {}, }, - "entity_filter_attributes": {}, - "classification_attributes": {}, - }, - grounding=KGGroundingType.GROUNDED, - grounded_source_name=DocumentSource.LINEAR, - ), - "JIRA-EPIC": KGDefaultEntityDefinition( - description=( - "A formal Jira ticket describing large bodies of work that can be broken down into " - "a number of smaller Jira Tasks, Stories, or Bugs." + grounding=KGGroundingType.GROUNDED, + grounded_source_name=DocumentSource.LINEAR, ), - attributes={ - "metadata_attributes": { - "status": "", - "priority": "", - "reporter": "", - "project_name": "", - "created": "", - "updated": "", + "JIRA-EPIC": KGDefaultEntityDefinition( + description=( + "A formal Jira ticket describing large bodies of work that can be broken down into " + "a number of smaller Jira Tasks, Stories, or Bugs." + ), + attributes={ + "metadata_attributes": { + "status": "", + "priority": "", + "reporter": "", + "project_name": "", + "created": "", + "updated": "", + }, + "entity_filter_attributes": {"issuetype": "Epic"}, + "classification_attributes": {}, }, - "entity_filter_attributes": {"issuetype": "Epic"}, - "classification_attributes": {}, - }, - grounding=KGGroundingType.GROUNDED, - grounded_source_name=DocumentSource.JIRA, - ), - "JIRA-STORY": KGDefaultEntityDefinition( - description=( - "Also called 'user stories', these are Jira tickets describing short requirements or requests " - "written from the perspective of the end user." + grounding=KGGroundingType.GROUNDED, + grounded_source_name=DocumentSource.JIRA, ), - attributes={ - "metadata_attributes": { - "status": "", - "priority": "", - "reporter": "", - "project_name": "", - "created": "", - "updated": "", - }, - "entity_filter_attributes": {"issuetype": "Story"}, - "classification_attributes": {}, - }, - grounding=KGGroundingType.GROUNDED, - grounded_source_name=DocumentSource.JIRA, - ), - "JIRA-BUG": KGDefaultEntityDefinition( - description=("A Jira ticket describing a bug."), - attributes={ - "metadata_attributes": { - "status": "", - "priority": "", - "reporter": "", - "project_name": "", - "created": "", - "updated": "", - }, - "entity_filter_attributes": {"issuetype": "Bug"}, - "classification_attributes": {}, - }, - grounding=KGGroundingType.GROUNDED, - grounded_source_name=DocumentSource.JIRA, - ), - "JIRA-TASK": KGDefaultEntityDefinition( - description=("A Jira ticket describing a unit of work."), - attributes={ - "metadata_attributes": { - "status": "", - "priority": "", - "reporter": "", - "project_name": "", - "created": "", - "updated": "", + "JIRA-STORY": KGDefaultEntityDefinition( + description=( + "Also called 'user stories', these are Jira tickets describing short requirements or requests " + "written from the perspective of the end user." + ), + attributes={ + "metadata_attributes": { + "status": "", + "priority": "", + "reporter": "", + "project_name": "", + "created": "", + "updated": "", + }, + "entity_filter_attributes": {"issuetype": "Story"}, + "classification_attributes": {}, }, - "entity_filter_attributes": {"issuetype": "Task"}, - "classification_attributes": {}, - }, - grounding=KGGroundingType.GROUNDED, - grounded_source_name=DocumentSource.JIRA, - ), - "JIRA-SUBTASK": KGDefaultEntityDefinition( - description=("A Jira ticket describing a sub-unit of work."), - attributes={ - "metadata_attributes": { - "status": "", - "priority": "", - "reporter": "", - "project_name": "", - "created": "", - "updated": "", + grounding=KGGroundingType.GROUNDED, + grounded_source_name=DocumentSource.JIRA, + ), + "JIRA-BUG": KGDefaultEntityDefinition( + description=("A Jira ticket describing a bug."), + attributes={ + "metadata_attributes": { + "status": "", + "priority": "", + "reporter": "", + "project_name": "", + "created": "", + "updated": "", + }, + "entity_filter_attributes": {"issuetype": "Bug"}, + "classification_attributes": {}, }, - "entity_filter_attributes": {"issuetype": "Sub-task"}, - "classification_attributes": {}, - }, - grounding=KGGroundingType.GROUNDED, - grounded_source_name=DocumentSource.JIRA, - ), - "GITHUB-PR": KGDefaultEntityDefinition( - description="Our (---vendor_name---) Engineering PRs describing what was actually implemented.", - attributes={ - "metadata_attributes": { - "repo": "", - "state": "", - "num_commits": "", - "num_files_changed": "", - "labels": "", - "merged": "", - "merged_at": "", - "closed_at": "", - "created_at": "", - "updated_at": "", + grounding=KGGroundingType.GROUNDED, + grounded_source_name=DocumentSource.JIRA, + ), + "JIRA-TASK": KGDefaultEntityDefinition( + description=("A Jira ticket describing a unit of work."), + attributes={ + "metadata_attributes": { + "status": "", + "priority": "", + "reporter": "", + "project_name": "", + "created": "", + "updated": "", + }, + "entity_filter_attributes": {"issuetype": "Task"}, + "classification_attributes": {}, }, - "entity_filter_attributes": {"object_type": "PullRequest"}, - "classification_attributes": {}, - }, - grounding=KGGroundingType.GROUNDED, - grounded_source_name=DocumentSource.GITHUB, - ), - "GITHUB-ISSUE": KGDefaultEntityDefinition( - description="Our (---vendor_name---) Engineering issues describing what needs to be implemented.", - attributes={ - "metadata_attributes": { - "repo": "", - "state": "", - "labels": "", - "closed_at": "", - "created_at": "", - "updated_at": "", + grounding=KGGroundingType.GROUNDED, + grounded_source_name=DocumentSource.JIRA, + ), + "JIRA-SUBTASK": KGDefaultEntityDefinition( + description=("A Jira ticket describing a sub-unit of work."), + attributes={ + "metadata_attributes": { + "status": "", + "priority": "", + "reporter": "", + "project_name": "", + "created": "", + "updated": "", + }, + "entity_filter_attributes": {"issuetype": "Sub-task"}, + "classification_attributes": {}, }, - "entity_filter_attributes": {"object_type": "Issue"}, - "classification_attributes": {}, - }, - grounding=KGGroundingType.GROUNDED, - grounded_source_name=DocumentSource.GITHUB, - ), - "FIREFLIES": KGDefaultEntityDefinition( - description=( - "A phone call transcript between us (---vendor_name---) " - "and another account or individuals, or an internal meeting." + grounding=KGGroundingType.GROUNDED, + grounded_source_name=DocumentSource.JIRA, ), - attributes={ - "metadata_attributes": {}, - "entity_filter_attributes": {}, - "classification_attributes": { - "customer": { - "extraction": True, - "description": "a call with representatives of one or more customers prospects", + "GITHUB-PR": KGDefaultEntityDefinition( + description=f"Our ({vendor_name}) Engineering PRs describing what was actually implemented.", + attributes={ + "metadata_attributes": { + "repo": "", + "state": "", + "num_commits": "", + "num_files_changed": "", + "labels": "", + "merged": "", + "merged_at": "", + "closed_at": "", + "created_at": "", + "updated_at": "", }, - "internal": { - "extraction": True, - "description": "a call between employees of the vendor's company (a vendor-internal call)", + "entity_filter_attributes": {"object_type": "PullRequest"}, + "classification_attributes": {}, + }, + grounding=KGGroundingType.GROUNDED, + grounded_source_name=DocumentSource.GITHUB, + ), + "GITHUB-ISSUE": KGDefaultEntityDefinition( + description=f"Our ({vendor_name}) Engineering issues describing what needs to be implemented.", + attributes={ + "metadata_attributes": { + "repo": "", + "state": "", + "labels": "", + "closed_at": "", + "created_at": "", + "updated_at": "", }, - "interview": { - "extraction": True, - "description": ( - "a call with an individual who is interviewed or is discussing potential employment with the vendor" - ), + "entity_filter_attributes": {"object_type": "Issue"}, + "classification_attributes": {}, + }, + grounding=KGGroundingType.GROUNDED, + grounded_source_name=DocumentSource.GITHUB, + ), + "FIREFLIES": KGDefaultEntityDefinition( + description=( + f"A phone call transcript between us ({vendor_name}) " + "and another account or individuals, or an internal meeting." + ), + attributes={ + "metadata_attributes": {}, + "entity_filter_attributes": {}, + "classification_attributes": { + "customer": { + "extraction": True, + "description": "a call with representatives of one or more customers prospects", + }, + "internal": { + "extraction": True, + "description": "a call between employees of the vendor's company (a vendor-internal call)", + }, + "interview": { + "extraction": True, + "description": ( + "a call with an individual who is interviewed or is discussing potential employment with the vendor" + ), + }, + "other": { + "extraction": True, + "description": ( + "a call with representatives of companies having a different reason for the call " + "(investment, partnering, etc.)" + ), + }, }, - "other": { - "extraction": True, - "description": ( - "a call with representatives of companies having a different reason for the call " - "(investment, partnering, etc.)" - ), + }, + grounding=KGGroundingType.GROUNDED, + grounded_source_name=DocumentSource.FIREFLIES, + ), + "GONG": KGDefaultEntityDefinition( + description=( + f"A phone call transcript between us ({vendor_name}) " + "and another account or individuals, or an internal meeting." + ), + attributes={ + "metadata_attributes": {}, + "entity_filter_attributes": {}, + "classification_attributes": {}, + }, + grounding=KGGroundingType.GROUNDED, + grounded_source_name=DocumentSource.GONG, + ), + "GOOGLE_DRIVE": KGDefaultEntityDefinition( + description="A Google Drive document.", + attributes={ + "metadata_attributes": {}, + "entity_filter_attributes": {}, + "classification_attributes": {}, + }, + grounding=KGGroundingType.GROUNDED, + grounded_source_name=DocumentSource.GOOGLE_DRIVE, + ), + "GMAIL": KGDefaultEntityDefinition( + description="An email.", + attributes={ + "metadata_attributes": {}, + "entity_filter_attributes": {}, + "classification_attributes": {}, + }, + grounding=KGGroundingType.GROUNDED, + grounded_source_name=DocumentSource.GMAIL, + ), + "ACCOUNT": KGDefaultEntityDefinition( + description=( + "A company that was, is, or potentially could be a customer of the vendor " + f"('us, {vendor_name}'). Note that {vendor_name} can never be an ACCOUNT." + ), + attributes={ + "metadata_attributes": {}, + "entity_filter_attributes": {"object_type": "Account"}, + "classification_attributes": {}, + }, + grounding=KGGroundingType.GROUNDED, + grounded_source_name=DocumentSource.SALESFORCE, + ), + "OPPORTUNITY": KGDefaultEntityDefinition( + description="A sales opportunity.", + attributes={ + "metadata_attributes": { + "name": "", + "stage_name": "", + "type": "", + "amount": "", + "fiscal_year": "", + "fiscal_quarter": "", + "is_closed": "", + "close_date": "", + "probability": "", + "created_date": "", + "last_modified_date": "", }, + "entity_filter_attributes": {"object_type": "Opportunity"}, + "classification_attributes": {}, }, - }, - grounding=KGGroundingType.GROUNDED, - grounded_source_name=DocumentSource.FIREFLIES, - ), - "GONG": KGDefaultEntityDefinition( - description=( - "A phone call transcript between us (---vendor_name---) " - "and another account or individuals, or an internal meeting." + grounding=KGGroundingType.GROUNDED, + grounded_source_name=DocumentSource.SALESFORCE, ), - attributes={ - "metadata_attributes": {}, - "entity_filter_attributes": {}, - "classification_attributes": {}, - }, - grounding=KGGroundingType.GROUNDED, - grounded_source_name=DocumentSource.GONG, - ), - "GOOGLE_DRIVE": KGDefaultEntityDefinition( - description="A Google Drive document.", - attributes={ - "metadata_attributes": {}, - "entity_filter_attributes": {}, - "classification_attributes": {}, - }, - grounding=KGGroundingType.GROUNDED, - grounded_source_name=DocumentSource.GOOGLE_DRIVE, - ), - "GMAIL": KGDefaultEntityDefinition( - description="An email.", - attributes={ - "metadata_attributes": {}, - "entity_filter_attributes": {}, - "classification_attributes": {}, - }, - grounding=KGGroundingType.GROUNDED, - grounded_source_name=DocumentSource.GMAIL, - ), - "ACCOUNT": KGDefaultEntityDefinition( - description=( - "A company that was, is, or potentially could be a customer of the vendor " - "('us, ---vendor_name---'). Note that ---vendor_name--- can never be an ACCOUNT." + "SLACK": KGDefaultEntityDefinition( + description="A Slack conversation.", + attributes={ + "metadata_attributes": {}, + "entity_filter_attributes": {}, + "classification_attributes": {}, + }, + grounding=KGGroundingType.GROUNDED, + grounded_source_name=DocumentSource.SLACK, ), - attributes={ - "metadata_attributes": {}, - "entity_filter_attributes": {"object_type": "Account"}, - "classification_attributes": {}, - }, - grounding=KGGroundingType.GROUNDED, - grounded_source_name=DocumentSource.SALESFORCE, - ), - "OPPORTUNITY": KGDefaultEntityDefinition( - description="A sales opportunity.", - attributes={ - "metadata_attributes": { - "name": "", - "stage_name": "", - "type": "", - "amount": "", - "fiscal_year": "", - "fiscal_quarter": "", - "is_closed": "", - "close_date": "", - "probability": "", - "created_date": "", - "last_modified_date": "", + "WEB": KGDefaultEntityDefinition( + description="A web page.", + attributes={ + "metadata_attributes": {}, + "entity_filter_attributes": {}, + "classification_attributes": {}, }, - "entity_filter_attributes": {"object_type": "Opportunity"}, - "classification_attributes": {}, - }, - grounding=KGGroundingType.GROUNDED, - grounded_source_name=DocumentSource.SALESFORCE, - ), - "SLACK": KGDefaultEntityDefinition( - description="A Slack conversation.", - attributes={ - "metadata_attributes": {}, - "entity_filter_attributes": {}, - "classification_attributes": {}, - }, - grounding=KGGroundingType.GROUNDED, - grounded_source_name=DocumentSource.SLACK, - ), - "WEB": KGDefaultEntityDefinition( - description="A web page.", - attributes={ - "metadata_attributes": {}, - "entity_filter_attributes": {}, - "classification_attributes": {}, - }, - grounding=KGGroundingType.GROUNDED, - grounded_source_name=DocumentSource.WEB, - ), -} - -KG_DEFAULT_ACCOUNT_EMPLOYEE_ENTITIES = { - "VENDOR": KGDefaultEntityDefinition( - description="The Vendor ---vendor_name---, 'us'", - grounding=KGGroundingType.GROUNDED, - active=False, - grounded_source_name=None, - ), - "ACCOUNT": KGDefaultEntityDefinition( - description=( - "A company that was, is, or potentially could be a customer of the vendor " - "('us, ---vendor_name---'). Note that ---vendor_name--- can never be an ACCOUNT." + grounding=KGGroundingType.GROUNDED, + grounded_source_name=DocumentSource.WEB, ), - grounding=KGGroundingType.GROUNDED, - active=False, - grounded_source_name=None, - ), - "EMPLOYEE": KGDefaultEntityDefinition( - description=( - "A person who speaks on behalf of 'our' company (the VENDOR ---vendor_name---), " - "NOT of another account. Therefore, employees of other companies " - "are NOT included here. If in doubt, do NOT extract." + "VENDOR": KGDefaultEntityDefinition( + description=f"The Vendor {vendor_name}, 'us'", + grounding=KGGroundingType.GROUNDED, + active=False, + grounded_source_name=None, ), - grounding=KGGroundingType.GROUNDED, - active=False, - grounded_source_name=None, - ), -} - - -TEMPLATE = "---vendor_name---" - - -def sanitize_default( - id_name: str, definition: KGDefaultEntityDefinition, vendor_name: str -) -> KGEntityType: - """ - Sanitizes the "default" Entity Types by string-replacing all instances of `TEMPLATE` (e.g., "---vendor_name---") - with the vendor's name. - """ - - description = definition.description.replace( - TEMPLATE, - vendor_name, - ) - grounded_source_name = ( - definition.grounded_source_name.value - if definition.grounded_source_name - else None - ) - - return KGEntityType( - id_name=id_name, - description=description, - attributes=definition.attributes, - grounding=definition.grounding, - grounded_source_name=grounded_source_name, - active=False, - ) + "EMPLOYEE": KGDefaultEntityDefinition( + description=( + f"A person who speaks on behalf of 'our' company (the VENDOR {vendor_name}), " + "NOT of another account. Therefore, employees of other companies " + "are NOT included here. If in doubt, do NOT extract." + ), + grounding=KGGroundingType.GROUNDED, + active=False, + grounded_source_name=None, + ), + } def generate_non_existing_entity_types( existing_entity_types: dict[str, KGEntityType], vendor_name: str, ) -> Generator[KGEntityType]: - default_definitions = { - **KG_DEFAULT_PRIMARY_GROUNDED_ENTITIES, - **KG_DEFAULT_ACCOUNT_EMPLOYEE_ENTITIES, - } + default_entity_types = get_default_entity_types(vendor_name=vendor_name) - for default_entity_name, default_entity_type in default_definitions.items(): + for default_entity_name, default_entity_type in default_entity_types.items(): if default_entity_name not in existing_entity_types: - yield sanitize_default( + grounded_source_name = ( + default_entity_type.grounded_source_name.value + if default_entity_type.grounded_source_name + else None + ) + yield KGEntityType( id_name=default_entity_name, - definition=default_entity_type, - vendor_name=vendor_name, + description=default_entity_type.description, + attributes=default_entity_type.attributes, + grounding=default_entity_type.grounding, + grounded_source_name=grounded_source_name, + active=False, ) @@ -389,11 +352,8 @@ def populate_default_entity_types( db_session: Session, ) -> list[KGEntityType]: """ - Populates the database with the *missing* Entity Types (if any are missing) into the database after sanitizing them first. + Populates the database with the *missing* Entity Types (if any are missing) into the database. Returns the *entire* list of Entity Types. - - # Note - Sanitization of "default" Entity Types includes string-replacing all instances of `TEMPLATE` with the vendor's name. """ vendor_name = get_vendor_name(db_session=db_session) From 48be898dcde067bf82845be58f994359981b9a27 Mon Sep 17 00:00:00 2001 From: Raunak Bhagat Date: Wed, 11 Jun 2025 18:16:09 -0700 Subject: [PATCH 3/5] Address comments on PR --- .../kg/setup/kg_default_entity_definitions.py | 48 ++++++------------- 1 file changed, 15 insertions(+), 33 deletions(-) diff --git a/backend/onyx/kg/setup/kg_default_entity_definitions.py b/backend/onyx/kg/setup/kg_default_entity_definitions.py index 12894cc7191..c69819dddc9 100644 --- a/backend/onyx/kg/setup/kg_default_entity_definitions.py +++ b/backend/onyx/kg/setup/kg_default_entity_definitions.py @@ -4,13 +4,13 @@ from onyx.configs.constants import DocumentSource from onyx.db.entity_type import KGEntityType -from onyx.db.models import KGConfig -from onyx.kg.models import KGConfigVars +from onyx.db.kg_config import get_kg_config_settings +from onyx.db.kg_config import validate_kg_settings from onyx.kg.models import KGDefaultEntityDefinition from onyx.kg.models import KGGroundingType -def get_default_entity_types(vendor_name: str) -> dict[str, KGDefaultEntityDefinition]: +def _get_default_entity_types(vendor_name: str) -> dict[str, KGDefaultEntityDefinition]: return { "LINEAR": KGDefaultEntityDefinition( description="A formal Linear ticket about a product issue or improvement request.", @@ -299,11 +299,11 @@ def get_default_entity_types(vendor_name: str) -> dict[str, KGDefaultEntityDefin } -def generate_non_existing_entity_types( +def _generate_non_existing_entity_types( existing_entity_types: dict[str, KGEntityType], vendor_name: str, ) -> Generator[KGEntityType]: - default_entity_types = get_default_entity_types(vendor_name=vendor_name) + default_entity_types = _get_default_entity_types(vendor_name=vendor_name) for default_entity_name, default_entity_type in default_entity_types.items(): if default_entity_name not in existing_entity_types: @@ -322,32 +322,6 @@ def generate_non_existing_entity_types( ) -def get_vendor_name( - db_session: Session, -) -> str: - config = ( - db_session.query(KGConfig) - .filter(KGConfig.kg_variable_name == KGConfigVars.KG_VENDOR) - .first() - ) - if not config: - raise RuntimeError("Failed to find the vendor name") - - if len(config.kg_variable_values) != 1: - raise RuntimeError( - f"Expected vendor name to be a list of length 1, instead got {config.kg_variable_values=}" - ) - - [vendor_name] = config.kg_variable_values - - if not vendor_name: - raise ValueError( - f"Vendor name must be a non-empty string, instead got {vendor_name=}" - ) - - return vendor_name - - def populate_default_entity_types( db_session: Session, ) -> list[KGEntityType]: @@ -356,14 +330,22 @@ def populate_default_entity_types( Returns the *entire* list of Entity Types. """ - vendor_name = get_vendor_name(db_session=db_session) + kg_config_settings = get_kg_config_settings(db_session=db_session) + validate_kg_settings(kg_config_settings) + + vendor_name = kg_config_settings.KG_VENDOR + if not vendor_name: + raise ValueError( + f"Vendor name must be a non-empty string, instead got {vendor_name=}" + ) + existing_entity_types = { et.id_name: et for et in db_session.query(KGEntityType).all() } entity_types = [] - for non_existing_entity_type in generate_non_existing_entity_types( + for non_existing_entity_type in _generate_non_existing_entity_types( existing_entity_types=existing_entity_types, vendor_name=vendor_name, ): From 6eb934ccb3a856a3b29beb7e34522c98c80748d4 Mon Sep 17 00:00:00 2001 From: Raunak Bhagat Date: Wed, 11 Jun 2025 18:29:09 -0700 Subject: [PATCH 4/5] Pull call out into its own binding --- backend/onyx/kg/setup/kg_default_entity_definitions.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/backend/onyx/kg/setup/kg_default_entity_definitions.py b/backend/onyx/kg/setup/kg_default_entity_definitions.py index c69819dddc9..aef0cbbcb60 100644 --- a/backend/onyx/kg/setup/kg_default_entity_definitions.py +++ b/backend/onyx/kg/setup/kg_default_entity_definitions.py @@ -342,13 +342,14 @@ def populate_default_entity_types( existing_entity_types = { et.id_name: et for et in db_session.query(KGEntityType).all() } + non_existing_entity_types = _generate_non_existing_entity_types( + existing_entity_types=existing_entity_types, + vendor_name=vendor_name, + ) entity_types = [] - for non_existing_entity_type in _generate_non_existing_entity_types( - existing_entity_types=existing_entity_types, - vendor_name=vendor_name, - ): + for non_existing_entity_type in non_existing_entity_types: db_session.add(non_existing_entity_type) entity_types.append(non_existing_entity_type) From 949a9dfe467c35be940b8a2f3631f696c622013b Mon Sep 17 00:00:00 2001 From: Raunak Bhagat Date: Wed, 11 Jun 2025 22:51:46 -0700 Subject: [PATCH 5/5] Address comments on PR --- backend/onyx/kg/setup/kg_default_entity_definitions.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/onyx/kg/setup/kg_default_entity_definitions.py b/backend/onyx/kg/setup/kg_default_entity_definitions.py index aef0cbbcb60..88b9eb098ba 100644 --- a/backend/onyx/kg/setup/kg_default_entity_definitions.py +++ b/backend/onyx/kg/setup/kg_default_entity_definitions.py @@ -355,7 +355,6 @@ def populate_default_entity_types( db_session.commit() - for existing_entity_type in existing_entity_types.values(): - entity_types.append(existing_entity_type) + entity_types.extend(existing_entity_types.values()) return entity_types