From c71258083a156fd7d490331da366f3d2e7f63452 Mon Sep 17 00:00:00 2001 From: Weves Date: Wed, 10 Sep 2025 18:56:35 -0700 Subject: [PATCH 1/4] Improve migration --- .../abbfec3a5ac5_merge_prompt_into_persona.py | 123 ++++++++---------- .../b7ec9b5b505f_adjust_prompt_length.py | 55 ++++++++ backend/onyx/db/models.py | 8 +- 3 files changed, 113 insertions(+), 73 deletions(-) create mode 100644 backend/alembic/versions/b7ec9b5b505f_adjust_prompt_length.py diff --git a/backend/alembic/versions/abbfec3a5ac5_merge_prompt_into_persona.py b/backend/alembic/versions/abbfec3a5ac5_merge_prompt_into_persona.py index 671ef8a685a..d9df14a0fca 100644 --- a/backend/alembic/versions/abbfec3a5ac5_merge_prompt_into_persona.py +++ b/backend/alembic/versions/abbfec3a5ac5_merge_prompt_into_persona.py @@ -16,76 +16,59 @@ branch_labels = None depends_on = None +MAX_PROMPT_LENGTH = 5_000_000 + def upgrade() -> None: """NOTE: Prompts without any Personas will just be lost.""" # Step 1: Add new columns to persona table (only if they don't exist) + op.add_column( + "persona", + sa.Column("system_prompt", sa.String(length=MAX_PROMPT_LENGTH), nullable=True), + ) + op.add_column( + "persona", + sa.Column("task_prompt", sa.String(length=MAX_PROMPT_LENGTH), nullable=True), + ) + op.add_column( + "persona", + sa.Column( + "datetime_aware", sa.Boolean(), nullable=False, server_default="true" + ), + ) - # Check if columns exist before adding them - connection = op.get_bind() - inspector = sa.inspect(connection) - existing_columns = [col["name"] for col in inspector.get_columns("persona")] - - if "system_prompt" not in existing_columns: - op.add_column( - "persona", sa.Column("system_prompt", sa.String(length=8000), nullable=True) - ) - - if "task_prompt" not in existing_columns: - op.add_column( - "persona", sa.Column("task_prompt", sa.String(length=8000), nullable=True) - ) - - if "datetime_aware" not in existing_columns: - op.add_column( - "persona", - sa.Column( - "datetime_aware", sa.Boolean(), nullable=False, server_default="true" - ), - ) - - # Step 2: Migrate data from prompt table to persona table (only if tables exist) - existing_tables = inspector.get_table_names() - - if "prompt" in existing_tables and "persona__prompt" in existing_tables: - # For personas that have associated prompts, copy the prompt data - op.execute( - """ - UPDATE persona - SET - system_prompt = p.system_prompt, - task_prompt = p.task_prompt, - datetime_aware = p.datetime_aware - FROM ( - -- Get the first prompt for each persona (in case there are multiple) - SELECT DISTINCT ON (pp.persona_id) - pp.persona_id, - pr.system_prompt, - pr.task_prompt, - pr.datetime_aware - FROM persona__prompt pp - JOIN prompt pr ON pp.prompt_id = pr.id - ) p - WHERE persona.id = p.persona_id + # For personas that have associated prompts, copy the prompt data + op.execute( """ - ) + UPDATE persona + SET + system_prompt = p.system_prompt, + task_prompt = p.task_prompt, + datetime_aware = p.datetime_aware + FROM ( + -- Get the first prompt for each persona (in case there are multiple) + SELECT DISTINCT ON (pp.persona_id) + pp.persona_id, + pr.system_prompt, + pr.task_prompt, + pr.datetime_aware + FROM persona__prompt pp + JOIN prompt pr ON pp.prompt_id = pr.id + ) p + WHERE persona.id = p.persona_id + """ + ) - # Step 3: Update chat_message references - # Since chat messages referenced prompt_id, we need to update them to use persona_id - # This is complex as we need to map from prompt_id to persona_id - - # Check if chat_message has prompt_id column - chat_message_columns = [ - col["name"] for col in inspector.get_columns("chat_message") - ] - if "prompt_id" in chat_message_columns: - op.execute( - """ - ALTER TABLE chat_message - DROP CONSTRAINT IF EXISTS chat_message__prompt_fk - """ - ) - op.drop_column("chat_message", "prompt_id") + # Step 3: Update chat_message references + # Since chat messages referenced prompt_id, we need to update them to use persona_id + # This is complex as we need to map from prompt_id to persona_id + op.execute( + """ + ALTER TABLE chat_message + DROP CONSTRAINT IF EXISTS chat_message__prompt_fk + """ + ) + op.drop_column("chat_message", "prompt_id") # Step 4: Handle personas without prompts - set default values if needed (always run this) op.execute( @@ -99,18 +82,16 @@ def upgrade() -> None: ) # Step 5: Drop the persona__prompt association table (if it exists) - if "persona__prompt" in existing_tables: - op.drop_table("persona__prompt") + op.drop_table("persona__prompt") # Step 6: Drop the prompt table (if it exists) - if "prompt" in existing_tables: - op.drop_table("prompt") + op.drop_table("prompt") # Step 7: Make system_prompt and task_prompt non-nullable after migration (only if they exist) op.alter_column( "persona", "system_prompt", - existing_type=sa.String(length=8000), + existing_type=sa.String(length=MAX_PROMPT_LENGTH), nullable=False, server_default=None, ) @@ -118,7 +99,7 @@ def upgrade() -> None: op.alter_column( "persona", "task_prompt", - existing_type=sa.String(length=8000), + existing_type=sa.String(length=MAX_PROMPT_LENGTH), nullable=False, server_default=None, ) @@ -132,8 +113,8 @@ def downgrade() -> None: sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=True), sa.Column("name", sa.String(), nullable=False), sa.Column("description", sa.String(), nullable=False), - sa.Column("system_prompt", sa.String(length=8000), nullable=False), - sa.Column("task_prompt", sa.String(length=8000), nullable=False), + sa.Column("system_prompt", sa.String(length=MAX_PROMPT_LENGTH), nullable=False), + sa.Column("task_prompt", sa.String(length=MAX_PROMPT_LENGTH), nullable=False), sa.Column( "datetime_aware", sa.Boolean(), nullable=False, server_default="true" ), diff --git a/backend/alembic/versions/b7ec9b5b505f_adjust_prompt_length.py b/backend/alembic/versions/b7ec9b5b505f_adjust_prompt_length.py new file mode 100644 index 00000000000..267b1a811fd --- /dev/null +++ b/backend/alembic/versions/b7ec9b5b505f_adjust_prompt_length.py @@ -0,0 +1,55 @@ +"""adjust prompt length + +Revision ID: b7ec9b5b505f +Revises: abbfec3a5ac5 +Create Date: 2025-09-10 18:51:15.629197 + +""" + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "b7ec9b5b505f" +down_revision = "abbfec3a5ac5" +branch_labels = None +depends_on = None + + +MAX_PROMPT_LENGTH = 5_000_000 + + +def upgrade() -> None: + # NOTE: need to run this since the previous migration PREVIOUSLY set the length to 8000 + op.alter_column( + "persona", + "system_prompt", + existing_type=sa.TEXT(), + type_=sa.String(length=MAX_PROMPT_LENGTH), + existing_nullable=False, + ) + op.alter_column( + "persona", + "task_prompt", + existing_type=sa.TEXT(), + type_=sa.String(length=MAX_PROMPT_LENGTH), + existing_nullable=False, + ) + + +def downgrade() -> None: + op.alter_column( + "prompt", + "system_prompt", + existing_type=sa.String(length=MAX_PROMPT_LENGTH), + type_=sa.TEXT(), + existing_nullable=False, + ) + op.alter_column( + "prompt", + "task_prompt", + existing_type=sa.String(length=MAX_PROMPT_LENGTH), + type_=sa.TEXT(), + existing_nullable=False, + ) diff --git a/backend/onyx/db/models.py b/backend/onyx/db/models.py index ef75f065aba..2cf82e7b8a2 100644 --- a/backend/onyx/db/models.py +++ b/backend/onyx/db/models.py @@ -92,6 +92,8 @@ logger = setup_logger() +PROMPT_LENGTH = 5_000_000 + class Base(DeclarativeBase): __abstract__ = True @@ -2583,9 +2585,11 @@ class Persona(Base): # Prompt fields merged from Prompt table system_prompt: Mapped[str | None] = mapped_column( - String(length=8000), nullable=True + String(length=PROMPT_LENGTH), nullable=True + ) + task_prompt: Mapped[str | None] = mapped_column( + String(length=PROMPT_LENGTH), nullable=True ) - task_prompt: Mapped[str | None] = mapped_column(String(length=8000), nullable=True) datetime_aware: Mapped[bool] = mapped_column(Boolean, default=True) uploaded_image_id: Mapped[str | None] = mapped_column(String, nullable=True) From d498bae4b8f508e4bc6d6a0aa705d2eec21b6b45 Mon Sep 17 00:00:00 2001 From: Weves Date: Wed, 10 Sep 2025 19:01:58 -0700 Subject: [PATCH 2/4] Fix --- backend/alembic/versions/b7ec9b5b505f_adjust_prompt_length.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/alembic/versions/b7ec9b5b505f_adjust_prompt_length.py b/backend/alembic/versions/b7ec9b5b505f_adjust_prompt_length.py index 267b1a811fd..48eb55c9de9 100644 --- a/backend/alembic/versions/b7ec9b5b505f_adjust_prompt_length.py +++ b/backend/alembic/versions/b7ec9b5b505f_adjust_prompt_length.py @@ -40,14 +40,14 @@ def upgrade() -> None: def downgrade() -> None: op.alter_column( - "prompt", + "persona", "system_prompt", existing_type=sa.String(length=MAX_PROMPT_LENGTH), type_=sa.TEXT(), existing_nullable=False, ) op.alter_column( - "prompt", + "persona", "task_prompt", existing_type=sa.String(length=MAX_PROMPT_LENGTH), type_=sa.TEXT(), From cea68796fb44adecef3aabfbd2268b0408ddd17d Mon Sep 17 00:00:00 2001 From: Weves Date: Wed, 10 Sep 2025 19:05:09 -0700 Subject: [PATCH 3/4] Revert og migration --- .../abbfec3a5ac5_merge_prompt_into_persona.py | 120 +++++++++++------- 1 file changed, 74 insertions(+), 46 deletions(-) diff --git a/backend/alembic/versions/abbfec3a5ac5_merge_prompt_into_persona.py b/backend/alembic/versions/abbfec3a5ac5_merge_prompt_into_persona.py index d9df14a0fca..c683fc52b4f 100644 --- a/backend/alembic/versions/abbfec3a5ac5_merge_prompt_into_persona.py +++ b/backend/alembic/versions/abbfec3a5ac5_merge_prompt_into_persona.py @@ -16,59 +16,85 @@ branch_labels = None depends_on = None + MAX_PROMPT_LENGTH = 5_000_000 def upgrade() -> None: """NOTE: Prompts without any Personas will just be lost.""" # Step 1: Add new columns to persona table (only if they don't exist) - op.add_column( - "persona", - sa.Column("system_prompt", sa.String(length=MAX_PROMPT_LENGTH), nullable=True), - ) - op.add_column( - "persona", - sa.Column("task_prompt", sa.String(length=MAX_PROMPT_LENGTH), nullable=True), - ) - op.add_column( - "persona", - sa.Column( - "datetime_aware", sa.Boolean(), nullable=False, server_default="true" - ), - ) - # For personas that have associated prompts, copy the prompt data - op.execute( - """ - UPDATE persona - SET - system_prompt = p.system_prompt, - task_prompt = p.task_prompt, - datetime_aware = p.datetime_aware - FROM ( - -- Get the first prompt for each persona (in case there are multiple) - SELECT DISTINCT ON (pp.persona_id) - pp.persona_id, - pr.system_prompt, - pr.task_prompt, - pr.datetime_aware - FROM persona__prompt pp - JOIN prompt pr ON pp.prompt_id = pr.id - ) p - WHERE persona.id = p.persona_id - """ - ) + # Check if columns exist before adding them + connection = op.get_bind() + inspector = sa.inspect(connection) + existing_columns = [col["name"] for col in inspector.get_columns("persona")] + + if "system_prompt" not in existing_columns: + op.add_column( + "persona", + sa.Column( + "system_prompt", sa.String(length=MAX_PROMPT_LENGTH), nullable=True + ), + ) - # Step 3: Update chat_message references - # Since chat messages referenced prompt_id, we need to update them to use persona_id - # This is complex as we need to map from prompt_id to persona_id - op.execute( + if "task_prompt" not in existing_columns: + op.add_column( + "persona", + sa.Column( + "task_prompt", sa.String(length=MAX_PROMPT_LENGTH), nullable=True + ), + ) + + if "datetime_aware" not in existing_columns: + op.add_column( + "persona", + sa.Column( + "datetime_aware", sa.Boolean(), nullable=False, server_default="true" + ), + ) + + # Step 2: Migrate data from prompt table to persona table (only if tables exist) + existing_tables = inspector.get_table_names() + + if "prompt" in existing_tables and "persona__prompt" in existing_tables: + # For personas that have associated prompts, copy the prompt data + op.execute( + """ + UPDATE persona + SET + system_prompt = p.system_prompt, + task_prompt = p.task_prompt, + datetime_aware = p.datetime_aware + FROM ( + -- Get the first prompt for each persona (in case there are multiple) + SELECT DISTINCT ON (pp.persona_id) + pp.persona_id, + pr.system_prompt, + pr.task_prompt, + pr.datetime_aware + FROM persona__prompt pp + JOIN prompt pr ON pp.prompt_id = pr.id + ) p + WHERE persona.id = p.persona_id """ - ALTER TABLE chat_message - DROP CONSTRAINT IF EXISTS chat_message__prompt_fk - """ - ) - op.drop_column("chat_message", "prompt_id") + ) + + # Step 3: Update chat_message references + # Since chat messages referenced prompt_id, we need to update them to use persona_id + # This is complex as we need to map from prompt_id to persona_id + + # Check if chat_message has prompt_id column + chat_message_columns = [ + col["name"] for col in inspector.get_columns("chat_message") + ] + if "prompt_id" in chat_message_columns: + op.execute( + """ + ALTER TABLE chat_message + DROP CONSTRAINT IF EXISTS chat_message__prompt_fk + """ + ) + op.drop_column("chat_message", "prompt_id") # Step 4: Handle personas without prompts - set default values if needed (always run this) op.execute( @@ -82,10 +108,12 @@ def upgrade() -> None: ) # Step 5: Drop the persona__prompt association table (if it exists) - op.drop_table("persona__prompt") + if "persona__prompt" in existing_tables: + op.drop_table("persona__prompt") # Step 6: Drop the prompt table (if it exists) - op.drop_table("prompt") + if "prompt" in existing_tables: + op.drop_table("prompt") # Step 7: Make system_prompt and task_prompt non-nullable after migration (only if they exist) op.alter_column( From 4309a48ff17e38b21a250b4b495a7a7ce4d9c3d6 Mon Sep 17 00:00:00 2001 From: Weves Date: Wed, 10 Sep 2025 19:06:26 -0700 Subject: [PATCH 4/4] fix --- .../b7ec9b5b505f_adjust_prompt_length.py | 20 ++++--------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/backend/alembic/versions/b7ec9b5b505f_adjust_prompt_length.py b/backend/alembic/versions/b7ec9b5b505f_adjust_prompt_length.py index 48eb55c9de9..37e6313ee8d 100644 --- a/backend/alembic/versions/b7ec9b5b505f_adjust_prompt_length.py +++ b/backend/alembic/versions/b7ec9b5b505f_adjust_prompt_length.py @@ -25,31 +25,19 @@ def upgrade() -> None: op.alter_column( "persona", "system_prompt", - existing_type=sa.TEXT(), + existing_type=sa.String(length=8000), type_=sa.String(length=MAX_PROMPT_LENGTH), existing_nullable=False, ) op.alter_column( "persona", "task_prompt", - existing_type=sa.TEXT(), + existing_type=sa.String(length=8000), type_=sa.String(length=MAX_PROMPT_LENGTH), existing_nullable=False, ) def downgrade() -> None: - op.alter_column( - "persona", - "system_prompt", - existing_type=sa.String(length=MAX_PROMPT_LENGTH), - type_=sa.TEXT(), - existing_nullable=False, - ) - op.alter_column( - "persona", - "task_prompt", - existing_type=sa.String(length=MAX_PROMPT_LENGTH), - type_=sa.TEXT(), - existing_nullable=False, - ) + # Downgrade not necessary + pass