From c947271e27735df0886fe333c750aa6deb8c5155 Mon Sep 17 00:00:00 2001 From: Spencer Tipping Date: Mon, 26 May 2025 09:12:57 -0500 Subject: [PATCH 01/21] Pulling scripts into separate files, parsing PG credentials from POSTGRES_URI --- docker/docker-entrypoint.sh | 69 ++++++++++++++++++++++++++++ docker/morphik.toml.default | 41 +++++++++++++++++ dockerfile | 91 ++----------------------------------- 3 files changed, 113 insertions(+), 88 deletions(-) create mode 100644 docker/docker-entrypoint.sh create mode 100644 docker/morphik.toml.default diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh new file mode 100644 index 00000000..cd5027b9 --- /dev/null +++ b/docker/docker-entrypoint.sh @@ -0,0 +1,69 @@ +#!/bin/bash +set -e + +# Copy default config if none exists +if [ ! -f /app/morphik.toml ]; then + cp /app/morphik.toml.default /app/morphik.toml +fi + +# Function to check PostgreSQL +check_postgres() { + if [ -n "$POSTGRES_URI" ]; then + # Extract connection details from POSTGRES_URI, which can be + # postgresql:// or postgresql+asyncpg:// + URI=${POSTGRES_URI#postgresql*://} + USER_PASS=${URI%%@*} + USER=${USER_PASS%:*} + PASS=${USER_PASS#*:} + HOST_PORT_DB=${URI#*@} + HOST_PORT=${HOST_PORT_DB%/*} + HOST=${HOST_PORT%:*} + PORT=${HOST_PORT#*:} + DB=${HOST_PORT_DB#*/} + + echo "POSTGRES_URI: $POSTGRES_URI" + echo "USER: $USER" + echo "PASS: $PASS" + echo "HOST: $HOST" + echo "PORT: $PORT" + echo "DB: $DB" + + if [ -z "$PASS" ]; then + echo "Error: POSTGRES_URI does not contain a password" + exit 1 + fi + + echo "Waiting for PostgreSQL..." + max_retries=30 + retries=0 + until PGPASSWORD=$PASS pg_isready -h $HOST -p $PORT -U $USER -d $DB; do + retries=$((retries + 1)) + if [ $retries -eq $max_retries ]; then + echo "Error: PostgreSQL did not become ready in time" + exit 1 + fi + echo "Waiting for PostgreSQL... (Attempt $retries/$max_retries)" + sleep 2 + done + echo "PostgreSQL is ready!" + + # Verify database connection + if ! PGPASSWORD=$PASS psql -h $HOST -p $PORT -U $USER -d $DB -c "SELECT 1" > /dev/null 2>&1; then + echo "Error: Could not connect to PostgreSQL database" + exit 1 + fi + echo "PostgreSQL connection verified!" + fi +} + +# Check PostgreSQL +check_postgres + +# Check if command arguments were passed ($# is the number of arguments) +if [ $# -gt 0 ]; then + # If arguments exist, execute them (e.g., execute "arq core.workers...") + exec "$@" +else + # Otherwise, execute the default command (uv run start_server.py) + exec uv run uvicorn core.api:app --host $HOST --port $PORT --loop asyncio --http auto --ws auto --lifespan auto +fi \ No newline at end of file diff --git a/docker/morphik.toml.default b/docker/morphik.toml.default new file mode 100644 index 00000000..6c7b24ee --- /dev/null +++ b/docker/morphik.toml.default @@ -0,0 +1,41 @@ +[api] +host = "0.0.0.0" +port = 8000 +reload = false + +[auth] +jwt_algorithm = "HS256" +dev_mode = true +dev_entity_id = "dev_user" +dev_entity_type = "developer" +dev_permissions = ["read", "write", "admin"] + +[completion] +provider = "ollama" +model_name = "llama2" +base_url = "http://localhost:11434" + +[database] +provider = "postgres" + +[embedding] +provider = "ollama" +model_name = "nomic-embed-text" +dimensions = 768 +similarity_metric = "cosine" +base_url = "http://localhost:11434" + +[parser] +chunk_size = 1000 +chunk_overlap = 200 +use_unstructured_api = false + +[reranker] +use_reranker = false + +[storage] +provider = "local" +storage_path = "/app/storage" + +[vector_store] +provider = "pgvector" diff --git a/dockerfile b/dockerfile index 0d3c9d67..d7e61c61 100644 --- a/dockerfile +++ b/dockerfile @@ -101,96 +101,11 @@ ENV VIRTUAL_ENV=/app/.venv ENV PATH="/app/.venv/bin:/usr/local/bin:${PATH}" # Create default configuration -RUN echo '[api]\n\ -host = "0.0.0.0"\n\ -port = 8000\n\ -reload = false\n\ -\n\ -[auth]\n\ -jwt_algorithm = "HS256"\n\ -dev_mode = true\n\ -dev_entity_id = "dev_user"\n\ -dev_entity_type = "developer"\n\ -dev_permissions = ["read", "write", "admin"]\n\ -\n\ -[completion]\n\ -provider = "ollama"\n\ -model_name = "llama2"\n\ -base_url = "http://localhost:11434"\n\ -\n\ -[database]\n\ -provider = "postgres"\n\ -\n\ -[embedding]\n\ -provider = "ollama"\n\ -model_name = "nomic-embed-text"\n\ -dimensions = 768\n\ -similarity_metric = "cosine"\n\ -base_url = "http://localhost:11434"\n\ -\n\ -[parser]\n\ -chunk_size = 1000\n\ -chunk_overlap = 200\n\ -use_unstructured_api = false\n\ -\n\ -[reranker]\n\ -use_reranker = false\n\ -\n\ -[storage]\n\ -provider = "local"\n\ -storage_path = "/app/storage"\n\ -\n\ -[vector_store]\n\ -provider = "pgvector"\n\ -' > /app/morphik.toml.default +COPY docker/morphik.toml.default /app/morphik.toml.default # Create startup script -RUN echo '#!/bin/bash\n\ -set -e\n\ -\n\ -# Copy default config if none exists\n\ -if [ ! -f /app/morphik.toml ]; then\n\ - cp /app/morphik.toml.default /app/morphik.toml\n\ -fi\n\ -\n\ -# Function to check PostgreSQL\n\ -check_postgres() {\n\ - if [ -n "$POSTGRES_URI" ]; then\n\ - echo "Waiting for PostgreSQL..."\n\ - max_retries=30\n\ - retries=0\n\ - until PGPASSWORD=$PGPASSWORD pg_isready -h postgres -U morphik -d morphik; do\n\ - retries=$((retries + 1))\n\ - if [ $retries -eq $max_retries ]; then\n\ - echo "Error: PostgreSQL did not become ready in time"\n\ - exit 1\n\ - fi\n\ - echo "Waiting for PostgreSQL... (Attempt $retries/$max_retries)"\n\ - sleep 2\n\ - done\n\ - echo "PostgreSQL is ready!"\n\ - \n\ - # Verify database connection\n\ - if ! PGPASSWORD=$PGPASSWORD psql -h postgres -U morphik -d morphik -c "SELECT 1" > /dev/null 2>&1; then\n\ - echo "Error: Could not connect to PostgreSQL database"\n\ - exit 1\n\ - fi\n\ - echo "PostgreSQL connection verified!"\n\ - fi\n\ -}\n\ -\n\ -# Check PostgreSQL\n\ -check_postgres\n\ -\n\ -# Check if command arguments were passed ($# is the number of arguments)\n\ -if [ $# -gt 0 ]; then\n\ - # If arguments exist, execute them (e.g., execute "arq core.workers...")\n\ - exec "$@"\n\ -else\n\ - # Otherwise, execute the default command (uv run start_server.py)\n\ - exec uv run uvicorn core.api:app --host $HOST --port $PORT --loop asyncio --http auto --ws auto --lifespan auto\n\ -fi\n\ -' > /app/docker-entrypoint.sh && chmod +x /app/docker-entrypoint.sh +COPY docker/docker-entrypoint.sh /app/docker-entrypoint.sh +RUN chmod +x /app/docker-entrypoint.sh # Copy application code # pyproject.toml is needed for uv to identify the project context for `uv run` From fa9c481b07d0248fc51035dece5b5dd844afcad6 Mon Sep 17 00:00:00 2001 From: Spencer Tipping Date: Mon, 26 May 2025 09:17:14 -0500 Subject: [PATCH 02/21] Preserving stderr to help debug connection issues --- docker/docker-entrypoint.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh index cd5027b9..6db4ffa5 100644 --- a/docker/docker-entrypoint.sh +++ b/docker/docker-entrypoint.sh @@ -48,7 +48,8 @@ check_postgres() { echo "PostgreSQL is ready!" # Verify database connection - if ! PGPASSWORD=$PASS psql -h $HOST -p $PORT -U $USER -d $DB -c "SELECT 1" > /dev/null 2>&1; then + # NOTE: preserve stderr for debugging + if ! PGPASSWORD=$PASS psql -h $HOST -p $PORT -U $USER -d $DB -c "SELECT 1"; then echo "Error: Could not connect to PostgreSQL database" exit 1 fi From 0d42518b37f92f88ab2fe70f576c8b810a56f258 Mon Sep 17 00:00:00 2001 From: Adityavardhan Agrawal Date: Sun, 25 May 2025 15:51:33 -0700 Subject: [PATCH 03/21] update npm package version --- ee/ui-component/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/ui-component/package.json b/ee/ui-component/package.json index 7053469e..def83d82 100644 --- a/ee/ui-component/package.json +++ b/ee/ui-component/package.json @@ -1,6 +1,6 @@ { "name": "@morphik/ui", - "version": "0.2.22", + "version": "0.2.24", "private": true, "description": "Modern UI component for Morphik - A powerful document processing and querying system", "author": "Morphik Team", From 25550668ab7775837317b1cbf5533d5d7a311453 Mon Sep 17 00:00:00 2001 From: Spencer Tipping Date: Mon, 26 May 2025 09:31:35 -0500 Subject: [PATCH 04/21] Trying awk parse to fix edge cases --- docker/docker-entrypoint.sh | 48 ++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh index 6db4ffa5..edf53881 100644 --- a/docker/docker-entrypoint.sh +++ b/docker/docker-entrypoint.sh @@ -11,15 +11,45 @@ check_postgres() { if [ -n "$POSTGRES_URI" ]; then # Extract connection details from POSTGRES_URI, which can be # postgresql:// or postgresql+asyncpg:// - URI=${POSTGRES_URI#postgresql*://} - USER_PASS=${URI%%@*} - USER=${USER_PASS%:*} - PASS=${USER_PASS#*:} - HOST_PORT_DB=${URI#*@} - HOST_PORT=${HOST_PORT_DB%/*} - HOST=${HOST_PORT%:*} - PORT=${HOST_PORT#*:} - DB=${HOST_PORT_DB#*/} + + # Using awk for more robust URI parsing that handles special characters + eval $(echo "$POSTGRES_URI" | awk -F'postgresql' '{print $2}' | awk '{ + # Remove the +asyncpg if present and get the URI part after :// + sub(/^[+a-z]*:\/\//, ""); + uri = $0; + + # Split into user:pass@host:port/db + if (match(uri, /([^@]+)@([^\/]+)(\/(.*))?/, m)) { + # Handle user:password + user_pass = m[1]; + if (split(user_pass, up, ":") == 2) { + printf "USER=\"%s\"\n", up[1]; + printf "PASS=\"%s\"\n", up[2]; + } else { + printf "USER=\"%s\"\n", user_pass; + printf "PASS=\"\"\n"; + } + + # Handle host:port/db + host_port_db = m[2] m[3]; + if (split(host_port_db, hpd, "\/") > 1) { + host_port = hpd[1]; + printf "DB=\"%s\"\n", hpd[2]; + } else { + host_port = host_port_db; + printf "DB=\"postgres\"\n"; # Default database + } + + # Handle host:port + if (split(host_port, hp, ":") == 2) { + printf "HOST=\"%s\"\n", hp[1]; + printf "PORT=\"%s\"\n", hp[2]; + } else { + printf "HOST=\"%s\"\n", host_port; + printf "PORT=\"5432\"\n"; # Default port + } + } + }') echo "POSTGRES_URI: $POSTGRES_URI" echo "USER: $USER" From 63c45953e0999afbc3d17b29852df3e082dda585 Mon Sep 17 00:00:00 2001 From: Spencer Tipping Date: Mon, 26 May 2025 09:44:53 -0500 Subject: [PATCH 05/21] Split postgres parser into its own script so I can test jazzberry's alleged failure cases --- docker/docker-entrypoint.sh | 38 +-------------------------------- docker/parse-postgres-uri.sh | 41 ++++++++++++++++++++++++++++++++++++ dockerfile | 2 +- 3 files changed, 43 insertions(+), 38 deletions(-) mode change 100644 => 100755 docker/docker-entrypoint.sh create mode 100755 docker/parse-postgres-uri.sh diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh old mode 100644 new mode 100755 index edf53881..2de4c4a1 --- a/docker/docker-entrypoint.sh +++ b/docker/docker-entrypoint.sh @@ -13,43 +13,7 @@ check_postgres() { # postgresql:// or postgresql+asyncpg:// # Using awk for more robust URI parsing that handles special characters - eval $(echo "$POSTGRES_URI" | awk -F'postgresql' '{print $2}' | awk '{ - # Remove the +asyncpg if present and get the URI part after :// - sub(/^[+a-z]*:\/\//, ""); - uri = $0; - - # Split into user:pass@host:port/db - if (match(uri, /([^@]+)@([^\/]+)(\/(.*))?/, m)) { - # Handle user:password - user_pass = m[1]; - if (split(user_pass, up, ":") == 2) { - printf "USER=\"%s\"\n", up[1]; - printf "PASS=\"%s\"\n", up[2]; - } else { - printf "USER=\"%s\"\n", user_pass; - printf "PASS=\"\"\n"; - } - - # Handle host:port/db - host_port_db = m[2] m[3]; - if (split(host_port_db, hpd, "\/") > 1) { - host_port = hpd[1]; - printf "DB=\"%s\"\n", hpd[2]; - } else { - host_port = host_port_db; - printf "DB=\"postgres\"\n"; # Default database - } - - # Handle host:port - if (split(host_port, hp, ":") == 2) { - printf "HOST=\"%s\"\n", hp[1]; - printf "PORT=\"%s\"\n", hp[2]; - } else { - printf "HOST=\"%s\"\n", host_port; - printf "PORT=\"5432\"\n"; # Default port - } - } - }') + eval $(./parse-postgres-uri.sh "$POSTGRES_URI") echo "POSTGRES_URI: $POSTGRES_URI" echo "USER: $USER" diff --git a/docker/parse-postgres-uri.sh b/docker/parse-postgres-uri.sh new file mode 100755 index 00000000..f8d034fb --- /dev/null +++ b/docker/parse-postgres-uri.sh @@ -0,0 +1,41 @@ +#!/bin/bash +set -euo pipefail + +# Usage: eval $(parse-postgres-uri.sh "postgresql://user:pass@host:port/db") +echo "$1" | awk -F'postgresql' '{print $2}' | awk '{ + # Remove the +asyncpg if present and get the URI part after :// + sub(/^[+a-z]*:\/\//, ""); + uri = $0; + + # Split into user:pass@host:port/db + if (match(uri, /([^@]+)@([^\/]+)(\/(.*))?/, m)) { + # Handle user:password + user_pass = m[1]; + if (split(user_pass, up, ":") == 2) { + printf "USER=\"%s\"\n", up[1]; + printf "PASS=\"%s\"\n", up[2]; + } else { + printf "USER=\"%s\"\n", user_pass; + printf "PASS=\"\"\n"; + } + + # Handle host:port/db + host_port_db = m[2] m[3]; + if (split(host_port_db, hpd, "/") > 1) { + host_port = hpd[1]; + printf "DB=\"%s\"\n", hpd[2]; + } else { + host_port = host_port_db; + printf "DB=\"postgres\"\n"; # Default database + } + + # Handle host:port + if (split(host_port, hp, ":") == 2) { + printf "HOST=\"%s\"\n", hp[1]; + printf "PORT=\"%s\"\n", hp[2]; + } else { + printf "HOST=\"%s\"\n", host_port; + printf "PORT=\"5432\"\n"; # Default port + } + } +}' \ No newline at end of file diff --git a/dockerfile b/dockerfile index d7e61c61..7c724dd9 100644 --- a/dockerfile +++ b/dockerfile @@ -104,8 +104,8 @@ ENV PATH="/app/.venv/bin:/usr/local/bin:${PATH}" COPY docker/morphik.toml.default /app/morphik.toml.default # Create startup script +COPY docker/parse-postgres-uri.sh /app/parse-postgres-uri.sh COPY docker/docker-entrypoint.sh /app/docker-entrypoint.sh -RUN chmod +x /app/docker-entrypoint.sh # Copy application code # pyproject.toml is needed for uv to identify the project context for `uv run` From 71b663fbf00cac7ea2170b25605ce172ead13924 Mon Sep 17 00:00:00 2001 From: Spencer Tipping Date: Mon, 26 May 2025 09:54:44 -0500 Subject: [PATCH 06/21] More edge case fixes --- docker/parse-postgres-uri.sh | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/docker/parse-postgres-uri.sh b/docker/parse-postgres-uri.sh index f8d034fb..d45b9cef 100755 --- a/docker/parse-postgres-uri.sh +++ b/docker/parse-postgres-uri.sh @@ -2,11 +2,15 @@ set -euo pipefail # Usage: eval $(parse-postgres-uri.sh "postgresql://user:pass@host:port/db") + echo "$1" | awk -F'postgresql' '{print $2}' | awk '{ # Remove the +asyncpg if present and get the URI part after :// sub(/^[+a-z]*:\/\//, ""); uri = $0; - + + # Remove query parameters + sub(/\?.*$/, "", uri); + # Split into user:pass@host:port/db if (match(uri, /([^@]+)@([^\/]+)(\/(.*))?/, m)) { # Handle user:password @@ -38,4 +42,13 @@ echo "$1" | awk -F'postgresql' '{print $2}' | awk '{ printf "PORT=\"5432\"\n"; # Default port } } +}' | awk '{ + # Decode URI escapes + result = $0; + while (match(result, /%[0-9A-Fa-f]{2}/)) { + hex = substr(result, RSTART + 1, 2); + dec = sprintf("%c", strtonum("0x" hex)); + result = substr(result, 1, RSTART - 1) dec substr(result, RSTART + 3); + } + print result }' \ No newline at end of file From bbb72723d57a235d425d0b6242d7a36c943ec8c9 Mon Sep 17 00:00:00 2001 From: Spencer Tipping Date: Mon, 26 May 2025 10:28:34 -0500 Subject: [PATCH 07/21] Postgres need not use a password, especially when dealing with obscured or corrupt URIs --- docker/docker-entrypoint.sh | 5 ----- docker/parse-postgres-uri.sh | 8 ++++++++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh index 2de4c4a1..adbb160e 100755 --- a/docker/docker-entrypoint.sh +++ b/docker/docker-entrypoint.sh @@ -22,11 +22,6 @@ check_postgres() { echo "PORT: $PORT" echo "DB: $DB" - if [ -z "$PASS" ]; then - echo "Error: POSTGRES_URI does not contain a password" - exit 1 - fi - echo "Waiting for PostgreSQL..." max_retries=30 retries=0 diff --git a/docker/parse-postgres-uri.sh b/docker/parse-postgres-uri.sh index d45b9cef..45ce0e79 100755 --- a/docker/parse-postgres-uri.sh +++ b/docker/parse-postgres-uri.sh @@ -3,6 +3,14 @@ set -euo pipefail # Usage: eval $(parse-postgres-uri.sh "postgresql://user:pass@host:port/db") +# Make sure all variables are ultimately set to avoid downstream failures in +# scripts that require defined variables. +echo USER="" +echo PASS="" +echo HOST="" +echo PORT="" +echo DB="" + echo "$1" | awk -F'postgresql' '{print $2}' | awk '{ # Remove the +asyncpg if present and get the URI part after :// sub(/^[+a-z]*:\/\//, ""); From 38fd569d93724cb431f9e0cb22d93e394f65cbc6 Mon Sep 17 00:00:00 2001 From: Spencer Tipping Date: Mon, 26 May 2025 10:41:12 -0500 Subject: [PATCH 08/21] Allowing postgres URI parse failures to accommodate the value "***localhost:5432/morphik", which happens during docker build --- docker/parse-postgres-uri.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docker/parse-postgres-uri.sh b/docker/parse-postgres-uri.sh index 45ce0e79..5179d9df 100755 --- a/docker/parse-postgres-uri.sh +++ b/docker/parse-postgres-uri.sh @@ -5,11 +5,11 @@ set -euo pipefail # Make sure all variables are ultimately set to avoid downstream failures in # scripts that require defined variables. -echo USER="" -echo PASS="" -echo HOST="" -echo PORT="" -echo DB="" +echo 'USER=""' +echo 'PASS=""' +echo 'HOST=""' +echo 'PORT=""' +echo 'DB=""' echo "$1" | awk -F'postgresql' '{print $2}' | awk '{ # Remove the +asyncpg if present and get the URI part after :// @@ -59,4 +59,4 @@ echo "$1" | awk -F'postgresql' '{print $2}' | awk '{ result = substr(result, 1, RSTART - 1) dec substr(result, RSTART + 3); } print result -}' \ No newline at end of file +}' || echo "POSTGRES_URI_PARSE_FAILURE=1" \ No newline at end of file From efba09cd9ee5ca732ddca4787e5ec578c22cd60a Mon Sep 17 00:00:00 2001 From: Spencer Tipping Date: Mon, 26 May 2025 10:46:24 -0500 Subject: [PATCH 09/21] Modified script to work with github awk version, which is non-GNU and has no strtonum function --- docker/parse-postgres-uri.sh | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/docker/parse-postgres-uri.sh b/docker/parse-postgres-uri.sh index 5179d9df..a9e20fea 100755 --- a/docker/parse-postgres-uri.sh +++ b/docker/parse-postgres-uri.sh @@ -54,9 +54,19 @@ echo "$1" | awk -F'postgresql' '{print $2}' | awk '{ # Decode URI escapes result = $0; while (match(result, /%[0-9A-Fa-f]{2}/)) { - hex = substr(result, RSTART + 1, 2); - dec = sprintf("%c", strtonum("0x" hex)); - result = substr(result, 1, RSTART - 1) dec substr(result, RSTART + 3); + hex = tolower(substr(result, RSTART + 1, 2)); + # Convert hex to decimal using a more portable method + dec = 0; + for (i = 1; i <= 2; i++) { + c = substr(hex, i, 1); + if (c ~ /[0-9]/) { + val = index("0123456789", c) - 1; + } else { + val = index("abcdef", c) + 9; + } + dec = dec * 16 + val; + } + result = substr(result, 1, RSTART - 1) sprintf("%c", dec) substr(result, RSTART + 3); } print result }' || echo "POSTGRES_URI_PARSE_FAILURE=1" \ No newline at end of file From c3d9933937b80dd3a05f0066d729f6b0832f1a55 Mon Sep 17 00:00:00 2001 From: Spencer Tipping Date: Mon, 26 May 2025 11:01:19 -0500 Subject: [PATCH 10/21] You're killing me github --- docker/parse-postgres-uri.sh | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/docker/parse-postgres-uri.sh b/docker/parse-postgres-uri.sh index a9e20fea..f1c0fb18 100755 --- a/docker/parse-postgres-uri.sh +++ b/docker/parse-postgres-uri.sh @@ -50,23 +50,4 @@ echo "$1" | awk -F'postgresql' '{print $2}' | awk '{ printf "PORT=\"5432\"\n"; # Default port } } -}' | awk '{ - # Decode URI escapes - result = $0; - while (match(result, /%[0-9A-Fa-f]{2}/)) { - hex = tolower(substr(result, RSTART + 1, 2)); - # Convert hex to decimal using a more portable method - dec = 0; - for (i = 1; i <= 2; i++) { - c = substr(hex, i, 1); - if (c ~ /[0-9]/) { - val = index("0123456789", c) - 1; - } else { - val = index("abcdef", c) + 9; - } - dec = dec * 16 + val; - } - result = substr(result, 1, RSTART - 1) sprintf("%c", dec) substr(result, RSTART + 3); - } - print result -}' || echo "POSTGRES_URI_PARSE_FAILURE=1" \ No newline at end of file +}' | perl -pe 's/%([0-9a-fA-F]{2})/chr(hex($1))/ge' || echo "POSTGRES_URI_PARSE_FAILURE=1" \ No newline at end of file From 52a3654e0dc08e7cada2a77e1c3ee5d53fd4c655 Mon Sep 17 00:00:00 2001 From: Spencer Tipping Date: Mon, 26 May 2025 11:52:23 -0500 Subject: [PATCH 11/21] OK, doing everything in python since we know the container has that --- docker/docker-entrypoint.sh | 2 +- docker/parse-postgres-uri.py | 102 +++++++++++++++++++++++++++++++++++ docker/parse-postgres-uri.sh | 53 ------------------ dockerfile | 2 +- 4 files changed, 104 insertions(+), 55 deletions(-) create mode 100755 docker/parse-postgres-uri.py delete mode 100755 docker/parse-postgres-uri.sh diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh index adbb160e..a1535bcf 100755 --- a/docker/docker-entrypoint.sh +++ b/docker/docker-entrypoint.sh @@ -13,7 +13,7 @@ check_postgres() { # postgresql:// or postgresql+asyncpg:// # Using awk for more robust URI parsing that handles special characters - eval $(./parse-postgres-uri.sh "$POSTGRES_URI") + eval $(./parse-postgres-uri.py "$POSTGRES_URI") echo "POSTGRES_URI: $POSTGRES_URI" echo "USER: $USER" diff --git a/docker/parse-postgres-uri.py b/docker/parse-postgres-uri.py new file mode 100755 index 00000000..7075e9b1 --- /dev/null +++ b/docker/parse-postgres-uri.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +import re +import sys +import urllib.parse +from typing import Dict + +def parse_postgres_uri(uri: str) -> Dict[str, str]: + """Parse a PostgreSQL connection URI into its components. + + Args: + uri: The PostgreSQL connection URI (e.g., 'postgresql://user:pass@host:port/db') + + Returns: + Dictionary containing the parsed components (USER, PASS, HOST, PORT, DB) + """ + # Default values + result = { + 'USER': '', + 'PASS': '', + 'HOST': '', + 'PORT': '5432', # Default PostgreSQL port + 'DB': 'postgres' # Default database name + } + + try: + # PostgreSQL URI pattern: + # postgresql[+driver]://[user[:password]@][netloc][:port][/dbname][?param1=value1&...] + pattern = r''' + ^ + postgresql(?:\+[a-z]+)?:// # scheme with optional driver + (?:([^:/?#@]+)(?::([^/?#@]*))?@)? # user:password@ + (?:([^:/?#]+)(?::(\d+))?)? # host:port + (?:/([^?#]*))? # /dbname + (?:\?([^#]*))? # ?query + $ + ''' + + match = re.match(pattern, uri.strip(), re.VERBOSE) + if not match: + raise ValueError("Invalid PostgreSQL URI format") + + user, password, host, port, dbname, query = match.groups() + + # Handle username and password + if user: + result['USER'] = urllib.parse.unquote(user) + if password: + result['PASS'] = urllib.parse.unquote(password) + + # Handle host and port + if host: + result['HOST'] = host + if port: + result['PORT'] = port + + # Handle database name + if dbname: + result['DB'] = urllib.parse.unquote(dbname) + + # Handle query parameters (e.g., for password in query string) + if query and not result['PASS']: + for param in query.split('&'): # type: ignore + if '=' in param: + key, value = param.split('=', 1) + if key.lower() == 'password': + result['PASS'] = urllib.parse.unquote(value) + break + + except Exception as e: + # If any error occurs, print the failure message and exit + print(f"Error parsing PostgreSQL URI: {e}", file=sys.stderr) + print("POSTGRES_URI_PARSE_FAILURE=1", file=sys.stderr) + sys.exit(1) + + return result + +def main(): + if len(sys.argv) != 2: + print("Usage: eval $(parse-postgres-uri.sh \"postgresql://user:pass@host:port/db\")", file=sys.stderr) + sys.exit(1) + + # Print default empty values first (for compatibility with original script) + print('USER=""') + print('PASS=""') + print('HOST=""') + print('PORT=""') + print('DB=""') + + # Parse the URI and print the results + try: + components = parse_postgres_uri(sys.argv[1]) + for key, value in components.items(): + # Escape special characters in the value for shell compatibility + escaped_value = value.replace('"', '\\"').replace('`', '\\`').replace('$', '\\$') + print(f'{key}="{escaped_value}"') + except Exception as e: + print(f'Error parsing PostgreSQL URI: {e}', file=sys.stderr) + print('POSTGRES_URI_PARSE_FAILURE=1') + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/docker/parse-postgres-uri.sh b/docker/parse-postgres-uri.sh deleted file mode 100755 index f1c0fb18..00000000 --- a/docker/parse-postgres-uri.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# Usage: eval $(parse-postgres-uri.sh "postgresql://user:pass@host:port/db") - -# Make sure all variables are ultimately set to avoid downstream failures in -# scripts that require defined variables. -echo 'USER=""' -echo 'PASS=""' -echo 'HOST=""' -echo 'PORT=""' -echo 'DB=""' - -echo "$1" | awk -F'postgresql' '{print $2}' | awk '{ - # Remove the +asyncpg if present and get the URI part after :// - sub(/^[+a-z]*:\/\//, ""); - uri = $0; - - # Remove query parameters - sub(/\?.*$/, "", uri); - - # Split into user:pass@host:port/db - if (match(uri, /([^@]+)@([^\/]+)(\/(.*))?/, m)) { - # Handle user:password - user_pass = m[1]; - if (split(user_pass, up, ":") == 2) { - printf "USER=\"%s\"\n", up[1]; - printf "PASS=\"%s\"\n", up[2]; - } else { - printf "USER=\"%s\"\n", user_pass; - printf "PASS=\"\"\n"; - } - - # Handle host:port/db - host_port_db = m[2] m[3]; - if (split(host_port_db, hpd, "/") > 1) { - host_port = hpd[1]; - printf "DB=\"%s\"\n", hpd[2]; - } else { - host_port = host_port_db; - printf "DB=\"postgres\"\n"; # Default database - } - - # Handle host:port - if (split(host_port, hp, ":") == 2) { - printf "HOST=\"%s\"\n", hp[1]; - printf "PORT=\"%s\"\n", hp[2]; - } else { - printf "HOST=\"%s\"\n", host_port; - printf "PORT=\"5432\"\n"; # Default port - } - } -}' | perl -pe 's/%([0-9a-fA-F]{2})/chr(hex($1))/ge' || echo "POSTGRES_URI_PARSE_FAILURE=1" \ No newline at end of file diff --git a/dockerfile b/dockerfile index 7c724dd9..5c22a713 100644 --- a/dockerfile +++ b/dockerfile @@ -104,7 +104,7 @@ ENV PATH="/app/.venv/bin:/usr/local/bin:${PATH}" COPY docker/morphik.toml.default /app/morphik.toml.default # Create startup script -COPY docker/parse-postgres-uri.sh /app/parse-postgres-uri.sh +COPY docker/parse-postgres-uri.py /app/parse-postgres-uri.py COPY docker/docker-entrypoint.sh /app/docker-entrypoint.sh # Copy application code From a60bbe75d65304e174dc7b7f7746ec58ff9a343c Mon Sep 17 00:00:00 2001 From: Spencer Tipping Date: Mon, 26 May 2025 11:57:16 -0500 Subject: [PATCH 12/21] Log PG values only on failure --- docker/docker-entrypoint.sh | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh index a1535bcf..a7bae6c5 100755 --- a/docker/docker-entrypoint.sh +++ b/docker/docker-entrypoint.sh @@ -15,13 +15,6 @@ check_postgres() { # Using awk for more robust URI parsing that handles special characters eval $(./parse-postgres-uri.py "$POSTGRES_URI") - echo "POSTGRES_URI: $POSTGRES_URI" - echo "USER: $USER" - echo "PASS: $PASS" - echo "HOST: $HOST" - echo "PORT: $PORT" - echo "DB: $DB" - echo "Waiting for PostgreSQL..." max_retries=30 retries=0 @@ -40,6 +33,12 @@ check_postgres() { # NOTE: preserve stderr for debugging if ! PGPASSWORD=$PASS psql -h $HOST -p $PORT -U $USER -d $DB -c "SELECT 1"; then echo "Error: Could not connect to PostgreSQL database" + echo "POSTGRES_URI: $POSTGRES_URI" + echo "USER: $USER" + echo "PASS: $PASS" + echo "HOST: $HOST" + echo "PORT: $PORT" + echo "DB: $DB" exit 1 fi echo "PostgreSQL connection verified!" From a9ce671fb585d2c225e0ca0880e0c41387b68442 Mon Sep 17 00:00:00 2001 From: Spencer Tipping Date: Mon, 26 May 2025 12:07:34 -0500 Subject: [PATCH 13/21] Spinning up DB container during test --- .github/workflows/docker-build.yml | 79 +++++++++++++++++++++++------- 1 file changed, 61 insertions(+), 18 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 3e932555..ea920fb9 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -159,11 +159,49 @@ jobs: [graph] model = "openai_gpt4-1" enable_entity_resolution = true + EOF - # Start container in detached mode with config mounted - CONTAINER_ID=$(docker run -d -p 8000:8000 \ - -e POSTGRES_URI="postgresql://morphik:morphik@localhost:5432/morphik" \ - -v "$(pwd)/morphik.toml.test:/app/morphik.toml" \ + # Create a custom network for the containers to communicate + NETWORK_NAME="morphik-test-network" + docker network create $NETWORK_NAME + + # Start pgvector PostgreSQL container + echo "Starting pgvector PostgreSQL container..." + DB_CONTAINER_ID=$(docker run -d \ + --name pgvector-test \ + --network $NETWORK_NAME \ + -e POSTGRES_USER=morphik \ + -e POSTGRES_PASSWORD=morphik \ + -e POSTGRES_DB=morphik \ + -p 5432:5432 \ + ankane/pgvector:latest) + + # Wait for PostgreSQL to be ready + echo "Waiting for PostgreSQL to be ready..." + for i in {1..30}; do + if docker exec $DB_CONTAINER_ID pg_isready -U morphik -d morphik; then + echo "PostgreSQL is ready!" + break + fi + if [ $i -eq 30 ]; then + echo "PostgreSQL failed to start" + docker logs $DB_CONTAINER_ID + exit 1 + fi + sleep 1 + done + + # Enable pgvector extension + echo "Enabling pgvector extension..." + docker exec $DB_CONTAINER_ID psql -U morphik -d morphik -c 'CREATE EXTENSION IF NOT EXISTS vector;' + + # Start application container + echo "Starting application container..." + CONTAINER_ID=$(docker run -d \ + --network $NETWORK_NAME \ + -p 8000:8000 \ + -e POSTGRES_URI="postgresql://morphik:morphik@pgvector-test:5432/morphik" \ + -v "$(pwd)/morphik.toml:/app/morphik.toml" \ "$IMAGE_TAG") echo "Started container: $CONTAINER_ID" @@ -190,25 +228,30 @@ jobs: echo "โŒ Server failed to respond within ${timeout} seconds" echo "Container logs:" docker logs "$CONTAINER_ID" - docker stop "$CONTAINER_ID" - docker rm "$CONTAINER_ID" + echo "Database logs:" + docker logs "$DB_CONTAINER_ID" + docker stop "$CONTAINER_ID" "$DB_CONTAINER_ID" || true + docker rm "$CONTAINER_ID" "$DB_CONTAINER_ID" || true + docker network rm "$NETWORK_NAME" || true exit 1 fi # Verify the response is actually 200 - HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/ping) - if [ "$HTTP_CODE" = "200" ]; then - echo "โœ… Health check passed - /ping returned HTTP $HTTP_CODE" - else - echo "โŒ Health check failed - /ping returned HTTP $HTTP_CODE" + response_code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/ping) + if [ "$response_code" -ne 200 ]; then + echo "โŒ Unexpected response code: $response_code" + echo "Container logs:" docker logs "$CONTAINER_ID" - docker stop "$CONTAINER_ID" - docker rm "$CONTAINER_ID" + echo "Database logs:" + docker logs "$DB_CONTAINER_ID" + docker stop "$CONTAINER_ID" "$DB_CONTAINER_ID" || true + docker rm "$CONTAINER_ID" "$DB_CONTAINER_ID" || true + docker network rm "$NETWORK_NAME" || true exit 1 fi - # Clean up - echo "๐Ÿงน Cleaning up container" - docker stop "$CONTAINER_ID" - docker rm "$CONTAINER_ID" - echo "โœ… Test completed successfully" \ No newline at end of file + echo "โœ… Tests passed!" + docker stop "$CONTAINER_ID" "$DB_CONTAINER_ID" || true + docker rm "$CONTAINER_ID" "$DB_CONTAINER_ID" || true + docker network rm "$NETWORK_NAME" || true + echo "โœ… Test completed successfully" \ No newline at end of file From 25fa23ee3156a9efbdee1cbc003d8e1fbe052da3 Mon Sep 17 00:00:00 2001 From: Spencer Tipping Date: Mon, 26 May 2025 12:19:07 -0500 Subject: [PATCH 14/21] Using async PG driver --- .github/workflows/docker-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index ea920fb9..a408bfed 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -200,7 +200,7 @@ jobs: CONTAINER_ID=$(docker run -d \ --network $NETWORK_NAME \ -p 8000:8000 \ - -e POSTGRES_URI="postgresql://morphik:morphik@pgvector-test:5432/morphik" \ + -e POSTGRES_URI="postgresql+asyncpg://morphik:morphik@pgvector-test:5432/morphik" \ -v "$(pwd)/morphik.toml:/app/morphik.toml" \ "$IMAGE_TAG") From 6bc2eec72600635e17db34d3c0c1c1fdfaca2d01 Mon Sep 17 00:00:00 2001 From: Spencer Tipping Date: Mon, 26 May 2025 12:32:43 -0500 Subject: [PATCH 15/21] Trying longer timeout --- .github/workflows/docker-build.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index a408bfed..8779d0ce 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -203,26 +203,26 @@ jobs: -e POSTGRES_URI="postgresql+asyncpg://morphik:morphik@pgvector-test:5432/morphik" \ -v "$(pwd)/morphik.toml:/app/morphik.toml" \ "$IMAGE_TAG") - + echo "Started container: $CONTAINER_ID" - - # Wait for server to be ready with 60 second timeout - timeout=60 + + # Wait for server to be ready + timeout=300 # long timeout required to load checkpoint shards interval=2 elapsed=0 - + echo "Waiting for server to be ready..." while [ $elapsed -lt $timeout ]; do if curl -f -s http://localhost:8000/ping > /dev/null 2>&1; then echo "โœ… Server is responding to /ping endpoint" break fi - + echo "โณ Waiting for server... (${elapsed}s/${timeout}s)" sleep $interval elapsed=$((elapsed + interval)) done - + # Check if we timed out if [ $elapsed -ge $timeout ]; then echo "โŒ Server failed to respond within ${timeout} seconds" @@ -235,7 +235,7 @@ jobs: docker network rm "$NETWORK_NAME" || true exit 1 fi - + # Verify the response is actually 200 response_code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/ping) if [ "$response_code" -ne 200 ]; then @@ -249,7 +249,7 @@ jobs: docker network rm "$NETWORK_NAME" || true exit 1 fi - + echo "โœ… Tests passed!" docker stop "$CONTAINER_ID" "$DB_CONTAINER_ID" || true docker rm "$CONTAINER_ID" "$DB_CONTAINER_ID" || true From dffb99a98ceba0107841a4a6faa3d30fb4636fb5 Mon Sep 17 00:00:00 2001 From: Spencer Tipping Date: Mon, 26 May 2025 12:51:14 -0500 Subject: [PATCH 16/21] Added redis container --- .github/workflows/docker-build.yml | 46 ++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 8779d0ce..1d562426 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -195,12 +195,39 @@ jobs: echo "Enabling pgvector extension..." docker exec $DB_CONTAINER_ID psql -U morphik -d morphik -c 'CREATE EXTENSION IF NOT EXISTS vector;' + # Start Redis container + echo "Starting Redis container..." + REDIS_CONTAINER_ID=$(docker run -d \ + --name redis-test \ + --network $NETWORK_NAME \ + -p 6379:6379 \ + -v redis_data:/data \ + redis:7-alpine \ + redis-server --appendonly yes) + + # Wait for Redis to be ready + echo "Waiting for Redis to be ready..." + for i in {1..30}; do + if docker exec $REDIS_CONTAINER_ID redis-cli ping | grep -q PONG; then + echo "Redis is ready!" + break + fi + if [ $i -eq 30 ]; then + echo "Redis failed to start" + docker logs $REDIS_CONTAINER_ID + exit 1 + fi + sleep 1 + done + # Start application container echo "Starting application container..." CONTAINER_ID=$(docker run -d \ --network $NETWORK_NAME \ -p 8000:8000 \ -e POSTGRES_URI="postgresql+asyncpg://morphik:morphik@pgvector-test:5432/morphik" \ + -e REDIS_HOST="redis-test" \ + -e REDIS_PORT=6379 \ -v "$(pwd)/morphik.toml:/app/morphik.toml" \ "$IMAGE_TAG") @@ -230,9 +257,12 @@ jobs: docker logs "$CONTAINER_ID" echo "Database logs:" docker logs "$DB_CONTAINER_ID" - docker stop "$CONTAINER_ID" "$DB_CONTAINER_ID" || true - docker rm "$CONTAINER_ID" "$DB_CONTAINER_ID" || true + echo "Redis logs:" + docker logs "$REDIS_CONTAINER_ID" + docker stop "$CONTAINER_ID" "$DB_CONTAINER_ID" "$REDIS_CONTAINER_ID" || true + docker rm -f "$CONTAINER_ID" "$DB_CONTAINER_ID" "$REDIS_CONTAINER_ID" || true docker network rm "$NETWORK_NAME" || true + docker volume rm redis_data 2>/dev/null || true exit 1 fi @@ -244,14 +274,18 @@ jobs: docker logs "$CONTAINER_ID" echo "Database logs:" docker logs "$DB_CONTAINER_ID" - docker stop "$CONTAINER_ID" "$DB_CONTAINER_ID" || true - docker rm "$CONTAINER_ID" "$DB_CONTAINER_ID" || true + echo "Redis logs:" + docker logs "$REDIS_CONTAINER_ID" + docker stop "$CONTAINER_ID" "$DB_CONTAINER_ID" "$REDIS_CONTAINER_ID" || true + docker rm -f "$CONTAINER_ID" "$DB_CONTAINER_ID" "$REDIS_CONTAINER_ID" || true docker network rm "$NETWORK_NAME" || true + docker volume rm redis_data 2>/dev/null || true exit 1 fi echo "โœ… Tests passed!" - docker stop "$CONTAINER_ID" "$DB_CONTAINER_ID" || true - docker rm "$CONTAINER_ID" "$DB_CONTAINER_ID" || true + docker stop "$CONTAINER_ID" "$DB_CONTAINER_ID" "$REDIS_CONTAINER_ID" || true + docker rm -f "$CONTAINER_ID" "$DB_CONTAINER_ID" "$REDIS_CONTAINER_ID" || true docker network rm "$NETWORK_NAME" || true + docker volume rm redis_data 2>/dev/null || true echo "โœ… Test completed successfully" \ No newline at end of file From 145e66c43193a56c9c9e5a6c6899fec847a56e0a Mon Sep 17 00:00:00 2001 From: Spencer Tipping Date: Mon, 26 May 2025 13:07:50 -0500 Subject: [PATCH 17/21] Fixed redis container name --- .github/workflows/docker-build.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 1d562426..1101de9b 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -195,10 +195,10 @@ jobs: echo "Enabling pgvector extension..." docker exec $DB_CONTAINER_ID psql -U morphik -d morphik -c 'CREATE EXTENSION IF NOT EXISTS vector;' - # Start Redis container + # Start Redis container (NOTE: must be named "redis") echo "Starting Redis container..." REDIS_CONTAINER_ID=$(docker run -d \ - --name redis-test \ + --name redis \ --network $NETWORK_NAME \ -p 6379:6379 \ -v redis_data:/data \ @@ -226,8 +226,6 @@ jobs: --network $NETWORK_NAME \ -p 8000:8000 \ -e POSTGRES_URI="postgresql+asyncpg://morphik:morphik@pgvector-test:5432/morphik" \ - -e REDIS_HOST="redis-test" \ - -e REDIS_PORT=6379 \ -v "$(pwd)/morphik.toml:/app/morphik.toml" \ "$IMAGE_TAG") From 24aaef19d8101183440e683d63456329fd861cac Mon Sep 17 00:00:00 2001 From: Spencer Tipping Date: Mon, 26 May 2025 13:24:29 -0500 Subject: [PATCH 18/21] Fixed postgres credentials to not collide with primary bind addr/port --- docker/docker-entrypoint.sh | 14 +++++++------- docker/parse-postgres-uri.py | 12 ++++++------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh index a7bae6c5..b4969ece 100755 --- a/docker/docker-entrypoint.sh +++ b/docker/docker-entrypoint.sh @@ -18,7 +18,7 @@ check_postgres() { echo "Waiting for PostgreSQL..." max_retries=30 retries=0 - until PGPASSWORD=$PASS pg_isready -h $HOST -p $PORT -U $USER -d $DB; do + until PGPASSWORD=$PG_PASS pg_isready -h $PG_HOST -p $PG_PORT -U $PG_USER -d $PG_DB; do retries=$((retries + 1)) if [ $retries -eq $max_retries ]; then echo "Error: PostgreSQL did not become ready in time" @@ -31,14 +31,14 @@ check_postgres() { # Verify database connection # NOTE: preserve stderr for debugging - if ! PGPASSWORD=$PASS psql -h $HOST -p $PORT -U $USER -d $DB -c "SELECT 1"; then + if ! PGPASSWORD=$PG_PASS psql -h $PG_HOST -p $PG_PORT -U $PG_USER -d $PG_DB -c "SELECT 1"; then echo "Error: Could not connect to PostgreSQL database" echo "POSTGRES_URI: $POSTGRES_URI" - echo "USER: $USER" - echo "PASS: $PASS" - echo "HOST: $HOST" - echo "PORT: $PORT" - echo "DB: $DB" + echo "USER: $PG_USER" + echo "PASS: $PG_PASS" + echo "HOST: $PG_HOST" + echo "PORT: $PG_PORT" + echo "DB: $PG_DB" exit 1 fi echo "PostgreSQL connection verified!" diff --git a/docker/parse-postgres-uri.py b/docker/parse-postgres-uri.py index 7075e9b1..29f9a39a 100755 --- a/docker/parse-postgres-uri.py +++ b/docker/parse-postgres-uri.py @@ -80,11 +80,11 @@ def main(): sys.exit(1) # Print default empty values first (for compatibility with original script) - print('USER=""') - print('PASS=""') - print('HOST=""') - print('PORT=""') - print('DB=""') + print('PG_USER=""') + print('PG_PASS=""') + print('PG_HOST=""') + print('PG_PORT=""') + print('PG_DB=""') # Parse the URI and print the results try: @@ -92,7 +92,7 @@ def main(): for key, value in components.items(): # Escape special characters in the value for shell compatibility escaped_value = value.replace('"', '\\"').replace('`', '\\`').replace('$', '\\$') - print(f'{key}="{escaped_value}"') + print(f'PG_{key}="{escaped_value}"') except Exception as e: print(f'Error parsing PostgreSQL URI: {e}', file=sys.stderr) print('POSTGRES_URI_PARSE_FAILURE=1') From 2f716ad6efbbe0dae6bf58165e080fad839c254c Mon Sep 17 00:00:00 2001 From: Spencer Tipping Date: Mon, 26 May 2025 13:56:35 -0500 Subject: [PATCH 19/21] Using test toml file instead of repo-default --- .github/workflows/docker-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 1101de9b..2357f703 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -226,7 +226,7 @@ jobs: --network $NETWORK_NAME \ -p 8000:8000 \ -e POSTGRES_URI="postgresql+asyncpg://morphik:morphik@pgvector-test:5432/morphik" \ - -v "$(pwd)/morphik.toml:/app/morphik.toml" \ + -v "morphik.toml.test:/app/morphik.toml" \ "$IMAGE_TAG") echo "Started container: $CONTAINER_ID" From 1077fe9f7ee2539562b20986e0b8f19d90e80469 Mon Sep 17 00:00:00 2001 From: Spencer Tipping Date: Mon, 26 May 2025 17:20:58 -0500 Subject: [PATCH 20/21] Create test file if it doesn't exist (avoids mounting an empty dir) --- .github/workflows/docker-build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 2357f703..9fb208e1 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -221,6 +221,8 @@ jobs: done # Start application container + [[ -e morphik.toml.test ]] || cp morphik.toml morphik.toml.test + echo "Starting application container..." CONTAINER_ID=$(docker run -d \ --network $NETWORK_NAME \ From 67b35cc6062563b2c75e6869fbf509ba4a8b8082 Mon Sep 17 00:00:00 2001 From: Spencer Tipping Date: Mon, 26 May 2025 17:35:17 -0500 Subject: [PATCH 21/21] Properly locating the test file, which must exist since we created it earlier --- .github/workflows/docker-build.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 9fb208e1..0b17684d 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -221,14 +221,12 @@ jobs: done # Start application container - [[ -e morphik.toml.test ]] || cp morphik.toml morphik.toml.test - echo "Starting application container..." CONTAINER_ID=$(docker run -d \ --network $NETWORK_NAME \ -p 8000:8000 \ -e POSTGRES_URI="postgresql+asyncpg://morphik:morphik@pgvector-test:5432/morphik" \ - -v "morphik.toml.test:/app/morphik.toml" \ + -v "$(pwd)/morphik.toml.test:/app/morphik.toml" \ "$IMAGE_TAG") echo "Started container: $CONTAINER_ID"