From 759521dc83cb09c35757e9d4db8e2d9fa26a15aa Mon Sep 17 00:00:00 2001 From: jwatson Date: Mon, 5 May 2025 08:51:24 -0700 Subject: [PATCH 1/5] Switch back to single app for now --- .project-metadata.yaml | 26 ---------------- scripts/refresh_project.sh | 4 +-- scripts/startup_app.py | 6 ++-- scripts/startup_app.sh | 7 ++++- scripts/startup_java.sh | 61 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 72 insertions(+), 32 deletions(-) create mode 100644 scripts/startup_java.sh diff --git a/.project-metadata.yaml b/.project-metadata.yaml index 512eb22ff..0dc297216 100644 --- a/.project-metadata.yaml +++ b/.project-metadata.yaml @@ -45,32 +45,6 @@ tasks: entity_label: refresh_project short_summary: Run job to refresh the project from source and rebuilding. - - type: start_application - name: RagStudioQdrant - subdomain: ragstudioqdrant - bypass_authentication: false - static_subdomain: false - script: scripts/startup_qdrant.py - short_summary: Create and start RagStudio's Qdrant instance. - long_summary: Create and start RagStudio Qdrant instance. - cpu: 2 - memory: 4 - environment_variables: - TASK_TYPE: START_APPLICATION - - - type: start_application - name: RagStudioMetadata - subdomain: ragstudiometadata - bypass_authentication: false - static_subdomain: false - script: scripts/startup_metadata_app.py - short_summary: Create and start RagStudio's Metadata API instance. - long_summary: Create and start RagStudio Metadata API instance. - cpu: 2 - memory: 4 - environment_variables: - TASK_TYPE: START_APPLICATION - - type: start_application name: RagStudio subdomain: ragstudio diff --git a/scripts/refresh_project.sh b/scripts/refresh_project.sh index bfd0a356e..e9f7a33af 100644 --- a/scripts/refresh_project.sh +++ b/scripts/refresh_project.sh @@ -91,5 +91,5 @@ rm -rf node_modules tar -xzf ../../artifacts/node-dist.tar.gz cd ../../scripts -python install_qdrant_app.py -python install_metadata_app.py +#python install_qdrant_app.py +#python install_metadata_app.py diff --git a/scripts/startup_app.py b/scripts/startup_app.py index 3b39cba43..8571f2845 100644 --- a/scripts/startup_app.py +++ b/scripts/startup_app.py @@ -42,17 +42,17 @@ client = cmlapi.default_client() applications = client.list_applications(project_id=os.environ['CDSW_PROJECT_ID']) -metadata_base_url: str = "whatever, bro" +metadata_base_url: str = "http://localhost:8080" if len(applications.applications) > 0: for app in applications.applications: if app.name == "RagStudioMetadata": - metadata_base_url = f"{app.subdomain}.{os.environ['CDSW_DOMAIN']}" + metadata_base_url = f"https://{app.subdomain}.{os.environ['CDSW_DOMAIN']}" root_dir = "/home/cdsw/rag-studio" if os.getenv("IS_COMPOSABLE", "") != "" else "/home/cdsw" os.chdir(root_dir) env = os.environ.copy() -env["API_URL"] = f"https://{metadata_base_url}" +env["API_URL"] = f"{metadata_base_url}" print("Starting application with metadata base URL: ", metadata_base_url) diff --git a/scripts/startup_app.sh b/scripts/startup_app.sh index 4fba606cc..7820b895c 100755 --- a/scripts/startup_app.sh +++ b/scripts/startup_app.sh @@ -63,10 +63,15 @@ fi export RAG_DATABASES_DIR=$(pwd)/databases export LLM_SERVICE_URL="http://localhost:8081" -#export API_URL="http://localhost:8080" export MLFLOW_ENABLE_ARTIFACTS_PROGRESS_BAR=false export MLFLOW_RECONCILER_DATA_PATH=$(pwd)/llm-service/reconciler/data +# start Qdrant vector DB +qdrant/qdrant & 2>&1 + +# start up the jarva +scripts/startup_java.sh & 2>&1 + # start Python backend cd llm-service mkdir -p $MLFLOW_RECONCILER_DATA_PATH diff --git a/scripts/startup_java.sh b/scripts/startup_java.sh new file mode 100644 index 000000000..29086327b --- /dev/null +++ b/scripts/startup_java.sh @@ -0,0 +1,61 @@ +# +# CLOUDERA APPLIED MACHINE LEARNING PROTOTYPE (AMP) +# (C) Cloudera, Inc. 2025 +# All rights reserved. +# +# Applicable Open Source License: Apache 2.0 +# +# NOTE: Cloudera open source products are modular software products +# made up of hundreds of individual components, each of which was +# individually copyrighted. Each Cloudera open source product is a +# collective work under U.S. Copyright Law. Your license to use the +# collective work is as provided in your written agreement with +# Cloudera. Used apart from the collective work, this file is +# licensed for your use pursuant to the open source license +# identified above. +# +# This code is provided to you pursuant a written agreement with +# (i) Cloudera, Inc. or (ii) a third-party authorized to distribute +# this code. If you do not have a written agreement with Cloudera nor +# with an authorized and properly licensed third party, you do not +# have any rights to access nor to use this code. +# +# Absent a written agreement with Cloudera, Inc. ("Cloudera") to the +# contrary, A) CLOUDERA PROVIDES THIS CODE TO YOU WITHOUT WARRANTIES OF ANY +# KIND; (B) CLOUDERA DISCLAIMS ANY AND ALL EXPRESS AND IMPLIED +# WARRANTIES WITH RESPECT TO THIS CODE, INCLUDING BUT NOT LIMITED TO +# IMPLIED WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY AND +# FITNESS FOR A PARTICULAR PURPOSE; (C) CLOUDERA IS NOT LIABLE TO YOU, +# AND WILL NOT DEFEND, INDEMNIFY, NOR HOLD YOU HARMLESS FOR ANY CLAIMS +# ARISING FROM OR RELATED TO THE CODE; AND (D)WITH RESPECT TO YOUR EXERCISE +# OF ANY RIGHTS GRANTED TO YOU FOR THE CODE, CLOUDERA IS NOT LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, PUNITIVE OR +# CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT LIMITED TO, DAMAGES +# RELATED TO LOST REVENUE, LOST PROFITS, LOSS OF INCOME, LOSS OF +# BUSINESS ADVANTAGE OR UNAVAILABILITY, OR LOSS OR CORRUPTION OF +# DATA. +# + +set -ox pipefail + +RAG_STUDIO_INSTALL_DIR="/home/cdsw/rag-studio" +DB_URL_LOCATION="jdbc:h2:file:~/rag-studio/databases/rag" +if [ -z "$IS_COMPOSABLE" ]; then + RAG_STUDIO_INSTALL_DIR="/home/cdsw" + DB_URL_LOCATION="jdbc:h2:file:~/databases/rag" +fi + +export DB_URL=$DB_URL_LOCATION +export JAVA_ROOT=`ls ${RAG_STUDIO_INSTALL_DIR}/java-home` +export JAVA_HOME="${RAG_STUDIO_INSTALL_DIR}/java-home/${JAVA_ROOT}" + +for i in {1..3}; do + echo "Starting Java application..." + "$JAVA_HOME"/bin/java -jar artifacts/rag-api.jar + echo "Java application crashed, retrying ($i/3)..." + sleep 5 +done +#while ! curl --output /dev/null --silent --fail http://localhost:8080/api/v1/rag/dataSources; do +# echo "Waiting for the Java backend to be ready..." +# sleep 4 +#done \ No newline at end of file From 81706aba85b58ef07877c26dad942130a23e6fdf Mon Sep 17 00:00:00 2001 From: jwatson Date: Mon, 5 May 2025 09:27:26 -0700 Subject: [PATCH 2/5] mark the java startup script as executable --- docs/allow_list.txt | 11 +++++++---- scripts/startup_java.sh | 0 2 files changed, 7 insertions(+), 4 deletions(-) mode change 100644 => 100755 scripts/startup_java.sh diff --git a/docs/allow_list.txt b/docs/allow_list.txt index db33173f2..eb5e6fa2a 100644 --- a/docs/allow_list.txt +++ b/docs/allow_list.txt @@ -7,7 +7,12 @@ Node 22: https://nodejs.org/dist/v22.15.0/node-v22.15.0-darwin-arm64.tar.xz RAG Studio artifacts: -https://github.com/cloudera/CML_AMP_RAG_Studio/releases/latest/download +# note: these first 3 redirect to the specific release url (eg. releases/download/1.16.0/...) +https://github.com/cloudera/CML_AMP_RAG_Studio/releases/latest/download/rag-api.jar +https://github.com/cloudera/CML_AMP_RAG_Studio/releases/latest/download/fe-dist.tar.gz +https://github.com/cloudera/CML_AMP_RAG_Studio/releases/latest/download/node-dist.tar.gz +https://github.com/cloudera/CML_AMP_RAG_Studio/releases/download/model_download/craft_mlt_25k.pth +https://github.com/cloudera/CML_AMP_RAG_Studio/releases/download/model_download/latin_g2.pth Qdrant: https://github.com/qdrant/qdrant/releases/download/v1.11.3/qdrant-x86_64-unknown-linux-musl.tar.gz @@ -15,13 +20,11 @@ https://github.com/qdrant/qdrant/releases/download/v1.11.3/qdrant-x86_64-unknown Java: https://corretto.aws/downloads/latest/amazon-corretto-21-x64-linux-jdk.tar.gz -RAG Studio CML image: +RAG Studio CML image (the picture shown in the catalog): https://raw.githubusercontent.com Python dependencies: https://pypi.org https://files.pythonhosted.org -Node dependencies: -http://registry.npmjs.org/ diff --git a/scripts/startup_java.sh b/scripts/startup_java.sh old mode 100644 new mode 100755 From 1a4696ee82d6e02e6b67fa0d85121fd26ab8dabf Mon Sep 17 00:00:00 2001 From: jwatson Date: Mon, 5 May 2025 09:29:37 -0700 Subject: [PATCH 3/5] only try to restart an app we have installed --- scripts/restart_app.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/restart_app.py b/scripts/restart_app.py index 98769f55e..ad1578b4c 100644 --- a/scripts/restart_app.py +++ b/scripts/restart_app.py @@ -45,7 +45,8 @@ client = cmlapi.default_client() project_id = os.environ["CDSW_PROJECT_ID"] cml_apps = client.list_applications(project_id=project_id) -ragstudio_apps = ["RagStudioMetadata", "RagStudio"] +# ragstudio_apps = ["RagStudioMetadata", "RagStudio"] +ragstudio_apps = ["RagStudio"] if len(cml_apps.applications) > 0: for app_name in ragstudio_apps: From 9a59974f3aedbd89c8f3a7344968887ddfc5a120 Mon Sep 17 00:00:00 2001 From: jwatson Date: Mon, 5 May 2025 09:35:55 -0700 Subject: [PATCH 4/5] run java on 8080 always --- backend/src/main/resources/application.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/main/resources/application.properties b/backend/src/main/resources/application.properties index b7baba7cb..b6fa98902 100644 --- a/backend/src/main/resources/application.properties +++ b/backend/src/main/resources/application.properties @@ -54,4 +54,4 @@ otel.metrics.exporter=none otel.traces.exporter=none server.address=${API_HOST:127.0.0.1} -server.port=${CDSW_APP_PORT:8080} +#server.port=${CDSW_APP_PORT:8080} From 438ef3e12a867f5736d3bb608019512e89fbc512 Mon Sep 17 00:00:00 2001 From: jwatson Date: Mon, 5 May 2025 09:44:04 -0700 Subject: [PATCH 5/5] use a separate env var for the metadata app port --- backend/src/main/resources/application.properties | 2 +- scripts/startup_metadata_app.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/src/main/resources/application.properties b/backend/src/main/resources/application.properties index b6fa98902..269a9eb74 100644 --- a/backend/src/main/resources/application.properties +++ b/backend/src/main/resources/application.properties @@ -54,4 +54,4 @@ otel.metrics.exporter=none otel.traces.exporter=none server.address=${API_HOST:127.0.0.1} -#server.port=${CDSW_APP_PORT:8080} +server.port=${METADATA_APP_PORT:8080} diff --git a/scripts/startup_metadata_app.sh b/scripts/startup_metadata_app.sh index 463a2291a..2223d4214 100755 --- a/scripts/startup_metadata_app.sh +++ b/scripts/startup_metadata_app.sh @@ -51,6 +51,7 @@ fi export DB_URL=$DB_URL_LOCATION export JAVA_ROOT=`ls ${RAG_STUDIO_INSTALL_DIR}/java-home` export JAVA_HOME="${RAG_STUDIO_INSTALL_DIR}/java-home/${JAVA_ROOT}" +export METADATA_APP_PORT=${CDSW_APP_PORT} for i in {1..3}; do echo "Starting Java application..."