From e911f0fd0b82f52d1742f7272e2cf81e7578a7b1 Mon Sep 17 00:00:00 2001 From: yubingjiaocn Date: Fri, 11 Apr 2025 07:13:17 +0000 Subject: [PATCH 1/3] Revert to IRSA for mountpoint S3 CSI --- lib/addons/s3CSIDriver.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/addons/s3CSIDriver.ts b/lib/addons/s3CSIDriver.ts index 3cbab87..fa7cac6 100644 --- a/lib/addons/s3CSIDriver.ts +++ b/lib/addons/s3CSIDriver.ts @@ -30,8 +30,7 @@ export class s3CSIDriverAddOn extends blueprints.addons.HelmAddOn { const cluster = clusterInfo.cluster; const serviceAccount = cluster.addServiceAccount('s3-csi-driver-sa', { name: 's3-csi-driver-sa', - namespace: this.options.namespace, - identityType: eks.IdentityType.POD_IDENTITY + namespace: this.options.namespace }); // new IAM policy to grand access to S3 bucket From eed0f49c39dc3c1953aecf1f4017ae7815abe9a8 Mon Sep 17 00:00:00 2001 From: yubingjiaocn Date: Fri, 11 Apr 2025 07:15:26 +0000 Subject: [PATCH 2/3] Update retry and fallback logic for x-ray --- src/backend/queue_agent/src/main.py | 266 +++++++++++------- .../queue_agent/src/runtimes/comfyui.py | 19 +- .../queue_agent/src/runtimes/sdwebui.py | 29 +- 3 files changed, 207 insertions(+), 107 deletions(-) diff --git a/src/backend/queue_agent/src/main.py b/src/backend/queue_agent/src/main.py index b81a297..1b827b8 100644 --- a/src/backend/queue_agent/src/main.py +++ b/src/backend/queue_agent/src/main.py @@ -7,28 +7,86 @@ import signal import sys import uuid +import time +import functools import boto3 +from botocore.exceptions import EndpointConnectionError from aws_xray_sdk.core import patch_all, xray_recorder from aws_xray_sdk.core.models.trace_header import TraceHeader from modules import s3_action, sns_action, sqs_action from runtimes import comfyui, sdwebui -patch_all() - -# Logging configuration +# Initialize logging first so we can log X-Ray initialization attempts logging.basicConfig() logging.getLogger().setLevel(logging.ERROR) +# Configure the queue-agent logger only once logger = logging.getLogger("queue-agent") logger.propagate = False logger.setLevel(os.environ.get('LOGLEVEL', 'INFO').upper()) + +# Remove any existing handlers to prevent duplicate logs +if logger.handlers: + logger.handlers.clear() + +# Add a single handler handler = logging.StreamHandler(sys.stdout) handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) logger.addHandler(handler) -# Set current logger as global -logger = logging.getLogger("queue-agent") +# Check if X-Ray is manually disabled via environment variable +DISABLE_XRAY = os.environ.get('DISABLE_XRAY', 'false').lower() == 'true' +if DISABLE_XRAY: + logger.info("X-Ray tracing manually disabled via DISABLE_XRAY environment variable") + xray_enabled = False +else: + # Try to initialize X-Ray SDK with retries, as the daemon might be starting up + MAX_XRAY_INIT_ATTEMPTS = 5 + XRAY_RETRY_DELAY = 3 # seconds + xray_enabled = False + + for attempt in range(MAX_XRAY_INIT_ATTEMPTS): + try: + logger.info(f"Attempting to initialize X-Ray SDK (attempt {attempt+1}/{MAX_XRAY_INIT_ATTEMPTS})") + patch_all() + xray_enabled = True + logger.info("X-Ray SDK initialized successfully") + break + except EndpointConnectionError: + logger.warning(f"Could not connect to X-Ray daemon (attempt {attempt+1}/{MAX_XRAY_INIT_ATTEMPTS})") + if attempt < MAX_XRAY_INIT_ATTEMPTS - 1: + logger.info(f"Retrying in {XRAY_RETRY_DELAY} seconds...") + time.sleep(XRAY_RETRY_DELAY) + except Exception as e: + logger.warning(f"Error initializing X-Ray: {str(e)} (attempt {attempt+1}/{MAX_XRAY_INIT_ATTEMPTS})") + if attempt < MAX_XRAY_INIT_ATTEMPTS - 1: + logger.info(f"Retrying in {XRAY_RETRY_DELAY} seconds...") + time.sleep(XRAY_RETRY_DELAY) + + if not xray_enabled: + logger.warning("X-Ray initialization failed after all attempts. Tracing will be disabled.") + +# Create a decorator for safe X-Ray instrumentation +def safe_xray_capture(name): + """Decorator that safely applies X-Ray instrumentation if available""" + def decorator(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + if xray_enabled: + try: + # Try to use X-Ray instrumentation + with xray_recorder.in_segment(name): + return func(*args, **kwargs) + except Exception as e: + logger.warning(f"X-Ray instrumentation failed for {name}: {str(e)}") + # Fall back to non-instrumented execution + return func(*args, **kwargs) + else: + # X-Ray is disabled, just call the function directly + return func(*args, **kwargs) + return wrapper + return decorator # Get base environment variable aws_default_region = os.getenv("AWS_DEFAULT_REGION") @@ -104,101 +162,115 @@ def main(): received_messages = sqs_action.receive_messages(queue, 1, SQS_WAIT_TIME_SECONDS) for message in received_messages: - with xray_recorder.in_segment(runtime_name+"-queue-agent") as segment: - # Retrieve x-ray trace header from SQS message - if "AWSTraceHeader" in message.attributes.keys(): - traceHeaderStr = message.attributes['AWSTraceHeader'] - sqsTraceHeader = TraceHeader.from_header_str(traceHeaderStr) - # Update current segment to link with SQS - segment.trace_id = sqsTraceHeader.root - segment.parent_id = sqsTraceHeader.parent - segment.sampled = sqsTraceHeader.sampled - - # Process received message + # Process with X-Ray if enabled, otherwise just process the message directly + if xray_enabled: try: - payload = json.loads(json.loads(message.body)['Message']) - metadata = payload["metadata"] - task_id = metadata["id"] - - logger.info(f"Received task {task_id}, processing") - - if "prefix" in metadata.keys(): - if metadata["prefix"][-1] == '/': - prefix = metadata["prefix"] + str(task_id) - else: - prefix = metadata["prefix"] + "/" + str(task_id) - else: - prefix = str(task_id) - - if "tasktype" in metadata.keys(): - tasktype = metadata["tasktype"] - - if "context" in metadata.keys(): - context = metadata["context"] - else: - context = {} - - body = payload["content"] - logger.debug(body) + with xray_recorder.in_segment(runtime_name+"-queue-agent") as segment: + # Retrieve x-ray trace header from SQS message + if "AWSTraceHeader" in message.attributes.keys(): + traceHeaderStr = message.attributes['AWSTraceHeader'] + sqsTraceHeader = TraceHeader.from_header_str(traceHeaderStr) + # Update current segment to link with SQS + segment.trace_id = sqsTraceHeader.root + segment.parent_id = sqsTraceHeader.parent + segment.sampled = sqsTraceHeader.sampled + + # Process the message within the X-Ray segment + process_message(message, topic, s3_bucket, runtime_type, runtime_name, api_base_url, dynamic_sd_model if runtime_type == "sdwebui" else None) except Exception as e: - logger.error(f"Error parsing message: {e}, skipping") - logger.debug(payload) - sqs_action.delete_message(message) - continue - - if (exp_callback_when_running.lower() == "true"): - sns_response = {"runtime": runtime_name, - 'id': task_id, - 'status': "running", - 'context': context} - - sns_action.publish_message(topic, json.dumps(sns_response)) - - # Start handling message - response = {} + logger.error(f"Error with X-Ray tracing: {str(e)}. Processing message without tracing.") + process_message(message, topic, s3_bucket, runtime_type, runtime_name, api_base_url, dynamic_sd_model if runtime_type == "sdwebui" else None) + else: + # Process without X-Ray tracing + process_message(message, topic, s3_bucket, runtime_type, runtime_name, api_base_url, dynamic_sd_model if runtime_type == "sdwebui" else None) + +def process_message(message, topic, s3_bucket, runtime_type, runtime_name, api_base_url, dynamic_sd_model=None): + """Process a single SQS message""" + # Process received message + try: + payload = json.loads(json.loads(message.body)['Message']) + metadata = payload["metadata"] + task_id = metadata["id"] + + logger.info(f"Received task {task_id}, processing") + + if "prefix" in metadata.keys(): + if metadata["prefix"][-1] == '/': + prefix = metadata["prefix"] + str(task_id) + else: + prefix = metadata["prefix"] + "/" + str(task_id) + else: + prefix = str(task_id) + + if "tasktype" in metadata.keys(): + tasktype = metadata["tasktype"] + + if "context" in metadata.keys(): + context = metadata["context"] + else: + context = {} + + body = payload["content"] + logger.debug(body) + except Exception as e: + logger.error(f"Error parsing message: {e}, skipping") + logger.debug(payload) + sqs_action.delete_message(message) + return + + if (exp_callback_when_running.lower() == "true"): + sns_response = {"runtime": runtime_name, + 'id': task_id, + 'status': "running", + 'context': context} + + sns_action.publish_message(topic, json.dumps(sns_response)) + + # Start handling message + response = {} + + try: + if runtime_type == "sdwebui": + response = sdwebui.handler(api_base_url, tasktype, task_id, body, dynamic_sd_model) + + if runtime_type == "comfyui": + response = comfyui.handler(api_base_url, task_id, body) + except Exception as e: + logger.error(f"Error calling handler for task {task_id}: {str(e)}") + response = { + "success": False, + "image": [], + "content": '{"code": 500, "error": "Runtime handler failed"}' + } + + result = [] + rand = str(uuid.uuid4())[0:4] + + if response["success"]: + idx = 0 + if len(response["image"]) > 0: + for i in response["image"]: + idx += 1 + result.append(s3_action.upload_file(i, s3_bucket, prefix, str(task_id)+"-"+rand+"-"+str(idx))) + + output_url = s3_action.upload_file(response["content"], s3_bucket, prefix, str(task_id)+"-"+rand, ".out") + + if response["success"]: + status = "completed" + else: + status = "failed" - try: - if runtime_type == "sdwebui": - response = sdwebui.handler(api_base_url, tasktype, task_id, body, dynamic_sd_model) + sns_response = {"runtime": runtime_name, + 'id': task_id, + 'result': response["success"], + 'status': status, + 'image_url': result, + 'output_url': output_url, + 'context': context} - if runtime_type == "comfyui": - response = comfyui.handler(api_base_url, task_id, body) - except Exception as e: - logger.error(f"Error calling handler for task {task_id}: {str(e)}") - response = { - "success": False, - "image": [], - "content": '{"code": 500, "error": "Runtime handler failed"}' - } - - result = [] - rand = str(uuid.uuid4())[0:4] - - if response["success"]: - idx = 0 - if len(response["image"]) > 0: - for i in response["image"]: - idx += 1 - result.append(s3_action.upload_file(i, s3_bucket, prefix, str(task_id)+"-"+rand+"-"+str(idx))) - - output_url = s3_action.upload_file(response["content"], s3_bucket, prefix, str(task_id)+"-"+rand, ".out") - - if response["success"]: - status = "completed" - else: - status = "failed" - - sns_response = {"runtime": runtime_name, - 'id': task_id, - 'result': response["success"], - 'status': status, - 'image_url': result, - 'output_url': output_url, - 'context': context} - - # Put response handler to SNS and delete message - sns_action.publish_message(topic, json.dumps(sns_response)) - sqs_action.delete_message(message) + # Put response handler to SNS and delete message + sns_action.publish_message(topic, json.dumps(sns_response)) + sqs_action.delete_message(message) def print_env() -> None: logger.info(f'AWS_DEFAULT_REGION={aws_default_region}') @@ -207,6 +279,8 @@ def print_env() -> None: logger.info(f'S3_BUCKET={s3_bucket}') logger.info(f'RUNTIME_TYPE={runtime_type}') logger.info(f'RUNTIME_NAME={runtime_name}') + logger.info(f'X-Ray Tracing: {"Disabled" if DISABLE_XRAY else "Enabled"}') + logger.info(f'X-Ray Status: {"Active" if xray_enabled else "Inactive"}') def signalHandler(signum, frame): global shutdown diff --git a/src/backend/queue_agent/src/runtimes/comfyui.py b/src/backend/queue_agent/src/runtimes/comfyui.py index 6906595..fe76a3d 100644 --- a/src/backend/queue_agent/src/runtimes/comfyui.py +++ b/src/backend/queue_agent/src/runtimes/comfyui.py @@ -6,16 +6,30 @@ import time import traceback import urllib.parse -import urllib.request import uuid from typing import Optional, Dict, List, Any, Union import websocket # NOTE: websocket-client (https://github.com/websocket-client/websocket-client) -from aws_xray_sdk.core import xray_recorder from modules import http_action logger = logging.getLogger("queue-agent") +# Import the safe_xray_capture decorator from main module +try: + from src.main import safe_xray_capture, xray_enabled +except ImportError: + try: + # Try alternative import path + from ..main import safe_xray_capture, xray_enabled + except ImportError: + # Fallback if import fails - create a simple pass-through decorator + logger.warning("Failed to import safe_xray_capture from main, using fallback") + def safe_xray_capture(name): + def decorator(func): + return func + return decorator + xray_enabled = False + # Constants for websocket reconnection MAX_RECONNECT_ATTEMPTS = 5 RECONNECT_DELAY = 2 # seconds @@ -324,6 +338,7 @@ def handler(api_base_url: str, task_id: str, payload: dict) -> dict: return response +@safe_xray_capture('comfyui-pipeline') def invoke_pipeline(api_base_url: str, body) -> str: cf = comfyuiCaller() cf.setUrl(api_base_url) diff --git a/src/backend/queue_agent/src/runtimes/sdwebui.py b/src/backend/queue_agent/src/runtimes/sdwebui.py index 9f9578c..c591e8f 100644 --- a/src/backend/queue_agent/src/runtimes/sdwebui.py +++ b/src/backend/queue_agent/src/runtimes/sdwebui.py @@ -8,7 +8,6 @@ import traceback from requests.exceptions import ReadTimeout, HTTPError -from aws_xray_sdk.core import xray_recorder from modules import http_action, misc logger = logging.getLogger("queue-agent") @@ -16,6 +15,22 @@ ALWAYSON_SCRIPTS_EXCLUDE_KEYS = ['task', 'id_task', 'uid', 'sd_model_checkpoint', 'image_link', 'save_dir', 'sd_vae', 'override_settings'] +# Import the safe_xray_capture decorator from main module +try: + from src.main import safe_xray_capture, xray_enabled +except ImportError: + try: + # Try alternative import path + from ..main import safe_xray_capture, xray_enabled + except ImportError: + # Fallback if import fails - create a simple pass-through decorator + logger.warning("Failed to import safe_xray_capture from main, using fallback") + def safe_xray_capture(name): + def decorator(func): + return func + return decorator + xray_enabled = False + def check_readiness(api_base_url: str, dynamic_sd_model: bool) -> bool: """Check if SD Web UI is ready by invoking /option endpoint""" while True: @@ -107,7 +122,7 @@ def handler(api_base_url: str, task_type: str, task_id: str, payload: dict, dyna response["content"] = content return response -@xray_recorder.capture('text-to-image') +@safe_xray_capture('text-to-image') def invoke_txt2img(api_base_url: str, body) -> str: # Compatiability for v1alpha1: Move override_settings from header to body override_settings = {} @@ -128,7 +143,7 @@ def invoke_txt2img(api_base_url: str, body) -> str: response = http_action.do_invocations(api_base_url+"txt2img", body) return response -@xray_recorder.capture('image-to-image') +@safe_xray_capture('image-to-image') def invoke_img2img(api_base_url: str, body: dict) -> str: """Image-to-Image request""" # Process image link @@ -155,19 +170,15 @@ def invoke_img2img(api_base_url: str, body: dict) -> str: response = http_action.do_invocations(api_base_url+"img2img", body) return response -@xray_recorder.capture('extra-single-image') +@safe_xray_capture('extra-single-image') def invoke_extra_single_image(api_base_url: str, body) -> str: - body = download_image(body) - response = http_action.do_invocations(api_base_url+"extra-single-image", body) return response -@xray_recorder.capture('extra-batch-images') +@safe_xray_capture('extra-batch-images') def invoke_extra_batch_images(api_base_url: str, body) -> str: - body = download_image(body) - response = http_action.do_invocations(api_base_url+"extra-batch-images", body) return response From 5898ebec93f6bbd8499f0b707c2608115e61e13a Mon Sep 17 00:00:00 2001 From: yubingjiaocn Date: Fri, 11 Apr 2025 07:15:47 +0000 Subject: [PATCH 3/3] Update helm chart --- lib/runtime/sdRuntime.ts | 2 +- src/charts/sd_on_eks/Chart.yaml | 4 ++-- .../templates/aws-sqs-queue-scaledobject.yaml | 2 +- .../templates/deployment-comfyui.yaml | 9 ++++++--- .../templates/deployment-sdwebui.yaml | 16 +++++++++------- .../templates/persistentvolume-s3.yaml | 7 +++---- .../templates/persistentvolumeclaim.yaml | 18 ++++++++---------- src/charts/sd_on_eks/values.yaml | 6 +++++- 8 files changed, 35 insertions(+), 29 deletions(-) diff --git a/lib/runtime/sdRuntime.ts b/lib/runtime/sdRuntime.ts index 32e13e8..f51bd26 100644 --- a/lib/runtime/sdRuntime.ts +++ b/lib/runtime/sdRuntime.ts @@ -28,7 +28,7 @@ export const defaultProps: blueprints.addons.HelmAddOnProps & SDRuntimeAddOnProp name: 'sdRuntimeAddOn', namespace: 'sdruntime', release: 'sdruntime', - version: '1.1.1', + version: '1.1.3', repository: 'oci://public.ecr.aws/bingjiao/charts/sd-on-eks', values: {}, type: "sdwebui" diff --git a/src/charts/sd_on_eks/Chart.yaml b/src/charts/sd_on_eks/Chart.yaml index ccbd8af..7c4b6c9 100644 --- a/src/charts/sd_on_eks/Chart.yaml +++ b/src/charts/sd_on_eks/Chart.yaml @@ -13,9 +13,9 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 1.1.1 +version: 1.1.3 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "1.1.1" +appVersion: "1.1.3" diff --git a/src/charts/sd_on_eks/templates/aws-sqs-queue-scaledobject.yaml b/src/charts/sd_on_eks/templates/aws-sqs-queue-scaledobject.yaml index aa224de..beb8a79 100644 --- a/src/charts/sd_on_eks/templates/aws-sqs-queue-scaledobject.yaml +++ b/src/charts/sd_on_eks/templates/aws-sqs-queue-scaledobject.yaml @@ -19,7 +19,7 @@ spec: advanced: horizontalPodAutoscalerConfig: behavior: - {{- toYaml .Values.runtime.extraHPAConfig | nindent 8 }} + {{- toYaml .Values.runtime.scaling.extraHPAConfig | nindent 8 }} {{- end }} scaleTargetRef: name: {{ include "sdchart.fullname" . }}-inference-api diff --git a/src/charts/sd_on_eks/templates/deployment-comfyui.yaml b/src/charts/sd_on_eks/templates/deployment-comfyui.yaml index 4c85ae2..cb15055 100644 --- a/src/charts/sd_on_eks/templates/deployment-comfyui.yaml +++ b/src/charts/sd_on_eks/templates/deployment-comfyui.yaml @@ -64,11 +64,14 @@ spec: {{- if .Values.runtime.queueAgent.extraEnv }} {{- toYaml .Values.runtime.queueAgent.extraEnv | nindent 8 }} {{- end }} - {{- if .Values.runtime.queueAgent.XRay.enabled }} + {{- if .Values.runtime.queueAgent.xray.enabled }} - name: AWS_XRAY_DAEMON_ADDRESS value: localhost:2000 - name: AWS_XRAY_CONTEXT_MISSING value: IGNORE_ERROR + {{- else }} + - name: DISABLE_XRAY + value: "true" {{- end }} image: {{ .Values.runtime.queueAgent.image.repository }}:{{ .Values.runtime.queueAgent.image.tag }} imagePullPolicy: {{ .Values.runtime.queueAgent.imagePullPolicy }} @@ -77,9 +80,9 @@ spec: name: models resources: {{- toYaml .Values.runtime.queueAgent.resources | nindent 10 }} - {{- if .Values.runtime.queueAgent.XRay.enabled }} + {{- if .Values.runtime.queueAgent.xray.enabled }} - name: xray-daemon - image: public.ecr.aws/xray/aws-xray-daemon:3.3.7 + image: {{ .Values.runtime.queueAgent.xray.daemon.image.repository }}:{{ .Values.runtime.queueAgent.xray.daemon.image.tag }} ports: - containerPort: 2000 protocol: UDP diff --git a/src/charts/sd_on_eks/templates/deployment-sdwebui.yaml b/src/charts/sd_on_eks/templates/deployment-sdwebui.yaml index 6493613..ffc87af 100644 --- a/src/charts/sd_on_eks/templates/deployment-sdwebui.yaml +++ b/src/charts/sd_on_eks/templates/deployment-sdwebui.yaml @@ -10,10 +10,9 @@ metadata: {{- toYaml .Values.runtime.labels | nindent 4 }} {{- end }} runtime-type: sdwebui - {{- if .Values.runtime.annotations }} annotations: - {{ toYaml .Values.runtime.annotations | nindent 4 }} + {{- toYaml .Values.runtime.annotations | nindent 4 }} {{- end }} spec: @@ -31,7 +30,7 @@ spec: metadata: labels: app: inference-api - {{- include "sdchart.selectorLabels" . | nindent 8 }} + {{- include "sdchart.selectorLabels" . | nindent 8 }} spec: containers: - name: inference-api @@ -48,7 +47,7 @@ spec: value: "true" - name: CONFIG_FILE value: "/tmp/config.json" - {{- if .Values.runtime.queueAgent.commandArguments }} + {{- if .Values.runtime.inferenceApi.commandArguments }} - name: EXTRA_CMD_ARG value: {{ .Values.runtime.inferenceApi.commandArguments }} {{- end }} @@ -78,19 +77,22 @@ spec: {{- if .Values.runtime.queueAgent.extraEnv }} {{- toYaml .Values.runtime.queueAgent.extraEnv | nindent 8 }} {{- end }} - {{- if .Values.runtime.queueAgent.XRay.enabled }} + {{- if .Values.runtime.queueAgent.xray.enabled }} - name: AWS_XRAY_DAEMON_ADDRESS value: localhost:2000 - name: AWS_XRAY_CONTEXT_MISSING value: IGNORE_ERROR + {{- else }} + - name: DISABLE_XRAY + value: "true" {{- end }} image: {{ .Values.runtime.queueAgent.image.repository }}:{{ .Values.runtime.queueAgent.image.tag }} imagePullPolicy: {{ .Values.runtime.queueAgent.imagePullPolicy }} resources: {{- toYaml .Values.runtime.queueAgent.resources | nindent 10 }} - {{- if .Values.runtime.queueAgent.XRay.enabled }} + {{- if .Values.runtime.queueAgent.xray.enabled }} - name: xray-daemon - image: public.ecr.aws/xray/aws-xray-daemon:3.3.7 + image: {{ .Values.runtime.queueAgent.xray.daemon.image.repository }}:{{ .Values.runtime.queueAgent.xray.daemon.image.tag }} ports: - containerPort: 2000 protocol: UDP diff --git a/src/charts/sd_on_eks/templates/persistentvolume-s3.yaml b/src/charts/sd_on_eks/templates/persistentvolume-s3.yaml index 8d4d21d..064730f 100644 --- a/src/charts/sd_on_eks/templates/persistentvolume-s3.yaml +++ b/src/charts/sd_on_eks/templates/persistentvolume-s3.yaml @@ -3,10 +3,9 @@ apiVersion: v1 kind: PersistentVolume metadata: name: {{ include "sdchart.fullname" . }}-s3-model-volume - annotations: {{- if .Values.runtime.persistence.annotations }} annotations: - {{ toYaml .Values.runtime.persistence.annotations | indent 4 }} + {{ toYaml .Values.runtime.persistence.annotations | nindent 4 }} {{- end }} labels: {{- include "sdchart.labels" . | nindent 4 }} @@ -14,13 +13,13 @@ metadata: {{- toYaml .Values.runtime.labels | nindent 4 }} {{- end }} {{- if .Values.runtime.persistence.labels }} - {{ toYaml .Values.runtime.persistence.labels | indent 4 }} + {{- toYaml .Values.runtime.persistence.labels | nindent 4 }} {{- end }} spec: capacity: storage: {{ .Values.runtime.persistence.size }} accessModes: - {{ toYaml .Values.runtime.persistence.accessModes }} + {{- toYaml .Values.runtime.persistence.accessModes | nindent 2 }} mountOptions: - allow-delete - allow-other diff --git a/src/charts/sd_on_eks/templates/persistentvolumeclaim.yaml b/src/charts/sd_on_eks/templates/persistentvolumeclaim.yaml index 48ceb13..47512ff 100644 --- a/src/charts/sd_on_eks/templates/persistentvolumeclaim.yaml +++ b/src/charts/sd_on_eks/templates/persistentvolumeclaim.yaml @@ -1,12 +1,11 @@ {{- if and (.Values.runtime.persistence.enabled) (not (.Values.runtime.persistence.existingClaim)) }} - apiVersion: v1 kind: PersistentVolumeClaim metadata: name: {{ include "sdchart.fullname" . }}-model-claim {{- if .Values.runtime.persistence.annotations }} annotations: - {{ toYaml .Values.runtime.persistence.annotations | indent 4 }} + {{- toYaml .Values.runtime.persistence.annotations | nindent 4 }} {{- end }} labels: {{- include "sdchart.labels" . | nindent 4 }} @@ -14,23 +13,22 @@ metadata: {{- toYaml .Values.runtime.labels | nindent 4 }} {{- end }} {{- if .Values.runtime.persistence.labels }} - {{ toYaml .Values.runtime.persistence.labels | indent 4 }} + {{- toYaml .Values.runtime.persistence.labels | nindent 4 }} {{- end }} spec: accessModes: - {{ toYaml .Values.runtime.persistence.accessModes }} + {{- toYaml .Values.runtime.persistence.accessModes | nindent 2 }} resources: requests: storage: "{{ .Values.runtime.persistence.size }}" - {{- if .Values.runtime.persistence.storageClass }} - {{- if (eq "-" .Values.runtime.persistence.storageClass) }} + {{- if .Values.runtime.persistence.storageClass }} + {{- if (eq "-" .Values.runtime.persistence.storageClass) }} storageClassName: "" - {{- else }} + {{- else }} storageClassName: "{{ .Values.runtime.persistence.storageClass }}" - {{- end }} - {{- end }} + {{- end }} + {{- end }} {{- if .Values.runtime.persistence.existingVolume }} volumeName: "{{ .Values.runtime.persistence.existingVolume }}" {{- end }} - {{- end }} \ No newline at end of file diff --git a/src/charts/sd_on_eks/values.yaml b/src/charts/sd_on_eks/values.yaml index 042231f..eca81cf 100644 --- a/src/charts/sd_on_eks/values.yaml +++ b/src/charts/sd_on_eks/values.yaml @@ -84,8 +84,12 @@ runtime: requests: cpu: 500m memory: 512Mi - XRay: + xray: enabled: true + daemon: + image: + repository: public.ecr.aws/xray/aws-xray-daemon + tag: 3.3.14 persistence: enabled: true existingClaim: ""