From d3e1226d15d07884d5037de34b4343e4e827ba91 Mon Sep 17 00:00:00 2001
From: justin-tahara
Date: Fri, 29 Aug 2025 17:01:14 -0700
Subject: [PATCH 1/2] feat(infra): Adding new KEDA Helm templates

---
 deployment/helm/charts/onyx/Chart.yaml | 2 +-
 .../keda/api-server-scaledobject.yaml | 24 +++++++++
 .../celery-worker-common-scaledobject.yaml | 51 +++++++++++++++++++
 ...elery-worker-docfetching-scaledobject.yaml | 46 +++++++++++++++++
 ...ery-worker-docprocessing-scaledobject.yaml | 46 +++++++++++++++++
 .../keda/model-server-scaledobject.yaml | 31 +++++++++++
 .../templates/keda/slackbot-scaledobject.yaml | 28 ++++++++++
 .../keda/web-server-scaledobject.yaml | 24 +++++++++
 8 files changed, 251 insertions(+), 1 deletion(-)
 create mode 100644 deployment/helm/charts/onyx/templates/keda/api-server-scaledobject.yaml
 create mode 100644 deployment/helm/charts/onyx/templates/keda/celery-worker-common-scaledobject.yaml
 create mode 100644 deployment/helm/charts/onyx/templates/keda/celery-worker-docfetching-scaledobject.yaml
 create mode 100644 deployment/helm/charts/onyx/templates/keda/celery-worker-docprocessing-scaledobject.yaml
 create mode 100644 deployment/helm/charts/onyx/templates/keda/model-server-scaledobject.yaml
 create mode 100644 deployment/helm/charts/onyx/templates/keda/slackbot-scaledobject.yaml
 create mode 100644 deployment/helm/charts/onyx/templates/keda/web-server-scaledobject.yaml

diff --git a/deployment/helm/charts/onyx/Chart.yaml b/deployment/helm/charts/onyx/Chart.yaml
index 40cbbbbc9f..a4f2ca07e2 100644
--- a/deployment/helm/charts/onyx/Chart.yaml
+++ b/deployment/helm/charts/onyx/Chart.yaml
@@ -5,7 +5,7 @@ home: https://www.onyx.app/
 sources:
   - "https://github.com/onyx-dot-app/onyx"
 type: application
-version: 0.2.9
+version: 0.2.10
 appVersion: latest
 annotations:
   category: Productivity
diff --git a/deployment/helm/charts/onyx/templates/keda/api-server-scaledobject.yaml b/deployment/helm/charts/onyx/templates/keda/api-server-scaledobject.yaml
new file mode 100644
index 0000000000..58951173c7
--- /dev/null
+++ b/deployment/helm/charts/onyx/templates/keda/api-server-scaledobject.yaml
@@ -0,0 +1,24 @@
+{{- if and .Values.keda.enabled .Values.keda.apiServer .Values.keda.apiServer.enabled }}
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: {{ include "onyx-stack.fullname" . }}-api-server-scaledobject
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "onyx-stack.labels" . | nindent 4 }}
+    app: api-server
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "onyx-stack.fullname" . }}-api-server
+  pollingInterval: {{ .Values.keda.apiServer.pollingInterval | default 30 }}
+  cooldownPeriod: {{ .Values.keda.apiServer.cooldownPeriod | default 300 }}
+  minReplicaCount: {{ .Values.keda.apiServer.minReplicas | default 1 }}
+  maxReplicaCount: {{ .Values.keda.apiServer.maxReplicas | default 10 }}
+  triggers:
+    - type: cpu
+      metadata:
+        type: Utilization
+        value: {{ .Values.keda.apiServer.cpuThreshold | default "70" | quote }}
+{{- end }}
diff --git a/deployment/helm/charts/onyx/templates/keda/celery-worker-common-scaledobject.yaml b/deployment/helm/charts/onyx/templates/keda/celery-worker-common-scaledobject.yaml
new file mode 100644
index 0000000000..a38cc46b42
--- /dev/null
+++ b/deployment/helm/charts/onyx/templates/keda/celery-worker-common-scaledobject.yaml
@@ -0,0 +1,51 @@
+{{- if and .Values.keda.enabled .Values.keda.celeryWorkers.enabled }}
+{{- range $workerType, $workerConfig := .Values.keda.celeryWorkers }}
+{{- if and (ne $workerType "enabled") $workerConfig.enabled (ne $workerType "docprocessing") (ne $workerType "docfetching") }}
+---
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: {{ include "onyx-stack.fullname" $ }}-celery-worker-{{ $workerType }}-scaledobject
+  namespace: {{ $.Release.Namespace }}
+  labels:
+    {{- include "onyx-stack.labels" $ | nindent 4 }}
+    app: celery-worker-{{ $workerType }}
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "onyx-stack.fullname" $ }}-celery-worker-{{ $workerType }}
+  pollingInterval: {{ $workerConfig.pollingInterval | default 30 }}
+  cooldownPeriod: {{ $workerConfig.cooldownPeriod | default 300 }}
+  minReplicaCount: {{ $workerConfig.minReplicas | default 1 }}
+  maxReplicaCount: {{ $workerConfig.maxReplicas | default 10 }}
+  triggers:
+    {{- if $workerConfig.triggers }}
+    # Default Prometheus-based trigger for Redis queue depth if none specified
+    # Scaling Logic:
+    # - When queue depth > 5: Scale up by factor of 2 (moderate scaling)
+    # - When queue depth <= 5: Scale down by factor of 0.5 (conservative scaling)
+    # - Threshold of 1 ensures scaling triggers when metric value > 1
+    - type: prometheus
+      metricType: "Value"
+      metadata:
+        serverAddress: "http://prometheus-redis.monitoring.svc.cluster.local:9090"
+        metricName: "redis_key_size_sum"
+        threshold: "1"
+        query: |
+          # Simplified scaling logic for generic celery workers
+          # Returns 2 when queue depth > 5, 0.5 when <= 5
+          # `bool` makes each comparison yield 0/1, so exactly one term is non-zero
+          (
+            (sum(redis_key_size{key=~"connector_{{ $workerType }}.*"}) > bool 5)
+            * 2
+          )
+          +
+          (
+            (sum(redis_key_size{key=~"connector_{{ $workerType }}.*"}) <= bool 5)
+            * 0.5
+          )
+    {{- end }}
+{{- end }}
+{{- end }}
+{{- end }}
diff --git a/deployment/helm/charts/onyx/templates/keda/celery-worker-docfetching-scaledobject.yaml b/deployment/helm/charts/onyx/templates/keda/celery-worker-docfetching-scaledobject.yaml
new file mode 100644
index 0000000000..f8d52a2c59
--- /dev/null
+++ b/deployment/helm/charts/onyx/templates/keda/celery-worker-docfetching-scaledobject.yaml
@@ -0,0 +1,46 @@
+{{- if and .Values.keda.enabled .Values.keda.celeryWorkers.docfetching .Values.keda.celeryWorkers.docfetching.enabled }}
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: {{ include "onyx-stack.fullname" . }}-celery-worker-docfetching-scaledobject
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "onyx-stack.labels" . | nindent 4 }}
+    app: celery-worker-docfetching
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "onyx-stack.fullname" . }}-celery-worker-docfetching
+  pollingInterval: {{ .Values.keda.celeryWorkers.docfetching.pollingInterval | default 30 }}
+  cooldownPeriod: {{ .Values.keda.celeryWorkers.docfetching.cooldownPeriod | default 300 }}
+  minReplicaCount: {{ .Values.keda.celeryWorkers.docfetching.minReplicas | default 1 }}
+  maxReplicaCount: {{ .Values.keda.celeryWorkers.docfetching.maxReplicas | default 10 }}
+  triggers:
+    {{- if .Values.keda.celeryWorkers.docfetching.triggers }}
+    # Default Prometheus-based trigger for Redis queue depth if none specified
+    # Scaling Logic:
+    # - When queue depth > 5: Scale up by factor of 2 (aggressive scaling)
+    # - When queue depth <= 5: Scale down by factor of 0.5 (conservative scaling)
+    # - Threshold of 1 ensures scaling triggers when metric value > 1
+    - type: prometheus
+      metricType: "Value"
+      metadata:
+        serverAddress: "http://prometheus-redis.monitoring.svc.cluster.local:9090"
+        metricName: "redis_key_size_sum"
+        threshold: "1"
+        query: |
+          # Simplified scaling logic for docfetching workers
+          # Returns 2 when queue depth > 5, 0.5 when <= 5
+          # `bool` makes each comparison yield 0/1, so exactly one term is non-zero
+          (
+            (sum(redis_key_size{key=~"connector_docfetching.*"}) > bool 5)
+            * 2
+          )
+          +
+          (
+            (sum(redis_key_size{key=~"connector_docfetching.*"}) <= bool 5)
+            * 0.5
+          )
+    {{- end }}
+{{- end }}
diff --git a/deployment/helm/charts/onyx/templates/keda/celery-worker-docprocessing-scaledobject.yaml b/deployment/helm/charts/onyx/templates/keda/celery-worker-docprocessing-scaledobject.yaml
new file mode 100644
index 0000000000..2bd2e1a4b8
--- /dev/null
+++ b/deployment/helm/charts/onyx/templates/keda/celery-worker-docprocessing-scaledobject.yaml
@@ -0,0 +1,46 @@
+{{- if and .Values.keda.enabled .Values.keda.celeryWorkers.docprocessing .Values.keda.celeryWorkers.docprocessing.enabled }}
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: {{ include "onyx-stack.fullname" . }}-celery-worker-docprocessing-scaledobject
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "onyx-stack.labels" . | nindent 4 }}
+    app: celery-worker-docprocessing
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "onyx-stack.fullname" . }}-celery-worker-docprocessing
+  pollingInterval: {{ .Values.keda.celeryWorkers.docprocessing.pollingInterval | default 30 }}
+  cooldownPeriod: {{ .Values.keda.celeryWorkers.docprocessing.cooldownPeriod | default 300 }}
+  minReplicaCount: {{ .Values.keda.celeryWorkers.docprocessing.minReplicas | default 1 }}
+  maxReplicaCount: {{ .Values.keda.celeryWorkers.docprocessing.maxReplicas | default 50 }}
+  triggers:
+    {{- if .Values.keda.celeryWorkers.docprocessing.triggers }}
+    # Default Prometheus-based trigger for Redis queue depth if none specified
+    # Scaling Logic:
+    # - When queue depth > 20: Scale up by factor of 4 (very aggressive scaling)
+    # - When queue depth <= 20: Scale down by factor of 0.25 (very conservative scaling)
+    # - Threshold of 1 ensures scaling triggers when metric value > 1
+    - type: prometheus
+      metricType: "Value"
+      metadata:
+        serverAddress: "http://prometheus-redis.monitoring.svc.cluster.local:9090"
+        metricName: "redis_key_size_sum"
+        threshold: "1"
+        query: |
+          # Simplified scaling logic for docprocessing workers
+          # Returns 4 when queue depth > 20, 0.25 when <= 20
+          # `bool` makes each comparison yield 0/1, so exactly one term is non-zero
+          (
+            (sum(redis_key_size{key=~"connector_docprocessing.*"}) > bool 20)
+            * 4
+          )
+          +
+          (
+            (sum(redis_key_size{key=~"connector_docprocessing.*"}) <= bool 20)
+            * 0.25
+          )
+    {{- end }}
+{{- end }}
diff --git a/deployment/helm/charts/onyx/templates/keda/model-server-scaledobject.yaml b/deployment/helm/charts/onyx/templates/keda/model-server-scaledobject.yaml
new file mode 100644
index 0000000000..f54ab3086c
--- /dev/null
+++ b/deployment/helm/charts/onyx/templates/keda/model-server-scaledobject.yaml
@@ -0,0 +1,31 @@
+{{- if and .Values.keda.enabled .Values.keda.modelServers.enabled }}
+{{- range $serverType, $serverConfig := .Values.keda.modelServers }}
+{{- if and (ne $serverType "enabled") $serverConfig.enabled }}
+---
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: {{ include "onyx-stack.fullname" $ }}-{{ $serverType }}-model-server-scaledobject
+  namespace: {{ $.Release.Namespace }}
+  labels:
+    {{- include "onyx-stack.labels" $ | nindent 4 }}
+    app: {{ $serverType }}-model-server
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "onyx-stack.fullname" $ }}-{{ $serverType }}-model-server
+  pollingInterval: {{ $serverConfig.pollingInterval | default 30 }}
+  cooldownPeriod: {{ $serverConfig.cooldownPeriod | default 300 }}
+  minReplicaCount: {{ $serverConfig.minReplicas | default 1 }}
+  maxReplicaCount: {{ $serverConfig.maxReplicas | default 5 }}
+  triggers:
+    {{- if $serverConfig.triggers }}
+    - type: cpu
+      metadata:
+        type: Utilization
+        value: {{ $serverConfig.cpuThreshold | default "70" | quote }}
+    {{- end }}
+{{- end }}
+{{- end }}
+{{- end }}
diff --git a/deployment/helm/charts/onyx/templates/keda/slackbot-scaledobject.yaml b/deployment/helm/charts/onyx/templates/keda/slackbot-scaledobject.yaml
new file mode 100644
index 0000000000..bfd7dc0104
--- /dev/null
+++ b/deployment/helm/charts/onyx/templates/keda/slackbot-scaledobject.yaml
@@ -0,0 +1,28 @@
+{{- if and .Values.keda.enabled .Values.keda.slackbot .Values.keda.slackbot.enabled }}
+# Note: This KEDA ScaledObject will conflict with HPA if slackbot.autoscaling.enabled is also true
+# Only one autoscaler should be active per deployment
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: {{ include "onyx-stack.fullname" . }}-slackbot-scaledobject
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "onyx-stack.labels" . | nindent 4 }}
+    app: slackbot
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "onyx-stack.fullname" . }}-slackbot
+  pollingInterval: {{ .Values.keda.slackbot.pollingInterval | default 30 }}
+  cooldownPeriod: {{ .Values.keda.slackbot.cooldownPeriod | default 300 }}
+  minReplicaCount: {{ .Values.keda.slackbot.minReplicas | default 1 }}
+  maxReplicaCount: {{ .Values.keda.slackbot.maxReplicas | default 3 }}
+  triggers:
+    {{- if .Values.keda.slackbot.triggers }}
+    - type: cpu
+      metadata:
+        type: Utilization
+        value: {{ .Values.keda.slackbot.cpuThreshold | default "70" | quote }}
+    {{- end }}
+{{- end }}
diff --git a/deployment/helm/charts/onyx/templates/keda/web-server-scaledobject.yaml b/deployment/helm/charts/onyx/templates/keda/web-server-scaledobject.yaml
new file mode 100644
index 0000000000..317ea9821f
--- /dev/null
+++ b/deployment/helm/charts/onyx/templates/keda/web-server-scaledobject.yaml
@@ -0,0 +1,24 @@
+{{- if and .Values.keda.enabled .Values.keda.webServer .Values.keda.webServer.enabled }}
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: {{ include "onyx-stack.fullname" . }}-web-server-scaledobject
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "onyx-stack.labels" . | nindent 4 }}
+    app: web-server
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "onyx-stack.fullname" . }}-web-server
+  pollingInterval: {{ .Values.keda.webServer.pollingInterval | default 30 }}
+  cooldownPeriod: {{ .Values.keda.webServer.cooldownPeriod | default 300 }}
+  minReplicaCount: {{ .Values.keda.webServer.minReplicas | default 1 }}
+  maxReplicaCount: {{ .Values.keda.webServer.maxReplicas | default 5 }}
+  triggers:
+    - type: cpu
+      metadata:
+        type: Utilization
+        value: {{ .Values.keda.webServer.cpuThreshold | default "70" | quote }}
+{{- end }}

From 78aee1946189cb2ad86ffd2e389f78a79c4c4ce4 Mon Sep 17 00:00:00 2001
From: justin-tahara
Date: Mon, 1 Sep 2025 11:31:19 -0700
Subject: [PATCH 2/2] commit changes

---
 .../keda/api-server-scaledobject.yaml | 13 ++-
 .../celery-worker-common-scaledobject.yaml | 4 +-
 ...elery-worker-docfetching-scaledobject.yaml | 4 +-
 ...ery-worker-docprocessing-scaledobject.yaml | 4 +-
 .../keda/model-server-scaledobject.yaml | 4 +-
 .../templates/keda/slackbot-scaledobject.yaml | 10 ++-
 .../keda/web-server-scaledobject.yaml | 13 ++-
 deployment/helm/charts/onyx/values.yaml | 84 +++++++++++++++++++
 8 files changed, 121 insertions(+), 15 deletions(-)

diff --git a/deployment/helm/charts/onyx/templates/keda/api-server-scaledobject.yaml b/deployment/helm/charts/onyx/templates/keda/api-server-scaledobject.yaml
index 58951173c7..0df982997e 100644
--- a/deployment/helm/charts/onyx/templates/keda/api-server-scaledobject.yaml
+++ b/deployment/helm/charts/onyx/templates/keda/api-server-scaledobject.yaml
@@ -11,14 +11,25 @@ spec:
   scaleTargetRef:
     apiVersion: apps/v1
     kind: Deployment
-    name: {{ include "onyx-stack.fullname" . }}-api-server
+    # NOTE(review): web-server-scaledobject.yaml targets this same bare name - confirm both Deployment names
+    name: {{ include "onyx-stack.fullname" . }}
   pollingInterval: {{ .Values.keda.apiServer.pollingInterval | default 30 }}
   cooldownPeriod: {{ .Values.keda.apiServer.cooldownPeriod | default 300 }}
   minReplicaCount: {{ .Values.keda.apiServer.minReplicas | default 1 }}
   maxReplicaCount: {{ .Values.keda.apiServer.maxReplicas | default 10 }}
+  # Name of the HPA that KEDA creates and manages for this ScaledObject
+  advanced:
+    horizontalPodAutoscalerConfig:
+      name: {{ include "onyx-stack.fullname" . }}-api-server-keda-hpa
   triggers:
     - type: cpu
       metadata:
         type: Utilization
         value: {{ .Values.keda.apiServer.cpuThreshold | default "70" | quote }}
+    {{- if .Values.keda.apiServer.memoryThreshold }}
+    - type: memory
+      metadata:
+        type: Utilization
+        value: {{ .Values.keda.apiServer.memoryThreshold | quote }}
+    {{- end }}
 {{- end }}
diff --git a/deployment/helm/charts/onyx/templates/keda/celery-worker-common-scaledobject.yaml b/deployment/helm/charts/onyx/templates/keda/celery-worker-common-scaledobject.yaml
index a38cc46b42..d85b6a3bf4 100644
--- a/deployment/helm/charts/onyx/templates/keda/celery-worker-common-scaledobject.yaml
+++ b/deployment/helm/charts/onyx/templates/keda/celery-worker-common-scaledobject.yaml
@@ -20,8 +20,7 @@ spec:
   minReplicaCount: {{ $workerConfig.minReplicas | default 1 }}
   maxReplicaCount: {{ $workerConfig.maxReplicas | default 10 }}
   triggers:
-    {{- if $workerConfig.triggers }}
-    # Default Prometheus-based trigger for Redis queue depth if none specified
+    # Default Prometheus-based trigger for Redis queue depth
     # Scaling Logic:
     # - When queue depth > 5: Scale up by factor of 2 (moderate scaling)
     # - When queue depth <= 5: Scale down by factor of 0.5 (conservative scaling)
@@ -45,7 +44,6 @@ spec:
             (sum(redis_key_size{key=~"connector_{{ $workerType }}.*"}) <= bool 5)
             * 0.5
           )
-    {{- end }}
 {{- end }}
 {{- end }}
 {{- end }}
diff --git a/deployment/helm/charts/onyx/templates/keda/celery-worker-docfetching-scaledobject.yaml b/deployment/helm/charts/onyx/templates/keda/celery-worker-docfetching-scaledobject.yaml
index f8d52a2c59..ce8c321840 100644
--- a/deployment/helm/charts/onyx/templates/keda/celery-worker-docfetching-scaledobject.yaml
+++ b/deployment/helm/charts/onyx/templates/keda/celery-worker-docfetching-scaledobject.yaml
@@ -17,8 +17,7 @@ spec:
   minReplicaCount: {{ .Values.keda.celeryWorkers.docfetching.minReplicas | default 1 }}
   maxReplicaCount: {{ .Values.keda.celeryWorkers.docfetching.maxReplicas | default 10 }}
   triggers:
-    {{- if .Values.keda.celeryWorkers.docfetching.triggers }}
-    # Default Prometheus-based trigger for Redis queue depth if none specified
+    # Default Prometheus-based trigger for Redis queue depth
     # Scaling Logic:
     # - When queue depth > 5: Scale up by factor of 2 (aggressive scaling)
     # - When queue depth <= 5: Scale down by factor of 0.5 (conservative scaling)
@@ -42,5 +41,4 @@ spec:
             (sum(redis_key_size{key=~"connector_docfetching.*"}) <= bool 5)
             * 0.5
           )
-    {{- end }}
 {{- end }}
diff --git a/deployment/helm/charts/onyx/templates/keda/celery-worker-docprocessing-scaledobject.yaml b/deployment/helm/charts/onyx/templates/keda/celery-worker-docprocessing-scaledobject.yaml
index 2bd2e1a4b8..57555d049f 100644
--- a/deployment/helm/charts/onyx/templates/keda/celery-worker-docprocessing-scaledobject.yaml
+++ b/deployment/helm/charts/onyx/templates/keda/celery-worker-docprocessing-scaledobject.yaml
@@ -17,8 +17,7 @@ spec:
   minReplicaCount: {{ .Values.keda.celeryWorkers.docprocessing.minReplicas | default 1 }}
   maxReplicaCount: {{ .Values.keda.celeryWorkers.docprocessing.maxReplicas | default 50 }}
   triggers:
-    {{- if .Values.keda.celeryWorkers.docprocessing.triggers }}
-    # Default Prometheus-based trigger for Redis queue depth if none specified
+    # Default Prometheus-based trigger for Redis queue depth
     # Scaling Logic:
     # - When queue depth > 20: Scale up by factor of 4 (very aggressive scaling)
     # - When queue depth <= 20: Scale down by factor of 0.25 (very conservative scaling)
@@ -42,5 +41,4 @@ spec:
             (sum(redis_key_size{key=~"connector_docprocessing.*"}) <= bool 20)
             * 0.25
           )
-    {{- end }}
 {{- end }}
diff --git a/deployment/helm/charts/onyx/templates/keda/model-server-scaledobject.yaml b/deployment/helm/charts/onyx/templates/keda/model-server-scaledobject.yaml
index f54ab3086c..f0599a1dd7 100644
--- a/deployment/helm/charts/onyx/templates/keda/model-server-scaledobject.yaml
+++ b/deployment/helm/charts/onyx/templates/keda/model-server-scaledobject.yaml
@@ -14,18 +14,16 @@ spec:
   scaleTargetRef:
     apiVersion: apps/v1
     kind: Deployment
-    name: {{ include "onyx-stack.fullname" $ }}-{{ $serverType }}-model-server
+    name: {{ include "onyx-stack.fullname" $ }}-{{ $serverType }}-model
   pollingInterval: {{ $serverConfig.pollingInterval | default 30 }}
   cooldownPeriod: {{ $serverConfig.cooldownPeriod | default 300 }}
   minReplicaCount: {{ $serverConfig.minReplicas | default 1 }}
   maxReplicaCount: {{ $serverConfig.maxReplicas | default 5 }}
   triggers:
-    {{- if $serverConfig.triggers }}
     - type: cpu
       metadata:
         type: Utilization
         value: {{ $serverConfig.cpuThreshold | default "70" | quote }}
-    {{- end }}
 {{- end }}
 {{- end }}
 {{- end }}
diff --git a/deployment/helm/charts/onyx/templates/keda/slackbot-scaledobject.yaml b/deployment/helm/charts/onyx/templates/keda/slackbot-scaledobject.yaml
index bfd7dc0104..5b1f181ef8 100644
--- a/deployment/helm/charts/onyx/templates/keda/slackbot-scaledobject.yaml
+++ b/deployment/helm/charts/onyx/templates/keda/slackbot-scaledobject.yaml
@@ -18,11 +18,19 @@ spec:
   cooldownPeriod: {{ .Values.keda.slackbot.cooldownPeriod | default 300 }}
   minReplicaCount: {{ .Values.keda.slackbot.minReplicas | default 1 }}
   maxReplicaCount: {{ .Values.keda.slackbot.maxReplicas | default 3 }}
+  # Name of the HPA that KEDA creates and manages for this ScaledObject
+  advanced:
+    horizontalPodAutoscalerConfig:
+      name: {{ include "onyx-stack.fullname" . }}-slackbot-keda-hpa
   triggers:
-    {{- if .Values.keda.slackbot.triggers }}
     - type: cpu
       metadata:
         type: Utilization
         value: {{ .Values.keda.slackbot.cpuThreshold | default "70" | quote }}
+    {{- if .Values.keda.slackbot.memoryThreshold }}
+    - type: memory
+      metadata:
+        type: Utilization
+        value: {{ .Values.keda.slackbot.memoryThreshold | quote }}
     {{- end }}
 {{- end }}
diff --git a/deployment/helm/charts/onyx/templates/keda/web-server-scaledobject.yaml b/deployment/helm/charts/onyx/templates/keda/web-server-scaledobject.yaml
index 317ea9821f..2e54f3a130 100644
--- a/deployment/helm/charts/onyx/templates/keda/web-server-scaledobject.yaml
+++ b/deployment/helm/charts/onyx/templates/keda/web-server-scaledobject.yaml
@@ -11,14 +11,25 @@ spec:
   scaleTargetRef:
     apiVersion: apps/v1
     kind: Deployment
-    name: {{ include "onyx-stack.fullname" . }}-web-server
+    # NOTE(review): api-server-scaledobject.yaml targets this same bare name - confirm both Deployment names
+    name: {{ include "onyx-stack.fullname" . }}
   pollingInterval: {{ .Values.keda.webServer.pollingInterval | default 30 }}
   cooldownPeriod: {{ .Values.keda.webServer.cooldownPeriod | default 300 }}
   minReplicaCount: {{ .Values.keda.webServer.minReplicas | default 1 }}
   maxReplicaCount: {{ .Values.keda.webServer.maxReplicas | default 5 }}
+  # Name of the HPA that KEDA creates and manages for this ScaledObject
+  advanced:
+    horizontalPodAutoscalerConfig:
+      name: {{ include "onyx-stack.fullname" . }}-web-server-keda-hpa
   triggers:
     - type: cpu
       metadata:
         type: Utilization
         value: {{ .Values.keda.webServer.cpuThreshold | default "70" | quote }}
+    {{- if .Values.keda.webServer.memoryThreshold }}
+    - type: memory
+      metadata:
+        type: Utilization
+        value: {{ .Values.keda.webServer.memoryThreshold | quote }}
+    {{- end }}
 {{- end }}
diff --git a/deployment/helm/charts/onyx/values.yaml b/deployment/helm/charts/onyx/values.yaml
index 04cee79642..b7b3ceede4 100644
--- a/deployment/helm/charts/onyx/values.yaml
+++ b/deployment/helm/charts/onyx/values.yaml
@@ -8,6 +8,90 @@ global:
   # Global pull policy for all Onyx component images
   pullPolicy: "IfNotPresent"
 
+keda:
+  # Master switch for all KEDA functionality - disabled by default
+  # KEDA creates and manages its own HPA for every ScaledObject rendered from this section
+  # Do not enable a component's own autoscaling.* HPA together with its keda.* entry
+  enabled: false
+
+  # API Server autoscaling configuration
+  # KEDA generates an HPA named: {release}-api-server-keda-hpa
+  # Must not be enabled together with api.autoscaling.* (one autoscaler per Deployment)
+  apiServer:
+    enabled: false
+    pollingInterval: 30
+    cooldownPeriod: 300
+    minReplicas: 1
+    maxReplicas: 10
+    cpuThreshold: "70"
+    memoryThreshold: "80" # Optional: enable memory-based scaling
+
+  # Web Server autoscaling configuration
+  # KEDA generates an HPA named: {release}-web-server-keda-hpa
+  # Must not be enabled together with webserver.autoscaling.* (one autoscaler per Deployment)
+  webServer:
+    enabled: false
+    pollingInterval: 30
+    cooldownPeriod: 300
+    minReplicas: 1
+    maxReplicas: 5
+    cpuThreshold: "70"
+    memoryThreshold: "80" # Optional: enable memory-based scaling
+
+  # Slackbot autoscaling configuration
+  # KEDA generates an HPA named: {release}-slackbot-keda-hpa
+  # Must not be enabled together with slackbot.autoscaling.* (one autoscaler per Deployment)
+  slackbot:
+    enabled: false
+    pollingInterval: 30
+    cooldownPeriod: 300
+    minReplicas: 1
+    maxReplicas: 3
+    cpuThreshold: "70"
+    memoryThreshold: "80" # Optional: enable memory-based scaling
+
+  # Model Servers autoscaling configuration
+  # KEDA generates HPAs for each enabled model server
+  # Must not be combined with another autoscaler for the same model server
+  modelServers:
+    enabled: false
+    # Individual model server configurations can be added here
+    # Example:
+    # inference:
+    #   enabled: false
+    #   pollingInterval: 30
+    #   cooldownPeriod: 300
+    #   minReplicas: 1
+    #   maxReplicas: 5
+    #   cpuThreshold: "70"
+    #   memoryThreshold: "80"
+
+  # Celery Workers autoscaling configuration
+  # KEDA provides Redis queue-based scaling for high-performance worker management
+  # Must not be combined with another autoscaler for the same celery worker
+  celeryWorkers:
+    enabled: false
+    # Individual worker configurations can be added here
+    # Example:
+    # docprocessing:
+    #   enabled: false
+    #   pollingInterval: 30
+    #   cooldownPeriod: 300
+    #   minReplicas: 1
+    #   maxReplicas: 50
+    # light:
+    #   enabled: false
+    #   pollingInterval: 30
+    #   cooldownPeriod: 300
+    #   minReplicas: 1
+    #   maxReplicas: 10
+    # primary:
+    #   enabled: false
+    #   pollingInterval: 30
+    #   cooldownPeriod: 300
+    #   minReplicas: 1
+    #   maxReplicas: 10
+
 postgresql:
   primary:
     persistence: