From 8d12954353d760c970271292763393e19f00f3b8 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Mon, 29 Apr 2024 14:13:21 +0200 Subject: [PATCH 01/38] feat(chart): create kube infra --- chart/Chart.yaml | 5 +++++ chart/env/prod.yaml | 18 ++++++++++++++++ chart/templates/_helpers.tpl | 22 +++++++++++++++++++ chart/templates/deployment.yaml | 38 +++++++++++++++++++++++++++++++++ chart/templates/ingress.yaml | 18 ++++++++++++++++ chart/templates/service.yaml | 15 +++++++++++++ chart/values.yaml | 22 +++++++++++++++++++ 7 files changed, 138 insertions(+) create mode 100644 chart/Chart.yaml create mode 100644 chart/env/prod.yaml create mode 100644 chart/templates/_helpers.tpl create mode 100644 chart/templates/deployment.yaml create mode 100644 chart/templates/ingress.yaml create mode 100644 chart/templates/service.yaml create mode 100644 chart/values.yaml diff --git a/chart/Chart.yaml b/chart/Chart.yaml new file mode 100644 index 00000000000..994382560cf --- /dev/null +++ b/chart/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v2 +name: chat-ui +version: 0.0.0-latest +type: application +icon: https://huggingface.co/front/assets/huggingface_logo-noborder.svg diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml new file mode 100644 index 00000000000..4c6ecf5091c --- /dev/null +++ b/chart/env/prod.yaml @@ -0,0 +1,18 @@ +nodeSelector: + role-hub-utils: "true" + +tolerations: + - key: CriticalAddonsOnly + operator: Equal + +ingress: + annotations: + external-dns.alpha.kubernetes.io/hostname: "chat-ui.hub-alb.huggingface.tech" + alb.ingress.kubernetes.io/healthcheck-path: "/healthcheck" + alb.ingress.kubernetes.io/listen-ports: "[{\"HTTP\": 80}, {\"HTTPS\": 443}]" + alb.ingress.kubernetes.io/group.name: "hub-prod" + alb.ingress.kubernetes.io/scheme: "internet-facing" + alb.ingress.kubernetes.io/ssl-redirect: "443" + alb.ingress.kubernetes.io/tags: "Env=prod,Project=hub,Terraform=true" + alb.ingress.kubernetes.io/target-node-labels: "role-hub-utils=true" + kubernetes.io/ingress.class: "alb" diff --git a/chart/templates/_helpers.tpl b/chart/templates/_helpers.tpl new file mode 100644 index 00000000000..eee5a181d22 --- /dev/null +++ b/chart/templates/_helpers.tpl @@ -0,0 +1,22 @@ +{{- define "name" -}} +{{- default $.Release.Name | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- define "app.name" -}} +chat-ui +{{- end -}} + +{{- define "labels.standard" -}} +release: {{ $.Release.Name | quote }} +heritage: {{ $.Release.Service | quote }} +chart: "{{ include "name" . }}" +app: "{{ include "app.name" . }}" +{{- end -}} + +{{- define "labels.resolver" -}} +release: {{ $.Release.Name | quote }} +heritage: {{ $.Release.Service | quote }} +chart: "{{ include "name" . }}" +app: "{{ include "app.name" . }}-resolver" +{{- end -}} + diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml new file mode 100644 index 00000000000..f6bedd0959d --- /dev/null +++ b/chart/templates/deployment.yaml @@ -0,0 +1,38 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: {{ include "labels.standard" . | nindent 4 }} + name: {{ include "name" . }} + namespace: {{ .Release.Namespace }} +spec: + progressDeadlineSeconds: 600 + replicas: {{ .Values.replicas }} + revisionHistoryLimit: 10 + selector: + matchLabels: {{ include "labels.standard" . | nindent 6 }} + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + labels: {{ include "labels.standard" . | nindent 8 }} + spec: + containers: + - name: chat-ui + image: "{{ .Values.image.repository }}/{{ .Values.image.name }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + readinessProbe: + tcpSocket: + port: 5000 + livenessProbe: + tcpSocket: + port: 5000 + ports: + - containerPort: 5000 + name: http + protocol: TCP + resources: {{ toYaml .Values.resources | nindent 12 }} + nodeSelector: {{ toYaml .Values.nodeSelector | nindent 8 }} + tolerations: {{ toYaml .Values.tolerations | nindent 8 }} diff --git a/chart/templates/ingress.yaml b/chart/templates/ingress.yaml new file mode 100644 index 00000000000..f5d013f82d9 --- /dev/null +++ b/chart/templates/ingress.yaml @@ -0,0 +1,18 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: {{ toYaml .Values.ingress.annotations | nindent 4 }} + labels: {{ include "labels.standard" . | nindent 4 }} + name: {{ include "name" . }} + namespace: {{ .Release.Namespace }} +spec: + rules: + - host: {{ .Values.domain }} + http: + paths: + - backend: + service: + name: {{ include "name" . }} + port: + name: http + pathType: ImplementationSpecific diff --git a/chart/templates/service.yaml b/chart/templates/service.yaml new file mode 100644 index 00000000000..0df90327704 --- /dev/null +++ b/chart/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: "{{ include "name" . }}" + annotations: {{ toYaml .Values.service.annotations | nindent 4 }} + namespace: {{ .Release.Namespace }} + labels: {{ include "labels.standard" . | nindent 4 }} +spec: + ports: + - name: http + port: 80 + protocol: TCP + targetPort: http + selector: {{ include "labels.standard" . | nindent 4 }} + type: {{.Values.service.type}} diff --git a/chart/values.yaml b/chart/values.yaml new file mode 100644 index 00000000000..7641fe86fee --- /dev/null +++ b/chart/values.yaml @@ -0,0 +1,22 @@ +image: + repository: ghcr.io/huggingface + name: chat-ui + tag: 0.0.0-latest + pullPolicy: IfNotPresent + +replicas: 3 + +domain: huggingface.co + +service: + type: NodePort + annotations: { } + +ingress: + annotations: { } + +resources: + requests: + cpu: 1 +nodeSelector: {} +tolerations: [] From 027192602f24dc7a4c1631c794b301ffafc20ea9 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Mon, 29 Apr 2024 16:30:13 +0200 Subject: [PATCH 02/38] feat(chart): create kube infra --- chart/env/prod.yaml | 29 +++++++++++++++++++++++++++++ chart/templates/config.yaml | 20 ++++++++++++++++++++ chart/templates/deployment.yaml | 11 ++++++++--- chart/templates/secrets.yaml | 21 +++++++++++++++++++++ chart/values.yaml | 7 +++++++ 5 files changed, 85 insertions(+), 3 deletions(-) create mode 100644 chart/templates/config.yaml create mode 100644 chart/templates/secrets.yaml diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml index 4c6ecf5091c..5283cb35834 100644 --- a/chart/env/prod.yaml +++ b/chart/env/prod.yaml @@ -16,3 +16,32 @@ ingress: alb.ingress.kubernetes.io/tags: "Env=prod,Project=hub,Terraform=true" alb.ingress.kubernetes.io/target-node-labels: "role-hub-utils=true" kubernetes.io/ingress.class: "alb" + +envVars: + APP_BASE: "/chat" + PUBLIC_ORIGIN: "https://huggingface.co" + PUBLIC_SHARE_PREFIX: "https://hf.co/chat" + PUBLIC_ANNOUNCEMENT_BANNERS: "[]" + PUBLIC_APP_NAME: "HuggingChat" + PUBLIC_APP_ASSETS: "huggingchat" + PUBLIC_APP_COLOR: "yellow" + PUBLIC_APP_DESCRIPTION: "Making the community's best AI chat models available to everyone." + PUBLIC_APP_DISCLAIMER_MESSAGE: "Disclaimer: AI is an area of active research with known problems such as biased generation and misinformation. Do not use this application for high-stakes decisions or advice." + PUBLIC_APP_DATA_SHARING: 0 + PUBLIC_APP_DISCLAIMER: 1 + PUBLIC_PLAUSIBLE_SCRIPT_URL: "/js/script.js" + PUBLIC_APPLE_APP_ID: "6476778843" + ENABLE_ASSISTANTS: "true" + ENABLE_ASSISTANTS_RAG: "true" + REQUIRE_FEATURED_ASSISTANTS: "true" + EXPOSE_API: "true" + ALTERNATIVE_REDIRECT_URLS: "[huggingchat://login/callback]" + WEBSEARCH_BLOCKLIST: '[\"youtube.com\", \"twitter.com\"]' + MESSAGES_BEFORE_LOGIN: 0 + +externalSecrets: + enabled: true + secretStoreName: "chat-ui-prod-secretstore" + secretName: "chat-ui-prod-secrets" + parameters: + MONGODB_URL: "hub-prod-chat-ui-mongodb-url" diff --git a/chart/templates/config.yaml b/chart/templates/config.yaml new file mode 100644 index 00000000000..da07700ad9b --- /dev/null +++ b/chart/templates/config.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + labels: {{ include "labels.standard" . | nindent 4 }} + name: {{ include "name" . }} + namespace: {{ .Release.Namespace }} +data: + .env: | + NODE_BLOG_AND_DOC={{ $.Values.blogAndDoc.enabled | default "false" | quote }} + NODE_API_INFERENCE_URL={{ include "apiInference.url" $ | default "" | quote }} + NODE_COOKIE_SECURE={{ $.Values.global.huggingface.ingress.ssl | quote }} + NODE_GITALY_HOST={{ (include "hub.gitaly.uri" $) | quote }} + NODE_GITALY_INTERNAL_HOST={{ (include "hub.gitaly.uri" $) | quote }} + NODE_MOON_URL={{ include "hub.external.url" $ | quote }} + NODE_INTERNAL_MOON_URL={{ printf "http://%s.%s.svc.cluster.local" (include "name" $) ($.Release.Namespace) | quote }} + NODE_SECURITY_SCANNER_URL={{ include "repoScanner.url" $ | default "" | quote }} + NODE_TENSORBOARD_URL={{ include "tensorboard.url" $ | default "" | quote }} + {{- range $key, $value := $.Values.envVars }} + {{ $key }}={{ $value | quote }} + {{- end }} diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index f6bedd0959d..0a24c853b84 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -25,14 +25,19 @@ spec: imagePullPolicy: {{ .Values.image.pullPolicy }} readinessProbe: tcpSocket: - port: 5000 + port: 3000 livenessProbe: tcpSocket: - port: 5000 + port: 3000 ports: - - containerPort: 5000 + - containerPort: 3000 name: http protocol: TCP resources: {{ toYaml .Values.resources | nindent 12 }} + {{- if $.Values.externalSecrets.enabled }} + envFrom: + - secretRef: + name: {{ $.Values.externalSecrets.secretName }} + {{- end }} nodeSelector: {{ toYaml .Values.nodeSelector | nindent 8 }} tolerations: {{ toYaml .Values.tolerations | nindent 8 }} diff --git a/chart/templates/secrets.yaml b/chart/templates/secrets.yaml new file mode 100644 index 00000000000..494d9019859 --- /dev/null +++ b/chart/templates/secrets.yaml @@ -0,0 +1,21 @@ +{{- if .Values.externalSecrets.enabled }} +apiVersion: "external-secrets.io/v1beta1" +kind: ExternalSecret +metadata: + labels: {{ include "labels.standard" . | nindent 4 }} + name: {{ include "name" $ }}-external-secret + namespace: {{ $.Release.Namespace }} +spec: + refreshInterval: 1h + secretStoreRef: + name: {{ .Values.externalSecrets.secretStoreName }} + kind: SecretStore + target: + name: {{ .Values.externalSecrets.secretName }} + data: + {{- range $key, $value := .Values.externalSecrets.parameters }} + - secretKey: {{ $key | quote }} + remoteRef: + key: {{ $value | quote }} + {{- end }} +{{- end }} diff --git a/chart/values.yaml b/chart/values.yaml index 7641fe86fee..8dc1f226235 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -20,3 +20,10 @@ resources: cpu: 1 nodeSelector: {} tolerations: [] + +envVars: { } +externalSecrets: + enabled: false + secretStoreName: "" + secretName: "" + parameters: { } From d7ce0413b6a5c52b5d8b91786df169e3f9af7885 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Mon, 29 Apr 2024 16:33:06 +0200 Subject: [PATCH 03/38] feat(chart): create kube infra --- chart/templates/config.yaml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/chart/templates/config.yaml b/chart/templates/config.yaml index da07700ad9b..f273c622172 100644 --- a/chart/templates/config.yaml +++ b/chart/templates/config.yaml @@ -6,15 +6,6 @@ metadata: namespace: {{ .Release.Namespace }} data: .env: | - NODE_BLOG_AND_DOC={{ $.Values.blogAndDoc.enabled | default "false" | quote }} - NODE_API_INFERENCE_URL={{ include "apiInference.url" $ | default "" | quote }} - NODE_COOKIE_SECURE={{ $.Values.global.huggingface.ingress.ssl | quote }} - NODE_GITALY_HOST={{ (include "hub.gitaly.uri" $) | quote }} - NODE_GITALY_INTERNAL_HOST={{ (include "hub.gitaly.uri" $) | quote }} - NODE_MOON_URL={{ include "hub.external.url" $ | quote }} - NODE_INTERNAL_MOON_URL={{ printf "http://%s.%s.svc.cluster.local" (include "name" $) ($.Release.Namespace) | quote }} - NODE_SECURITY_SCANNER_URL={{ include "repoScanner.url" $ | default "" | quote }} - NODE_TENSORBOARD_URL={{ include "tensorboard.url" $ | default "" | quote }} {{- range $key, $value := $.Values.envVars }} {{ $key }}={{ $value | quote }} {{- end }} From d6df8b7126b9bb9ba4a30abab29fcb8253a26fd3 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Mon, 29 Apr 2024 16:35:32 +0200 Subject: [PATCH 04/38] feat(chart): create kube infra --- chart/templates/deployment.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index 0a24c853b84..06c751863fb 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -41,3 +41,7 @@ spec: {{- end }} nodeSelector: {{ toYaml .Values.nodeSelector | nindent 8 }} tolerations: {{ toYaml .Values.tolerations | nindent 8 }} + volumes: + - name: config + configMap: + name: {{ include "name" . }} From 26c22fb5e275d705172ac0654084f723b830775c Mon Sep 17 00:00:00 2001 From: rtrompier Date: Mon, 29 Apr 2024 16:43:54 +0200 Subject: [PATCH 05/38] feat(chart): create kube infra --- chart/templates/deployment.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index 06c751863fb..fe9a6af0be9 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -34,6 +34,10 @@ spec: name: http protocol: TCP resources: {{ toYaml .Values.resources | nindent 12 }} + volumeMounts: + - name: config + mountPath: /app/.env + subPath: .env {{- if $.Values.externalSecrets.enabled }} envFrom: - secretRef: From bfd6b452295ead41583645a83bf1e73233204717 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Mon, 29 Apr 2024 16:50:07 +0200 Subject: [PATCH 06/38] feat(chart): create kube infra --- chart/templates/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index fe9a6af0be9..9bfb95d417e 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -36,7 +36,7 @@ spec: resources: {{ toYaml .Values.resources | nindent 12 }} volumeMounts: - name: config - mountPath: /app/.env + mountPath: /app/.env.local subPath: .env {{- if $.Values.externalSecrets.enabled }} envFrom: From aa781613757b6ba79ce47f5366dc99cc9af13697 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Mon, 29 Apr 2024 16:54:31 +0200 Subject: [PATCH 07/38] feat(chart): create kube infra --- chart/env/prod.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml index 5283cb35834..a37a53e92cb 100644 --- a/chart/env/prod.yaml +++ b/chart/env/prod.yaml @@ -45,3 +45,6 @@ externalSecrets: secretName: "chat-ui-prod-secrets" parameters: MONGODB_URL: "hub-prod-chat-ui-mongodb-url" + OPENID_CONFIG: "hub-prod-chat-ui-openid-config" + SERPER_API_KEY: "hub-prod-chat-ui-serper-api-key" + WEBHOOK_URL_REPORT_ASSISTANT: "hub-prod-chat-ui-webhook-report-assistant" From a70d55863ee464b6d20aaabf373f6e0da964166d Mon Sep 17 00:00:00 2001 From: rtrompier Date: Mon, 29 Apr 2024 17:03:44 +0200 Subject: [PATCH 08/38] feat(chart): create kube infra --- chart/env/prod.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml index a37a53e92cb..7e1ae085197 100644 --- a/chart/env/prod.yaml +++ b/chart/env/prod.yaml @@ -36,7 +36,7 @@ envVars: REQUIRE_FEATURED_ASSISTANTS: "true" EXPOSE_API: "true" ALTERNATIVE_REDIRECT_URLS: "[huggingchat://login/callback]" - WEBSEARCH_BLOCKLIST: '[\"youtube.com\", \"twitter.com\"]' + WEBSEARCH_BLOCKLIST: '["youtube.com", "twitter.com"]' MESSAGES_BEFORE_LOGIN: 0 externalSecrets: @@ -48,3 +48,5 @@ externalSecrets: OPENID_CONFIG: "hub-prod-chat-ui-openid-config" SERPER_API_KEY: "hub-prod-chat-ui-serper-api-key" WEBHOOK_URL_REPORT_ASSISTANT: "hub-prod-chat-ui-webhook-report-assistant" + ADMIN_API_SECRET: "hub-prod-chat-ui-admin-api-secret" + USAGE_LIMITS: "hub-prod-chat-ui-usage-limits" From 22f15df057c9361345a9cafea33277c7c42c04dc Mon Sep 17 00:00:00 2001 From: rtrompier Date: Mon, 29 Apr 2024 17:08:06 +0200 Subject: [PATCH 09/38] feat(chart): create kube infra --- chart/env/prod.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml index 7e1ae085197..e573dc39c98 100644 --- a/chart/env/prod.yaml +++ b/chart/env/prod.yaml @@ -35,7 +35,7 @@ envVars: ENABLE_ASSISTANTS_RAG: "true" REQUIRE_FEATURED_ASSISTANTS: "true" EXPOSE_API: "true" - ALTERNATIVE_REDIRECT_URLS: "[huggingchat://login/callback]" + ALTERNATIVE_REDIRECT_URLS: '["huggingchat://login/callback"]' WEBSEARCH_BLOCKLIST: '["youtube.com", "twitter.com"]' MESSAGES_BEFORE_LOGIN: 0 From 5d7108736e3398d9b124f96cb3da85b8097a2311 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Mon, 29 Apr 2024 17:30:50 +0200 Subject: [PATCH 10/38] feat(chart): create kube infra --- chart/env/prod.yaml | 69 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 5 deletions(-) diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml index e573dc39c98..9751935055f 100644 --- a/chart/env/prod.yaml +++ b/chart/env/prod.yaml @@ -18,7 +18,59 @@ ingress: kubernetes.io/ingress.class: "alb" envVars: + ALTERNATIVE_REDIRECT_URLS: '["huggingchat://login/callback"]' APP_BASE: "/chat" + ENABLE_ASSISTANTS: "true" + ENABLE_ASSISTANTS_RAG: "true" + EXPOSE_API: "true" + MESSAGES_BEFORE_LOGIN: 0 + MODELS: > + [ + { + "name": "CohereForAI/c4ai-command-r-plus", + "tokenizer": "Xenova/c4ai-command-r-v01-tokenizer", + "description": "Command R+ is Cohere's latest LLM and is the first open weight model to beat GPT4 in the Chatbot Arena!", + "modelUrl": "https://huggingface.co/CohereForAI/c4ai-command-r-plus", + "websiteUrl": "https://docs.cohere.com/docs/command-r-plus", + "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/cohere-logo.png", + "parameters": { + "stop": [ "<|END_OF_TURN_TOKEN|>" ], + "truncate": 28672, + "max_new_tokens": 4096, + "temperature": 0.3 + }, + "promptExamples": [ + { + "title": "Write an email from bullet list", + "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: + + - Wine (x10) + - Eggs (x24) + - Bread (x12)" + }, { + "title": "Code a snake game", + "prompt": "Code a basic snake game in python, give explanations for each step." + }, { + "title": "Assist in a task", + "prompt": "How do I make a delicious lemon cheesecake?" + } + ] + }, + ] + OLD_MODELS: > + [ + { "name": "bigcode/starcoder" }, + { "name": "OpenAssistant/oasst-sft-6-llama-30b-xor" }, + { "name": "HuggingFaceH4/zephyr-7b-alpha" }, + { "name": "openchat/openchat_3.5" }, + { "name": "openchat/openchat-3.5-1210" }, + { "name": "tiiuae/falcon-180B-chat" }, + { "name": "codellama/CodeLlama-34b-Instruct-hf" }, + { "name": "google/gemma-7b-it" }, + { "name": "meta-llama/Llama-2-70b-chat-hf" }, + { "name": "codellama/CodeLlama-70b-Instruct-hf" }, + { "name": "openchat/openchat-3.5-0106" } + ] PUBLIC_ORIGIN: "https://huggingface.co" PUBLIC_SHARE_PREFIX: "https://hf.co/chat" PUBLIC_ANNOUNCEMENT_BANNERS: "[]" @@ -31,13 +83,19 @@ envVars: PUBLIC_APP_DISCLAIMER: 1 PUBLIC_PLAUSIBLE_SCRIPT_URL: "/js/script.js" PUBLIC_APPLE_APP_ID: "6476778843" - ENABLE_ASSISTANTS: "true" - ENABLE_ASSISTANTS_RAG: "true" REQUIRE_FEATURED_ASSISTANTS: "true" - EXPOSE_API: "true" - ALTERNATIVE_REDIRECT_URLS: '["huggingchat://login/callback"]' + TASK_MODEL: "meta-llama/Meta-Llama-3-8B-Instruct" + TEXT_EMBEDDING_MODELS: > + [{ + "name": "bge-base-en-v1-5-sxa", + "displayName": "bge-base-en-v1-5-sxa", + "chunkCharLength": 512, + "endpoints": [{ + "type": "tei", + "url": "https://huggingchat-tei.hf.space/" + }] + }] WEBSEARCH_BLOCKLIST: '["youtube.com", "twitter.com"]' - MESSAGES_BEFORE_LOGIN: 0 externalSecrets: enabled: true @@ -47,6 +105,7 @@ externalSecrets: MONGODB_URL: "hub-prod-chat-ui-mongodb-url" OPENID_CONFIG: "hub-prod-chat-ui-openid-config" SERPER_API_KEY: "hub-prod-chat-ui-serper-api-key" + HF_TOKEN: "hub-prod-chat-ui-hf-token" WEBHOOK_URL_REPORT_ASSISTANT: "hub-prod-chat-ui-webhook-report-assistant" ADMIN_API_SECRET: "hub-prod-chat-ui-admin-api-secret" USAGE_LIMITS: "hub-prod-chat-ui-usage-limits" From e41a3120abaf8330be639196acd61fcc2c8be031 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Mon, 29 Apr 2024 21:45:29 +0200 Subject: [PATCH 11/38] feat(chart): create kube infra --- chart/env/prod.yaml | 227 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 215 insertions(+), 12 deletions(-) diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml index 9751935055f..c2e0f22657b 100644 --- a/chart/env/prod.yaml +++ b/chart/env/prod.yaml @@ -24,29 +24,75 @@ envVars: ENABLE_ASSISTANTS_RAG: "true" EXPOSE_API: "true" MESSAGES_BEFORE_LOGIN: 0 - MODELS: > + MODELS: > [ { - "name": "CohereForAI/c4ai-command-r-plus", + "name" : "CohereForAI/c4ai-command-r-plus", "tokenizer": "Xenova/c4ai-command-r-v01-tokenizer", "description": "Command R+ is Cohere's latest LLM and is the first open weight model to beat GPT4 in the Chatbot Arena!", "modelUrl": "https://huggingface.co/CohereForAI/c4ai-command-r-plus", "websiteUrl": "https://docs.cohere.com/docs/command-r-plus", "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/cohere-logo.png", "parameters": { - "stop": [ "<|END_OF_TURN_TOKEN|>" ], - "truncate": 28672, - "max_new_tokens": 4096, - "temperature": 0.3 + "stop": ["<|END_OF_TURN_TOKEN|>"], + "truncate" : 28672, + "max_new_tokens" : 4096, + "temperature" : 0.3 }, - "promptExamples": [ + "promptExamples" : [ { "title": "Write an email from bullet list", - "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: - - - Wine (x10) - - Eggs (x24) - - Bread (x12)" + "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)" + }, { + "title": "Code a snake game", + "prompt": "Code a basic snake game in python, give explanations for each step." + }, { + "title": "Assist in a task", + "prompt": "How do I make a delicious lemon cheesecake?" + } + ] + }, + { + "name" : "meta-llama/Meta-Llama-3-70B-Instruct", + "description": "Generation over generation, Meta Llama 3 demonstrates state-of-the-art performance on a wide range of industry benchmarks and offers new capabilities, including improved reasoning.", + "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/meta-logo.png", + "modelUrl": "https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct", + "websiteUrl": "https://llama.meta.com/llama3/", + "tokenizer" : "philschmid/meta-llama-3-tokenizer", + "promptExamples" : [ + { + "title": "Write an email from bullet list", + "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)" + }, { + "title": "Code a snake game", + "prompt": "Code a basic snake game in python, give explanations for each step." + }, { + "title": "Assist in a task", + "prompt": "How do I make a delicious lemon cheesecake?" + } + ], + "parameters": { + "stop": ["<|eot_id|>"], + "truncate": 6144, + "max_new_tokens": 2047 + } + }, + { + "name" : "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1", + "tokenizer": "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1", + "description": "Zephyr 141B-A35B is a fine-tuned version of Mistral 8x22B, trained using ORPO, a novel alignment algorithm.", + "modelUrl": "https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1", + "websiteUrl": "https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1", + "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/zephyr-logo.png", + "parameters": { + "truncate" : 24576, + "max_new_tokens" : 8192, + }, + "preprompt" : "You are Zephyr, an assistant developed by KAIST AI, Argilla, and Hugging Face. You should give concise responses to very simple questions, but provide thorough responses to more complex and open-ended questions. You are happy to help with writing, analysis, question answering, math, coding, and all sorts of other tasks.", + "promptExamples" : [ + { + "title": "Write a poem", + "prompt": "Write a poem to help me remember the first 10 elements on the periodic table, giving each element its own line." }, { "title": "Code a snake game", "prompt": "Code a basic snake game in python, give explanations for each step." @@ -56,6 +102,163 @@ envVars: } ] }, + { + "name" : "mistralai/Mixtral-8x7B-Instruct-v0.1", + "description" : "The latest MoE model from Mistral AI! 8x7B and outperforms Llama 2 70B in most benchmarks.", + "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/mistral-logo.png", + "websiteUrl" : "https://mistral.ai/news/mixtral-of-experts/", + "modelUrl": "https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1", + "tokenizer": "mistralai/Mixtral-8x7B-Instruct-v0.1", + "preprompt" : "", + "chatPromptTemplate": " {{#each messages}}{{#ifUser}}[INST]{{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}} {{content}} [/INST]{{/ifUser}}{{#ifAssistant}} {{content}} {{/ifAssistant}}{{/each}}", + "parameters" : { + "temperature" : 0.6, + "top_p" : 0.95, + "repetition_penalty" : 1.2, + "top_k" : 50, + "truncate" : 24576, + "max_new_tokens" : 8192, + "stop" : [""] + }, + "promptExamples" : [ + { + "title": "Write an email from bullet list", + "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)" + }, { + "title": "Code a snake game", + "prompt": "Code a basic snake game in python, give explanations for each step." + }, { + "title": "Assist in a task", + "prompt": "How do I make a delicious lemon cheesecake?" + } + ] + }, + { + "name" : "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", + "description" : "Nous Hermes 2 Mixtral 8x7B DPO is the new flagship Nous Research model trained over the Mixtral 8x7B MoE LLM.", + "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/nous-logo.png", + "websiteUrl" : "https://nousresearch.com/", + "modelUrl": "https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", + "tokenizer": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", + "chatPromptTemplate" : "{{#if @root.preprompt}}<|im_start|>system\n{{@root.preprompt}}<|im_end|>\n{{/if}}{{#each messages}}{{#ifUser}}<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n{{/ifUser}}{{#ifAssistant}}{{content}}<|im_end|>\n{{/ifAssistant}}{{/each}}", + "promptExamples": [ + { + "title": "Write an email from bullet list", + "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)" + }, { + "title": "Code a snake game", + "prompt": "Code a basic snake game in python, give explanations for each step." + }, { + "title": "Assist in a task", + "prompt": "How do I make a delicious lemon cheesecake?" + } + ], + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "repetition_penalty": 1, + "top_k": 50, + "truncate": 24576, + "max_new_tokens": 2048, + "stop": ["<|im_end|>"] + } + }, + { + "name" : "google/gemma-1.1-7b-it", + "description": "Gemma 7B 1.1 is the latest release in the Gemma family of lightweight models built by Google, trained using a novel RLHF method.", + "websiteUrl" : "https://blog.google/technology/developers/gemma-open-models/", + "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/google-logo.png", + "modelUrl": "https://huggingface.co/google/gemma-1.1-7b-it", + "preprompt": "", + "chatPromptTemplate" : "{{#each messages}}{{#ifUser}}user\n{{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}}{{content}}\nmodel\n{{/ifUser}}{{#ifAssistant}}{{content}}\n{{/ifAssistant}}{{/each}}", + "promptExamples": [ + { + "title": "Write an email from bullet list", + "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)" + }, { + "title": "Code a snake game", + "prompt": "Code a basic snake game in python, give explanations for each step." + }, { + "title": "Assist in a task", + "prompt": "How do I make a delicious lemon cheesecake?" + } + ], + "parameters": { + "do_sample": true, + "truncate": 7168, + "max_new_tokens": 1024, + "stop" : [""] + } + }, + + { + "name": "mistralai/Mistral-7B-Instruct-v0.2", + "displayName": "mistralai/Mistral-7B-Instruct-v0.2", + "description": "Mistral 7B is a new Apache 2.0 model, released by Mistral AI that outperforms Llama2 13B in benchmarks.", + "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/mistral-logo.png", + "websiteUrl": "https://mistral.ai/news/announcing-mistral-7b/", + "modelUrl": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2", + "tokenizer": "mistralai/Mistral-7B-Instruct-v0.2", + "preprompt": "", + "chatPromptTemplate" : "{{#each messages}}{{#ifUser}}[INST] {{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}}{{content}} [/INST]{{/ifUser}}{{#ifAssistant}}{{content}}{{/ifAssistant}}{{/each}}", + "parameters": { + "temperature": 0.3, + "top_p": 0.95, + "repetition_penalty": 1.2, + "top_k": 50, + "truncate": 3072, + "max_new_tokens": 1024, + "stop": [""] + }, + "promptExamples": [ + { + "title": "Write an email from bullet list", + "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)" + }, { + "title": "Code a snake game", + "prompt": "Code a basic snake game in python, give explanations for each step." + }, { + "title": "Assist in a task", + "prompt": "How do I make a delicious lemon cheesecake?" + } + ] + }, + { + "name": "microsoft/Phi-3-mini-4k-instruct", + "tokenizer": "microsoft/Phi-3-mini-4k-instruct", + "description" : "Phi-3 Mini-4K-Instruct is a 3.8B parameters, lightweight, state-of-the-art open model built upon datasets used for Phi-2.", + "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/microsoft-logo.png", + "modelUrl": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct", + "websiteUrl": "https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/", + "preprompt": "", + "chatPromptTemplate": "{{preprompt}}{{#each messages}}{{#ifUser}}<|user|>\n{{content}}<|end|>\n<|assistant|>\n{{/ifUser}}{{#ifAssistant}}{{content}}<|end|>\n{{/ifAssistant}}{{/each}}", + "parameters": { + "stop": ["<|end|>", "<|endoftext|>", "<|assistant|>"], + "max_new_tokens": 1024, + "truncate": 3071 + }, + "promptExamples": [ + { + "title": "Write an email from bullet list", + "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)" + }, { + "title": "Code a snake game", + "prompt": "Code a basic snake game in python, give explanations for each step." + }, { + "title": "Assist in a task", + "prompt": "How do I make a delicious lemon cheesecake?" + } + ] + }, + { + "name": "meta-llama/Meta-Llama-3-8B-Instruct", + "tokenizer" : "philschmid/meta-llama-3-tokenizer", + "parameters": { + "temperature": 0.1, + "stop": ["<|eot_id|>"], + }, + "unlisted": true + } ] OLD_MODELS: > [ From 25b3884ff57629ea442c72f251ed906ebd7bf2b8 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Mon, 29 Apr 2024 21:58:16 +0200 Subject: [PATCH 12/38] feat(chart): create kube infra --- chart/templates/deployment.yaml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index 9bfb95d417e..4755f0113c2 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -34,12 +34,10 @@ spec: name: http protocol: TCP resources: {{ toYaml .Values.resources | nindent 12 }} - volumeMounts: - - name: config - mountPath: /app/.env.local - subPath: .env - {{- if $.Values.externalSecrets.enabled }} envFrom: + - configMapRef: + name: {{ include "name" . }} + {{- if $.Values.externalSecrets.enabled }} - secretRef: name: {{ $.Values.externalSecrets.secretName }} {{- end }} From 6c4e774cc34503f65505cfb3b977a5a3ae5b2b24 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Tue, 30 Apr 2024 06:17:34 +0200 Subject: [PATCH 13/38] feat(chart): add hpa --- chart/env/prod.yaml | 7 +++++++ chart/templates/hpa.yaml | 45 ++++++++++++++++++++++++++++++++++++++++ chart/values.yaml | 11 ++++++++++ 3 files changed, 63 insertions(+) create mode 100644 chart/templates/hpa.yaml diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml index c2e0f22657b..bce81bc0d85 100644 --- a/chart/env/prod.yaml +++ b/chart/env/prod.yaml @@ -312,3 +312,10 @@ externalSecrets: WEBHOOK_URL_REPORT_ASSISTANT: "hub-prod-chat-ui-webhook-report-assistant" ADMIN_API_SECRET: "hub-prod-chat-ui-admin-api-secret" USAGE_LIMITS: "hub-prod-chat-ui-usage-limits" + +autoscaling: + enabled: true + minReplicas: 2 + maxReplicas: 10 + targetMemoryUtilizationPercentage: "70" + targetCPUUtilizationPercentage: "70" diff --git a/chart/templates/hpa.yaml b/chart/templates/hpa.yaml new file mode 100644 index 00000000000..bf7bd3b256b --- /dev/null +++ b/chart/templates/hpa.yaml @@ -0,0 +1,45 @@ +{{- if $.Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + labels: {{ include "labels.standard" . | nindent 4 }} + name: {{ include "name" . }} + namespace: {{ .Release.Namespace }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "name" . }} + minReplicas: {{ $.Values.autoscaling.minReplicas }} + maxReplicas: {{ $.Values.autoscaling.maxReplicas }} + metrics: + {{- if ne "" $.Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ $.Values.autoscaling.targetMemoryUtilizationPercentage | int }} + {{- end }} + {{- if ne "" $.Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ $.Values.autoscaling.targetCPUUtilizationPercentage | int }} + {{- end }} + behavior: + scaleDown: + stabilizationWindowSeconds: 600 + policies: + - type: Percent + value: 10 + periodSeconds: 60 + scaleUp: + stabilizationWindowSeconds: 0 + policies: + - type: Pods + value: 1 + periodSeconds: 30 +{{- end }} diff --git a/chart/values.yaml b/chart/values.yaml index 8dc1f226235..faab5aca550 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -18,6 +18,10 @@ ingress: resources: requests: cpu: 1 + memory: 8Gi + limits: + cpu: 1 + memory: 8Gi nodeSelector: {} tolerations: [] @@ -27,3 +31,10 @@ externalSecrets: secretStoreName: "" secretName: "" parameters: { } + +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 2 + targetMemoryUtilizationPercentage: "" + targetCPUUtilizationPercentage: "" From 6893e2691223113a22b69719a3d107a437b55338 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Tue, 30 Apr 2024 06:24:53 +0200 Subject: [PATCH 14/38] feat(chart): add service monitor --- chart/env/prod.yaml | 3 +++ chart/templates/service-monitor.yaml | 15 +++++++++++++++ chart/templates/service.yaml | 6 ++++++ chart/values.yaml | 3 +++ 4 files changed, 27 insertions(+) create mode 100644 chart/templates/service-monitor.yaml diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml index bce81bc0d85..19d1946dc2f 100644 --- a/chart/env/prod.yaml +++ b/chart/env/prod.yaml @@ -319,3 +319,6 @@ autoscaling: maxReplicas: 10 targetMemoryUtilizationPercentage: "70" targetCPUUtilizationPercentage: "70" + +monitoring: + enabled: true diff --git a/chart/templates/service-monitor.yaml b/chart/templates/service-monitor.yaml new file mode 100644 index 00000000000..2d7fdae07d8 --- /dev/null +++ b/chart/templates/service-monitor.yaml @@ -0,0 +1,15 @@ +{{- if $.Values.monitoring.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: {{ include "labels.standard" . | nindent 4 }} + name: {{ include "name" . }} + namespace: {{ .Release.Namespace }} +spec: + selector: + matchLabels: {{ include "labels.standard" . | nindent 6 }} + endpoints: + - port: metrics + path: /metrics + interval: 15s +{{- end }} diff --git a/chart/templates/service.yaml b/chart/templates/service.yaml index 0df90327704..bb70a36f501 100644 --- a/chart/templates/service.yaml +++ b/chart/templates/service.yaml @@ -11,5 +11,11 @@ spec: port: 80 protocol: TCP targetPort: http + {{- if $.Values.monitoring.enabled }} + - name: metrics + port: 5565 + protocol: TCP + targetPort: http + {{- end }} selector: {{ include "labels.standard" . | nindent 4 }} type: {{.Values.service.type}} diff --git a/chart/values.yaml b/chart/values.yaml index faab5aca550..633220916e1 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -38,3 +38,6 @@ autoscaling: maxReplicas: 2 targetMemoryUtilizationPercentage: "" targetCPUUtilizationPercentage: "" + +monitoring: + enabled: false From 2871e0b36451cb2729ae01c367aa08aaa5e5fb55 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Tue, 30 Apr 2024 06:27:50 +0200 Subject: [PATCH 15/38] feat(chart): test --- chart/templates/ingress.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/chart/templates/ingress.yaml b/chart/templates/ingress.yaml index f5d013f82d9..e6d033d9e2f 100644 --- a/chart/templates/ingress.yaml +++ b/chart/templates/ingress.yaml @@ -15,4 +15,5 @@ spec: name: {{ include "name" . }} port: name: http + path: /chat-test pathType: ImplementationSpecific From cdbff67044309815aaba5960b6bb8789de4d0a8d Mon Sep 17 00:00:00 2001 From: rtrompier Date: Tue, 30 Apr 2024 06:32:46 +0200 Subject: [PATCH 16/38] feat(chart): fix lint --- .prettierignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.prettierignore b/.prettierignore index 38972655faf..177a4e072ad 100644 --- a/.prettierignore +++ b/.prettierignore @@ -3,6 +3,7 @@ node_modules /build /.svelte-kit /package +/chart .env .env.* !.env.example From 26a30ba66b541ee1b27ad0aa67df32bd5711b722 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Tue, 30 Apr 2024 06:44:02 +0200 Subject: [PATCH 17/38] feat(chart): remove pm2 --- Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index f87b5a64935..14d689815cd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,7 +22,6 @@ RUN --mount=type=secret,id=DOTENV_LOCAL,dst=.env.local \ npm run build FROM node:20-slim -RUN npm install -g pm2 RUN userdel -r node @@ -39,4 +38,4 @@ COPY --link --chown=1000 package.json /app/package.json COPY --from=builder --chown=1000 /app/build /app/build COPY --chown=1000 gcp-*.json /app/ -CMD pm2 start /app/build/index.js -i $CPU_CORES --no-daemon +CMD node /app/build/index.js From d6f948b1045f3dad1d9975055fb62cf00932cc06 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Tue, 30 Apr 2024 07:17:41 +0200 Subject: [PATCH 18/38] feat(chart): fix --- chart/templates/deployment.yaml | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index 4755f0113c2..49398be99bb 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -24,13 +24,19 @@ spec: image: "{{ .Values.image.repository }}/{{ .Values.image.name }}:{{ .Values.image.tag }}" imagePullPolicy: {{ .Values.image.pullPolicy }} readinessProbe: - tcpSocket: - port: 3000 + failureThreshold: 30 + periodSeconds: 10 + httpGet: + path: /healthcheck + port: {{ $.Values.envVars.APP_PORT | default 3000 | int }} livenessProbe: - tcpSocket: - port: 3000 + failureThreshold: 30 + periodSeconds: 10 + httpGet: + path: /healthcheck + port: {{ $.Values.envVars.APP_PORT | default 3000 | int }} ports: - - containerPort: 3000 + - containerPort: {{ $.Values.envVars.APP_PORT | default 3000 | int }} name: http protocol: TCP resources: {{ toYaml .Values.resources | nindent 12 }} From 8f726975d6958039c7110feac4424c8553c38b8a Mon Sep 17 00:00:00 2001 From: rtrompier Date: Tue, 30 Apr 2024 07:20:55 +0200 Subject: [PATCH 19/38] feat(chart): fix --- chart/env/prod.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml index 19d1946dc2f..5ed19d9d523 100644 --- a/chart/env/prod.yaml +++ b/chart/env/prod.yaml @@ -8,7 +8,7 @@ tolerations: ingress: annotations: external-dns.alpha.kubernetes.io/hostname: "chat-ui.hub-alb.huggingface.tech" - alb.ingress.kubernetes.io/healthcheck-path: "/healthcheck" +# alb.ingress.kubernetes.io/healthcheck-path: "/healthcheck" alb.ingress.kubernetes.io/listen-ports: "[{\"HTTP\": 80}, {\"HTTPS\": 443}]" alb.ingress.kubernetes.io/group.name: "hub-prod" alb.ingress.kubernetes.io/scheme: "internet-facing" @@ -321,4 +321,4 @@ autoscaling: targetCPUUtilizationPercentage: "70" monitoring: - enabled: true + enabled: fase From f1e0e200bff68003c8cac20e687336b794932ba1 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Tue, 30 Apr 2024 07:22:40 +0200 Subject: [PATCH 20/38] feat(chart): fix --- chart/env/prod.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml index 5ed19d9d523..19d1946dc2f 100644 --- a/chart/env/prod.yaml +++ b/chart/env/prod.yaml @@ -8,7 +8,7 @@ tolerations: ingress: annotations: external-dns.alpha.kubernetes.io/hostname: "chat-ui.hub-alb.huggingface.tech" -# alb.ingress.kubernetes.io/healthcheck-path: "/healthcheck" + alb.ingress.kubernetes.io/healthcheck-path: "/healthcheck" alb.ingress.kubernetes.io/listen-ports: "[{\"HTTP\": 80}, {\"HTTPS\": 443}]" alb.ingress.kubernetes.io/group.name: "hub-prod" alb.ingress.kubernetes.io/scheme: "internet-facing" @@ -321,4 +321,4 @@ autoscaling: targetCPUUtilizationPercentage: "70" monitoring: - enabled: fase + enabled: true From 62cfed9d9ac71b28920f5860ff8153c4ba6d002a Mon Sep 17 00:00:00 2001 From: rtrompier Date: Tue, 30 Apr 2024 07:33:06 +0200 Subject: [PATCH 21/38] feat(chart): fix --- chart/templates/deployment.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index 49398be99bb..ac910e55efd 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -6,7 +6,9 @@ metadata: namespace: {{ .Release.Namespace }} spec: progressDeadlineSeconds: 600 + {{- if not $.Values.autoscaling.enabled }} replicas: {{ .Values.replicas }} + {{- end }} revisionHistoryLimit: 10 selector: matchLabels: {{ include "labels.standard" . | nindent 6 }} From 24b2e6733c0c8b240b000e99265bf3cb8da58116 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Tue, 30 Apr 2024 10:19:42 +0200 Subject: [PATCH 22/38] feat(chart): fix --- chart/env/prod.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml index 19d1946dc2f..90dbbfc08ee 100644 --- a/chart/env/prod.yaml +++ b/chart/env/prod.yaml @@ -260,6 +260,7 @@ envVars: "unlisted": true } ] + NODE_ENV: "prod" OLD_MODELS: > [ { "name": "bigcode/starcoder" }, From 5061c548eeef1124d2e60f28dff4587f9194578e Mon Sep 17 00:00:00 2001 From: rtrompier Date: Tue, 30 Apr 2024 11:26:03 +0200 Subject: [PATCH 23/38] feat(chart): fix --- chart/templates/ingress.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chart/templates/ingress.yaml b/chart/templates/ingress.yaml index e6d033d9e2f..eef4592ea82 100644 --- a/chart/templates/ingress.yaml +++ b/chart/templates/ingress.yaml @@ -15,5 +15,5 @@ spec: name: {{ include "name" . }} port: name: http - path: /chat-test + path: /chat pathType: ImplementationSpecific From 0a133251ec41d30d379eaa7238340c68be153083 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Tue, 30 Apr 2024 12:05:58 +0200 Subject: [PATCH 24/38] feat(chart): fix --- chart/env/prod.yaml | 2 +- chart/templates/deployment.yaml | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml index 90dbbfc08ee..f5752c8c25f 100644 --- a/chart/env/prod.yaml +++ b/chart/env/prod.yaml @@ -322,4 +322,4 @@ autoscaling: targetCPUUtilizationPercentage: "70" monitoring: - enabled: true + enabled: false diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index ac910e55efd..a9592003391 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -41,6 +41,11 @@ spec: - containerPort: {{ $.Values.envVars.APP_PORT | default 3000 | int }} name: http protocol: TCP + {{- if $.Values.monitoring.enabled }} + - containerPort: {{ $.Values.envVars.METRICS_PORT | default 5565 | int }} + name: metrics + protocol: TCP + {{- end }} resources: {{ toYaml .Values.resources | nindent 12 }} envFrom: - configMapRef: From 697d0df2f68aaccb8cefa48b1cdc1827e3ad6254 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Tue, 30 Apr 2024 12:10:01 +0200 Subject: [PATCH 25/38] feat(chart): fix --- chart/templates/ingress.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chart/templates/ingress.yaml b/chart/templates/ingress.yaml index eef4592ea82..b2f77acd7ce 100644 --- a/chart/templates/ingress.yaml +++ b/chart/templates/ingress.yaml @@ -16,4 +16,4 @@ spec: port: name: http path: /chat - pathType: ImplementationSpecific + pathType: Prefix From 2d6ec946d529fc83a5f03a2271b00e4e1e3fe053 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Tue, 30 Apr 2024 13:33:11 +0200 Subject: [PATCH 26/38] feat(chart): fix --- chart/env/prod.yaml | 2 +- chart/values.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml index f5752c8c25f..8b1ce5dd745 100644 --- a/chart/env/prod.yaml +++ b/chart/env/prod.yaml @@ -19,7 +19,7 @@ ingress: envVars: ALTERNATIVE_REDIRECT_URLS: '["huggingchat://login/callback"]' - APP_BASE: "/chat" + APP_BASE: "" ENABLE_ASSISTANTS: "true" ENABLE_ASSISTANTS_RAG: "true" EXPOSE_API: "true" diff --git a/chart/values.yaml b/chart/values.yaml index 633220916e1..989a1cc210f 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -6,7 +6,7 @@ image: replicas: 3 -domain: huggingface.co +domain: chat-ui.hub-alb.huggingface.tech service: type: NodePort From 13c8184840abb35dd309b08b7239bffd325ef68b Mon Sep 17 00:00:00 2001 From: rtrompier Date: Tue, 30 Apr 2024 13:34:03 +0200 Subject: [PATCH 27/38] feat(chart): fix --- chart/templates/ingress.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chart/templates/ingress.yaml b/chart/templates/ingress.yaml index b2f77acd7ce..f7b3330efe8 100644 --- a/chart/templates/ingress.yaml +++ b/chart/templates/ingress.yaml @@ -15,5 +15,5 @@ spec: name: {{ include "name" . }} port: name: http - path: /chat + path: / pathType: Prefix From 18364da3251b7d2efa2ac519723d85f69d342dc7 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Tue, 30 Apr 2024 14:53:25 +0200 Subject: [PATCH 28/38] feat(chart): fix --- chart/templates/config.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/chart/templates/config.yaml b/chart/templates/config.yaml index f273c622172..a6713a96a79 100644 --- a/chart/templates/config.yaml +++ b/chart/templates/config.yaml @@ -5,7 +5,6 @@ metadata: name: {{ include "name" . }} namespace: {{ .Release.Namespace }} data: - .env: | - {{- range $key, $value := $.Values.envVars }} - {{ $key }}={{ $value | quote }} - {{- end }} + {{- range $key, $value := $.Values.envVars }} + {{ $key }}={{ $value | quote }} + {{- end }} From 9b563c1b2a16972683ee24cee3d0028f4010b836 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Tue, 30 Apr 2024 14:57:13 +0200 Subject: [PATCH 29/38] feat(chart): fix --- chart/templates/config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chart/templates/config.yaml b/chart/templates/config.yaml index a6713a96a79..c4c803e9e5f 100644 --- a/chart/templates/config.yaml +++ b/chart/templates/config.yaml @@ -6,5 +6,5 @@ metadata: namespace: {{ .Release.Namespace }} data: {{- range $key, $value := $.Values.envVars }} - {{ $key }}={{ $value | quote }} + {{ $key }}: {{ $value | quote }} {{- end }} From 50ed51387b08c8d222f578331306981c3b492320 Mon Sep 17 00:00:00 2001 From: rtrompier Date: Tue, 30 Apr 2024 15:09:22 +0200 Subject: [PATCH 30/38] feat(chart): fix --- chart/env/prod.yaml | 3 ++- chart/templates/deployment.yaml | 4 ++-- chart/templates/ingress.yaml | 2 +- chart/values.yaml | 1 + 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml index 8b1ce5dd745..d0b5fedc3c4 100644 --- a/chart/env/prod.yaml +++ b/chart/env/prod.yaml @@ -6,6 +6,7 @@ tolerations: operator: Equal ingress: + path: "/chat" annotations: external-dns.alpha.kubernetes.io/hostname: "chat-ui.hub-alb.huggingface.tech" alb.ingress.kubernetes.io/healthcheck-path: "/healthcheck" @@ -19,7 +20,7 @@ ingress: envVars: ALTERNATIVE_REDIRECT_URLS: '["huggingchat://login/callback"]' - APP_BASE: "" + APP_BASE: "/chat" ENABLE_ASSISTANTS: "true" ENABLE_ASSISTANTS_RAG: "true" EXPOSE_API: "true" diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index a9592003391..9a397f7ad01 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -29,13 +29,13 @@ spec: failureThreshold: 30 periodSeconds: 10 httpGet: - path: /healthcheck + path: {{ $.Values.envVars.APP_BASE | default "" }}/healthcheck port: {{ $.Values.envVars.APP_PORT | default 3000 | int }} livenessProbe: failureThreshold: 30 periodSeconds: 10 httpGet: - path: /healthcheck + path: {{ $.Values.envVars.APP_BASE | default "" }}/healthcheck port: {{ $.Values.envVars.APP_PORT | default 3000 | int }} ports: - containerPort: {{ $.Values.envVars.APP_PORT | default 3000 | int }} diff --git a/chart/templates/ingress.yaml b/chart/templates/ingress.yaml index f7b3330efe8..d507d946f60 100644 --- a/chart/templates/ingress.yaml +++ b/chart/templates/ingress.yaml @@ -15,5 +15,5 @@ spec: name: {{ include "name" . }} port: name: http - path: / + path: {{ $.Values.ingress.path | default "/" }} pathType: Prefix diff --git a/chart/values.yaml b/chart/values.yaml index 989a1cc210f..c31a3305aef 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -13,6 +13,7 @@ service: annotations: { } ingress: + path: "/" annotations: { } resources: From bb38482d3c2950585971f172cf675459431f5cdb Mon Sep 17 00:00:00 2001 From: rtrompier Date: Tue, 30 Apr 2024 15:09:51 +0200 Subject: [PATCH 31/38] feat(chart): fix --- chart/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chart/values.yaml b/chart/values.yaml index c31a3305aef..b9802e1264c 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -6,7 +6,7 @@ image: replicas: 3 -domain: chat-ui.hub-alb.huggingface.tech +domain: huggingface.co service: type: NodePort From f50b508515fd82f015e74a4b723b7367ca6ab6dd Mon Sep 17 00:00:00 2001 From: rtrompier Date: Tue, 30 Apr 2024 15:33:03 +0200 Subject: [PATCH 32/38] feat(chart): fix --- chart/env/prod.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml index d0b5fedc3c4..4b32fa5cbdd 100644 --- a/chart/env/prod.yaml +++ b/chart/env/prod.yaml @@ -25,6 +25,7 @@ envVars: ENABLE_ASSISTANTS_RAG: "true" EXPOSE_API: "true" MESSAGES_BEFORE_LOGIN: 0 + METRICS_PORT: 5565 MODELS: > [ { @@ -323,4 +324,4 @@ autoscaling: targetCPUUtilizationPercentage: "70" monitoring: - enabled: false + enabled: true From e4270bbf98f1fcd454058cc3255f1758dd68018f Mon Sep 17 00:00:00 2001 From: Remy Date: Fri, 3 May 2024 10:47:43 +0200 Subject: [PATCH 33/38] update replicas count --- chart/env/prod.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml index 4b32fa5cbdd..af32ae59d18 100644 --- a/chart/env/prod.yaml +++ b/chart/env/prod.yaml @@ -318,8 +318,8 @@ externalSecrets: autoscaling: enabled: true - minReplicas: 2 - maxReplicas: 10 + minReplicas: 6 + maxReplicas: 30 targetMemoryUtilizationPercentage: "70" targetCPUUtilizationPercentage: "70" From 21797cde6a525ce4064abdc4721cc0d34b3c4742 Mon Sep 17 00:00:00 2001 From: Remy Date: Fri, 3 May 2024 11:17:38 +0200 Subject: [PATCH 34/38] json logs --- chart/env/prod.yaml | 1 + chart/templates/deployment.yaml | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml index af32ae59d18..c14cbf9c28b 100644 --- a/chart/env/prod.yaml +++ b/chart/env/prod.yaml @@ -263,6 +263,7 @@ envVars: } ] NODE_ENV: "prod" + NODE_LOG_STRUCTURED_DATA: true OLD_MODELS: > [ { "name": "bigcode/starcoder" }, diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index 9a397f7ad01..22f65956a2e 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -20,6 +20,10 @@ spec: template: metadata: labels: {{ include "labels.standard" . | nindent 8 }} + {{- if $.Values.envVars.NODE_LOG_STRUCTURED_DATA }} + annotations: + co.elastic.logs/json.expand_keys: "true" + {{- end }} spec: containers: - name: chat-ui From a38cdafd0396172577a48877ca24ad35a0527e52 Mon Sep 17 00:00:00 2001 From: Remy Date: Fri, 3 May 2024 11:50:27 +0200 Subject: [PATCH 35/38] use internal image for prod env --- chart/env/prod.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml index c14cbf9c28b..362b63ede12 100644 --- a/chart/env/prod.yaml +++ b/chart/env/prod.yaml @@ -1,3 +1,7 @@ +image: + repository: registry.internal.huggingface.tech/chat-ui + name: chat-ui + nodeSelector: role-hub-utils: "true" From 10961b59279540fa39db9748c3e59ae476cd22e5 Mon Sep 17 00:00:00 2001 From: Nathan Sarrazin Date: Fri, 3 May 2024 14:35:24 +0200 Subject: [PATCH 36/38] Add LOG_LEVEL in prod yaml --- chart/env/prod.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml index 362b63ede12..b207e141d17 100644 --- a/chart/env/prod.yaml +++ b/chart/env/prod.yaml @@ -30,6 +30,7 @@ envVars: EXPOSE_API: "true" MESSAGES_BEFORE_LOGIN: 0 METRICS_PORT: 5565 + LOG_LEVEL: "debug" MODELS: > [ { From 5faef25e890c3a90c2c48944cd78c44a3690dd06 Mon Sep 17 00:00:00 2001 From: Nathan Sarrazin Date: Fri, 3 May 2024 14:36:11 +0200 Subject: [PATCH 37/38] Get rid of unused staging env --- .github/workflows/deploy-staging.yml | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 .github/workflows/deploy-staging.yml diff --git a/.github/workflows/deploy-staging.yml b/.github/workflows/deploy-staging.yml deleted file mode 100644 index 14da9e54aab..00000000000 --- a/.github/workflows/deploy-staging.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: Deploy to staging environment -on: - push: - branches: [main] - - # to run this workflow manually from the Actions tab - workflow_dispatch: - -jobs: - sync-to-hub: - runs-on: ubuntu-latest - steps: - - name: Check large files - uses: ActionsDesk/lfs-warning@v2.0 - with: - filesizelimit: 10485760 # this is 10MB so we can sync to HF Spaces - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - lfs: true - - name: Push to hub - env: - HF_DEPLOYMENT_TOKEN: ${{ secrets.HF_DEPLOYMENT_TOKEN }} - run: git push https://nsarrazin:$HF_DEPLOYMENT_TOKEN@huggingface.co/spaces/huggingchat/chat-ui-staging main From 17a7345ac8e50a7423c317c5f3289b69ac99dd69 Mon Sep 17 00:00:00 2001 From: Nathan Sarrazin Date: Fri, 3 May 2024 14:46:35 +0200 Subject: [PATCH 38/38] add an image build for internal registry --- .github/workflows/deploy-prod.yml | 69 ++++++++++++++++++++++++++++ .github/workflows/deploy-release.yml | 2 +- 2 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/deploy-prod.yml diff --git a/.github/workflows/deploy-prod.yml b/.github/workflows/deploy-prod.yml new file mode 100644 index 00000000000..edbed458ffb --- /dev/null +++ b/.github/workflows/deploy-prod.yml @@ -0,0 +1,69 @@ +name: Deploy to k8s +on: + # run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + build-and-publish-huggingchat-image: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Tailscale + uses: huggingface/tailscale-action@main + with: + authkey: ${{ secrets.TAILSCALE_AUTHKEY }} + + - name: Extract package version + id: package-version + run: | + VERSION=$(jq -r .version package.json) + echo "VERSION=$VERSION" >> $GITHUB_OUTPUT + MAJOR=$(echo $VERSION | cut -d '.' -f1) + echo "MAJOR=$MAJOR" >> $GITHUB_OUTPUT + MINOR=$(echo $VERSION | cut -d '.' -f1).$(echo $VERSION | cut -d '.' -f2) + echo "MINOR=$MINOR" >> $GITHUB_OUTPUT + + - name: Docker metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: | + ghcr.io/huggingface/chat-ui + tags: | + type=raw,value=${{ steps.package-version.outputs.VERSION }},enable=${{github.event_name == 'release'}} + type=raw,value=${{ steps.package-version.outputs.MAJOR }},enable=${{github.event_name == 'release'}} + type=raw,value=${{ steps.package-version.outputs.MINOR }},enable=${{github.event_name == 'release'}} + type=raw,value=latest,enable={{is_default_branch}} + type=sha,enable={{is_default_branch}} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Registry + uses: docker/login-action@v2 + with: + registry: registry.internal.huggingface.tech + username: ${{ secrets.DOCKER_INTERNAL_USERNAME }} + password: ${{ secrets.DOCKER_INTERNAL_PASSWORD }} + + - name: Build and Publish Docker Image without DB + uses: docker/build-push-action@v5 + with: + context: . + file: Dockerfile + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + platforms: linux/amd64 + cache-to: type=gha,mode=max,scope=amd64 + cache-from: type=gha,scope=amd64 + provenance: false + build-args: | + INCLUDE_DB=false + APP_BASE=/chat + PUBLIC_APP_COLOR=yellow diff --git a/.github/workflows/deploy-release.yml b/.github/workflows/deploy-release.yml index a4d0b33ca9c..53094e3c7c2 100644 --- a/.github/workflows/deploy-release.yml +++ b/.github/workflows/deploy-release.yml @@ -1,4 +1,4 @@ -name: Deploy to production +name: Deploy to production spaces on: # run this workflow manually from the Actions tab workflow_dispatch: