Skip to content

Commit cf5fdec

Browse files
feat: use ollama
2 parents b70cfa6 + d27a0a6 commit cf5fdec

File tree

5 files changed

+33
-86
lines changed

5 files changed

+33
-86
lines changed

.github/workflows/create_instance.yaml

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,10 +3,10 @@ name: Create instance
33
on:
44
workflow_dispatch:
55
inputs:
6-
s3ModelPath:
7-
description: 'The model to fetch from S3'
6+
modelName:
7+
description: 'The model to use'
88
required: true
9-
default: 'distilgpt2'
9+
default: 'llama3.1:70b'
1010
instanceNameSuffix:
1111
description: 'Instance name suffix'
1212
required: false
@@ -70,9 +70,5 @@ jobs:
7070
OVH_SSH_KEY_ID: ${{ secrets.OVH_SSH_KEY_ID }}
7171
OVH_SERVICE_NAME: ${{ secrets.OVH_SERVICE_NAME }}
7272
AUTH_TOKEN: ${{ secrets.AUTH_TOKEN }}
73-
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
74-
S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID}}
75-
S3_SECRET_ACCESS_KEY: ${{ secrets.S3_SECRET_ACCESS_KEY}}
76-
S3_ENDPOINT_URL: ${{ vars.S3_ENDPOINT_URL }}
77-
S3_MODEL_PATH: ${{ inputs.s3ModelPath }}
7873
INSTANCE_NAME_SUFFIX: ${{ inputs.instanceNameSuffix }}
74+
MODEL_NAME: ${{ inputs.modelName }}

.github/workflows/delete_instance.yaml

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -41,9 +41,4 @@ jobs:
4141
OVH_SSH_KEY_ID: ${{ secrets.OVH_SSH_KEY_ID }}
4242
OVH_SERVICE_NAME: ${{ secrets.OVH_SERVICE_NAME }}
4343
AUTH_TOKEN: ${{ secrets.AUTH_TOKEN }}
44-
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
45-
S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID}}
46-
S3_SECRET_ACCESS_KEY: ${{ secrets.S3_SECRET_ACCESS_KEY}}
47-
S3_ENDPOINT_URL: ${{ vars.S3_ENDPOINT_URL }}
48-
S3_MODEL_PATH: "distilgpt2"
4944
INSTANCE_NAME_SUFFIX: ${{ inputs.instanceNameSuffix }}

docker-compose.yaml

Lines changed: 16 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -1,25 +1,24 @@
11
services:
2-
llm-service:
2+
ollama-service:
33
restart: always
4-
image: ghcr.io/socialgouv/llm-inference-server:main
5-
environment:
6-
HUGGING_FACE_HUB_TOKEN: "${HUGGING_FACE_HUB_TOKEN}"
7-
S3_ACCESS_KEY_ID: "${S3_ACCESS_KEY_ID}"
8-
S3_SECRET_ACCESS_KEY: "${S3_SECRET_ACCESS_KEY}"
9-
S3_ENDPOINT_URL: ${S3_ENDPOINT_URL}
10-
S3_MODEL_PATH: ${S3_MODEL_PATH}
4+
image: ollama/ollama
5+
tty: true
116
expose:
12-
- "8000"
7+
- "11434"
138
ports:
14-
- "127.0.0.1:8000:8000"
9+
- "127.0.0.1:11434:11434"
1510
labels:
16-
- "traefik.http.routers.vllm-service.rule=Host(`${HOST}.nip.io`)"
17-
- "traefik.http.routers.vllm-service.entrypoints=websecure"
18-
- "traefik.http.routers.vllm-service.tls.certresolver=myresolver"
11+
- "traefik.http.routers.ollama-service.rule=Host(`${HOST}.nip.io`)"
12+
- "traefik.http.routers.ollama-service.entrypoints=websecure"
13+
- "traefik.http.routers.ollama-service.tls.certresolver=myresolver"
1914
- "traefik.http.middlewares.main-auth.basicauth.users=${CREDENTIALS}"
20-
- "traefik.http.routers.vllm-service.middlewares=main-auth@docker"
21-
runtime: nvidia # Set the desired runtime for GPU support
22-
ipc: host # Set the IPC namespace to host
15+
- "traefik.http.routers.ollama-service.middlewares=main-auth@docker"
16+
- "traefik.http.services.ollama-service.loadbalancer.server.port=11434"
17+
environment:
18+
OLLAMA_KEEP_ALIVE: "-1"
19+
VERBOSE: "${VERBOSE:-0}"
20+
runtime: nvidia
21+
ipc: host
2322
deploy:
2423
resources:
2524
reservations:
@@ -28,13 +27,7 @@ services:
2827
count: all
2928
capabilities: [gpu]
3029
volumes:
31-
- /opt/vllm/models:/app/models
32-
# healthcheck:
33-
# test: curl --fail http://localhost:8000/v1/models || exit 1
34-
# interval: 10s
35-
# timeout: 30s
36-
# retries: 60
37-
# start_period: 60s
30+
- "./.ollama:/root/.ollama"
3831

3932
reverse-proxy:
4033
image: traefik:v2.4
@@ -53,14 +46,3 @@ services:
5346
volumes:
5447
- "/var/run/docker.sock:/var/run/docker.sock"
5548
- "./letsencrypt:/letsencrypt"
56-
57-
# autoheal:
58-
# image: willfarrell/autoheal:latest
59-
# tty: true
60-
# restart: always
61-
# environment:
62-
# - AUTOHEAL_INTERVAL=60
63-
# - AUTOHEAL_START_PERIOD=300
64-
# - AUTOHEAL_DEFAULT_STOP_TIMEOUT=10
65-
# volumes:
66-
# - /var/run/docker.sock:/var/run/docker.sock

main.py

Lines changed: 12 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -41,19 +41,15 @@ def indentString(input_string, indent_level=4):
4141
if (instanceNameSuffix):
4242
instanceNameSuffix = f"-{instanceNameSuffix}"
4343

44-
instanceName = "vllm-managed-instance" + instanceNameSuffix
44+
instanceName = "ollama-managed-instance" + instanceNameSuffix
4545
serviceName = getRequiredEnv("OVH_SERVICE_NAME")
4646
sshKeyId = getRequiredEnv("OVH_SSH_KEY_ID")
4747
flavorId = getRequiredEnv("OVH_INSTANCE_FLAVOR_ID")
4848
imageId = getRequiredEnv("OVH_INSTANCE_IMAGE_ID")
4949
region = getRequiredEnv("OVH_REGION")
5050
authToken = getRequiredEnv("AUTH_TOKEN")
51-
huggingFaceHubToken = getRequiredEnv("HUGGING_FACE_HUB_TOKEN")
52-
s3AccessKeyId = getRequiredEnv("S3_ACCESS_KEY_ID")
53-
s3SecretAccessKey = getRequiredEnv("S3_SECRET_ACCESS_KEY")
54-
s3ModelPath = getRequiredEnv("S3_MODEL_PATH")
55-
s3EndpointUrl = getRequiredEnv("S3_ENDPOINT_URL")
5651
users = os.getenv("USERS", "")
52+
modelName = getRequiredEnv("MODEL_NAME")
5753

5854
if users:
5955
users = f"""
@@ -97,33 +93,32 @@ def indentString(input_string, indent_level=4):
9793
permissions: "0600"
9894
owner: ubuntu:ubuntu
9995
content: |
100-
- path: /opt/vllm/init.sh
96+
- path: /opt/ollama/init.sh
10197
owner: ubuntu:ubuntu
10298
permissions: "0775"
10399
content: |
104100
#!/bin/bash
105101
106102
# init config
107-
sudo mkdir -p /opt/vllm
103+
sudo mkdir -p /opt/ollama
108104
sudo chown -R ubuntu:ubuntu /opt
109105
sudo chmod -R 0775 /opt
110106
111-
cd /opt/vllm
107+
cd /opt/ollama
112108
cat <<'EOF' > docker-compose.yaml
113109
{dockerCompose}
114110
EOF
115111
echo "TOKEN={authToken}" >> .env
116112
echo "HOST=$(curl -4 ifconfig.me)" >> .env
117113
echo "CREDENTIALS='$(htpasswd -nBb user {authToken})'" >> .env
118-
echo "HUGGING_FACE_HUB_TOKEN='{huggingFaceHubToken}'" >> .env
119-
echo "S3_MODEL_PATH='{s3ModelPath}'" >> .env
120-
echo "S3_ACCESS_KEY_ID='{s3AccessKeyId}'" >> .env
121-
echo "S3_SECRET_ACCESS_KEY='{s3SecretAccessKey}'" >> .env
122-
echo "S3_ENDPOINT_URL='{s3EndpointUrl}'" >> .env
123-
114+
115+
# Configure Docker to use Nvidia driver
116+
nvidia-ctk runtime configure --runtime=docker
117+
systemctl restart docker
118+
124119
# up docker compose services
125120
docker compose up -d --build
126-
121+
docker exec ollama-ollama-service-1 ollama run {modelName}
127122
touch /tmp/runcmd_finished
128123
129124
- path: /etc/ssh/sshd_config.d/90-custom-settings.conf
@@ -135,7 +130,7 @@ def indentString(input_string, indent_level=4):
135130
AllowGroups ubuntu
136131
137132
runcmd:
138-
- su - ubuntu -c '/opt/vllm/init.sh > /opt/vllm/init.log 2>&1'
133+
- su - ubuntu -c '/opt/ollama/init.sh > /opt/ollama/init.log 2>&1'
139134
140135
{users}
141136
"""
@@ -226,27 +221,6 @@ def findStatusInInstance(response):
226221
sys.exit(1)
227222
logger.info(f"Instance domain: {ip}.nip.io")
228223

229-
# url = f"https://{ip}.nip.io/v1/models"
230-
# max_attempts = 120
231-
# wait_time = 5
232-
# attempt = 1
233-
# while attempt <= max_attempts:
234-
# try:
235-
# response = requests.get(url)
236-
# if response.status_code == 401:
237-
# logger.info(f"URL {url} is ready.")
238-
# break
239-
# except requests.ConnectionError:
240-
# pass
241-
# logger.info(f"Attempt {attempt}/{max_attempts}: URL not ready (HTTP status code: {response.status_code if 'response' in locals() else 'Connection error'}). Waiting {wait_time} seconds...")
242-
# time.sleep(wait_time)
243-
# attempt += 1
244-
# else:
245-
# logger.info("URL is not ready after maximum attempts.")
246-
# sys.exit(1)
247-
248-
249-
250224
elif action == "delete":
251225
instanceId = findInstance()
252226
if not instanceId:

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
[tool.poetry]
2-
name = "vllm-managed-instance"
2+
name = "ollama-managed-instance"
33
version = "0.1.0"
44
description = ""
55
authors = ["Matéo Mévollon <m.mevollon@proton.me>"]

0 commit comments

Comments
 (0)