
Commit b2711dc

Merge pull request #202 from grillazz/201-ml-streaming-endpoint

201 ml streaming endpoint

2 parents 41a8688 + 2f484d6 · commit b2711dc

File tree

10 files changed: +409 −164 lines


.env

Lines changed: 2 additions & 2 deletions
```diff
@@ -2,14 +2,14 @@ PYTHONDONTWRITEBYTECODE=1
 PYTHONUNBUFFERED=1
 
 # Postgres
-POSTGRES_HOST=db
+POSTGRES_HOST=localhost
 POSTGRES_PORT=5432
 POSTGRES_DB=devdb
 POSTGRES_USER=devdb
 POSTGRES_PASSWORD=secret
 
 # Redis
-REDIS_HOST=inmemory
+REDIS_HOST=localhost
 REDIS_PORT=6379
 REDIS_DB=2
```

Makefile

Lines changed: 1 addition & 1 deletion
```diff
@@ -43,7 +43,7 @@ py-upgrade: ## Upgrade project py files with pyupgrade library for python versio
 
 .PHONY: lint
 lint: ## Lint project code.
-	poetry run ruff check --fix .
+	uv run ruff check --fix .
 
 .PHONY: slim-build
 slim-build: ## with power of docker-slim build smaller and safer images
```

README.md

Lines changed: 20 additions & 0 deletions
````diff
@@ -31,6 +31,7 @@
 <li><a href="#worker-aware-async-scheduler">Schedule jobs</a></li>
 <li><a href="#smtp-setup">Email Configuration</a></li>
 <li><a href="#uv-knowledge-and-inspirations">UV knowledge and inspirations</a></li>
+<li><a href="#large-language-model">Integration with local LLM</a></li>
 </ul>
 </li>
 <li><a href="#acknowledgments">Acknowledgments</a></li>
@@ -162,6 +163,24 @@ This service supports plaintext and HTML emails, and also allows sending templat
 It is implemented as a singleton to ensure that only one SMTP connection is maintained
 throughout the application lifecycle, optimizing resource usage.
 
+<p align="right">(<a href="#readme-top">back to top</a>)</p>
+
+### Large Language Model
+The `/v1/ml/chat/` endpoint is designed to handle chat-based interactions with the LLM model.
+It accepts a user prompt and streams responses back in real-time.
+The endpoint leverages FastAPI's asynchronous capabilities to efficiently manage multiple simultaneous requests,
+ensuring low latency and high throughput.
+
+FastAPI's async support is particularly beneficial for reducing I/O bottlenecks when connecting to the LLM model.
+By using asynchronous HTTP clients like `httpx`,
+the application can handle multiple I/O-bound tasks concurrently,
+such as sending requests to the LLM server and streaming responses back to the client.
+This approach minimizes idle time and optimizes resource utilization, making it ideal for high-performance applications.
+
+Install ollama and run the server
+```shell
+ollama run llama3.2
+```
 
 <p align="right">(<a href="#readme-top">back to top</a>)</p>
@@ -215,6 +234,7 @@ I've included a few of my favorites to kick things off!
 - **[DEC 16 2024]** bump project to Python 3.13 :fast_forward:
 - **[JAN 28 2025]** add SMTP setup :email:
 - **[MAR 8 2025]** switch from poetry to uv :fast_forward:
+- **[MAY 3 2025]** add large language model integration :robot:
 
 <p align="right">(<a href="#readme-top">back to top</a>)</p>
````

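To try the endpoint the README describes, here is a minimal client sketch (not part of this commit). It assumes the API is listening on `0.0.0.0:8080`, as in `tests/chat.py` at the end of this diff, and that ollama is already serving `llama3.2`:

```python
# Minimal sketch: call /v1/ml/chat/ and print the streamed lines.
# Assumes the API listens on 0.0.0.0:8080 (as in tests/chat.py below).
import httpx

with httpx.stream(
    "POST",
    "http://0.0.0.0:8080/v1/ml/chat/",
    data={"prompt": "Tell me a joke"},  # the route reads `prompt` from form data
    timeout=60,
) as response:
    for line in response.iter_lines():
        print(line)  # one JSON object per line: the user echo, then model chunks
```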
app/api/ml.py

Lines changed: 16 additions & 0 deletions
```diff
@@ -0,0 +1,16 @@
+from typing import Annotated
+
+from fastapi import APIRouter, Depends, Form
+from fastapi.responses import StreamingResponse
+
+from app.services.llm import get_llm_service
+from app.utils.logging import AppLogger
+
+logger = AppLogger().get_logger()
+
+router = APIRouter()
+
+
+@router.post("/chat/")
+async def chat(prompt: Annotated[str, Form()], llm_service=Depends(get_llm_service)):
+    return StreamingResponse(llm_service.stream_chat(prompt), media_type="text/plain")
```

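Once mounted in `app/main.py` (next file) with the `/v1/ml` prefix, this route is served at `/v1/ml/chat/`. Although the response is declared `media_type="text/plain"`, the body is newline-delimited JSON produced by `StreamLLMService.stream_chat` (see `app/services/llm.py` below): the user message is echoed first, followed by one object per generated chunk. Illustrative frames (values are made up, not captured from a real run):

```
{"role": "user", "content": "Tell me a joke"}
{"role": "model", "content": "Why"}
{"role": "model", "content": " did"}
```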
app/main.py

Lines changed: 3 additions & 1 deletion
```diff
@@ -7,6 +7,7 @@
 from fastapi import Depends, FastAPI
 
 from app.api.health import router as health_router
+from app.api.ml import router as ml_router
 from app.api.nonsense import router as nonsense_router
 from app.api.shakespeare import router as shakespeare_router
 from app.api.stuff import router as stuff_router
@@ -45,12 +46,13 @@ async def lifespan(_app: FastAPI):
     await _app.postgres_pool.close()
 
 
-app = FastAPI(title="Stuff And Nonsense API", version="0.17", lifespan=lifespan)
+app = FastAPI(title="Stuff And Nonsense API", version="0.18.0", lifespan=lifespan)
 
 app.include_router(stuff_router)
 app.include_router(nonsense_router)
 app.include_router(shakespeare_router)
 app.include_router(user_router)
+app.include_router(ml_router, prefix="/v1/ml", tags=["ML"])
 
 
 app.include_router(health_router, prefix="/v1/public/health", tags=["Health, Public"])
```

app/services/llm.py

Lines changed: 52 additions & 0 deletions
```diff
@@ -0,0 +1,52 @@
+from collections.abc import AsyncGenerator
+
+import httpx
+import orjson
+
+
+class StreamLLMService:
+    def __init__(self, base_url: str = "http://localhost:11434/v1"):
+        self.base_url = base_url
+        self.model = "llama3.2"
+
+    async def stream_chat(self, prompt: str) -> AsyncGenerator[bytes]:
+        """Stream chat completion responses from LLM."""
+        # Send the user a message first
+        user_msg = {
+            "role": "user",
+            "content": prompt,
+        }
+        yield orjson.dumps(user_msg) + b"\n"
+
+        # Open client as context manager and stream responses
+        async with httpx.AsyncClient(base_url=self.base_url) as client:
+            async with client.stream(
+                "POST",
+                "/chat/completions",
+                json={
+                    "model": self.model,
+                    "messages": [{"role": "user", "content": prompt}],
+                    "stream": True,
+                },
+                timeout=60.0,
+            ) as response:
+                async for line in response.aiter_lines():
+                    if line.startswith("data: ") and line != "data: [DONE]":
+                        try:
+                            json_line = line[6:]  # Remove "data: " prefix
+                            data = orjson.loads(json_line)
+                            content = (
+                                data.get("choices", [{}])[0]
+                                .get("delta", {})
+                                .get("content", "")
+                            )
+                            if content:
+                                model_msg = {"role": "model", "content": content}
+                                yield orjson.dumps(model_msg) + b"\n"
+                        except Exception:
+                            pass
+
+
+# FastAPI dependency
+def get_llm_service(base_url: str | None = None) -> StreamLLMService:
+    return StreamLLMService(base_url=base_url or "http://localhost:11434/v1")
```

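The service consumes the OpenAI-compatible SSE stream that ollama exposes under `/v1` and re-emits it as NDJSON; malformed chunks are silently dropped by the bare `except`. For orientation, the per-line transformation in isolation (a sketch; the example payload is illustrative, not captured from a real run):

```python
import orjson

# One SSE data line, shaped like ollama's OpenAI-compatible stream (illustrative):
line = 'data: {"choices":[{"delta":{"content":"Hi"}}]}'

# What stream_chat does with each such line:
data = orjson.loads(line[6:])  # strip the "data: " prefix
content = data.get("choices", [{}])[0].get("delta", {}).get("content", "")
if content:
    frame = orjson.dumps({"role": "model", "content": content}) + b"\n"
    print(frame)  # b'{"role":"model","content":"Hi"}\n'
```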
compose.yml

Lines changed: 3 additions & 0 deletions
```diff
@@ -1,6 +1,7 @@
 services:
   app:
     container_name: fsap_app
+    network_mode: host
     build: .
     env_file:
       - .env
@@ -22,6 +23,7 @@ services:
 
   db:
     container_name: fsap_db
+    network_mode: host
     build:
       context: ./db
       dockerfile: Dockerfile
@@ -46,6 +48,7 @@ services:
 
   inmemory:
     image: redis:latest
+    network_mode: host
    container_name: fsap_inmemory
     ports:
       - "6379:6379"
```

Note: with `network_mode: host` the containers share the host's network stack (published `ports:` are effectively ignored), which is why `.env` now points `POSTGRES_HOST` and `REDIS_HOST` at `localhost`.

pyproject.toml

Lines changed: 20 additions & 20 deletions
```diff
@@ -1,45 +1,45 @@
 [project]
 name = "fastapi-sqlalchemy-asyncpg"
-version = "0.1.0"
+version = "0.18.0"
 description = "A modern FastAPI application with SQLAlchemy 2.0 and AsyncPG for high-performance async database operations. Features include JWT authentication with Redis token storage, password hashing, connection pooling, data processing with Polars, Rich logging, task scheduling with APScheduler, and Shakespeare datasets integration."
 readme = "README.md"
 requires-python = ">=3.13"
 dependencies = [
-    "fastapi[all]>=0.115.11",
-    "pydantic[email]>=2.10.6",
-    "pydantic-settings>=2.8.1",
-    "sqlalchemy>=2.0.38",
-    "uvicorn[standard]>=0.34.0",
+    "fastapi[all]>=0.115.12",
+    "pydantic[email]>=2.11.4",
+    "pydantic-settings>=2.9.1",
+    "sqlalchemy>=2.0.40",
+    "uvicorn[standard]>=0.34.2",
     "asyncpg>=0.30.0",
-    "alembic>=1.15.1",
+    "alembic>=1.15.2",
     "httpx>=0.28.1",
     "pytest>=8.3.5",
-    "pytest-cov>=6.0.0",
+    "pytest-cov>=6.1.1",
     "uvloop>=0.21.0",
     "httptools>=0.6.4",
-    "rich>=13.9.4",
+    "rich>=14.0.0",
     "pyjwt>=2.10.1",
-    "redis>=5.2.1",
+    "redis>=6.0.0",
     "bcrypt>=4.3.0",
-    "polars>=1.24.0",
+    "polars>=1.29.0",
     "python-multipart>=0.0.20",
-    "fastexcel>=0.13.0",
-    "inline-snapshot>=0.17.0",
-    "dirty-equals>=0.8.0",
-    "polyfactory>=2.18.1",
-    "granian>=1.7.0",
-    "apscheduler[redis,sqlalchemy]>=4.0.0a5",
+    "fastexcel>=0.14.0",
+    "inline-snapshot>=0.23.0",
+    "dirty-equals>=0.9.0",
+    "polyfactory>=2.21.0",
+    "granian>=2.2.5",
+    "apscheduler[redis,sqlalchemy]>=4.0.0a6",
 ]
 
 [tool.uv]
 dev-dependencies = [
-    "ruff>=0.9.10",
+    "ruff>=0.11.8",
     "devtools[pygments]>=0.12.2",
     "pyupgrade>=3.19.1",
-    "ipython>=9.0.2",
+    "ipython>=9.2.0",
     "sqlacodegen>=3.0.0",
     "tryceratops>=2.4.1",
-    "locust>=2.33.0"
+    "locust>=2.36.2"
 
 ]
```

tests/chat.py

Lines changed: 32 additions & 0 deletions
```diff
@@ -0,0 +1,32 @@
+import anyio
+import httpx
+import orjson
+
+
+async def chat_with_endpoint():
+    async with httpx.AsyncClient() as client:
+        while True:
+            # Get user input
+            prompt = input("\nYou: ")
+            if prompt.lower() == "exit":
+                break
+
+            # Send request to the API
+            print("\nModel: ", end="", flush=True)
+            async with client.stream(
+                "POST",
+                "http://0.0.0.0:8080/v1/ml/chat/",
+                data={"prompt": prompt},
+                timeout=60,
+            ) as response:
+                async for chunk in response.aiter_lines():
+                    if chunk:
+                        try:
+                            data = orjson.loads(chunk)
+                            print(data["content"], end="", flush=True)
+                        except Exception as e:
+                            print(f"\nError parsing chunk: {e}")
+
+
+if __name__ == "__main__":
+    anyio.run(chat_with_endpoint)
```

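Assuming the stack is up (`ollama run llama3.2` and the API on port 8080), the script runs directly with `python tests/chat.py`; typing `exit` at the prompt ends the session. Note that it prints `data["content"]` for every frame, so the echoed user message appears once after "Model: " before the model's own chunks.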