Skip to content

Proxy server 24 04 25 #703

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions refact-server/Dockerfile.proxy
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Image for the Refact proxy server: Python runtime, a local Cassandra
# installation and the refact_proxy package, serving on port 8008.
FROM ubuntu:22.04

# Build toolchain and Python runtime.
# `apt-get update` runs in the same layer as `install` so a cached, stale
# package index is never reused. Only the package index is removed afterwards:
# the dpkg database must survive for the later installs.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get install -y \
        git \
        python3 \
        python3-pip \
        python3-packaging \
        build-essential \
        cmake \
        pkg-config \
        libicu-dev \
        zlib1g-dev \
        libcurl4-openssl-dev \
        libssl-dev \
    && rm -rf /var/lib/apt/lists/*

RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1

# cassandra (iputils-ping is installed here as well: it is used to ping hf)
# NOTE: apt-key is deprecated; it still works on ubuntu:22.04 but should be
# replaced by a `signed-by` keyring when the base image is upgraded.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get install -y \
        default-jdk \
        wget \
        curl \
        sudo \
        iputils-ping \
    && echo "deb https://debian.cassandra.apache.org 41x main" | tee -a /etc/apt/sources.list.d/cassandra.sources.list \
    && curl https://downloads.apache.org/cassandra/KEYS | apt-key add - \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get install -y cassandra \
    && rm -rf /var/lib/apt/lists/*

COPY . /tmp/app
# Optional build argument (`--build-arg GIT_COMMIT_HASH=...`); when it is not
# passed, the hash part of the build-info line is left empty.
ARG GIT_COMMIT_HASH
RUN echo "refact ${GIT_COMMIT_HASH}" >> /refact-build-info.txt
RUN SETUP_PACKAGE=refact_proxy pip install /tmp/app -v --no-build-isolation && rm -rf /tmp/app

ENV REFACT_PERM_DIR="/perm_storage"
ENV REFACT_TMP_DIR="/tmp"
ENV RDMAV_FORK_SAFE=0
ENV RDMAV_HUGEPAGES_SAFE=0

EXPOSE 8008

# Absolute paths so these steps do not depend on the current WORKDIR.
COPY database-start.sh /
RUN chmod +x /database-start.sh
COPY docker-entrypoint-proxy.sh /
RUN chmod +x /docker-entrypoint-proxy.sh

# Exec form: the entrypoint script is started directly instead of through an
# extra `/bin/sh -c` wrapper.
CMD ["/docker-entrypoint-proxy.sh"]
10 changes: 10 additions & 0 deletions refact-server/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,16 @@ docker volume rm VVV

See [CONTRIBUTING.md](CONTRIBUTING.md) for installation without a docker container.

### Running Refact Proxy Docker Container

A lightweight version of the server that doesn't require an NVIDIA GPU. This is the ideal choice if you are:
* Self-hosting models using Ollama, vLLM, etc.
* Using third-party model providers like OpenAI, Anthropic, etc.

```commandline
docker run -d --rm --shm-size=256m -p 8008:8008 -v refact-proxy-perm-storage:/perm_storage smallcloud/refact_proxy:latest
```

### Setting Up Plugins

Download Refact for [VS Code](https://marketplace.visualstudio.com/items?itemName=smallcloud.codify) or [JetBrains](https://plugins.jetbrains.com/plugin/20647-refact-ai).
Expand Down
5 changes: 5 additions & 0 deletions refact-server/docker-entrypoint-proxy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/sh
# Entrypoint for the Refact proxy container.
# When REFACT_DATABASE_HOST is empty or unset, database-start.sh is run first;
# afterwards the proxy web GUI is started.

# Resolve the helper next to this script so the call does not depend on the
# current working directory.
script_dir="$(dirname "$0")"

if [ -z "$REFACT_DATABASE_HOST" ]; then
    sh "$script_dir/database-start.sh"
fi

# exec replaces this shell with the python process, so signals sent to the
# container (e.g. SIGTERM from `docker stop`) are delivered to the server itself.
exec python -m refact_proxy.webgui.webgui
Empty file.
Empty file.
18 changes: 18 additions & 0 deletions refact-server/refact_proxy/webgui/selfhost_fastapi_completions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from fastapi import Header
from fastapi import HTTPException

from refact_webgui.webgui.selfhost_fastapi_completions import NlpCompletion
from refact_webgui.webgui.selfhost_fastapi_completions import EmbeddingsStyleOpenAI
from refact_webgui.webgui.selfhost_fastapi_completions import CompletionsRouter


__all__ = ["ProxyCompletionsRouter"]


class ProxyCompletionsRouter(CompletionsRouter):
    """Completions router variant used by the proxy.

    The ``_completions`` and ``_embeddings_style_openai`` handlers are
    overridden so that both always answer with an HTTP 400 error.
    """

    @staticmethod
    def _not_available_for_proxy(handler: str) -> HTTPException:
        """Return the HTTP 400 error reported for a handler the proxy does not serve."""
        return HTTPException(
            status_code=400,
            detail=f"{handler} handler is not available for proxy",
        )

    async def _completions(self, post: NlpCompletion, authorization: str = Header(None)):
        """Reject every completions request with HTTP 400."""
        raise self._not_available_for_proxy("completions")

    async def _embeddings_style_openai(self, post: EmbeddingsStyleOpenAI, authorization: str = Header(None)):
        """Reject every embeddings request with HTTP 400."""
        raise self._not_available_for_proxy("embeddings")
54 changes: 54 additions & 0 deletions refact-server/refact_proxy/webgui/selfhost_model_assigner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from refact_webgui.webgui.selfhost_model_assigner import ModelAssigner

from typing import Dict, Any


__all__ = ["ProxyModelAssigner"]


class ProxyModelAssigner(ModelAssigner):
    """Model assigner variant used by the proxy.

    The read-only views report no models and no assignments; every other
    overridden operation raises ``NotImplementedError``.
    """

    # --- views that report "nothing configured" ---

    @property
    def models_db(self) -> Dict[str, Any]:
        """Return an empty model database."""
        return dict()

    @property
    def models_info(self):
        """Return a models listing that contains no models."""
        return {"models": list()}

    @property
    def model_assignment(self):
        """Return an assignment mapping with no assigned models."""
        return {"model_assign": dict()}

    def config_inference_mtime(self) -> int:
        """Return ``0`` as the inference config modification time."""
        return 0

    # --- operations that are not implemented here ---

    def to_completion_model_record(self, model_name: str, model_info: Dict[str, Any]) -> Dict[str, Any]:
        """Not implemented; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def to_chat_model_record(self, model_name: str, model_info: Dict[str, Any]) -> Dict[str, Any]:
        """Not implemented; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def models_to_watchdog_configs(self, inference_config=None):
        """Not implemented; always raises ``NotImplementedError``."""
        raise NotImplementedError

    @staticmethod
    def has_available_weights(model_path: str) -> bool:
        """Not implemented; always raises ``NotImplementedError``."""
        raise NotImplementedError

    @property
    def _model_cfg_template(self) -> Dict:
        """Not implemented; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def _has_loras(self, model_name: str) -> bool:
        """Not implemented; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def first_run(self):
        """Not implemented; always raises ``NotImplementedError``."""
        raise NotImplementedError

    @property
    def devices(self):
        """Not implemented; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def _model_inference_setup(self, inference_config: Dict[str, Any]) -> Dict[str, Any]:
        """Not implemented; always raises ``NotImplementedError``."""
        raise NotImplementedError
105 changes: 105 additions & 0 deletions refact-server/refact_proxy/webgui/webgui.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import os
import asyncio
from typing import Dict

import uvicorn
import uvloop

from fastapi import APIRouter, Request

from refact_webgui.webgui.selfhost_database import RefactDatabase
from refact_webgui.webgui.selfhost_database import StatisticsService
from refact_webgui.webgui.selfhost_login import AdminRouter
from refact_webgui.webgui.selfhost_login import AdminSession
from refact_webgui.webgui.selfhost_login import DummySession
from refact_webgui.webgui.selfhost_login import RefactSession
from refact_webgui.webgui.selfhost_model_assigner import ModelAssigner
from refact_webgui.webgui.selfhost_queue import InferenceQueue, Ticket
from refact_webgui.webgui.selfhost_static import StaticRouter
from refact_webgui.webgui.selfhost_statistics import TabStatisticsRouter
from refact_webgui.webgui.tab_about import TabAboutRouter
from refact_webgui.webgui.tab_server_logs import TabServerLogRouter
from refact_webgui.webgui.tab_third_party_apis import TabThirdPartyApisRouter
from refact_webgui.webgui.webgui import WebGUI
from refact_webgui.webgui.webgui import setup_logger

from refact_proxy.webgui.selfhost_model_assigner import ProxyModelAssigner
from refact_proxy.webgui.selfhost_fastapi_completions import ProxyCompletionsRouter


class ProxyPluginsRouter(APIRouter):
    """Router that serves the list of web GUI tabs on ``GET /list-plugins``."""

    def __init__(self):
        super().__init__()
        third_party_apis_tab = {"label": "Third-Party APIs", "tab": "third-party-apis"}
        about_tab = {"label": "About", "tab": "about", "hamburger": True}
        # Tabs that are intentionally left out of the list:
        # NOTE: there are no completion models on server for now, so no need in stats
        #   {"label": "Stats", "tab": "stats"}
        # TODO: there is no watchdog, so no logs
        #   {"label": "Server Logs", "tab": "server-logs", "hamburger": True}
        self.plugins = [third_party_apis_tab, about_tab]
        self.add_api_route("/list-plugins", self._list_plugins, methods=["GET"])

    def _list_plugins(self, _request: Request):
        """Return the tab descriptors stored in ``self.plugins``."""
        return self.plugins


class ProxyWebGUI(WebGUI):
    """Web GUI application assembled from the proxy's set of routers."""

    @staticmethod
    def _routers_list(
            id2ticket: Dict[str, Ticket],
            inference_queue: InferenceQueue,
            model_assigner: ModelAssigner,
            stats_service: StatisticsService,
            session: RefactSession):
        """Create and return the routers of the proxy web GUI, in registration order."""
        routers = [ProxyPluginsRouter()]
        routers.append(AdminRouter(prefix="/admin", session=session))
        routers.append(TabThirdPartyApisRouter())
        routers.append(ProxyCompletionsRouter(
            id2ticket=id2ticket,
            inference_queue=inference_queue,
            model_assigner=model_assigner,
            session=session,
        ))
        routers.append(TabStatisticsRouter(
            prefix="/stats",
            stats_service=stats_service,
            session=session,
        ))
        routers.append(TabServerLogRouter())
        routers.append(TabAboutRouter())
        routers.append(StaticRouter())
        return routers


if __name__ == "__main__":
    from argparse import ArgumentParser

    # Command-line options: address and port the server listens on.
    cli = ArgumentParser()
    cli.add_argument("--host", default="0.0.0.0")
    cli.add_argument("--port", default=8008, type=int)
    args = cli.parse_args()
    setup_logger()

    model_assigner = ProxyModelAssigner()
    database = RefactDatabase()
    stats_service = StatisticsService(database)

    # An admin session is used only when REFACT_ADMIN_TOKEN is present in the
    # environment; otherwise a dummy session is used.
    admin_token = os.environ.get("REFACT_ADMIN_TOKEN")
    if admin_token is None:
        session = DummySession()
    else:
        session = AdminSession(admin_token)

    app = ProxyWebGUI(
        model_assigner=model_assigner,
        database=database,
        stats_service=stats_service,
        session=session,
        docs_url=None,
        redoc_url=None,
    )

    asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
    uvicorn.run(
        app,
        host=args.host,
        port=args.port,
        timeout_keep_alive=600,
        log_config=None,
    )
7 changes: 6 additions & 1 deletion refact-server/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ class PyPackage:
data=["webgui/static/*", "webgui/static/components/modals/*",
"webgui/static/dashboards/*", "webgui/static/assets/*", "webgui/static/utils/*",
"webgui/static/assets/fonts/*"]),
"refact_proxy": PyPackage(
requires=["scyllapy==1.3.0", "pandas", "fastapi", "uvicorn", "pydantic", "aiohttp", "uvloop"],
requires_packages=["refact_webgui", "refact_utils", "refact_known_models"],
data=["webgui/static/*"],
),
"self_hosting_machinery": PyPackage(
requires=["python-multipart", "auto-gptq==0.7.1", "accelerate",
"termcolor", "torch", "transformers==4.47.1", # Qwen2 is completely changed in transformers>=4.48
Expand Down Expand Up @@ -95,7 +100,7 @@ def get_install_requires(packages):

setup(
name="refact-self-hosting",
version="1.10.0",
version="1.10.1",
py_modules=list(setup_packages.keys()),
package_data={
name: py_package.data
Expand Down