16 changes: 13 additions & 3 deletions config.py
@@ -43,18 +43,22 @@
# AVAIL_LLM_MODELS = [
# "qianfan", "deepseekcoder",
# "spark", "sparkv2", "sparkv3", "sparkv3.5",
# "qwen-turbo", "qwen-plus", "qwen-max", "qwen-local",
# "qwen-turbo", "qwen-plus", "qwen-max", "qwen-max-longcontext", "qwen-long", "qwen-local",
# "qwen2-72b-instruct","qwen2-57b-a14b-instruct","qwen2-7b-instruct","qwen1.5-110b-chat",
# "moonshot-v1-128k", "moonshot-v1-32k", "moonshot-v1-8k",
# "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-0125", "gpt-4o-2024-05-13"
# "claude-3-haiku-20240307","claude-3-sonnet-20240229","claude-3-opus-20240229", "claude-2.1", "claude-instant-1.2",
# "moss", "llama2", "chatglm_onnx", "internlm", "jittorllms_pangualpha", "jittorllms_llama",
# "deepseek-chat" ,"deepseek-coder",
# "yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview",
# "llama3-8b-8192", "gemma-7b-it", "mixtral-8x7b-32768", "llama3-70b-8192",
# "yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview","yi-vision"
# ]
# --- --- --- ---
# In addition, when connecting through one-api/vllm/ollama, you can use the
# "one-api-*", "vllm-*", and "ollama-*" prefixes to access models integrated in non-standard ways, for example
# AVAIL_LLM_MODELS = ["one-api-claude-3-sonnet-20240229(max_token=100000)", "ollama-phi3(max_token=4096)"]
# When connecting multimodal models, you can use the "one-api-vision-*" prefix, for example
# AVAIL_LLM_MODELS = ["one-api-vision-gpt-4o(max_token=32000)"]
# --- --- --- ---


@@ -127,7 +131,7 @@
QWEN_LOCAL_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8"


# Connect to the Tongyi Qianwen (Qwen) online models https://dashscope.console.aliyun.com/
# Connect to the Tongyi Qianwen (Qwen) online models https://bailian.console.aliyun.com/
DASHSCOPE_API_KEY = "" # Alibaba DashScope API_KEY


@@ -229,14 +233,20 @@
# 01.AI (Yi Model) API KEY
YIMODEL_API_KEY = ""


# DeepSeek API KEY; the default request URL is "https://api.deepseek.com/v1/chat/completions"
DEEPSEEK_API_KEY = ""


# Mathpix provides OCR for PDFs, but requires a registered account
MATHPIX_APPID = ""
MATHPIX_APPKEY = ""


# Groq API KEY; the default request URL is "https://api.groq.com/openai/v1/chat/completions"
GROQ_API_KEY = ""


# DOC2X PDF parsing service; register an account and obtain an API KEY: https://doc2x.noedgeai.com/login
DOC2X_API_KEY = ""

200 changes: 186 additions & 14 deletions request_llms/bridge_all.py
@@ -72,6 +72,8 @@ def decode(self, *args, **kwargs):
ollama_endpoint = "http://localhost:11434/api/chat"
yimodel_endpoint = "https://api.lingyiwanwu.com/v1/chat/completions"
deepseekapi_endpoint = "https://api.deepseek.com/v1/chat/completions"
qwenapi_endpoint = "https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions"
groq_endpoint = "https://api.groq.com/openai/v1/chat/completions"

if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/'
azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15'
@@ -93,6 +95,8 @@ def decode(self, *args, **kwargs):
if ollama_endpoint in API_URL_REDIRECT: ollama_endpoint = API_URL_REDIRECT[ollama_endpoint]
if yimodel_endpoint in API_URL_REDIRECT: yimodel_endpoint = API_URL_REDIRECT[yimodel_endpoint]
if deepseekapi_endpoint in API_URL_REDIRECT: deepseekapi_endpoint = API_URL_REDIRECT[deepseekapi_endpoint]
if qwenapi_endpoint in API_URL_REDIRECT: qwenapi_endpoint = API_URL_REDIRECT[qwenapi_endpoint]
if groq_endpoint in API_URL_REDIRECT: groq_endpoint = API_URL_REDIRECT[groq_endpoint]

# Get the tokenizers
tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo")
@@ -640,43 +644,118 @@ def decode(self, *args, **kwargs):
except:
print(trimmed_format_exc())
# -=-=-=-=-=-=- Tongyi Qianwen (Qwen) online models -=-=-=-=-=-=-
if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS: # zhipuai
qwen_models = [
"qwen-turbo",
"qwen-plus",
"qwen-max",
"qwen-max-longcontext",
"qwen-long",
"qwen2-72b-instruct",
"qwen2-57b-a14b-instruct",
"qwen2-7b-instruct",
"qwen1.5-110b-chat",
]
if any(item in qwen_models for item in AVAIL_LLM_MODELS):
try:
from .bridge_qwen import predict_no_ui_long_connection as qwen_noui
from .bridge_qwen import predict as qwen_ui
qwen_1500_noui, qwen_1500_ui = get_predict_function(
api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=1500, disable_proxy=False
)
qwen_2000_noui, qwen_2000_ui = get_predict_function(
api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=2000, disable_proxy=False
)
qwen_6144_noui, qwen_6144_ui = get_predict_function(
api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=6144, disable_proxy=False
)
qwen_8000_noui, qwen_8000_ui = get_predict_function(
api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=8000, disable_proxy=False
)
model_info.update({
"qwen-turbo": {
"fn_with_ui": qwen_ui,
"fn_without_ui": qwen_noui,
"fn_with_ui": qwen_1500_ui,
"fn_without_ui": qwen_1500_noui,
"can_multi_thread": True,
"endpoint": None,
"endpoint": qwenapi_endpoint,
"max_token": 6144,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen-plus": {
"fn_with_ui": qwen_ui,
"fn_without_ui": qwen_noui,
"fn_with_ui": qwen_2000_ui,
"fn_without_ui": qwen_2000_noui,
"can_multi_thread": True,
"endpoint": None,
"endpoint": qwenapi_endpoint,
"max_token": 30720,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen-max": {
"fn_with_ui": qwen_ui,
"fn_without_ui": qwen_noui,
"fn_with_ui": qwen_2000_ui,
"fn_without_ui": qwen_2000_noui,
"can_multi_thread": True,
"endpoint": None,
"endpoint": qwenapi_endpoint,
"max_token": 6144,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen-max-longcontext": {
"fn_with_ui": qwen_2000_ui,
"fn_without_ui": qwen_2000_noui,
"can_multi_thread": True,
"endpoint": qwenapi_endpoint,
"max_token": 28672,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
}
},
"qwen-long": {
"fn_with_ui": qwen_2000_ui,
"fn_without_ui": qwen_2000_noui,
"can_multi_thread": True,
"endpoint": qwenapi_endpoint,
"max_token": 1000000,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen2-72b-instruct": {
"fn_with_ui": qwen_6144_ui,
"fn_without_ui": qwen_6144_noui,
"can_multi_thread": True,
"endpoint": qwenapi_endpoint,
"max_token": 128000,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen2-57b-a14b-instruct": {
"fn_with_ui": qwen_6144_ui,
"fn_without_ui": qwen_6144_noui,
"can_multi_thread": True,
"endpoint": qwenapi_endpoint,
"max_token": 30720,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen2-7b-instruct": {
"fn_with_ui": qwen_6144_ui,
"fn_without_ui": qwen_6144_noui,
"can_multi_thread": True,
"endpoint": qwenapi_endpoint,
"max_token": 128000,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen1.5-110b-chat": {
"fn_with_ui": qwen_8000_ui,
"fn_without_ui": qwen_8000_noui,
"can_multi_thread": True,
"endpoint": qwenapi_endpoint,
"max_token": 32000,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
})
except:
print(trimmed_format_exc())
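# To enable any of the Qwen entries above, add the model name to AVAIL_LLM_MODELS and set
# DASHSCOPE_API_KEY in config.py; a minimal sketch (the key value is a placeholder, not a real key):
#   AVAIL_LLM_MODELS = ["qwen-long", "qwen2-72b-instruct"]
#   DASHSCOPE_API_KEY = "sk-xxxxxxxxxxxxxxxx"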
# -=-=-=-=-=-=- 01.AI (Yi) models -=-=-=-=-=-=-
yi_models = ["yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview"]
yi_models = ["yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview","yi-vision"]
if any(item in yi_models for item in AVAIL_LLM_MODELS):
try:
yimodel_4k_noui, yimodel_4k_ui = get_predict_function(
@@ -688,6 +767,23 @@ def decode(self, *args, **kwargs):
yimodel_200k_noui, yimodel_200k_ui = get_predict_function(
api_key_conf_name="YIMODEL_API_KEY", max_output_token=4096, disable_proxy=False
)
if "yi-vision" in AVAIL_LLM_MODELS:
from .bridge_yi_vision import yi_version_generate_message_version
yimodel_version_noui, yimodel_version_ui = get_predict_function(
api_key_conf_name="YIMODEL_API_KEY", max_output_token=600, disable_proxy=False, encode_call=yi_version_generate_message_version
)
model_info.update({
"yi-vision": {
"fn_with_ui": yimodel_version_ui,
"fn_without_ui": yimodel_version_noui,
"can_multi_thread": True,
"endpoint": yimodel_endpoint,
"max_token": 4000,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
}
})

model_info.update({
"yi-34b-chat-0205": {
"fn_with_ui": yimodel_4k_ui,
@@ -892,6 +988,52 @@ def decode(self, *args, **kwargs):
})
except:
print(trimmed_format_exc())
# -=-=-=-=-=-=- groq -=-=-=-=-=-=-
groq_models = ["llama3-8b-8192", "gemma-7b-it", "mixtral-8x7b-32768", "llama3-70b-8192"]
if any(item in groq_models for item in AVAIL_LLM_MODELS):
try:
groq_8k_noui, groq_8k_ui = get_predict_function(
api_key_conf_name="GROQ_API_KEY", max_output_token=8192, disable_proxy=False
)
groq_32k_noui, groq_32k_ui = get_predict_function(
api_key_conf_name="GROQ_API_KEY", max_output_token=32768, disable_proxy=False
)
model_info.update({
"llama3-8b-8192": {
"fn_with_ui": groq_8k_ui,
"fn_without_ui": groq_8k_noui,
"endpoint": groq_endpoint,
"max_token": 8192,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"gemma-7b-it": {
"fn_with_ui": groq_8k_ui,
"fn_without_ui": groq_8k_noui,
"endpoint": groq_endpoint,
"max_token": 8192,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"mixtral-8x7b-32768": {
"fn_with_ui": groq_32k_ui,
"fn_without_ui": groq_32k_noui,
"endpoint": groq_endpoint,
"max_token": 32768,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"llama3-70b-8192": {
"fn_with_ui": groq_8k_ui,
"fn_without_ui": groq_8k_noui,
"endpoint": groq_endpoint,
"max_token": 8192,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
})
except:
print(trimmed_format_exc())
# -=-=-=-=-=-=- one-api compatibility support -=-=-=-=-=-=-
for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]:
# This interface is designed for more flexible integration with the one-api multi-model management UI; example: AVAIL_LLM_MODELS = ["one-api-mixtral-8x7b(max_token=6666)"]
@@ -915,6 +1057,36 @@ def decode(self, *args, **kwargs):
"token_cnt": get_token_num_gpt35,
},
})
# -=-=-=-=-=-=- one-api-vision compatibility support -=-=-=-=-=-=-
for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-vision-")]:
# This interface is designed for more flexible access to multimodal models through the one-api multi-model management UI; example: AVAIL_LLM_MODELS = ["one-api-vision-gpt-4o(max_token=32000)"]
# where
# "one-api-vision-" is the prefix (required)
# "gpt-4o" is the model name (required)
# "(max_token=32000)" is the configuration (optional)
try:
_, max_token_tmp = read_one_api_model_name(model)
except:
print(f"one-api-vision模型 {model} 的 max_token 配置不是整数,请检查配置文件。")
continue
try:
from .oai_vision_std import generate_message_version
one_api_version_noui, one_api_version_ui = get_predict_function(
api_key_conf_name="API_KEY", max_output_token=4000, disable_proxy=False, encode_call=generate_message_version
)
model_info.update({
model: {
"fn_with_ui": one_api_version_ui,
"fn_without_ui": one_api_version_noui,
"can_multi_thread": True,
"endpoint": openai_endpoint,
"max_token": max_token_tmp,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
})
except:
print(trimmed_format_exc())
# -=-=-=-=-=-=- vllm compatibility support -=-=-=-=-=-=-
for model in [m for m in AVAIL_LLM_MODELS if m.startswith("vllm-")]:
# This interface is designed for more flexible integration with the vllm multi-model management UI; example: AVAIL_LLM_MODELS = ["vllm-/home/hmp/llm/cache/Qwen1___5-32B-Chat(max_token=6666)"]
66 changes: 0 additions & 66 deletions request_llms/bridge_qwen.py

This file was deleted.
