添加接入 火山引擎在线大模型 内容的支持 (#2165)

* use oai adaptive bridge function to handle vol engine

* add vol engine deepseek v3

---------

Co-authored-by: binary-husky <qingxu.fu@outlook.com>
This commit is contained in:
littleolaf
2025-03-04 23:58:03 +08:00
committed by GitHub
parent 4a79aa6a93
commit 72dbe856d2
4 changed files with 140 additions and 45 deletions

View File

@@ -80,6 +80,7 @@ ollama_endpoint = "http://localhost:11434/api/chat"
yimodel_endpoint = "https://api.lingyiwanwu.com/v1/chat/completions"
deepseekapi_endpoint = "https://api.deepseek.com/v1/chat/completions"
grok_model_endpoint = "https://api.x.ai/v1/chat/completions"
volcengine_endpoint = "https://ark.cn-beijing.volces.com/api/v3/chat/completions"
if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/'
azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15'
@@ -102,6 +103,7 @@ if ollama_endpoint in API_URL_REDIRECT: ollama_endpoint = API_URL_REDIRECT[ollam
if yimodel_endpoint in API_URL_REDIRECT: yimodel_endpoint = API_URL_REDIRECT[yimodel_endpoint]
if deepseekapi_endpoint in API_URL_REDIRECT: deepseekapi_endpoint = API_URL_REDIRECT[deepseekapi_endpoint]
if grok_model_endpoint in API_URL_REDIRECT: grok_model_endpoint = API_URL_REDIRECT[grok_model_endpoint]
if volcengine_endpoint in API_URL_REDIRECT: volcengine_endpoint = API_URL_REDIRECT[volcengine_endpoint]
# 获取tokenizer
tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo")
@@ -954,7 +956,7 @@ if any(item in grok_models for item in AVAIL_LLM_MODELS):
try:
grok_beta_128k_noui, grok_beta_128k_ui = get_predict_function(
api_key_conf_name="GROK_API_KEY", max_output_token=8192, disable_proxy=False
)
)
model_info.update({
"grok-beta": {
@@ -1089,8 +1091,10 @@ if "deepseekcoder" in AVAIL_LLM_MODELS: # deepseekcoder
})
except:
logger.error(trimmed_format_exc())
# -=-=-=-=-=-=- 幻方-深度求索大模型在线API -=-=-=-=-=-=-
if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS or "deepseek-reasoner" in AVAIL_LLM_MODELS:
claude_models = ["deepseek-chat", "deepseek-coder", "deepseek-reasoner"]
if any(item in claude_models for item in AVAIL_LLM_MODELS):
try:
deepseekapi_noui, deepseekapi_ui = get_predict_function(
api_key_conf_name="DEEPSEEK_API_KEY", max_output_token=4096, disable_proxy=False
@@ -1127,6 +1131,60 @@ if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS o
})
except:
logger.error(trimmed_format_exc())
# -=-=-=-=-=-=- 火山引擎 对齐支持 -=-=-=-=-=-=-
for model in [m for m in AVAIL_LLM_MODELS if m.startswith("volcengine-")]:
# 为了更灵活地接入volcengine多模型管理界面设计了此接口例子AVAIL_LLM_MODELS = ["volcengine-deepseek-r1-250120(max_token=6666)"]
# 其中
# "volcengine-" 是前缀(必要)
# "deepseek-r1-250120" 是模型名(必要)
# "(max_token=6666)" 是配置(非必要)
model_info_extend = model_info
model_info_extend.update({
"deepseek-r1-250120": {
"max_token": 16384,
"enable_reasoning": True,
"can_multi_thread": True,
"endpoint": volcengine_endpoint,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"deepseek-v3-241226": {
"max_token": 16384,
"enable_reasoning": False,
"can_multi_thread": True,
"endpoint": volcengine_endpoint,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
})
try:
origin_model_name, max_token_tmp = read_one_api_model_name(model)
# 如果是已知模型,则尝试获取其信息
original_model_info = model_info_extend.get(origin_model_name.replace("volcengine-", "", 1), None)
except:
logger.error(f"volcengine模型 {model} 的 max_token 配置不是整数,请检查配置文件。")
continue
volcengine_noui, volcengine_ui = get_predict_function(api_key_conf_name="ARK_API_KEY", max_output_token=8192, disable_proxy=True, model_remove_prefix = ["volcengine-"])
this_model_info = {
"fn_with_ui": volcengine_ui,
"fn_without_ui": volcengine_noui,
"endpoint": volcengine_endpoint,
"can_multi_thread": True,
"max_token": 64000,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
}
# 同步已知模型的其他信息
attribute = "has_multimodal_capacity"
if original_model_info is not None and original_model_info.get(attribute, None) is not None: this_model_info.update({attribute: original_model_info.get(attribute, None)})
attribute = "enable_reasoning"
if original_model_info is not None and original_model_info.get(attribute, None) is not None: this_model_info.update({attribute: original_model_info.get(attribute, None)})
model_info.update({model: this_model_info})
# -=-=-=-=-=-=- one-api 对齐支持 -=-=-=-=-=-=-
for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]:
# 为了更灵活地接入one-api多模型管理界面设计了此接口例子AVAIL_LLM_MODELS = ["one-api-mixtral-8x7b(max_token=6666)"]