添加接入火山引擎在线大模型内容的支持 (#2165)

* use oai adaptive bridge function to handle vol engine * add vol engine deepseek v3 --------- Co-authored-by: binary-husky <qingxu.fu@outlook.com>
2025-03-04 23:58:03 +08:00
parent 4a79aa6a93
commit 72dbe856d2
4 changed files with 140 additions and 45 deletions
--- a/request_llms/bridge_all.py
+++ b/request_llms/bridge_all.py
@@ -80,6 +80,7 @@ ollama_endpoint = "http://localhost:11434/api/chat"
 yimodel_endpoint = "https://api.lingyiwanwu.com/v1/chat/completions"
 deepseekapi_endpoint = "https://api.deepseek.com/v1/chat/completions"
 grok_model_endpoint = "https://api.x.ai/v1/chat/completions"
+volcengine_endpoint = "https://ark.cn-beijing.volces.com/api/v3/chat/completions"

 if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/'
 azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15'
@@ -102,6 +103,7 @@ if ollama_endpoint in API_URL_REDIRECT: ollama_endpoint = API_URL_REDIRECT[ollam
 if yimodel_endpoint in API_URL_REDIRECT: yimodel_endpoint = API_URL_REDIRECT[yimodel_endpoint]
 if deepseekapi_endpoint in API_URL_REDIRECT: deepseekapi_endpoint = API_URL_REDIRECT[deepseekapi_endpoint]
 if grok_model_endpoint in API_URL_REDIRECT: grok_model_endpoint = API_URL_REDIRECT[grok_model_endpoint]
+if volcengine_endpoint in API_URL_REDIRECT: volcengine_endpoint = API_URL_REDIRECT[volcengine_endpoint]

 # 获取tokenizer
 tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo")
@@ -954,7 +956,7 @@ if any(item in grok_models for item in AVAIL_LLM_MODELS):
    try:
        grok_beta_128k_noui, grok_beta_128k_ui = get_predict_function(
            api_key_conf_name="GROK_API_KEY", max_output_token=8192, disable_proxy=False
-            )
+        )
        
        model_info.update({
            "grok-beta": {
@@ -1089,8 +1091,10 @@ if "deepseekcoder" in AVAIL_LLM_MODELS:   # deepseekcoder
        })
    except:
        logger.error(trimmed_format_exc())
+
 # -=-=-=-=-=-=- 幻方-深度求索大模型在线API -=-=-=-=-=-=-
-if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS or "deepseek-reasoner" in AVAIL_LLM_MODELS:
+claude_models = ["deepseek-chat", "deepseek-coder", "deepseek-reasoner"]
+if any(item in claude_models for item in AVAIL_LLM_MODELS):
    try:
        deepseekapi_noui, deepseekapi_ui = get_predict_function(
            api_key_conf_name="DEEPSEEK_API_KEY", max_output_token=4096, disable_proxy=False
@@ -1127,6 +1131,60 @@ if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS o
        })
    except:
        logger.error(trimmed_format_exc())
+
+# -=-=-=-=-=-=- 火山引擎 对齐支持 -=-=-=-=-=-=-
+for model in [m for m in AVAIL_LLM_MODELS if m.startswith("volcengine-")]:
+    # 为了更灵活地接入volcengine多模型管理界面，设计了此接口，例子：AVAIL_LLM_MODELS = ["volcengine-deepseek-r1-250120(max_token=6666)"]
+    # 其中
+    #   "volcengine-"          是前缀（必要）
+    #   "deepseek-r1-250120"   是模型名（必要）
+    #   "(max_token=6666)"     是配置（非必要）
+    model_info_extend = model_info
+    model_info_extend.update({
+        "deepseek-r1-250120": {
+            "max_token": 16384,
+            "enable_reasoning": True,
+            "can_multi_thread": True,
+            "endpoint": volcengine_endpoint,
+            "tokenizer": tokenizer_gpt35,
+            "token_cnt": get_token_num_gpt35,
+        },
+        "deepseek-v3-241226": {
+            "max_token": 16384,
+            "enable_reasoning": False,
+            "can_multi_thread": True,
+            "endpoint": volcengine_endpoint,
+            "tokenizer": tokenizer_gpt35,
+            "token_cnt": get_token_num_gpt35,
+        },
+    })
+    try:
+        origin_model_name, max_token_tmp = read_one_api_model_name(model)
+        # 如果是已知模型，则尝试获取其信息
+        original_model_info = model_info_extend.get(origin_model_name.replace("volcengine-", "", 1), None)
+    except:
+        logger.error(f"volcengine模型 {model} 的 max_token 配置不是整数，请检查配置文件。")
+        continue
+
+    volcengine_noui, volcengine_ui = get_predict_function(api_key_conf_name="ARK_API_KEY", max_output_token=8192, disable_proxy=True, model_remove_prefix = ["volcengine-"])
+
+    this_model_info = {
+        "fn_with_ui": volcengine_ui,
+        "fn_without_ui": volcengine_noui,
+        "endpoint": volcengine_endpoint,
+        "can_multi_thread": True,
+        "max_token": 64000,
+        "tokenizer": tokenizer_gpt35,
+        "token_cnt": get_token_num_gpt35,
+    }
+
+    # 同步已知模型的其他信息
+    attribute = "has_multimodal_capacity"
+    if original_model_info is not None and original_model_info.get(attribute, None) is not None: this_model_info.update({attribute: original_model_info.get(attribute, None)})
+    attribute = "enable_reasoning"
+    if original_model_info is not None and original_model_info.get(attribute, None) is not None: this_model_info.update({attribute: original_model_info.get(attribute, None)})
+    model_info.update({model: this_model_info})
+
 # -=-=-=-=-=-=- one-api 对齐支持 -=-=-=-=-=-=-
 for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]:
    # 为了更灵活地接入one-api多模型管理界面，设计了此接口，例子：AVAIL_LLM_MODELS = ["one-api-mixtral-8x7b(max_token=6666)"]