Merge Latest Frontier (#1991)

* logging sys to loguru: stage 1 complete
* import loguru: stage 2
* logging -> loguru: stage 3
* support o1-preview and o1-mini
* logging -> loguru stage 4
* update social helper
* logging -> loguru: final stage
* fix: console output
* update translation matrix
* fix: loguru argument error with proxy enabled (#1977)
* relax llama index version
* remove comment
* Added some modules to support openrouter (#1975)
* Added some modules for supporting openrouter model Added some modules for supporting openrouter model
* Update config.py
* Update .gitignore
* Update bridge_openrouter.py
* Not changed actually
* Refactor logging in bridge_openrouter.py
---------
Co-authored-by: binary-husky <qingxu.fu@outlook.com>
* remove logging extra
---------
Co-authored-by: Steven Moder <java20131114@gmail.com>
Co-authored-by: Ren Lifei <2602264455@qq.com>
2024-10-05 17:09:18 +08:00
parent 597c320808
commit a01ca93362
91 changed files with 2558 additions and 742 deletions
--- a/request_llms/bridge_all.py
+++ b/request_llms/bridge_all.py
@@ -9,6 +9,7 @@
    2. predict_no_ui_long_connection(...)
 """
 import tiktoken, copy, re
+from loguru import logger
 from functools import lru_cache
 from concurrent.futures import ThreadPoolExecutor
 from toolbox import get_conf, trimmed_format_exc, apply_gpt_academic_string_mask, read_one_api_model_name
@@ -51,9 +52,9 @@ class LazyloadTiktoken(object):
    @staticmethod
    @lru_cache(maxsize=128)
    def get_encoder(model):
-        print('正在加载tokenizer，如果是第一次运行，可能需要一点时间下载参数')
+        logger.info('正在加载tokenizer，如果是第一次运行，可能需要一点时间下载参数')
        tmp = tiktoken.encoding_for_model(model)
-        print('加载tokenizer完毕')
+        logger.info('加载tokenizer完毕')
        return tmp

    def encode(self, *args, **kwargs):
@@ -83,7 +84,7 @@ try:
    API_URL = get_conf("API_URL")
    if API_URL != "https://api.openai.com/v1/chat/completions":
        openai_endpoint = API_URL
-        print("警告！API_URL配置选项将被弃用，请更换为API_URL_REDIRECT配置")
+        logger.warning("警告！API_URL配置选项将被弃用，请更换为API_URL_REDIRECT配置")
 except:
    pass
 # 新版配置
@@ -255,8 +256,6 @@ model_info = {
        "max_token": 128000,
        "tokenizer": tokenizer_gpt4,
        "token_cnt": get_token_num_gpt4,
-        "openai_disable_system_prompt": True,
-        "openai_disable_stream": True,
    },
    "o1-mini": {
        "fn_with_ui": chatgpt_ui,
@@ -265,8 +264,6 @@ model_info = {
        "max_token": 128000,
        "tokenizer": tokenizer_gpt4,
        "token_cnt": get_token_num_gpt4,
-        "openai_disable_system_prompt": True,
-        "openai_disable_stream": True,
    },

    "gpt-4-turbo": {
@@ -683,7 +680,7 @@ if "newbing" in AVAIL_LLM_MODELS:   # same with newbing-free
            }
        })
    except:
-        print(trimmed_format_exc())
+        logger.error(trimmed_format_exc())
 if "chatglmft" in AVAIL_LLM_MODELS:   # same with newbing-free
    try:
        from .bridge_chatglmft import predict_no_ui_long_connection as chatglmft_noui
@@ -699,7 +696,7 @@ if "chatglmft" in AVAIL_LLM_MODELS:   # same with newbing-free
            }
        })
    except:
-        print(trimmed_format_exc())
+        logger.error(trimmed_format_exc())
 # -=-=-=-=-=-=- 上海AI-LAB书生大模型 -=-=-=-=-=-=-
 if "internlm" in AVAIL_LLM_MODELS:
    try:
@@ -716,7 +713,7 @@ if "internlm" in AVAIL_LLM_MODELS:
            }
        })
    except:
-        print(trimmed_format_exc())
+        logger.error(trimmed_format_exc())
 if "chatglm_onnx" in AVAIL_LLM_MODELS:
    try:
        from .bridge_chatglmonnx import predict_no_ui_long_connection as chatglm_onnx_noui
@@ -732,7 +729,7 @@ if "chatglm_onnx" in AVAIL_LLM_MODELS:
            }
        })
    except:
-        print(trimmed_format_exc())
+        logger.error(trimmed_format_exc())
 # -=-=-=-=-=-=- 通义-本地模型 -=-=-=-=-=-=-
 if "qwen-local" in AVAIL_LLM_MODELS:
    try:
@@ -750,7 +747,7 @@ if "qwen-local" in AVAIL_LLM_MODELS:
            }
        })
    except:
-        print(trimmed_format_exc())
+        logger.error(trimmed_format_exc())
 # -=-=-=-=-=-=- 通义-在线模型 -=-=-=-=-=-=-
 if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS:   # zhipuai
    try:
@@ -786,7 +783,7 @@ if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-
            }
        })
    except:
-        print(trimmed_format_exc())
+        logger.error(trimmed_format_exc())
 # -=-=-=-=-=-=- 零一万物模型 -=-=-=-=-=-=-
 yi_models = ["yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview"]
 if any(item in yi_models for item in AVAIL_LLM_MODELS):
@@ -866,7 +863,7 @@ if any(item in yi_models for item in AVAIL_LLM_MODELS):
            },
        })
    except:
-        print(trimmed_format_exc())
+        logger.error(trimmed_format_exc())
 # -=-=-=-=-=-=- 讯飞星火认知大模型 -=-=-=-=-=-=-
 if "spark" in AVAIL_LLM_MODELS:
    try:
@@ -884,7 +881,7 @@ if "spark" in AVAIL_LLM_MODELS:
            }
        })
    except:
-        print(trimmed_format_exc())
+        logger.error(trimmed_format_exc())
 if "sparkv2" in AVAIL_LLM_MODELS:   # 讯飞星火认知大模型
    try:
        from .bridge_spark import predict_no_ui_long_connection as spark_noui
@@ -901,7 +898,7 @@ if "sparkv2" in AVAIL_LLM_MODELS:   # 讯飞星火认知大模型
            }
        })
    except:
-        print(trimmed_format_exc())
+        logger.error(trimmed_format_exc())
 if any(x in AVAIL_LLM_MODELS for x in ("sparkv3", "sparkv3.5", "sparkv4")):   # 讯飞星火认知大模型
    try:
        from .bridge_spark import predict_no_ui_long_connection as spark_noui
@@ -936,7 +933,7 @@ if any(x in AVAIL_LLM_MODELS for x in ("sparkv3", "sparkv3.5", "sparkv4")):   #
            }
        })
    except:
-        print(trimmed_format_exc())
+        logger.error(trimmed_format_exc())
 if "llama2" in AVAIL_LLM_MODELS:   # llama2
    try:
        from .bridge_llama2 import predict_no_ui_long_connection as llama2_noui
@@ -952,7 +949,7 @@ if "llama2" in AVAIL_LLM_MODELS:   # llama2
            }
        })
    except:
-        print(trimmed_format_exc())
+        logger.error(trimmed_format_exc())
 # -=-=-=-=-=-=- 智谱 -=-=-=-=-=-=-
 if "zhipuai" in AVAIL_LLM_MODELS:   # zhipuai 是glm-4的别名，向后兼容配置
    try:
@@ -967,7 +964,7 @@ if "zhipuai" in AVAIL_LLM_MODELS:   # zhipuai 是glm-4的别名，向后兼容
            },
        })
    except:
-        print(trimmed_format_exc())
+        logger.error(trimmed_format_exc())
 # -=-=-=-=-=-=- 幻方-深度求索大模型 -=-=-=-=-=-=-
 if "deepseekcoder" in AVAIL_LLM_MODELS:   # deepseekcoder
    try:
@@ -984,7 +981,7 @@ if "deepseekcoder" in AVAIL_LLM_MODELS:   # deepseekcoder
            }
        })
    except:
-        print(trimmed_format_exc())
+        logger.error(trimmed_format_exc())
 # -=-=-=-=-=-=- 幻方-深度求索大模型在线API -=-=-=-=-=-=-
 if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS:
    try:
@@ -1012,7 +1009,7 @@ if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS:
            },
        })
    except:
-        print(trimmed_format_exc())
+        logger.error(trimmed_format_exc())
 # -=-=-=-=-=-=- one-api 对齐支持 -=-=-=-=-=-=-
 for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]:
    # 为了更灵活地接入one-api多模型管理界面，设计了此接口，例子：AVAIL_LLM_MODELS = ["one-api-mixtral-8x7b(max_token=6666)"]
@@ -1025,7 +1022,7 @@ for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]:
        # 如果是已知模型，则尝试获取其信息
        original_model_info = model_info.get(origin_model_name.replace("one-api-", "", 1), None)
    except:
-        print(f"one-api模型 {model} 的 max_token 配置不是整数，请检查配置文件。")
+        logger.error(f"one-api模型 {model} 的 max_token 配置不是整数，请检查配置文件。")
        continue
    this_model_info = {
        "fn_with_ui": chatgpt_ui,
@@ -1056,7 +1053,7 @@ for model in [m for m in AVAIL_LLM_MODELS if m.startswith("vllm-")]:
    try:
        _, max_token_tmp = read_one_api_model_name(model)
    except:
-        print(f"vllm模型 {model} 的 max_token 配置不是整数，请检查配置文件。")
+        logger.error(f"vllm模型 {model} 的 max_token 配置不是整数，请检查配置文件。")
        continue
    model_info.update({
        model: {
@@ -1083,7 +1080,7 @@ for model in [m for m in AVAIL_LLM_MODELS if m.startswith("ollama-")]:
    try:
        _, max_token_tmp = read_one_api_model_name(model)
    except:
-        print(f"ollama模型 {model} 的 max_token 配置不是整数，请检查配置文件。")
+        logger.error(f"ollama模型 {model} 的 max_token 配置不是整数，请检查配置文件。")
        continue
    model_info.update({
        model: {
@@ -1119,6 +1116,24 @@ if len(AZURE_CFG_ARRAY) > 0:
        if azure_model_name not in AVAIL_LLM_MODELS:
            AVAIL_LLM_MODELS += [azure_model_name]

+# -=-=-=-=-=-=- Openrouter模型对齐支持 -=-=-=-=-=-=-
+# 为了更灵活地接入Openrouter路由，设计了此接口
+for model in [m for m in AVAIL_LLM_MODELS if m.startswith("openrouter-")]:
+    from request_llms.bridge_openrouter import predict_no_ui_long_connection as openrouter_noui
+    from request_llms.bridge_openrouter import predict as openrouter_ui
+    model_info.update({
+        model: {
+            "fn_with_ui": openrouter_ui,
+            "fn_without_ui": openrouter_noui,
+            # 以下参数参考gpt-4o-mini的配置, 请根据实际情况修改
+            "endpoint": openai_endpoint,
+            "has_multimodal_capacity": True,
+            "max_token": 128000,
+            "tokenizer": tokenizer_gpt4,
+            "token_cnt": get_token_num_gpt4,
+        },
+    })
+

 # -=-=-=-=-=-=--=-=-=-=-=-=--=-=-=-=-=-=--=-=-=-=-=-=-=-=
 # -=-=-=-=-=-=-=-=-=- ☝️ 以上是模型路由 -=-=-=-=-=-=-=-=-=
@@ -1264,5 +1279,6 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot,
    if additional_fn: # 根据基础功能区 ModelOverride 参数调整模型类型
        llm_kwargs, additional_fn, method = execute_model_override(llm_kwargs, additional_fn, method)

+    # 更新一下llm_kwargs的参数，否则会出现参数不匹配的问题
    yield from method(inputs, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, stream, additional_fn)

--- a/request_llms/bridge_chatglmft.py
+++ b/request_llms/bridge_chatglmft.py
@@ -1,12 +1,13 @@

 from transformers import AutoModel, AutoTokenizer
+from loguru import logger
+from toolbox import update_ui, get_conf
+from multiprocessing import Process, Pipe
 import time
 import os
 import json
 import threading
 import importlib
-from toolbox import update_ui, get_conf
-from multiprocessing import Process, Pipe

 load_message = "ChatGLMFT尚未加载，加载需要一段时间。注意，取决于`config.py`的配置，ChatGLMFT消耗大量的内存（CPU）或显存（GPU），也许会导致低配计算机卡死 ……"

@@ -78,7 +79,7 @@ class GetGLMFTHandle(Process):
                    config.pre_seq_len = model_args['pre_seq_len']
                    config.prefix_projection = model_args['prefix_projection']

-                    print(f"Loading prefix_encoder weight from {CHATGLM_PTUNING_CHECKPOINT}")
+                    logger.info(f"Loading prefix_encoder weight from {CHATGLM_PTUNING_CHECKPOINT}")
                    model = AutoModel.from_pretrained(model_args['model_name_or_path'], config=config, trust_remote_code=True)
                    prefix_state_dict = torch.load(os.path.join(CHATGLM_PTUNING_CHECKPOINT, "pytorch_model.bin"))
                    new_prefix_state_dict = {}
@@ -88,7 +89,7 @@ class GetGLMFTHandle(Process):
                    model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)

                    if model_args['quantization_bit'] is not None and model_args['quantization_bit'] != 0:
-                        print(f"Quantized to {model_args['quantization_bit']} bit")
+                        logger.info(f"Quantized to {model_args['quantization_bit']} bit")
                        model = model.quantize(model_args['quantization_bit'])
                    model = model.cuda()
                    if model_args['pre_seq_len'] is not None:
--- a/request_llms/bridge_chatgpt.py
+++ b/request_llms/bridge_chatgpt.py
@@ -12,11 +12,12 @@ import json
 import os
 import re
 import time
-import logging
 import traceback
 import requests
 import random

+from loguru import logger
+
 # config_private.py放自己的秘密如API和代理网址
 # 读取时首先看是否存在私密的config_private配置文件（不受git管控），如果有，则覆盖原config文件
 from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history
@@ -152,7 +153,7 @@ def predict_no_ui_long_connection(inputs:str, llm_kwargs:dict, history:list=[],
            retry += 1
            traceback.print_exc()
            if retry > MAX_RETRY: raise TimeoutError
-            if MAX_RETRY!=0: print(f'请求超时，正在重试 ({retry}/{MAX_RETRY}) ……')
+            if MAX_RETRY!=0: logger.error(f'请求超时，正在重试 ({retry}/{MAX_RETRY}) ……')

    if not stream:
        # 该分支仅适用于不支持stream的o1模型，其他情形一律不适用
@@ -337,7 +338,6 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
                    # 前者是API2D的结束条件，后者是OPENAI的结束条件
                    if ('data: [DONE]' in chunk_decoded) or (len(chunkjson['choices'][0]["delta"]) == 0):
                        # 判定为数据流的结束，gpt_replying_buffer也写完了
-                        # logging.info(f'[response] {gpt_replying_buffer}')
                        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
                        break
                    # 处理数据流的主体
@@ -364,7 +364,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
                    error_msg = chunk_decoded
                    chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
                    yield from update_ui(chatbot=chatbot, history=history, msg="Json解析异常" + error_msg) # 刷新界面
-                    print(error_msg)
+                    logger.error(error_msg)
                    return
        return  # return from stream-branch

@@ -524,7 +524,6 @@ def generate_payload(inputs:str, llm_kwargs:dict, history:list, system_prompt:st
            "gpt-3.5-turbo-16k-0613",
            "gpt-3.5-turbo-0301",
        ])
-        logging.info("Random select model:" + model)

    payload = {
        "model": model,
@@ -534,10 +533,7 @@ def generate_payload(inputs:str, llm_kwargs:dict, history:list, system_prompt:st
        "n": 1,
        "stream": stream,
    }
-    # try:
-    #     print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........")
-    # except:
-    #     print('输入中可能存在乱码。')
+
    return headers,payload


--- a/request_llms/bridge_chatgpt_vision.py
+++ b/request_llms/bridge_chatgpt_vision.py
@@ -8,15 +8,15 @@
    2. predict_no_ui_long_connection：支持多线程
 """

+import os
 import json
 import time
-import logging
 import requests
 import base64
-import os
 import glob
+from loguru import logger
 from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, is_the_upload_folder, \
-    update_ui_lastest_msg, get_max_token, encode_image, have_any_recent_upload_image_files
+    update_ui_lastest_msg, get_max_token, encode_image, have_any_recent_upload_image_files, log_chat


 proxies, TIMEOUT_SECONDS, MAX_RETRY, API_ORG, AZURE_CFG_ARRAY = \
@@ -100,7 +100,6 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    raw_input = inputs
-    logging.info(f'[raw_input] {raw_input}')
    def make_media_input(inputs, image_paths):
        for image_path in image_paths:
            inputs = inputs + f'<br/><br/><div align="center"><img src="file={os.path.abspath(image_path)}"></div>'
@@ -185,7 +184,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
                        # 判定为数据流的结束，gpt_replying_buffer也写完了
                        lastmsg = chatbot[-1][-1] + f"\n\n\n\n「{llm_kwargs['llm_model']}调用结束，该模型不具备上下文对话能力，如需追问，请及时切换模型。」"
                        yield from update_ui_lastest_msg(lastmsg, chatbot, history, delay=1)
-                        logging.info(f'[response] {gpt_replying_buffer}')
+                        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
                        break
                    # 处理数据流的主体
                    status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}"
@@ -210,7 +209,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
                    error_msg = chunk_decoded
                    chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg, api_key)
                    yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
-                    print(error_msg)
+                    logger.error(error_msg)
                    return

 def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg, api_key=""):
@@ -301,10 +300,7 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, image_paths):
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }
-    try:
-        print(f" {llm_kwargs['llm_model']} : {inputs[:100]} ..........")
-    except:
-        print('输入中可能存在乱码。')
+
    return headers, payload, api_key


--- a/request_llms/bridge_chatgpt_website.py
+++ b/request_llms/bridge_chatgpt_website.py
@@ -1,281 +0,0 @@
-# 借鉴了 https://github.com/GaiZhenbiao/ChuanhuChatGPT 项目
-
-"""
-    该文件中主要包含三个函数
-
-    不具备多线程能力的函数：
-    1. predict: 正常对话时使用，具备完备的交互功能，不可多线程
-
-    具备多线程调用能力的函数
-    2. predict_no_ui_long_connection：支持多线程
-"""
-
-import json
-import time
-import gradio as gr
-import logging
-import traceback
-import requests
-import importlib
-
-# config_private.py放自己的秘密如API和代理网址
-# 读取时首先看是否存在私密的config_private配置文件（不受git管控），如果有，则覆盖原config文件
-from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc
-proxies, TIMEOUT_SECONDS, MAX_RETRY, API_ORG = \
-    get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'API_ORG')
-
-timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
-                  '网络错误，检查代理服务器是否可用，以及代理设置的格式是否正确，格式须是[协议]://[地址]:[端口]，缺一不可。'
-
-def get_full_error(chunk, stream_response):
-    """
-        获取完整的从Openai返回的报错
-    """
-    while True:
-        try:
-            chunk += next(stream_response)
-        except:
-            break
-    return chunk
-
-
-def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
-    """
-    发送至chatGPT，等待回复，一次性完成，不显示中间过程。但内部用stream的方法避免中途网线被掐。
-    inputs：
-        是本次问询的输入
-    sys_prompt:
-        系统静默prompt
-    llm_kwargs：
-        chatGPT的内部调优参数
-    history：
-        是之前的对话列表
-    observe_window = None：
-        用于负责跨越线程传递已经输出的部分，大部分时候仅仅为了fancy的视觉效果，留空即可。observe_window[0]：观测窗。observe_window[1]：看门狗
-    """
-    watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可
-    headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True)
-    retry = 0
-    while True:
-        try:
-            # make a POST request to the API endpoint, stream=False
-            from .bridge_all import model_info
-            endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
-            response = requests.post(endpoint, headers=headers, proxies=proxies,
-                                    json=payload, stream=True, timeout=TIMEOUT_SECONDS); break
-        except requests.exceptions.ReadTimeout as e:
-            retry += 1
-            traceback.print_exc()
-            if retry > MAX_RETRY: raise TimeoutError
-            if MAX_RETRY!=0: print(f'请求超时，正在重试 ({retry}/{MAX_RETRY}) ……')
-
-    stream_response =  response.iter_lines()
-    result = ''
-    while True:
-        try: chunk = next(stream_response).decode()
-        except StopIteration:
-            break
-        except requests.exceptions.ConnectionError:
-            chunk = next(stream_response).decode() # 失败了，重试一次？再失败就没办法了。
-        if len(chunk)==0: continue
-        if not chunk.startswith('data:'):
-            error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()
-            if "reduce the length" in error_msg:
-                raise ConnectionAbortedError("OpenAI拒绝了请求:" + error_msg)
-            else:
-                raise RuntimeError("OpenAI拒绝了请求：" + error_msg)
-        if ('data: [DONE]' in chunk): break # api2d 正常完成
-        json_data = json.loads(chunk.lstrip('data:'))['choices'][0]
-        delta = json_data["delta"]
-        if len(delta) == 0: break
-        if "role" in delta: continue
-        if "content" in delta:
-            result += delta["content"]
-            if not console_slience: print(delta["content"], end='')
-            if observe_window is not None:
-                # 观测窗，把已经获取的数据显示出去
-                if len(observe_window) >= 1: observe_window[0] += delta["content"]
-                # 看门狗，如果超过期限没有喂狗，则终止
-                if len(observe_window) >= 2:
-                    if (time.time()-observe_window[1]) > watch_dog_patience:
-                        raise RuntimeError("用户取消了程序。")
-        else: raise RuntimeError("意外Json结构："+delta)
-    if json_data['finish_reason'] == 'content_filter':
-        raise RuntimeError("由于提问含不合规内容被Azure过滤。")
-    if json_data['finish_reason'] == 'length':
-        raise ConnectionAbortedError("正常结束，但显示Token不足，导致输出不完整，请削减单次输入的文本量。")
-    return result
-
-
-def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
-    """
-    发送至chatGPT，流式获取输出。
-    用于基础的对话功能。
-    inputs 是本次问询的输入
-    top_p, temperature是chatGPT的内部调优参数
-    history 是之前的对话列表（注意无论是inputs还是history，内容太长了都会触发token数量溢出的错误）
-    chatbot 为WebUI中显示的对话列表，修改它，然后yeild出去，可以直接修改对话界面内容
-    additional_fn代表点击的哪个按钮，按钮见functional.py
-    """
-    if additional_fn is not None:
-        from core_functional import handle_core_functionality
-        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
-
-    raw_input = inputs
-    logging.info(f'[raw_input] {raw_input}')
-    chatbot.append((inputs, ""))
-    yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面
-
-    try:
-        headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt, stream)
-    except RuntimeError as e:
-        chatbot[-1] = (inputs, f"您提供的api-key不满足要求，不包含任何可用于{llm_kwargs['llm_model']}的api-key。您可能选择了错误的模型或请求源。")
-        yield from update_ui(chatbot=chatbot, history=history, msg="api-key不满足要求") # 刷新界面
-        return
-
-    history.append(inputs); history.append("")
-
-    retry = 0
-    while True:
-        try:
-            # make a POST request to the API endpoint, stream=True
-            from .bridge_all import model_info
-            endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
-            response = requests.post(endpoint, headers=headers, proxies=proxies,
-                                    json=payload, stream=True, timeout=TIMEOUT_SECONDS);break
-        except:
-            retry += 1
-            chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg))
-            retry_msg = f"，正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else ""
-            yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # 刷新界面
-            if retry > MAX_RETRY: raise TimeoutError
-
-    gpt_replying_buffer = ""
-
-    is_head_of_the_stream = True
-    if stream:
-        stream_response =  response.iter_lines()
-        while True:
-            try:
-                chunk = next(stream_response)
-            except StopIteration:
-                # 非OpenAI官方接口的出现这样的报错，OpenAI和API2D不会走这里
-                chunk_decoded = chunk.decode()
-                error_msg = chunk_decoded
-                chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
-                yield from update_ui(chatbot=chatbot, history=history, msg="非Openai官方接口返回了错误:" + chunk.decode()) # 刷新界面
-                return
-
-            # print(chunk.decode()[6:])
-            if is_head_of_the_stream and (r'"object":"error"' not in chunk.decode()):
-                # 数据流的第一帧不携带content
-                is_head_of_the_stream = False; continue
-
-            if chunk:
-                try:
-                    chunk_decoded = chunk.decode()
-                    # 前者是API2D的结束条件，后者是OPENAI的结束条件
-                    if 'data: [DONE]' in chunk_decoded:
-                        # 判定为数据流的结束，gpt_replying_buffer也写完了
-                        logging.info(f'[response] {gpt_replying_buffer}')
-                        break
-                    # 处理数据流的主体
-                    chunkjson = json.loads(chunk_decoded[6:])
-                    status_text = f"finish_reason: {chunkjson['choices'][0]['finish_reason']}"
-                    delta = chunkjson['choices'][0]["delta"]
-                    if "content" in delta:
-                        gpt_replying_buffer = gpt_replying_buffer + delta["content"]
-                    history[-1] = gpt_replying_buffer
-                    chatbot[-1] = (history[-2], history[-1])
-                    yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # 刷新界面
-                except Exception as e:
-                    yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # 刷新界面
-                    chunk = get_full_error(chunk, stream_response)
-                    chunk_decoded = chunk.decode()
-                    error_msg = chunk_decoded
-                    chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
-                    yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
-                    print(error_msg)
-                    return
-
-def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):
-    from .bridge_all import model_info
-    openai_website = ' 请登录OpenAI查看详情 https://platform.openai.com/signup'
-    if "reduce the length" in error_msg:
-        if len(history) >= 2: history[-1] = ""; history[-2] = "" # 清除当前溢出的输入：history[-2] 是本次输入, history[-1] 是本次输出
-        history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
-                                               max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])) # history至少释放二分之一
-        chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
-                        # history = []    # 清除历史
-    elif "does not exist" in error_msg:
-        chatbot[-1] = (chatbot[-1][0], f"[Local Message] Model {llm_kwargs['llm_model']} does not exist. 模型不存在, 或者您没有获得体验资格.")
-    elif "Incorrect API key" in error_msg:
-        chatbot[-1] = (chatbot[-1][0], "[Local Message] Incorrect API key. OpenAI以提供了不正确的API_KEY为由, 拒绝服务. " + openai_website)
-    elif "exceeded your current quota" in error_msg:
-        chatbot[-1] = (chatbot[-1][0], "[Local Message] You exceeded your current quota. OpenAI以账户额度不足为由, 拒绝服务." + openai_website)
-    elif "account is not active" in error_msg:
-        chatbot[-1] = (chatbot[-1][0], "[Local Message] Your account is not active. OpenAI以账户失效为由, 拒绝服务." + openai_website)
-    elif "associated with a deactivated account" in error_msg:
-        chatbot[-1] = (chatbot[-1][0], "[Local Message] You are associated with a deactivated account. OpenAI以账户失效为由, 拒绝服务." + openai_website)
-    elif "bad forward key" in error_msg:
-        chatbot[-1] = (chatbot[-1][0], "[Local Message] Bad forward key. API2D账户额度不足.")
-    elif "Not enough point" in error_msg:
-        chatbot[-1] = (chatbot[-1][0], "[Local Message] Not enough point. API2D账户点数不足.")
-    else:
-        from toolbox import regular_txt_to_markdown
-        tb_str = '```\n' + trimmed_format_exc() + '```'
-        chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}")
-    return chatbot, history
-
-def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
-    """
-    整合所有信息，选择LLM模型，生成http请求，为发送请求做准备
-    """
-    if not is_any_api_key(llm_kwargs['api_key']):
-        raise AssertionError("你提供了错误的API_KEY。\n\n1. 临时解决方案：直接在输入区键入api_key，然后回车提交。\n\n2. 长效解决方案：在config.py中配置。")
-
-    headers = {
-        "Content-Type": "application/json",
-    }
-
-    conversation_cnt = len(history) // 2
-
-    messages = [{"role": "system", "content": system_prompt}]
-    if conversation_cnt:
-        for index in range(0, 2*conversation_cnt, 2):
-            what_i_have_asked = {}
-            what_i_have_asked["role"] = "user"
-            what_i_have_asked["content"] = history[index]
-            what_gpt_answer = {}
-            what_gpt_answer["role"] = "assistant"
-            what_gpt_answer["content"] = history[index+1]
-            if what_i_have_asked["content"] != "":
-                if what_gpt_answer["content"] == "": continue
-                if what_gpt_answer["content"] == timeout_bot_msg: continue
-                messages.append(what_i_have_asked)
-                messages.append(what_gpt_answer)
-            else:
-                messages[-1]['content'] = what_gpt_answer['content']
-
-    what_i_ask_now = {}
-    what_i_ask_now["role"] = "user"
-    what_i_ask_now["content"] = inputs
-    messages.append(what_i_ask_now)
-
-    payload = {
-        "model": llm_kwargs['llm_model'].strip('api2d-'),
-        "messages": messages,
-        "temperature": llm_kwargs['temperature'],  # 1.0,
-        "top_p": llm_kwargs['top_p'],  # 1.0,
-        "n": 1,
-        "stream": stream,
-        "presence_penalty": 0,
-        "frequency_penalty": 0,
-    }
-    try:
-        print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........")
-    except:
-        print('输入中可能存在乱码。')
-    return headers,payload
-
-
--- a/request_llms/bridge_claude.py
+++ b/request_llms/bridge_claude.py
@@ -9,13 +9,14 @@
    具备多线程调用能力的函数
    2. predict_no_ui_long_connection：支持多线程
 """
-import logging
 import os
 import time
 import traceback
 import json
 import requests
+from loguru import logger
 from toolbox import get_conf, update_ui, trimmed_format_exc, encode_image, every_image_file_in_path, log_chat
+
 picture_system_prompt = "\n当回复图像时,必须说明正在回复哪张图像。所有图像仅在最后一个问题中提供,即使它们在历史记录中被提及。请使用'这是第X张图像:'的格式来指明您正在描述的是哪张图像。"
 Claude_3_Models = ["claude-3-haiku-20240307", "claude-3-sonnet-20240229", "claude-3-opus-20240229", "claude-3-5-sonnet-20240620"]

@@ -101,7 +102,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
            retry += 1
            traceback.print_exc()
            if retry > MAX_RETRY: raise TimeoutError
-            if MAX_RETRY!=0: print(f'请求超时，正在重试 ({retry}/{MAX_RETRY}) ……')
+            if MAX_RETRY!=0: logger.error(f'请求超时，正在重试 ({retry}/{MAX_RETRY}) ……')
    stream_response = response.iter_lines()
    result = ''
    while True:
@@ -116,12 +117,11 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
                if need_to_pass:
                    pass
                elif is_last_chunk:
-                    # logging.info(f'[response] {result}')
+                    # logger.info(f'[response] {result}')
                    break
                else:
                    if chunkjson and chunkjson['type'] == 'content_block_delta':
                        result += chunkjson['delta']['text']
-                        print(chunkjson['delta']['text'], end='')
                        if observe_window is not None:
                            # 观测窗，把已经获取的数据显示出去
                            if len(observe_window) >= 1:
@@ -134,7 +134,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
                chunk = get_full_error(chunk, stream_response)
                chunk_decoded = chunk.decode()
                error_msg = chunk_decoded
-                print(error_msg)
+                logger.error(error_msg)
                raise RuntimeError("Json解析不合常规")

    return result
@@ -200,7 +200,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
            retry += 1
            traceback.print_exc()
            if retry > MAX_RETRY: raise TimeoutError
-            if MAX_RETRY!=0: print(f'请求超时，正在重试 ({retry}/{MAX_RETRY}) ……')
+            if MAX_RETRY!=0: logger.error(f'请求超时，正在重试 ({retry}/{MAX_RETRY}) ……')
    stream_response = response.iter_lines()
    gpt_replying_buffer = ""

@@ -217,7 +217,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
                    pass
                elif is_last_chunk:
                    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
-                    # logging.info(f'[response] {gpt_replying_buffer}')
+                    # logger.info(f'[response] {gpt_replying_buffer}')
                    break
                else:
                    if chunkjson and chunkjson['type'] == 'content_block_delta':
@@ -230,7 +230,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
                chunk = get_full_error(chunk, stream_response)
                chunk_decoded = chunk.decode()
                error_msg = chunk_decoded
-                print(error_msg)
+                logger.error(error_msg)
                raise RuntimeError("Json解析不合常规")

 def multiple_picture_types(image_paths):
--- a/request_llms/bridge_cohere.py
+++ b/request_llms/bridge_cohere.py
@@ -13,11 +13,9 @@
 import json
 import time
 import gradio as gr
-import logging
 import traceback
 import requests
-import importlib
-import random
+from loguru import logger

 # config_private.py放自己的秘密如API和代理网址
 # 读取时首先看是否存在私密的config_private配置文件（不受git管控），如果有，则覆盖原config文件
@@ -98,7 +96,7 @@ def predict_no_ui_long_connection(inputs:str, llm_kwargs:dict, history:list=[],
            retry += 1
            traceback.print_exc()
            if retry > MAX_RETRY: raise TimeoutError
-            if MAX_RETRY!=0: print(f'请求超时，正在重试 ({retry}/{MAX_RETRY}) ……')
+            if MAX_RETRY!=0: logger.error(f'请求超时，正在重试 ({retry}/{MAX_RETRY}) ……')

    stream_response = response.iter_lines()
    result = ''
@@ -153,7 +151,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    raw_input = inputs
-    # logging.info(f'[raw_input] {raw_input}')
+    # logger.info(f'[raw_input] {raw_input}')
    chatbot.append((inputs, ""))
    yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面

@@ -237,7 +235,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
                    error_msg = chunk_decoded
                    chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
                    yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
-                    print(error_msg)
+                    logger.error(error_msg)
                    return

 def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):
--- a/request_llms/bridge_deepseekcoder.py
+++ b/request_llms/bridge_deepseekcoder.py
@@ -1,12 +1,13 @@
 model_name = "deepseek-coder-6.7b-instruct"
 cmd_to_install = "未知" # "`pip install -r request_llms/requirements_qwen.txt`"

-import os
 from toolbox import ProxyNetworkActivate
 from toolbox import get_conf
-from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
+from request_llms.local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
 from threading import Thread
+from loguru import logger
 import torch
+import os

 def download_huggingface_model(model_name, max_retry, local_dir):
    from huggingface_hub import snapshot_download
@@ -15,7 +16,7 @@ def download_huggingface_model(model_name, max_retry, local_dir):
            snapshot_download(repo_id=model_name, local_dir=local_dir, resume_download=True)
            break
        except Exception as e:
-            print(f'\n\n下载失败，重试第{i}次中...\n\n')
+            logger.error(f'\n\n下载失败，重试第{i}次中...\n\n')
    return local_dir
 # ------------------------------------------------------------------------------------------------------------------------
 # 🔌💻 Local Model
@@ -112,7 +113,6 @@ class GetCoderLMHandle(LocalLLMHandle):
        generated_text = ""
        for new_text in self._streamer:
            generated_text += new_text
-            # print(generated_text)
            yield generated_text


--- a/request_llms/bridge_internlm.py
+++ b/request_llms/bridge_internlm.py
@@ -65,10 +65,10 @@ class GetInternlmHandle(LocalLLMHandle):

    def llm_stream_generator(self, **kwargs):
        import torch
-        import logging
        import copy
        import warnings
        import torch.nn as nn
+        from loguru import logger as logging 
        from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList, GenerationConfig

        # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
@@ -119,7 +119,7 @@ class GetInternlmHandle(LocalLLMHandle):
        elif generation_config.max_new_tokens is not None:
            generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length
            if not has_default_max_length:
-                logging.warn(
+                logging.warning(
                    f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
                    f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
                    "Please refer to the documentation for more information. "
--- a/request_llms/bridge_moonshot.py
+++ b/request_llms/bridge_moonshot.py
@@ -5,7 +5,6 @@
 import json
 import os
 import time
-import logging

 from toolbox import get_conf, update_ui, log_chat
 from toolbox import ChatBotWithCookies
--- a/request_llms/bridge_ollama.py
+++ b/request_llms/bridge_ollama.py
@@ -13,11 +13,11 @@
 import json
 import time
 import gradio as gr
-import logging
 import traceback
 import requests
 import importlib
 import random
+from loguru import logger

 # config_private.py放自己的秘密如API和代理网址
 # 读取时首先看是否存在私密的config_private配置文件（不受git管控），如果有，则覆盖原config文件
@@ -81,7 +81,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
            retry += 1
            traceback.print_exc()
            if retry > MAX_RETRY: raise TimeoutError
-            if MAX_RETRY!=0: print(f'请求超时，正在重试 ({retry}/{MAX_RETRY}) ……')
+            if MAX_RETRY!=0: logger.error(f'请求超时，正在重试 ({retry}/{MAX_RETRY}) ……')

    stream_response = response.iter_lines()
    result = ''
@@ -96,7 +96,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
            try:
                if is_last_chunk:
                    # 判定为数据流的结束，gpt_replying_buffer也写完了
-                    logging.info(f'[response] {result}')
+                    logger.info(f'[response] {result}')
                    break
                result += chunkjson['message']["content"]
                if not console_slience: print(chunkjson['message']["content"], end='')
@@ -112,7 +112,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
                chunk = get_full_error(chunk, stream_response)
                chunk_decoded = chunk.decode()
                error_msg = chunk_decoded
-                print(error_msg)
+                logger.error(error_msg)
                raise RuntimeError("Json解析不合常规")
    return result

@@ -134,7 +134,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    raw_input = inputs
-    logging.info(f'[raw_input] {raw_input}')
+    logger.info(f'[raw_input] {raw_input}')
    chatbot.append((inputs, ""))
    yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面

@@ -183,7 +183,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
                try:
                    if is_last_chunk:
                        # 判定为数据流的结束，gpt_replying_buffer也写完了
-                        logging.info(f'[response] {gpt_replying_buffer}')
+                        logger.info(f'[response] {gpt_replying_buffer}')
                        break
                    # 处理数据流的主体
                    try:
@@ -202,7 +202,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
                    error_msg = chunk_decoded
                    chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
                    yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
-                    print(error_msg)
+                    logger.error(error_msg)
                    return

 def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):
@@ -265,8 +265,5 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
        "messages": messages,
        "options": options,
    }
-    try:
-        print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........")
-    except:
-        print('输入中可能存在乱码。')
+
    return headers,payload
--- a/request_llms/bridge_openrouter.py
+++ b/request_llms/bridge_openrouter.py
@@ -0,0 +1,541 @@
+"""
+    该文件中主要包含三个函数
+
+    不具备多线程能力的函数：
+    1. predict: 正常对话时使用，具备完备的交互功能，不可多线程
+
+    具备多线程调用能力的函数
+    2. predict_no_ui_long_connection：支持多线程
+"""
+
+import json
+import os
+import re
+import time
+import traceback
+import requests
+import random
+from loguru import logger
+
+# config_private.py放自己的秘密如API和代理网址
+# 读取时首先看是否存在私密的config_private配置文件（不受git管控），如果有，则覆盖原config文件
+from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history
+from toolbox import trimmed_format_exc, is_the_upload_folder, read_one_api_model_name, log_chat
+from toolbox import ChatBotWithCookies, have_any_recent_upload_image_files, encode_image
+(proxies, TIMEOUT_SECONDS, MAX_RETRY, API_ORG, AZURE_CFG_ARRAY) = get_conf(
+    'proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'API_ORG', 'AZURE_CFG_ARRAY')
+# Reply shown in the chat window when a request to the endpoint fails or times out.
+timeout_bot_msg = ('[Local Message] Request timeout. Network error. Please check proxy settings in config.py.'
+                   '网络错误，检查代理服务器是否可用，以及代理设置的格式是否正确，格式须是[协议]://[地址]:[端口]，缺一不可。')
+
+def get_full_error(chunk, stream_response):
+    """
+    Append all remaining lines of `stream_response` to `chunk` and return it (used to collect a full error body).
+    Best effort: exhaustion of the iterator or any error raised while reading simply ends the collection.
+    """
+    while True:
+        try: chunk += next(stream_response)
+        except Exception:  # narrower than a bare except, so KeyboardInterrupt/SystemExit still propagate
+            break
+    return chunk
+
+def make_multimodal_input(inputs, image_paths):
+    # Append one <img> tag per image (absolute path + base64 payload) to the text; returns (text, payloads).
+    encoded_images = []
+    for image_path in image_paths:
+        abs_path = os.path.abspath(image_path)
+        encoded_images.append(encode_image(abs_path))
+        inputs += f'<br/><br/><div align="center"><img src="file={abs_path}" base64="{encoded_images[-1]}"></div>'
+    return inputs, encoded_images
+
+def reverse_base64_from_input(inputs):
+    """
+    Recover the base64 payloads embedded in `inputs` as
+    <br/><br/><div align="center"><img ... base64="<payload>"></div> tags.
+    Returns the payloads in order of appearance (an empty list when there are none).
+    """
+    img_tag = r'<br/><br/><div align="center"><img[^<>]+base64="([^"]+)"></div>'
+    return re.findall(img_tag, inputs)
+
+def contain_base64(inputs):
+    # True when the text carries at least one embedded base64 image tag.
+    return bool(reverse_base64_from_input(inputs))
+
+def append_image_if_contain_base64(inputs):
+    # Turn text carrying embedded base64 image tags into a content list: one "text" part
+    # (with the tags stripped) followed by one "image_url" part per image.
+    # Text without any embedded image is returned unchanged.
+    if not contain_base64(inputs):
+        return inputs
+    image_base64_array = reverse_base64_from_input(inputs)
+    plain_text = re.sub(r'<br/><br/><div align="center"><img[^><]+></div>', '', inputs)
+    text_part = {"type": "text", "text": plain_text}
+    # every image is labelled as jpeg in its data URL
+    image_parts = [
+        {
+            "type": "image_url",
+            "image_url": {
+                "url": f"data:image/jpeg;base64,{image_base64}"
+            }
+        }
+        for image_base64 in image_base64_array
+    ]
+    return [text_part] + image_parts
+
+def remove_image_if_contain_base64(inputs):
+    # Strip every embedded <img> block from the text.
+    # Text without an embedded base64 image passes through untouched.
+    if contain_base64(inputs):
+        img_tag = re.compile(r'<br/><br/><div align="center"><img[^><]+></div>')
+        return img_tag.sub('', inputs)
+    return inputs
+
+def decode_chunk(chunk):
+    """Decode one raw stream line and probe its JSON payload ahead of time (used to detect anomalies).
+    Returns (chunk_decoded, chunkjson, has_choices, choice_valid, has_content, has_role).
+    Probing is best effort: values not reached before a parsing failure keep their None/False defaults.
+    """
+    chunk_decoded = chunk.decode()
+    chunkjson = None
+    has_choices = choice_valid = has_content = has_role = False
+    try:
+        chunkjson = json.loads(chunk_decoded[6:])  # skip the first 6 characters (the 'data: ' prefix of a stream line)
+        has_choices = 'choices' in chunkjson
+        if has_choices: choice_valid = (len(chunkjson['choices']) > 0)
+        if has_choices and choice_valid: has_content = ("content" in chunkjson['choices'][0]["delta"])
+        if has_content: has_content = (chunkjson['choices'][0]["delta"]["content"] is not None)
+        if has_choices and choice_valid: has_role = "role" in chunkjson['choices'][0]["delta"]
+    except Exception:  # narrower than a bare except, so KeyboardInterrupt/SystemExit still propagate
+        pass
+    return chunk_decoded, chunkjson, has_choices, choice_valid, has_content, has_role
+
+from functools import lru_cache
+@lru_cache(maxsize=32)
+def verify_endpoint(endpoint):
+    """
+    Return `endpoint` unchanged unless it still contains the placeholder text '你亲手写的api名称' (then ValueError).
+    """
+    placeholder_left_in = "你亲手写的api名称" in endpoint
+    if not placeholder_left_in: return endpoint
+    raise ValueError("Endpoint不正确, 请检查AZURE_ENDPOINT的配置! 当前的Endpoint为:" + endpoint)
+
+def predict_no_ui_long_connection(inputs:str, llm_kwargs:dict, history:list=[], sys_prompt:str="", observe_window:list=None, console_slience:bool=False):
+    """
+    Send one request and block until the whole reply has arrived, without showing intermediate output.
+    The reply is still read as a stream internally (unless the model's 'openai_disable_stream' flag is set).
+    inputs: the text of the current query.
+    sys_prompt: the silent system prompt.
+    llm_kwargs: model parameters; 'llm_model' is read here and the whole dict is forwarded to generate_payload.
+    history: the previous conversation list.
+    observe_window: optional list shared with another thread. observe_window[0] accumulates the text received so far;
+        observe_window[1] is a watchdog timestamp, and the request is aborted once it is more than 5 seconds old.
+    console_slience: when False, every received fragment is also printed to stdout.
+    Returns the complete reply text.
+    Raises TimeoutError after too many read timeouts, ConnectionAbortedError when the text is too long, RuntimeError otherwise.
+    """
+    from request_llms.bridge_all import model_info
+
+    watch_dog_patience = 5 # watchdog patience in seconds
+
+    if model_info[llm_kwargs['llm_model']].get('openai_disable_stream', False): stream = False
+    else: stream = True
+
+    headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=stream)
+    retry = 0
+    while True:
+        try:
+            # POST to the model's endpoint; only a read timeout is retried, any other error propagates
+            endpoint = verify_endpoint(model_info[llm_kwargs['llm_model']]['endpoint'])
+            response = requests.post(endpoint, headers=headers, proxies=proxies,
+                                    json=payload, stream=stream, timeout=TIMEOUT_SECONDS); break
+        except requests.exceptions.ReadTimeout as e:
+            retry += 1
+            traceback.print_exc()
+            if retry > MAX_RETRY: raise TimeoutError
+            if MAX_RETRY!=0: logger.error(f'请求超时，正在重试 ({retry}/{MAX_RETRY}) ……')
+
+    if not stream:
+        # non-streaming reply: one JSON body (per the original note, only intended for o1 models without stream support)
+        chunkjson = json.loads(response.content.decode())
+        gpt_replying_buffer = chunkjson['choices'][0]["message"]["content"]
+        return gpt_replying_buffer
+
+    stream_response = response.iter_lines()
+    result = ''
+    json_data = None
+    while True:
+        try: chunk = next(stream_response)
+        except StopIteration:
+            break
+        except requests.exceptions.ConnectionError:
+            chunk = next(stream_response) # retry once; a second failure propagates to the caller
+        chunk_decoded, chunkjson, has_choices, choice_valid, has_content, has_role = decode_chunk(chunk)
+        if len(chunk_decoded)==0: continue
+        if not chunk_decoded.startswith('data:'):
+            error_msg = get_full_error(chunk, stream_response).decode()
+            if "reduce the length" in error_msg:
+                raise ConnectionAbortedError("OpenAI拒绝了请求:" + error_msg)
+            elif """type":"upstream_error","param":"307""" in error_msg:
+                raise ConnectionAbortedError("正常结束，但显示Token不足，导致输出不完整，请削减单次输入的文本量。")
+            else:
+                raise RuntimeError("OpenAI拒绝了请求：" + error_msg)
+        if ('data: [DONE]' in chunk_decoded): break # normal end of the stream (api2d style)
+        # the flags computed by decode_chunk are used below to spot anomalies
+        if (has_choices and not choice_valid) or ('OPENROUTER PROCESSING' in chunk_decoded):
+            # an empty 'choices' list (some third-party APIs) or an 'OPENROUTER PROCESSING' line carries no text
+            continue
+        json_data = chunkjson['choices'][0]
+        delta = json_data["delta"]
+        if len(delta) == 0: break
+        if (not has_content) and has_role: continue
+        if (not has_content) and (not has_role): continue # non-standard third-party chunk: skipped rather than raised
+        if has_content: # has_role = True/False
+            result += delta["content"]
+            if not console_slience: print(delta["content"], end='')
+            if observe_window is not None:
+                # observation window: expose the text received so far
+                if len(observe_window) >= 1:
+                    observe_window[0] += delta["content"]
+                # watchdog: abort when it has not been fed within the patience period
+                if len(observe_window) >= 2:
+                    if (time.time()-observe_window[1]) > watch_dog_patience:
+                        raise RuntimeError("用户取消了程序。")
+        else: raise RuntimeError("意外Json结构："+str(delta))  # str(): delta is a dict, plain concatenation would be a TypeError
+    if json_data and json_data.get('finish_reason') == 'content_filter':  # .get(): tolerate a missing finish_reason
+        raise RuntimeError("由于提问含不合规内容被Azure过滤。")
+    if json_data and json_data.get('finish_reason') == 'length':
+        raise ConnectionAbortedError("正常结束，但显示Token不足，导致输出不完整，请削减单次输入的文本量。")
+    return result
+
+
+def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWithCookies,
+            history:list=[], system_prompt:str='', stream:bool=True, additional_fn:str=None):
+    """
+    Send the query and stream the reply into the chat UI; this generator is the basic conversation entry point.
+    inputs: the text of the current query.
+    llm_kwargs: model parameters such as top_p and temperature; 'llm_model' selects the model.
+    history: the previous conversation list (overly long inputs or history trigger a token overflow error).
+    chatbot: the conversation list shown in the WebUI; it is modified and yielded out to refresh the page.
+    additional_fn: the clicked basic-function button, if any (resolved through core_functional).
+    The `stream` argument is overridden by the model's 'openai_disable_stream' setting.
+    """
+    from request_llms.bridge_all import model_info
+    if is_any_api_key(inputs):
+        chatbot._cookies['api_key'] = inputs
+        chatbot.append(("输入已识别为openai的api_key", what_keys(inputs)))
+        yield from update_ui(chatbot=chatbot, history=history, msg="api_key已导入") # refresh the UI
+        return
+    elif not is_any_api_key(chatbot._cookies['api_key']):
+        chatbot.append((inputs, "缺少api_key。\n\n1. 临时解决方案：直接在输入区键入api_key，然后回车提交。\n\n2. 长效解决方案：在config.py中配置。"))
+        yield from update_ui(chatbot=chatbot, history=history, msg="缺少api_key") # refresh the UI
+        return
+
+    user_input = inputs
+    if additional_fn is not None:
+        from core_functional import handle_core_functionality
+        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
+
+    # multimodal models: pick up recently uploaded images
+    has_multimodal_capacity = model_info[llm_kwargs['llm_model']].get('has_multimodal_capacity', False)
+    if has_multimodal_capacity:
+        has_recent_image_upload, image_paths = have_any_recent_upload_image_files(chatbot, pop=True)
+    else:
+        has_recent_image_upload, image_paths = False, []
+    if has_recent_image_upload:
+        _inputs, image_base64_array = make_multimodal_input(inputs, image_paths)
+    else:
+        _inputs, image_base64_array = inputs, []
+    chatbot.append((_inputs, ""))
+    yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # refresh the UI
+
+    # models that cannot stream override the `stream` argument
+    if model_info[llm_kwargs['llm_model']].get('openai_disable_stream', False): stream = False
+    else: stream = True
+
+    # check mis-behavior
+    if is_the_upload_folder(user_input):
+        chatbot[-1] = (inputs, f"[Local Message] 检测到操作错误！当您上传文档之后，需点击“**函数插件区**”按钮进行处理，请勿点击“提交”按钮或者“基础功能区”按钮。")
+        yield from update_ui(chatbot=chatbot, history=history, msg="正常") # refresh the UI
+        time.sleep(2)
+
+    try:
+        headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt, image_base64_array, has_multimodal_capacity, stream)
+    except (RuntimeError, AssertionError) as e:  # generate_payload raises AssertionError for an unusable api_key
+        chatbot[-1] = (inputs, f"您提供的api-key不满足要求，不包含任何可用于{llm_kwargs['llm_model']}的api-key。您可能选择了错误的模型或请求源。")
+        yield from update_ui(chatbot=chatbot, history=history, msg="api-key不满足要求") # refresh the UI
+        return
+
+    # make sure the endpoint is acceptable
+    try:
+        endpoint = verify_endpoint(model_info[llm_kwargs['llm_model']]['endpoint'])
+    except Exception:
+        tb_str = '```\n' + trimmed_format_exc() + '```'
+        chatbot[-1] = (inputs, tb_str)
+        yield from update_ui(chatbot=chatbot, history=history, msg="Endpoint不满足要求") # refresh the UI
+        return
+
+    # append the current turn to the history
+    if has_recent_image_upload:
+        history.extend([_inputs, ""])
+    else:
+        history.extend([inputs, ""])
+
+    retry = 0
+    while True:
+        try:
+            # make a POST request to the API endpoint, stream=True
+            response = requests.post(endpoint, headers=headers, proxies=proxies,
+                                    json=payload, stream=stream, timeout=TIMEOUT_SECONDS);break
+        except Exception:  # any request failure is retried; KeyboardInterrupt/SystemExit are not swallowed
+            retry += 1
+            chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg))
+            retry_msg = f"，正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else ""
+            yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # refresh the UI
+            if retry > MAX_RETRY: raise TimeoutError
+
+    if not stream:
+        # non-streaming reply (per the original note, only intended for o1 models without stream support)
+        yield from handle_o1_model_special(response, inputs, llm_kwargs, chatbot, history)
+        return
+
+    if stream:
+        gpt_replying_buffer = ""
+        is_head_of_the_stream = True
+        stream_response =  response.iter_lines()
+        chunk = b''  # keeps `chunk` defined in the StopIteration handler when the stream yields nothing at all
+        while True:
+            try:
+                chunk = next(stream_response)
+            except StopIteration:
+                # the stream ended without an end marker (per the original note, OpenAI and API2D never get here);
+                # `chunk` still holds the last line that was received
+                chunk_decoded = chunk.decode()
+                error_msg = chunk_decoded
+                # first rule out the third-party (one-api) case that merely omits the final "done" packet
+                if len(gpt_replying_buffer.strip()) > 0 and len(error_msg) == 0:
+                    yield from update_ui(chatbot=chatbot, history=history, msg="检测到有缺陷的非OpenAI官方接口，建议选择更稳定的接口。")
+                    break
+                # anything else is reported as an error
+                chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
+                yield from update_ui(chatbot=chatbot, history=history, msg="非OpenAI官方接口返回了错误:" + chunk.decode()) # refresh the UI
+                return
+
+            # read some information ahead of time (used to detect anomalies)
+            chunk_decoded, chunkjson, has_choices, choice_valid, has_content, has_role = decode_chunk(chunk)
+
+            if is_head_of_the_stream and (r'"object":"error"' not in chunk_decoded) and (r"content" not in chunk_decoded):
+                # the first frame of the stream carries no content
+                is_head_of_the_stream = False; continue
+
+            if chunk:
+                try:
+                    if (has_choices and not choice_valid) or ('OPENROUTER PROCESSING' in chunk_decoded):
+                        # an empty 'choices' list (some third-party APIs) or an 'OPENROUTER PROCESSING' line: nothing to show
+                        continue
+                    if ('data: [DONE]' not in chunk_decoded) and len(chunk_decoded) > 0 and (chunkjson is None):
+                        # something unreadable came in
+                        raise ValueError(f'无法读取以下数据，请检查配置。\n\n{chunk_decoded}')
+                    # the former is the API2D end condition, the latter the OpenAI one
+                    if ('data: [DONE]' in chunk_decoded) or (len(chunkjson['choices'][0]["delta"]) == 0):
+                        # end of the data stream; gpt_replying_buffer is complete
+                        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
+                        break
+                    # main body of the data stream
+                    status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}"
+                    # if an exception is raised here the text is usually too long; see the output of get_full_error
+                    if has_content:
+                        # normal case
+                        gpt_replying_buffer = gpt_replying_buffer + chunkjson['choices'][0]["delta"]["content"]
+                    elif has_role:
+                        # role-only chunk sent by some third-party APIs: tolerated
+                        continue
+                    else:
+                        # outside of what a well-behaved API sends; a missing "content" key raises into the handler below
+                        if chunkjson['choices'][0]["delta"]["content"] is None: continue # null content: tolerated
+                        gpt_replying_buffer = gpt_replying_buffer + chunkjson['choices'][0]["delta"]["content"]
+
+                    history[-1] = gpt_replying_buffer
+                    chatbot[-1] = (history[-2], history[-1])
+                    yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # refresh the UI
+                except Exception as e:
+                    yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # refresh the UI
+                    chunk = get_full_error(chunk, stream_response)
+                    chunk_decoded = chunk.decode()
+                    error_msg = chunk_decoded
+                    chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
+                    yield from update_ui(chatbot=chatbot, history=history, msg="Json解析异常" + error_msg) # refresh the UI
+                    logger.error(error_msg)
+                    return
+        return  # return from stream-branch
+
+def handle_o1_model_special(response, inputs, llm_kwargs, chatbot, history):
+    # Non-streaming reply: the whole answer is taken from one JSON body and shown with a single UI refresh.
+    try:
+        reply = json.loads(response.content.decode())['choices'][0]["message"]["content"]
+        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=reply)
+        history[-1] = reply
+        chatbot[-1] = (history[-2], history[-1])
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+    except Exception:
+        yield from update_ui(chatbot=chatbot, history=history, msg="Json解析异常" + response.text) # refresh the UI
+
+def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):
+    # Replace the last chatbot reply with a readable explanation of `error_msg`; returns (chatbot, history).
+    from request_llms.bridge_all import model_info
+    openai_website = ' 请登录OpenAI查看详情 https://platform.openai.com/signup'
+    def show(reply): chatbot[-1] = (chatbot[-1][0], reply)
+    known_errors = [  # (marker searched in error_msg, fixed reply); tested in this order, first hit wins
+        ("Incorrect API key", "[Local Message] Incorrect API key. OpenAI以提供了不正确的API_KEY为由, 拒绝服务. " + openai_website),
+        ("exceeded your current quota", "[Local Message] You exceeded your current quota. OpenAI以账户额度不足为由, 拒绝服务." + openai_website),
+        ("account is not active", "[Local Message] Your account is not active. OpenAI以账户失效为由, 拒绝服务." + openai_website),
+        ("associated with a deactivated account", "[Local Message] You are associated with a deactivated account. OpenAI以账户失效为由, 拒绝服务." + openai_website),
+        ("API key has been deactivated", "[Local Message] API key has been deactivated. OpenAI以账户失效为由, 拒绝服务." + openai_website),
+        ("bad forward key", "[Local Message] Bad forward key. API2D账户额度不足."),
+        ("Not enough point", "[Local Message] Not enough point. API2D账户点数不足."),
+    ]
+    matched = [reply for marker, reply in known_errors if marker in error_msg]
+    if "reduce the length" in error_msg:
+        if len(history) >= 2: history[-1] = ""; history[-2] = "" # drop the overflowing turn: history[-2] is this input, history[-1] this output
+        history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
+                                               max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])) # per the original note, frees at least half of the history
+        show("[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
+    elif "does not exist" in error_msg:
+        show(f"[Local Message] Model {llm_kwargs['llm_model']} does not exist. 模型不存在, 或者您没有获得体验资格.")
+    elif matched:
+        show(matched[0])
+    else:
+        from toolbox import regular_txt_to_markdown
+        tb_str = '```\n' + trimmed_format_exc() + '```'
+        show(f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}")
+    return chatbot, history
+
+def generate_payload(inputs:str, llm_kwargs:dict, history:list, system_prompt:str, image_base64_array:list=[], has_multimodal_capacity:bool=False, stream:bool=True):
+    """
+    Gather all information, pick the model name and build the HTTP request; returns (headers, payload).
+    Raises AssertionError when llm_kwargs['api_key'] is not recognised as an api key.
+    """
+    from request_llms.bridge_all import model_info
+
+    if not is_any_api_key(llm_kwargs['api_key']):
+        raise AssertionError("你提供了错误的API_KEY。\n\n1. 临时解决方案：直接在输入区键入api_key，然后回车提交。\n\n2. 长效解决方案：在config.py中配置。")
+
+    if llm_kwargs['llm_model'].startswith('vllm-'):
+        api_key = 'no-api-key'
+    else:
+        api_key = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])
+
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {api_key}"
+    }
+    if API_ORG.startswith('org-'): headers.update({"OpenAI-Organization": API_ORG})
+    if llm_kwargs['llm_model'].startswith('azure-'):
+        headers.update({"api-key": api_key})
+        if llm_kwargs['llm_model'] in AZURE_CFG_ARRAY.keys():
+            azure_api_key_unshared = AZURE_CFG_ARRAY[llm_kwargs['llm_model']]["AZURE_API_KEY"]
+            headers.update({"api-key": azure_api_key_unshared})
+
+    if has_multimodal_capacity:
+        # multimodal messages are used when the model is multimodal (has_multimodal_capacity) and either
+        # 1. the current input carries images (len(image_base64_array) > 0), or
+        # 2. the history carries images ( any([contain_base64(h) for h in history]) )
+        # otherwise the plain-text message format below is used
+        enable_multimodal_capacity = (len(image_base64_array) > 0) or any([contain_base64(h) for h in history])
+    else:
+        enable_multimodal_capacity = False
+
+    conversation_cnt = len(history) // 2
+    openai_disable_system_prompt = model_info[llm_kwargs['llm_model']].get('openai_disable_system_prompt', False)
+
+    if openai_disable_system_prompt:
+        messages = [{"role": "user", "content": system_prompt}]
+    else:
+        messages = [{"role": "system", "content": system_prompt}]
+
+    if not enable_multimodal_capacity:
+        # plain-text messages: embedded images are stripped from the history
+        if conversation_cnt:
+            for index in range(0, 2*conversation_cnt, 2):
+                what_i_have_asked = {}
+                what_i_have_asked["role"] = "user"
+                what_i_have_asked["content"] = remove_image_if_contain_base64(history[index])
+                what_gpt_answer = {}
+                what_gpt_answer["role"] = "assistant"
+                what_gpt_answer["content"] = remove_image_if_contain_base64(history[index+1])
+                if what_i_have_asked["content"] != "":
+                    if what_gpt_answer["content"] == "": continue
+                    if what_gpt_answer["content"] == timeout_bot_msg: continue
+                    messages.append(what_i_have_asked)
+                    messages.append(what_gpt_answer)
+                else:
+                    messages[-1]['content'] = what_gpt_answer['content']
+        what_i_ask_now = {}
+        what_i_ask_now["role"] = "user"
+        what_i_ask_now["content"] = inputs
+        messages.append(what_i_ask_now)
+    else:
+        # multimodal messages: embedded images become image_url parts
+        if conversation_cnt:
+            for index in range(0, 2*conversation_cnt, 2):
+                what_i_have_asked = {}
+                what_i_have_asked["role"] = "user"
+                what_i_have_asked["content"] = append_image_if_contain_base64(history[index])
+                what_gpt_answer = {}
+                what_gpt_answer["role"] = "assistant"
+                what_gpt_answer["content"] = append_image_if_contain_base64(history[index+1])
+                if what_i_have_asked["content"] != "":
+                    if what_gpt_answer["content"] == "": continue
+                    if what_gpt_answer["content"] == timeout_bot_msg: continue
+                    messages.append(what_i_have_asked)
+                    messages.append(what_gpt_answer)
+                else:
+                    messages[-1]['content'] = what_gpt_answer['content']
+        what_i_ask_now = {}
+        what_i_ask_now["role"] = "user"
+        what_i_ask_now["content"] = []
+        what_i_ask_now["content"].append({
+            "type": "text",
+            "text": inputs
+        })
+        for image_base64 in image_base64_array:
+            what_i_ask_now["content"].append({
+                "type": "image_url",
+                "image_url": {
+                    "url": f"data:image/jpeg;base64,{image_base64}"
+                }
+            })
+        messages.append(what_i_ask_now)
+
+    model = llm_kwargs['llm_model']
+    if llm_kwargs['llm_model'].startswith('api2d-'):
+        model = llm_kwargs['llm_model'][len('api2d-'):]
+    if llm_kwargs['llm_model'].startswith('one-api-'):
+        model = llm_kwargs['llm_model'][len('one-api-'):]
+        model, _ = read_one_api_model_name(model)
+    if llm_kwargs['llm_model'].startswith('vllm-'):
+        model = llm_kwargs['llm_model'][len('vllm-'):]
+        model, _ = read_one_api_model_name(model)
+    if llm_kwargs['llm_model'].startswith('openrouter-'):
+        model = llm_kwargs['llm_model'][len('openrouter-'):]
+        model, _ = read_one_api_model_name(model)  # unpack the pair as in the branches above; "model" must be the name only
+    if model == "gpt-3.5-random": # pick one at random to get around the openai rate limit
+        model = random.choice([
+            "gpt-3.5-turbo",
+            "gpt-3.5-turbo-16k",
+            "gpt-3.5-turbo-1106",
+            "gpt-3.5-turbo-0613",
+            "gpt-3.5-turbo-16k-0613",
+            "gpt-3.5-turbo-0301",
+        ])
+
+    payload = {
+        "model": model,
+        "messages": messages,
+        "temperature": llm_kwargs['temperature'],  # 1.0,
+        "top_p": llm_kwargs['top_p'],  # 1.0,
+        "n": 1,
+        "stream": stream,
+    }
+
+    return headers,payload
+
+
--- a/request_llms/bridge_stackclaude.py
+++ b/request_llms/bridge_stackclaude.py
@@ -1,12 +1,13 @@
+import time
+import asyncio
+import threading
+import importlib
+
 from .bridge_newbingfree import preprocess_newbing_out, preprocess_newbing_out_simple
 from multiprocessing import Process, Pipe
 from toolbox import update_ui, get_conf, trimmed_format_exc
-import threading
-import importlib
-import logging
-import time
+from loguru import logger as logging
 from toolbox import get_conf
-import asyncio

 load_message = "正在加载Claude组件，请稍候..."

--- a/request_llms/bridge_tgui.py
+++ b/request_llms/bridge_tgui.py
@@ -8,7 +8,6 @@ import json
 import random
 import string
 import websockets
-import logging
 import time
 import threading
 import importlib
--- a/request_llms/com_google.py
+++ b/request_llms/com_google.py
@@ -218,5 +218,3 @@ class GoogleChatInit:

 if __name__ == "__main__":
    google = GoogleChatInit()
-    # print(gootle.generate_message_payload('你好呀', {},  ['123123', '3123123'], ''))
-    # gootle.input_encode_handle('123123[123123](./123123), ![53425](./asfafa/fff.jpg)')
--- a/request_llms/com_qwenapi.py
+++ b/request_llms/com_qwenapi.py
@@ -1,7 +1,6 @@
 from http import HTTPStatus
 from toolbox import get_conf
 import threading
-import logging

 timeout_bot_msg = '[Local Message] Request timeout. Network error.'

--- a/request_llms/com_skylark2api.py
+++ b/request_llms/com_skylark2api.py
@@ -1,7 +1,7 @@
-from toolbox import get_conf
-import threading
-import logging
 import os
+import threading
+from toolbox import get_conf
+from loguru import logger as logging

 timeout_bot_msg = '[Local Message] Request timeout. Network error.'
 #os.environ['VOLC_ACCESSKEY'] = ''
--- a/request_llms/com_sparkapi.py
+++ b/request_llms/com_sparkapi.py
@@ -1,17 +1,18 @@
-from toolbox import get_conf, get_pictures_list, encode_image
 import base64
 import datetime
 import hashlib
 import hmac
 import json
-from urllib.parse import urlparse
 import ssl
+import websocket
+import threading
+from toolbox import get_conf, get_pictures_list, encode_image
+from loguru import logger
+from urllib.parse import urlparse
 from datetime import datetime
 from time import mktime
 from urllib.parse import urlencode
 from wsgiref.handlers import format_date_time
-import websocket
-import threading, time

 timeout_bot_msg = '[Local Message] Request timeout. Network error.'

@@ -104,7 +105,7 @@ class SparkRequestInstance():
            if llm_kwargs['most_recent_uploaded'].get('path'):
                file_manifest = get_pictures_list(llm_kwargs['most_recent_uploaded']['path'])
                if len(file_manifest) > 0:
-                    print('正在使用讯飞图片理解API')
+                    logger.info('正在使用讯飞图片理解API')
                    gpt_url = self.gpt_url_img
        wsParam = Ws_Param(self.appid, self.api_key, self.api_secret, gpt_url)
        websocket.enableTrace(False)
@@ -123,7 +124,7 @@ class SparkRequestInstance():
            data = json.loads(message)
            code = data['header']['code']
            if code != 0:
-                print(f'请求错误: {code}, {data}')
+                logger.error(f'请求错误: {code}, {data}')
                self.result_buf += str(data)
                ws.close()
                self.time_to_exit_event.set()
@@ -140,7 +141,7 @@ class SparkRequestInstance():

        # 收到websocket错误的处理
        def on_error(ws, error):
-            print("error:", error)
+            logger.error("error:", error)
            self.time_to_exit_event.set()

        # 收到websocket关闭的处理
--- a/request_llms/com_taichu.py
+++ b/request_llms/com_taichu.py
@@ -4,7 +4,7 @@
 # @Descr   : 兼容最新的智谱Ai
 from toolbox import get_conf
 from toolbox import get_conf, encode_image, get_pictures_list
-import logging, os, requests
+import requests
 import json
 class TaichuChatInit:
    def __init__(self): ...
--- a/request_llms/com_zhipuglm.py
+++ b/request_llms/com_zhipuglm.py
@@ -5,7 +5,8 @@
 from toolbox import get_conf
 from zhipuai import ZhipuAI
 from toolbox import get_conf, encode_image, get_pictures_list
-import logging, os
+from loguru import logger
+import os


 def input_encode_handler(inputs:str, llm_kwargs:dict):
@@ -24,7 +25,7 @@ class ZhipuChatInit:
    def __init__(self):
        ZHIPUAI_API_KEY, ZHIPUAI_MODEL = get_conf("ZHIPUAI_API_KEY", "ZHIPUAI_MODEL")
        if len(ZHIPUAI_MODEL) > 0:
-            logging.error('ZHIPUAI_MODEL 配置项选项已经弃用，请在LLM_MODEL中配置')
+            logger.error('ZHIPUAI_MODEL 配置项选项已经弃用，请在LLM_MODEL中配置')
        self.zhipu_bro = ZhipuAI(api_key=ZHIPUAI_API_KEY)
        self.model = ''

@@ -37,8 +38,7 @@ class ZhipuChatInit:
            what_i_have_asked['content'].append({"type": 'text', "text": user_input})
            if encode_img:
                if len(encode_img) > 1:
-                    logging.warning("glm-4v只支持一张图片,将只取第一张图片进行处理")
-                    print("glm-4v只支持一张图片,将只取第一张图片进行处理")
+                    logger.warning("glm-4v只支持一张图片,将只取第一张图片进行处理")
                img_d = {"type": "image_url",
                            "image_url": {
                                "url": encode_img[0]['data']
--- a/request_llms/local_llm_class.py
+++ b/request_llms/local_llm_class.py
@@ -5,6 +5,7 @@ from toolbox import ChatBotWithCookies
 from multiprocessing import Process, Pipe
 from contextlib import redirect_stdout
 from request_llms.queued_pipe import create_queue_pipe
+from loguru import logger

 class ThreadLock(object):
    def __init__(self):
@@ -51,7 +52,7 @@ def reset_tqdm_output():
            getattr(sys.stdout, 'flush', lambda: None)()

        def fp_write(s):
-            print(s)
+            logger.info(s)
        last_len = [0]

        def print_status(s):
@@ -199,7 +200,7 @@ class LocalLLMHandle(Process):
                if res.startswith(self.std_tag):
                    new_output = res[len(self.std_tag):]
                    std_out = std_out[:std_out_clip_len]
-                    print(new_output, end='')
+                    logger.info(new_output, end='')
                    std_out = new_output + std_out
                    yield self.std_tag + '\n```\n' + std_out + '\n```\n'
                elif res == '[Finish]':
--- a/request_llms/oai_std_model_template.py
+++ b/request_llms/oai_std_model_template.py
@@ -1,8 +1,8 @@
 import json
 import time
-import logging
 import traceback
 import requests
+from loguru import logger

 # config_private.py放自己的秘密如API和代理网址
 # 读取时首先看是否存在私密的config_private配置文件（不受git管控），如果有，则覆盖原config文件
@@ -106,10 +106,7 @@ def generate_message(input, model, key, history, max_output_token, system_prompt
        "stream": True,
        "max_tokens": max_output_token,
    }
-    try:
-        print(f" {model} : {conversation_cnt} : {input[:100]} ..........")
-    except:
-        print("输入中可能存在乱码。")
+
    return headers, playload


@@ -196,7 +193,7 @@ def get_predict_function(
                if retry > MAX_RETRY:
                    raise TimeoutError
                if MAX_RETRY != 0:
-                    print(f"请求超时，正在重试 ({retry}/{MAX_RETRY}) ……")
+                    logger.error(f"请求超时，正在重试 ({retry}/{MAX_RETRY}) ……")

        stream_response = response.iter_lines()
        result = ""
@@ -219,18 +216,17 @@ def get_predict_function(
            ):
                chunk = get_full_error(chunk, stream_response)
                chunk_decoded = chunk.decode()
-                print(chunk_decoded)
+                logger.error(chunk_decoded)
                raise RuntimeError(
                    f"API异常,请检测终端输出。可能的原因是:{finish_reason}"
                )
            if chunk:
                try:
                    if finish_reason == "stop":
-                        logging.info(f"[response] {result}")
+                        if not console_slience:
+                            print(f"[response] {result}")
                        break
                    result += response_text
-                    if not console_slience:
-                        print(response_text, end="")
                    if observe_window is not None:
                        # 观测窗，把已经获取的数据显示出去
                        if len(observe_window) >= 1:
@@ -243,7 +239,7 @@ def get_predict_function(
                    chunk = get_full_error(chunk, stream_response)
                    chunk_decoded = chunk.decode()
                    error_msg = chunk_decoded
-                    print(error_msg)
+                    logger.error(error_msg)
                    raise RuntimeError("Json解析不合常规")
        return result

@@ -276,7 +272,7 @@ def get_predict_function(
            inputs, history = handle_core_functionality(
                additional_fn, inputs, history, chatbot
            )
-        logging.info(f"[raw_input] {inputs}")
+        logger.info(f"[raw_input] {inputs}")
        chatbot.append((inputs, ""))
        yield from update_ui(
            chatbot=chatbot, history=history, msg="等待响应"
@@ -376,11 +372,11 @@ def get_predict_function(
                            history=history,
                            msg="API异常:" + chunk_decoded,
                        )  # 刷新界面
-                        print(chunk_decoded)
+                        logger.error(chunk_decoded)
                        return

                    if finish_reason == "stop":
-                        logging.info(f"[response] {gpt_replying_buffer}")
+                        logger.info(f"[response] {gpt_replying_buffer}")
                        break
                    status_text = f"finish_reason: {finish_reason}"
                    gpt_replying_buffer += response_text
@@ -403,7 +399,7 @@ def get_predict_function(
                    yield from update_ui(
                        chatbot=chatbot, history=history, msg="Json异常" + chunk_decoded
                    )  # 刷新界面
-                    print(chunk_decoded)
+                    logger.error(chunk_decoded)
                    return

    return predict_no_ui_long_connection, predict