diff --git a/.gitignore b/.gitignore index c4df2874..286a67d8 100644 --- a/.gitignore +++ b/.gitignore @@ -146,9 +146,9 @@ debug* private* crazy_functions/test_project/pdf_and_word crazy_functions/test_samples -request_llm/jittorllms +request_llms/jittorllms multi-language -request_llm/moss +request_llms/moss media flagged -request_llm/ChatGLM-6b-onnx-u8s8 +request_llms/ChatGLM-6b-onnx-u8s8 diff --git a/README.md b/README.md index 0378eaa1..5e1f2d4f 100644 --- a/README.md +++ b/README.md @@ -129,11 +129,11 @@ python -m pip install -r requirements.txt # 这个步骤和pip安装一样的步 【可选步骤】如果需要支持清华ChatGLM2/复旦MOSS作为后端,需要额外安装更多依赖(前提条件:熟悉Python + 用过Pytorch + 电脑配置够强): ```sh # 【可选步骤I】支持清华ChatGLM2。清华ChatGLM备注:如果遇到"Call ChatGLM fail 不能正常加载ChatGLM的参数" 错误,参考如下: 1:以上默认安装的为torch+cpu版,使用cuda需要卸载torch重新安装torch+cuda; 2:如因本机配置不够无法加载模型,可以修改request_llm/bridge_chatglm.py中的模型精度, 将 AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) 都修改为 AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True) -python -m pip install -r request_llm/requirements_chatglm.txt +python -m pip install -r request_llms/requirements_chatglm.txt # 【可选步骤II】支持复旦MOSS -python -m pip install -r request_llm/requirements_moss.txt -git clone --depth=1 https://github.com/OpenLMLab/MOSS.git request_llm/moss # 注意执行此行代码时,必须处于项目根路径 +python -m pip install -r request_llms/requirements_moss.txt +git clone --depth=1 https://github.com/OpenLMLab/MOSS.git request_llms/moss # 注意执行此行代码时,必须处于项目根路径 # 【可选步骤III】支持RWKV Runner 参考wiki:https://github.com/binary-husky/gpt_academic/wiki/%E9%80%82%E9%85%8DRWKV-Runner diff --git a/check_proxy.py b/check_proxy.py index 740eed23..977ac276 100644 --- a/check_proxy.py +++ b/check_proxy.py @@ -46,7 +46,7 @@ def backup_and_download(current_version, remote_version): return new_version_dir os.makedirs(new_version_dir) shutil.copytree('./', backup_dir, ignore=lambda x, y: ['history']) - proxies, = get_conf('proxies') + proxies = get_conf('proxies') r = requests.get( 'https://github.com/binary-husky/chatgpt_academic/archive/refs/heads/master.zip', proxies=proxies, stream=True) zip_file_path = backup_dir+'/master.zip' @@ -113,7 +113,7 @@ def auto_update(raise_error=False): import requests import time import json - proxies, = get_conf('proxies') + proxies = get_conf('proxies') response = requests.get( "https://raw.githubusercontent.com/binary-husky/chatgpt_academic/master/version", proxies=proxies, timeout=5) remote_json_data = json.loads(response.text) @@ -156,7 +156,7 @@ def auto_update(raise_error=False): def warm_up_modules(): print('正在执行一些模块的预热...') from toolbox import ProxyNetworkActivate - from request_llm.bridge_all import model_info + from request_llms.bridge_all import model_info with ProxyNetworkActivate("Warmup_Modules"): enc = model_info["gpt-3.5-turbo"]['tokenizer'] enc.encode("模块预热", disallowed_special=()) @@ -167,5 +167,5 @@ if __name__ == '__main__': import os os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染 from toolbox import get_conf - proxies, = get_conf('proxies') + proxies = get_conf('proxies') check_proxy(proxies) diff --git a/config.py b/config.py index b4f00a63..06840dd8 100644 --- a/config.py +++ b/config.py @@ -91,7 +91,7 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5", "api2d-gpt-3.5-turbo", 'api2d-gpt-3.5-turbo-16k', "gpt-4", "gpt-4-32k", "azure-gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "claude-2"] -# P.S. 其他可用的模型还包括 ["qianfan", "llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random" +# P.S. 
其他可用的模型还包括 ["zhipuai", "qianfan", "llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random" # "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"] @@ -140,7 +140,7 @@ SSL_CERTFILE = "" API_ORG = "" -# 如果需要使用Slack Claude,使用教程详情见 request_llm/README.md +# 如果需要使用Slack Claude,使用教程详情见 request_llms/README.md SLACK_CLAUDE_BOT_ID = '' SLACK_CLAUDE_USER_TOKEN = '' @@ -176,6 +176,11 @@ XFYUN_API_SECRET = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" XFYUN_API_KEY = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +# 接入智谱大模型 +ZHIPUAI_API_KEY = "" +ZHIPUAI_MODEL = "chatglm_turbo" + + # Claude API KEY ANTHROPIC_API_KEY = "" @@ -218,6 +223,10 @@ WHEN_TO_USE_PROXY = ["Download_LLM", "Download_Gradio_Theme", "Connect_Grobid", "Warmup_Modules", "Nougat_Download", "AutoGen"] +# *实验性功能*: 自动检测并屏蔽失效的KEY,请勿使用 +BLOCK_INVALID_APIKEY = False + + # 自定义按钮的最大数量限制 NUM_CUSTOM_BASIC_BTN = 4 diff --git a/crazy_functional.py b/crazy_functional.py index 795bd5f7..2d7fa74b 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -498,7 +498,7 @@ def get_crazy_functions(): try: from toolbox import get_conf - ENABLE_AUDIO, = get_conf('ENABLE_AUDIO') + ENABLE_AUDIO = get_conf('ENABLE_AUDIO') if ENABLE_AUDIO: from crazy_functions.语音助手 import 语音助手 function_plugins.update({ diff --git a/crazy_functions/Latex全文润色.py b/crazy_functions/Latex全文润色.py index 462f9657..268a3446 100644 --- a/crazy_functions/Latex全文润色.py +++ b/crazy_functions/Latex全文润色.py @@ -11,7 +11,7 @@ class PaperFileGroup(): self.sp_file_tag = [] # count_token - from request_llm.bridge_all import model_info + from request_llms.bridge_all import model_info enc = model_info["gpt-3.5-turbo"]['tokenizer'] def get_token_num(txt): return len(enc.encode(txt, disallowed_special=())) self.get_token_num = get_token_num diff --git a/crazy_functions/Latex全文翻译.py b/crazy_functions/Latex全文翻译.py index b5aad71b..697f5ac8 100644 --- a/crazy_functions/Latex全文翻译.py +++ b/crazy_functions/Latex全文翻译.py @@ -11,7 +11,7 @@ class PaperFileGroup(): self.sp_file_tag = [] # count_token - from request_llm.bridge_all import model_info + from request_llms.bridge_all import model_info enc = model_info["gpt-3.5-turbo"]['tokenizer'] def get_token_num(txt): return len(enc.encode(txt, disallowed_special=())) self.get_token_num = get_token_num diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py index 7ec5a4b2..9edfea68 100644 --- a/crazy_functions/Latex输出PDF结果.py +++ b/crazy_functions/Latex输出PDF结果.py @@ -129,7 +129,7 @@ def arxiv_download(chatbot, history, txt, allow_cache=True): yield from update_ui_lastest_msg("调用缓存", chatbot=chatbot, history=history) # 刷新界面 else: yield from update_ui_lastest_msg("开始下载", chatbot=chatbot, history=history) # 刷新界面 - proxies, = get_conf('proxies') + proxies = get_conf('proxies') r = requests.get(url_tar, proxies=proxies) with open(dst, 'wb+') as f: f.write(r.content) diff --git a/crazy_functions/agent_fns/pipe.py b/crazy_functions/agent_fns/pipe.py index 0e430b40..5ebe3fc6 100644 --- a/crazy_functions/agent_fns/pipe.py +++ b/crazy_functions/agent_fns/pipe.py @@ -20,7 +20,7 @@ class PluginMultiprocessManager(): self.system_prompt = system_prompt self.web_port = web_port self.alive = True - self.use_docker, = get_conf('AUTOGEN_USE_DOCKER') + self.use_docker = get_conf('AUTOGEN_USE_DOCKER') # create a thread to monitor self.heartbeat, terminate the instance if no heartbeat for a long time timeout_seconds = 5*60 diff --git a/crazy_functions/crazy_utils.py 
b/crazy_functions/crazy_utils.py index 8533d088..a23c732b 100644 --- a/crazy_functions/crazy_utils.py +++ b/crazy_functions/crazy_utils.py @@ -5,7 +5,7 @@ import logging def input_clipping(inputs, history, max_token_limit): import numpy as np - from request_llm.bridge_all import model_info + from request_llms.bridge_all import model_info enc = model_info["gpt-3.5-turbo"]['tokenizer'] def get_token_num(txt): return len(enc.encode(txt, disallowed_special=())) @@ -63,7 +63,7 @@ def request_gpt_model_in_new_thread_with_ui_alive( """ import time from concurrent.futures import ThreadPoolExecutor - from request_llm.bridge_all import predict_no_ui_long_connection + from request_llms.bridge_all import predict_no_ui_long_connection # 用户反馈 chatbot.append([inputs_show_user, ""]) yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面 @@ -177,11 +177,11 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( """ import time, random from concurrent.futures import ThreadPoolExecutor - from request_llm.bridge_all import predict_no_ui_long_connection + from request_llms.bridge_all import predict_no_ui_long_connection assert len(inputs_array) == len(history_array) assert len(inputs_array) == len(sys_prompt_array) if max_workers == -1: # 读取配置文件 - try: max_workers, = get_conf('DEFAULT_WORKER_NUM') + try: max_workers = get_conf('DEFAULT_WORKER_NUM') except: max_workers = 8 if max_workers <= 0: max_workers = 3 # 屏蔽掉 chatglm的多线程,可能会导致严重卡顿 @@ -602,7 +602,7 @@ def get_files_from_everything(txt, type): # type='.md' import requests from toolbox import get_conf from toolbox import get_log_folder, gen_time_str - proxies, = get_conf('proxies') + proxies = get_conf('proxies') try: r = requests.get(txt, proxies=proxies) except: diff --git a/crazy_functions/latex_fns/latex_actions.py b/crazy_functions/latex_fns/latex_actions.py index 7e561df2..ead3bc4c 100644 --- a/crazy_functions/latex_fns/latex_actions.py +++ b/crazy_functions/latex_fns/latex_actions.py @@ -165,7 +165,7 @@ class LatexPaperFileGroup(): self.sp_file_tag = [] # count_token - from request_llm.bridge_all import model_info + from request_llms.bridge_all import model_info enc = model_info["gpt-3.5-turbo"]['tokenizer'] def get_token_num(txt): return len(enc.encode(txt, disallowed_special=())) self.get_token_num = get_token_num diff --git a/crazy_functions/pdf_fns/parse_pdf.py b/crazy_functions/pdf_fns/parse_pdf.py index 9853fd54..6646c5bf 100644 --- a/crazy_functions/pdf_fns/parse_pdf.py +++ b/crazy_functions/pdf_fns/parse_pdf.py @@ -14,7 +14,7 @@ import math class GROBID_OFFLINE_EXCEPTION(Exception): pass def get_avail_grobid_url(): - GROBID_URLS, = get_conf('GROBID_URLS') + GROBID_URLS = get_conf('GROBID_URLS') if len(GROBID_URLS) == 0: return None try: _grobid_url = random.choice(GROBID_URLS) # 随机负载均衡 @@ -103,7 +103,7 @@ def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_fi inputs_show_user_array = [] # get_token_num - from request_llm.bridge_all import model_info + from request_llms.bridge_all import model_info enc = model_info[llm_kwargs['llm_model']]['tokenizer'] def get_token_num(txt): return len(enc.encode(txt, disallowed_special=())) diff --git a/crazy_functions/vt_fns/vt_call_plugin.py b/crazy_functions/vt_fns/vt_call_plugin.py index 455ac88b..f33644d9 100644 --- a/crazy_functions/vt_fns/vt_call_plugin.py +++ b/crazy_functions/vt_fns/vt_call_plugin.py @@ -1,7 +1,7 @@ from pydantic import BaseModel, Field from typing import List from toolbox import update_ui_lastest_msg, disable_auto_promotion -from 
request_llm.bridge_all import predict_no_ui_long_connection +from request_llms.bridge_all import predict_no_ui_long_connection from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError import copy, json, pickle, os, sys, time diff --git a/crazy_functions/vt_fns/vt_modify_config.py b/crazy_functions/vt_fns/vt_modify_config.py index e7fd745c..58a8531e 100644 --- a/crazy_functions/vt_fns/vt_modify_config.py +++ b/crazy_functions/vt_fns/vt_modify_config.py @@ -1,13 +1,13 @@ from pydantic import BaseModel, Field from typing import List from toolbox import update_ui_lastest_msg, get_conf -from request_llm.bridge_all import predict_no_ui_long_connection +from request_llms.bridge_all import predict_no_ui_long_connection from crazy_functions.json_fns.pydantic_io import GptJsonIO import copy, json, pickle, os, sys def modify_configuration_hot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention): - ALLOW_RESET_CONFIG, = get_conf('ALLOW_RESET_CONFIG') + ALLOW_RESET_CONFIG = get_conf('ALLOW_RESET_CONFIG') if not ALLOW_RESET_CONFIG: yield from update_ui_lastest_msg( lastmsg=f"当前配置不允许被修改!如需激活本功能,请在config.py中设置ALLOW_RESET_CONFIG=True后重启软件。", @@ -66,7 +66,7 @@ def modify_configuration_hot(txt, llm_kwargs, plugin_kwargs, chatbot, history, s ) def modify_configuration_reboot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention): - ALLOW_RESET_CONFIG, = get_conf('ALLOW_RESET_CONFIG') + ALLOW_RESET_CONFIG = get_conf('ALLOW_RESET_CONFIG') if not ALLOW_RESET_CONFIG: yield from update_ui_lastest_msg( lastmsg=f"当前配置不允许被修改!如需激活本功能,请在config.py中设置ALLOW_RESET_CONFIG=True后重启软件。", diff --git a/crazy_functions/下载arxiv论文翻译摘要.py b/crazy_functions/下载arxiv论文翻译摘要.py index 8b4a5037..c711cf45 100644 --- a/crazy_functions/下载arxiv论文翻译摘要.py +++ b/crazy_functions/下载arxiv论文翻译摘要.py @@ -43,7 +43,7 @@ def download_arxiv_(url_pdf): file_path = download_dir+title_str print('下载中') - proxies, = get_conf('proxies') + proxies = get_conf('proxies') r = requests.get(requests_pdf_url, proxies=proxies) with open(file_path, 'wb+') as f: f.write(r.content) @@ -77,7 +77,7 @@ def get_name(_url_): # print('在缓存中') # return arxiv_recall[_url_] - proxies, = get_conf('proxies') + proxies = get_conf('proxies') res = requests.get(_url_, proxies=proxies) bs = BeautifulSoup(res.text, 'html.parser') diff --git a/crazy_functions/图片生成.py b/crazy_functions/图片生成.py index 51a1baff..1b7dff5d 100644 --- a/crazy_functions/图片生成.py +++ b/crazy_functions/图片生成.py @@ -5,9 +5,9 @@ import datetime def gen_image(llm_kwargs, prompt, resolution="256x256"): import requests, json, time, os - from request_llm.bridge_all import model_info + from request_llms.bridge_all import model_info - proxies, = get_conf('proxies') + proxies = get_conf('proxies') # Set up OpenAI API key and model api_key = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model']) chat_endpoint = model_info[llm_kwargs['llm_model']]['endpoint'] diff --git a/crazy_functions/多智能体.py b/crazy_functions/多智能体.py index 5a4c4a58..99b3e86b 100644 --- a/crazy_functions/多智能体.py +++ b/crazy_functions/多智能体.py @@ -41,7 +41,7 @@ def 多智能体终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_ return # 检查当前的模型是否符合要求 - API_URL_REDIRECT, = get_conf('API_URL_REDIRECT') + API_URL_REDIRECT = get_conf('API_URL_REDIRECT') if len(API_URL_REDIRECT) > 0: chatbot.append([f"处理任务: {txt}", f"暂不支持中转."]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 diff --git a/crazy_functions/总结word文档.py b/crazy_functions/总结word文档.py index 4ea753cb..7c822e9f 
100644 --- a/crazy_functions/总结word文档.py +++ b/crazy_functions/总结word文档.py @@ -32,7 +32,7 @@ def 解析docx(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot print(file_content) # private_upload里面的文件名在解压zip后容易出现乱码(rar和7z格式正常),故可以只分析文章内容,不输入文件名 from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf - from request_llm.bridge_all import model_info + from request_llms.bridge_all import model_info max_token = model_info[llm_kwargs['llm_model']]['max_token'] TOKEN_LIMIT_PER_FRAGMENT = max_token * 3 // 4 paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf( diff --git a/crazy_functions/总结音视频.py b/crazy_functions/总结音视频.py index 7c113f47..b88775b4 100644 --- a/crazy_functions/总结音视频.py +++ b/crazy_functions/总结音视频.py @@ -41,7 +41,7 @@ def split_audio_file(filename, split_duration=1000): def AnalyAudio(parse_prompt, file_manifest, llm_kwargs, chatbot, history): import os, requests from moviepy.editor import AudioFileClip - from request_llm.bridge_all import model_info + from request_llms.bridge_all import model_info # 设置OpenAI密钥和模型 api_key = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model']) @@ -79,7 +79,7 @@ def AnalyAudio(parse_prompt, file_manifest, llm_kwargs, chatbot, history): chatbot.append([f"将 {i} 发送到openai音频解析终端 (whisper),当前参数:{parse_prompt}", "正在处理 ..."]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 - proxies, = get_conf('proxies') + proxies = get_conf('proxies') response = requests.post(url, headers=headers, files=files, data=data, proxies=proxies).text chatbot.append(["音频解析结果", response]) diff --git a/crazy_functions/批量Markdown翻译.py b/crazy_functions/批量Markdown翻译.py index 9485b1ec..2bdffc86 100644 --- a/crazy_functions/批量Markdown翻译.py +++ b/crazy_functions/批量Markdown翻译.py @@ -13,7 +13,7 @@ class PaperFileGroup(): self.sp_file_tag = [] # count_token - from request_llm.bridge_all import model_info + from request_llms.bridge_all import model_info enc = model_info["gpt-3.5-turbo"]['tokenizer'] def get_token_num(txt): return len(enc.encode(txt, disallowed_special=())) self.get_token_num = get_token_num @@ -118,7 +118,7 @@ def get_files_from_everything(txt, preference=''): if txt.startswith('http'): import requests from toolbox import get_conf - proxies, = get_conf('proxies') + proxies = get_conf('proxies') # 网络的远程文件 if preference == 'Github': logging.info('正在从github下载资源 ...') diff --git a/crazy_functions/批量总结PDF文档.py b/crazy_functions/批量总结PDF文档.py index b87d4825..57a6cdf1 100644 --- a/crazy_functions/批量总结PDF文档.py +++ b/crazy_functions/批量总结PDF文档.py @@ -21,7 +21,7 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, TOKEN_LIMIT_PER_FRAGMENT = 2500 from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf - from request_llm.bridge_all import model_info + from request_llms.bridge_all import model_info enc = model_info["gpt-3.5-turbo"]['tokenizer'] def get_token_num(txt): return len(enc.encode(txt, disallowed_special=())) paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf( diff --git a/crazy_functions/批量翻译PDF文档_多线程.py b/crazy_functions/批量翻译PDF文档_多线程.py index 79c4a262..f2e5cf99 100644 --- a/crazy_functions/批量翻译PDF文档_多线程.py +++ b/crazy_functions/批量翻译PDF文档_多线程.py @@ -95,7 +95,7 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, # 递归地切割PDF文件 from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf - from request_llm.bridge_all import model_info + from request_llms.bridge_all import model_info enc = model_info["gpt-3.5-turbo"]['tokenizer'] def 
get_token_num(txt): return len(enc.encode(txt, disallowed_special=())) paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf( diff --git a/crazy_functions/理解PDF文档内容.py b/crazy_functions/理解PDF文档内容.py index 3b2db998..4c0a1052 100644 --- a/crazy_functions/理解PDF文档内容.py +++ b/crazy_functions/理解PDF文档内容.py @@ -19,7 +19,7 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro TOKEN_LIMIT_PER_FRAGMENT = 2500 from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf - from request_llm.bridge_all import model_info + from request_llms.bridge_all import model_info enc = model_info["gpt-3.5-turbo"]['tokenizer'] def get_token_num(txt): return len(enc.encode(txt, disallowed_special=())) paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf( diff --git a/crazy_functions/联网的ChatGPT.py b/crazy_functions/联网的ChatGPT.py index 4ed9aebf..6fa50ec2 100644 --- a/crazy_functions/联网的ChatGPT.py +++ b/crazy_functions/联网的ChatGPT.py @@ -2,7 +2,7 @@ from toolbox import CatchException, update_ui from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping import requests from bs4 import BeautifulSoup -from request_llm.bridge_all import model_info +from request_llms.bridge_all import model_info def google(query, proxies): query = query # 在此处替换您要搜索的关键词 @@ -72,7 +72,7 @@ def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, s # ------------- < 第1步:爬取搜索引擎的结果 > ------------- from toolbox import get_conf - proxies, = get_conf('proxies') + proxies = get_conf('proxies') urls = google(txt, proxies) history = [] if len(urls) == 0: diff --git a/crazy_functions/联网的ChatGPT_bing版.py b/crazy_functions/联网的ChatGPT_bing版.py index db5adb79..009ebdce 100644 --- a/crazy_functions/联网的ChatGPT_bing版.py +++ b/crazy_functions/联网的ChatGPT_bing版.py @@ -2,7 +2,7 @@ from toolbox import CatchException, update_ui from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping import requests from bs4 import BeautifulSoup -from request_llm.bridge_all import model_info +from request_llms.bridge_all import model_info def bing_search(query, proxies=None): @@ -72,7 +72,7 @@ def 连接bing搜索回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, histor # ------------- < 第1步:爬取搜索引擎的结果 > ------------- from toolbox import get_conf - proxies, = get_conf('proxies') + proxies = get_conf('proxies') urls = bing_search(txt, proxies) history = [] if len(urls) == 0: diff --git a/crazy_functions/虚空终端.py b/crazy_functions/虚空终端.py index 5f33249e..439e71ca 100644 --- a/crazy_functions/虚空终端.py +++ b/crazy_functions/虚空终端.py @@ -48,7 +48,7 @@ from pydantic import BaseModel, Field from typing import List from toolbox import CatchException, update_ui, is_the_upload_folder from toolbox import update_ui_lastest_msg, disable_auto_promotion -from request_llm.bridge_all import predict_no_ui_long_connection +from request_llms.bridge_all import predict_no_ui_long_connection from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive from crazy_functions.crazy_utils import input_clipping from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError diff --git a/crazy_functions/解析JupyterNotebook.py b/crazy_functions/解析JupyterNotebook.py index d4a3b49e..709b7e1c 100644 --- a/crazy_functions/解析JupyterNotebook.py +++ b/crazy_functions/解析JupyterNotebook.py @@ -13,7 +13,7 @@ class PaperFileGroup(): self.sp_file_tag = [] # count_token - from request_llm.bridge_all import model_info + from request_llms.bridge_all import model_info enc = 
model_info["gpt-3.5-turbo"]['tokenizer'] def get_token_num(txt): return len( enc.encode(txt, disallowed_special=())) diff --git a/crazy_functions/语音助手.py b/crazy_functions/语音助手.py index f48286df..3e93ceae 100644 --- a/crazy_functions/语音助手.py +++ b/crazy_functions/语音助手.py @@ -2,7 +2,7 @@ from toolbox import update_ui from toolbox import CatchException, get_conf, markdown_convertion from crazy_functions.crazy_utils import input_clipping from crazy_functions.agent_fns.watchdog import WatchDog -from request_llm.bridge_all import predict_no_ui_long_connection +from request_llms.bridge_all import predict_no_ui_long_connection import threading, time import numpy as np from .live_audio.aliyunASR import AliyunASR diff --git a/crazy_functions/谷歌检索小助手.py b/crazy_functions/谷歌检索小助手.py index dae8a2bb..5924a286 100644 --- a/crazy_functions/谷歌检索小助手.py +++ b/crazy_functions/谷歌检索小助手.py @@ -17,7 +17,7 @@ def get_meta_information(url, chatbot, history): from urllib.parse import urlparse session = requests.session() - proxies, = get_conf('proxies') + proxies = get_conf('proxies') headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36', 'Accept-Encoding': 'gzip, deflate, br', diff --git a/docker-compose.yml b/docker-compose.yml index dd40dd12..9472a0f9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -137,7 +137,7 @@ services: # P.S. 通过对 command 进行微调,可以便捷地安装额外的依赖 # command: > - # bash -c "pip install -r request_llm/requirements_qwen.txt && python3 -u main.py" + # bash -c "pip install -r request_llms/requirements_qwen.txt && python3 -u main.py" ### =================================================== ### 【方案三】 如果需要运行ChatGPT + LLAMA + 盘古 + RWKV本地模型 diff --git a/docs/GithubAction+AllCapacity b/docs/GithubAction+AllCapacity index bf9482d3..4ba0e31a 100644 --- a/docs/GithubAction+AllCapacity +++ b/docs/GithubAction+AllCapacity @@ -19,13 +19,13 @@ RUN python3 -m pip install aliyun-python-sdk-core==2.13.3 pyOpenSSL webrtcvad sc WORKDIR /gpt RUN git clone --depth=1 https://github.com/binary-husky/gpt_academic.git WORKDIR /gpt/gpt_academic -RUN git clone --depth=1 https://github.com/OpenLMLab/MOSS.git request_llm/moss +RUN git clone --depth=1 https://github.com/OpenLMLab/MOSS.git request_llms/moss RUN python3 -m pip install -r requirements.txt -RUN python3 -m pip install -r request_llm/requirements_moss.txt -RUN python3 -m pip install -r request_llm/requirements_qwen.txt -RUN python3 -m pip install -r request_llm/requirements_chatglm.txt -RUN python3 -m pip install -r request_llm/requirements_newbing.txt +RUN python3 -m pip install -r request_llms/requirements_moss.txt +RUN python3 -m pip install -r request_llms/requirements_qwen.txt +RUN python3 -m pip install -r request_llms/requirements_chatglm.txt +RUN python3 -m pip install -r request_llms/requirements_newbing.txt RUN python3 -m pip install nougat-ocr diff --git a/docs/GithubAction+ChatGLM+Moss b/docs/GithubAction+ChatGLM+Moss index 3087d551..3212dc2f 100644 --- a/docs/GithubAction+ChatGLM+Moss +++ b/docs/GithubAction+ChatGLM+Moss @@ -14,12 +14,12 @@ RUN python3 -m pip install torch --extra-index-url https://download.pytorch.org/ WORKDIR /gpt RUN git clone --depth=1 https://github.com/binary-husky/gpt_academic.git WORKDIR /gpt/gpt_academic -RUN git clone https://github.com/OpenLMLab/MOSS.git request_llm/moss +RUN git clone https://github.com/OpenLMLab/MOSS.git request_llms/moss RUN python3 -m pip install -r requirements.txt -RUN python3 -m pip install -r 
request_llm/requirements_moss.txt -RUN python3 -m pip install -r request_llm/requirements_qwen.txt -RUN python3 -m pip install -r request_llm/requirements_chatglm.txt -RUN python3 -m pip install -r request_llm/requirements_newbing.txt +RUN python3 -m pip install -r request_llms/requirements_moss.txt +RUN python3 -m pip install -r request_llms/requirements_qwen.txt +RUN python3 -m pip install -r request_llms/requirements_chatglm.txt +RUN python3 -m pip install -r request_llms/requirements_newbing.txt diff --git a/docs/GithubAction+JittorLLMs b/docs/GithubAction+JittorLLMs index dc883bcf..189eb244 100644 --- a/docs/GithubAction+JittorLLMs +++ b/docs/GithubAction+JittorLLMs @@ -16,12 +16,12 @@ WORKDIR /gpt RUN git clone --depth=1 https://github.com/binary-husky/gpt_academic.git WORKDIR /gpt/gpt_academic RUN python3 -m pip install -r requirements.txt -RUN python3 -m pip install -r request_llm/requirements_chatglm.txt -RUN python3 -m pip install -r request_llm/requirements_newbing.txt -RUN python3 -m pip install -r request_llm/requirements_jittorllms.txt -i https://pypi.jittor.org/simple -I +RUN python3 -m pip install -r request_llms/requirements_chatglm.txt +RUN python3 -m pip install -r request_llms/requirements_newbing.txt +RUN python3 -m pip install -r request_llms/requirements_jittorllms.txt -i https://pypi.jittor.org/simple -I # 下载JittorLLMs -RUN git clone https://github.com/binary-husky/JittorLLMs.git --depth 1 request_llm/jittorllms +RUN git clone https://github.com/binary-husky/JittorLLMs.git --depth 1 request_llms/jittorllms # 禁用缓存,确保更新代码 ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache diff --git a/docs/README.md.German.md b/docs/README.md.German.md index d514de30..b7a53f1f 100644 --- a/docs/README.md.German.md +++ b/docs/README.md.German.md @@ -103,12 +103,12 @@ python -m pip install -r requirements.txt # Same step as pip installation [Optional Step] If supporting Tsinghua ChatGLM/Fudan MOSS as backend, additional dependencies need to be installed (Prerequisites: Familiar with Python + Used Pytorch + Sufficient computer configuration): ```sh -# [Optional Step I] Support Tsinghua ChatGLM. Remark: If encountering "Call ChatGLM fail Cannot load ChatGLM parameters", please refer to the following: 1: The above default installation is torch+cpu version. To use cuda, uninstall torch and reinstall torch+cuda; 2: If the model cannot be loaded due to insufficient machine configuration, you can modify the model precision in `request_llm/bridge_chatglm.py`, and modify all AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) to AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True) -python -m pip install -r request_llm/requirements_chatglm.txt +# [Optional Step I] Support Tsinghua ChatGLM. Remark: If encountering "Call ChatGLM fail Cannot load ChatGLM parameters", please refer to the following: 1: The above default installation is torch+cpu version. 
To use cuda, uninstall torch and reinstall torch+cuda; 2: If the model cannot be loaded due to insufficient machine configuration, you can modify the model precision in `request_llms/bridge_chatglm.py`, and modify all AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) to AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True) +python -m pip install -r request_llms/requirements_chatglm.txt # [Optional Step II] Support Fudan MOSS -python -m pip install -r request_llm/requirements_moss.txt -git clone https://github.com/OpenLMLab/MOSS.git request_llm/moss # When executing this line of code, you must be in the project root path +python -m pip install -r request_llms/requirements_moss.txt +git clone https://github.com/OpenLMLab/MOSS.git request_llms/moss # When executing this line of code, you must be in the project root path # [Optional Step III] Make sure the AVAIL_LLM_MODELS in the config.py configuration file contains the expected models. Currently supported models are as follows (jittorllms series currently only supports docker solutions): AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "newbing", "moss"] # + ["jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"] diff --git a/docs/README.md.Italian.md b/docs/README.md.Italian.md index 76efe185..1e24a535 100644 --- a/docs/README.md.Italian.md +++ b/docs/README.md.Italian.md @@ -109,12 +109,12 @@ python -m pip install -r requirements.txt # questo passaggio funziona allo stess 【Passaggio facoltativo】 Se si desidera supportare ChatGLM di Tsinghua/MOSS di Fudan come backend, è necessario installare ulteriori dipendenze (prerequisiti: conoscenza di Python, esperienza con Pytorch e computer sufficientemente potente): ```sh -# 【Passaggio facoltativo I】 Supporto a ChatGLM di Tsinghua. Note su ChatGLM di Tsinghua: in caso di errore "Call ChatGLM fail 不能正常加载ChatGLM的参数" , fare quanto segue: 1. Per impostazione predefinita, viene installata la versione di torch + cpu; per usare CUDA, è necessario disinstallare torch e installare nuovamente torch + cuda; 2. Se non è possibile caricare il modello a causa di una configurazione insufficiente del computer, è possibile modificare la precisione del modello in request_llm/bridge_chatglm.py, cambiando AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) in AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True) -python -m pip install -r request_llm/requirements_chatglm.txt +# 【Passaggio facoltativo I】 Supporto a ChatGLM di Tsinghua. Note su ChatGLM di Tsinghua: in caso di errore "Call ChatGLM fail 不能正常加载ChatGLM的参数" , fare quanto segue: 1. Per impostazione predefinita, viene installata la versione di torch + cpu; per usare CUDA, è necessario disinstallare torch e installare nuovamente torch + cuda; 2. 
Se non è possibile caricare il modello a causa di una configurazione insufficiente del computer, è possibile modificare la precisione del modello in request_llms/bridge_chatglm.py, cambiando AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) in AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True) +python -m pip install -r request_llms/requirements_chatglm.txt # 【Passaggio facoltativo II】 Supporto a MOSS di Fudan -python -m pip install -r request_llm/requirements_moss.txt -git clone https://github.com/OpenLMLab/MOSS.git request_llm/moss # Si prega di notare che quando si esegue questa riga di codice, si deve essere nella directory radice del progetto +python -m pip install -r request_llms/requirements_moss.txt +git clone https://github.com/OpenLMLab/MOSS.git request_llms/moss # Si prega di notare che quando si esegue questa riga di codice, si deve essere nella directory radice del progetto # 【Passaggio facoltativo III】 Assicurati che il file di configurazione config.py includa tutti i modelli desiderati, al momento tutti i modelli supportati sono i seguenti (i modelli della serie jittorllms attualmente supportano solo la soluzione docker): AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "newbing", "moss"] # + ["jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"] diff --git a/docs/README.md.Korean.md b/docs/README.md.Korean.md index 61b8e4a0..db4b2d8f 100644 --- a/docs/README.md.Korean.md +++ b/docs/README.md.Korean.md @@ -104,11 +104,11 @@ python -m pip install -r requirements.txt # 이 단계도 pip install의 단계 # 1 : 기본 설치된 것들은 torch + cpu 버전입니다. cuda를 사용하려면 torch를 제거한 다음 torch + cuda를 다시 설치해야합니다. # 2 : 모델을 로드할 수 없는 기계 구성 때문에, AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)를 # AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True)로 변경합니다. -python -m pip install -r request_llm/requirements_chatglm.txt +python -m pip install -r request_llms/requirements_chatglm.txt # [선택 사항 II] Fudan MOSS 지원 -python -m pip install -r request_llm/requirements_moss.txt -git clone https://github.com/OpenLMLab/MOSS.git request_llm/moss # 다음 코드 줄을 실행할 때 프로젝트 루트 경로에 있어야합니다. +python -m pip install -r request_llms/requirements_moss.txt +git clone https://github.com/OpenLMLab/MOSS.git request_llms/moss # 다음 코드 줄을 실행할 때 프로젝트 루트 경로에 있어야합니다. # [선택 사항III] AVAIL_LLM_MODELS config.py 구성 파일에 기대하는 모델이 포함되어 있는지 확인하십시오. 
# 현재 지원되는 전체 모델 : diff --git a/docs/README.md.Portuguese.md b/docs/README.md.Portuguese.md index 2347d5a7..4a3aba08 100644 --- a/docs/README.md.Portuguese.md +++ b/docs/README.md.Portuguese.md @@ -119,12 +119,12 @@ python -m pip install -r requirements.txt # This step is the same as the pip ins [Optional Step] If you need to support Tsinghua ChatGLM / Fudan MOSS as the backend, you need to install more dependencies (prerequisite: familiar with Python + used Pytorch + computer configuration is strong): ```sh -# 【Optional Step I】support Tsinghua ChatGLM。Tsinghua ChatGLM Note: If you encounter a "Call ChatGLM fails cannot load ChatGLM parameters normally" error, refer to the following: 1: The default installed is torch+cpu version, and using cuda requires uninstalling torch and reinstalling torch+cuda; 2: If the model cannot be loaded due to insufficient computer configuration, you can modify the model accuracy in request_llm/bridge_chatglm.py and change AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) to AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True) -python -m pip install -r request_llm/requirements_chatglm.txt +# 【Optional Step I】support Tsinghua ChatGLM。Tsinghua ChatGLM Note: If you encounter a "Call ChatGLM fails cannot load ChatGLM parameters normally" error, refer to the following: 1: The default installed is torch+cpu version, and using cuda requires uninstalling torch and reinstalling torch+cuda; 2: If the model cannot be loaded due to insufficient computer configuration, you can modify the model accuracy in request_llms/bridge_chatglm.py and change AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) to AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True) +python -m pip install -r request_llms/requirements_chatglm.txt # 【Optional Step II】support Fudan MOSS -python -m pip install -r request_llm/requirements_moss.txt -git clone https://github.com/OpenLMLab/MOSS.git request_llm/moss # Note: When executing this line of code, you must be in the project root path +python -m pip install -r request_llms/requirements_moss.txt +git clone https://github.com/OpenLMLab/MOSS.git request_llms/moss # Note: When executing this line of code, you must be in the project root path # 【Optional Step III】Make sure that the AVAIL_LLM_MODELS in the config.py configuration file contains the expected model. Currently, all supported models are as follows (jittorllms series currently only supports docker solutions): AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "newbing", "moss"] # + ["jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"] diff --git a/docs/README_EN.md b/docs/README_EN.md index 02b8588c..029186c7 100644 --- a/docs/README_EN.md +++ b/docs/README_EN.md @@ -106,12 +106,12 @@ python -m pip install -r requirements.txt # this step is the same as pip install [Optional step] If you need to support Tsinghua ChatGLM/Fudan MOSS as a backend, you need to install more dependencies (prerequisites: familiar with Python + used Pytorch + computer configuration is strong enough): ```sh -# [Optional Step I] Support Tsinghua ChatGLM. 
Tsinghua ChatGLM remarks: if you encounter the "Call ChatGLM fail cannot load ChatGLM parameters" error, refer to this: 1: The default installation above is torch + cpu version, to use cuda, you need to uninstall torch and reinstall torch + cuda; 2: If the model cannot be loaded due to insufficient local configuration, you can modify the model accuracy in request_llm/bridge_chatglm.py, and change AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) to AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code = True) -python -m pip install -r request_llm/requirements_chatglm.txt +# [Optional Step I] Support Tsinghua ChatGLM. Tsinghua ChatGLM remarks: if you encounter the "Call ChatGLM fail cannot load ChatGLM parameters" error, refer to this: 1: The default installation above is torch + cpu version, to use cuda, you need to uninstall torch and reinstall torch + cuda; 2: If the model cannot be loaded due to insufficient local configuration, you can modify the model accuracy in request_llms/bridge_chatglm.py, and change AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) to AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code = True) +python -m pip install -r request_llms/requirements_chatglm.txt # [Optional Step II] Support Fudan MOSS -python -m pip install -r request_llm/requirements_moss.txt -git clone https://github.com/OpenLMLab/MOSS.git request_llm/moss # When executing this line of code, you must be in the root directory of the project +python -m pip install -r request_llms/requirements_moss.txt +git clone https://github.com/OpenLMLab/MOSS.git request_llms/moss # When executing this line of code, you must be in the root directory of the project # [Optional Step III] Make sure the AVAIL_LLM_MODELS in the config.py configuration file includes the expected models. Currently supported models are as follows (the jittorllms series only supports the docker solution for the time being): AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "newbing", "moss"] # + ["jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"] diff --git a/docs/README_FR.md b/docs/README_FR.md index af3bb42c..62d81ebf 100644 --- a/docs/README_FR.md +++ b/docs/README_FR.md @@ -111,12 +111,12 @@ python -m pip install -r requirements.txt # Same step as pip instalation 【Optional】 Si vous souhaitez prendre en charge THU ChatGLM/FDU MOSS en tant que backend, des dépendances supplémentaires doivent être installées (prérequis: compétent en Python + utilisez Pytorch + configuration suffisante de l'ordinateur): ```sh -# 【Optional Step I】 Support THU ChatGLM. Remarque sur THU ChatGLM: Si vous rencontrez l'erreur "Appel à ChatGLM échoué, les paramètres ChatGLM ne peuvent pas être chargés normalement", reportez-vous à ce qui suit: 1: La version par défaut installée est torch+cpu, si vous souhaitez utiliser cuda, vous devez désinstaller torch et réinstaller torch+cuda; 2: Si le modèle ne peut pas être chargé en raison d'une configuration insuffisante de l'ordinateur local, vous pouvez modifier la précision du modèle dans request_llm/bridge_chatglm.py, modifier AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) par AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True) -python -m pip install -r request_llm/requirements_chatglm.txt +# 【Optional Step I】 Support THU ChatGLM. 
Remarque sur THU ChatGLM: Si vous rencontrez l'erreur "Appel à ChatGLM échoué, les paramètres ChatGLM ne peuvent pas être chargés normalement", reportez-vous à ce qui suit: 1: La version par défaut installée est torch+cpu, si vous souhaitez utiliser cuda, vous devez désinstaller torch et réinstaller torch+cuda; 2: Si le modèle ne peut pas être chargé en raison d'une configuration insuffisante de l'ordinateur local, vous pouvez modifier la précision du modèle dans request_llms/bridge_chatglm.py, modifier AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) par AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True) +python -m pip install -r request_llms/requirements_chatglm.txt # 【Optional Step II】 Support FDU MOSS -python -m pip install -r request_llm/requirements_moss.txt -git clone https://github.com/OpenLMLab/MOSS.git request_llm/moss # Note: When running this line of code, you must be in the project root path. +python -m pip install -r request_llms/requirements_moss.txt +git clone https://github.com/OpenLMLab/MOSS.git request_llms/moss # Note: When running this line of code, you must be in the project root path. # 【Optional Step III】Make sure the AVAIL_LLM_MODELS in the config.py configuration file contains the desired model. Currently, all models supported are as follows (the jittorllms series currently only supports the docker scheme): AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "newbing", "moss"] # + ["jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"] diff --git a/docs/README_JP.md b/docs/README_JP.md index 46145e1f..8ade71b4 100644 --- a/docs/README_JP.md +++ b/docs/README_JP.md @@ -120,12 +120,12 @@ python -m pip install -r requirements.txt # This step is the same as the pip ins [Optional Steps] If you need to support Tsinghua ChatGLM/Fudan MOSS as a backend, you need to install more dependencies (precondition: familiar with Python + used Pytorch + computer configuration). Strong enough): ```sh -# Optional step I: support Tsinghua ChatGLM. Tsinghua ChatGLM remarks: If you encounter the error "Call ChatGLM fail cannot load ChatGLM parameters normally", refer to the following: 1: The version installed above is torch+cpu version, using cuda requires uninstalling torch and reinstalling torch+cuda; 2: If the model cannot be loaded due to insufficient local configuration, you can modify the model accuracy in request_llm/bridge_chatglm.py, and change AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) to AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True). -python -m pip install -r request_llm/requirements_chatglm.txt +# Optional step I: support Tsinghua ChatGLM. Tsinghua ChatGLM remarks: If you encounter the error "Call ChatGLM fail cannot load ChatGLM parameters normally", refer to the following: 1: The version installed above is torch+cpu version, using cuda requires uninstalling torch and reinstalling torch+cuda; 2: If the model cannot be loaded due to insufficient local configuration, you can modify the model accuracy in request_llms/bridge_chatglm.py, and change AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) to AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True). +python -m pip install -r request_llms/requirements_chatglm.txt # Optional Step II: Support Fudan MOSS. 
-python -m pip install -r request_llm/requirements_moss.txt -git clone https://github.com/OpenLMLab/MOSS.git request_llm/moss # Note that when executing this line of code, it must be in the project root. +python -m pip install -r request_llms/requirements_moss.txt +git clone https://github.com/OpenLMLab/MOSS.git request_llms/moss # Note that when executing this line of code, it must be in the project root. # 【Optional Step III】Ensure that the AVAIL_LLM_MODELS in the config.py configuration file contains the expected model. Currently, all supported models are as follows (jittorllms series currently only supports the docker solution): AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "newbing", "moss"] # + ["jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"] diff --git a/docs/README_RS.md b/docs/README_RS.md index d4888a05..52d18dfc 100644 --- a/docs/README_RS.md +++ b/docs/README_RS.md @@ -108,12 +108,12 @@ python -m pip install -r requirements.txt # This step is the same as the pip ins [Optional step] If you need to support Tsinghua ChatGLM/Fudan MOSS as backend, you need to install more dependencies (prerequisites: familiar with Python + have used Pytorch + computer configuration is strong): ```sh -# [Optional step I] Support Tsinghua ChatGLM. Tsinghua ChatGLM note: If you encounter the "Call ChatGLM fail cannot load ChatGLM parameters normally" error, refer to the following: 1: The default installation above is torch+cpu version, and cuda is used Need to uninstall torch and reinstall torch+cuda; 2: If you cannot load the model due to insufficient local configuration, you can modify the model accuracy in request_llm/bridge_chatglm.py, AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) Modify to AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True) -python -m pip install -r request_llm/requirements_chatglm.txt +# [Optional step I] Support Tsinghua ChatGLM. Tsinghua ChatGLM note: If you encounter the "Call ChatGLM fail cannot load ChatGLM parameters normally" error, refer to the following: 1: The default installation above is torch+cpu version, and cuda is used Need to uninstall torch and reinstall torch+cuda; 2: If you cannot load the model due to insufficient local configuration, you can modify the model accuracy in request_llms/bridge_chatglm.py, AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) Modify to AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True) +python -m pip install -r request_llms/requirements_chatglm.txt # [Optional step II] Support Fudan MOSS -python -m pip install -r request_llm/requirements_moss.txt -git clone https://github.com/OpenLMLab/MOSS.git request_llm/moss # Note that when executing this line of code, you must be in the project root path +python -m pip install -r request_llms/requirements_moss.txt +git clone https://github.com/OpenLMLab/MOSS.git request_llms/moss # Note that when executing this line of code, you must be in the project root path # [Optional step III] Make sure the AVAIL_LLM_MODELS in the config.py configuration file contains the expected models. 
Currently, all supported models are as follows (the jittorllms series currently only supports the docker solution): AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "newbing", "moss"] # + ["jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"] diff --git a/docs/WithFastapi.md b/docs/WithFastapi.md index 188b5271..bbbb386e 100644 --- a/docs/WithFastapi.md +++ b/docs/WithFastapi.md @@ -16,7 +16,7 @@ nano config.py + demo.queue(concurrency_count=CONCURRENT_COUNT) - # 如果需要在二级路径下运行 - - # CUSTOM_PATH, = get_conf('CUSTOM_PATH') + - # CUSTOM_PATH = get_conf('CUSTOM_PATH') - # if CUSTOM_PATH != "/": - # from toolbox import run_gradio_in_subpath - # run_gradio_in_subpath(demo, auth=AUTHENTICATION, port=PORT, custom_path=CUSTOM_PATH) @@ -24,7 +24,7 @@ nano config.py - # demo.launch(server_name="0.0.0.0", server_port=PORT, auth=AUTHENTICATION, favicon_path="docs/logo.png") + 如果需要在二级路径下运行 - + CUSTOM_PATH, = get_conf('CUSTOM_PATH') + + CUSTOM_PATH = get_conf('CUSTOM_PATH') + if CUSTOM_PATH != "/": + from toolbox import run_gradio_in_subpath + run_gradio_in_subpath(demo, auth=AUTHENTICATION, port=PORT, custom_path=CUSTOM_PATH) diff --git a/docs/self_analysis.md b/docs/self_analysis.md index ebc23371..c3736193 100644 --- a/docs/self_analysis.md +++ b/docs/self_analysis.md @@ -38,20 +38,20 @@ | crazy_functions\读文章写摘要.py | 对论文进行解析和全文摘要生成 | | crazy_functions\谷歌检索小助手.py | 提供谷歌学术搜索页面中相关文章的元数据信息。 | | crazy_functions\高级功能函数模板.py | 使用Unsplash API发送相关图片以回复用户的输入。 | -| request_llm\bridge_all.py | 基于不同LLM模型进行对话。 | -| request_llm\bridge_chatglm.py | 使用ChatGLM模型生成回复,支持单线程和多线程方式。 | -| request_llm\bridge_chatgpt.py | 基于GPT模型完成对话。 | -| request_llm\bridge_jittorllms_llama.py | 使用JittorLLMs模型完成对话,支持单线程和多线程方式。 | -| request_llm\bridge_jittorllms_pangualpha.py | 使用JittorLLMs模型完成对话,基于多进程和多线程方式。 | -| request_llm\bridge_jittorllms_rwkv.py | 使用JittorLLMs模型完成聊天功能,提供包括历史信息、参数调节等在内的多个功能选项。 | -| request_llm\bridge_moss.py | 加载Moss模型完成对话功能。 | -| request_llm\bridge_newbing.py | 使用Newbing聊天机器人进行对话,支持单线程和多线程方式。 | -| request_llm\bridge_newbingfree.py | 基于Bing chatbot API实现聊天机器人的文本生成功能。 | -| request_llm\bridge_stackclaude.py | 基于Slack API实现Claude与用户的交互。 | -| request_llm\bridge_tgui.py | 通过websocket实现聊天机器人与UI界面交互。 | -| request_llm\edge_gpt.py | 调用Bing chatbot API提供聊天机器人服务。 | -| request_llm\edge_gpt_free.py | 实现聊天机器人API,采用aiohttp和httpx工具库。 | -| request_llm\test_llms.py | 对llm模型进行单元测试。 | +| request_llms\bridge_all.py | 基于不同LLM模型进行对话。 | +| request_llms\bridge_chatglm.py | 使用ChatGLM模型生成回复,支持单线程和多线程方式。 | +| request_llms\bridge_chatgpt.py | 基于GPT模型完成对话。 | +| request_llms\bridge_jittorllms_llama.py | 使用JittorLLMs模型完成对话,支持单线程和多线程方式。 | +| request_llms\bridge_jittorllms_pangualpha.py | 使用JittorLLMs模型完成对话,基于多进程和多线程方式。 | +| request_llms\bridge_jittorllms_rwkv.py | 使用JittorLLMs模型完成聊天功能,提供包括历史信息、参数调节等在内的多个功能选项。 | +| request_llms\bridge_moss.py | 加载Moss模型完成对话功能。 | +| request_llms\bridge_newbing.py | 使用Newbing聊天机器人进行对话,支持单线程和多线程方式。 | +| request_llms\bridge_newbingfree.py | 基于Bing chatbot API实现聊天机器人的文本生成功能。 | +| request_llms\bridge_stackclaude.py | 基于Slack API实现Claude与用户的交互。 | +| request_llms\bridge_tgui.py | 通过websocket实现聊天机器人与UI界面交互。 | +| request_llms\edge_gpt.py | 调用Bing chatbot API提供聊天机器人服务。 | +| request_llms\edge_gpt_free.py | 实现聊天机器人API,采用aiohttp和httpx工具库。 | +| request_llms\test_llms.py | 对llm模型进行单元测试。 | ## 接下来请你逐文件分析下面的工程[0/48] 请对下面的程序文件做一个概述: check_proxy.py @@ -129,7 +129,7 @@ toolbox.py是一个工具类库,其中主要包含了一些函数装饰器和 1. `input_clipping`: 该函数用于裁剪输入文本长度,使其不超过一定的限制。 2. 
`request_gpt_model_in_new_thread_with_ui_alive`: 该函数用于请求 GPT 模型并保持用户界面的响应,支持多线程和实时更新用户界面。 -这两个函数都依赖于从 `toolbox` 和 `request_llm` 中导入的一些工具函数。函数的输入和输出有详细的描述文档。 +这两个函数都依赖于从 `toolbox` 和 `request_llms` 中导入的一些工具函数。函数的输入和输出有详细的描述文档。 ## [12/48] 请对下面的程序文件做一个概述: crazy_functions\Latex全文润色.py @@ -137,7 +137,7 @@ toolbox.py是一个工具类库,其中主要包含了一些函数装饰器和 ## [13/48] 请对下面的程序文件做一个概述: crazy_functions\Latex全文翻译.py -这个文件包含两个函数 `Latex英译中` 和 `Latex中译英`,它们都会对整个Latex项目进行翻译。这个文件还包含一个类 `PaperFileGroup`,它拥有一个方法 `run_file_split`,用于把长文本文件分成多个短文件。其中使用了工具库 `toolbox` 中的一些函数和从 `request_llm` 中导入了 `model_info`。接下来的函数把文件读取进来,把它们的注释删除,进行分割,并进行翻译。这个文件还包括了一些异常处理和界面更新的操作。 +这个文件包含两个函数 `Latex英译中` 和 `Latex中译英`,它们都会对整个Latex项目进行翻译。这个文件还包含一个类 `PaperFileGroup`,它拥有一个方法 `run_file_split`,用于把长文本文件分成多个短文件。其中使用了工具库 `toolbox` 中的一些函数和从 `request_llms` 中导入了 `model_info`。接下来的函数把文件读取进来,把它们的注释删除,进行分割,并进行翻译。这个文件还包括了一些异常处理和界面更新的操作。 ## [14/48] 请对下面的程序文件做一个概述: crazy_functions\__init__.py @@ -227,19 +227,19 @@ toolbox.py是一个工具类库,其中主要包含了一些函数装饰器和 该程序文件定义了一个名为高阶功能模板函数的函数,该函数接受多个参数,包括输入的文本、gpt模型参数、插件模型参数、聊天显示框的句柄、聊天历史等,并利用送出请求,使用 Unsplash API 发送相关图片。其中,为了避免输入溢出,函数会在开始时清空历史。函数也有一些 UI 更新的语句。该程序文件还依赖于其他两个模块:CatchException 和 update_ui,以及一个名为 request_gpt_model_in_new_thread_with_ui_alive 的来自 crazy_utils 模块(应该是自定义的工具包)的函数。 -## [34/48] 请对下面的程序文件做一个概述: request_llm\bridge_all.py +## [34/48] 请对下面的程序文件做一个概述: request_llms\bridge_all.py 该文件包含两个函数:predict和predict_no_ui_long_connection,用于基于不同的LLM模型进行对话。该文件还包含一个lazyloadTiktoken类和一个LLM_CATCH_EXCEPTION修饰器函数。其中lazyloadTiktoken类用于懒加载模型的tokenizer,LLM_CATCH_EXCEPTION用于错误处理。整个文件还定义了一些全局变量和模型信息字典,用于引用和配置LLM模型。 -## [35/48] 请对下面的程序文件做一个概述: request_llm\bridge_chatglm.py +## [35/48] 请对下面的程序文件做一个概述: request_llms\bridge_chatglm.py 这是一个Python程序文件,名为`bridge_chatglm.py`,其中定义了一个名为`GetGLMHandle`的类和三个方法:`predict_no_ui_long_connection`、 `predict`和 `stream_chat`。该文件依赖于多个Python库,如`transformers`和`sentencepiece`。该文件实现了一个聊天机器人,使用ChatGLM模型来生成回复,支持单线程和多线程方式。程序启动时需要加载ChatGLM的模型和tokenizer,需要一段时间。在配置文件`config.py`中设置参数会影响模型的内存和显存使用,因此程序可能会导致低配计算机卡死。 -## [36/48] 请对下面的程序文件做一个概述: request_llm\bridge_chatgpt.py +## [36/48] 请对下面的程序文件做一个概述: request_llms\bridge_chatgpt.py -该文件为 Python 代码文件,文件名为 request_llm\bridge_chatgpt.py。该代码文件主要提供三个函数:predict、predict_no_ui和 predict_no_ui_long_connection,用于发送至 chatGPT 并等待回复,获取输出。该代码文件还包含一些辅助函数,用于处理连接异常、生成 HTTP 请求等。该文件的代码架构清晰,使用了多个自定义函数和模块。 +该文件为 Python 代码文件,文件名为 request_llms\bridge_chatgpt.py。该代码文件主要提供三个函数:predict、predict_no_ui和 predict_no_ui_long_connection,用于发送至 chatGPT 并等待回复,获取输出。该代码文件还包含一些辅助函数,用于处理连接异常、生成 HTTP 请求等。该文件的代码架构清晰,使用了多个自定义函数和模块。 -## [37/48] 请对下面的程序文件做一个概述: request_llm\bridge_jittorllms_llama.py +## [37/48] 请对下面的程序文件做一个概述: request_llms\bridge_jittorllms_llama.py 该代码文件实现了一个聊天机器人,其中使用了 JittorLLMs 模型。主要包括以下几个部分: 1. 
GetGLMHandle 类:一个进程类,用于加载 JittorLLMs 模型并接收并处理请求。 @@ -248,17 +248,17 @@ toolbox.py是一个工具类库,其中主要包含了一些函数装饰器和 这个文件中还有一些辅助函数和全局变量,例如 importlib、time、threading 等。 -## [38/48] 请对下面的程序文件做一个概述: request_llm\bridge_jittorllms_pangualpha.py +## [38/48] 请对下面的程序文件做一个概述: request_llms\bridge_jittorllms_pangualpha.py 这个文件是为了实现使用jittorllms(一种机器学习模型)来进行聊天功能的代码。其中包括了模型加载、模型的参数加载、消息的收发等相关操作。其中使用了多进程和多线程来提高性能和效率。代码中还包括了处理依赖关系的函数和预处理函数等。 -## [39/48] 请对下面的程序文件做一个概述: request_llm\bridge_jittorllms_rwkv.py +## [39/48] 请对下面的程序文件做一个概述: request_llms\bridge_jittorllms_rwkv.py 这个文件是一个Python程序,文件名为request_llm\bridge_jittorllms_rwkv.py。它依赖transformers、time、threading、importlib、multiprocessing等库。在文件中,通过定义GetGLMHandle类加载jittorllms模型参数和定义stream_chat方法来实现与jittorllms模型的交互。同时,该文件还定义了predict_no_ui_long_connection和predict方法来处理历史信息、调用jittorllms模型、接收回复信息并输出结果。 -## [40/48] 请对下面的程序文件做一个概述: request_llm\bridge_moss.py +## [40/48] 请对下面的程序文件做一个概述: request_llms\bridge_moss.py -该文件为一个Python源代码文件,文件名为 request_llm\bridge_moss.py。代码定义了一个 GetGLMHandle 类和两个函数 predict_no_ui_long_connection 和 predict。 +该文件为一个Python源代码文件,文件名为 request_llms\bridge_moss.py。代码定义了一个 GetGLMHandle 类和两个函数 predict_no_ui_long_connection 和 predict。 GetGLMHandle 类继承自Process类(多进程),主要功能是启动一个子进程并加载 MOSS 模型参数,通过 Pipe 进行主子进程的通信。该类还定义了 check_dependency、moss_init、run 和 stream_chat 等方法,其中 check_dependency 和 moss_init 是子进程的初始化方法,run 是子进程运行方法,stream_chat 实现了主进程和子进程的交互过程。 @@ -266,7 +266,7 @@ GetGLMHandle 类继承自Process类(多进程),主要功能是启动一个 函数 predict 是单线程方法,通过调用 update_ui 将交互过程中 MOSS 的回复实时更新到UI(User Interface)中,并执行一个 named function(additional_fn)指定的函数对输入进行预处理。 -## [41/48] 请对下面的程序文件做一个概述: request_llm\bridge_newbing.py +## [41/48] 请对下面的程序文件做一个概述: request_llms\bridge_newbing.py 这是一个名为`bridge_newbing.py`的程序文件,包含三个部分: @@ -276,11 +276,11 @@ GetGLMHandle 类继承自Process类(多进程),主要功能是启动一个 第三部分定义了一个名为`newbing_handle`的全局变量,并导出了`predict_no_ui_long_connection`和`predict`这两个方法,以供其他程序可以调用。 -## [42/48] 请对下面的程序文件做一个概述: request_llm\bridge_newbingfree.py +## [42/48] 请对下面的程序文件做一个概述: request_llms\bridge_newbingfree.py 这个Python文件包含了三部分内容。第一部分是来自edge_gpt_free.py文件的聊天机器人程序。第二部分是子进程Worker,用于调用主体。第三部分提供了两个函数:predict_no_ui_long_connection和predict用于调用NewBing聊天机器人和返回响应。其中predict函数还提供了一些参数用于控制聊天机器人的回复和更新UI界面。 -## [43/48] 请对下面的程序文件做一个概述: request_llm\bridge_stackclaude.py +## [43/48] 请对下面的程序文件做一个概述: request_llms\bridge_stackclaude.py 这是一个Python源代码文件,文件名为request_llm\bridge_stackclaude.py。代码分为三个主要部分: @@ -290,21 +290,21 @@ GetGLMHandle 类继承自Process类(多进程),主要功能是启动一个 第三部分定义了predict_no_ui_long_connection和predict两个函数,主要用于通过调用ClaudeHandle对象的stream_chat方法来获取Claude的回复,并更新ui以显示相关信息。其中predict函数采用单线程方法,而predict_no_ui_long_connection函数使用多线程方法。 -## [44/48] 请对下面的程序文件做一个概述: request_llm\bridge_tgui.py +## [44/48] 请对下面的程序文件做一个概述: request_llms\bridge_tgui.py 该文件是一个Python代码文件,名为request_llm\bridge_tgui.py。它包含了一些函数用于与chatbot UI交互,并通过WebSocket协议与远程LLM模型通信完成文本生成任务,其中最重要的函数是predict()和predict_no_ui_long_connection()。这个程序还有其他的辅助函数,如random_hash()。整个代码文件在协作的基础上完成了一次修改。 -## [45/48] 请对下面的程序文件做一个概述: request_llm\edge_gpt.py +## [45/48] 请对下面的程序文件做一个概述: request_llms\edge_gpt.py 该文件是一个用于调用Bing chatbot API的Python程序,它由多个类和辅助函数构成,可以根据给定的对话连接在对话中提出问题,使用websocket与远程服务通信。程序实现了一个聊天机器人,可以为用户提供人工智能聊天。 -## [46/48] 请对下面的程序文件做一个概述: request_llm\edge_gpt_free.py +## [46/48] 请对下面的程序文件做一个概述: request_llms\edge_gpt_free.py 该代码文件为一个会话API,可通过Chathub发送消息以返回响应。其中使用了 aiohttp 和 httpx 库进行网络请求并发送。代码中包含了一些函数和常量,多数用于生成请求数据或是请求头信息等。同时该代码文件还包含了一个 Conversation 类,调用该类可实现对话交互。 -## [47/48] 请对下面的程序文件做一个概述: request_llm\test_llms.py +## [47/48] 请对下面的程序文件做一个概述: request_llms\test_llms.py 
-这个文件是用于对llm模型进行单元测试的Python程序。程序导入一个名为"request_llm.bridge_newbingfree"的模块,然后三次使用该模块中的predict_no_ui_long_connection()函数进行预测,并输出结果。此外,还有一些注释掉的代码段,这些代码段也是关于模型预测的。 +这个文件是用于对llm模型进行单元测试的Python程序。程序导入一个名为"request_llms.bridge_newbingfree"的模块,然后三次使用该模块中的predict_no_ui_long_connection()函数进行预测,并输出结果。此外,还有一些注释掉的代码段,这些代码段也是关于模型预测的。 ## 用一张Markdown表格简要描述以下文件的功能: check_proxy.py, colorful.py, config.py, config_private.py, core_functional.py, crazy_functional.py, main.py, multi_language.py, theme.py, toolbox.py, crazy_functions\crazy_functions_test.py, crazy_functions\crazy_utils.py, crazy_functions\Latex全文润色.py, crazy_functions\Latex全文翻译.py, crazy_functions\__init__.py, crazy_functions\下载arxiv论文翻译摘要.py。根据以上分析,用一句话概括程序的整体功能。 @@ -355,24 +355,24 @@ crazy_functions\代码重写为全英文_多线程.py, crazy_functions\图片生 概括程序的整体功能:提供了一系列处理文本、文件和代码的功能,使用了各类语言模型、多线程、网络请求和数据解析技术来提高效率和精度。 ## 用一张Markdown表格简要描述以下文件的功能: -crazy_functions\谷歌检索小助手.py, crazy_functions\高级功能函数模板.py, request_llm\bridge_all.py, request_llm\bridge_chatglm.py, request_llm\bridge_chatgpt.py, request_llm\bridge_jittorllms_llama.py, request_llm\bridge_jittorllms_pangualpha.py, request_llm\bridge_jittorllms_rwkv.py, request_llm\bridge_moss.py, request_llm\bridge_newbing.py, request_llm\bridge_newbingfree.py, request_llm\bridge_stackclaude.py, request_llm\bridge_tgui.py, request_llm\edge_gpt.py, request_llm\edge_gpt_free.py, request_llm\test_llms.py。根据以上分析,用一句话概括程序的整体功能。 +crazy_functions\谷歌检索小助手.py, crazy_functions\高级功能函数模板.py, request_llms\bridge_all.py, request_llms\bridge_chatglm.py, request_llms\bridge_chatgpt.py, request_llms\bridge_jittorllms_llama.py, request_llms\bridge_jittorllms_pangualpha.py, request_llms\bridge_jittorllms_rwkv.py, request_llms\bridge_moss.py, request_llms\bridge_newbing.py, request_llms\bridge_newbingfree.py, request_llms\bridge_stackclaude.py, request_llms\bridge_tgui.py, request_llms\edge_gpt.py, request_llms\edge_gpt_free.py, request_llms\test_llms.py。根据以上分析,用一句话概括程序的整体功能。 | 文件名 | 功能描述 | | --- | --- | | crazy_functions\谷歌检索小助手.py | 提供谷歌学术搜索页面中相关文章的元数据信息。 | | crazy_functions\高级功能函数模板.py | 使用Unsplash API发送相关图片以回复用户的输入。 | -| request_llm\bridge_all.py | 基于不同LLM模型进行对话。 | -| request_llm\bridge_chatglm.py | 使用ChatGLM模型生成回复,支持单线程和多线程方式。 | -| request_llm\bridge_chatgpt.py | 基于GPT模型完成对话。 | -| request_llm\bridge_jittorllms_llama.py | 使用JittorLLMs模型完成对话,支持单线程和多线程方式。 | -| request_llm\bridge_jittorllms_pangualpha.py | 使用JittorLLMs模型完成对话,基于多进程和多线程方式。 | -| request_llm\bridge_jittorllms_rwkv.py | 使用JittorLLMs模型完成聊天功能,提供包括历史信息、参数调节等在内的多个功能选项。 | -| request_llm\bridge_moss.py | 加载Moss模型完成对话功能。 | -| request_llm\bridge_newbing.py | 使用Newbing聊天机器人进行对话,支持单线程和多线程方式。 | -| request_llm\bridge_newbingfree.py | 基于Bing chatbot API实现聊天机器人的文本生成功能。 | -| request_llm\bridge_stackclaude.py | 基于Slack API实现Claude与用户的交互。 | -| request_llm\bridge_tgui.py | 通过websocket实现聊天机器人与UI界面交互。 | -| request_llm\edge_gpt.py | 调用Bing chatbot API提供聊天机器人服务。 | -| request_llm\edge_gpt_free.py | 实现聊天机器人API,采用aiohttp和httpx工具库。 | -| request_llm\test_llms.py | 对llm模型进行单元测试。 | +| request_llms\bridge_all.py | 基于不同LLM模型进行对话。 | +| request_llms\bridge_chatglm.py | 使用ChatGLM模型生成回复,支持单线程和多线程方式。 | +| request_llms\bridge_chatgpt.py | 基于GPT模型完成对话。 | +| request_llms\bridge_jittorllms_llama.py | 使用JittorLLMs模型完成对话,支持单线程和多线程方式。 | +| request_llms\bridge_jittorllms_pangualpha.py | 使用JittorLLMs模型完成对话,基于多进程和多线程方式。 | +| request_llms\bridge_jittorllms_rwkv.py | 使用JittorLLMs模型完成聊天功能,提供包括历史信息、参数调节等在内的多个功能选项。 | +| request_llms\bridge_moss.py | 加载Moss模型完成对话功能。 | +| request_llms\bridge_newbing.py | 
使用Newbing聊天机器人进行对话,支持单线程和多线程方式。 | +| request_llms\bridge_newbingfree.py | 基于Bing chatbot API实现聊天机器人的文本生成功能。 | +| request_llms\bridge_stackclaude.py | 基于Slack API实现Claude与用户的交互。 | +| request_llms\bridge_tgui.py | 通过websocket实现聊天机器人与UI界面交互。 | +| request_llms\edge_gpt.py | 调用Bing chatbot API提供聊天机器人服务。 | +| request_llms\edge_gpt_free.py | 实现聊天机器人API,采用aiohttp和httpx工具库。 | +| request_llms\test_llms.py | 对llm模型进行单元测试。 | | 程序整体功能 | 实现不同种类的聊天机器人,可以根据输入进行文本生成。 | diff --git a/docs/translate_english.json b/docs/translate_english.json index c13ac81a..850cae54 100644 --- a/docs/translate_english.json +++ b/docs/translate_english.json @@ -1184,7 +1184,7 @@ "Call ChatGLM fail 不能正常加载ChatGLM的参数": "Call ChatGLM fail, unable to load parameters for ChatGLM", "不能正常加载ChatGLM的参数!": "Unable to load parameters for ChatGLM!", "多线程方法": "Multithreading method", - "函数的说明请见 request_llm/bridge_all.py": "For function details, please see request_llm/bridge_all.py", + "函数的说明请见 request_llms/bridge_all.py": "For function details, please see request_llms/bridge_all.py", "程序终止": "Program terminated", "单线程方法": "Single-threaded method", "等待ChatGLM响应中": "Waiting for response from ChatGLM", @@ -1543,7 +1543,7 @@ "str类型": "str type", "所有音频都总结完成了吗": "Are all audio summaries completed?", "SummaryAudioVideo内容": "SummaryAudioVideo content", - "使用教程详情见 request_llm/README.md": "See request_llm/README.md for detailed usage instructions", + "使用教程详情见 request_llms/README.md": "See request_llms/README.md for detailed usage instructions", "删除中间文件夹": "Delete intermediate folder", "Claude组件初始化成功": "Claude component initialized successfully", "$c$ 是光速": "$c$ is the speed of light", diff --git a/docs/translate_japanese.json b/docs/translate_japanese.json index fa3af4e0..ae86dc06 100644 --- a/docs/translate_japanese.json +++ b/docs/translate_japanese.json @@ -782,7 +782,7 @@ "主进程统一调用函数接口": "メインプロセスが関数インターフェースを統一的に呼び出します", "再例如一个包含了待处理文件的路径": "処理待ちのファイルを含むパスの例", "负责把学术论文准确翻译成中文": "学術論文を正確に中国語に翻訳する責任があります", - "函数的说明请见 request_llm/bridge_all.py": "関数の説明については、request_llm/bridge_all.pyを参照してください", + "函数的说明请见 request_llms/bridge_all.py": "関数の説明については、request_llms/bridge_all.pyを参照してください", "然后回车提交": "そしてEnterを押して提出してください", "防止爆token": "トークンの爆発を防止する", "Latex项目全文中译英": "LaTeXプロジェクト全文の中国語から英語への翻訳", @@ -1616,7 +1616,7 @@ "正在重试": "再試行中", "从而更全面地理解项目的整体功能": "プロジェクトの全体的な機能をより理解するために", "正在等您说完问题": "質問が完了するのをお待ちしています", - "使用教程详情见 request_llm/README.md": "使用方法の詳細については、request_llm/README.mdを参照してください", + "使用教程详情见 request_llms/README.md": "使用方法の詳細については、request_llms/README.mdを参照してください", "6.25 加入判定latex模板的代码": "6.25 テンプレートの判定コードを追加", "找不到任何音频或视频文件": "音声またはビデオファイルが見つかりません", "请求GPT模型的": "GPTモデルのリクエスト", diff --git a/docs/translate_traditionalchinese.json b/docs/translate_traditionalchinese.json index 53570aea..a677f108 100644 --- a/docs/translate_traditionalchinese.json +++ b/docs/translate_traditionalchinese.json @@ -123,7 +123,7 @@ "的第": "的第", "减少重复": "減少重複", "如果超过期限没有喂狗": "如果超過期限沒有餵狗", - "函数的说明请见 request_llm/bridge_all.py": "函數的說明請見 request_llm/bridge_all.py", + "函数的说明请见 request_llms/bridge_all.py": "函數的說明請見 request_llms/bridge_all.py", "第7步": "第7步", "说": "說", "中途接收可能的终止指令": "中途接收可能的終止指令", @@ -1887,7 +1887,7 @@ "请继续分析其他源代码": "請繼續分析其他源代碼", "质能方程式": "質能方程式", "功能尚不稳定": "功能尚不穩定", - "使用教程详情见 request_llm/README.md": "使用教程詳情見 request_llm/README.md", + "使用教程详情见 request_llms/README.md": "使用教程詳情見 request_llms/README.md", "从以上搜索结果中抽取信息": "從以上搜索結果中抽取信息", "虽然PDF生成失败了": "雖然PDF生成失敗了", "找图片": "尋找圖片", diff --git a/main.py b/main.py index f0914756..bf843825 100644 --- a/main.py +++ b/main.py 
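The main.py hunk below, like many hunks later in this patch, drops the single-element tuple unpacking (`proxies, = get_conf(...)`) in favor of plain assignment (`proxies = get_conf(...)`); in other words, `get_conf` now hands back the bare value when a single key is requested and a tuple only for several keys. A minimal sketch of that calling convention, purely illustrative and not the project's actual `toolbox.get_conf`:

```python
# Illustrative only: mimics the single-key convention this patch standardizes on for get_conf.
def get_conf_sketch(config: dict, *keys):
    values = tuple(config[k] for k in keys)
    return values[0] if len(keys) == 1 else values   # one key -> bare value, several keys -> tuple

cfg = {'proxies': None, 'WEB_PORT': -1, 'LLM_MODEL': 'gpt-3.5-turbo'}
proxies = get_conf_sketch(cfg, 'proxies')                            # no trailing comma needed any more
WEB_PORT, LLM_MODEL = get_conf_sketch(cfg, 'WEB_PORT', 'LLM_MODEL')  # multi-key calls still unpack a tuple
```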
@@ -7,14 +7,14 @@ def main(): import gradio as gr if gr.__version__ not in ['3.32.6']: raise ModuleNotFoundError("使用项目内置Gradio获取最优体验! 请运行 `pip install -r requirements.txt` 指令安装内置Gradio及其他依赖, 详情信息见requirements.txt.") - from request_llm.bridge_all import predict + from request_llms.bridge_all import predict from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, load_chat_cookies, DummyWith # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到 proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION = get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION') CHATBOT_HEIGHT, LAYOUT, AVAIL_LLM_MODELS, AUTO_CLEAR_TXT = get_conf('CHATBOT_HEIGHT', 'LAYOUT', 'AVAIL_LLM_MODELS', 'AUTO_CLEAR_TXT') ENABLE_AUDIO, AUTO_CLEAR_TXT, PATH_LOGGING, AVAIL_THEMES, THEME = get_conf('ENABLE_AUDIO', 'AUTO_CLEAR_TXT', 'PATH_LOGGING', 'AVAIL_THEMES', 'THEME') DARK_MODE, NUM_CUSTOM_BASIC_BTN, SSL_KEYFILE, SSL_CERTFILE = get_conf('DARK_MODE', 'NUM_CUSTOM_BASIC_BTN', 'SSL_KEYFILE', 'SSL_CERTFILE') - INIT_SYS_PROMPT, = get_conf('INIT_SYS_PROMPT') + INIT_SYS_PROMPT = get_conf('INIT_SYS_PROMPT') # 如果WEB_PORT是-1, 则随机选取WEB端口 PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT @@ -48,7 +48,7 @@ def main(): # 高级函数插件 from crazy_functional import get_crazy_functions - DEFAULT_FN_GROUPS, = get_conf('DEFAULT_FN_GROUPS') + DEFAULT_FN_GROUPS = get_conf('DEFAULT_FN_GROUPS') plugins = get_crazy_functions() all_plugin_groups = list(set([g for _, plugin in plugins.items() for g in plugin['Group'].split('|')])) match_group = lambda tags, groups: any([g in groups for g in tags.split('|')]) @@ -433,10 +433,10 @@ def main(): server_port=PORT, favicon_path=os.path.join(os.path.dirname(__file__), "docs/logo.png"), auth=AUTHENTICATION if len(AUTHENTICATION) != 0 else None, - blocked_paths=["config.py","config_private.py","docker-compose.yml","Dockerfile"]) + blocked_paths=["config.py","config_private.py","docker-compose.yml","Dockerfile","gpt_log/admin"]) # 如果需要在二级路径下运行 - # CUSTOM_PATH, = get_conf('CUSTOM_PATH') + # CUSTOM_PATH = get_conf('CUSTOM_PATH') # if CUSTOM_PATH != "/": # from toolbox import run_gradio_in_subpath # run_gradio_in_subpath(demo, auth=AUTHENTICATION, port=PORT, custom_path=CUSTOM_PATH) diff --git a/multi_language.py b/multi_language.py index f78cd997..a20fb5af 100644 --- a/multi_language.py +++ b/multi_language.py @@ -38,7 +38,7 @@ from toolbox import get_conf CACHE_ONLY = os.environ.get('CACHE_ONLY', False) -CACHE_FOLDER, = get_conf('PATH_LOGGING') +CACHE_FOLDER = get_conf('PATH_LOGGING') blacklist = ['multi-language', CACHE_FOLDER, '.git', 'private_upload', 'multi_language.py', 'build', '.github', '.vscode', '__pycache__', 'venv'] diff --git a/request_llm/bridge_chatglm.py b/request_llm/bridge_chatglm.py deleted file mode 100644 index 387b3e21..00000000 --- a/request_llm/bridge_chatglm.py +++ /dev/null @@ -1,167 +0,0 @@ - -from transformers import AutoModel, AutoTokenizer -import time -import threading -import importlib -from toolbox import update_ui, get_conf, ProxyNetworkActivate -from multiprocessing import Process, Pipe - -load_message = "ChatGLM尚未加载,加载需要一段时间。注意,取决于`config.py`的配置,ChatGLM消耗大量的内存(CPU)或显存(GPU),也许会导致低配计算机卡死 ……" - -################################################################################# -class GetGLMHandle(Process): - def __init__(self): - super().__init__(daemon=True) - self.parent, self.child = Pipe() - self.chatglm_model = None - self.chatglm_tokenizer = None - self.info = "" - 
self.success = True - self.check_dependency() - self.start() - self.threadLock = threading.Lock() - - def check_dependency(self): - try: - import sentencepiece - self.info = "依赖检测通过" - self.success = True - except: - self.info = "缺少ChatGLM的依赖,如果要使用ChatGLM,除了基础的pip依赖以外,您还需要运行`pip install -r request_llm/requirements_chatglm.txt`安装ChatGLM的依赖。" - self.success = False - - def ready(self): - return self.chatglm_model is not None - - def run(self): - # 子进程执行 - # 第一次运行,加载参数 - retry = 0 - LOCAL_MODEL_QUANT, device = get_conf('LOCAL_MODEL_QUANT', 'LOCAL_MODEL_DEVICE') - - if LOCAL_MODEL_QUANT == "INT4": # INT4 - _model_name_ = "THUDM/chatglm2-6b-int4" - elif LOCAL_MODEL_QUANT == "INT8": # INT8 - _model_name_ = "THUDM/chatglm2-6b-int8" - else: - _model_name_ = "THUDM/chatglm2-6b" # FP16 - - while True: - try: - with ProxyNetworkActivate('Download_LLM'): - if self.chatglm_model is None: - self.chatglm_tokenizer = AutoTokenizer.from_pretrained(_model_name_, trust_remote_code=True) - if device=='cpu': - self.chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True).float() - else: - self.chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True).half().cuda() - self.chatglm_model = self.chatglm_model.eval() - break - else: - break - except: - retry += 1 - if retry > 3: - self.child.send('[Local Message] Call ChatGLM fail 不能正常加载ChatGLM的参数。') - raise RuntimeError("不能正常加载ChatGLM的参数!") - - while True: - # 进入任务等待状态 - kwargs = self.child.recv() - # 收到消息,开始请求 - try: - for response, history in self.chatglm_model.stream_chat(self.chatglm_tokenizer, **kwargs): - self.child.send(response) - # # 中途接收可能的终止指令(如果有的话) - # if self.child.poll(): - # command = self.child.recv() - # if command == '[Terminate]': break - except: - from toolbox import trimmed_format_exc - self.child.send('[Local Message] Call ChatGLM fail.' 
+ '\n```\n' + trimmed_format_exc() + '\n```\n') - # 请求处理结束,开始下一个循环 - self.child.send('[Finish]') - - def stream_chat(self, **kwargs): - # 主进程执行 - self.threadLock.acquire() - self.parent.send(kwargs) - while True: - res = self.parent.recv() - if res != '[Finish]': - yield res - else: - break - self.threadLock.release() - -global glm_handle -glm_handle = None -################################################################################# -def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False): - """ - 多线程方法 - 函数的说明请见 request_llm/bridge_all.py - """ - global glm_handle - if glm_handle is None: - glm_handle = GetGLMHandle() - if len(observe_window) >= 1: observe_window[0] = load_message + "\n\n" + glm_handle.info - if not glm_handle.success: - error = glm_handle.info - glm_handle = None - raise RuntimeError(error) - - # chatglm 没有 sys_prompt 接口,因此把prompt加入 history - history_feedin = [] - history_feedin.append(["What can I do?", sys_prompt]) - for i in range(len(history)//2): - history_feedin.append([history[2*i], history[2*i+1]] ) - - watch_dog_patience = 5 # 看门狗 (watchdog) 的耐心, 设置5秒即可 - response = "" - for response in glm_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']): - if len(observe_window) >= 1: observe_window[0] = response - if len(observe_window) >= 2: - if (time.time()-observe_window[1]) > watch_dog_patience: - raise RuntimeError("程序终止。") - return response - - - -def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): - """ - 单线程方法 - 函数的说明请见 request_llm/bridge_all.py - """ - chatbot.append((inputs, "")) - - global glm_handle - if glm_handle is None: - glm_handle = GetGLMHandle() - chatbot[-1] = (inputs, load_message + "\n\n" + glm_handle.info) - yield from update_ui(chatbot=chatbot, history=[]) - if not glm_handle.success: - glm_handle = None - return - - if additional_fn is not None: - from core_functional import handle_core_functionality - inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot) - - # 处理历史信息 - history_feedin = [] - history_feedin.append(["What can I do?", system_prompt] ) - for i in range(len(history)//2): - history_feedin.append([history[2*i], history[2*i+1]] ) - - # 开始接收chatglm的回复 - response = "[Local Message]: 等待ChatGLM响应中 ..." - for response in glm_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']): - chatbot[-1] = (inputs, response) - yield from update_ui(chatbot=chatbot, history=history) - - # 总结输出 - if response == "[Local Message]: 等待ChatGLM响应中 ...": - response = "[Local Message]: ChatGLM响应异常 ..." 
- history.extend([inputs, response]) - yield from update_ui(chatbot=chatbot, history=history) diff --git a/request_llm/local_llm_class.py b/request_llm/local_llm_class.py deleted file mode 100644 index c9c72534..00000000 --- a/request_llm/local_llm_class.py +++ /dev/null @@ -1,180 +0,0 @@ -from transformers import AutoModel, AutoTokenizer -import time -import threading -import importlib -from toolbox import update_ui, get_conf, Singleton -from multiprocessing import Process, Pipe - -def SingletonLocalLLM(cls): - """ - 一个单实例装饰器 - """ - _instance = {} - def _singleton(*args, **kargs): - if cls not in _instance: - _instance[cls] = cls(*args, **kargs) - return _instance[cls] - elif _instance[cls].corrupted: - _instance[cls] = cls(*args, **kargs) - return _instance[cls] - else: - return _instance[cls] - return _singleton - -class LocalLLMHandle(Process): - def __init__(self): - # ⭐主进程执行 - super().__init__(daemon=True) - self.corrupted = False - self.load_model_info() - self.parent, self.child = Pipe() - self.running = True - self._model = None - self._tokenizer = None - self.info = "" - self.check_dependency() - self.start() - self.threadLock = threading.Lock() - - def load_model_info(self): - # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 - raise NotImplementedError("Method not implemented yet") - self.model_name = "" - self.cmd_to_install = "" - - def load_model_and_tokenizer(self): - """ - This function should return the model and the tokenizer - """ - # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 - raise NotImplementedError("Method not implemented yet") - - def llm_stream_generator(self, **kwargs): - # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 - raise NotImplementedError("Method not implemented yet") - - def try_to_import_special_deps(self, **kwargs): - """ - import something that will raise error if the user does not install requirement_*.txt - """ - # ⭐主进程执行 - raise NotImplementedError("Method not implemented yet") - - def check_dependency(self): - # ⭐主进程执行 - try: - self.try_to_import_special_deps() - self.info = "依赖检测通过" - self.running = True - except: - self.info = f"缺少{self.model_name}的依赖,如果要使用{self.model_name},除了基础的pip依赖以外,您还需要运行{self.cmd_to_install}安装{self.model_name}的依赖。" - self.running = False - - def run(self): - # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 - # 第一次运行,加载参数 - try: - self._model, self._tokenizer = self.load_model_and_tokenizer() - except: - self.running = False - from toolbox import trimmed_format_exc - self.child.send(f'[Local Message] 不能正常加载{self.model_name}的参数.' + '\n```\n' + trimmed_format_exc() + '\n```\n') - self.child.send('[FinishBad]') - raise RuntimeError(f"不能正常加载{self.model_name}的参数!") - - while True: - # 进入任务等待状态 - kwargs = self.child.recv() - # 收到消息,开始请求 - try: - for response_full in self.llm_stream_generator(**kwargs): - self.child.send(response_full) - self.child.send('[Finish]') - # 请求处理结束,开始下一个循环 - except: - from toolbox import trimmed_format_exc - self.child.send(f'[Local Message] 调用{self.model_name}失败.' 
+ '\n```\n' + trimmed_format_exc() + '\n```\n') - self.child.send('[Finish]') - - def stream_chat(self, **kwargs): - # ⭐主进程执行 - self.threadLock.acquire() - self.parent.send(kwargs) - while True: - res = self.parent.recv() - if res == '[Finish]': - break - if res == '[FinishBad]': - self.running = False - self.corrupted = True - break - else: - yield res - self.threadLock.release() - - - -def get_local_llm_predict_fns(LLMSingletonClass, model_name): - load_message = f"{model_name}尚未加载,加载需要一段时间。注意,取决于`config.py`的配置,{model_name}消耗大量的内存(CPU)或显存(GPU),也许会导致低配计算机卡死 ……" - - def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False): - """ - ⭐多线程方法 - 函数的说明请见 request_llm/bridge_all.py - """ - _llm_handle = LLMSingletonClass() - if len(observe_window) >= 1: observe_window[0] = load_message + "\n\n" + _llm_handle.info - if not _llm_handle.running: raise RuntimeError(_llm_handle.info) - - # chatglm 没有 sys_prompt 接口,因此把prompt加入 history - history_feedin = [] - history_feedin.append([sys_prompt, "Certainly!"]) - for i in range(len(history)//2): - history_feedin.append([history[2*i], history[2*i+1]] ) - - watch_dog_patience = 5 # 看门狗 (watchdog) 的耐心, 设置5秒即可 - response = "" - for response in _llm_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']): - if len(observe_window) >= 1: - observe_window[0] = response - if len(observe_window) >= 2: - if (time.time()-observe_window[1]) > watch_dog_patience: raise RuntimeError("程序终止。") - return response - - - - def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): - """ - ⭐单线程方法 - 函数的说明请见 request_llm/bridge_all.py - """ - chatbot.append((inputs, "")) - - _llm_handle = LLMSingletonClass() - chatbot[-1] = (inputs, load_message + "\n\n" + _llm_handle.info) - yield from update_ui(chatbot=chatbot, history=[]) - if not _llm_handle.running: raise RuntimeError(_llm_handle.info) - - if additional_fn is not None: - from core_functional import handle_core_functionality - inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot) - - # 处理历史信息 - history_feedin = [] - history_feedin.append([system_prompt, "Certainly!"]) - for i in range(len(history)//2): - history_feedin.append([history[2*i], history[2*i+1]] ) - - # 开始接收回复 - response = f"[Local Message]: 等待{model_name}响应中 ..." - for response in _llm_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']): - chatbot[-1] = (inputs, response) - yield from update_ui(chatbot=chatbot, history=history) - - # 总结输出 - if response == f"[Local Message]: 等待{model_name}响应中 ...": - response = f"[Local Message]: {model_name}响应异常 ..." 
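The per-model process classes being deleted here (the old `request_llm/bridge_chatglm.py` and `request_llm/local_llm_class.py`) are superseded by the `LocalLLMHandle` + `get_local_llm_predict_fns` pattern: the new `request_llms/bridge_chatglm.py` and `bridge_chatglm3.py` further down import these helpers from `.local_llm_class`, which is presumably carried over under `request_llms/`. A condensed sketch of how a new local backend would plug into that pattern; `SomeModel`, its requirements file and the HuggingFace repo id are placeholders, not part of this patch:

```python
# Hypothetical bridge following the shape of the new request_llms/bridge_chatglm*.py
# files shown later in this patch; every concrete name below is a placeholder.
model_name = "SomeModel"
cmd_to_install = "`pip install -r request_llms/requirements_somemodel.txt`"

from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns, SingletonLocalLLM

@SingletonLocalLLM
class GetSomeModelHandle(LocalLLMHandle):
    def load_model_info(self):
        # names used by the base class when reporting missing dependencies
        self.model_name = model_name
        self.cmd_to_install = cmd_to_install

    def load_model_and_tokenizer(self):
        # return the (model, tokenizer) pair; the base class stores it as self._model/_tokenizer
        from transformers import AutoModel, AutoTokenizer
        tokenizer = AutoTokenizer.from_pretrained("some-org/some-model", trust_remote_code=True)
        model = AutoModel.from_pretrained("some-org/some-model", trust_remote_code=True).eval()
        return model, tokenizer

    def llm_stream_generator(self, **kwargs):
        # yield partial responses; the base class relays them to the main process through its Pipe
        for response, _history in self._model.stream_chat(
                self._tokenizer, kwargs['query'], kwargs['history'],
                max_length=kwargs['max_length'], top_p=kwargs['top_p'], temperature=kwargs['temperature']):
            yield response

    def try_to_import_special_deps(self, **kwargs):
        # should raise if the optional requirements_*.txt was not installed
        import transformers

predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetSomeModelHandle, model_name)
```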
- history.extend([inputs, response]) - yield from update_ui(chatbot=chatbot, history=history) - - return predict_no_ui_long_connection, predict \ No newline at end of file diff --git a/request_llm/README.md b/request_llms/README.md similarity index 96% rename from request_llm/README.md rename to request_llms/README.md index 545bc1ff..92b856e3 100644 --- a/request_llm/README.md +++ b/request_llms/README.md @@ -2,7 +2,7 @@ ## ChatGLM -- 安装依赖 `pip install -r request_llm/requirements_chatglm.txt` +- 安装依赖 `pip install -r request_llms/requirements_chatglm.txt` - 修改配置,在config.py中将LLM_MODEL的值改为"chatglm" ``` sh diff --git a/request_llm/bridge_all.py b/request_llms/bridge_all.py similarity index 95% rename from request_llm/bridge_all.py rename to request_llms/bridge_all.py index f85d1b6b..27b91c26 100644 --- a/request_llm/bridge_all.py +++ b/request_llms/bridge_all.py @@ -19,8 +19,8 @@ from .bridge_chatgpt import predict as chatgpt_ui from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui from .bridge_chatglm import predict as chatglm_ui -from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui -from .bridge_chatglm import predict as chatglm_ui +from .bridge_chatglm3 import predict_no_ui_long_connection as chatglm3_noui +from .bridge_chatglm3 import predict as chatglm3_ui from .bridge_qianfan import predict_no_ui_long_connection as qianfan_noui from .bridge_qianfan import predict as qianfan_ui @@ -56,7 +56,7 @@ if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/' azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15' # 兼容旧版的配置 try: - API_URL, = get_conf("API_URL") + API_URL = get_conf("API_URL") if API_URL != "https://api.openai.com/v1/chat/completions": openai_endpoint = API_URL print("警告!API_URL配置选项将被弃用,请更换为API_URL_REDIRECT配置") @@ -208,6 +208,14 @@ model_info = { "tokenizer": tokenizer_gpt35, "token_cnt": get_token_num_gpt35, }, + "chatglm3": { + "fn_with_ui": chatglm3_ui, + "fn_without_ui": chatglm3_noui, + "endpoint": None, + "max_token": 8192, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, "qianfan": { "fn_with_ui": qianfan_ui, "fn_without_ui": qianfan_noui, @@ -483,9 +491,25 @@ if "llama2" in AVAIL_LLM_MODELS: # llama2 }) except: print(trimmed_format_exc()) +if "zhipuai" in AVAIL_LLM_MODELS: # zhipuai + try: + from .bridge_zhipu import predict_no_ui_long_connection as zhipu_noui + from .bridge_zhipu import predict as zhipu_ui + model_info.update({ + "zhipuai": { + "fn_with_ui": zhipu_ui, + "fn_without_ui": zhipu_noui, + "endpoint": None, + "max_token": 4096, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + } + }) + except: + print(trimmed_format_exc()) # <-- 用于定义和切换多个azure模型 --> -AZURE_CFG_ARRAY, = get_conf("AZURE_CFG_ARRAY") +AZURE_CFG_ARRAY = get_conf("AZURE_CFG_ARRAY") if len(AZURE_CFG_ARRAY) > 0: for azure_model_name, azure_cfg_dict in AZURE_CFG_ARRAY.items(): # 可能会覆盖之前的配置,但这是意料之中的 diff --git a/request_llms/bridge_chatglm.py b/request_llms/bridge_chatglm.py new file mode 100644 index 00000000..16e1d8fc --- /dev/null +++ b/request_llms/bridge_chatglm.py @@ -0,0 +1,79 @@ +model_name = "ChatGLM" +cmd_to_install = "`pip install -r request_llms/requirements_chatglm.txt`" + + +from transformers import AutoModel, AutoTokenizer +from toolbox import get_conf, ProxyNetworkActivate +from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns, SingletonLocalLLM + + + +# 
------------------------------------------------------------------------------------------------------------------------ +# 🔌💻 Local Model +# ------------------------------------------------------------------------------------------------------------------------ +@SingletonLocalLLM +class GetGLM2Handle(LocalLLMHandle): + + def load_model_info(self): + # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 + self.model_name = model_name + self.cmd_to_install = cmd_to_install + + def load_model_and_tokenizer(self): + # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 + import os, glob + import os + import platform + LOCAL_MODEL_QUANT, device = get_conf('LOCAL_MODEL_QUANT', 'LOCAL_MODEL_DEVICE') + + if LOCAL_MODEL_QUANT == "INT4": # INT4 + _model_name_ = "THUDM/chatglm2-6b-int4" + elif LOCAL_MODEL_QUANT == "INT8": # INT8 + _model_name_ = "THUDM/chatglm2-6b-int8" + else: + _model_name_ = "THUDM/chatglm2-6b" # FP16 + + with ProxyNetworkActivate('Download_LLM'): + chatglm_tokenizer = AutoTokenizer.from_pretrained(_model_name_, trust_remote_code=True) + if device=='cpu': + chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True).float() + else: + chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True).half().cuda() + chatglm_model = chatglm_model.eval() + + self._model = chatglm_model + self._tokenizer = chatglm_tokenizer + return self._model, self._tokenizer + + def llm_stream_generator(self, **kwargs): + # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 + def adaptor(kwargs): + query = kwargs['query'] + max_length = kwargs['max_length'] + top_p = kwargs['top_p'] + temperature = kwargs['temperature'] + history = kwargs['history'] + return query, max_length, top_p, temperature, history + + query, max_length, top_p, temperature, history = adaptor(kwargs) + + for response, history in self._model.stream_chat(self._tokenizer, + query, + history, + max_length=max_length, + top_p=top_p, + temperature=temperature, + ): + yield response + + def try_to_import_special_deps(self, **kwargs): + # import something that will raise error if the user does not install requirement_*.txt + # 🏃‍♂️🏃‍♂️🏃‍♂️ 主进程执行 + import importlib + # importlib.import_module('modelscope') + + +# ------------------------------------------------------------------------------------------------------------------------ +# 🔌💻 GPT-Academic Interface +# ------------------------------------------------------------------------------------------------------------------------ +predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetGLM2Handle, model_name) \ No newline at end of file diff --git a/request_llms/bridge_chatglm3.py b/request_llms/bridge_chatglm3.py new file mode 100644 index 00000000..461c3064 --- /dev/null +++ b/request_llms/bridge_chatglm3.py @@ -0,0 +1,78 @@ +model_name = "ChatGLM3" +cmd_to_install = "`pip install -r request_llms/requirements_chatglm.txt`" + + +from transformers import AutoModel, AutoTokenizer +from toolbox import get_conf, ProxyNetworkActivate +from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns, SingletonLocalLLM + + + +# ------------------------------------------------------------------------------------------------------------------------ +# 🔌💻 Local Model +# ------------------------------------------------------------------------------------------------------------------------ +@SingletonLocalLLM +class GetGLM3Handle(LocalLLMHandle): + + def load_model_info(self): + # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 + self.model_name = model_name + self.cmd_to_install = cmd_to_install + + def load_model_and_tokenizer(self): + # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 + 
import os, glob + import os + import platform + LOCAL_MODEL_QUANT, device = get_conf('LOCAL_MODEL_QUANT', 'LOCAL_MODEL_DEVICE') + + if LOCAL_MODEL_QUANT == "INT4": # INT4 + _model_name_ = "THUDM/chatglm3-6b-int4" + elif LOCAL_MODEL_QUANT == "INT8": # INT8 + _model_name_ = "THUDM/chatglm3-6b-int8" + else: + _model_name_ = "THUDM/chatglm3-6b" # FP16 + with ProxyNetworkActivate('Download_LLM'): + chatglm_tokenizer = AutoTokenizer.from_pretrained(_model_name_, trust_remote_code=True) + if device=='cpu': + chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True, device='cpu').float() + else: + chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True, device='cuda') + chatglm_model = chatglm_model.eval() + + self._model = chatglm_model + self._tokenizer = chatglm_tokenizer + return self._model, self._tokenizer + + def llm_stream_generator(self, **kwargs): + # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 + def adaptor(kwargs): + query = kwargs['query'] + max_length = kwargs['max_length'] + top_p = kwargs['top_p'] + temperature = kwargs['temperature'] + history = kwargs['history'] + return query, max_length, top_p, temperature, history + + query, max_length, top_p, temperature, history = adaptor(kwargs) + + for response, history in self._model.stream_chat(self._tokenizer, + query, + history, + max_length=max_length, + top_p=top_p, + temperature=temperature, + ): + yield response + + def try_to_import_special_deps(self, **kwargs): + # import something that will raise error if the user does not install requirement_*.txt + # 🏃‍♂️🏃‍♂️🏃‍♂️ 主进程执行 + import importlib + # importlib.import_module('modelscope') + + +# ------------------------------------------------------------------------------------------------------------------------ +# 🔌💻 GPT-Academic Interface +# ------------------------------------------------------------------------------------------------------------------------ +predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetGLM3Handle, model_name, history_format='chatglm3') \ No newline at end of file diff --git a/request_llm/bridge_chatglmft.py b/request_llms/bridge_chatglmft.py similarity index 94% rename from request_llm/bridge_chatglmft.py rename to request_llms/bridge_chatglmft.py index 4416382a..d812bae3 100644 --- a/request_llm/bridge_chatglmft.py +++ b/request_llms/bridge_chatglmft.py @@ -44,7 +44,7 @@ class GetGLMFTHandle(Process): self.info = "依赖检测通过" self.success = True except: - self.info = "缺少ChatGLMFT的依赖,如果要使用ChatGLMFT,除了基础的pip依赖以外,您还需要运行`pip install -r request_llm/requirements_chatglm.txt`安装ChatGLM的依赖。" + self.info = "缺少ChatGLMFT的依赖,如果要使用ChatGLMFT,除了基础的pip依赖以外,您还需要运行`pip install -r request_llms/requirements_chatglm.txt`安装ChatGLM的依赖。" self.success = False def ready(self): @@ -59,11 +59,11 @@ class GetGLMFTHandle(Process): if self.chatglmft_model is None: from transformers import AutoConfig import torch - # conf = 'request_llm/current_ptune_model.json' + # conf = 'request_llms/current_ptune_model.json' # if not os.path.exists(conf): raise RuntimeError('找不到微调模型信息') # with open(conf, 'r', encoding='utf8') as f: # model_args = json.loads(f.read()) - CHATGLM_PTUNING_CHECKPOINT, = get_conf('CHATGLM_PTUNING_CHECKPOINT') + CHATGLM_PTUNING_CHECKPOINT = get_conf('CHATGLM_PTUNING_CHECKPOINT') assert os.path.exists(CHATGLM_PTUNING_CHECKPOINT), "找不到微调模型检查点" conf = os.path.join(CHATGLM_PTUNING_CHECKPOINT, "config.json") with open(conf, 'r', encoding='utf8') as f: @@ -140,7 +140,7 @@ glmft_handle = None def predict_no_ui_long_connection(inputs, llm_kwargs, 
history=[], sys_prompt="", observe_window=[], console_slience=False): """ 多线程方法 - 函数的说明请见 request_llm/bridge_all.py + 函数的说明请见 request_llms/bridge_all.py """ global glmft_handle if glmft_handle is None: @@ -171,7 +171,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): """ 单线程方法 - 函数的说明请见 request_llm/bridge_all.py + 函数的说明请见 request_llms/bridge_all.py """ chatbot.append((inputs, "")) @@ -195,13 +195,13 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp history_feedin.append([history[2*i], history[2*i+1]] ) # 开始接收chatglmft的回复 - response = "[Local Message]: 等待ChatGLMFT响应中 ..." + response = "[Local Message] 等待ChatGLMFT响应中 ..." for response in glmft_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']): chatbot[-1] = (inputs, response) yield from update_ui(chatbot=chatbot, history=history) # 总结输出 - if response == "[Local Message]: 等待ChatGLMFT响应中 ...": - response = "[Local Message]: ChatGLMFT响应异常 ..." + if response == "[Local Message] 等待ChatGLMFT响应中 ...": + response = "[Local Message] ChatGLMFT响应异常 ..." history.extend([inputs, response]) yield from update_ui(chatbot=chatbot, history=history) diff --git a/request_llm/bridge_chatglmonnx.py b/request_llms/bridge_chatglmonnx.py similarity index 83% rename from request_llm/bridge_chatglmonnx.py rename to request_llms/bridge_chatglmonnx.py index 594bcca1..312c6846 100644 --- a/request_llm/bridge_chatglmonnx.py +++ b/request_llms/bridge_chatglmonnx.py @@ -1,5 +1,5 @@ model_name = "ChatGLM-ONNX" -cmd_to_install = "`pip install -r request_llm/requirements_chatglm_onnx.txt`" +cmd_to_install = "`pip install -r request_llms/requirements_chatglm_onnx.txt`" from transformers import AutoModel, AutoTokenizer @@ -28,13 +28,13 @@ class GetONNXGLMHandle(LocalLLMHandle): def load_model_and_tokenizer(self): # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 import os, glob - if not len(glob.glob("./request_llm/ChatGLM-6b-onnx-u8s8/chatglm-6b-int8-onnx-merged/*.bin")) >= 7: # 该模型有七个 bin 文件 + if not len(glob.glob("./request_llms/ChatGLM-6b-onnx-u8s8/chatglm-6b-int8-onnx-merged/*.bin")) >= 7: # 该模型有七个 bin 文件 from huggingface_hub import snapshot_download - snapshot_download(repo_id="K024/ChatGLM-6b-onnx-u8s8", local_dir="./request_llm/ChatGLM-6b-onnx-u8s8") + snapshot_download(repo_id="K024/ChatGLM-6b-onnx-u8s8", local_dir="./request_llms/ChatGLM-6b-onnx-u8s8") def create_model(): return ChatGLMModel( - tokenizer_path = "./request_llm/ChatGLM-6b-onnx-u8s8/chatglm-6b-int8-onnx-merged/sentencepiece.model", - onnx_model_path = "./request_llm/ChatGLM-6b-onnx-u8s8/chatglm-6b-int8-onnx-merged/chatglm-6b-int8.onnx" + tokenizer_path = "./request_llms/ChatGLM-6b-onnx-u8s8/chatglm-6b-int8-onnx-merged/sentencepiece.model", + onnx_model_path = "./request_llms/ChatGLM-6b-onnx-u8s8/chatglm-6b-int8-onnx-merged/chatglm-6b-int8.onnx" ) self._model = create_model() return self._model, None diff --git a/request_llm/bridge_chatgpt.py b/request_llms/bridge_chatgpt.py similarity index 100% rename from request_llm/bridge_chatgpt.py rename to request_llms/bridge_chatgpt.py diff --git a/request_llm/bridge_chatgpt_website.py b/request_llms/bridge_chatgpt_website.py similarity index 100% rename from request_llm/bridge_chatgpt_website.py rename to request_llms/bridge_chatgpt_website.py diff --git a/request_llm/bridge_claude.py 
b/request_llms/bridge_claude.py similarity index 100% rename from request_llm/bridge_claude.py rename to request_llms/bridge_claude.py diff --git a/request_llm/bridge_internlm.py b/request_llms/bridge_internlm.py similarity index 98% rename from request_llm/bridge_internlm.py rename to request_llms/bridge_internlm.py index 0ec65b64..073c193a 100644 --- a/request_llm/bridge_internlm.py +++ b/request_llms/bridge_internlm.py @@ -1,5 +1,5 @@ model_name = "InternLM" -cmd_to_install = "`pip install -r request_llm/requirements_chatglm.txt`" +cmd_to_install = "`pip install -r request_llms/requirements_chatglm.txt`" from transformers import AutoModel, AutoTokenizer import time @@ -52,7 +52,7 @@ class GetInternlmHandle(LocalLLMHandle): # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 import torch from transformers import AutoModelForCausalLM, AutoTokenizer - device, = get_conf('LOCAL_MODEL_DEVICE') + device = get_conf('LOCAL_MODEL_DEVICE') if self._model is None: tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True) if device=='cpu': diff --git a/request_llm/bridge_jittorllms_llama.py b/request_llms/bridge_jittorllms_llama.py similarity index 90% rename from request_llm/bridge_jittorllms_llama.py rename to request_llms/bridge_jittorllms_llama.py index d4853578..2d3005e5 100644 --- a/request_llm/bridge_jittorllms_llama.py +++ b/request_llms/bridge_jittorllms_llama.py @@ -28,8 +28,8 @@ class GetGLMHandle(Process): self.success = True except: from toolbox import trimmed_format_exc - self.info = r"缺少jittorllms的依赖,如果要使用jittorllms,除了基础的pip依赖以外,您还需要运行`pip install -r request_llm/requirements_jittorllms.txt -i https://pypi.jittor.org/simple -I`"+\ - r"和`git clone https://gitlink.org.cn/jittor/JittorLLMs.git --depth 1 request_llm/jittorllms`两个指令来安装jittorllms的依赖(在项目根目录运行这两个指令)。" +\ + self.info = r"缺少jittorllms的依赖,如果要使用jittorllms,除了基础的pip依赖以外,您还需要运行`pip install -r request_llms/requirements_jittorllms.txt -i https://pypi.jittor.org/simple -I`"+\ + r"和`git clone https://gitlink.org.cn/jittor/JittorLLMs.git --depth 1 request_llms/jittorllms`两个指令来安装jittorllms的依赖(在项目根目录运行这两个指令)。" +\ r"警告:安装jittorllms依赖后将完全破坏现有的pytorch环境,建议使用docker环境!" 
+ trimmed_format_exc() self.success = False @@ -45,15 +45,15 @@ class GetGLMHandle(Process): env = os.environ.get("PATH", "") os.environ["PATH"] = env.replace('/cuda/bin', '/x/bin') root_dir_assume = os.path.abspath(os.path.dirname(__file__) + '/..') - os.chdir(root_dir_assume + '/request_llm/jittorllms') - sys.path.append(root_dir_assume + '/request_llm/jittorllms') + os.chdir(root_dir_assume + '/request_llms/jittorllms') + sys.path.append(root_dir_assume + '/request_llms/jittorllms') validate_path() # validate path so you can run from base directory def load_model(): import types try: if self.jittorllms_model is None: - device, = get_conf('LOCAL_MODEL_DEVICE') + device = get_conf('LOCAL_MODEL_DEVICE') from .jittorllms.models import get_model # availabel_models = ["chatglm", "pangualpha", "llama", "chatrwkv"] args_dict = {'model': 'llama'} @@ -109,7 +109,7 @@ llama_glm_handle = None def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False): """ 多线程方法 - 函数的说明请见 request_llm/bridge_all.py + 函数的说明请见 request_llms/bridge_all.py """ global llama_glm_handle if llama_glm_handle is None: @@ -140,7 +140,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): """ 单线程方法 - 函数的说明请见 request_llm/bridge_all.py + 函数的说明请见 request_llms/bridge_all.py """ chatbot.append((inputs, "")) @@ -163,13 +163,13 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp history_feedin.append([history[2*i], history[2*i+1]] ) # 开始接收jittorllms的回复 - response = "[Local Message]: 等待jittorllms响应中 ..." + response = "[Local Message] 等待jittorllms响应中 ..." for response in llama_glm_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=system_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']): chatbot[-1] = (inputs, response) yield from update_ui(chatbot=chatbot, history=history) # 总结输出 - if response == "[Local Message]: 等待jittorllms响应中 ...": - response = "[Local Message]: jittorllms响应异常 ..." + if response == "[Local Message] 等待jittorllms响应中 ...": + response = "[Local Message] jittorllms响应异常 ..." history.extend([inputs, response]) yield from update_ui(chatbot=chatbot, history=history) diff --git a/request_llm/bridge_jittorllms_pangualpha.py b/request_llms/bridge_jittorllms_pangualpha.py similarity index 90% rename from request_llm/bridge_jittorllms_pangualpha.py rename to request_llms/bridge_jittorllms_pangualpha.py index 20a30213..26401764 100644 --- a/request_llm/bridge_jittorllms_pangualpha.py +++ b/request_llms/bridge_jittorllms_pangualpha.py @@ -28,8 +28,8 @@ class GetGLMHandle(Process): self.success = True except: from toolbox import trimmed_format_exc - self.info = r"缺少jittorllms的依赖,如果要使用jittorllms,除了基础的pip依赖以外,您还需要运行`pip install -r request_llm/requirements_jittorllms.txt -i https://pypi.jittor.org/simple -I`"+\ - r"和`git clone https://gitlink.org.cn/jittor/JittorLLMs.git --depth 1 request_llm/jittorllms`两个指令来安装jittorllms的依赖(在项目根目录运行这两个指令)。" +\ + self.info = r"缺少jittorllms的依赖,如果要使用jittorllms,除了基础的pip依赖以外,您还需要运行`pip install -r request_llms/requirements_jittorllms.txt -i https://pypi.jittor.org/simple -I`"+\ + r"和`git clone https://gitlink.org.cn/jittor/JittorLLMs.git --depth 1 request_llms/jittorllms`两个指令来安装jittorllms的依赖(在项目根目录运行这两个指令)。" +\ r"警告:安装jittorllms依赖后将完全破坏现有的pytorch环境,建议使用docker环境!" 
+ trimmed_format_exc() self.success = False @@ -45,15 +45,15 @@ class GetGLMHandle(Process): env = os.environ.get("PATH", "") os.environ["PATH"] = env.replace('/cuda/bin', '/x/bin') root_dir_assume = os.path.abspath(os.path.dirname(__file__) + '/..') - os.chdir(root_dir_assume + '/request_llm/jittorllms') - sys.path.append(root_dir_assume + '/request_llm/jittorllms') + os.chdir(root_dir_assume + '/request_llms/jittorllms') + sys.path.append(root_dir_assume + '/request_llms/jittorllms') validate_path() # validate path so you can run from base directory def load_model(): import types try: if self.jittorllms_model is None: - device, = get_conf('LOCAL_MODEL_DEVICE') + device = get_conf('LOCAL_MODEL_DEVICE') from .jittorllms.models import get_model # availabel_models = ["chatglm", "pangualpha", "llama", "chatrwkv"] args_dict = {'model': 'pangualpha'} @@ -109,7 +109,7 @@ pangu_glm_handle = None def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False): """ 多线程方法 - 函数的说明请见 request_llm/bridge_all.py + 函数的说明请见 request_llms/bridge_all.py """ global pangu_glm_handle if pangu_glm_handle is None: @@ -140,7 +140,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): """ 单线程方法 - 函数的说明请见 request_llm/bridge_all.py + 函数的说明请见 request_llms/bridge_all.py """ chatbot.append((inputs, "")) @@ -163,13 +163,13 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp history_feedin.append([history[2*i], history[2*i+1]] ) # 开始接收jittorllms的回复 - response = "[Local Message]: 等待jittorllms响应中 ..." + response = "[Local Message] 等待jittorllms响应中 ..." for response in pangu_glm_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=system_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']): chatbot[-1] = (inputs, response) yield from update_ui(chatbot=chatbot, history=history) # 总结输出 - if response == "[Local Message]: 等待jittorllms响应中 ...": - response = "[Local Message]: jittorllms响应异常 ..." + if response == "[Local Message] 等待jittorllms响应中 ...": + response = "[Local Message] jittorllms响应异常 ..." history.extend([inputs, response]) yield from update_ui(chatbot=chatbot, history=history) diff --git a/request_llm/bridge_jittorllms_rwkv.py b/request_llms/bridge_jittorllms_rwkv.py similarity index 90% rename from request_llm/bridge_jittorllms_rwkv.py rename to request_llms/bridge_jittorllms_rwkv.py index ee4f592f..0021a50d 100644 --- a/request_llm/bridge_jittorllms_rwkv.py +++ b/request_llms/bridge_jittorllms_rwkv.py @@ -28,8 +28,8 @@ class GetGLMHandle(Process): self.success = True except: from toolbox import trimmed_format_exc - self.info = r"缺少jittorllms的依赖,如果要使用jittorllms,除了基础的pip依赖以外,您还需要运行`pip install -r request_llm/requirements_jittorllms.txt -i https://pypi.jittor.org/simple -I`"+\ - r"和`git clone https://gitlink.org.cn/jittor/JittorLLMs.git --depth 1 request_llm/jittorllms`两个指令来安装jittorllms的依赖(在项目根目录运行这两个指令)。" +\ + self.info = r"缺少jittorllms的依赖,如果要使用jittorllms,除了基础的pip依赖以外,您还需要运行`pip install -r request_llms/requirements_jittorllms.txt -i https://pypi.jittor.org/simple -I`"+\ + r"和`git clone https://gitlink.org.cn/jittor/JittorLLMs.git --depth 1 request_llms/jittorllms`两个指令来安装jittorllms的依赖(在项目根目录运行这两个指令)。" +\ r"警告:安装jittorllms依赖后将完全破坏现有的pytorch环境,建议使用docker环境!" 
+ trimmed_format_exc() self.success = False @@ -45,15 +45,15 @@ class GetGLMHandle(Process): env = os.environ.get("PATH", "") os.environ["PATH"] = env.replace('/cuda/bin', '/x/bin') root_dir_assume = os.path.abspath(os.path.dirname(__file__) + '/..') - os.chdir(root_dir_assume + '/request_llm/jittorllms') - sys.path.append(root_dir_assume + '/request_llm/jittorllms') + os.chdir(root_dir_assume + '/request_llms/jittorllms') + sys.path.append(root_dir_assume + '/request_llms/jittorllms') validate_path() # validate path so you can run from base directory def load_model(): import types try: if self.jittorllms_model is None: - device, = get_conf('LOCAL_MODEL_DEVICE') + device = get_conf('LOCAL_MODEL_DEVICE') from .jittorllms.models import get_model # availabel_models = ["chatglm", "pangualpha", "llama", "chatrwkv"] args_dict = {'model': 'chatrwkv'} @@ -109,7 +109,7 @@ rwkv_glm_handle = None def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False): """ 多线程方法 - 函数的说明请见 request_llm/bridge_all.py + 函数的说明请见 request_llms/bridge_all.py """ global rwkv_glm_handle if rwkv_glm_handle is None: @@ -140,7 +140,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): """ 单线程方法 - 函数的说明请见 request_llm/bridge_all.py + 函数的说明请见 request_llms/bridge_all.py """ chatbot.append((inputs, "")) @@ -163,13 +163,13 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp history_feedin.append([history[2*i], history[2*i+1]] ) # 开始接收jittorllms的回复 - response = "[Local Message]: 等待jittorllms响应中 ..." + response = "[Local Message] 等待jittorllms响应中 ..." for response in rwkv_glm_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=system_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']): chatbot[-1] = (inputs, response) yield from update_ui(chatbot=chatbot, history=history) # 总结输出 - if response == "[Local Message]: 等待jittorllms响应中 ...": - response = "[Local Message]: jittorllms响应异常 ..." + if response == "[Local Message] 等待jittorllms响应中 ...": + response = "[Local Message] jittorllms响应异常 ..." 
history.extend([inputs, response]) yield from update_ui(chatbot=chatbot, history=history) diff --git a/request_llm/bridge_llama2.py b/request_llms/bridge_llama2.py similarity index 98% rename from request_llm/bridge_llama2.py rename to request_llms/bridge_llama2.py index d1be4463..bc8ef7eb 100644 --- a/request_llm/bridge_llama2.py +++ b/request_llms/bridge_llama2.py @@ -1,5 +1,5 @@ model_name = "LLaMA" -cmd_to_install = "`pip install -r request_llm/requirements_chatglm.txt`" +cmd_to_install = "`pip install -r request_llms/requirements_chatglm.txt`" from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer diff --git a/request_llm/bridge_moss.py b/request_llms/bridge_moss.py similarity index 94% rename from request_llm/bridge_moss.py rename to request_llms/bridge_moss.py index 3c6217d2..d7399f52 100644 --- a/request_llm/bridge_moss.py +++ b/request_llms/bridge_moss.py @@ -24,12 +24,12 @@ class GetGLMHandle(Process): def check_dependency(self): # 主进程执行 try: import datasets, os - assert os.path.exists('request_llm/moss/models') + assert os.path.exists('request_llms/moss/models') self.info = "依赖检测通过" self.success = True except: self.info = """ - 缺少MOSS的依赖,如果要使用MOSS,除了基础的pip依赖以外,您还需要运行`pip install -r request_llm/requirements_moss.txt`和`git clone https://github.com/OpenLMLab/MOSS.git request_llm/moss`安装MOSS的依赖。 + 缺少MOSS的依赖,如果要使用MOSS,除了基础的pip依赖以外,您还需要运行`pip install -r request_llms/requirements_moss.txt`和`git clone https://github.com/OpenLMLab/MOSS.git request_llms/moss`安装MOSS的依赖。 """ self.success = False return self.success @@ -110,8 +110,8 @@ class GetGLMHandle(Process): def validate_path(): import os, sys root_dir_assume = os.path.abspath(os.path.dirname(__file__) + '/..') - os.chdir(root_dir_assume + '/request_llm/moss') - sys.path.append(root_dir_assume + '/request_llm/moss') + os.chdir(root_dir_assume + '/request_llms/moss') + sys.path.append(root_dir_assume + '/request_llms/moss') validate_path() # validate path so you can run from base directory try: @@ -176,7 +176,7 @@ moss_handle = None def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False): """ 多线程方法 - 函数的说明请见 request_llm/bridge_all.py + 函数的说明请见 request_llms/bridge_all.py """ global moss_handle if moss_handle is None: @@ -206,7 +206,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): """ 单线程方法 - 函数的说明请见 request_llm/bridge_all.py + 函数的说明请见 request_llms/bridge_all.py """ chatbot.append((inputs, "")) @@ -219,7 +219,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp moss_handle = None return else: - response = "[Local Message]: 等待MOSS响应中 ..." + response = "[Local Message] 等待MOSS响应中 ..." chatbot[-1] = (inputs, response) yield from update_ui(chatbot=chatbot, history=history) @@ -238,7 +238,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp yield from update_ui(chatbot=chatbot, history=history) # 总结输出 - if response == "[Local Message]: 等待MOSS响应中 ...": - response = "[Local Message]: MOSS响应异常 ..." + if response == "[Local Message] 等待MOSS响应中 ...": + response = "[Local Message] MOSS响应异常 ..." 
history.extend([inputs, response.strip('<|MOSS|>: ')]) yield from update_ui(chatbot=chatbot, history=history) diff --git a/request_llm/bridge_newbingfree.py b/request_llms/bridge_newbingfree.py similarity index 93% rename from request_llm/bridge_newbingfree.py rename to request_llms/bridge_newbingfree.py index c6066454..cb83a0fb 100644 --- a/request_llm/bridge_newbingfree.py +++ b/request_llms/bridge_newbingfree.py @@ -54,7 +54,7 @@ class NewBingHandle(Process): self.info = "依赖检测通过,等待NewBing响应。注意目前不能多人同时调用NewBing接口(有线程锁),否则将导致每个人的NewBing问询历史互相渗透。调用NewBing时,会自动使用已配置的代理。" self.success = True except: - self.info = "缺少的依赖,如果要使用Newbing,除了基础的pip依赖以外,您还需要运行`pip install -r request_llm/requirements_newbing.txt`安装Newbing的依赖。" + self.info = "缺少的依赖,如果要使用Newbing,除了基础的pip依赖以外,您还需要运行`pip install -r request_llms/requirements_newbing.txt`安装Newbing的依赖。" self.success = False def ready(self): @@ -62,8 +62,8 @@ class NewBingHandle(Process): async def async_run(self): # 读取配置 - NEWBING_STYLE, = get_conf('NEWBING_STYLE') - from request_llm.bridge_all import model_info + NEWBING_STYLE = get_conf('NEWBING_STYLE') + from request_llms.bridge_all import model_info endpoint = model_info['newbing']['endpoint'] while True: # 等待 @@ -181,7 +181,7 @@ newbingfree_handle = None def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False): """ 多线程方法 - 函数的说明请见 request_llm/bridge_all.py + 函数的说明请见 request_llms/bridge_all.py """ global newbingfree_handle if (newbingfree_handle is None) or (not newbingfree_handle.success): @@ -199,7 +199,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", watch_dog_patience = 5 # 看门狗 (watchdog) 的耐心, 设置5秒即可 response = "" - if len(observe_window) >= 1: observe_window[0] = "[Local Message]: 等待NewBing响应中 ..." + if len(observe_window) >= 1: observe_window[0] = "[Local Message] 等待NewBing响应中 ..." for response in newbingfree_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=sys_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']): if len(observe_window) >= 1: observe_window[0] = preprocess_newbing_out_simple(response) if len(observe_window) >= 2: @@ -210,9 +210,9 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): """ 单线程方法 - 函数的说明请见 request_llm/bridge_all.py + 函数的说明请见 request_llms/bridge_all.py """ - chatbot.append((inputs, "[Local Message]: 等待NewBing响应中 ...")) + chatbot.append((inputs, "[Local Message] 等待NewBing响应中 ...")) global newbingfree_handle if (newbingfree_handle is None) or (not newbingfree_handle.success): @@ -231,13 +231,13 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp for i in range(len(history)//2): history_feedin.append([history[2*i], history[2*i+1]] ) - chatbot[-1] = (inputs, "[Local Message]: 等待NewBing响应中 ...") - response = "[Local Message]: 等待NewBing响应中 ..." + chatbot[-1] = (inputs, "[Local Message] 等待NewBing响应中 ...") + response = "[Local Message] 等待NewBing响应中 ..." 
yield from update_ui(chatbot=chatbot, history=history, msg="NewBing响应缓慢,尚未完成全部响应,请耐心完成后再提交新问题。") for response in newbingfree_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=system_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']): chatbot[-1] = (inputs, preprocess_newbing_out(response)) yield from update_ui(chatbot=chatbot, history=history, msg="NewBing响应缓慢,尚未完成全部响应,请耐心完成后再提交新问题。") - if response == "[Local Message]: 等待NewBing响应中 ...": response = "[Local Message]: NewBing响应异常,请刷新界面重试 ..." + if response == "[Local Message] 等待NewBing响应中 ...": response = "[Local Message] NewBing响应异常,请刷新界面重试 ..." history.extend([inputs, response]) logging.info(f'[raw_input] {inputs}') logging.info(f'[response] {response}') diff --git a/request_llm/bridge_qianfan.py b/request_llms/bridge_qianfan.py similarity index 94% rename from request_llm/bridge_qianfan.py rename to request_llms/bridge_qianfan.py index bbae5630..a806e0d4 100644 --- a/request_llm/bridge_qianfan.py +++ b/request_llms/bridge_qianfan.py @@ -75,7 +75,7 @@ def generate_message_payload(inputs, llm_kwargs, history, system_prompt): def generate_from_baidu_qianfan(inputs, llm_kwargs, history, system_prompt): - BAIDU_CLOUD_QIANFAN_MODEL, = get_conf('BAIDU_CLOUD_QIANFAN_MODEL') + BAIDU_CLOUD_QIANFAN_MODEL = get_conf('BAIDU_CLOUD_QIANFAN_MODEL') url_lib = { "ERNIE-Bot-4": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions_pro", @@ -120,7 +120,7 @@ def generate_from_baidu_qianfan(inputs, llm_kwargs, history, system_prompt): def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False): """ ⭐多线程方法 - 函数的说明请见 request_llm/bridge_all.py + 函数的说明请见 request_llms/bridge_all.py """ watch_dog_patience = 5 response = "" @@ -135,7 +135,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): """ ⭐单线程方法 - 函数的说明请见 request_llm/bridge_all.py + 函数的说明请见 request_llms/bridge_all.py """ chatbot.append((inputs, "")) @@ -159,8 +159,8 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp return # 总结输出 - response = f"[Local Message]: {model_name}响应异常 ..." - if response == f"[Local Message]: 等待{model_name}响应中 ...": - response = f"[Local Message]: {model_name}响应异常 ..." + response = f"[Local Message] {model_name}响应异常 ..." + if response == f"[Local Message] 等待{model_name}响应中 ...": + response = f"[Local Message] {model_name}响应异常 ..." 
history.extend([inputs, response]) yield from update_ui(chatbot=chatbot, history=history) \ No newline at end of file diff --git a/request_llm/bridge_qwen.py b/request_llms/bridge_qwen.py similarity index 97% rename from request_llm/bridge_qwen.py rename to request_llms/bridge_qwen.py index 07ed243f..62682cfa 100644 --- a/request_llm/bridge_qwen.py +++ b/request_llms/bridge_qwen.py @@ -1,5 +1,5 @@ model_name = "Qwen" -cmd_to_install = "`pip install -r request_llm/requirements_qwen.txt`" +cmd_to_install = "`pip install -r request_llms/requirements_qwen.txt`" from transformers import AutoModel, AutoTokenizer diff --git a/request_llm/bridge_spark.py b/request_llms/bridge_spark.py similarity index 82% rename from request_llm/bridge_spark.py rename to request_llms/bridge_spark.py index 0fe925f7..6ba39ee7 100644 --- a/request_llm/bridge_spark.py +++ b/request_llms/bridge_spark.py @@ -8,7 +8,7 @@ from multiprocessing import Process, Pipe model_name = '星火认知大模型' def validate_key(): - XFYUN_APPID, = get_conf('XFYUN_APPID', ) + XFYUN_APPID = get_conf('XFYUN_APPID') if XFYUN_APPID == '00000000' or XFYUN_APPID == '': return False return True @@ -16,7 +16,7 @@ def validate_key(): def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False): """ ⭐多线程方法 - 函数的说明请见 request_llm/bridge_all.py + 函数的说明请见 request_llms/bridge_all.py """ watch_dog_patience = 5 response = "" @@ -36,13 +36,13 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): """ ⭐单线程方法 - 函数的说明请见 request_llm/bridge_all.py + 函数的说明请见 request_llms/bridge_all.py """ chatbot.append((inputs, "")) yield from update_ui(chatbot=chatbot, history=history) if validate_key() is False: - yield from update_ui_lastest_msg(lastmsg="[Local Message]: 请配置讯飞星火大模型的XFYUN_APPID, XFYUN_API_KEY, XFYUN_API_SECRET", chatbot=chatbot, history=history, delay=0) + yield from update_ui_lastest_msg(lastmsg="[Local Message] 请配置讯飞星火大模型的XFYUN_APPID, XFYUN_API_KEY, XFYUN_API_SECRET", chatbot=chatbot, history=history, delay=0) return if additional_fn is not None: @@ -57,7 +57,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp yield from update_ui(chatbot=chatbot, history=history) # 总结输出 - if response == f"[Local Message]: 等待{model_name}响应中 ...": - response = f"[Local Message]: {model_name}响应异常 ..." + if response == f"[Local Message] 等待{model_name}响应中 ...": + response = f"[Local Message] {model_name}响应异常 ..." 
history.extend([inputs, response]) yield from update_ui(chatbot=chatbot, history=history) \ No newline at end of file diff --git a/request_llm/bridge_stackclaude.py b/request_llms/bridge_stackclaude.py similarity index 92% rename from request_llm/bridge_stackclaude.py rename to request_llms/bridge_stackclaude.py index 3f2ee674..0b42a17c 100644 --- a/request_llm/bridge_stackclaude.py +++ b/request_llms/bridge_stackclaude.py @@ -36,7 +36,7 @@ try: CHANNEL_ID = None async def open_channel(self): - response = await self.conversations_open(users=get_conf('SLACK_CLAUDE_BOT_ID')[0]) + response = await self.conversations_open(users=get_conf('SLACK_CLAUDE_BOT_ID')) self.CHANNEL_ID = response["channel"]["id"] async def chat(self, text): @@ -51,7 +51,7 @@ try: # TODO:暂时不支持历史消息,因为在同一个频道里存在多人使用时历史消息渗透问题 resp = await self.conversations_history(channel=self.CHANNEL_ID, oldest=self.LAST_TS, limit=1) msg = [msg for msg in resp["messages"] - if msg.get("user") == get_conf('SLACK_CLAUDE_BOT_ID')[0]] + if msg.get("user") == get_conf('SLACK_CLAUDE_BOT_ID')] return msg except (SlackApiError, KeyError) as e: raise RuntimeError(f"获取Slack消息失败。") @@ -99,7 +99,7 @@ class ClaudeHandle(Process): self.info = "依赖检测通过,等待Claude响应。注意目前不能多人同时调用Claude接口(有线程锁),否则将导致每个人的Claude问询历史互相渗透。调用Claude时,会自动使用已配置的代理。" self.success = True except: - self.info = "缺少的依赖,如果要使用Claude,除了基础的pip依赖以外,您还需要运行`pip install -r request_llm/requirements_slackclaude.txt`安装Claude的依赖,然后重启程序。" + self.info = "缺少的依赖,如果要使用Claude,除了基础的pip依赖以外,您还需要运行`pip install -r request_llms/requirements_slackclaude.txt`安装Claude的依赖,然后重启程序。" self.success = False def ready(self): @@ -146,14 +146,14 @@ class ClaudeHandle(Process): self.local_history = [] if (self.claude_model is None) or (not self.success): # 代理设置 - proxies, = get_conf('proxies') + proxies = get_conf('proxies') if proxies is None: self.proxies_https = None else: self.proxies_https = proxies['https'] try: - SLACK_CLAUDE_USER_TOKEN, = get_conf('SLACK_CLAUDE_USER_TOKEN') + SLACK_CLAUDE_USER_TOKEN = get_conf('SLACK_CLAUDE_USER_TOKEN') self.claude_model = SlackClient(token=SLACK_CLAUDE_USER_TOKEN, proxy=self.proxies_https) print('Claude组件初始化成功。') except: @@ -204,7 +204,7 @@ claude_handle = None def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False): """ 多线程方法 - 函数的说明请见 request_llm/bridge_all.py + 函数的说明请见 request_llms/bridge_all.py """ global claude_handle if (claude_handle is None) or (not claude_handle.success): @@ -222,7 +222,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", watch_dog_patience = 5 # 看门狗 (watchdog) 的耐心, 设置5秒即可 response = "" - observe_window[0] = "[Local Message]: 等待Claude响应中 ..." + observe_window[0] = "[Local Message] 等待Claude响应中 ..." 
for response in claude_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=sys_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']): observe_window[0] = preprocess_newbing_out_simple(response) if len(observe_window) >= 2: @@ -234,9 +234,9 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream=True, additional_fn=None): """ 单线程方法 - 函数的说明请见 request_llm/bridge_all.py + 函数的说明请见 request_llms/bridge_all.py """ - chatbot.append((inputs, "[Local Message]: 等待Claude响应中 ...")) + chatbot.append((inputs, "[Local Message] 等待Claude响应中 ...")) global claude_handle if (claude_handle is None) or (not claude_handle.success): @@ -255,14 +255,14 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp for i in range(len(history)//2): history_feedin.append([history[2*i], history[2*i+1]]) - chatbot[-1] = (inputs, "[Local Message]: 等待Claude响应中 ...") - response = "[Local Message]: 等待Claude响应中 ..." + chatbot[-1] = (inputs, "[Local Message] 等待Claude响应中 ...") + response = "[Local Message] 等待Claude响应中 ..." yield from update_ui(chatbot=chatbot, history=history, msg="Claude响应缓慢,尚未完成全部响应,请耐心完成后再提交新问题。") for response in claude_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=system_prompt): chatbot[-1] = (inputs, preprocess_newbing_out(response)) yield from update_ui(chatbot=chatbot, history=history, msg="Claude响应缓慢,尚未完成全部响应,请耐心完成后再提交新问题。") - if response == "[Local Message]: 等待Claude响应中 ...": - response = "[Local Message]: Claude响应异常,请刷新界面重试 ..." + if response == "[Local Message] 等待Claude响应中 ...": + response = "[Local Message] Claude响应异常,请刷新界面重试 ..." 
history.extend([inputs, response]) logging.info(f'[raw_input] {inputs}') logging.info(f'[response] {response}') diff --git a/request_llm/bridge_tgui.py b/request_llms/bridge_tgui.py similarity index 100% rename from request_llm/bridge_tgui.py rename to request_llms/bridge_tgui.py diff --git a/request_llms/bridge_zhipu.py b/request_llms/bridge_zhipu.py new file mode 100644 index 00000000..a1e0de59 --- /dev/null +++ b/request_llms/bridge_zhipu.py @@ -0,0 +1,59 @@ + +import time +from toolbox import update_ui, get_conf, update_ui_lastest_msg + +model_name = '智谱AI大模型' + +def validate_key(): + ZHIPUAI_API_KEY = get_conf("ZHIPUAI_API_KEY") + if ZHIPUAI_API_KEY == '': return False + return True + +def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False): + """ + ⭐多线程方法 + 函数的说明请见 request_llms/bridge_all.py + """ + watch_dog_patience = 5 + response = "" + + if validate_key() is False: + raise RuntimeError('请配置ZHIPUAI_API_KEY') + + from .com_zhipuapi import ZhipuRequestInstance + sri = ZhipuRequestInstance() + for response in sri.generate(inputs, llm_kwargs, history, sys_prompt): + if len(observe_window) >= 1: + observe_window[0] = response + if len(observe_window) >= 2: + if (time.time()-observe_window[1]) > watch_dog_patience: raise RuntimeError("程序终止。") + return response + +def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): + """ + ⭐单线程方法 + 函数的说明请见 request_llms/bridge_all.py + """ + chatbot.append((inputs, "")) + yield from update_ui(chatbot=chatbot, history=history) + + if validate_key() is False: + yield from update_ui_lastest_msg(lastmsg="[Local Message] 请配置ZHIPUAI_API_KEY", chatbot=chatbot, history=history, delay=0) + return + + if additional_fn is not None: + from core_functional import handle_core_functionality + inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot) + + # 开始接收回复 + from .com_zhipuapi import ZhipuRequestInstance + sri = ZhipuRequestInstance() + for response in sri.generate(inputs, llm_kwargs, history, system_prompt): + chatbot[-1] = (inputs, response) + yield from update_ui(chatbot=chatbot, history=history) + + # 总结输出 + if response == f"[Local Message] 等待{model_name}响应中 ...": + response = f"[Local Message] {model_name}响应异常 ..." + history.extend([inputs, response]) + yield from update_ui(chatbot=chatbot, history=history) \ No newline at end of file diff --git a/request_llm/chatglmoonx.py b/request_llms/chatglmoonx.py similarity index 100% rename from request_llm/chatglmoonx.py rename to request_llms/chatglmoonx.py diff --git a/request_llm/com_sparkapi.py b/request_llms/com_sparkapi.py similarity index 100% rename from request_llm/com_sparkapi.py rename to request_llms/com_sparkapi.py diff --git a/request_llms/com_zhipuapi.py b/request_llms/com_zhipuapi.py new file mode 100644 index 00000000..445720d8 --- /dev/null +++ b/request_llms/com_zhipuapi.py @@ -0,0 +1,67 @@ +from toolbox import get_conf +import threading +import logging + +timeout_bot_msg = '[Local Message] Request timeout. Network error.' 
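A minimal smoke test for the new `request_llms/bridge_zhipu.py`, modeled on the `tests/test_llms.py` changes later in this diff. It assumes the snippet is run from the project root, that the `zhipuai` package is installed, and that `ZHIPUAI_API_KEY` is configured; the sampling values are illustrative only:

```python
import os, sys
sys.path.append(os.path.abspath('.'))  # assumption: run from the project root, like tests/test_llms.py

from request_llms.bridge_zhipu import predict_no_ui_long_connection

llm_kwargs = {
    'top_p': 0.7,          # forwarded to zhipuai.model_api.sse_invoke
    'temperature': 0.95,   # forwarded to zhipuai.model_api.sse_invoke
}

# Raises RuntimeError('请配置ZHIPUAI_API_KEY') if the key is missing (validate_key fails).
result = predict_no_ui_long_connection(
    inputs="你好,请做个自我介绍。",
    llm_kwargs=llm_kwargs,
    history=[],
    sys_prompt="You are a helpful assistant.",
)
print(result)
```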
+ +class ZhipuRequestInstance(): + def __init__(self): + + self.time_to_yield_event = threading.Event() + self.time_to_exit_event = threading.Event() + + self.result_buf = "" + + def generate(self, inputs, llm_kwargs, history, system_prompt): + # import _thread as thread + import zhipuai + ZHIPUAI_API_KEY, ZHIPUAI_MODEL = get_conf("ZHIPUAI_API_KEY", "ZHIPUAI_MODEL") + zhipuai.api_key = ZHIPUAI_API_KEY + self.result_buf = "" + response = zhipuai.model_api.sse_invoke( + model=ZHIPUAI_MODEL, + prompt=generate_message_payload(inputs, llm_kwargs, history, system_prompt), + top_p=llm_kwargs['top_p'], + temperature=llm_kwargs['temperature'], + ) + for event in response.events(): + if event.event == "add": + self.result_buf += event.data + yield self.result_buf + elif event.event == "error" or event.event == "interrupted": + raise RuntimeError("Unknown error:" + event.data) + elif event.event == "finish": + yield self.result_buf + break + else: + raise RuntimeError("Unknown error:" + str(event)) + + logging.info(f'[raw_input] {inputs}') + logging.info(f'[response] {self.result_buf}') + return self.result_buf + +def generate_message_payload(inputs, llm_kwargs, history, system_prompt): + conversation_cnt = len(history) // 2 + messages = [{"role": "user", "content": system_prompt}, {"role": "assistant", "content": "Certainly!"}] + if conversation_cnt: + for index in range(0, 2*conversation_cnt, 2): + what_i_have_asked = {} + what_i_have_asked["role"] = "user" + what_i_have_asked["content"] = history[index] + what_gpt_answer = {} + what_gpt_answer["role"] = "assistant" + what_gpt_answer["content"] = history[index+1] + if what_i_have_asked["content"] != "": + if what_gpt_answer["content"] == "": + continue + if what_gpt_answer["content"] == timeout_bot_msg: + continue + messages.append(what_i_have_asked) + messages.append(what_gpt_answer) + else: + messages[-1]['content'] = what_gpt_answer['content'] + what_i_ask_now = {} + what_i_ask_now["role"] = "user" + what_i_ask_now["content"] = inputs + messages.append(what_i_ask_now) + return messages diff --git a/request_llm/edge_gpt_free.py b/request_llms/edge_gpt_free.py similarity index 100% rename from request_llm/edge_gpt_free.py rename to request_llms/edge_gpt_free.py diff --git a/request_llms/key_manager.py b/request_llms/key_manager.py new file mode 100644 index 00000000..8563d2ef --- /dev/null +++ b/request_llms/key_manager.py @@ -0,0 +1,29 @@ +import random + +def Singleton(cls): + _instance = {} + + def _singleton(*args, **kargs): + if cls not in _instance: + _instance[cls] = cls(*args, **kargs) + return _instance[cls] + + return _singleton + + +@Singleton +class OpenAI_ApiKeyManager(): + def __init__(self, mode='blacklist') -> None: + # self.key_avail_list = [] + self.key_black_list = [] + + def add_key_to_blacklist(self, key): + self.key_black_list.append(key) + + def select_avail_key(self, key_list): + # select key from key_list, but avoid keys also in self.key_black_list, raise error if no key can be found + available_keys = [key for key in key_list if key not in self.key_black_list] + if not available_keys: + raise KeyError("No available key found.") + selected_key = random.choice(available_keys) + return selected_key \ No newline at end of file diff --git a/request_llms/local_llm_class.py b/request_llms/local_llm_class.py new file mode 100644 index 00000000..b6f49ba4 --- /dev/null +++ b/request_llms/local_llm_class.py @@ -0,0 +1,321 @@ +import time +import threading +from toolbox import update_ui +from multiprocessing import Process, Pipe 
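A hypothetical usage sketch for the new `request_llms/key_manager.py`; the key strings below are placeholders, and the snippet assumes it runs from the project root so the module can be imported:

```python
from request_llms.key_manager import OpenAI_ApiKeyManager

manager = OpenAI_ApiKeyManager()          # @Singleton: every call returns the same instance
candidate_keys = ["sk-aaa", "sk-bbb"]     # placeholder keys, not real credentials

key = manager.select_avail_key(candidate_keys)   # random choice among non-blacklisted keys
manager.add_key_to_blacklist(key)                # e.g. after the API reports the key as invalid

key = manager.select_avail_key(candidate_keys)   # only the remaining key can be returned now
manager.add_key_to_blacklist(key)

try:
    manager.select_avail_key(candidate_keys)
except KeyError:
    print("all candidate keys are blacklisted")  # raised when no available key remains
```

Because of the `Singleton` decorator, the blacklist persists across all callers within one process.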
+from contextlib import redirect_stdout +from request_llms.queued_pipe import create_queue_pipe + +class DebugLock(object): + def __init__(self): + self._lock = threading.Lock() + + def acquire(self): + print("acquiring", self) + #traceback.print_tb + self._lock.acquire() + print("acquired", self) + + def release(self): + print("released", self) + #traceback.print_tb + self._lock.release() + + def __enter__(self): + self.acquire() + + def __exit__(self, type, value, traceback): + self.release() + +def SingletonLocalLLM(cls): + """ + Singleton Decroator for LocalLLMHandle + """ + _instance = {} + + def _singleton(*args, **kargs): + if cls not in _instance: + _instance[cls] = cls(*args, **kargs) + return _instance[cls] + elif _instance[cls].corrupted: + _instance[cls] = cls(*args, **kargs) + return _instance[cls] + else: + return _instance[cls] + return _singleton + + +def reset_tqdm_output(): + import sys, tqdm + def status_printer(self, file): + fp = file + if fp in (sys.stderr, sys.stdout): + getattr(sys.stderr, 'flush', lambda: None)() + getattr(sys.stdout, 'flush', lambda: None)() + + def fp_write(s): + print(s) + last_len = [0] + + def print_status(s): + from tqdm.utils import disp_len + len_s = disp_len(s) + fp_write('\r' + s + (' ' * max(last_len[0] - len_s, 0))) + last_len[0] = len_s + return print_status + tqdm.tqdm.status_printer = status_printer + + +class LocalLLMHandle(Process): + def __init__(self): + # ⭐run in main process + super().__init__(daemon=True) + self.is_main_process = True # init + self.corrupted = False + self.load_model_info() + self.parent, self.child = create_queue_pipe() + self.parent_state, self.child_state = create_queue_pipe() + # allow redirect_stdout + self.std_tag = "[Subprocess Message] " + self.child.write = lambda x: self.child.send(self.std_tag + x) + self.running = True + self._model = None + self._tokenizer = None + self.state = "" + self.check_dependency() + self.is_main_process = False # state wrap for child process + self.start() + self.is_main_process = True # state wrap for child process + self.threadLock = DebugLock() + + def get_state(self): + # ⭐run in main process + while self.parent_state.poll(): + self.state = self.parent_state.recv() + return self.state + + def set_state(self, new_state): + # ⭐run in main process or 🏃‍♂️🏃‍♂️🏃‍♂️ run in child process + if self.is_main_process: + self.state = new_state + else: + self.child_state.send(new_state) + + def load_model_info(self): + # 🏃‍♂️🏃‍♂️🏃‍♂️ run in child process + raise NotImplementedError("Method not implemented yet") + self.model_name = "" + self.cmd_to_install = "" + + def load_model_and_tokenizer(self): + """ + This function should return the model and the tokenizer + """ + # 🏃‍♂️🏃‍♂️🏃‍♂️ run in child process + raise NotImplementedError("Method not implemented yet") + + def llm_stream_generator(self, **kwargs): + # 🏃‍♂️🏃‍♂️🏃‍♂️ run in child process + raise NotImplementedError("Method not implemented yet") + + def try_to_import_special_deps(self, **kwargs): + """ + import something that will raise error if the user does not install requirement_*.txt + """ + # ⭐run in main process + raise NotImplementedError("Method not implemented yet") + + def check_dependency(self): + # ⭐run in main process + try: + self.try_to_import_special_deps() + self.set_state("`依赖检测通过`") + self.running = True + except: + self.set_state(f"缺少{self.model_name}的依赖,如果要使用{self.model_name},除了基础的pip依赖以外,您还需要运行{self.cmd_to_install}安装{self.model_name}的依赖。") + self.running = False + + def run(self): + # 🏃‍♂️🏃‍♂️🏃‍♂️ run 
in child process + # 第一次运行,加载参数 + reset_tqdm_output() + self.set_state("`尝试加载模型`") + try: + with redirect_stdout(self.child): + self._model, self._tokenizer = self.load_model_and_tokenizer() + except: + self.set_state("`加载模型失败`") + self.running = False + from toolbox import trimmed_format_exc + self.child.send( + f'[Local Message] 不能正常加载{self.model_name}的参数.' + '\n```\n' + trimmed_format_exc() + '\n```\n') + self.child.send('[FinishBad]') + raise RuntimeError(f"不能正常加载{self.model_name}的参数!") + + self.set_state("`准备就绪`") + while True: + # 进入任务等待状态 + kwargs = self.child.recv() + # 收到消息,开始请求 + try: + for response_full in self.llm_stream_generator(**kwargs): + self.child.send(response_full) + print('debug' + response_full) + self.child.send('[Finish]') + # 请求处理结束,开始下一个循环 + except: + from toolbox import trimmed_format_exc + self.child.send( + f'[Local Message] 调用{self.model_name}失败.' + '\n```\n' + trimmed_format_exc() + '\n```\n') + self.child.send('[Finish]') + + def clear_pending_messages(self): + # ⭐run in main process + while True: + if self.parent.poll(): + self.parent.recv() + continue + for _ in range(5): + time.sleep(0.5) + if self.parent.poll(): + r = self.parent.recv() + continue + break + return + + def stream_chat(self, **kwargs): + # ⭐run in main process + if self.get_state() == "`准备就绪`": + yield "`正在等待线程锁,排队中请稍后 ...`" + + with self.threadLock: + if self.parent.poll(): + yield "`排队中请稍后 ...`" + self.clear_pending_messages() + self.parent.send(kwargs) + std_out = "" + std_out_clip_len = 4096 + while True: + res = self.parent.recv() + # pipe_watch_dog.feed() + if res.startswith(self.std_tag): + new_output = res[len(self.std_tag):] + std_out = std_out[:std_out_clip_len] + print(new_output, end='') + std_out = new_output + std_out + yield self.std_tag + '\n```\n' + std_out + '\n```\n' + elif res == '[Finish]': + break + elif res == '[FinishBad]': + self.running = False + self.corrupted = True + break + else: + std_out = "" + yield res + +def get_local_llm_predict_fns(LLMSingletonClass, model_name, history_format='classic'): + load_message = f"{model_name}尚未加载,加载需要一段时间。注意,取决于`config.py`的配置,{model_name}消耗大量的内存(CPU)或显存(GPU),也许会导致低配计算机卡死 ……" + + def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False): + """ + refer to request_llms/bridge_all.py + """ + _llm_handle = LLMSingletonClass() + if len(observe_window) >= 1: + observe_window[0] = load_message + "\n\n" + _llm_handle.get_state() + if not _llm_handle.running: + raise RuntimeError(_llm_handle.get_state()) + + if history_format == 'classic': + # 没有 sys_prompt 接口,因此把prompt加入 history + history_feedin = [] + history_feedin.append([sys_prompt, "Certainly!"]) + for i in range(len(history)//2): + history_feedin.append([history[2*i], history[2*i+1]]) + elif history_format == 'chatglm3': + # 有 sys_prompt 接口 + conversation_cnt = len(history) // 2 + history_feedin = [{"role": "system", "content": sys_prompt}] + if conversation_cnt: + for index in range(0, 2*conversation_cnt, 2): + what_i_have_asked = {} + what_i_have_asked["role"] = "user" + what_i_have_asked["content"] = history[index] + what_gpt_answer = {} + what_gpt_answer["role"] = "assistant" + what_gpt_answer["content"] = history[index+1] + if what_i_have_asked["content"] != "": + if what_gpt_answer["content"] == "": + continue + history_feedin.append(what_i_have_asked) + history_feedin.append(what_gpt_answer) + else: + history_feedin[-1]['content'] = what_gpt_answer['content'] + + watch_dog_patience = 5 # 看门狗 (watchdog) 的耐心, 
设置5秒即可 + response = "" + for response in _llm_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']): + if len(observe_window) >= 1: + observe_window[0] = response + if len(observe_window) >= 2: + if (time.time()-observe_window[1]) > watch_dog_patience: + raise RuntimeError("程序终止。") + return response + + def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream=True, additional_fn=None): + """ + refer to request_llms/bridge_all.py + """ + chatbot.append((inputs, "")) + + _llm_handle = LLMSingletonClass() + chatbot[-1] = (inputs, load_message + "\n\n" + _llm_handle.get_state()) + yield from update_ui(chatbot=chatbot, history=[]) + if not _llm_handle.running: + raise RuntimeError(_llm_handle.get_state()) + + if additional_fn is not None: + from core_functional import handle_core_functionality + inputs, history = handle_core_functionality( + additional_fn, inputs, history, chatbot) + + # 处理历史信息 + if history_format == 'classic': + # 没有 sys_prompt 接口,因此把prompt加入 history + history_feedin = [] + history_feedin.append([system_prompt, "Certainly!"]) + for i in range(len(history)//2): + history_feedin.append([history[2*i], history[2*i+1]]) + elif history_format == 'chatglm3': + # 有 sys_prompt 接口 + conversation_cnt = len(history) // 2 + history_feedin = [{"role": "system", "content": system_prompt}] + if conversation_cnt: + for index in range(0, 2*conversation_cnt, 2): + what_i_have_asked = {} + what_i_have_asked["role"] = "user" + what_i_have_asked["content"] = history[index] + what_gpt_answer = {} + what_gpt_answer["role"] = "assistant" + what_gpt_answer["content"] = history[index+1] + if what_i_have_asked["content"] != "": + if what_gpt_answer["content"] == "": + continue + history_feedin.append(what_i_have_asked) + history_feedin.append(what_gpt_answer) + else: + history_feedin[-1]['content'] = what_gpt_answer['content'] + + # 开始接收回复 + response = f"[Local Message] 等待{model_name}响应中 ..." + for response in _llm_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']): + chatbot[-1] = (inputs, response) + yield from update_ui(chatbot=chatbot, history=history) + + # 总结输出 + if response == f"[Local Message] 等待{model_name}响应中 ...": + response = f"[Local Message] {model_name}响应异常 ..." 
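To make the two `history_format` branches above concrete, here is a standalone sketch (simplified: the empty-message bookkeeping is omitted) of how a flat `history` list is repackaged for each format:

```python
history = ["What is 1+1?", "2", "And 2+2?", "4"]   # flat list: [q1, a1, q2, a2, ...]
sys_prompt = "You are a calculator."

# 'classic': the model has no system-prompt slot, so the prompt is smuggled in
# as a first (question, answer) exchange.
classic = [[sys_prompt, "Certainly!"]]
classic += [[history[2*i], history[2*i+1]] for i in range(len(history)//2)]
# -> [['You are a calculator.', 'Certainly!'], ['What is 1+1?', '2'], ['And 2+2?', '4']]

# 'chatglm3': the model accepts role-tagged messages, so the prompt keeps its own slot.
chatglm3 = [{"role": "system", "content": sys_prompt}]
for i in range(len(history)//2):
    chatglm3.append({"role": "user", "content": history[2*i]})
    chatglm3.append({"role": "assistant", "content": history[2*i+1]})
```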
+ history.extend([inputs, response]) + yield from update_ui(chatbot=chatbot, history=history) + + return predict_no_ui_long_connection, predict diff --git a/request_llms/queued_pipe.py b/request_llms/queued_pipe.py new file mode 100644 index 00000000..1fc2e5bd --- /dev/null +++ b/request_llms/queued_pipe.py @@ -0,0 +1,24 @@ +from multiprocessing import Pipe, Queue +import time +import threading + +class PipeSide(object): + def __init__(self, q_2remote, q_2local) -> None: + self.q_2remote = q_2remote + self.q_2local = q_2local + + def recv(self): + return self.q_2local.get() + + def send(self, buf): + self.q_2remote.put(buf) + + def poll(self): + return not self.q_2local.empty() + +def create_queue_pipe(): + q_p2c = Queue() + q_c2p = Queue() + pipe_c = PipeSide(q_2local=q_p2c, q_2remote=q_c2p) + pipe_p = PipeSide(q_2local=q_c2p, q_2remote=q_p2c) + return pipe_c, pipe_p diff --git a/request_llm/requirements_chatglm.txt b/request_llms/requirements_chatglm.txt similarity index 100% rename from request_llm/requirements_chatglm.txt rename to request_llms/requirements_chatglm.txt diff --git a/request_llm/requirements_chatglm_onnx.txt b/request_llms/requirements_chatglm_onnx.txt similarity index 100% rename from request_llm/requirements_chatglm_onnx.txt rename to request_llms/requirements_chatglm_onnx.txt diff --git a/request_llm/requirements_jittorllms.txt b/request_llms/requirements_jittorllms.txt similarity index 100% rename from request_llm/requirements_jittorllms.txt rename to request_llms/requirements_jittorllms.txt diff --git a/request_llm/requirements_moss.txt b/request_llms/requirements_moss.txt similarity index 100% rename from request_llm/requirements_moss.txt rename to request_llms/requirements_moss.txt diff --git a/request_llm/requirements_newbing.txt b/request_llms/requirements_newbing.txt similarity index 100% rename from request_llm/requirements_newbing.txt rename to request_llms/requirements_newbing.txt diff --git a/request_llm/requirements_qwen.txt b/request_llms/requirements_qwen.txt similarity index 100% rename from request_llm/requirements_qwen.txt rename to request_llms/requirements_qwen.txt diff --git a/request_llm/requirements_slackclaude.txt b/request_llms/requirements_slackclaude.txt similarity index 100% rename from request_llm/requirements_slackclaude.txt rename to request_llms/requirements_slackclaude.txt diff --git a/tests/test_llms.py b/tests/test_llms.py index 75e23032..5c5d2f6c 100644 --- a/tests/test_llms.py +++ b/tests/test_llms.py @@ -10,14 +10,16 @@ def validate_path(): validate_path() # validate path so you can run from base directory if __name__ == "__main__": - # from request_llm.bridge_newbingfree import predict_no_ui_long_connection - # from request_llm.bridge_moss import predict_no_ui_long_connection - # from request_llm.bridge_jittorllms_pangualpha import predict_no_ui_long_connection - # from request_llm.bridge_jittorllms_llama import predict_no_ui_long_connection - # from request_llm.bridge_claude import predict_no_ui_long_connection - # from request_llm.bridge_internlm import predict_no_ui_long_connection - # from request_llm.bridge_qwen import predict_no_ui_long_connection - from request_llm.bridge_spark import predict_no_ui_long_connection + # from request_llms.bridge_newbingfree import predict_no_ui_long_connection + # from request_llms.bridge_moss import predict_no_ui_long_connection + # from request_llms.bridge_jittorllms_pangualpha import predict_no_ui_long_connection + # from request_llms.bridge_jittorllms_llama import 
predict_no_ui_long_connection + # from request_llms.bridge_claude import predict_no_ui_long_connection + # from request_llms.bridge_internlm import predict_no_ui_long_connection + # from request_llms.bridge_qwen import predict_no_ui_long_connection + # from request_llms.bridge_spark import predict_no_ui_long_connection + # from request_llms.bridge_zhipu import predict_no_ui_long_connection + from request_llms.bridge_chatglm3 import predict_no_ui_long_connection llm_kwargs = { 'max_length': 4096, diff --git a/tests/test_markdown.py b/tests/test_markdown.py new file mode 100644 index 00000000..c92b4c4d --- /dev/null +++ b/tests/test_markdown.py @@ -0,0 +1,44 @@ +md = """ +作为您的写作和编程助手,我可以为您提供以下服务: + +1. 写作: + - 帮助您撰写文章、报告、散文、故事等。 + - 提供写作建议和技巧。 + - 协助您进行文案策划和内容创作。 + +2. 编程: + - 帮助您解决编程问题,提供编程思路和建议。 + - 协助您编写代码,包括但不限于 Python、Java、C++ 等。 + - 为您解释复杂的技术概念,让您更容易理解。 + +3. 项目支持: + - 协助您规划项目进度和任务分配。 + - 提供项目管理和协作建议。 + - 在项目实施过程中提供支持,确保项目顺利进行。 + +4. 学习辅导: + - 帮助您巩固编程基础,提高编程能力。 + - 提供计算机科学、数据科学、人工智能等相关领域的学习资源和建议。 + - 解答您在学习过程中遇到的问题,让您更好地掌握知识。 + +5. 行业动态和趋势分析: + - 为您提供业界最新的新闻和技术趋势。 + - 分析行业动态,帮助您了解市场发展和竞争态势。 + - 为您制定技术战略提供参考和建议。 + +请随时告诉我您的需求,我会尽力提供帮助。如果您有任何问题或需要解答的议题,请随时提问。 +""" + +def validate_path(): + import os, sys + dir_name = os.path.dirname(__file__) + root_dir_assume = os.path.abspath(os.path.dirname(__file__) + '/..') + os.chdir(root_dir_assume) + sys.path.append(root_dir_assume) +validate_path() # validate path so you can run from base directory +from toolbox import markdown_convertion + +html = markdown_convertion(md) +print(html) +with open('test.html', 'w', encoding='utf-8') as f: + f.write(html) \ No newline at end of file diff --git a/themes/gradios.py b/themes/gradios.py index 7693a238..96a9c54e 100644 --- a/themes/gradios.py +++ b/themes/gradios.py @@ -18,7 +18,7 @@ def adjust_theme(): set_theme = gr.themes.ThemeClass() with ProxyNetworkActivate('Download_Gradio_Theme'): logging.info('正在下载Gradio主题,请稍等。') - THEME, = get_conf('THEME') + THEME = get_conf('THEME') if THEME.startswith('Huggingface-'): THEME = THEME.lstrip('Huggingface-') if THEME.startswith('huggingface-'): THEME = THEME.lstrip('huggingface-') set_theme = set_theme.from_hub(THEME.lower()) diff --git a/themes/theme.py b/themes/theme.py index 42ee7500..f59db9f8 100644 --- a/themes/theme.py +++ b/themes/theme.py @@ -1,6 +1,6 @@ import gradio as gr from toolbox import get_conf -THEME, = get_conf('THEME') +THEME = get_conf('THEME') def load_dynamic_theme(THEME): adjust_dynamic_theme = None diff --git a/toolbox.py b/toolbox.py index 07a9fda0..8c6e7fae 100644 --- a/toolbox.py +++ b/toolbox.py @@ -7,6 +7,7 @@ import os import gradio import shutil import glob +import math from latex2mathml.converter import convert as tex2mathml from functools import wraps, lru_cache pj = os.path.join @@ -151,7 +152,7 @@ def CatchException(f): except Exception as e: from check_proxy import check_proxy from toolbox import get_conf - proxies, = get_conf('proxies') + proxies = get_conf('proxies') tb_str = '```\n' + trimmed_format_exc() + '```' if len(chatbot_with_cookie) == 0: chatbot_with_cookie.clear() @@ -372,6 +373,26 @@ def markdown_convertion(txt): contain_any_eq = True return contain_any_eq + def fix_markdown_indent(txt): + # fix markdown indent + if (' - ' not in txt) or ('. 
' not in txt): + return txt # do not need to fix, fast escape + # walk through the lines and fix non-standard indentation + lines = txt.split("\n") + pattern = re.compile(r'^\s+-') + activated = False + for i, line in enumerate(lines): + if line.startswith('- ') or line.startswith('1. '): + activated = True + if activated and pattern.match(line): + stripped_string = line.lstrip() + num_spaces = len(line) - len(stripped_string) + if (num_spaces % 4) == 3: + num_spaces_should_be = math.ceil(num_spaces/4) * 4 + lines[i] = ' ' * num_spaces_should_be + stripped_string + return '\n'.join(lines) + + txt = fix_markdown_indent(txt) if is_equation(txt): # 有$标识的公式符号,且没有代码段```的标识 # convert everything to html format split = markdown.markdown(text='---') @@ -534,14 +555,14 @@ def disable_auto_promotion(chatbot): return def is_the_upload_folder(string): - PATH_PRIVATE_UPLOAD, = get_conf('PATH_PRIVATE_UPLOAD') + PATH_PRIVATE_UPLOAD = get_conf('PATH_PRIVATE_UPLOAD') pattern = r'^PATH_PRIVATE_UPLOAD/[A-Za-z0-9_-]+/\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2}$' pattern = pattern.replace('PATH_PRIVATE_UPLOAD', PATH_PRIVATE_UPLOAD) if re.match(pattern, string): return True else: return False def del_outdated_uploads(outdate_time_seconds): - PATH_PRIVATE_UPLOAD, = get_conf('PATH_PRIVATE_UPLOAD') + PATH_PRIVATE_UPLOAD = get_conf('PATH_PRIVATE_UPLOAD') current_time = time.time() one_hour_ago = current_time - outdate_time_seconds # Get a list of all subdirectories in the PATH_PRIVATE_UPLOAD folder @@ -567,7 +588,7 @@ def on_file_uploaded(request: gradio.Request, files, chatbot, txt, txt2, checkbo # 创建工作路径 user_name = "default" if not request.username else request.username time_tag = gen_time_str() - PATH_PRIVATE_UPLOAD, = get_conf('PATH_PRIVATE_UPLOAD') + PATH_PRIVATE_UPLOAD = get_conf('PATH_PRIVATE_UPLOAD') target_path_base = pj(PATH_PRIVATE_UPLOAD, user_name, time_tag) os.makedirs(target_path_base, exist_ok=True) @@ -605,7 +626,7 @@ def on_file_uploaded(request: gradio.Request, files, chatbot, txt, txt2, checkbo def on_report_generated(cookies, files, chatbot): from toolbox import find_recent_files - PATH_LOGGING, = get_conf('PATH_LOGGING') + PATH_LOGGING = get_conf('PATH_LOGGING') if 'files_to_promote' in cookies: report_files = cookies['files_to_promote'] cookies.pop('files_to_promote') @@ -648,7 +669,7 @@ def load_chat_cookies(): return {'api_key': API_KEY, 'llm_model': LLM_MODEL, 'customize_fn_overwrite': customize_fn_overwrite_} def is_openai_api_key(key): - CUSTOM_API_KEY_PATTERN, = get_conf('CUSTOM_API_KEY_PATTERN') + CUSTOM_API_KEY_PATTERN = get_conf('CUSTOM_API_KEY_PATTERN') if len(CUSTOM_API_KEY_PATTERN) != 0: API_MATCH_ORIGINAL = re.match(CUSTOM_API_KEY_PATTERN, key) else: @@ -807,6 +828,7 @@ def get_conf(*args): for arg in args: r = read_single_conf_with_lru_cache(arg) res.append(r) + if len(res) == 1: return res[0] return res @@ -878,7 +900,7 @@ def clip_history(inputs, history, tokenizer, max_token_limit): 直到历史记录的标记数量降低到阈值以下。 """ import numpy as np - from request_llm.bridge_all import model_info + from request_llms.bridge_all import model_info def get_token_num(txt): return len(tokenizer.encode(txt, disallowed_special=())) input_token_num = get_token_num(inputs) @@ -968,7 +990,7 @@ def gen_time_str(): return time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) def get_log_folder(user='default', plugin_name='shared'): - PATH_LOGGING, = get_conf('PATH_LOGGING') + PATH_LOGGING = get_conf('PATH_LOGGING') _dir = pj(PATH_LOGGING, user, plugin_name) if not os.path.exists(_dir): os.makedirs(_dir) return _dir @@ 
-985,13 +1007,13 @@ class ProxyNetworkActivate(): else: # 给定了task, 我们检查一下 from toolbox import get_conf - WHEN_TO_USE_PROXY, = get_conf('WHEN_TO_USE_PROXY') + WHEN_TO_USE_PROXY = get_conf('WHEN_TO_USE_PROXY') self.valid = (task in WHEN_TO_USE_PROXY) def __enter__(self): if not self.valid: return self from toolbox import get_conf - proxies, = get_conf('proxies') + proxies = get_conf('proxies') if 'no_proxy' in os.environ: os.environ.pop('no_proxy') if proxies is not None: if 'http' in proxies: os.environ['HTTP_PROXY'] = proxies['http'] @@ -1033,7 +1055,7 @@ def Singleton(cls): """ ======================================================================== 第四部分 -接驳虚空终端: +接驳void-terminal: - set_conf: 在运行过程中动态地修改配置 - set_multi_conf: 在运行过程中动态地修改多个配置 - get_plugin_handle: 获取插件的句柄 @@ -1048,7 +1070,7 @@ def set_conf(key, value): read_single_conf_with_lru_cache.cache_clear() get_conf.cache_clear() os.environ[key] = str(value) - altered, = get_conf(key) + altered = get_conf(key) return altered def set_multi_conf(dic): @@ -1069,7 +1091,7 @@ def get_plugin_handle(plugin_name): def get_chat_handle(): """ """ - from request_llm.bridge_all import predict_no_ui_long_connection + from request_llms.bridge_all import predict_no_ui_long_connection return predict_no_ui_long_connection def get_plugin_default_kwargs():
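For reference, the `fix_markdown_indent` helper that this patch nests inside `toolbox.markdown_convertion` normalizes near-miss list indentation (an indent of 3, 7, 11, ... spaces is rounded up to the next multiple of 4) before Markdown rendering. A standalone copy of that helper, for illustration only, with a small input/output example:

```python
import re, math

def fix_markdown_indent(txt):
    # standalone copy of the helper nested in toolbox.markdown_convertion, shown above
    if (' - ' not in txt) or ('. ' not in txt):
        return txt  # fast escape: nothing that looks like a nested list
    lines = txt.split("\n")
    pattern = re.compile(r'^\s+-')
    activated = False
    for i, line in enumerate(lines):
        if line.startswith('- ') or line.startswith('1. '):
            activated = True                      # only fix lines after a list has started
        if activated and pattern.match(line):
            stripped_string = line.lstrip()
            num_spaces = len(line) - len(stripped_string)
            if (num_spaces % 4) == 3:             # e.g. 3 or 7 leading spaces
                num_spaces_should_be = math.ceil(num_spaces / 4) * 4
                lines[i] = ' ' * num_spaces_should_be + stripped_string
    return '\n'.join(lines)

sample = "1. Programming:\n   - help you write code"   # 3-space indent, invalid for many renderers
print(fix_markdown_indent(sample))
# 1. Programming:
#     - help you write code        (the 3-space indent was rounded up to 4)
```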