Merge branch 'master' into frontier

2023-11-24 03:28:07 +08:00
parent 4fefbb80ac 5d5695cd9a
commit 5b06a6cae5
12 changed files with 219 additions and 122 deletions
--- a/request_llms/README.md
+++ b/request_llms/README.md
@@ -1,79 +1,35 @@
-# 如何使用其他大语言模型
-
-## ChatGLM
-
- 安装依赖 `pip install -r request_llms/requirements_chatglm.txt`
- 修改配置，在config.py中将LLM_MODEL的值改为"chatglm"
-
-``` sh
-LLM_MODEL = "chatglm"
-```
- 运行！
-``` sh
-`python main.py`
-``` 
-
-## Claude-Stack
-
- 请参考此教程获取  https://zhuanlan.zhihu.com/p/627485689
-    - 1、SLACK_CLAUDE_BOT_ID 
-    - 2、SLACK_CLAUDE_USER_TOKEN
-
- 把token加入config.py
-
-## Newbing
-
- 使用cookie editor获取cookie（json）
- 把cookie（json）加入config.py （NEWBING_COOKIES）
-
-## Moss
- 使用docker-compose
-
-## RWKV
- 使用docker-compose
-
-## LLAMA
- 使用docker-compose
-
-## 盘古
- 使用docker-compose
+P.S. 如果您按照以下步骤成功接入了新的大模型，欢迎发Pull Requests（如果您在自己接入新模型的过程中遇到困难，欢迎加README底部QQ群联系群主）


---
-## Text-Generation-UI (TGUI，调试中，暂不可用)
+# 如何接入其他本地大语言模型

-### 1. 部署TGUI
-``` sh
-# 1 下载模型
-git clone https://github.com/oobabooga/text-generation-webui.git
-# 2 这个仓库的最新代码有问题，回滚到几周之前
-git reset --hard fcda3f87767e642d1c0411776e549e1d3894843d
-# 3 切换路径
-cd text-generation-webui
-# 4 安装text-generation的额外依赖
-pip install accelerate bitsandbytes flexgen gradio llamacpp markdown numpy peft requests rwkv safetensors sentencepiece tqdm datasets git+https://github.com/huggingface/transformers
-# 5 下载模型
-python download-model.py facebook/galactica-1.3b
-# 其他可选如 facebook/opt-1.3b
-#           facebook/galactica-1.3b
-#           facebook/galactica-6.7b
-#           facebook/galactica-120b
-#           facebook/pygmalion-1.3b 等
-# 详情见 https://github.com/oobabooga/text-generation-webui
+1. 复制`request_llms/bridge_llama2.py`，重命名为你喜欢的名字

-# 6 启动text-generation
-python server.py --cpu --listen --listen-port 7865 --model facebook_galactica-1.3b
-```
+2. 修改`load_model_and_tokenizer`方法，加载你的模型和分词器（去该模型官网找demo，复制粘贴即可）

-### 2. 修改config.py
+3. 修改`llm_stream_generator`方法，定义推理模型（去该模型官网找demo，复制粘贴即可）

-``` sh
-# LLM_MODEL格式:   tgui:[模型]@[ws地址]:[ws端口] ,   端口要和上面给定的端口一致
-LLM_MODEL = "tgui:galactica-1.3b@localhost:7860"
-```
+4. 命令行测试
+    - 修改`tests/test_llms.py`（聪慧如您，只需要看一眼该文件就明白怎么修改了）
+    - 运行`python tests/test_llms.py`

-### 3. 运行！
-``` sh
-cd chatgpt-academic
-python main.py
-```
+5. 测试通过后，在`request_llms/bridge_all.py`中做最后的修改，把你的模型完全接入到框架中（聪慧如您，只需要看一眼该文件就明白怎么修改了）
+
+6. 修改`LLM_MODEL`配置，然后运行`python main.py`，测试最后的效果
+
+
+# 如何接入其他在线大语言模型
+
+1. 复制`request_llms/bridge_zhipu.py`，重命名为你喜欢的名字
+
+2. 修改`predict_no_ui_long_connection`函数（无界面调用入口，对应`request_llms/bridge_all.py`中的`fn_without_ui`）
+
+3. 修改`predict`函数（带界面调用入口，对应`request_llms/bridge_all.py`中的`fn_with_ui`）
+
+4. 命令行测试
+    - 修改`tests/test_llms.py`（聪慧如您，只需要看一眼该文件就明白怎么修改了）
+    - 运行`python tests/test_llms.py`
+
+5. 测试通过后，在`request_llms/bridge_all.py`中做最后的修改，把你的模型完全接入到框架中（聪慧如您，只需要看一眼该文件就明白怎么修改了）
+
+6. 修改`LLM_MODEL`配置，然后运行`python main.py`，测试最后的效果
--- a/request_llms/bridge_all.py
+++ b/request_llms/bridge_all.py
@@ -543,6 +543,22 @@ if "zhipuai" in AVAIL_LLM_MODELS:   # zhipuai
        })
    except:
        print(trimmed_format_exc())
+if "deepseekcoder" in AVAIL_LLM_MODELS:   # deepseekcoder
+    # Register the local deepseek-coder bridge only when the model is enabled,
+    # so the bridge module is imported lazily.
+    try:
+        from .bridge_deepseekcoder import predict_no_ui_long_connection as deepseekcoder_noui
+        from .bridge_deepseekcoder import predict as deepseekcoder_ui
+        model_info.update({
+            "deepseekcoder": {
+                "fn_with_ui": deepseekcoder_ui,
+                "fn_without_ui": deepseekcoder_noui,
+                "endpoint": None,
+                "max_token": 4096,
+                # NOTE(review): token counting reuses the GPT-3.5 tokenizer, so counts are
+                # presumably only approximate for this model - confirm this is intended.
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            }
+        })
+    except Exception:
+        # Best effort: a failed import must not prevent the other models from being
+        # registered, so report the traceback and continue. `Exception` (rather than a
+        # bare `except`) lets KeyboardInterrupt / SystemExit propagate.
+        print(trimmed_format_exc())

 # <-- 用于定义和切换多个azure模型 -->
 AZURE_CFG_ARRAY = get_conf("AZURE_CFG_ARRAY")
--- a/request_llms/bridge_deepseekcoder.py
+++ b/request_llms/bridge_deepseekcoder.py
@@ -0,0 +1,88 @@
+# Model identifier: stored on the handle in load_model_info and passed to
+# get_local_llm_predict_fns at the bottom of this file.
+model_name = "deepseek-coder-6.7b-instruct"
+# Install hint stored on the handle in load_model_info. "未知" means "unknown".
+# NOTE(review): no requirements file exists for this model yet; the qwen one
+# (`pip install -r request_llms/requirements_qwen.txt`) looks like a leftover template - confirm.
+cmd_to_install = "未知"
+
+import os
+from toolbox import ProxyNetworkActivate
+from toolbox import get_conf
+from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
+from threading import Thread
+
+def download_huggingface_model(model_name, max_retry, local_dir):
+    """Download a Hugging Face repository snapshot, retrying on failure.
+
+    Args:
+        model_name: Hub repository id, passed to ``snapshot_download`` as ``repo_id``.
+        max_retry: Maximum number of download attempts.
+        local_dir: Directory the snapshot is written to.
+
+    Returns:
+        ``local_dir``, once a download attempt has succeeded.
+
+    Raises:
+        RuntimeError: If no attempt succeeded; chained to the last download error.
+    """
+    from huggingface_hub import snapshot_download
+    last_error = None
+    # Inclusive upper bound so that exactly `max_retry` attempts are made.
+    for attempt in range(1, max_retry + 1):
+        try:
+            snapshot_download(repo_id=model_name, local_dir=local_dir, resume_download=True)
+            return local_dir
+        except Exception as e:
+            last_error = e
+            # Only announce a retry when another attempt will actually follow.
+            if attempt < max_retry:
+                print(f'\n\n下载失败，重试第{attempt}次中...\n\n')
+    # Do not hand back a directory that was never (fully) downloaded.
+    raise RuntimeError(f'Failed to download {model_name} after {max_retry} attempt(s)') from last_error
+# ------------------------------------------------------------------------------------------------------------------------
+# 🔌💻 Local Model
+# ------------------------------------------------------------------------------------------------------------------------
+class GetCoderLMHandle(LocalLLMHandle):
+    """LocalLLMHandle subclass that serves deepseek-coder-6.7b-instruct.
+
+    Provides the model metadata, model/tokenizer loading, streaming generation
+    and the (no-op) dependency check for this model.
+    """
+
+    def load_model_info(self):
+        """Record the model identifier and the install hint on the handle."""
+        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
+        self.model_name = model_name
+        self.cmd_to_install = cmd_to_install
+
+    def load_model_and_tokenizer(self):
+        """Load the tokenizer and model and create the text streamer.
+
+        Returns:
+            A ``(model, tokenizer)`` tuple. The model is moved to CUDA unless the
+            ``LOCAL_MODEL_DEVICE`` setting is ``'cpu'``.
+        """
+        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
+        with ProxyNetworkActivate('Download_LLM'):
+            from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
+            # Hub repository id; named differently from the module-level `model_name`
+            # so the display name is not shadowed.
+            repo_id = "deepseek-ai/deepseek-coder-6.7b-instruct"
+            tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
+            # skip_prompt keeps the chat prompt out of the streamed reply, and
+            # skip_special_tokens keeps markers such as the end-of-turn token out of it.
+            self._streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+            model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)
+            if get_conf('LOCAL_MODEL_DEVICE') != 'cpu':
+                model = model.cuda()
+        return model, tokenizer
+
+    def llm_stream_generator(self, **kwargs):
+        """Stream the model's reply to a query.
+
+        Args:
+            **kwargs: Must contain ``query``, ``max_length``, ``top_p``,
+                ``temperature`` and ``history``. ``history`` is used as a list of
+                chat messages (assumed to be role/content dicts because of
+                ``history_format='chatglm3'`` - TODO confirm).
+
+        Yields:
+            The reply text accumulated so far, once per newly decoded chunk.
+        """
+        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
+        query = kwargs['query']
+        max_length = kwargs['max_length']
+        top_p = kwargs['top_p']
+        temperature = kwargs['temperature']
+        history = kwargs['history']
+
+        # Build a fresh message list instead of appending to the caller's history.
+        messages = [*history, {'role': 'user', 'content': query}]
+        inputs = self._tokenizer.apply_chat_template(messages, return_tensors="pt").to(self._model.device)
+        # NOTE(review): with do_sample=False decoding is presumably greedy, so top_p,
+        # top_k and temperature would have no effect - confirm this is intended.
+        # NOTE(review): eos_token_id is hard-coded; presumably the model's end-of-turn
+        # token - confirm against the tokenizer.
+        generation_kwargs = dict(
+                                    inputs=inputs,
+                                    max_new_tokens=max_length,
+                                    do_sample=False,
+                                    top_p=top_p,
+                                    streamer = self._streamer,
+                                    top_k=50,
+                                    temperature=temperature,
+                                    num_return_sequences=1,
+                                    eos_token_id=32021,
+                                )
+        # generate() blocks until done, so run it in a background thread and consume
+        # the streamer from this generator.
+        thread = Thread(target=self._model.generate, kwargs=generation_kwargs, daemon=True)
+        thread.start()
+        generated_text = ""
+        for new_text in self._streamer:
+            generated_text += new_text
+            yield generated_text
+
+    def try_to_import_special_deps(self, **kwargs):
+        """Import dependencies that should fail early when requirements are missing.
+
+        This model declares no such dependencies, so the hook does nothing.
+        """
+        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the main process
+        pass
+
+
+
+
+# ------------------------------------------------------------------------------------------------------------------------
+# 🔌💻 GPT-Academic Interface
+# ------------------------------------------------------------------------------------------------------------------------
+# Build the two entry points imported by bridge_all.py from the handle class.
+# NOTE(review): history_format='chatglm3' presumably makes the history arrive as
+# role/content dicts, which llm_stream_generator feeds to apply_chat_template -
+# confirm in local_llm_class.
+predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetCoderLMHandle, model_name, history_format='chatglm3')
--- a/request_llms/bridge_llama2.py
+++ b/request_llms/bridge_llama2.py
@@ -12,7 +12,7 @@ from threading import Thread
 # ------------------------------------------------------------------------------------------------------------------------
 # 🔌💻 Local Model
 # ------------------------------------------------------------------------------------------------------------------------
-class GetONNXGLMHandle(LocalLLMHandle):
+class GetLlamaHandle(LocalLLMHandle):

    def load_model_info(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
@@ -87,4 +87,4 @@ class GetONNXGLMHandle(LocalLLMHandle):
 # ------------------------------------------------------------------------------------------------------------------------
 # 🔌💻 GPT-Academic Interface
 # ------------------------------------------------------------------------------------------------------------------------
-predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetONNXGLMHandle, model_name)
+predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetLlamaHandle, model_name)
--- a/request_llms/bridge_qwen.py
+++ b/request_llms/bridge_qwen.py
@@ -15,7 +15,7 @@ from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
 # ------------------------------------------------------------------------------------------------------------------------
 # 🔌💻 Local Model
 # ------------------------------------------------------------------------------------------------------------------------
-class GetONNXGLMHandle(LocalLLMHandle):
+class GetQwenLMHandle(LocalLLMHandle):

    def load_model_info(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
@@ -64,4 +64,4 @@ class GetONNXGLMHandle(LocalLLMHandle):
 # ------------------------------------------------------------------------------------------------------------------------
 # 🔌💻 GPT-Academic Interface
 # ------------------------------------------------------------------------------------------------------------------------
-predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetONNXGLMHandle, model_name)
+predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetQwenLMHandle, model_name)
--- a/request_llms/bridge_zhipu.py
+++ b/request_llms/bridge_zhipu.py
@@ -1,6 +1,7 @@

 import time
 from toolbox import update_ui, get_conf, update_ui_lastest_msg
+from toolbox import check_packages, report_exception

 model_name = '智谱AI大模型'

@@ -37,6 +38,14 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
    chatbot.append((inputs, ""))
    yield from update_ui(chatbot=chatbot, history=history)

+    # 尝试导入依赖，如果缺少依赖，则给出安装建议
+    try:
+        check_packages(["zhipuai"])
+    except:
+        yield from update_ui_lastest_msg(f"导入软件依赖失败。使用该模型需要额外依赖，安装方法```pip install --upgrade zhipuai```。",
+                                         chatbot=chatbot, history=history, delay=0)
+        return
+    
    if validate_key() is False:
        yield from update_ui_lastest_msg(lastmsg="[Local Message] 请配置ZHIPUAI_API_KEY", chatbot=chatbot, history=history, delay=0)
        return
--- a/request_llms/local_llm_class.py
+++ b/request_llms/local_llm_class.py
@@ -198,7 +198,7 @@ class LocalLLMHandle(Process):
                if res.startswith(self.std_tag):
                    new_output = res[len(self.std_tag):]
                    std_out = std_out[:std_out_clip_len]
-                    # print(new_output, end='')
+                    print(new_output, end='')
                    std_out = new_output + std_out
                    yield self.std_tag + '\n```\n' + std_out + '\n```\n'
                elif res == '[Finish]':