Merge branch 'master' into frontier
This commit is contained in:
@@ -1,79 +1,35 @@
|
||||
# 如何使用其他大语言模型
|
||||
|
||||
## ChatGLM
|
||||
|
||||
- 安装依赖 `pip install -r request_llms/requirements_chatglm.txt`
|
||||
- 修改配置,在config.py中将LLM_MODEL的值改为"chatglm"
|
||||
|
||||
``` sh
|
||||
LLM_MODEL = "chatglm"
|
||||
```
|
||||
- 运行!
|
||||
``` sh
|
||||
python main.py
|
||||
```
|
||||
|
||||
## Claude-Slack
|
||||
|
||||
- 请参考此教程获取 https://zhuanlan.zhihu.com/p/627485689
|
||||
- 1、SLACK_CLAUDE_BOT_ID
|
||||
- 2、SLACK_CLAUDE_USER_TOKEN
|
||||
|
||||
- 把token加入config.py
|
||||
|
||||
## Newbing
|
||||
|
||||
- 使用cookie editor获取cookie(json)
|
||||
- 把cookie(json)加入config.py (NEWBING_COOKIES)
|
||||
|
||||
## Moss
|
||||
- 使用docker-compose
|
||||
|
||||
## RWKV
|
||||
- 使用docker-compose
|
||||
|
||||
## LLAMA
|
||||
- 使用docker-compose
|
||||
|
||||
## 盘古
|
||||
- 使用docker-compose
|
||||
P.S. 如果您按照以下步骤成功接入了新的大模型,欢迎发Pull Requests(如果您在自己接入新模型的过程中遇到困难,欢迎加README底部QQ群联系群主)
|
||||
|
||||
|
||||
---
|
||||
## Text-Generation-UI (TGUI,调试中,暂不可用)
|
||||
# 如何接入其他本地大语言模型
|
||||
|
||||
### 1. 部署TGUI
|
||||
``` sh
|
||||
# 1 下载代码仓库
|
||||
git clone https://github.com/oobabooga/text-generation-webui.git
|
||||
# 2 这个仓库的最新代码有问题,回滚到几周之前
|
||||
git reset --hard fcda3f87767e642d1c0411776e549e1d3894843d
|
||||
# 3 切换路径
|
||||
cd text-generation-webui
|
||||
# 4 安装text-generation的额外依赖
|
||||
pip install accelerate bitsandbytes flexgen gradio llamacpp markdown numpy peft requests rwkv safetensors sentencepiece tqdm datasets git+https://github.com/huggingface/transformers
|
||||
# 5 下载模型
|
||||
python download-model.py facebook/galactica-1.3b
|
||||
# 其他可选如 facebook/opt-1.3b
|
||||
# facebook/galactica-1.3b
|
||||
# facebook/galactica-6.7b
|
||||
# facebook/galactica-120b
|
||||
# facebook/pygmalion-1.3b 等
|
||||
# 详情见 https://github.com/oobabooga/text-generation-webui
|
||||
1. 复制`request_llms/bridge_llama2.py`,重命名为你喜欢的名字
|
||||
|
||||
# 6 启动text-generation
|
||||
python server.py --cpu --listen --listen-port 7865 --model facebook_galactica-1.3b
|
||||
```
|
||||
2. 修改`load_model_and_tokenizer`方法,加载你的模型和分词器(去该模型官网找demo,复制粘贴即可)
|
||||
|
||||
### 2. 修改config.py
|
||||
3. 修改`llm_stream_generator`方法,定义推理模型(去该模型官网找demo,复制粘贴即可)
|
||||
|
||||
``` sh
|
||||
# LLM_MODEL格式: tgui:[模型]@[ws地址]:[ws端口] , 端口要和上面给定的端口一致
|
||||
LLM_MODEL = "tgui:galactica-1.3b@localhost:7865"
|
||||
```
|
||||
4. 命令行测试
|
||||
- 修改`tests/test_llms.py`(聪慧如您,只需要看一眼该文件就明白怎么修改了)
|
||||
- 运行`python tests/test_llms.py`
|
||||
|
||||
### 3. 运行!
|
||||
``` sh
|
||||
cd chatgpt-academic
|
||||
python main.py
|
||||
```
|
||||
5. 测试通过后,在`request_llms/bridge_all.py`中做最后的修改,把你的模型完全接入到框架中(聪慧如您,只需要看一眼该文件就明白怎么修改了)
|
||||
|
||||
6. 修改`LLM_MODEL`配置,然后运行`python main.py`,测试最后的效果
|
||||
|
||||
|
||||
# 如何接入其他在线大语言模型
|
||||
|
||||
1. 复制`request_llms/bridge_zhipu.py`,重命名为你喜欢的名字
|
||||
|
||||
2. 修改`predict_no_ui_long_connection`
|
||||
|
||||
3. 修改`predict`
|
||||
|
||||
4. 命令行测试
|
||||
- 修改`tests/test_llms.py`(聪慧如您,只需要看一眼该文件就明白怎么修改了)
|
||||
- 运行`python tests/test_llms.py`
|
||||
|
||||
5. 测试通过后,在`request_llms/bridge_all.py`中做最后的修改,把你的模型完全接入到框架中(聪慧如您,只需要看一眼该文件就明白怎么修改了)
|
||||
|
||||
6. 修改`LLM_MODEL`配置,然后运行`python main.py`,测试最后的效果
|
||||
@@ -543,6 +543,22 @@ if "zhipuai" in AVAIL_LLM_MODELS: # zhipuai
|
||||
})
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
# Register the deepseekcoder bridge when it is listed in AVAIL_LLM_MODELS.
if "deepseekcoder" in AVAIL_LLM_MODELS:   # deepseekcoder
    try:
        # Both entry points come from the same bridge module; any failure while
        # importing or registering is printed below instead of propagating.
        from .bridge_deepseekcoder import (
            predict_no_ui_long_connection as deepseekcoder_noui,
            predict as deepseekcoder_ui,
        )
        model_info["deepseekcoder"] = {
            "fn_with_ui": deepseekcoder_ui,
            "fn_without_ui": deepseekcoder_noui,
            "endpoint": None,
            "max_token": 4096,
            "tokenizer": tokenizer_gpt35,
            "token_cnt": get_token_num_gpt35,
        }
    except:
        print(trimmed_format_exc())
|
||||
|
||||
# <-- 用于定义和切换多个azure模型 -->
|
||||
AZURE_CFG_ARRAY = get_conf("AZURE_CFG_ARRAY")
|
||||
|
||||
88
request_llms/bridge_deepseekcoder.py
Normal file
88
request_llms/bridge_deepseekcoder.py
Normal file
@@ -0,0 +1,88 @@
|
||||
# Model name: stored on the handle in load_model_info and passed to
# get_local_llm_predict_fns at the bottom of this module.
model_name = "deepseek-coder-6.7b-instruct"
# Install hint stored on the handle; currently the placeholder "未知" ("unknown").
# The trailing comment keeps a requirements command that is not in use.
cmd_to_install = "未知" # "`pip install -r request_llms/requirements_qwen.txt`"
|
||||
|
||||
import os
|
||||
from toolbox import ProxyNetworkActivate
|
||||
from toolbox import get_conf
|
||||
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
|
||||
from threading import Thread
|
||||
|
||||
def download_huggingface_model(model_name, max_retry, local_dir):
    """Download a Hugging Face repository snapshot into ``local_dir``, retrying on failure.

    Args:
        model_name: Repository id, forwarded to ``snapshot_download`` as ``repo_id``.
        max_retry: Maximum number of download attempts.
        local_dir: Directory the snapshot is written to.

    Returns:
        ``local_dir``. The function is best-effort: each failure is only printed,
        and ``local_dir`` is returned even if every attempt failed.
    """
    # Local import: huggingface_hub is only required when this function is called.
    from huggingface_hub import snapshot_download

    # Count attempts from 1 through max_retry inclusive, so exactly max_retry
    # attempts are made (including a single attempt when max_retry == 1).
    for attempt in range(1, max_retry + 1):
        try:
            snapshot_download(repo_id=model_name, local_dir=local_dir, resume_download=True)
            break
        except Exception:
            print(f'\n\n下载失败,重试第{attempt}次中...\n\n')
    return local_dir
|
||||
# ------------------------------------------------------------------------------------------------------------------------
|
||||
# 🔌💻 Local Model
|
||||
# ------------------------------------------------------------------------------------------------------------------------
|
||||
class GetCoderLMHandle(LocalLLMHandle):
    # LocalLLMHandle subclass for the DeepSeek-Coder model. It defines how the
    # model is described, loaded, and streamed from.

    def load_model_info(self):
        # Runs in the child process (per the original note).
        # Copies the module-level model name and install hint onto the handle.
        self.model_name = model_name
        self.cmd_to_install = cmd_to_install

    def load_model_and_tokenizer(self):
        # Runs in the child process (per the original note).
        # Loads the tokenizer and model and returns them as (model, tokenizer).
        # Also creates self._streamer, which llm_stream_generator consumes.
        with ProxyNetworkActivate('Download_LLM'):
            from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
            # This local shadows the module-level model_name; here it is the
            # repository id handed to from_pretrained.
            model_name = "deepseek-ai/deepseek-coder-6.7b-instruct"
            # local_dir = f"~/.cache/{model_name}"
            # if not os.path.exists(local_dir):
            #     tokenizer = download_huggingface_model(model_name, max_retry=128, local_dir=local_dir)
            tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
            # NOTE(review): the streamer is built with default options only; confirm
            # whether the prompt text is echoed into the streamed output.
            self._streamer = TextIteratorStreamer(tokenizer)
            model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
        # Move the model to CUDA unless the configured device is 'cpu'.
        if get_conf('LOCAL_MODEL_DEVICE') != 'cpu':
            model = model.cuda()
        return model, tokenizer

    def llm_stream_generator(self, **kwargs):
        # Runs in the child process (per the original note).
        # Generator: starts generation on a background thread and yields the
        # accumulated text each time the streamer produces a new piece.
        def adaptor(kwargs):
            # Pull the expected keys out of kwargs; a missing key raises KeyError.
            query = kwargs['query']
            max_length = kwargs['max_length']
            top_p = kwargs['top_p']
            temperature = kwargs['temperature']
            history = kwargs['history']
            return query, max_length, top_p, temperature, history

        query, max_length, top_p, temperature, history = adaptor(kwargs)
        # Appends in place, so the list object passed as kwargs['history'] is mutated.
        history.append({ 'role': 'user', 'content': query})
        messages = history
        # self._tokenizer / self._model are not assigned in this class; presumably the
        # base class sets them from load_model_and_tokenizer's return value (verify).
        inputs = self._tokenizer.apply_chat_template(messages, return_tensors="pt").to(self._model.device)
        # NOTE(review): do_sample=False is passed together with top_p / top_k /
        # temperature; confirm against the transformers docs whether those sampling
        # parameters have any effect in this mode.
        generation_kwargs = dict(
            inputs=inputs,
            max_new_tokens=max_length,
            do_sample=False,
            top_p=top_p,
            streamer = self._streamer,
            top_k=50,
            temperature=temperature,
            num_return_sequences=1,
            # Hard-coded id; presumably the model's end-of-turn token (TODO confirm).
            eos_token_id=32021,
        )
        # generate() runs on a daemon thread so this generator can read from the
        # streamer while generation is in progress.
        thread = Thread(target=self._model.generate, kwargs=generation_kwargs, daemon=True)
        thread.start()
        generated_text = ""
        for new_text in self._streamer:
            generated_text += new_text
            # print(generated_text)
            # Yield the full text so far, not just the newest piece.
            yield generated_text

    def try_to_import_special_deps(self, **kwargs): pass
        # import something that will raise error if the user does not install requirement_*.txt
        # Runs in the main process (per the original note).
        # import importlib
        # importlib.import_module('modelscope')
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------------------------------------------------
|
||||
# 🔌💻 GPT-Academic Interface
|
||||
# ------------------------------------------------------------------------------------------------------------------------
|
||||
# Build this module's two entry points from the handle class; the factory returns
# a pair, unpacked here, and history_format='chatglm3' is forwarded to it.
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetCoderLMHandle, model_name, history_format='chatglm3')
|
||||
@@ -12,7 +12,7 @@ from threading import Thread
|
||||
# ------------------------------------------------------------------------------------------------------------------------
|
||||
# 🔌💻 Local Model
|
||||
# ------------------------------------------------------------------------------------------------------------------------
|
||||
class GetONNXGLMHandle(LocalLLMHandle):
|
||||
class GetLlamaHandle(LocalLLMHandle):
|
||||
|
||||
def load_model_info(self):
|
||||
# 🏃♂️🏃♂️🏃♂️ 子进程执行
|
||||
@@ -87,4 +87,4 @@ class GetONNXGLMHandle(LocalLLMHandle):
|
||||
# ------------------------------------------------------------------------------------------------------------------------
|
||||
# 🔌💻 GPT-Academic Interface
|
||||
# ------------------------------------------------------------------------------------------------------------------------
|
||||
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetONNXGLMHandle, model_name)
|
||||
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetLlamaHandle, model_name)
|
||||
@@ -15,7 +15,7 @@ from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
|
||||
# ------------------------------------------------------------------------------------------------------------------------
|
||||
# 🔌💻 Local Model
|
||||
# ------------------------------------------------------------------------------------------------------------------------
|
||||
class GetONNXGLMHandle(LocalLLMHandle):
|
||||
class GetQwenLMHandle(LocalLLMHandle):
|
||||
|
||||
def load_model_info(self):
|
||||
# 🏃♂️🏃♂️🏃♂️ 子进程执行
|
||||
@@ -64,4 +64,4 @@ class GetONNXGLMHandle(LocalLLMHandle):
|
||||
# ------------------------------------------------------------------------------------------------------------------------
|
||||
# 🔌💻 GPT-Academic Interface
|
||||
# ------------------------------------------------------------------------------------------------------------------------
|
||||
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetONNXGLMHandle, model_name)
|
||||
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetQwenLMHandle, model_name)
|
||||
@@ -1,6 +1,7 @@
|
||||
|
||||
import time
|
||||
from toolbox import update_ui, get_conf, update_ui_lastest_msg
|
||||
from toolbox import check_packages, report_exception
|
||||
|
||||
model_name = '智谱AI大模型'
|
||||
|
||||
@@ -37,6 +38,14 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
|
||||
chatbot.append((inputs, ""))
|
||||
yield from update_ui(chatbot=chatbot, history=history)
|
||||
|
||||
# 尝试导入依赖,如果缺少依赖,则给出安装建议
|
||||
try:
|
||||
check_packages(["zhipuai"])
|
||||
except:
|
||||
yield from update_ui_lastest_msg(f"导入软件依赖失败。使用该模型需要额外依赖,安装方法```pip install --upgrade zhipuai```。",
|
||||
chatbot=chatbot, history=history, delay=0)
|
||||
return
|
||||
|
||||
if validate_key() is False:
|
||||
yield from update_ui_lastest_msg(lastmsg="[Local Message] 请配置ZHIPUAI_API_KEY", chatbot=chatbot, history=history, delay=0)
|
||||
return
|
||||
|
||||
@@ -198,7 +198,7 @@ class LocalLLMHandle(Process):
|
||||
if res.startswith(self.std_tag):
|
||||
new_output = res[len(self.std_tag):]
|
||||
std_out = std_out[:std_out_clip_len]
|
||||
# print(new_output, end='')
|
||||
print(new_output, end='')
|
||||
std_out = new_output + std_out
|
||||
yield self.std_tag + '\n```\n' + std_out + '\n```\n'
|
||||
elif res == '[Finish]':
|
||||
|
||||
Reference in New Issue
Block a user