Merge branch 'frontier' into master_autogen

This commit is contained in:
qingxu fu
2023-11-11 22:03:20 +08:00
45 changed files with 423 additions and 193 deletions

View File

@@ -94,7 +94,7 @@ model_info = {
"fn_with_ui": chatgpt_ui,
"fn_without_ui": chatgpt_noui,
"endpoint": openai_endpoint,
"max_token": 1024*16,
"max_token": 16385,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
@@ -112,7 +112,16 @@ model_info = {
"fn_with_ui": chatgpt_ui,
"fn_without_ui": chatgpt_noui,
"endpoint": openai_endpoint,
"max_token": 1024 * 16,
"max_token": 16385,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"gpt-3.5-turbo-1106": {#16k
"fn_with_ui": chatgpt_ui,
"fn_without_ui": chatgpt_noui,
"endpoint": openai_endpoint,
"max_token": 16385,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
@@ -135,6 +144,15 @@ model_info = {
"token_cnt": get_token_num_gpt4,
},
"gpt-4-1106-preview": {
"fn_with_ui": chatgpt_ui,
"fn_without_ui": chatgpt_noui,
"endpoint": openai_endpoint,
"max_token": 128000,
"tokenizer": tokenizer_gpt4,
"token_cnt": get_token_num_gpt4,
},
"gpt-3.5-random": {
"fn_with_ui": chatgpt_ui,
"fn_without_ui": chatgpt_noui,
@@ -159,11 +177,11 @@ model_info = {
"fn_without_ui": chatgpt_noui,
"endpoint": azure_endpoint,
"max_token": 8192,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
"tokenizer": tokenizer_gpt4,
"token_cnt": get_token_num_gpt4,
},
# api_2d
# api_2d (此后不需要在此处添加api2d的接口了因为下面的代码会自动添加)
"api2d-gpt-3.5-turbo": {
"fn_with_ui": chatgpt_ui,
"fn_without_ui": chatgpt_noui,
@@ -182,15 +200,6 @@ model_info = {
"token_cnt": get_token_num_gpt4,
},
"api2d-gpt-3.5-turbo-16k": {
"fn_with_ui": chatgpt_ui,
"fn_without_ui": chatgpt_noui,
"endpoint": api2d_endpoint,
"max_token": 1024*16,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
# 将 chatglm 直接对齐到 chatglm2
"chatglm": {
"fn_with_ui": chatglm_ui,
@@ -226,6 +235,13 @@ model_info = {
},
}
# -=-=-=-=-=-=- api2d 对齐支持 -=-=-=-=-=-=-
for model in AVAIL_LLM_MODELS:
if model.startswith('api2d-') and (model.replace('api2d-','') in model_info.keys()):
mi = model_info[model.replace('api2d-','')]
mi.update({"endpoint": api2d_endpoint})
model_info.update({model: mi})
# -=-=-=-=-=-=- 以下部分是新加入的模型,可能附带额外依赖 -=-=-=-=-=-=-
if "claude-1-100k" in AVAIL_LLM_MODELS or "claude-2" in AVAIL_LLM_MODELS:
from .bridge_claude import predict_no_ui_long_connection as claude_noui

View File

@@ -4,14 +4,13 @@ cmd_to_install = "`pip install -r request_llms/requirements_chatglm.txt`"
from transformers import AutoModel, AutoTokenizer
from toolbox import get_conf, ProxyNetworkActivate
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns, SingletonLocalLLM
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 Local Model
# ------------------------------------------------------------------------------------------------------------------------
@SingletonLocalLLM
class GetGLM2Handle(LocalLLMHandle):
def load_model_info(self):

View File

@@ -4,14 +4,13 @@ cmd_to_install = "`pip install -r request_llms/requirements_chatglm.txt`"
from transformers import AutoModel, AutoTokenizer
from toolbox import get_conf, ProxyNetworkActivate
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns, SingletonLocalLLM
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 Local Model
# ------------------------------------------------------------------------------------------------------------------------
@SingletonLocalLLM
class GetGLM3Handle(LocalLLMHandle):
def load_model_info(self):

View File

@@ -8,7 +8,7 @@ import threading
import importlib
from toolbox import update_ui, get_conf
from multiprocessing import Process, Pipe
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns, SingletonLocalLLM
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
from .chatglmoonx import ChatGLMModel, chat_template
@@ -17,7 +17,6 @@ from .chatglmoonx import ChatGLMModel, chat_template
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 Local Model
# ------------------------------------------------------------------------------------------------------------------------
@SingletonLocalLLM
class GetONNXGLMHandle(LocalLLMHandle):
def load_model_info(self):

View File

@@ -7,8 +7,7 @@
1. predict: 正常对话时使用,具备完备的交互功能,不可多线程
具备多线程调用能力的函数
2. predict_no_ui:高级实验性功能模块调用,不会实时显示在界面上,参数简单,可以多线程并行,方便实现复杂的功能逻辑
3. predict_no_ui_long_connection在实验过程中发现调用predict_no_ui处理长文档时和openai的连接容易断掉这个函数用stream的方式解决这个问题同样支持多线程
2. predict_no_ui_long_connection支持多线程
"""
import json
@@ -351,6 +350,7 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
model = random.choice([
"gpt-3.5-turbo",
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-1106",
"gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k-0613",
"gpt-3.5-turbo-0301",

View File

@@ -7,8 +7,7 @@
1. predict: 正常对话时使用,具备完备的交互功能,不可多线程
具备多线程调用能力的函数
2. predict_no_ui:高级实验性功能模块调用,不会实时显示在界面上,参数简单,可以多线程并行,方便实现复杂的功能逻辑
3. predict_no_ui_long_connection在实验过程中发现调用predict_no_ui处理长文档时和openai的连接容易断掉这个函数用stream的方式解决这个问题同样支持多线程
2. predict_no_ui_long_connection支持多线程
"""
import json

View File

@@ -7,7 +7,7 @@
1. predict: 正常对话时使用,具备完备的交互功能,不可多线程
具备多线程调用能力的函数
2. predict_no_ui_long_connection在实验过程中发现调用predict_no_ui处理长文档时和openai的连接容易断掉这个函数用stream的方式解决这个问题同样支持多线程
2. predict_no_ui_long_connection支持多线程
"""
import os

View File

@@ -5,9 +5,9 @@ from transformers import AutoModel, AutoTokenizer
import time
import threading
import importlib
from toolbox import update_ui, get_conf
from toolbox import update_ui, get_conf, ProxyNetworkActivate
from multiprocessing import Process, Pipe
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns, SingletonLocalLLM
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
# ------------------------------------------------------------------------------------------------------------------------
@@ -34,7 +34,6 @@ def combine_history(prompt, hist):
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 Local Model
# ------------------------------------------------------------------------------------------------------------------------
@SingletonLocalLLM
class GetInternlmHandle(LocalLLMHandle):
def load_model_info(self):
@@ -53,14 +52,15 @@ class GetInternlmHandle(LocalLLMHandle):
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
device = get_conf('LOCAL_MODEL_DEVICE')
if self._model is None:
tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
if device=='cpu':
model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16)
else:
model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16).cuda()
with ProxyNetworkActivate('Download_LLM'):
if self._model is None:
tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
if device=='cpu':
model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16)
else:
model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16).cuda()
model = model.eval()
model = model.eval()
return model, tokenizer
def llm_stream_generator(self, **kwargs):

View File

@@ -5,14 +5,13 @@ cmd_to_install = "`pip install -r request_llms/requirements_chatglm.txt`"
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from toolbox import update_ui, get_conf, ProxyNetworkActivate
from multiprocessing import Process, Pipe
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns, SingletonLocalLLM
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
from threading import Thread
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 Local Model
# ------------------------------------------------------------------------------------------------------------------------
@SingletonLocalLLM
class GetONNXGLMHandle(LocalLLMHandle):
def load_model_info(self):

View File

@@ -6,16 +6,15 @@ from transformers import AutoModel, AutoTokenizer
import time
import threading
import importlib
from toolbox import update_ui, get_conf
from toolbox import update_ui, get_conf, ProxyNetworkActivate
from multiprocessing import Process, Pipe
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns, SingletonLocalLLM
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 Local Model
# ------------------------------------------------------------------------------------------------------------------------
@SingletonLocalLLM
class GetONNXGLMHandle(LocalLLMHandle):
def load_model_info(self):
@@ -30,13 +29,13 @@ class GetONNXGLMHandle(LocalLLMHandle):
import platform
from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
model_id = 'qwen/Qwen-7B-Chat'
revision = 'v1.0.1'
self._tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, trust_remote_code=True)
# use fp16
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", revision=revision, trust_remote_code=True, fp16=True).eval()
model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参
self._model = model
with ProxyNetworkActivate('Download_LLM'):
model_id = 'qwen/Qwen-7B-Chat'
self._tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen-7B-Chat', trust_remote_code=True, resume_download=True)
# use fp16
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True, fp16=True).eval()
model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参
self._model = model
return self._model, self._tokenizer

View File

@@ -76,7 +76,6 @@ class LocalLLMHandle(Process):
self.parent_state, self.child_state = create_queue_pipe()
# allow redirect_stdout
self.std_tag = "[Subprocess Message] "
self.child.write = lambda x: self.child.send(self.std_tag + x)
self.running = True
self._model = None
self._tokenizer = None
@@ -137,6 +136,8 @@ class LocalLLMHandle(Process):
def run(self):
# 🏃‍♂️🏃‍♂️🏃‍♂️ run in child process
# 第一次运行,加载参数
self.child.flush = lambda *args: None
self.child.write = lambda x: self.child.send(self.std_tag + x)
reset_tqdm_output()
self.set_state("`尝试加载模型`")
try:
@@ -220,7 +221,7 @@ def get_local_llm_predict_fns(LLMSingletonClass, model_name, history_format='cla
"""
refer to request_llms/bridge_all.py
"""
_llm_handle = LLMSingletonClass()
_llm_handle = SingletonLocalLLM(LLMSingletonClass)()
if len(observe_window) >= 1:
observe_window[0] = load_message + "\n\n" + _llm_handle.get_state()
if not _llm_handle.running:
@@ -268,7 +269,7 @@ def get_local_llm_predict_fns(LLMSingletonClass, model_name, history_format='cla
"""
chatbot.append((inputs, ""))
_llm_handle = LLMSingletonClass()
_llm_handle = SingletonLocalLLM(LLMSingletonClass)()
chatbot[-1] = (inputs, load_message + "\n\n" + _llm_handle.get_state())
yield from update_ui(chatbot=chatbot, history=[])
if not _llm_handle.running: