This commit is contained in:
binary-husky
2023-12-26 23:59:36 +08:00
parent 15f14f51ff
commit 8dd4d48474
43 changed files with 1343 additions and 618 deletions

View File

@@ -431,16 +431,48 @@ if "chatglm_onnx" in AVAIL_LLM_MODELS:
})
except:
print(trimmed_format_exc())
if "qwen" in AVAIL_LLM_MODELS:
if "qwen-local" in AVAIL_LLM_MODELS:
try:
from .bridge_qwen_local import predict_no_ui_long_connection as qwen_local_noui
from .bridge_qwen_local import predict as qwen_local_ui
model_info.update({
"qwen-local": {
"fn_with_ui": qwen_local_ui,
"fn_without_ui": qwen_local_noui,
"endpoint": None,
"max_token": 4096,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
}
})
except:
print(trimmed_format_exc())
if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS: # zhipuai
try:
from .bridge_qwen import predict_no_ui_long_connection as qwen_noui
from .bridge_qwen import predict as qwen_ui
model_info.update({
"qwen": {
"qwen-turbo": {
"fn_with_ui": qwen_ui,
"fn_without_ui": qwen_noui,
"endpoint": None,
"max_token": 4096,
"max_token": 6144,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen-plus": {
"fn_with_ui": qwen_ui,
"fn_without_ui": qwen_noui,
"endpoint": None,
"max_token": 30720,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen-max": {
"fn_with_ui": qwen_ui,
"fn_without_ui": qwen_noui,
"endpoint": None,
"max_token": 28672,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
}
@@ -552,7 +584,7 @@ if "deepseekcoder" in AVAIL_LLM_MODELS: # deepseekcoder
"fn_with_ui": deepseekcoder_ui,
"fn_without_ui": deepseekcoder_noui,
"endpoint": None,
"max_token": 4096,
"max_token": 2048,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
}

View File

@@ -51,7 +51,8 @@ def decode_chunk(chunk):
chunkjson = json.loads(chunk_decoded[6:])
has_choices = 'choices' in chunkjson
if has_choices: choice_valid = (len(chunkjson['choices']) > 0)
if has_choices and choice_valid: has_content = "content" in chunkjson['choices'][0]["delta"]
if has_choices and choice_valid: has_content = ("content" in chunkjson['choices'][0]["delta"])
if has_content: has_content = (chunkjson['choices'][0]["delta"]["content"] is not None)
if has_choices and choice_valid: has_role = "role" in chunkjson['choices'][0]["delta"]
except:
pass
@@ -101,20 +102,25 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
result = ''
json_data = None
while True:
try: chunk = next(stream_response).decode()
try: chunk = next(stream_response)
except StopIteration:
break
except requests.exceptions.ConnectionError:
chunk = next(stream_response).decode() # 失败了,重试一次?再失败就没办法了。
if len(chunk)==0: continue
if not chunk.startswith('data:'):
error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()
chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。
chunk_decoded, chunkjson, has_choices, choice_valid, has_content, has_role = decode_chunk(chunk)
if len(chunk_decoded)==0: continue
if not chunk_decoded.startswith('data:'):
error_msg = get_full_error(chunk, stream_response).decode()
if "reduce the length" in error_msg:
raise ConnectionAbortedError("OpenAI拒绝了请求:" + error_msg)
else:
raise RuntimeError("OpenAI拒绝了请求" + error_msg)
if ('data: [DONE]' in chunk): break # api2d 正常完成
json_data = json.loads(chunk.lstrip('data:'))['choices'][0]
if ('data: [DONE]' in chunk_decoded): break # api2d 正常完成
# 提前读取一些信息 (用于判断异常)
if has_choices and not choice_valid:
# 一些垃圾第三方接口的出现这样的错误
continue
json_data = chunkjson['choices'][0]
delta = json_data["delta"]
if len(delta) == 0: break
if "role" in delta: continue

View File

@@ -15,29 +15,16 @@ import requests
import base64
import os
import glob
from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, is_the_upload_folder, \
update_ui_lastest_msg, get_max_token, encode_image, have_any_recent_upload_image_files
from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, is_the_upload_folder, update_ui_lastest_msg, get_max_token
proxies, TIMEOUT_SECONDS, MAX_RETRY, API_ORG, AZURE_CFG_ARRAY = \
get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'API_ORG', 'AZURE_CFG_ARRAY')
timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
'网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'
def have_any_recent_upload_image_files(chatbot):
    """
    Look for image files in the most recent upload recorded on `chatbot`.

    The "most_recent_uploaded" entry of `chatbot._cookies` is read as a
    mapping with a "time" key (compared against `time.time()`) and a "path"
    key (a directory that is searched recursively).

    Args:
        chatbot: object exposing a `_cookies` mapping, or None.

    Returns:
        (True, file_manifest) when the upload is less than five minutes old
        and at least one .jpg/.jpeg/.png file is found below its path;
        `file_manifest` lists the matches grouped in that extension order.
        (False, None) in every other case: no chatbot, no recorded upload,
        upload too old, or no image found.

    Raises:
        KeyError: if the recorded upload lacks the "time" key, or lacks the
            "path" key while still being recent.
    """
    recent_window_seconds = 5 * 60
    if chatbot is None:
        return False, None
    most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
    if not most_recent_uploaded:
        return False, None  # nothing has been uploaded yet
    if time.time() - most_recent_uploaded["time"] >= recent_window_seconds:
        return False, None  # the last upload is too old
    path = most_recent_uploaded['path']
    file_manifest = []
    # One recursive glob per extension keeps the jpg -> jpeg -> png grouping.
    for extension in ('jpg', 'jpeg', 'png'):
        file_manifest.extend(glob.glob(f'{path}/**/*.{extension}', recursive=True))
    if not file_manifest:
        return False, None
    return True, file_manifest
def report_invalid_key(key):
if get_conf("BLOCK_INVALID_APIKEY"):
@@ -258,10 +245,6 @@ def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg,
chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}")
return chatbot, history
# Function to encode the image
def encode_image(image_path):
    """
    Return the contents of the file at `image_path` as base64 text.

    Args:
        image_path: path of the file to read; it is opened in binary mode.

    Returns:
        str: the base64 encoding of the file's bytes, decoded as UTF-8.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # Read first so the file handle is released before encoding.
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    return base64.b64encode(raw_bytes).decode('utf-8')
def generate_payload(inputs, llm_kwargs, history, system_prompt, image_paths):
"""

View File

@@ -6,6 +6,7 @@ from toolbox import ProxyNetworkActivate
from toolbox import get_conf
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
from threading import Thread
import torch
def download_huggingface_model(model_name, max_retry, local_dir):
from huggingface_hub import snapshot_download
@@ -36,9 +37,46 @@ class GetCoderLMHandle(LocalLLMHandle):
# tokenizer = download_huggingface_model(model_name, max_retry=128, local_dir=local_dir)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
self._streamer = TextIteratorStreamer(tokenizer)
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
device_map = {
"transformer.word_embeddings": 0,
"transformer.word_embeddings_layernorm": 0,
"lm_head": 0,
"transformer.h": 0,
"transformer.ln_f": 0,
"model.embed_tokens": 0,
"model.layers": 0,
"model.norm": 0,
}
# 检查量化配置
quantization_type = get_conf('LOCAL_MODEL_QUANT')
if get_conf('LOCAL_MODEL_DEVICE') != 'cpu':
model = model.cuda()
if quantization_type == "INT8":
from transformers import BitsAndBytesConfig
# 使用 INT8 量化
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, load_in_8bit=True,
device_map=device_map)
elif quantization_type == "INT4":
from transformers import BitsAndBytesConfig
# 使用 INT4 量化
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.bfloat16
)
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True,
quantization_config=bnb_config, device_map=device_map)
else:
# 使用默认的 FP16
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True,
torch_dtype=torch.bfloat16, device_map=device_map)
else:
# CPU 模式
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True,
torch_dtype=torch.bfloat16)
return model, tokenizer
def llm_stream_generator(self, **kwargs):
@@ -54,7 +92,10 @@ class GetCoderLMHandle(LocalLLMHandle):
query, max_length, top_p, temperature, history = adaptor(kwargs)
history.append({ 'role': 'user', 'content': query})
messages = history
inputs = self._tokenizer.apply_chat_template(messages, return_tensors="pt").to(self._model.device)
inputs = self._tokenizer.apply_chat_template(messages, return_tensors="pt")
if inputs.shape[1] > max_length:
inputs = inputs[:, -max_length:]
inputs = inputs.to(self._model.device)
generation_kwargs = dict(
inputs=inputs,
max_new_tokens=max_length,

View File

@@ -1,67 +1,62 @@
model_name = "Qwen"
cmd_to_install = "`pip install -r request_llms/requirements_qwen.txt`"
from transformers import AutoModel, AutoTokenizer
import time
import threading
import importlib
from toolbox import update_ui, get_conf, ProxyNetworkActivate
from multiprocessing import Process, Pipe
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
import os
from toolbox import update_ui, get_conf, update_ui_lastest_msg
from toolbox import check_packages, report_exception
model_name = 'Qwen'
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
"""
⭐多线程方法
函数的说明请见 request_llms/bridge_all.py
"""
watch_dog_patience = 5
response = ""
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 Local Model
# ------------------------------------------------------------------------------------------------------------------------
class GetQwenLMHandle(LocalLLMHandle):
from .com_qwenapi import QwenRequestInstance
sri = QwenRequestInstance()
for response in sri.generate(inputs, llm_kwargs, history, sys_prompt):
if len(observe_window) >= 1:
observe_window[0] = response
if len(observe_window) >= 2:
if (time.time()-observe_window[1]) > watch_dog_patience: raise RuntimeError("程序终止。")
return response
def load_model_info(self):
# 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
self.model_name = model_name
self.cmd_to_install = cmd_to_install
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
"""
⭐单线程方法
函数的说明请见 request_llms/bridge_all.py
"""
chatbot.append((inputs, ""))
yield from update_ui(chatbot=chatbot, history=history)
def load_model_and_tokenizer(self):
# 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
import os, glob
import os
import platform
from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
check_packages(["dashscope"])
except:
yield from update_ui_lastest_msg(f"导入软件依赖失败。使用该模型需要额外依赖,安装方法```pip install --upgrade dashscope```。",
chatbot=chatbot, history=history, delay=0)
return
with ProxyNetworkActivate('Download_LLM'):
model_id = 'qwen/Qwen-7B-Chat'
self._tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen-7B-Chat', trust_remote_code=True, resume_download=True)
# use fp16
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True, fp16=True).eval()
model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参
self._model = model
# 检查DASHSCOPE_API_KEY
if get_conf("DASHSCOPE_API_KEY") == "":
yield from update_ui_lastest_msg(f"请配置 DASHSCOPE_API_KEY。",
chatbot=chatbot, history=history, delay=0)
return
return self._model, self._tokenizer
if additional_fn is not None:
from core_functional import handle_core_functionality
inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
def llm_stream_generator(self, **kwargs):
# 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
def adaptor(kwargs):
    """
    Unpack the generation arguments from `kwargs` into a fixed-order tuple.

    Args:
        kwargs: mapping that must contain the keys 'query', 'max_length',
            'top_p', 'temperature' and 'history'; other keys are ignored.

    Returns:
        tuple: (query, max_length, top_p, temperature, history).

    Raises:
        KeyError: if any of the five keys is missing.
    """
    return (
        kwargs['query'],
        kwargs['max_length'],
        kwargs['top_p'],
        kwargs['temperature'],
        kwargs['history'],
    )
# 开始接收回复
from .com_qwenapi import QwenRequestInstance
sri = QwenRequestInstance()
for response in sri.generate(inputs, llm_kwargs, history, system_prompt):
chatbot[-1] = (inputs, response)
yield from update_ui(chatbot=chatbot, history=history)
query, max_length, top_p, temperature, history = adaptor(kwargs)
for response in self._model.chat(self._tokenizer, query, history=history, stream=True):
yield response
def try_to_import_special_deps(self, **kwargs):
# import something that will raise error if the user does not install requirement_*.txt
# 🏃‍♂️🏃‍♂️🏃‍♂️ 主进程执行
import importlib
importlib.import_module('modelscope')
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic Interface
# ------------------------------------------------------------------------------------------------------------------------
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetQwenLMHandle, model_name)
# 总结输出
if response == f"[Local Message] 等待{model_name}响应中 ...":
response = f"[Local Message] {model_name}响应异常 ..."
history.extend([inputs, response])
yield from update_ui(chatbot=chatbot, history=history)

View File

@@ -26,7 +26,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
from .com_sparkapi import SparkRequestInstance
sri = SparkRequestInstance()
for response in sri.generate(inputs, llm_kwargs, history, sys_prompt):
for response in sri.generate(inputs, llm_kwargs, history, sys_prompt, use_image_api=False):
if len(observe_window) >= 1:
observe_window[0] = response
if len(observe_window) >= 2:
@@ -52,7 +52,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
# 开始接收回复
from .com_sparkapi import SparkRequestInstance
sri = SparkRequestInstance()
for response in sri.generate(inputs, llm_kwargs, history, system_prompt):
for response in sri.generate(inputs, llm_kwargs, history, system_prompt, use_image_api=True):
chatbot[-1] = (inputs, response)
yield from update_ui(chatbot=chatbot, history=history)

View File

@@ -1,4 +1,4 @@
from toolbox import get_conf
from toolbox import get_conf, get_pictures_list, encode_image
import base64
import datetime
import hashlib
@@ -65,18 +65,19 @@ class SparkRequestInstance():
self.gpt_url = "ws://spark-api.xf-yun.com/v1.1/chat"
self.gpt_url_v2 = "ws://spark-api.xf-yun.com/v2.1/chat"
self.gpt_url_v3 = "ws://spark-api.xf-yun.com/v3.1/chat"
self.gpt_url_img = "wss://spark-api.cn-huabei-1.xf-yun.com/v2.1/image"
self.time_to_yield_event = threading.Event()
self.time_to_exit_event = threading.Event()
self.result_buf = ""
def generate(self, inputs, llm_kwargs, history, system_prompt):
def generate(self, inputs, llm_kwargs, history, system_prompt, use_image_api=False):
llm_kwargs = llm_kwargs
history = history
system_prompt = system_prompt
import _thread as thread
thread.start_new_thread(self.create_blocking_request, (inputs, llm_kwargs, history, system_prompt))
thread.start_new_thread(self.create_blocking_request, (inputs, llm_kwargs, history, system_prompt, use_image_api))
while True:
self.time_to_yield_event.wait(timeout=1)
if self.time_to_yield_event.is_set():
@@ -85,14 +86,20 @@ class SparkRequestInstance():
return self.result_buf
def create_blocking_request(self, inputs, llm_kwargs, history, system_prompt):
def create_blocking_request(self, inputs, llm_kwargs, history, system_prompt, use_image_api):
if llm_kwargs['llm_model'] == 'sparkv2':
gpt_url = self.gpt_url_v2
elif llm_kwargs['llm_model'] == 'sparkv3':
gpt_url = self.gpt_url_v3
else:
gpt_url = self.gpt_url
file_manifest = []
if use_image_api and llm_kwargs.get('most_recent_uploaded'):
if llm_kwargs['most_recent_uploaded'].get('path'):
file_manifest = get_pictures_list(llm_kwargs['most_recent_uploaded']['path'])
if len(file_manifest) > 0:
print('正在使用讯飞图片理解API')
gpt_url = self.gpt_url_img
wsParam = Ws_Param(self.appid, self.api_key, self.api_secret, gpt_url)
websocket.enableTrace(False)
wsUrl = wsParam.create_url()
@@ -101,9 +108,8 @@ class SparkRequestInstance():
def on_open(ws):
    """
    Start `run(ws)` on a new thread and return immediately.

    Args:
        ws: object handed on unchanged as the single argument of `run`.
            NOTE(review): presumably the websocket app whose "open" event
            triggers this callback; the registration is not visible here.
    """
    import _thread as thread
    thread.start_new_thread(run, (ws,))
def run(ws, *args):
data = json.dumps(gen_params(ws.appid, *ws.all_args))
data = json.dumps(gen_params(ws.appid, *ws.all_args, file_manifest))
ws.send(data)
# 收到websocket消息的处理
@@ -142,9 +148,18 @@ class SparkRequestInstance():
ws.all_args = (inputs, llm_kwargs, history, system_prompt)
ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
def generate_message_payload(inputs, llm_kwargs, history, system_prompt):
def generate_message_payload(inputs, llm_kwargs, history, system_prompt, file_manifest):
conversation_cnt = len(history) // 2
messages = [{"role": "system", "content": system_prompt}]
messages = []
if file_manifest:
base64_images = []
for image_path in file_manifest:
base64_images.append(encode_image(image_path))
for img_s in base64_images:
if img_s not in str(messages):
messages.append({"role": "user", "content": img_s, "content_type": "image"})
else:
messages = [{"role": "system", "content": system_prompt}]
if conversation_cnt:
for index in range(0, 2*conversation_cnt, 2):
what_i_have_asked = {}
@@ -167,7 +182,7 @@ def generate_message_payload(inputs, llm_kwargs, history, system_prompt):
return messages
def gen_params(appid, inputs, llm_kwargs, history, system_prompt):
def gen_params(appid, inputs, llm_kwargs, history, system_prompt, file_manifest):
"""
通过appid和用户的提问来生成请参数
"""
@@ -176,6 +191,8 @@ def gen_params(appid, inputs, llm_kwargs, history, system_prompt):
"sparkv2": "generalv2",
"sparkv3": "generalv3",
}
domains_select = domains[llm_kwargs['llm_model']]
if file_manifest: domains_select = 'image'
data = {
"header": {
"app_id": appid,
@@ -183,7 +200,7 @@ def gen_params(appid, inputs, llm_kwargs, history, system_prompt):
},
"parameter": {
"chat": {
"domain": domains[llm_kwargs['llm_model']],
"domain": domains_select,
"temperature": llm_kwargs["temperature"],
"random_threshold": 0.5,
"max_tokens": 4096,
@@ -192,7 +209,7 @@ def gen_params(appid, inputs, llm_kwargs, history, system_prompt):
},
"payload": {
"message": {
"text": generate_message_payload(inputs, llm_kwargs, history, system_prompt)
"text": generate_message_payload(inputs, llm_kwargs, history, system_prompt, file_manifest)
}
}
}

View File

@@ -183,11 +183,11 @@ class LocalLLMHandle(Process):
def stream_chat(self, **kwargs):
# ⭐run in main process
if self.get_state() == "`准备就绪`":
yield "`正在等待线程锁,排队中请稍 ...`"
yield "`正在等待线程锁,排队中请稍 ...`"
with self.threadLock:
if self.parent.poll():
yield "`排队中请稍 ...`"
yield "`排队中请稍 ...`"
self.clear_pending_messages()
self.parent.send(kwargs)
std_out = ""

View File

@@ -6,5 +6,3 @@ sentencepiece
numpy
onnxruntime
sentencepiece
streamlit
streamlit-chat

View File

@@ -5,5 +5,4 @@ accelerate
matplotlib
huggingface_hub
triton
streamlit

View File

@@ -1,2 +1 @@
modelscope
transformers_stream_generator
dashscope