From 8fce49fa02bb5e6efcfce935485c6f8d551a98b2 Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Fri, 25 Aug 2023 12:31:51 +0800
Subject: [PATCH] =?UTF-8?q?=E6=94=AF=E6=8C=81=E7=99=BE=E5=BA=A6=E4=BA=91?=
 =?UTF-8?q?=E5=8D=83=E5=B8=86=E5=92=8C=E6=96=87=E5=BF=83=E4=B8=80=E8=A8=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 config.py                      |  67 +++++++++++++--
 request_llm/bridge_all.py      |  17 +++-
 request_llm/bridge_qianfan.py  | 150 +++++++++++++++++++++++++++++++++
 request_llm/local_llm_class.py |   4 +-
 4 files changed, 229 insertions(+), 9 deletions(-)
 create mode 100644 request_llm/bridge_qianfan.py

diff --git a/config.py b/config.py
index 4898aac5..f82891c4 100644
--- a/config.py
+++ b/config.py
@@ -11,7 +11,7 @@
 
 API_KEY = "此处填API密钥" # 可同时填写多个API-KEY,用英文逗号分割,例如API_KEY = "sk-openaikey1,sk-openaikey2,fkxxxx-api2dkey3,azure-apikey4"
 
-# [step 2]>> 改为True应用代理,如果直接在海外服务器部署,此处不修改
+# [step 2]>> 改为True应用代理,如果直接在海外服务器部署,此处不修改;如果使用本地或无地域限制的大模型时,此处也不需要修改
 USE_PROXY = False
 if USE_PROXY:
     """
@@ -71,10 +71,16 @@ MAX_RETRY = 2
 
 # 模型选择是 (注意: LLM_MODEL是默认选中的模型, 它*必须*被包含在AVAIL_LLM_MODELS列表中 )
 LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓
 AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "stack-claude"]
-# P.S. 其他可用的模型还包括 ["llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "spark", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
+# P.S. 其他可用的模型还包括 ["qianfan", "llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "spark", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
 
-# ChatGLM(2) Finetune Model Path (如果使用ChatGLM2微调模型,需要把"chatglmft"加入AVAIL_LLM_MODELS中)
+
+# 百度千帆(LLM_MODEL="qianfan")
+BAIDU_CLOUD_API_KEY = ''
+BAIDU_CLOUD_SECRET_KEY = ''
+BAIDU_CLOUD_QIANFAN_MODEL = 'ERNIE-Bot' # 可选 "ERNIE-Bot"(文心一言), "ERNIE-Bot-turbo", "BLOOMZ-7B", "Llama-2-70B-Chat", "Llama-2-13B-Chat", "Llama-2-7B-Chat"
+
+
+# 如果使用ChatGLM2微调模型,请把 LLM_MODEL="chatglmft",并在此处指定模型路径
 ChatGLM_PTUNING_CHECKPOINT = "" # 例如"/home/hmp/ChatGLM2-6B/ptuning/output/6b-pt-128-1e-2/checkpoint-100"
 
 
@@ -152,5 +158,56 @@ ANTHROPIC_API_KEY = ""
 CUSTOM_API_KEY_PATTERN = ""
 
 
-# HUGGINGFACE的TOKEN 下载LLAMA时起作用 https://huggingface.co/docs/hub/security-tokens
-HUGGINGFACE_ACCESS_TOKEN = "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"
\ No newline at end of file
+# HUGGINGFACE的TOKEN,下载LLAMA时起作用 https://huggingface.co/docs/hub/security-tokens
+HUGGINGFACE_ACCESS_TOKEN = "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"
+
+
+
+"""
+在线大模型配置关联关系示意图
+│
+├── "gpt-3.5-turbo" 等openai模型
+│   ├── API_KEY
+│   ├── CUSTOM_API_KEY_PATTERN(不常用)
+│   ├── API_ORG(不常用)
+│   └── API_URL_REDIRECT(不常用)
+│
+├── "azure-gpt-3.5" 等azure模型
+│   ├── API_KEY
+│   ├── AZURE_ENDPOINT
+│   ├── AZURE_API_KEY
+│   ├── AZURE_ENGINE
+│   └── API_URL_REDIRECT
+│
+├── "spark" 星火认知大模型
+│   ├── XFYUN_APPID
+│   ├── XFYUN_API_SECRET
+│   └── XFYUN_API_KEY
+│
+├── "claude-1-100k" 等claude模型
+│   └── ANTHROPIC_API_KEY
+│
+├── "stack-claude"
+│   ├── SLACK_CLAUDE_BOT_ID
+│   └── SLACK_CLAUDE_USER_TOKEN
+│
+├── "qianfan" 百度千帆大模型库
+│   ├── BAIDU_CLOUD_QIANFAN_MODEL
+│   ├── BAIDU_CLOUD_API_KEY
+│   └── BAIDU_CLOUD_SECRET_KEY
+│
+├── "newbing" Newbing接口不再稳定,不推荐使用
+    ├── NEWBING_STYLE
+    └── NEWBING_COOKIES
+
+
+
+插件在线服务配置依赖关系示意图
+│
+├── 语音功能
+    ├── ENABLE_AUDIO
+    ├── ALIYUN_TOKEN
+    ├── ALIYUN_APPKEY
+    ├── ALIYUN_ACCESSKEY
+    └── ALIYUN_SECRET
+"""
\ No newline at end of file
diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py
index db26e52c..b645d01f 100644
--- a/request_llm/bridge_all.py
+++ b/request_llm/bridge_all.py
@@ -19,6 +19,12 @@ from .bridge_chatgpt import predict as chatgpt_ui
 from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
 from .bridge_chatglm import predict as chatglm_ui
+from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
+from .bridge_chatglm import predict as chatglm_ui
+
+from .bridge_qianfan import predict_no_ui_long_connection as qianfan_noui
+from .bridge_qianfan import predict as qianfan_ui
+
+
 colors = ['#FF00FF', '#00FFFF', '#FF0000', '#990099', '#009999', '#990044']
 
 class LazyloadTiktoken(object):
@@ -165,7 +171,14 @@ model_info = {
         "tokenizer": tokenizer_gpt35,
         "token_cnt": get_token_num_gpt35,
     },
-
+    "qianfan": {
+        "fn_with_ui": qianfan_ui,
+        "fn_without_ui": qianfan_noui,
+        "endpoint": None,
+        "max_token": 4096,
+        "tokenizer": tokenizer_gpt35,
+        "token_cnt": get_token_num_gpt35,
+    },
 }
 
 # -=-=-=-=-=-=- 以下部分是新加入的模型,可能附带额外依赖 -=-=-=-=-=-=-
@@ -385,7 +398,7 @@ if "spark" in AVAIL_LLM_MODELS: # 讯飞星火认知大模型
         })
     except:
         print(trimmed_format_exc())
-if "llama2" in AVAIL_LLM_MODELS: # 讯飞星火认知大模型
+if "llama2" in AVAIL_LLM_MODELS: # llama2
     try:
         from .bridge_llama2 import predict_no_ui_long_connection as llama2_noui
         from .bridge_llama2 import predict as llama2_ui
diff --git a/request_llm/bridge_qianfan.py b/request_llm/bridge_qianfan.py
new file mode 100644
index 00000000..0d56e3a2
--- /dev/null
+++ b/request_llm/bridge_qianfan.py
@@ -0,0 +1,150 @@
+
+import time, requests, json
+from toolbox import update_ui, get_conf
+from multiprocessing import Process, Pipe
+from functools import wraps
+from datetime import datetime, timedelta
+
+model_name = '千帆大模型平台'
+timeout_bot_msg = '[Local Message] Request timeout. Network error.'
+
+def cache_decorator(timeout):
+    cache = {}
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            key = (func.__name__, args, frozenset(kwargs.items()))
+            # Check if result is already cached and not expired
+            if key in cache:
+                result, timestamp = cache[key]
+                if datetime.now() - timestamp < timedelta(seconds=timeout):
+                    return result
+
+            # Call the function and cache the result
+            result = func(*args, **kwargs)
+            cache[key] = (result, datetime.now())
+            return result
+        return wrapper
+    return decorator
+
+@cache_decorator(timeout=3600)
+def get_access_token():
+    """
+    使用 AK,SK 生成鉴权签名(Access Token)
+    :return: access_token,或是None(如果错误)
+    """
+    # if (access_token_cache is None) or (time.time() - last_access_token_obtain_time > 3600):
+    BAIDU_CLOUD_API_KEY, BAIDU_CLOUD_SECRET_KEY = get_conf('BAIDU_CLOUD_API_KEY', 'BAIDU_CLOUD_SECRET_KEY')
+
+    if len(BAIDU_CLOUD_SECRET_KEY) == 0: raise RuntimeError("没有配置BAIDU_CLOUD_SECRET_KEY")
+    if len(BAIDU_CLOUD_API_KEY) == 0: raise RuntimeError("没有配置BAIDU_CLOUD_API_KEY")
+
+    url = "https://aip.baidubce.com/oauth/2.0/token"
+    params = {"grant_type": "client_credentials", "client_id": BAIDU_CLOUD_API_KEY, "client_secret": BAIDU_CLOUD_SECRET_KEY}
+    access_token_cache = str(requests.post(url, params=params).json().get("access_token"))
+    return access_token_cache
+    # else:
+    #     return access_token_cache
+
+
+def generate_message_payload(inputs, llm_kwargs, history, system_prompt):
+    conversation_cnt = len(history) // 2
+    messages = [{"role": "user", "content": system_prompt}]
+    messages.append({"role": "assistant", "content": 'Certainly!'})
+    if conversation_cnt:
+        for index in range(0, 2*conversation_cnt, 2):
+            what_i_have_asked = {}
+            what_i_have_asked["role"] = "user"
+            what_i_have_asked["content"] = history[index]
+            what_gpt_answer = {}
+            what_gpt_answer["role"] = "assistant"
+            what_gpt_answer["content"] = history[index+1]
+            if what_i_have_asked["content"] != "":
+                if what_gpt_answer["content"] == "": continue
+                if what_gpt_answer["content"] == timeout_bot_msg: continue
+                messages.append(what_i_have_asked)
+                messages.append(what_gpt_answer)
+            else:
+                messages[-1]['content'] = what_gpt_answer['content']
+    what_i_ask_now = {}
+    what_i_ask_now["role"] = "user"
+    what_i_ask_now["content"] = inputs
+    messages.append(what_i_ask_now)
+    return messages
+
+
+def generate_from_baidu_qianfan(inputs, llm_kwargs, history, system_prompt):
+    BAIDU_CLOUD_QIANFAN_MODEL, = get_conf('BAIDU_CLOUD_QIANFAN_MODEL')
+
+    url_lib = {
+        "ERNIE-Bot": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions" ,
+        "ERNIE-Bot-turbo": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/eb-instant" ,
+        "BLOOMZ-7B": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/bloomz_7b1",
+
+        "Llama-2-70B-Chat": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/llama_2_70b",
+        "Llama-2-13B-Chat": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/llama_2_13b",
+        "Llama-2-7B-Chat": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/llama_2_7b",
+    }
+
+    url = url_lib[BAIDU_CLOUD_QIANFAN_MODEL]
+
+    url += "?access_token=" + get_access_token()
+
+
+    payload = json.dumps({
+        "messages": generate_message_payload(inputs, llm_kwargs, history, system_prompt),
+        "stream": True
+    })
+    headers = {
+        'Content-Type': 'application/json'
+    }
+    response = requests.request("POST", url, headers=headers, data=payload, stream=True)
+    buffer = ""
+    for line in response.iter_lines():
+        try:
+            dec = line.decode().lstrip('data:')
+            dec = json.loads(dec)
+            incoming = dec['result']
+            buffer += incoming
+            yield buffer
+        except:
+            if 'error_code' in dec: raise RuntimeError(dec['error_msg'])
+
+
+def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
+    """
+        ⭐多线程方法
+        函数的说明请见 request_llm/bridge_all.py
+    """
+    watch_dog_patience = 5
+    response = ""
+
+    for response in generate_from_baidu_qianfan(inputs, llm_kwargs, history, sys_prompt):
+        if len(observe_window) >= 1:
+            observe_window[0] = response
+        if len(observe_window) >= 2:
+            if (time.time()-observe_window[1]) > watch_dog_patience: raise RuntimeError("程序终止。")
+    return response

+def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
+    """
+        ⭐单线程方法
+        函数的说明请见 request_llm/bridge_all.py
+    """
+    chatbot.append((inputs, ""))
+
+    if additional_fn is not None:
+        from core_functional import handle_core_functionality
+        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
+
+    # 开始接收回复
+    for response in generate_from_baidu_qianfan(inputs, llm_kwargs, history, system_prompt):
+        chatbot[-1] = (inputs, response)
+        yield from update_ui(chatbot=chatbot, history=history)
+
+    # 总结输出
+    response = f"[Local Message]: {model_name}响应异常 ..."
+    if response == f"[Local Message]: 等待{model_name}响应中 ...":
+        response = f"[Local Message]: {model_name}响应异常 ..."
+    history.extend([inputs, response])
+    yield from update_ui(chatbot=chatbot, history=history)
\ No newline at end of file
diff --git a/request_llm/local_llm_class.py b/request_llm/local_llm_class.py
index 3dd266fe..c9c72534 100644
--- a/request_llm/local_llm_class.py
+++ b/request_llm/local_llm_class.py
@@ -128,7 +128,7 @@ def get_local_llm_predict_fns(LLMSingletonClass, model_name):
 
         # chatglm 没有 sys_prompt 接口,因此把prompt加入 history
         history_feedin = []
-        history_feedin.append(["What can I do?", sys_prompt])
+        history_feedin.append([sys_prompt, "Certainly!"])
         for i in range(len(history)//2):
             history_feedin.append([history[2*i], history[2*i+1]] )
 
@@ -161,7 +161,7 @@ def get_local_llm_predict_fns(LLMSingletonClass, model_name):
 
         # 处理历史信息
        history_feedin = []
-        history_feedin.append(["What can I do?", system_prompt] )
+        history_feedin.append([system_prompt, "Certainly!"])
         for i in range(len(history)//2):
             history_feedin.append([history[2*i], history[2*i+1]] )
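
Usage sketch (illustrative only, following the config.py keys added in this patch; it assumes the project's usual config_private.py / environment-variable override mechanism and uses placeholder key values):

    # hypothetical local override, e.g. in config_private.py
    LLM_MODEL = "qianfan"                       # must also be listed in AVAIL_LLM_MODELS
    AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "qianfan"]
    BAIDU_CLOUD_API_KEY = "..."                 # placeholder, issued by the Baidu Cloud Qianfan console
    BAIDU_CLOUD_SECRET_KEY = "..."              # placeholder
    BAIDU_CLOUD_QIANFAN_MODEL = "ERNIE-Bot"     # or "ERNIE-Bot-turbo", "BLOOMZ-7B", "Llama-2-70B-Chat", ...

With these values set, get_access_token() exchanges BAIDU_CLOUD_API_KEY/BAIDU_CLOUD_SECRET_KEY for an OAuth access token (cached for 3600 seconds by cache_decorator), and generate_from_baidu_qianfan() streams replies from the endpoint that url_lib maps to BAIDU_CLOUD_QIANFAN_MODEL.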