diff --git a/README.md b/README.md index 62e48896..00017df5 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ To translate this project to arbitary language with GPT, read and run [`multi_la 功能(⭐= 近期新增功能) | 描述 --- | --- -⭐[接入新模型](https://github.com/binary-husky/gpt_academic/wiki/%E5%A6%82%E4%BD%95%E5%88%87%E6%8D%A2%E6%A8%A1%E5%9E%8B)! | ⭐阿里达摩院[通义千问](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary),上海AI-Lab[书生](https://github.com/InternLM/InternLM),讯飞[星火](https://xinghuo.xfyun.cn/) +⭐[接入新模型](https://github.com/binary-husky/gpt_academic/wiki/%E5%A6%82%E4%BD%95%E5%88%87%E6%8D%A2%E6%A8%A1%E5%9E%8B)! | 百度[千帆](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu)与文心一言, [通义千问](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary),上海AI-Lab[书生](https://github.com/InternLM/InternLM),讯飞[星火](https://xinghuo.xfyun.cn/),[LLaMa2](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) 一键润色 | 支持一键润色、一键查找论文语法错误 一键中英互译 | 一键中英互译 一键代码解释 | 显示代码、解释代码、生成代码、给代码加注释 @@ -178,7 +178,7 @@ docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic ``` P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以直接使用docker-compose获取Latex功能(修改docker-compose.yml,保留方案4并删除其他方案)。 -2. ChatGPT + ChatGLM2 + MOSS(需要熟悉Docker) +2. ChatGPT + ChatGLM2 + MOSS + LLAMA2 + 通义千问(需要熟悉[Nvidia Docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-on-ubuntu-and-debian)运行时) [![chatglm](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-chatglm.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-chatglm.yml) ``` sh @@ -186,7 +186,7 @@ P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以 docker-compose up ``` -3. ChatGPT + LLAMA + 盘古 + RWKV(需要熟悉Docker) +3. ChatGPT + LLAMA + 盘古 + RWKV(需要熟悉[Nvidia Docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-on-ubuntu-and-debian)运行时) [![jittorllms](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-jittorllms.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-jittorllms.yml) ``` sh @@ -313,6 +313,8 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h ### II:版本: - version 3.5(Todo): 使用自然语言调用本项目的所有函数插件(高优先级) +- version 3.49: 支持百度千帆平台和文心一言 +- version 3.48: 支持阿里达摩院通义千问,上海AI-Lab书生,讯飞星火 - version 3.46: 支持完全脱手操作的实时语音对话 - version 3.45: 支持自定义ChatGLM2微调模型 - version 3.44: 正式支持Azure,优化界面易用性 diff --git a/app.py b/app.py index e38af7df..a9bc4922 100644 --- a/app.py +++ b/app.py @@ -4,7 +4,7 @@ def main(): import subprocess, sys subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'gradio-stable-fork']) import gradio as gr - if gr.__version__ not in ['3.28.3','3.32.3']: assert False, "需要特殊依赖,请务必用 pip install -r requirements.txt 指令安装依赖,详情信息见requirements.txt" + if gr.__version__ not in ['3.28.3','3.32.2']: assert False, "需要特殊依赖,请务必用 pip install -r requirements.txt 指令安装依赖,详情信息见requirements.txt" from request_llm.bridge_all import predict from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, load_chat_cookies, DummyWith # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到 diff --git a/config.py b/config.py index 21e77d50..a161bb19 100644 --- a/config.py +++ b/config.py @@ -11,7 +11,11 @@ API_KEY = "此处填API密钥" # 可同时填写多个API-KEY,用英文逗号分割,例如API_KEY = "sk-openaikey1,sk-openaikey2,fkxxxx-api2dkey3,azure-apikey4" -# [step 2]>> 改为True应用代理,如果直接在海外服务器部署,此处不修改 +# [step 1]>> API_KEY = 
"sk-123456789xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx123456789"。极少数情况下,还需要填写组织(格式如org-123456789abcdefghijklmno的),请向下翻,找 API_ORG 设置项 +API_KEY = "此处填API密钥" # 可同时填写多个API-KEY,用英文逗号分割,例如API_KEY = "sk-openaikey1,sk-openaikey2,fkxxxx-api2dkey3,azure-apikey4" + + +# [step 2]>> 改为True应用代理,如果直接在海外服务器部署,此处不修改;如果使用本地或无地域限制的大模型时,此处也不需要修改 USE_PROXY = False if USE_PROXY: """ @@ -69,7 +73,7 @@ MAX_RETRY = 2 # OpenAI模型选择是(gpt4现在只对申请成功的人开放) LLM_MODEL = "gpt-3.5-turbo" # 可选 "chatglm" -AVAIL_LLM_MODELS = ["newbing-free", "gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "api2d-gpt-3.5-turbo"] +AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "api2d-gpt-3.5-turbo", "spark", "azure-gpt-3.5"] # ChatGLM(2) Finetune Model Path (如果使用ChatGLM2微调模型,需要把"chatglmft"加入AVAIL_LLM_MODELS中) ChatGLM_PTUNING_CHECKPOINT = "" # 例如"/home/hmp/ChatGLM2-6B/ptuning/output/6b-pt-128-1e-2/checkpoint-100" @@ -147,3 +151,70 @@ ANTHROPIC_API_KEY = "" # 自定义API KEY格式 CUSTOM_API_KEY_PATTERN = "" + + +# HUGGINGFACE的TOKEN,下载LLAMA时起作用 https://huggingface.co/docs/hub/security-tokens +HUGGINGFACE_ACCESS_TOKEN = "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" + + +# GROBID服务器地址(填写多个可以均衡负载),用于高质量地读取PDF文档 +# 获取方法:复制以下空间https://huggingface.co/spaces/qingxu98/grobid,设为public,然后GROBID_URL = "https://(你的hf用户名如qingxu98)-(你的填写的空间名如grobid).hf.space" +GROBID_URLS = [ + "https://qingxu98-grobid.hf.space","https://qingxu98-grobid2.hf.space","https://qingxu98-grobid3.hf.space", + "https://shaocongma-grobid.hf.space","https://FBR123-grobid.hf.space", +] + + + +""" +在线大模型配置关联关系示意图 +│ +├── "gpt-3.5-turbo" 等openai模型 +│ ├── API_KEY +│ ├── CUSTOM_API_KEY_PATTERN(不常用) +│ ├── API_ORG(不常用) +│ └── API_URL_REDIRECT(不常用) +│ +├── "azure-gpt-3.5" 等azure模型 +│ ├── API_KEY +│ ├── AZURE_ENDPOINT +│ ├── AZURE_API_KEY +│ ├── AZURE_ENGINE +│ └── API_URL_REDIRECT +│ +├── "spark" 星火认知大模型 +│ ├── XFYUN_APPID +│ ├── XFYUN_API_SECRET +│ └── XFYUN_API_KEY +│ +├── "claude-1-100k" 等claude模型 +│ └── ANTHROPIC_API_KEY +│ +├── "stack-claude" +│ ├── SLACK_CLAUDE_BOT_ID +│ └── SLACK_CLAUDE_USER_TOKEN +│ +├── "qianfan" 百度千帆大模型库 +│ ├── BAIDU_CLOUD_QIANFAN_MODEL +│ ├── BAIDU_CLOUD_API_KEY +│ └── BAIDU_CLOUD_SECRET_KEY +│ +├── "newbing" Newbing接口不再稳定,不推荐使用 + ├── NEWBING_STYLE + └── NEWBING_COOKIES + + + +插件在线服务配置依赖关系示意图 +│ +├── 语音功能 +│ ├── ENABLE_AUDIO +│ ├── ALIYUN_TOKEN +│ ├── ALIYUN_APPKEY +│ ├── ALIYUN_ACCESSKEY +│ └── ALIYUN_SECRET +│ +├── PDF文档精准解析 +│ └── GROBID_URLS + +""" diff --git a/crazy_functional.py b/crazy_functional.py index 03aaaf55..119ff0cd 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -24,6 +24,7 @@ def get_crazy_functions(): from crazy_functions.对话历史存档 import 对话历史存档 from crazy_functions.对话历史存档 import 载入对话历史存档 from crazy_functions.对话历史存档 import 删除所有本地对话历史记录 + from crazy_functions.辅助功能 import 清除缓存 from crazy_functions.批量Markdown翻译 import Markdown英译中 function_plugins = { @@ -40,7 +41,12 @@ def get_crazy_functions(): "AsButton":False, "Function": HotReload(删除所有本地对话历史记录) }, - "[测试功能] 解析Jupyter Notebook文件": { + "清除所有缓存文件(请谨慎操作)": { + "Color": "stop", + "AsButton": False, # 加入下拉菜单中 + "Function": HotReload(清除缓存) + }, + "解析Jupyter Notebook文件": { "Color": "stop", "AsButton":False, "Function": HotReload(解析ipynb文件), @@ -328,7 +334,7 @@ def get_crazy_functions(): try: from crazy_functions.Langchain知识库 import 知识库问答 function_plugins.update({ - "[功能尚不稳定] 构建知识库(请先上传文件素材)": { + "构建知识库(请先上传文件素材)": { "Color": "stop", "AsButton": False, "AdvancedArgs": True, @@ -342,7 +348,7 @@ def get_crazy_functions(): try: from crazy_functions.Langchain知识库 import 读取知识库作答 function_plugins.update({ - "[功能尚不稳定] 
知识库问答": { + "知识库问答": { "Color": "stop", "AsButton": False, "AdvancedArgs": True, @@ -352,6 +358,32 @@ def get_crazy_functions(): }) except: print('Load function plugin failed') + + try: + from crazy_functions.交互功能函数模板 import 交互功能模板函数 + function_plugins.update({ + "交互功能模板函数": { + "Color": "stop", + "AsButton": False, + "Function": HotReload(交互功能模板函数) + } + }) + except: + print('Load function plugin failed') + + # try: + # from crazy_functions.chatglm微调工具 import 微调数据集生成 + # function_plugins.update({ + # "黑盒模型学习: 微调数据集生成 (先上传数据集)": { + # "Color": "stop", + # "AsButton": False, + # "AdvancedArgs": True, + # "ArgsReminder": "针对数据集输入(如 绿帽子*深蓝色衬衫*黑色运动裤)给出指令,例如您可以将以下命令复制到下方: --llm_to_learn=azure-gpt-3.5 --prompt_prefix='根据下面的服装类型提示,想象一个穿着者,对这个人外貌、身处的环境、内心世界、过去经历进行描写。要求:100字以内,用第二人称。' --system_prompt=''", + # "Function": HotReload(微调数据集生成) + # } + # }) + # except: + # print('Load function plugin failed') try: from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比 @@ -366,7 +398,7 @@ def get_crazy_functions(): }) from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF function_plugins.update({ - "Arixv翻译(输入arxivID)[需Latex]": { + "Arixv论文精细翻译(输入arxivID)[需Latex]": { "Color": "stop", "AsButton": False, "AdvancedArgs": True, @@ -377,7 +409,7 @@ def get_crazy_functions(): } }) function_plugins.update({ - "本地论文翻译(上传Latex压缩包)[需Latex]": { + "本地Latex论文精细翻译(上传Latex项目)[需Latex]": { "Color": "stop", "AsButton": False, "AdvancedArgs": True, diff --git a/crazy_functions/latex_fns/latex_toolbox.py b/crazy_functions/latex_fns/latex_toolbox.py index a0c889a8..5adc7ea8 100644 --- a/crazy_functions/latex_fns/latex_toolbox.py +++ b/crazy_functions/latex_fns/latex_toolbox.py @@ -281,9 +281,12 @@ def rm_comments(main_file): def find_tex_file_ignore_case(fp): dir_name = os.path.dirname(fp) base_name = os.path.basename(fp) + # 如果输入的文件路径是正确的 + if os.path.exists(pj(dir_name, base_name)): return pj(dir_name, base_name) + # 如果不正确,试着加上.tex后缀试试 if not base_name.endswith('.tex'): base_name+='.tex' if os.path.exists(pj(dir_name, base_name)): return pj(dir_name, base_name) - # go case in-sensitive + # 如果还找不到,解除大小写限制,再试一次 import glob for f in glob.glob(dir_name+'/*.tex'): base_name_s = os.path.basename(fp) diff --git a/crazy_functions/pdf_fns/parse_pdf.py b/crazy_functions/pdf_fns/parse_pdf.py new file mode 100644 index 00000000..00016be5 --- /dev/null +++ b/crazy_functions/pdf_fns/parse_pdf.py @@ -0,0 +1,25 @@ +import requests +import random +from functools import lru_cache +class GROBID_OFFLINE_EXCEPTION(Exception): pass + +def get_avail_grobid_url(): + from toolbox import get_conf + GROBID_URLS, = get_conf('GROBID_URLS') + if len(GROBID_URLS) == 0: return None + try: + _grobid_url = random.choice(GROBID_URLS) # 随机负载均衡 + if _grobid_url.endswith('/'): _grobid_url = _grobid_url.rstrip('/') + res = requests.get(_grobid_url+'/api/isalive') + if res.text=='true': return _grobid_url + else: return None + except: + return None + +@lru_cache(maxsize=32) +def parse_pdf(pdf_path, grobid_url): + import scipdf # pip install scipdf_parser + if grobid_url.endswith('/'): grobid_url = grobid_url.rstrip('/') + article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url) + return article_dict + diff --git a/crazy_functions/批量翻译PDF文档_多线程.py b/crazy_functions/批量翻译PDF文档_多线程.py index 0adac966..e0558e90 100644 --- a/crazy_functions/批量翻译PDF文档_多线程.py +++ b/crazy_functions/批量翻译PDF文档_多线程.py @@ -1,15 +1,19 @@ from toolbox import CatchException, report_execption, write_results_to_file -from toolbox import update_ui, 
promote_file_to_downloadzone +from toolbox import update_ui, promote_file_to_downloadzone, update_ui_lastest_msg, disable_auto_promotion +from toolbox import write_history_to_file, get_log_folder from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency from .crazy_utils import read_and_clean_pdf_text +from .pdf_fns.parse_pdf import parse_pdf, get_avail_grobid_url from colorful import * +import glob +import os +import math @CatchException -def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_prompt, web_port): - import glob - import os +def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + disable_auto_promotion(chatbot) # 基本信息:功能、贡献者 chatbot.append([ "函数插件功能?", @@ -30,20 +34,11 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_ # 清空历史,以免输入溢出 history = [] + from .crazy_utils import get_files_from_everything + success, file_manifest, project_folder = get_files_from_everything(txt, type='.pdf') # 检测输入参数,如没有给定输入参数,直接退出 - if os.path.exists(txt): - project_folder = txt - else: - if txt == "": - txt = '空空如也的输入栏' - report_execption(chatbot, history, - a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}") - yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 - return - - # 搜索需要处理的文件清单 - file_manifest = [f for f in glob.glob( - f'{project_folder}/**/*.pdf', recursive=True)] + if not success: + if txt == "": txt = '空空如也的输入栏' # 如果没找到任何文件 if len(file_manifest) == 0: @@ -53,22 +48,130 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_ return # 开始正式执行任务 - yield from 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, sys_prompt) + grobid_url = get_avail_grobid_url() + if grobid_url is not None: + yield from 解析PDF_基于GROBID(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, grobid_url) + else: + yield from update_ui_lastest_msg("GROBID服务不可用,请检查config中的GROBID_URL。作为替代,现在将执行效果稍差的旧版代码。", chatbot, history, delay=3) + yield from 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt) -def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, sys_prompt): - import os +def 解析PDF_基于GROBID(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, grobid_url): import copy import tiktoken TOKEN_LIMIT_PER_FRAGMENT = 1280 generated_conclusion_files = [] generated_html_files = [] + DST_LANG = "中文" for index, fp in enumerate(file_manifest): + chatbot.append(["当前进度:", f"正在连接GROBID服务,请稍候: {grobid_url}\n如果等待时间过长,请修改config中的GROBID_URL,可修改成本地GROBID服务。"]); yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + article_dict = parse_pdf(fp, grobid_url) + print(article_dict) + prompt = "以下是一篇学术论文的基本信息:\n" + # title + title = article_dict.get('title', '无法获取 title'); prompt += f'title:{title}\n\n' + # authors + authors = article_dict.get('authors', '无法获取 authors'); prompt += f'authors:{authors}\n\n' + # abstract + abstract = article_dict.get('abstract', '无法获取 abstract'); prompt += f'abstract:{abstract}\n\n' + # command + prompt += f"请将题目和摘要翻译为{DST_LANG}。" + meta = [f'# Title:\n\n', title, f'# Abstract:\n\n', abstract ] + # 单线,获取文章meta信息 + paper_meta_info = yield from request_gpt_model_in_new_thread_with_ui_alive( + inputs=prompt, + inputs_show_user=prompt, + llm_kwargs=llm_kwargs, + chatbot=chatbot, history=[], + sys_prompt="You are an academic paper 
reader。", + ) + + # 多线,翻译 + inputs_array = [] + inputs_show_user_array = [] + + # get_token_num + from request_llm.bridge_all import model_info + enc = model_info[llm_kwargs['llm_model']]['tokenizer'] + def get_token_num(txt): return len(enc.encode(txt, disallowed_special=())) + from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf + + def break_down(txt): + raw_token_num = get_token_num(txt) + if raw_token_num <= TOKEN_LIMIT_PER_FRAGMENT: + return [txt] + else: + # raw_token_num > TOKEN_LIMIT_PER_FRAGMENT + # find a smooth token limit to achieve even seperation + count = int(math.ceil(raw_token_num / TOKEN_LIMIT_PER_FRAGMENT)) + token_limit_smooth = raw_token_num // count + count + return breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn=get_token_num, limit=token_limit_smooth) + + for section in article_dict.get('sections'): + if len(section['text']) == 0: continue + section_frags = break_down(section['text']) + for i, fragment in enumerate(section_frags): + heading = section['heading'] + if len(section_frags) > 1: heading += f'Part-{i+1}' + inputs_array.append( + f"你需要翻译{heading}章节,内容如下: \n\n{fragment}" + ) + inputs_show_user_array.append( + f"# {heading}\n\n{fragment}" + ) + + gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( + inputs_array=inputs_array, + inputs_show_user_array=inputs_show_user_array, + llm_kwargs=llm_kwargs, + chatbot=chatbot, + history_array=[meta for _ in inputs_array], + sys_prompt_array=[ + "请你作为一个学术翻译,负责把学术论文准确翻译成中文。注意文章中的每一句话都要翻译。" for _ in inputs_array], + ) + res_path = write_history_to_file(meta + ["# Meta Translation" , paper_meta_info] + gpt_response_collection, file_basename=None, file_fullname=None) + promote_file_to_downloadzone(res_path, rename_file=os.path.basename(fp)+'.md', chatbot=chatbot) + generated_conclusion_files.append(res_path) + + ch = construct_html() + orig = "" + trans = "" + gpt_response_collection_html = copy.deepcopy(gpt_response_collection) + for i,k in enumerate(gpt_response_collection_html): + if i%2==0: + gpt_response_collection_html[i] = inputs_show_user_array[i//2] + else: + gpt_response_collection_html[i] = gpt_response_collection_html[i] + + final = ["", "", "一、论文概况", "", "Abstract", paper_meta_info, "二、论文翻译", ""] + final.extend(gpt_response_collection_html) + for i, k in enumerate(final): + if i%2==0: + orig = k + if i%2==1: + trans = k + ch.add_row(a=orig, b=trans) + create_report_file_name = f"{os.path.basename(fp)}.trans.html" + html_file = ch.save_file(create_report_file_name) + generated_html_files.append(html_file) + promote_file_to_downloadzone(html_file, rename_file=os.path.basename(html_file), chatbot=chatbot) + + chatbot.append(("给出输出文件清单", str(generated_conclusion_files + generated_html_files))) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + + +def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt): + import copy + TOKEN_LIMIT_PER_FRAGMENT = 1280 + generated_conclusion_files = [] + generated_html_files = [] + for index, fp in enumerate(file_manifest): # 读取PDF文件 file_content, page_one = read_and_clean_pdf_text(fp) file_content = file_content.encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars page_one = str(page_one).encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars + # 递归地切割PDF文件 from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf from request_llm.bridge_all import model_info @@ -140,8 +243,7 @@ def 
解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, trans = k ch.add_row(a=orig, b=trans) create_report_file_name = f"{os.path.basename(fp)}.trans.html" - ch.save_file(create_report_file_name) - generated_html_files.append(f'./gpt_log/{create_report_file_name}') + generated_html_files.append(ch.save_file(create_report_file_name)) except: from toolbox import trimmed_format_exc print('writing html result failed:', trimmed_format_exc()) @@ -202,6 +304,6 @@ class construct_html(): def save_file(self, file_name): - with open(f'./gpt_log/{file_name}', 'w', encoding='utf8') as f: + with open(os.path.join(get_log_folder(), file_name), 'w', encoding='utf8') as f: f.write(self.html_string.encode('utf-8', 'ignore').decode()) - + return os.path.join(get_log_folder(), file_name) diff --git a/crazy_functions/辅助功能.py b/crazy_functions/辅助功能.py new file mode 100644 index 00000000..e56deaa4 --- /dev/null +++ b/crazy_functions/辅助功能.py @@ -0,0 +1,43 @@ +# encoding: utf-8 +# @Time : 2023/4/19 +# @Author : Spike +# @Descr : +from toolbox import update_ui +from toolbox import CatchException, report_execption, write_results_to_file, get_log_folder +from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive + + +@CatchException +def 猜你想问(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + if txt: + show_say = txt + prompt = txt+'\n回答完问题后,再列出用户可能提出的三个问题。' + else: + prompt = history[-1]+"\n分析上述回答,再列出用户可能提出的三个问题。" + show_say = '分析上述回答,再列出用户可能提出的三个问题。' + gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive( + inputs=prompt, + inputs_show_user=show_say, + llm_kwargs=llm_kwargs, + chatbot=chatbot, + history=history, + sys_prompt=system_prompt + ) + chatbot[-1] = (show_say, gpt_say) + history.extend([show_say, gpt_say]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + + +@CatchException +def 清除缓存(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + chatbot.append(['清除本地缓存数据', '执行中. 
删除 gpt_log & private_upload']) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + + import shutil, os + gpt_log_dir = os.path.join(os.path.dirname(__file__), '..', 'gpt_log') + private_upload_dir = os.path.join(os.path.dirname(__file__), '..', 'private_upload') + shutil.rmtree(gpt_log_dir, ignore_errors=True) + shutil.rmtree(private_upload_dir, ignore_errors=True) + + chatbot.append(['清除本地缓存数据', '执行完成']) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 874bdc21..cf753b56 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,6 +16,7 @@ services: AVAIL_LLM_MODELS: ' ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "newbing"] ' WEB_PORT: ' 22303 ' ADD_WAIFU: ' True ' + # THEME: ' Chuanhu-Small-and-Beautiful ' # DEFAULT_WORKER_NUM: ' 10 ' # AUTHENTICATION: ' [("username", "passwd"), ("username2", "passwd2")] ' @@ -28,7 +29,7 @@ services: ### =================================================== -### 【方案二】 如果需要运行ChatGLM本地模型 +### 【方案二】 如果需要运行ChatGLM + Qwen + MOSS等本地模型 ### =================================================== version: '3' services: @@ -36,11 +37,11 @@ services: image: ghcr.io/binary-husky/gpt_academic_chatglm_moss:master # (Auto Built by Dockerfile: docs/Dockerfile+ChatGLM) environment: # 请查阅 `config.py` 以查看所有的配置信息 - API_KEY: ' sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,fkxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ' + API_KEY: ' sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ' USE_PROXY: ' True ' proxies: ' { "http": "socks5h://localhost:10880", "https": "socks5h://localhost:10880", } ' LLM_MODEL: ' gpt-3.5-turbo ' - AVAIL_LLM_MODELS: ' ["chatglm", "moss", "gpt-3.5-turbo", "gpt-4", "newbing"] ' + AVAIL_LLM_MODELS: ' ["chatglm", "qwen", "moss", "gpt-3.5-turbo", "gpt-4", "newbing"] ' LOCAL_MODEL_DEVICE: ' cuda ' DEFAULT_WORKER_NUM: ' 10 ' WEB_PORT: ' 12303 ' @@ -57,6 +58,10 @@ services: command: > bash -c "python3 -u main.py" + # P.S. 
通过对 command 进行微调,可以便捷地安装额外的依赖 + # command: > + # bash -c "pip install -r request_llm/requirements_qwen.txt && python3 -u main.py" + ### =================================================== ### 【方案三】 如果需要运行ChatGPT + LLAMA + 盘古 + RWKV本地模型 ### =================================================== diff --git a/docs/GithubAction+ChatGLM+Moss b/docs/GithubAction+ChatGLM+Moss index ba3deae9..7bb11a2c 100644 --- a/docs/GithubAction+ChatGLM+Moss +++ b/docs/GithubAction+ChatGLM+Moss @@ -18,6 +18,7 @@ WORKDIR /gpt/gpt_academic RUN git clone https://github.com/OpenLMLab/MOSS.git request_llm/moss RUN python3 -m pip install -r requirements.txt RUN python3 -m pip install -r request_llm/requirements_moss.txt +RUN python3 -m pip install -r request_llm/requirements_qwen.txt RUN python3 -m pip install -r request_llm/requirements_chatglm.txt RUN python3 -m pip install -r request_llm/requirements_newbing.txt diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py index 4c7f19d0..e167825a 100644 --- a/request_llm/bridge_all.py +++ b/request_llm/bridge_all.py @@ -19,6 +19,12 @@ from .bridge_chatgpt import predict as chatgpt_ui from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui from .bridge_chatglm import predict as chatglm_ui +from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui +from .bridge_chatglm import predict as chatglm_ui + +from .bridge_qianfan import predict_no_ui_long_connection as qianfan_noui +from .bridge_qianfan import predict as qianfan_ui + colors = ['#FF00FF', '#00FFFF', '#FF0000', '#990099', '#009999', '#990044'] class LazyloadTiktoken(object): @@ -165,7 +171,14 @@ model_info = { "tokenizer": tokenizer_gpt35, "token_cnt": get_token_num_gpt35, }, - + "qianfan": { + "fn_with_ui": qianfan_ui, + "fn_without_ui": qianfan_noui, + "endpoint": None, + "max_token": 2000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, } # -=-=-=-=-=-=- 以下部分是新加入的模型,可能附带额外依赖 -=-=-=-=-=-=- @@ -361,7 +374,7 @@ if "chatgpt_website" in AVAIL_LLM_MODELS: # 接入一些逆向工程https://gi "chatgpt_website": { "fn_with_ui": chatgpt_website_ui, "fn_without_ui": chatgpt_website_noui, - "endpoint": None, + "endpoint": openai_endpoint, "max_token": 4096, "tokenizer": tokenizer_gpt35, "token_cnt": get_token_num_gpt35, @@ -385,6 +398,22 @@ if "spark" in AVAIL_LLM_MODELS: # 讯飞星火认知大模型 }) except: print(trimmed_format_exc()) +if "llama2" in AVAIL_LLM_MODELS: # llama2 + try: + from .bridge_llama2 import predict_no_ui_long_connection as llama2_noui + from .bridge_llama2 import predict as llama2_ui + model_info.update({ + "llama2": { + "fn_with_ui": llama2_ui, + "fn_without_ui": llama2_noui, + "endpoint": None, + "max_token": 4096, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + } + }) + except: + print(trimmed_format_exc()) diff --git a/request_llm/bridge_chatgpt.py b/request_llm/bridge_chatgpt.py index ea48fbaf..5a7a274e 100644 --- a/request_llm/bridge_chatgpt.py +++ b/request_llm/bridge_chatgpt.py @@ -177,14 +177,13 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp yield from update_ui(chatbot=chatbot, history=history, msg="非Openai官方接口返回了错误:" + chunk.decode()) # 刷新界面 return - # print(chunk.decode()[6:]) - if is_head_of_the_stream and (r'"object":"error"' not in chunk.decode()): + chunk_decoded = chunk.decode() + if is_head_of_the_stream and (r'"object":"error"' not in chunk_decoded) and (r"choices" not in chunk_decoded): # 数据流的第一帧不携带content is_head_of_the_stream = False; continue if chunk: try: - chunk_decoded = 
chunk.decode() # 前者是API2D的结束条件,后者是OPENAI的结束条件 if ('data: [DONE]' in chunk_decoded) or (len(json.loads(chunk_decoded[6:])['choices'][0]["delta"]) == 0): # 判定为数据流的结束,gpt_replying_buffer也写完了 @@ -192,7 +191,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp break # 处理数据流的主体 chunkjson = json.loads(chunk_decoded[6:]) - status_text = f"finish_reason: {chunkjson['choices'][0]['finish_reason']}" + status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}" # 如果这里抛出异常,一般是文本过长,详情见get_full_error的输出 gpt_replying_buffer = gpt_replying_buffer + json.loads(chunk_decoded[6:])['choices'][0]["delta"]["content"] history[-1] = gpt_replying_buffer @@ -216,7 +215,6 @@ def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg) history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'], max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])) # history至少释放二分之一 chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)") - # history = [] # 清除历史 elif "does not exist" in error_msg: chatbot[-1] = (chatbot[-1][0], f"[Local Message] Model {llm_kwargs['llm_model']} does not exist. 模型不存在, 或者您没有获得体验资格.") elif "Incorrect API key" in error_msg: diff --git a/request_llm/bridge_chatgpt_website.py b/request_llm/bridge_chatgpt_website.py index 96af8332..7f3147b1 100644 --- a/request_llm/bridge_chatgpt_website.py +++ b/request_llm/bridge_chatgpt_website.py @@ -118,16 +118,6 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp chatbot 为WebUI中显示的对话列表,修改它,然后yeild出去,可以直接修改对话界面内容 additional_fn代表点击的哪个按钮,按钮见functional.py """ - if is_any_api_key(inputs): - chatbot._cookies['api_key'] = inputs - chatbot.append(("输入已识别为openai的api_key", what_keys(inputs))) - yield from update_ui(chatbot=chatbot, history=history, msg="api_key已导入") # 刷新界面 - return - elif not is_any_api_key(chatbot._cookies['api_key']): - chatbot.append((inputs, "缺少api_key。\n\n1. 临时解决方案:直接在输入区键入api_key,然后回车提交。\n\n2. 长效解决方案:在config.py中配置。")) - yield from update_ui(chatbot=chatbot, history=history, msg="缺少api_key") # 刷新界面 - return - if additional_fn is not None: from core_functional import handle_core_functionality inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot) @@ -245,14 +235,9 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, stream): if not is_any_api_key(llm_kwargs['api_key']): raise AssertionError("你提供了错误的API_KEY。\n\n1. 临时解决方案:直接在输入区键入api_key,然后回车提交。\n\n2. 
长效解决方案:在config.py中配置。") - api_key = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model']) - headers = { "Content-Type": "application/json", - "Authorization": f"Bearer {api_key}" } - if API_ORG.startswith('org-'): headers.update({"OpenAI-Organization": API_ORG}) - if llm_kwargs['llm_model'].startswith('azure-'): headers.update({"api-key": api_key}) conversation_cnt = len(history) // 2 diff --git a/request_llm/bridge_llama2.py b/request_llm/bridge_llama2.py new file mode 100644 index 00000000..e236c942 --- /dev/null +++ b/request_llm/bridge_llama2.py @@ -0,0 +1,91 @@ +model_name = "LLaMA" +cmd_to_install = "`pip install -r request_llm/requirements_chatglm.txt`" + + +from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer +from toolbox import update_ui, get_conf, ProxyNetworkActivate +from multiprocessing import Process, Pipe +from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns, SingletonLocalLLM +from threading import Thread + + +# ------------------------------------------------------------------------------------------------------------------------ +# 🔌💻 Local Model +# ------------------------------------------------------------------------------------------------------------------------ +@SingletonLocalLLM +class GetONNXGLMHandle(LocalLLMHandle): + + def load_model_info(self): + # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 + self.model_name = model_name + self.cmd_to_install = cmd_to_install + + def load_model_and_tokenizer(self): + # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 + import os, glob + import os + import platform + huggingface_token, device = get_conf('HUGGINGFACE_ACCESS_TOKEN', 'LOCAL_MODEL_DEVICE') + assert len(huggingface_token) != 0, "没有填写 HUGGINGFACE_ACCESS_TOKEN" + with open(os.path.expanduser('~/.cache/huggingface/token'), 'w') as f: + f.write(huggingface_token) + model_id = 'meta-llama/Llama-2-7b-chat-hf' + with ProxyNetworkActivate(): + self._tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=huggingface_token) + # use fp16 + model = AutoModelForCausalLM.from_pretrained(model_id, use_auth_token=huggingface_token).eval() + if device.startswith('cuda'): model = model.half().to(device) + self._model = model + + return self._model, self._tokenizer + + def llm_stream_generator(self, **kwargs): + # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 + def adaptor(kwargs): + query = kwargs['query'] + max_length = kwargs['max_length'] + top_p = kwargs['top_p'] + temperature = kwargs['temperature'] + history = kwargs['history'] + console_slience = kwargs.get('console_slience', True) + return query, max_length, top_p, temperature, history, console_slience + + def convert_messages_to_prompt(query, history): + prompt = "" + for a, b in history: + prompt += f"\n[INST]{a}[/INST]" + prompt += "\n{b}" + b + prompt += f"\n[INST]{query}[/INST]" + return prompt + + query, max_length, top_p, temperature, history, console_slience = adaptor(kwargs) + prompt = convert_messages_to_prompt(query, history) + # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=- + # code from transformers.llama + streamer = TextIteratorStreamer(self._tokenizer) + # Run the generation in a separate thread, so that we can fetch the generated text in a non-blocking way. 
+        inputs = self._tokenizer([prompt], return_tensors="pt")
+        prompt_tk_back = self._tokenizer.batch_decode(inputs['input_ids'])[0]
+
+        generation_kwargs = dict(inputs.to(self._model.device), streamer=streamer, max_new_tokens=max_length)
+        thread = Thread(target=self._model.generate, kwargs=generation_kwargs)
+        thread.start()
+        generated_text = ""
+        for new_text in streamer:
+            generated_text += new_text
+            if not console_slience: print(new_text, end='')
+            yield generated_text.lstrip(prompt_tk_back).rstrip("</s>")
+        if not console_slience: print()
+        # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=-
+
+    def try_to_import_special_deps(self, **kwargs):
+        # import something that will raise error if the user does not install requirement_*.txt
+        # 🏃‍♂️🏃‍♂️🏃‍♂️ 主进程执行
+        import importlib
+        importlib.import_module('transformers')
+
+
+# ------------------------------------------------------------------------------------------------------------------------
+# 🔌💻 GPT-Academic Interface
+# ------------------------------------------------------------------------------------------------------------------------
+predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetONNXGLMHandle, model_name)
\ No newline at end of file
diff --git a/request_llm/bridge_qianfan.py b/request_llm/bridge_qianfan.py
new file mode 100644
index 00000000..e2cdb0ee
--- /dev/null
+++ b/request_llm/bridge_qianfan.py
@@ -0,0 +1,164 @@
+
+import time, requests, json
+from multiprocessing import Process, Pipe
+from functools import wraps
+from datetime import datetime, timedelta
+from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, get_conf
+
+model_name = '千帆大模型平台'
+timeout_bot_msg = '[Local Message] Request timeout. Network error.'
+ +def cache_decorator(timeout): + cache = {} + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + key = (func.__name__, args, frozenset(kwargs.items())) + # Check if result is already cached and not expired + if key in cache: + result, timestamp = cache[key] + if datetime.now() - timestamp < timedelta(seconds=timeout): + return result + + # Call the function and cache the result + result = func(*args, **kwargs) + cache[key] = (result, datetime.now()) + return result + return wrapper + return decorator + +@cache_decorator(timeout=3600) +def get_access_token(): + """ + 使用 AK,SK 生成鉴权签名(Access Token) + :return: access_token,或是None(如果错误) + """ + # if (access_token_cache is None) or (time.time() - last_access_token_obtain_time > 3600): + BAIDU_CLOUD_API_KEY, BAIDU_CLOUD_SECRET_KEY = get_conf('BAIDU_CLOUD_API_KEY', 'BAIDU_CLOUD_SECRET_KEY') + + if len(BAIDU_CLOUD_SECRET_KEY) == 0: raise RuntimeError("没有配置BAIDU_CLOUD_SECRET_KEY") + if len(BAIDU_CLOUD_API_KEY) == 0: raise RuntimeError("没有配置BAIDU_CLOUD_API_KEY") + + url = "https://aip.baidubce.com/oauth/2.0/token" + params = {"grant_type": "client_credentials", "client_id": BAIDU_CLOUD_API_KEY, "client_secret": BAIDU_CLOUD_SECRET_KEY} + access_token_cache = str(requests.post(url, params=params).json().get("access_token")) + return access_token_cache + # else: + # return access_token_cache + + +def generate_message_payload(inputs, llm_kwargs, history, system_prompt): + conversation_cnt = len(history) // 2 + messages = [{"role": "user", "content": system_prompt}] + messages.append({"role": "assistant", "content": 'Certainly!'}) + if conversation_cnt: + for index in range(0, 2*conversation_cnt, 2): + what_i_have_asked = {} + what_i_have_asked["role"] = "user" + what_i_have_asked["content"] = history[index] + what_gpt_answer = {} + what_gpt_answer["role"] = "assistant" + what_gpt_answer["content"] = history[index+1] + if what_i_have_asked["content"] != "": + if what_gpt_answer["content"] == "": continue + if what_gpt_answer["content"] == timeout_bot_msg: continue + messages.append(what_i_have_asked) + messages.append(what_gpt_answer) + else: + messages[-1]['content'] = what_gpt_answer['content'] + what_i_ask_now = {} + what_i_ask_now["role"] = "user" + what_i_ask_now["content"] = inputs + messages.append(what_i_ask_now) + return messages + + +def generate_from_baidu_qianfan(inputs, llm_kwargs, history, system_prompt): + BAIDU_CLOUD_QIANFAN_MODEL, = get_conf('BAIDU_CLOUD_QIANFAN_MODEL') + + url_lib = { + "ERNIE-Bot": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions" , + "ERNIE-Bot-turbo": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/eb-instant" , + "BLOOMZ-7B": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/bloomz_7b1", + + "Llama-2-70B-Chat": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/llama_2_70b", + "Llama-2-13B-Chat": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/llama_2_13b", + "Llama-2-7B-Chat": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/llama_2_7b", + } + + url = url_lib[BAIDU_CLOUD_QIANFAN_MODEL] + + url += "?access_token=" + get_access_token() + + + payload = json.dumps({ + "messages": generate_message_payload(inputs, llm_kwargs, history, system_prompt), + "stream": True + }) + headers = { + 'Content-Type': 'application/json' + } + response = requests.request("POST", url, headers=headers, data=payload, stream=True) + buffer = "" + for line in response.iter_lines(): + if len(line) == 0: 
continue + try: + dec = line.decode().lstrip('data:') + dec = json.loads(dec) + incoming = dec['result'] + buffer += incoming + yield buffer + except: + if ('error_code' in dec) and ("max length" in dec['error_msg']): + raise ConnectionAbortedError(dec['error_msg']) # 上下文太长导致 token 溢出 + elif ('error_code' in dec): + raise RuntimeError(dec['error_msg']) + + +def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False): + """ + ⭐多线程方法 + 函数的说明请见 request_llm/bridge_all.py + """ + watch_dog_patience = 5 + response = "" + + for response in generate_from_baidu_qianfan(inputs, llm_kwargs, history, sys_prompt): + if len(observe_window) >= 1: + observe_window[0] = response + if len(observe_window) >= 2: + if (time.time()-observe_window[1]) > watch_dog_patience: raise RuntimeError("程序终止。") + return response + +def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): + """ + ⭐单线程方法 + 函数的说明请见 request_llm/bridge_all.py + """ + chatbot.append((inputs, "")) + + if additional_fn is not None: + from core_functional import handle_core_functionality + inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot) + + yield from update_ui(chatbot=chatbot, history=history) + # 开始接收回复 + try: + for response in generate_from_baidu_qianfan(inputs, llm_kwargs, history, system_prompt): + chatbot[-1] = (inputs, response) + yield from update_ui(chatbot=chatbot, history=history) + except ConnectionAbortedError as e: + from .bridge_all import model_info + if len(history) >= 2: history[-1] = ""; history[-2] = "" # 清除当前溢出的输入:history[-2] 是本次输入, history[-1] 是本次输出 + history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'], + max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])) # history至少释放二分之一 + chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)") + yield from update_ui(chatbot=chatbot, history=history, msg="异常") # 刷新界面 + return + + # 总结输出 + response = f"[Local Message]: {model_name}响应异常 ..." + if response == f"[Local Message]: 等待{model_name}响应中 ...": + response = f"[Local Message]: {model_name}响应异常 ..." 
+ history.extend([inputs, response]) + yield from update_ui(chatbot=chatbot, history=history) \ No newline at end of file diff --git a/request_llm/local_llm_class.py b/request_llm/local_llm_class.py index 3dd266fe..c9c72534 100644 --- a/request_llm/local_llm_class.py +++ b/request_llm/local_llm_class.py @@ -128,7 +128,7 @@ def get_local_llm_predict_fns(LLMSingletonClass, model_name): # chatglm 没有 sys_prompt 接口,因此把prompt加入 history history_feedin = [] - history_feedin.append(["What can I do?", sys_prompt]) + history_feedin.append([sys_prompt, "Certainly!"]) for i in range(len(history)//2): history_feedin.append([history[2*i], history[2*i+1]] ) @@ -161,7 +161,7 @@ def get_local_llm_predict_fns(LLMSingletonClass, model_name): # 处理历史信息 history_feedin = [] - history_feedin.append(["What can I do?", system_prompt] ) + history_feedin.append([system_prompt, "Certainly!"]) for i in range(len(history)//2): history_feedin.append([history[2*i], history[2*i+1]] ) diff --git a/requirements.txt b/requirements.txt index 4e5e9e36..ea939339 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,5 +17,6 @@ openai numpy arxiv rich -websocket-client pypdf2==2.12.1 +websocket-client +scipdf_parser==0.3 diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 53969bf5..4913a59b 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -9,9 +9,9 @@ validate_path() # 返回项目根路径 from tests.test_utils import plugin_test if __name__ == "__main__": - plugin_test(plugin='crazy_functions.命令行助手->命令行助手', main_input='查看当前的docker容器列表') + # plugin_test(plugin='crazy_functions.命令行助手->命令行助手', main_input='查看当前的docker容器列表') - plugin_test(plugin='crazy_functions.解析项目源代码->解析一个Python项目', main_input="crazy_functions/test_project/python/dqn") + # plugin_test(plugin='crazy_functions.解析项目源代码->解析一个Python项目', main_input="crazy_functions/test_project/python/dqn") # plugin_test(plugin='crazy_functions.解析项目源代码->解析一个C项目', main_input="crazy_functions/test_project/cpp/cppipc") @@ -19,7 +19,7 @@ if __name__ == "__main__": # plugin_test(plugin='crazy_functions.批量Markdown翻译->Markdown中译英', main_input="README.md") - # plugin_test(plugin='crazy_functions.批量翻译PDF文档_多线程->批量翻译PDF文档', main_input="crazy_functions/test_project/pdf_and_word") + plugin_test(plugin='crazy_functions.批量翻译PDF文档_多线程->批量翻译PDF文档', main_input='crazy_functions/test_project/pdf_and_word/aaai.pdf') # plugin_test(plugin='crazy_functions.谷歌检索小助手->谷歌检索小助手', main_input="https://scholar.google.com/scholar?hl=en&as_sdt=0%2C5&q=auto+reinforcement+learning&btnG=") diff --git a/tests/test_utils.py b/tests/test_utils.py index 682a96f6..f3a45aa8 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -22,10 +22,12 @@ def silence_stdout(func): def wrapper(*args, **kwargs): _original_stdout = sys.stdout sys.stdout = open(os.devnull, 'w') + sys.stdout.reconfigure(encoding='utf-8') for q in func(*args, **kwargs): sys.stdout = _original_stdout yield q sys.stdout = open(os.devnull, 'w') + sys.stdout.reconfigure(encoding='utf-8') sys.stdout.close() sys.stdout = _original_stdout return wrapper @@ -35,6 +37,7 @@ def silence_stdout_fn(func): def wrapper(*args, **kwargs): _original_stdout = sys.stdout sys.stdout = open(os.devnull, 'w') + sys.stdout.reconfigure(encoding='utf-8') result = func(*args, **kwargs) sys.stdout.close() sys.stdout = _original_stdout diff --git a/version b/version index 900f168d..303a44b0 100644 --- a/version +++ b/version @@ -1,5 +1,5 @@ { - "version": 3.48, + "version": 3.49, "show_feature": true, - "new_feature": "接入阿里通义千问、讯飞星火、上海AI-Lab书生 <-> 优化一键升级 <-> 
提高arxiv翻译速度和成功率 <-> 支持自定义APIKEY格式 <-> 临时修复theme的文件丢失问题 <-> 新增实时语音对话插件(自动断句,脱手对话) <-> 支持加载自定义的ChatGLM2微调模型 <-> 动态ChatBot窗口高度 <-> 修复Azure接口的BUG <-> 完善多语言模块 <-> 完善本地Latex矫错和翻译功能 <-> 增加gpt-3.5-16k的支持" + "new_feature": "支持借助GROBID实现PDF高精度翻译 <-> 接入百度千帆平台和文心一言 <-> 接入阿里通义千问、讯飞星火、上海AI-Lab书生 <-> 优化一键升级 <-> 提高arxiv翻译速度和成功率 <-> 支持自定义APIKEY格式 <-> 临时修复theme的文件丢失问题 <-> 新增实时语音对话插件(自动断句,脱手对话) <-> 支持加载自定义的ChatGLM2微调模型 <-> 动态ChatBot窗口高度 <-> 修复Azure接口的BUG <-> 完善多语言模块" }
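
The new 百度千帆 / 文心一言 bridge above only names its configuration switches (`BAIDU_CLOUD_QIANFAN_MODEL`, `BAIDU_CLOUD_API_KEY`, `BAIDU_CLOUD_SECRET_KEY` in the dependency tree added to `config.py`, plus the `"qianfan"` entry registered in `request_llm/bridge_all.py`). Below is a minimal sketch of how these options might be set in a `config_private.py` override; the key values are placeholders and the exact option format is an assumption, since the corresponding `config.py` hunk is not part of this diff.

```python
# Illustrative sketch only — not part of this diff. Assumes config_private.py overrides
# config.py and that the BAIDU_CLOUD_* options are plain string settings, as the
# dependency tree added to config.py implies. Replace the placeholder keys with real ones.

# Make the qianfan bridge (registered in request_llm/bridge_all.py) selectable in the UI
AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "gpt-4", "qianfan"]

# Credentials issued by the Baidu Cloud Qianfan console (placeholders)
BAIDU_CLOUD_API_KEY = "your-qianfan-api-key"
BAIDU_CLOUD_SECRET_KEY = "your-qianfan-secret-key"

# Any key of url_lib in request_llm/bridge_qianfan.py, e.g. "ERNIE-Bot" or "ERNIE-Bot-turbo"
BAIDU_CLOUD_QIANFAN_MODEL = "ERNIE-Bot"
```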