up
This commit is contained in:
@@ -39,7 +39,7 @@ To translate this project to arbitary language with GPT, read and run [`multi_la
|
||||
|
||||
功能(⭐= 近期新增功能) | 描述
|
||||
--- | ---
|
||||
⭐[接入新模型](https://github.com/binary-husky/gpt_academic/wiki/%E5%A6%82%E4%BD%95%E5%88%87%E6%8D%A2%E6%A8%A1%E5%9E%8B)! | ⭐阿里达摩院[通义千问](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary),上海AI-Lab[书生](https://github.com/InternLM/InternLM),讯飞[星火](https://xinghuo.xfyun.cn/)
|
||||
⭐[接入新模型](https://github.com/binary-husky/gpt_academic/wiki/%E5%A6%82%E4%BD%95%E5%88%87%E6%8D%A2%E6%A8%A1%E5%9E%8B)! | 百度[千帆](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu)与文心一言, [通义千问](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary),上海AI-Lab[书生](https://github.com/InternLM/InternLM),讯飞[星火](https://xinghuo.xfyun.cn/),[LLaMa2](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf)
|
||||
一键润色 | 支持一键润色、一键查找论文语法错误
|
||||
一键中英互译 | 一键中英互译
|
||||
一键代码解释 | 显示代码、解释代码、生成代码、给代码加注释
|
||||
@@ -178,7 +178,7 @@ docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic
|
||||
```
|
||||
P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以直接使用docker-compose获取Latex功能(修改docker-compose.yml,保留方案4并删除其他方案)。
|
||||
|
||||
2. ChatGPT + ChatGLM2 + MOSS(需要熟悉Docker)
|
||||
2. ChatGPT + ChatGLM2 + MOSS + LLAMA2 + 通义千问(需要熟悉[Nvidia Docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-on-ubuntu-and-debian)运行时)
|
||||
[](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-chatglm.yml)
|
||||
|
||||
``` sh
|
||||
@@ -186,7 +186,7 @@ P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以
|
||||
docker-compose up
|
||||
```
|
||||
|
||||
3. ChatGPT + LLAMA + 盘古 + RWKV(需要熟悉Docker)
|
||||
3. ChatGPT + LLAMA + 盘古 + RWKV(需要熟悉[Nvidia Docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-on-ubuntu-and-debian)运行时)
|
||||
[](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-jittorllms.yml)
|
||||
|
||||
``` sh
|
||||
@@ -313,6 +313,8 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h
|
||||
|
||||
### II:版本:
|
||||
- version 3.5(Todo): 使用自然语言调用本项目的所有函数插件(高优先级)
|
||||
- version 3.49: 支持百度千帆平台和文心一言
|
||||
- version 3.48: 支持阿里达摩院通义千问,上海AI-Lab书生,讯飞星火
|
||||
- version 3.46: 支持完全脱手操作的实时语音对话
|
||||
- version 3.45: 支持自定义ChatGLM2微调模型
|
||||
- version 3.44: 正式支持Azure,优化界面易用性
|
||||
|
||||
2
app.py
2
app.py
@@ -4,7 +4,7 @@ def main():
|
||||
import subprocess, sys
|
||||
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'gradio-stable-fork'])
|
||||
import gradio as gr
|
||||
if gr.__version__ not in ['3.28.3','3.32.3']: assert False, "需要特殊依赖,请务必用 pip install -r requirements.txt 指令安装依赖,详情信息见requirements.txt"
|
||||
if gr.__version__ not in ['3.28.3','3.32.2']: assert False, "需要特殊依赖,请务必用 pip install -r requirements.txt 指令安装依赖,详情信息见requirements.txt"
|
||||
from request_llm.bridge_all import predict
|
||||
from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, load_chat_cookies, DummyWith
|
||||
# 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
|
||||
|
||||
75
config.py
75
config.py
@@ -11,7 +11,11 @@
|
||||
API_KEY = "此处填API密钥" # 可同时填写多个API-KEY,用英文逗号分割,例如API_KEY = "sk-openaikey1,sk-openaikey2,fkxxxx-api2dkey3,azure-apikey4"
|
||||
|
||||
|
||||
# [step 2]>> 改为True应用代理,如果直接在海外服务器部署,此处不修改
|
||||
# [step 1]>> API_KEY = "sk-123456789xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx123456789"。极少数情况下,还需要填写组织(格式如org-123456789abcdefghijklmno的),请向下翻,找 API_ORG 设置项
|
||||
API_KEY = "此处填API密钥" # 可同时填写多个API-KEY,用英文逗号分割,例如API_KEY = "sk-openaikey1,sk-openaikey2,fkxxxx-api2dkey3,azure-apikey4"
|
||||
|
||||
|
||||
# [step 2]>> 改为True应用代理,如果直接在海外服务器部署,此处不修改;如果使用本地或无地域限制的大模型时,此处也不需要修改
|
||||
USE_PROXY = False
|
||||
if USE_PROXY:
|
||||
"""
|
||||
@@ -69,7 +73,7 @@ MAX_RETRY = 2
|
||||
|
||||
# OpenAI模型选择是(gpt4现在只对申请成功的人开放)
|
||||
LLM_MODEL = "gpt-3.5-turbo" # 可选 "chatglm"
|
||||
AVAIL_LLM_MODELS = ["newbing-free", "gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "api2d-gpt-3.5-turbo"]
|
||||
AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "api2d-gpt-3.5-turbo", "spark", "azure-gpt-3.5"]
|
||||
|
||||
# ChatGLM(2) Finetune Model Path (如果使用ChatGLM2微调模型,需要把"chatglmft"加入AVAIL_LLM_MODELS中)
|
||||
ChatGLM_PTUNING_CHECKPOINT = "" # 例如"/home/hmp/ChatGLM2-6B/ptuning/output/6b-pt-128-1e-2/checkpoint-100"
|
||||
@@ -147,3 +151,70 @@ ANTHROPIC_API_KEY = ""
|
||||
|
||||
# 自定义API KEY格式
|
||||
CUSTOM_API_KEY_PATTERN = ""
|
||||
|
||||
|
||||
# HUGGINGFACE的TOKEN,下载LLAMA时起作用 https://huggingface.co/docs/hub/security-tokens
|
||||
HUGGINGFACE_ACCESS_TOKEN = "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"
|
||||
|
||||
|
||||
# GROBID服务器地址(填写多个可以均衡负载),用于高质量地读取PDF文档
|
||||
# 获取方法:复制以下空间https://huggingface.co/spaces/qingxu98/grobid,设为public,然后GROBID_URL = "https://(你的hf用户名如qingxu98)-(你的填写的空间名如grobid).hf.space"
|
||||
GROBID_URLS = [
|
||||
"https://qingxu98-grobid.hf.space","https://qingxu98-grobid2.hf.space","https://qingxu98-grobid3.hf.space",
|
||||
"https://shaocongma-grobid.hf.space","https://FBR123-grobid.hf.space",
|
||||
]
|
||||
|
||||
|
||||
|
||||
"""
|
||||
在线大模型配置关联关系示意图
|
||||
│
|
||||
├── "gpt-3.5-turbo" 等openai模型
|
||||
│ ├── API_KEY
|
||||
│ ├── CUSTOM_API_KEY_PATTERN(不常用)
|
||||
│ ├── API_ORG(不常用)
|
||||
│ └── API_URL_REDIRECT(不常用)
|
||||
│
|
||||
├── "azure-gpt-3.5" 等azure模型
|
||||
│ ├── API_KEY
|
||||
│ ├── AZURE_ENDPOINT
|
||||
│ ├── AZURE_API_KEY
|
||||
│ ├── AZURE_ENGINE
|
||||
│ └── API_URL_REDIRECT
|
||||
│
|
||||
├── "spark" 星火认知大模型
|
||||
│ ├── XFYUN_APPID
|
||||
│ ├── XFYUN_API_SECRET
|
||||
│ └── XFYUN_API_KEY
|
||||
│
|
||||
├── "claude-1-100k" 等claude模型
|
||||
│ └── ANTHROPIC_API_KEY
|
||||
│
|
||||
├── "stack-claude"
|
||||
│ ├── SLACK_CLAUDE_BOT_ID
|
||||
│ └── SLACK_CLAUDE_USER_TOKEN
|
||||
│
|
||||
├── "qianfan" 百度千帆大模型库
|
||||
│ ├── BAIDU_CLOUD_QIANFAN_MODEL
|
||||
│ ├── BAIDU_CLOUD_API_KEY
|
||||
│ └── BAIDU_CLOUD_SECRET_KEY
|
||||
│
|
||||
├── "newbing" Newbing接口不再稳定,不推荐使用
|
||||
├── NEWBING_STYLE
|
||||
└── NEWBING_COOKIES
|
||||
|
||||
|
||||
|
||||
插件在线服务配置依赖关系示意图
|
||||
│
|
||||
├── 语音功能
|
||||
│ ├── ENABLE_AUDIO
|
||||
│ ├── ALIYUN_TOKEN
|
||||
│ ├── ALIYUN_APPKEY
|
||||
│ ├── ALIYUN_ACCESSKEY
|
||||
│ └── ALIYUN_SECRET
|
||||
│
|
||||
├── PDF文档精准解析
|
||||
│ └── GROBID_URLS
|
||||
|
||||
"""
|
||||
|
||||
@@ -24,6 +24,7 @@ def get_crazy_functions():
|
||||
from crazy_functions.对话历史存档 import 对话历史存档
|
||||
from crazy_functions.对话历史存档 import 载入对话历史存档
|
||||
from crazy_functions.对话历史存档 import 删除所有本地对话历史记录
|
||||
from crazy_functions.辅助功能 import 清除缓存
|
||||
|
||||
from crazy_functions.批量Markdown翻译 import Markdown英译中
|
||||
function_plugins = {
|
||||
@@ -40,7 +41,12 @@ def get_crazy_functions():
|
||||
"AsButton":False,
|
||||
"Function": HotReload(删除所有本地对话历史记录)
|
||||
},
|
||||
"[测试功能] 解析Jupyter Notebook文件": {
|
||||
"清除所有缓存文件(请谨慎操作)": {
|
||||
"Color": "stop",
|
||||
"AsButton": False, # 加入下拉菜单中
|
||||
"Function": HotReload(清除缓存)
|
||||
},
|
||||
"解析Jupyter Notebook文件": {
|
||||
"Color": "stop",
|
||||
"AsButton":False,
|
||||
"Function": HotReload(解析ipynb文件),
|
||||
@@ -328,7 +334,7 @@ def get_crazy_functions():
|
||||
try:
|
||||
from crazy_functions.Langchain知识库 import 知识库问答
|
||||
function_plugins.update({
|
||||
"[功能尚不稳定] 构建知识库(请先上传文件素材)": {
|
||||
"构建知识库(请先上传文件素材)": {
|
||||
"Color": "stop",
|
||||
"AsButton": False,
|
||||
"AdvancedArgs": True,
|
||||
@@ -342,7 +348,7 @@ def get_crazy_functions():
|
||||
try:
|
||||
from crazy_functions.Langchain知识库 import 读取知识库作答
|
||||
function_plugins.update({
|
||||
"[功能尚不稳定] 知识库问答": {
|
||||
"知识库问答": {
|
||||
"Color": "stop",
|
||||
"AsButton": False,
|
||||
"AdvancedArgs": True,
|
||||
@@ -353,6 +359,32 @@ def get_crazy_functions():
|
||||
except:
|
||||
print('Load function plugin failed')
|
||||
|
||||
try:
|
||||
from crazy_functions.交互功能函数模板 import 交互功能模板函数
|
||||
function_plugins.update({
|
||||
"交互功能模板函数": {
|
||||
"Color": "stop",
|
||||
"AsButton": False,
|
||||
"Function": HotReload(交互功能模板函数)
|
||||
}
|
||||
})
|
||||
except:
|
||||
print('Load function plugin failed')
|
||||
|
||||
# try:
|
||||
# from crazy_functions.chatglm微调工具 import 微调数据集生成
|
||||
# function_plugins.update({
|
||||
# "黑盒模型学习: 微调数据集生成 (先上传数据集)": {
|
||||
# "Color": "stop",
|
||||
# "AsButton": False,
|
||||
# "AdvancedArgs": True,
|
||||
# "ArgsReminder": "针对数据集输入(如 绿帽子*深蓝色衬衫*黑色运动裤)给出指令,例如您可以将以下命令复制到下方: --llm_to_learn=azure-gpt-3.5 --prompt_prefix='根据下面的服装类型提示,想象一个穿着者,对这个人外貌、身处的环境、内心世界、过去经历进行描写。要求:100字以内,用第二人称。' --system_prompt=''",
|
||||
# "Function": HotReload(微调数据集生成)
|
||||
# }
|
||||
# })
|
||||
# except:
|
||||
# print('Load function plugin failed')
|
||||
|
||||
try:
|
||||
from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比
|
||||
function_plugins.update({
|
||||
@@ -366,7 +398,7 @@ def get_crazy_functions():
|
||||
})
|
||||
from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
|
||||
function_plugins.update({
|
||||
"Arixv翻译(输入arxivID)[需Latex]": {
|
||||
"Arixv论文精细翻译(输入arxivID)[需Latex]": {
|
||||
"Color": "stop",
|
||||
"AsButton": False,
|
||||
"AdvancedArgs": True,
|
||||
@@ -377,7 +409,7 @@ def get_crazy_functions():
|
||||
}
|
||||
})
|
||||
function_plugins.update({
|
||||
"本地论文翻译(上传Latex压缩包)[需Latex]": {
|
||||
"本地Latex论文精细翻译(上传Latex项目)[需Latex]": {
|
||||
"Color": "stop",
|
||||
"AsButton": False,
|
||||
"AdvancedArgs": True,
|
||||
|
||||
@@ -281,9 +281,12 @@ def rm_comments(main_file):
|
||||
def find_tex_file_ignore_case(fp):
|
||||
dir_name = os.path.dirname(fp)
|
||||
base_name = os.path.basename(fp)
|
||||
# 如果输入的文件路径是正确的
|
||||
if os.path.exists(pj(dir_name, base_name)): return pj(dir_name, base_name)
|
||||
# 如果不正确,试着加上.tex后缀试试
|
||||
if not base_name.endswith('.tex'): base_name+='.tex'
|
||||
if os.path.exists(pj(dir_name, base_name)): return pj(dir_name, base_name)
|
||||
# go case in-sensitive
|
||||
# 如果还找不到,解除大小写限制,再试一次
|
||||
import glob
|
||||
for f in glob.glob(dir_name+'/*.tex'):
|
||||
base_name_s = os.path.basename(fp)
|
||||
|
||||
25
crazy_functions/pdf_fns/parse_pdf.py
Normal file
25
crazy_functions/pdf_fns/parse_pdf.py
Normal file
@@ -0,0 +1,25 @@
|
||||
import requests
|
||||
import random
|
||||
from functools import lru_cache
|
||||
class GROBID_OFFLINE_EXCEPTION(Exception): pass
|
||||
|
||||
def get_avail_grobid_url():
|
||||
from toolbox import get_conf
|
||||
GROBID_URLS, = get_conf('GROBID_URLS')
|
||||
if len(GROBID_URLS) == 0: return None
|
||||
try:
|
||||
_grobid_url = random.choice(GROBID_URLS) # 随机负载均衡
|
||||
if _grobid_url.endswith('/'): _grobid_url = _grobid_url.rstrip('/')
|
||||
res = requests.get(_grobid_url+'/api/isalive')
|
||||
if res.text=='true': return _grobid_url
|
||||
else: return None
|
||||
except:
|
||||
return None
|
||||
|
||||
@lru_cache(maxsize=32)
|
||||
def parse_pdf(pdf_path, grobid_url):
|
||||
import scipdf # pip install scipdf_parser
|
||||
if grobid_url.endswith('/'): grobid_url = grobid_url.rstrip('/')
|
||||
article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url)
|
||||
return article_dict
|
||||
|
||||
@@ -1,15 +1,19 @@
|
||||
from toolbox import CatchException, report_execption, write_results_to_file
|
||||
from toolbox import update_ui, promote_file_to_downloadzone
|
||||
from toolbox import update_ui, promote_file_to_downloadzone, update_ui_lastest_msg, disable_auto_promotion
|
||||
from toolbox import write_history_to_file, get_log_folder
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
||||
from .crazy_utils import read_and_clean_pdf_text
|
||||
from .pdf_fns.parse_pdf import parse_pdf, get_avail_grobid_url
|
||||
from colorful import *
|
||||
import glob
|
||||
import os
|
||||
import math
|
||||
|
||||
@CatchException
|
||||
def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_prompt, web_port):
|
||||
import glob
|
||||
import os
|
||||
def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
|
||||
|
||||
disable_auto_promotion(chatbot)
|
||||
# 基本信息:功能、贡献者
|
||||
chatbot.append([
|
||||
"函数插件功能?",
|
||||
@@ -30,20 +34,11 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_
|
||||
# 清空历史,以免输入溢出
|
||||
history = []
|
||||
|
||||
from .crazy_utils import get_files_from_everything
|
||||
success, file_manifest, project_folder = get_files_from_everything(txt, type='.pdf')
|
||||
# 检测输入参数,如没有给定输入参数,直接退出
|
||||
if os.path.exists(txt):
|
||||
project_folder = txt
|
||||
else:
|
||||
if txt == "":
|
||||
txt = '空空如也的输入栏'
|
||||
report_execption(chatbot, history,
|
||||
a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
return
|
||||
|
||||
# 搜索需要处理的文件清单
|
||||
file_manifest = [f for f in glob.glob(
|
||||
f'{project_folder}/**/*.pdf', recursive=True)]
|
||||
if not success:
|
||||
if txt == "": txt = '空空如也的输入栏'
|
||||
|
||||
# 如果没找到任何文件
|
||||
if len(file_manifest) == 0:
|
||||
@@ -53,22 +48,130 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_
|
||||
return
|
||||
|
||||
# 开始正式执行任务
|
||||
yield from 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, sys_prompt)
|
||||
grobid_url = get_avail_grobid_url()
|
||||
if grobid_url is not None:
|
||||
yield from 解析PDF_基于GROBID(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, grobid_url)
|
||||
else:
|
||||
yield from update_ui_lastest_msg("GROBID服务不可用,请检查config中的GROBID_URL。作为替代,现在将执行效果稍差的旧版代码。", chatbot, history, delay=3)
|
||||
yield from 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
||||
|
||||
|
||||
def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, sys_prompt):
|
||||
import os
|
||||
def 解析PDF_基于GROBID(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, grobid_url):
|
||||
import copy
|
||||
import tiktoken
|
||||
TOKEN_LIMIT_PER_FRAGMENT = 1280
|
||||
generated_conclusion_files = []
|
||||
generated_html_files = []
|
||||
DST_LANG = "中文"
|
||||
for index, fp in enumerate(file_manifest):
|
||||
chatbot.append(["当前进度:", f"正在连接GROBID服务,请稍候: {grobid_url}\n如果等待时间过长,请修改config中的GROBID_URL,可修改成本地GROBID服务。"]); yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
article_dict = parse_pdf(fp, grobid_url)
|
||||
print(article_dict)
|
||||
prompt = "以下是一篇学术论文的基本信息:\n"
|
||||
# title
|
||||
title = article_dict.get('title', '无法获取 title'); prompt += f'title:{title}\n\n'
|
||||
# authors
|
||||
authors = article_dict.get('authors', '无法获取 authors'); prompt += f'authors:{authors}\n\n'
|
||||
# abstract
|
||||
abstract = article_dict.get('abstract', '无法获取 abstract'); prompt += f'abstract:{abstract}\n\n'
|
||||
# command
|
||||
prompt += f"请将题目和摘要翻译为{DST_LANG}。"
|
||||
meta = [f'# Title:\n\n', title, f'# Abstract:\n\n', abstract ]
|
||||
|
||||
# 单线,获取文章meta信息
|
||||
paper_meta_info = yield from request_gpt_model_in_new_thread_with_ui_alive(
|
||||
inputs=prompt,
|
||||
inputs_show_user=prompt,
|
||||
llm_kwargs=llm_kwargs,
|
||||
chatbot=chatbot, history=[],
|
||||
sys_prompt="You are an academic paper reader。",
|
||||
)
|
||||
|
||||
# 多线,翻译
|
||||
inputs_array = []
|
||||
inputs_show_user_array = []
|
||||
|
||||
# get_token_num
|
||||
from request_llm.bridge_all import model_info
|
||||
enc = model_info[llm_kwargs['llm_model']]['tokenizer']
|
||||
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
|
||||
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
|
||||
|
||||
def break_down(txt):
|
||||
raw_token_num = get_token_num(txt)
|
||||
if raw_token_num <= TOKEN_LIMIT_PER_FRAGMENT:
|
||||
return [txt]
|
||||
else:
|
||||
# raw_token_num > TOKEN_LIMIT_PER_FRAGMENT
|
||||
# find a smooth token limit to achieve even seperation
|
||||
count = int(math.ceil(raw_token_num / TOKEN_LIMIT_PER_FRAGMENT))
|
||||
token_limit_smooth = raw_token_num // count + count
|
||||
return breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn=get_token_num, limit=token_limit_smooth)
|
||||
|
||||
for section in article_dict.get('sections'):
|
||||
if len(section['text']) == 0: continue
|
||||
section_frags = break_down(section['text'])
|
||||
for i, fragment in enumerate(section_frags):
|
||||
heading = section['heading']
|
||||
if len(section_frags) > 1: heading += f'Part-{i+1}'
|
||||
inputs_array.append(
|
||||
f"你需要翻译{heading}章节,内容如下: \n\n{fragment}"
|
||||
)
|
||||
inputs_show_user_array.append(
|
||||
f"# {heading}\n\n{fragment}"
|
||||
)
|
||||
|
||||
gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
||||
inputs_array=inputs_array,
|
||||
inputs_show_user_array=inputs_show_user_array,
|
||||
llm_kwargs=llm_kwargs,
|
||||
chatbot=chatbot,
|
||||
history_array=[meta for _ in inputs_array],
|
||||
sys_prompt_array=[
|
||||
"请你作为一个学术翻译,负责把学术论文准确翻译成中文。注意文章中的每一句话都要翻译。" for _ in inputs_array],
|
||||
)
|
||||
res_path = write_history_to_file(meta + ["# Meta Translation" , paper_meta_info] + gpt_response_collection, file_basename=None, file_fullname=None)
|
||||
promote_file_to_downloadzone(res_path, rename_file=os.path.basename(fp)+'.md', chatbot=chatbot)
|
||||
generated_conclusion_files.append(res_path)
|
||||
|
||||
ch = construct_html()
|
||||
orig = ""
|
||||
trans = ""
|
||||
gpt_response_collection_html = copy.deepcopy(gpt_response_collection)
|
||||
for i,k in enumerate(gpt_response_collection_html):
|
||||
if i%2==0:
|
||||
gpt_response_collection_html[i] = inputs_show_user_array[i//2]
|
||||
else:
|
||||
gpt_response_collection_html[i] = gpt_response_collection_html[i]
|
||||
|
||||
final = ["", "", "一、论文概况", "", "Abstract", paper_meta_info, "二、论文翻译", ""]
|
||||
final.extend(gpt_response_collection_html)
|
||||
for i, k in enumerate(final):
|
||||
if i%2==0:
|
||||
orig = k
|
||||
if i%2==1:
|
||||
trans = k
|
||||
ch.add_row(a=orig, b=trans)
|
||||
create_report_file_name = f"{os.path.basename(fp)}.trans.html"
|
||||
html_file = ch.save_file(create_report_file_name)
|
||||
generated_html_files.append(html_file)
|
||||
promote_file_to_downloadzone(html_file, rename_file=os.path.basename(html_file), chatbot=chatbot)
|
||||
|
||||
chatbot.append(("给出输出文件清单", str(generated_conclusion_files + generated_html_files)))
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
|
||||
def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
|
||||
import copy
|
||||
TOKEN_LIMIT_PER_FRAGMENT = 1280
|
||||
generated_conclusion_files = []
|
||||
generated_html_files = []
|
||||
for index, fp in enumerate(file_manifest):
|
||||
# 读取PDF文件
|
||||
file_content, page_one = read_and_clean_pdf_text(fp)
|
||||
file_content = file_content.encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars
|
||||
page_one = str(page_one).encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars
|
||||
|
||||
# 递归地切割PDF文件
|
||||
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
|
||||
from request_llm.bridge_all import model_info
|
||||
@@ -140,8 +243,7 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
|
||||
trans = k
|
||||
ch.add_row(a=orig, b=trans)
|
||||
create_report_file_name = f"{os.path.basename(fp)}.trans.html"
|
||||
ch.save_file(create_report_file_name)
|
||||
generated_html_files.append(f'./gpt_log/{create_report_file_name}')
|
||||
generated_html_files.append(ch.save_file(create_report_file_name))
|
||||
except:
|
||||
from toolbox import trimmed_format_exc
|
||||
print('writing html result failed:', trimmed_format_exc())
|
||||
@@ -202,6 +304,6 @@ class construct_html():
|
||||
|
||||
|
||||
def save_file(self, file_name):
|
||||
with open(f'./gpt_log/{file_name}', 'w', encoding='utf8') as f:
|
||||
with open(os.path.join(get_log_folder(), file_name), 'w', encoding='utf8') as f:
|
||||
f.write(self.html_string.encode('utf-8', 'ignore').decode())
|
||||
|
||||
return os.path.join(get_log_folder(), file_name)
|
||||
|
||||
43
crazy_functions/辅助功能.py
Normal file
43
crazy_functions/辅助功能.py
Normal file
@@ -0,0 +1,43 @@
|
||||
# encoding: utf-8
|
||||
# @Time : 2023/4/19
|
||||
# @Author : Spike
|
||||
# @Descr :
|
||||
from toolbox import update_ui
|
||||
from toolbox import CatchException, report_execption, write_results_to_file, get_log_folder
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
|
||||
|
||||
@CatchException
|
||||
def 猜你想问(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
|
||||
if txt:
|
||||
show_say = txt
|
||||
prompt = txt+'\n回答完问题后,再列出用户可能提出的三个问题。'
|
||||
else:
|
||||
prompt = history[-1]+"\n分析上述回答,再列出用户可能提出的三个问题。"
|
||||
show_say = '分析上述回答,再列出用户可能提出的三个问题。'
|
||||
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
|
||||
inputs=prompt,
|
||||
inputs_show_user=show_say,
|
||||
llm_kwargs=llm_kwargs,
|
||||
chatbot=chatbot,
|
||||
history=history,
|
||||
sys_prompt=system_prompt
|
||||
)
|
||||
chatbot[-1] = (show_say, gpt_say)
|
||||
history.extend([show_say, gpt_say])
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
|
||||
@CatchException
|
||||
def 清除缓存(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
|
||||
chatbot.append(['清除本地缓存数据', '执行中. 删除 gpt_log & private_upload'])
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
import shutil, os
|
||||
gpt_log_dir = os.path.join(os.path.dirname(__file__), '..', 'gpt_log')
|
||||
private_upload_dir = os.path.join(os.path.dirname(__file__), '..', 'private_upload')
|
||||
shutil.rmtree(gpt_log_dir, ignore_errors=True)
|
||||
shutil.rmtree(private_upload_dir, ignore_errors=True)
|
||||
|
||||
chatbot.append(['清除本地缓存数据', '执行完成'])
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
@@ -16,6 +16,7 @@ services:
|
||||
AVAIL_LLM_MODELS: ' ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "newbing"] '
|
||||
WEB_PORT: ' 22303 '
|
||||
ADD_WAIFU: ' True '
|
||||
# THEME: ' Chuanhu-Small-and-Beautiful '
|
||||
# DEFAULT_WORKER_NUM: ' 10 '
|
||||
# AUTHENTICATION: ' [("username", "passwd"), ("username2", "passwd2")] '
|
||||
|
||||
@@ -28,7 +29,7 @@ services:
|
||||
|
||||
|
||||
### ===================================================
|
||||
### 【方案二】 如果需要运行ChatGLM本地模型
|
||||
### 【方案二】 如果需要运行ChatGLM + Qwen + MOSS等本地模型
|
||||
### ===================================================
|
||||
version: '3'
|
||||
services:
|
||||
@@ -36,11 +37,11 @@ services:
|
||||
image: ghcr.io/binary-husky/gpt_academic_chatglm_moss:master # (Auto Built by Dockerfile: docs/Dockerfile+ChatGLM)
|
||||
environment:
|
||||
# 请查阅 `config.py` 以查看所有的配置信息
|
||||
API_KEY: ' sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,fkxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx '
|
||||
API_KEY: ' sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx '
|
||||
USE_PROXY: ' True '
|
||||
proxies: ' { "http": "socks5h://localhost:10880", "https": "socks5h://localhost:10880", } '
|
||||
LLM_MODEL: ' gpt-3.5-turbo '
|
||||
AVAIL_LLM_MODELS: ' ["chatglm", "moss", "gpt-3.5-turbo", "gpt-4", "newbing"] '
|
||||
AVAIL_LLM_MODELS: ' ["chatglm", "qwen", "moss", "gpt-3.5-turbo", "gpt-4", "newbing"] '
|
||||
LOCAL_MODEL_DEVICE: ' cuda '
|
||||
DEFAULT_WORKER_NUM: ' 10 '
|
||||
WEB_PORT: ' 12303 '
|
||||
@@ -57,6 +58,10 @@ services:
|
||||
command: >
|
||||
bash -c "python3 -u main.py"
|
||||
|
||||
# P.S. 通过对 command 进行微调,可以便捷地安装额外的依赖
|
||||
# command: >
|
||||
# bash -c "pip install -r request_llm/requirements_qwen.txt && python3 -u main.py"
|
||||
|
||||
### ===================================================
|
||||
### 【方案三】 如果需要运行ChatGPT + LLAMA + 盘古 + RWKV本地模型
|
||||
### ===================================================
|
||||
|
||||
@@ -18,6 +18,7 @@ WORKDIR /gpt/gpt_academic
|
||||
RUN git clone https://github.com/OpenLMLab/MOSS.git request_llm/moss
|
||||
RUN python3 -m pip install -r requirements.txt
|
||||
RUN python3 -m pip install -r request_llm/requirements_moss.txt
|
||||
RUN python3 -m pip install -r request_llm/requirements_qwen.txt
|
||||
RUN python3 -m pip install -r request_llm/requirements_chatglm.txt
|
||||
RUN python3 -m pip install -r request_llm/requirements_newbing.txt
|
||||
|
||||
|
||||
@@ -19,6 +19,12 @@ from .bridge_chatgpt import predict as chatgpt_ui
|
||||
from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
|
||||
from .bridge_chatglm import predict as chatglm_ui
|
||||
|
||||
from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
|
||||
from .bridge_chatglm import predict as chatglm_ui
|
||||
|
||||
from .bridge_qianfan import predict_no_ui_long_connection as qianfan_noui
|
||||
from .bridge_qianfan import predict as qianfan_ui
|
||||
|
||||
colors = ['#FF00FF', '#00FFFF', '#FF0000', '#990099', '#009999', '#990044']
|
||||
|
||||
class LazyloadTiktoken(object):
|
||||
@@ -165,7 +171,14 @@ model_info = {
|
||||
"tokenizer": tokenizer_gpt35,
|
||||
"token_cnt": get_token_num_gpt35,
|
||||
},
|
||||
|
||||
"qianfan": {
|
||||
"fn_with_ui": qianfan_ui,
|
||||
"fn_without_ui": qianfan_noui,
|
||||
"endpoint": None,
|
||||
"max_token": 2000,
|
||||
"tokenizer": tokenizer_gpt35,
|
||||
"token_cnt": get_token_num_gpt35,
|
||||
},
|
||||
}
|
||||
|
||||
# -=-=-=-=-=-=- 以下部分是新加入的模型,可能附带额外依赖 -=-=-=-=-=-=-
|
||||
@@ -361,7 +374,7 @@ if "chatgpt_website" in AVAIL_LLM_MODELS: # 接入一些逆向工程https://gi
|
||||
"chatgpt_website": {
|
||||
"fn_with_ui": chatgpt_website_ui,
|
||||
"fn_without_ui": chatgpt_website_noui,
|
||||
"endpoint": None,
|
||||
"endpoint": openai_endpoint,
|
||||
"max_token": 4096,
|
||||
"tokenizer": tokenizer_gpt35,
|
||||
"token_cnt": get_token_num_gpt35,
|
||||
@@ -385,6 +398,22 @@ if "spark" in AVAIL_LLM_MODELS: # 讯飞星火认知大模型
|
||||
})
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
if "llama2" in AVAIL_LLM_MODELS: # llama2
|
||||
try:
|
||||
from .bridge_llama2 import predict_no_ui_long_connection as llama2_noui
|
||||
from .bridge_llama2 import predict as llama2_ui
|
||||
model_info.update({
|
||||
"llama2": {
|
||||
"fn_with_ui": llama2_ui,
|
||||
"fn_without_ui": llama2_noui,
|
||||
"endpoint": None,
|
||||
"max_token": 4096,
|
||||
"tokenizer": tokenizer_gpt35,
|
||||
"token_cnt": get_token_num_gpt35,
|
||||
}
|
||||
})
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -177,14 +177,13 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
|
||||
yield from update_ui(chatbot=chatbot, history=history, msg="非Openai官方接口返回了错误:" + chunk.decode()) # 刷新界面
|
||||
return
|
||||
|
||||
# print(chunk.decode()[6:])
|
||||
if is_head_of_the_stream and (r'"object":"error"' not in chunk.decode()):
|
||||
chunk_decoded = chunk.decode()
|
||||
if is_head_of_the_stream and (r'"object":"error"' not in chunk_decoded) and (r"choices" not in chunk_decoded):
|
||||
# 数据流的第一帧不携带content
|
||||
is_head_of_the_stream = False; continue
|
||||
|
||||
if chunk:
|
||||
try:
|
||||
chunk_decoded = chunk.decode()
|
||||
# 前者是API2D的结束条件,后者是OPENAI的结束条件
|
||||
if ('data: [DONE]' in chunk_decoded) or (len(json.loads(chunk_decoded[6:])['choices'][0]["delta"]) == 0):
|
||||
# 判定为数据流的结束,gpt_replying_buffer也写完了
|
||||
@@ -192,7 +191,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
|
||||
break
|
||||
# 处理数据流的主体
|
||||
chunkjson = json.loads(chunk_decoded[6:])
|
||||
status_text = f"finish_reason: {chunkjson['choices'][0]['finish_reason']}"
|
||||
status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}"
|
||||
# 如果这里抛出异常,一般是文本过长,详情见get_full_error的输出
|
||||
gpt_replying_buffer = gpt_replying_buffer + json.loads(chunk_decoded[6:])['choices'][0]["delta"]["content"]
|
||||
history[-1] = gpt_replying_buffer
|
||||
@@ -216,7 +215,6 @@ def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
|
||||
history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
|
||||
max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])) # history至少释放二分之一
|
||||
chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
|
||||
# history = [] # 清除历史
|
||||
elif "does not exist" in error_msg:
|
||||
chatbot[-1] = (chatbot[-1][0], f"[Local Message] Model {llm_kwargs['llm_model']} does not exist. 模型不存在, 或者您没有获得体验资格.")
|
||||
elif "Incorrect API key" in error_msg:
|
||||
|
||||
@@ -118,16 +118,6 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
|
||||
chatbot 为WebUI中显示的对话列表,修改它,然后yeild出去,可以直接修改对话界面内容
|
||||
additional_fn代表点击的哪个按钮,按钮见functional.py
|
||||
"""
|
||||
if is_any_api_key(inputs):
|
||||
chatbot._cookies['api_key'] = inputs
|
||||
chatbot.append(("输入已识别为openai的api_key", what_keys(inputs)))
|
||||
yield from update_ui(chatbot=chatbot, history=history, msg="api_key已导入") # 刷新界面
|
||||
return
|
||||
elif not is_any_api_key(chatbot._cookies['api_key']):
|
||||
chatbot.append((inputs, "缺少api_key。\n\n1. 临时解决方案:直接在输入区键入api_key,然后回车提交。\n\n2. 长效解决方案:在config.py中配置。"))
|
||||
yield from update_ui(chatbot=chatbot, history=history, msg="缺少api_key") # 刷新界面
|
||||
return
|
||||
|
||||
if additional_fn is not None:
|
||||
from core_functional import handle_core_functionality
|
||||
inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
|
||||
@@ -245,14 +235,9 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
|
||||
if not is_any_api_key(llm_kwargs['api_key']):
|
||||
raise AssertionError("你提供了错误的API_KEY。\n\n1. 临时解决方案:直接在输入区键入api_key,然后回车提交。\n\n2. 长效解决方案:在config.py中配置。")
|
||||
|
||||
api_key = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])
|
||||
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {api_key}"
|
||||
}
|
||||
if API_ORG.startswith('org-'): headers.update({"OpenAI-Organization": API_ORG})
|
||||
if llm_kwargs['llm_model'].startswith('azure-'): headers.update({"api-key": api_key})
|
||||
|
||||
conversation_cnt = len(history) // 2
|
||||
|
||||
|
||||
91
request_llm/bridge_llama2.py
Normal file
91
request_llm/bridge_llama2.py
Normal file
@@ -0,0 +1,91 @@
|
||||
model_name = "LLaMA"
|
||||
cmd_to_install = "`pip install -r request_llm/requirements_chatglm.txt`"
|
||||
|
||||
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
||||
from toolbox import update_ui, get_conf, ProxyNetworkActivate
|
||||
from multiprocessing import Process, Pipe
|
||||
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns, SingletonLocalLLM
|
||||
from threading import Thread
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------------------------------------------------
|
||||
# 🔌💻 Local Model
|
||||
# ------------------------------------------------------------------------------------------------------------------------
|
||||
@SingletonLocalLLM
|
||||
class GetONNXGLMHandle(LocalLLMHandle):
|
||||
|
||||
def load_model_info(self):
|
||||
# 🏃♂️🏃♂️🏃♂️ 子进程执行
|
||||
self.model_name = model_name
|
||||
self.cmd_to_install = cmd_to_install
|
||||
|
||||
def load_model_and_tokenizer(self):
|
||||
# 🏃♂️🏃♂️🏃♂️ 子进程执行
|
||||
import os, glob
|
||||
import os
|
||||
import platform
|
||||
huggingface_token, device = get_conf('HUGGINGFACE_ACCESS_TOKEN', 'LOCAL_MODEL_DEVICE')
|
||||
assert len(huggingface_token) != 0, "没有填写 HUGGINGFACE_ACCESS_TOKEN"
|
||||
with open(os.path.expanduser('~/.cache/huggingface/token'), 'w') as f:
|
||||
f.write(huggingface_token)
|
||||
model_id = 'meta-llama/Llama-2-7b-chat-hf'
|
||||
with ProxyNetworkActivate():
|
||||
self._tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=huggingface_token)
|
||||
# use fp16
|
||||
model = AutoModelForCausalLM.from_pretrained(model_id, use_auth_token=huggingface_token).eval()
|
||||
if device.startswith('cuda'): model = model.half().to(device)
|
||||
self._model = model
|
||||
|
||||
return self._model, self._tokenizer
|
||||
|
||||
def llm_stream_generator(self, **kwargs):
|
||||
# 🏃♂️🏃♂️🏃♂️ 子进程执行
|
||||
def adaptor(kwargs):
|
||||
query = kwargs['query']
|
||||
max_length = kwargs['max_length']
|
||||
top_p = kwargs['top_p']
|
||||
temperature = kwargs['temperature']
|
||||
history = kwargs['history']
|
||||
console_slience = kwargs.get('console_slience', True)
|
||||
return query, max_length, top_p, temperature, history, console_slience
|
||||
|
||||
def convert_messages_to_prompt(query, history):
|
||||
prompt = ""
|
||||
for a, b in history:
|
||||
prompt += f"\n[INST]{a}[/INST]"
|
||||
prompt += "\n{b}" + b
|
||||
prompt += f"\n[INST]{query}[/INST]"
|
||||
return prompt
|
||||
|
||||
query, max_length, top_p, temperature, history, console_slience = adaptor(kwargs)
|
||||
prompt = convert_messages_to_prompt(query, history)
|
||||
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=-
|
||||
# code from transformers.llama
|
||||
streamer = TextIteratorStreamer(self._tokenizer)
|
||||
# Run the generation in a separate thread, so that we can fetch the generated text in a non-blocking way.
|
||||
inputs = self._tokenizer([prompt], return_tensors="pt")
|
||||
prompt_tk_back = self._tokenizer.batch_decode(inputs['input_ids'])[0]
|
||||
|
||||
generation_kwargs = dict(inputs.to(self._model.device), streamer=streamer, max_new_tokens=max_length)
|
||||
thread = Thread(target=self._model.generate, kwargs=generation_kwargs)
|
||||
thread.start()
|
||||
generated_text = ""
|
||||
for new_text in streamer:
|
||||
generated_text += new_text
|
||||
if not console_slience: print(new_text, end='')
|
||||
yield generated_text.lstrip(prompt_tk_back).rstrip("</s>")
|
||||
if not console_slience: print()
|
||||
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=-
|
||||
|
||||
def try_to_import_special_deps(self, **kwargs):
|
||||
# import something that will raise error if the user does not install requirement_*.txt
|
||||
# 🏃♂️🏃♂️🏃♂️ 主进程执行
|
||||
import importlib
|
||||
importlib.import_module('transformers')
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------------------------------------------------
|
||||
# 🔌💻 GPT-Academic Interface
|
||||
# ------------------------------------------------------------------------------------------------------------------------
|
||||
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetONNXGLMHandle, model_name)
|
||||
164
request_llm/bridge_qianfan.py
Normal file
164
request_llm/bridge_qianfan.py
Normal file
@@ -0,0 +1,164 @@
|
||||
|
||||
import time, requests, json
|
||||
from multiprocessing import Process, Pipe
|
||||
from functools import wraps
|
||||
from datetime import datetime, timedelta
|
||||
from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, get_conf
|
||||
|
||||
model_name = '千帆大模型平台'
|
||||
timeout_bot_msg = '[Local Message] Request timeout. Network error.'
|
||||
|
||||
def cache_decorator(timeout):
|
||||
cache = {}
|
||||
def decorator(func):
|
||||
@wraps(func)
|
||||
def wrapper(*args, **kwargs):
|
||||
key = (func.__name__, args, frozenset(kwargs.items()))
|
||||
# Check if result is already cached and not expired
|
||||
if key in cache:
|
||||
result, timestamp = cache[key]
|
||||
if datetime.now() - timestamp < timedelta(seconds=timeout):
|
||||
return result
|
||||
|
||||
# Call the function and cache the result
|
||||
result = func(*args, **kwargs)
|
||||
cache[key] = (result, datetime.now())
|
||||
return result
|
||||
return wrapper
|
||||
return decorator
|
||||
|
||||
@cache_decorator(timeout=3600)
|
||||
def get_access_token():
|
||||
"""
|
||||
使用 AK,SK 生成鉴权签名(Access Token)
|
||||
:return: access_token,或是None(如果错误)
|
||||
"""
|
||||
# if (access_token_cache is None) or (time.time() - last_access_token_obtain_time > 3600):
|
||||
BAIDU_CLOUD_API_KEY, BAIDU_CLOUD_SECRET_KEY = get_conf('BAIDU_CLOUD_API_KEY', 'BAIDU_CLOUD_SECRET_KEY')
|
||||
|
||||
if len(BAIDU_CLOUD_SECRET_KEY) == 0: raise RuntimeError("没有配置BAIDU_CLOUD_SECRET_KEY")
|
||||
if len(BAIDU_CLOUD_API_KEY) == 0: raise RuntimeError("没有配置BAIDU_CLOUD_API_KEY")
|
||||
|
||||
url = "https://aip.baidubce.com/oauth/2.0/token"
|
||||
params = {"grant_type": "client_credentials", "client_id": BAIDU_CLOUD_API_KEY, "client_secret": BAIDU_CLOUD_SECRET_KEY}
|
||||
access_token_cache = str(requests.post(url, params=params).json().get("access_token"))
|
||||
return access_token_cache
|
||||
# else:
|
||||
# return access_token_cache
|
||||
|
||||
|
||||
def generate_message_payload(inputs, llm_kwargs, history, system_prompt):
|
||||
conversation_cnt = len(history) // 2
|
||||
messages = [{"role": "user", "content": system_prompt}]
|
||||
messages.append({"role": "assistant", "content": 'Certainly!'})
|
||||
if conversation_cnt:
|
||||
for index in range(0, 2*conversation_cnt, 2):
|
||||
what_i_have_asked = {}
|
||||
what_i_have_asked["role"] = "user"
|
||||
what_i_have_asked["content"] = history[index]
|
||||
what_gpt_answer = {}
|
||||
what_gpt_answer["role"] = "assistant"
|
||||
what_gpt_answer["content"] = history[index+1]
|
||||
if what_i_have_asked["content"] != "":
|
||||
if what_gpt_answer["content"] == "": continue
|
||||
if what_gpt_answer["content"] == timeout_bot_msg: continue
|
||||
messages.append(what_i_have_asked)
|
||||
messages.append(what_gpt_answer)
|
||||
else:
|
||||
messages[-1]['content'] = what_gpt_answer['content']
|
||||
what_i_ask_now = {}
|
||||
what_i_ask_now["role"] = "user"
|
||||
what_i_ask_now["content"] = inputs
|
||||
messages.append(what_i_ask_now)
|
||||
return messages
|
||||
|
||||
|
||||
def generate_from_baidu_qianfan(inputs, llm_kwargs, history, system_prompt):
|
||||
BAIDU_CLOUD_QIANFAN_MODEL, = get_conf('BAIDU_CLOUD_QIANFAN_MODEL')
|
||||
|
||||
url_lib = {
|
||||
"ERNIE-Bot": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions" ,
|
||||
"ERNIE-Bot-turbo": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/eb-instant" ,
|
||||
"BLOOMZ-7B": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/bloomz_7b1",
|
||||
|
||||
"Llama-2-70B-Chat": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/llama_2_70b",
|
||||
"Llama-2-13B-Chat": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/llama_2_13b",
|
||||
"Llama-2-7B-Chat": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/llama_2_7b",
|
||||
}
|
||||
|
||||
url = url_lib[BAIDU_CLOUD_QIANFAN_MODEL]
|
||||
|
||||
url += "?access_token=" + get_access_token()
|
||||
|
||||
|
||||
payload = json.dumps({
|
||||
"messages": generate_message_payload(inputs, llm_kwargs, history, system_prompt),
|
||||
"stream": True
|
||||
})
|
||||
headers = {
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
response = requests.request("POST", url, headers=headers, data=payload, stream=True)
|
||||
buffer = ""
|
||||
for line in response.iter_lines():
|
||||
if len(line) == 0: continue
|
||||
try:
|
||||
dec = line.decode().lstrip('data:')
|
||||
dec = json.loads(dec)
|
||||
incoming = dec['result']
|
||||
buffer += incoming
|
||||
yield buffer
|
||||
except:
|
||||
if ('error_code' in dec) and ("max length" in dec['error_msg']):
|
||||
raise ConnectionAbortedError(dec['error_msg']) # 上下文太长导致 token 溢出
|
||||
elif ('error_code' in dec):
|
||||
raise RuntimeError(dec['error_msg'])
|
||||
|
||||
|
||||
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
|
||||
"""
|
||||
⭐多线程方法
|
||||
函数的说明请见 request_llm/bridge_all.py
|
||||
"""
|
||||
watch_dog_patience = 5
|
||||
response = ""
|
||||
|
||||
for response in generate_from_baidu_qianfan(inputs, llm_kwargs, history, sys_prompt):
|
||||
if len(observe_window) >= 1:
|
||||
observe_window[0] = response
|
||||
if len(observe_window) >= 2:
|
||||
if (time.time()-observe_window[1]) > watch_dog_patience: raise RuntimeError("程序终止。")
|
||||
return response
|
||||
|
||||
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
|
||||
"""
|
||||
⭐单线程方法
|
||||
函数的说明请见 request_llm/bridge_all.py
|
||||
"""
|
||||
chatbot.append((inputs, ""))
|
||||
|
||||
if additional_fn is not None:
|
||||
from core_functional import handle_core_functionality
|
||||
inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
|
||||
|
||||
yield from update_ui(chatbot=chatbot, history=history)
|
||||
# 开始接收回复
|
||||
try:
|
||||
for response in generate_from_baidu_qianfan(inputs, llm_kwargs, history, system_prompt):
|
||||
chatbot[-1] = (inputs, response)
|
||||
yield from update_ui(chatbot=chatbot, history=history)
|
||||
except ConnectionAbortedError as e:
|
||||
from .bridge_all import model_info
|
||||
if len(history) >= 2: history[-1] = ""; history[-2] = "" # 清除当前溢出的输入:history[-2] 是本次输入, history[-1] 是本次输出
|
||||
history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
|
||||
max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])) # history至少释放二分之一
|
||||
chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
|
||||
yield from update_ui(chatbot=chatbot, history=history, msg="异常") # 刷新界面
|
||||
return
|
||||
|
||||
# 总结输出
|
||||
response = f"[Local Message]: {model_name}响应异常 ..."
|
||||
if response == f"[Local Message]: 等待{model_name}响应中 ...":
|
||||
response = f"[Local Message]: {model_name}响应异常 ..."
|
||||
history.extend([inputs, response])
|
||||
yield from update_ui(chatbot=chatbot, history=history)
|
||||
@@ -128,7 +128,7 @@ def get_local_llm_predict_fns(LLMSingletonClass, model_name):
|
||||
|
||||
# chatglm 没有 sys_prompt 接口,因此把prompt加入 history
|
||||
history_feedin = []
|
||||
history_feedin.append(["What can I do?", sys_prompt])
|
||||
history_feedin.append([sys_prompt, "Certainly!"])
|
||||
for i in range(len(history)//2):
|
||||
history_feedin.append([history[2*i], history[2*i+1]] )
|
||||
|
||||
@@ -161,7 +161,7 @@ def get_local_llm_predict_fns(LLMSingletonClass, model_name):
|
||||
|
||||
# 处理历史信息
|
||||
history_feedin = []
|
||||
history_feedin.append(["What can I do?", system_prompt] )
|
||||
history_feedin.append([system_prompt, "Certainly!"])
|
||||
for i in range(len(history)//2):
|
||||
history_feedin.append([history[2*i], history[2*i+1]] )
|
||||
|
||||
|
||||
@@ -17,5 +17,6 @@ openai
|
||||
numpy
|
||||
arxiv
|
||||
rich
|
||||
websocket-client
|
||||
pypdf2==2.12.1
|
||||
websocket-client
|
||||
scipdf_parser==0.3
|
||||
|
||||
@@ -9,9 +9,9 @@ validate_path() # 返回项目根路径
|
||||
from tests.test_utils import plugin_test
|
||||
|
||||
if __name__ == "__main__":
|
||||
plugin_test(plugin='crazy_functions.命令行助手->命令行助手', main_input='查看当前的docker容器列表')
|
||||
# plugin_test(plugin='crazy_functions.命令行助手->命令行助手', main_input='查看当前的docker容器列表')
|
||||
|
||||
plugin_test(plugin='crazy_functions.解析项目源代码->解析一个Python项目', main_input="crazy_functions/test_project/python/dqn")
|
||||
# plugin_test(plugin='crazy_functions.解析项目源代码->解析一个Python项目', main_input="crazy_functions/test_project/python/dqn")
|
||||
|
||||
# plugin_test(plugin='crazy_functions.解析项目源代码->解析一个C项目', main_input="crazy_functions/test_project/cpp/cppipc")
|
||||
|
||||
@@ -19,7 +19,7 @@ if __name__ == "__main__":
|
||||
|
||||
# plugin_test(plugin='crazy_functions.批量Markdown翻译->Markdown中译英', main_input="README.md")
|
||||
|
||||
# plugin_test(plugin='crazy_functions.批量翻译PDF文档_多线程->批量翻译PDF文档', main_input="crazy_functions/test_project/pdf_and_word")
|
||||
plugin_test(plugin='crazy_functions.批量翻译PDF文档_多线程->批量翻译PDF文档', main_input='crazy_functions/test_project/pdf_and_word/aaai.pdf')
|
||||
|
||||
# plugin_test(plugin='crazy_functions.谷歌检索小助手->谷歌检索小助手', main_input="https://scholar.google.com/scholar?hl=en&as_sdt=0%2C5&q=auto+reinforcement+learning&btnG=")
|
||||
|
||||
|
||||
@@ -22,10 +22,12 @@ def silence_stdout(func):
|
||||
def wrapper(*args, **kwargs):
|
||||
_original_stdout = sys.stdout
|
||||
sys.stdout = open(os.devnull, 'w')
|
||||
sys.stdout.reconfigure(encoding='utf-8')
|
||||
for q in func(*args, **kwargs):
|
||||
sys.stdout = _original_stdout
|
||||
yield q
|
||||
sys.stdout = open(os.devnull, 'w')
|
||||
sys.stdout.reconfigure(encoding='utf-8')
|
||||
sys.stdout.close()
|
||||
sys.stdout = _original_stdout
|
||||
return wrapper
|
||||
@@ -35,6 +37,7 @@ def silence_stdout_fn(func):
|
||||
def wrapper(*args, **kwargs):
|
||||
_original_stdout = sys.stdout
|
||||
sys.stdout = open(os.devnull, 'w')
|
||||
sys.stdout.reconfigure(encoding='utf-8')
|
||||
result = func(*args, **kwargs)
|
||||
sys.stdout.close()
|
||||
sys.stdout = _original_stdout
|
||||
|
||||
4
version
4
version
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"version": 3.48,
|
||||
"version": 3.49,
|
||||
"show_feature": true,
|
||||
"new_feature": "接入阿里通义千问、讯飞星火、上海AI-Lab书生 <-> 优化一键升级 <-> 提高arxiv翻译速度和成功率 <-> 支持自定义APIKEY格式 <-> 临时修复theme的文件丢失问题 <-> 新增实时语音对话插件(自动断句,脱手对话) <-> 支持加载自定义的ChatGLM2微调模型 <-> 动态ChatBot窗口高度 <-> 修复Azure接口的BUG <-> 完善多语言模块 <-> 完善本地Latex矫错和翻译功能 <-> 增加gpt-3.5-16k的支持"
|
||||
"new_feature": "支持借助GROBID实现PDF高精度翻译 <-> 接入百度千帆平台和文心一言 <-> 接入阿里通义千问、讯飞星火、上海AI-Lab书生 <-> 优化一键升级 <-> 提高arxiv翻译速度和成功率 <-> 支持自定义APIKEY格式 <-> 临时修复theme的文件丢失问题 <-> 新增实时语音对话插件(自动断句,脱手对话) <-> 支持加载自定义的ChatGLM2微调模型 <-> 动态ChatBot窗口高度 <-> 修复Azure接口的BUG <-> 完善多语言模块"
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user