Compare commits

..

40 Commits

Author SHA1 Message Date
qingxu fu
8ddc1adae4 version 2.5 2023-04-08 22:27:02 +08:00
qingxu fu
4e3f759d0c 移动参数位置 2023-04-08 22:16:33 +08:00
qingxu fu
94ff62bdaa 错别字 2023-04-08 22:15:33 +08:00
qingxu fu
2cbb5dbdaa up 2023-04-08 22:14:05 +08:00
Your Name
3b85a29f91 加入自动更新协议 2023-04-08 02:48:35 +08:00
Your Name
166daa1ea7 显示版本 2023-04-08 02:39:54 +08:00
Your Name
5c3ecd7477 自动更新程序 2023-04-08 02:38:02 +08:00
Your Name
d5b03377ff 多种接口 2023-04-08 00:51:58 +08:00
Your Name
7cd11f2bbd 新插件移动到插件菜单中 2023-04-08 00:42:54 +08:00
Your Name
f65cc8deea Merge branch 'master' of github.com:binary-husky/chatgpt_academic 2023-04-08 00:41:46 +08:00
Your Name
48ee620524 代码高亮开关 2023-04-08 00:41:39 +08:00
binary-husky
8a5be8fb8d Merge pull request #366 from Hanzoe/master
new function: 实现单篇PDF论文翻译理解
2023-04-08 00:41:03 +08:00
binary-husky
f26b8e28e1 Update README.md 2023-04-08 00:32:22 +08:00
Your Name
b005b84ad6 更新requirements.txt实现代码高亮必要 2023-04-08 00:23:26 +08:00
Your Name
1edf7ef80d Fix dockerfile 2023-04-08 00:01:11 +08:00
Your Name
3fed08f65e version 2.45 2023-04-07 23:58:10 +08:00
Your Name
fa8603d745 Merge branch 'master' into dev 2023-04-07 23:55:19 +08:00
Your Name
6b5c2538cf 新增谷歌学术统合小助手 2023-04-07 23:54:24 +08:00
Your Name
7f1c7ebd68 version 2.43 2023-04-07 22:08:05 +08:00
Your Name
ff87aebc29 处理多线程中出现的网络问题 2023-04-07 22:06:08 +08:00
Hanzoe
2c746056ff Update crazy_functional.py 2023-04-07 21:35:36 +08:00
Hanzoe
0e4cac29f8 Add files via upload 2023-04-07 21:34:55 +08:00
Hanzoe
8513d46398 Merge pull request #1 from binary-husky/master
单篇论文翻译理解
2023-04-07 21:34:11 +08:00
Your Name
b2495a6f7e Merge branch 'dev' of github.com:binary-husky/chatgpt_academic into dev 2023-04-07 21:09:43 +08:00
Your Name
5603d33d67 highlight 2023-04-07 21:09:37 +08:00
Your Name
d06d4f3a6f highlight 2023-04-07 21:08:34 +08:00
Your Name
b2adc77a73 Merge branch 'dev' of github.com:binary-husky/chatgpt_academic into dev 2023-04-07 21:00:32 +08:00
Your Name
1f6e2547b2 Merge branch 'master' into dev 2023-04-07 20:59:35 +08:00
qingxu fu
fd0e3fb5c4 代码、公式高亮 2023-04-07 20:30:30 +08:00
qingxu fu
a0b7ae6674 Merge branch 'master' of https://github.com/binary-husky/chatgpt_academic into master 2023-04-07 19:26:20 +08:00
qingxu fu
8ca232cda3 修复小BUG 2023-04-07 19:26:17 +08:00
binary-husky
34e983c7a5 Update README.md 2023-04-07 19:09:18 +08:00
binary-husky
c0d096726c Update README.md 2023-04-07 19:08:41 +08:00
qingxu fu
969e8c1d89 正确显示列表序号 2023-04-07 18:33:46 +08:00
binary-husky
d4e3082db4 Update toolbox.py 2023-04-07 18:27:52 +08:00
binary-husky
777e56882b Update README.md 2023-04-07 18:21:13 +08:00
Your Name
4da7d75ad4 修复公式显示错误 2023-04-07 18:14:27 +08:00
qingxu fu
1538acaa5a fix equation 2023-04-07 17:55:24 +08:00
qingxu fu
b47f69978e 更新requirements.txt 2023-04-07 12:45:47 +08:00
binary-husky
823c136de4 Update README.md 2023-04-06 19:24:37 +08:00
15 changed files with 612 additions and 53 deletions

3
.gitignore vendored
View File

@@ -140,4 +140,5 @@ gpt_log
private.md
private_upload
other_llms
cradle.py
cradle*
debug*

View File

@@ -4,10 +4,10 @@ RUN echo '[global]' > /etc/pip.conf && \
echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \
echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf
RUN pip3 install gradio requests[socks] mdtex2html
COPY . /gpt
WORKDIR /gpt
RUN pip3 install -r requirements.txt
CMD ["python3", "main.py"]

View File

@@ -33,6 +33,7 @@ If you like this project, please give it a Star. If you've come up with more use
chat分析报告生成 | [函数插件] 运行后自动生成总结汇报
[arxiv小助手](https://www.bilibili.com/video/BV1LM4y1279X) | [函数插件] 输入arxiv文章url即可一键翻译摘要+下载PDF
[PDF论文全文翻译功能](https://www.bilibili.com/video/BV1KT411x7Wn) | [函数插件] PDF论文提取题目&摘要+翻译全文(多线程)
[谷歌学术统合小助手](https://www.bilibili.com/video/BV19L411U7ia) (Version>=2.45) | [函数插件] 给定任意谷歌学术搜索页面URL让gpt帮你选择有趣的文章
公式显示 | 可以同时显示公式的tex形式和渲染形式
图片显示 | 可以在markdown中显示图片
多线程函数插件支持 | 支持多线调用chatgpt一键处理海量文本或程序
@@ -69,10 +70,11 @@ huggingface免科学上网[在线体验](https://huggingface.co/spaces/qingxu98/
- 如果输出包含公式会同时以tex形式和渲染形式显示方便复制和阅读
<div align="center">
<img src="img/demo.jpg" width="500" >
<img src="https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png" width="700" >
</div>
- 懒得看项目代码整个工程直接给chatgpt炫嘴里
<div align="center">
<img src="https://user-images.githubusercontent.com/96192199/226935232-6b6a73ce-8900-4aee-93f9-733c7e6fef53.png" width="700" >
@@ -260,11 +262,12 @@ python check_proxy.py
- version 3 (Todo):
- - 支持gpt4和其他更多llm
- version 2.3+ (Todo):
- version 2.4+ (Todo):
- - 总结大工程源代码时文本过长、token溢出的问题
- - 实现项目打包部署
- - 函数插件参数接口优化
- - 自更新
- version 2.4: (1)新增PDF全文翻译功能; (2)新增输入区切换位置的功能; (3)新增垂直布局选项; (4)多线程函数插件优化。
- version 2.3: 增强多线程交互性
- version 2.2: 函数插件支持热重载
- version 2.1: 可折叠式布局

View File

@@ -20,31 +20,110 @@ def check_proxy(proxies):
return result
def auto_update():
def backup_and_download(current_version, remote_version):
"""
一键更新协议:备份和下载
"""
from toolbox import get_conf
import shutil
import os
import requests
import time
import json
import zipfile
os.makedirs(f'./history', exist_ok=True)
backup_dir = f'./history/backup-{current_version}/'
new_version_dir = f'./history/new-version-{remote_version}/'
if os.path.exists(new_version_dir):
return new_version_dir
os.makedirs(new_version_dir)
shutil.copytree('./', backup_dir, ignore=lambda x, y: ['history'])
proxies, = get_conf('proxies')
response = requests.get("https://raw.githubusercontent.com/binary-husky/chatgpt_academic/master/version",
proxies=proxies, timeout=1)
remote_json_data = json.loads(response.text)
remote_version = remote_json_data['version']
if remote_json_data["show_feature"]:
new_feature = "新功能:" + remote_json_data["new_feature"]
else:
new_feature = ""
with open('./version', 'r', encoding='utf8') as f:
current_version = f.read()
current_version = json.loads(current_version)['version']
if (remote_version - current_version) >= 0.05:
print(
f'\n新版本可用。新版本:{remote_version},当前版本:{current_version}{new_feature}')
print('Github更新地址:\nhttps://github.com/binary-husky/chatgpt_academic\n')
time.sleep(3)
return
else:
return
r = requests.get(
'https://github.com/binary-husky/chatgpt_academic/archive/refs/heads/master.zip', proxies=proxies, stream=True)
zip_file_path = backup_dir+'/master.zip'
with open(zip_file_path, 'wb+') as f:
f.write(r.content)
dst_path = new_version_dir
with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
for zip_info in zip_ref.infolist():
dst_file_path = os.path.join(dst_path, zip_info.filename)
if os.path.exists(dst_file_path):
os.remove(dst_file_path)
zip_ref.extract(zip_info, dst_path)
return new_version_dir
def patch_and_restart(path):
    """
    One-click update protocol: overwrite the working tree and restart.

    Args:
        path: Directory containing the extracted release; its
            ``chatgpt_academic-master`` sub-directory is copied on top of
            the current working directory.

    On success this function does not return: ``os.execl`` replaces the
    running process with a new interpreter executing ``main.py``.
    """
    import os
    import shutil
    import sys
    import time
    # If the user has no config_private.py yet, keep a copy of the current
    # config.py under that name so the overwrite below cannot lose settings.
    if not os.path.exists('config_private.py'):
        print('由于您没有设置config_private.py私密配置现将您的现有配置移动至config_private.py以防止配置丢失',
              '另外您可以随时在history子文件夹下找回旧版的程序。')
        shutil.copyfile('config.py', 'config_private.py')
    # shutil.copytree(dirs_exist_ok=True) merges into the existing directory.
    # The earlier `import distutils` did not by itself load the
    # `distutils.dir_util` submodule, and distutils is gone in Python 3.12.
    shutil.copytree(os.path.join(path, 'chatgpt_academic-master'), './', dirs_exist_ok=True)
    print('更新完成您可以随时在history子文件夹下找回旧版的程序5s之后重启')
    # Short countdown so the message above can be read before the restart.
    for i in reversed(range(5)):
        time.sleep(1)
        print(i)
    print(' ------------------------------ -----------------------------------')
    os.execl(sys.executable, 'python', 'main.py')
def get_current_version():
    """
    Read the local version number from the ``./version`` JSON file.

    Returns:
        The value stored under the ``"version"`` key, or an empty string
        when the file is missing, unreadable, not valid JSON, or lacks
        that key.
    """
    import json
    try:
        with open('./version', 'r', encoding='utf8') as f:
            current_version = json.loads(f.read())['version']
    except (OSError, ValueError, KeyError, TypeError):
        # OSError: missing/unreadable file; ValueError: bad JSON or encoding;
        # KeyError/TypeError: JSON parsed but has no usable "version" entry.
        current_version = ""
    return current_version
def auto_update():
    """
    One-click update protocol: query the remote version and ask the user.

    Downloads the remote ``version`` JSON, compares it with the local
    ``./version`` file and, when the remote is at least 0.05 ahead, prints
    the news and asks on stdin whether to update. On confirmation the
    release is fetched with ``backup_and_download`` and applied with
    ``patch_and_restart``.

    This is best effort: any ordinary failure (missing dependency, network
    error, malformed version file, ...) is reported as "disabled" and the
    function returns ``None`` instead of raising.
    """
    try:
        from toolbox import get_conf
        import requests
        import json
        proxies, = get_conf('proxies')
        response = requests.get(
            "https://raw.githubusercontent.com/binary-husky/chatgpt_academic/master/version", proxies=proxies, timeout=1)
        remote_json_data = json.loads(response.text)
        remote_version = remote_json_data['version']
        if remote_json_data["show_feature"]:
            new_feature = "新功能:" + remote_json_data["new_feature"]
        else:
            new_feature = ""
        with open('./version', 'r', encoding='utf8') as f:
            current_version = json.loads(f.read())['version']
        # Round before comparing: in binary floating point 2.5 - 2.45 is
        # 0.04999999999999982, which would wrongly fail a raw ">= 0.05" test.
        if round(remote_version - current_version, 6) < 0.05:
            return
        print(
            f'\n新版本可用。新版本:{remote_version},当前版本:{current_version}{new_feature}')
        print('1Github更新地址:\nhttps://github.com/binary-husky/chatgpt_academic\n')
        user_instruction = input('2是否一键更新代码Y/y+回车=确认,输入其他/无输入+回车=不更新)?')
        if user_instruction not in ['Y', 'y']:
            print('自动更新程序:已禁用')
            return
        path = backup_and_download(current_version, remote_version)
        try:
            patch_and_restart(path)
        except Exception:
            print('更新失败。')
    except Exception:
        # Deliberately broad: the updater must never take the application down.
        print('自动更新程序:已禁用')
if __name__ == '__main__':

View File

@@ -24,6 +24,9 @@ else:
# 对话窗的高度
CHATBOT_HEIGHT = 1115
# 代码高亮
CODE_HIGHLIGHT = True
# 窗口布局
LAYOUT = "LEFT-RIGHT" # "LEFT-RIGHT"(左右布局) # "TOP-DOWN"(上下布局)

View File

@@ -65,6 +65,7 @@ def get_crazy_functions():
# HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
"Function": HotReload(高阶功能模板函数)
},
}
###################### 第二组插件 ###########################
# [第二组插件]: 经过充分测试,但功能上距离达到完美状态还差一点点
@@ -72,6 +73,9 @@ def get_crazy_functions():
from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer
from crazy_functions.总结word文档 import 总结word文档
from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档
from crazy_functions.谷歌检索小助手 import 谷歌检索小助手
from crazy_functions.理解PDF文档内容 import 理解PDF文档内容
from crazy_functions.理解PDF文档内容 import 理解PDF文档内容标准文件输入
function_plugins.update({
"批量翻译PDF文档多线程": {
@@ -90,10 +94,26 @@ def get_crazy_functions():
"AsButton": False, # 加入下拉菜单中
"Function": HotReload(批量总结PDF文档pdfminer)
},
"谷歌学术检索助手输入谷歌学术搜索页url": {
"Color": "stop",
"AsButton": False, # 加入下拉菜单中
"Function": HotReload(谷歌检索小助手)
},
"批量总结Word文档": {
"Color": "stop",
"Function": HotReload(总结word文档)
},
"理解PDF文档内容Tk文件选择接口仅本地": {
# HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
"AsButton": False, # 加入下拉菜单中
"Function": HotReload(理解PDF文档内容)
},
"理解PDF文档内容通用接口读取文件输入区": {
# HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
"Color": "stop",
"AsButton": False, # 加入下拉菜单中
"Function": HotReload(理解PDF文档内容标准文件输入)
},
})
###################### 第三组插件 ###########################

View File

@@ -1,4 +1,4 @@
import traceback
def request_gpt_model_in_new_thread_with_ui_alive(inputs, inputs_show_user, top_p, temperature, chatbot, history, sys_prompt, refresh_interval=0.2):
import time
@@ -43,10 +43,16 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inp
mutable = [["", time.time()] for _ in range(n_frag)]
def _req_gpt(index, inputs, history, sys_prompt):
gpt_say = predict_no_ui_long_connection(
inputs=inputs, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt, observe_window=mutable[
index]
)
try:
gpt_say = predict_no_ui_long_connection(
inputs=inputs, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt, observe_window=mutable[index]
)
except:
# 收拾残局
tb_str = '```\n' + traceback.format_exc() + '```'
gpt_say = f"[Local Message] 线程{index}在执行过程中遭遇问题, Traceback\n\n{tb_str}\n\n"
if len(mutable[index][0]) > 0:
gpt_say += "此线程失败前收到的回答:" + mutable[index][0]
return gpt_say
# 异步任务开始
futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(

View File

@@ -0,0 +1,185 @@
from request_llm.bridge_chatgpt import predict_no_ui
from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
import re
import unicodedata
fast_debug = False
def is_paragraph_break(match):
    """
    Decide whether a matched line break separates two paragraphs.

    ``match`` must expose two groups: the character before the newline and
    the character after it. The break counts as a paragraph separator when
    the preceding character ends a sentence (``.``, ``!`` or ``?``), the
    following character is upper case, and more than 140 characters of the
    searched string precede the match.

    Returns:
        ``"\n\n"`` for a paragraph break, otherwise a single space.
    """
    before, after = match.groups()
    # Guard clauses, checked in the same order as the conditions above.
    if before not in ".!?":
        return " "
    if not after.isupper():
        return " "
    # Minimum amount of text that must precede the break.
    if match.start(1) <= 140:
        return " "
    return "\n\n"
def normalize_text(text):
    """
    Reduce text to plain ASCII by decomposing special glyphs.

    The text is NFKD-normalised, which splits ligatures and accented
    letters into base characters (for example the "fi" ligature becomes
    "f" followed by "i"); every remaining non-ASCII character is removed.
    """
    non_ascii_run = re.compile(r'[^\x00-\x7F]+')
    # Decompose first so that base letters survive the ASCII filter.
    decomposed = unicodedata.normalize("NFKD", text)
    return non_ascii_run.sub('', decomposed)
def clean_text(raw_text):
    """
    Clean and re-flow raw text extracted from a PDF.

    Steps:
        1. Normalise the text with ``normalize_text``.
        2. Re-join words hyphenated across a line break, e.g.
           "Espe-\\ncially" becomes "Especially".
        3. Replace each remaining single line break with either a space or
           a paragraph separator, as decided by ``is_paragraph_break``.

    Returns:
        The cleaned text with leading and trailing whitespace stripped.
    """
    hyphenated_word = re.compile(r'(\w+-\n\w+)')
    line_break = re.compile(r'(\S)\n(\S)')

    def _join_hyphenated(m):
        # Drop the hyphen together with the line break that follows it.
        return m.group(1).replace('-\n', '')

    def _reflow(m):
        # Keep the neighbouring characters, swap only the newline itself.
        return m.group(1) + is_paragraph_break(m) + m.group(2)

    text = normalize_text(raw_text)
    text = hyphenated_word.sub(_join_hyphenated, text)
    text = line_break.sub(_reflow, text)
    return text.strip()
def 解析PDF(file_name, top_p, temperature, chatbot, history, systemPromptTxt):
    """
    Send one PDF to GPT in fixed-size chunks, then open a Q&A round on it.

    The text of every page is extracted with PyMuPDF (``fitz``), cleaned
    with ``clean_text`` and split into pieces of 10000 characters. Each
    piece is sent on its own (with an empty history) and is re-sent until
    the reply contains "完成". Finally a role-setting prompt is sent with
    the accumulated ``history``.

    This is a generator: it yields ``(chatbot, history, '正常')`` after each
    step, plus whatever ``predict_no_ui_but_counting_down`` yields.

    Args:
        file_name: Path of the PDF to read.
        top_p, temperature: Passed through to the GPT request helper.
        chatbot: Conversation list shown to the user; updated in place.
        history: List that receives the shown prompts and the replies.
        systemPromptTxt: Accepted for interface compatibility; unused here.
    """
    import time
    import fitz
    print('begin analysis on:', file_name)
    with fitz.open(file_name) as doc:
        # join() instead of repeated "+=" keeps extraction linear in size.
        file_content = "".join(page.get_text() for page in doc)
    file_content = clean_text(file_content)
    split_number = 10000
    split_group = (len(file_content)//split_number)+1
    for i in range(0, split_group):
        # For the last piece the upper bound lies past the end of the text,
        # so this single slice also covers the original open-ended slice.
        chunk = file_content[i*split_number:(i+1)*split_number]
        if i == 0:
            prefix = "接下来请你仔细分析下面的论文,学习里面的内容(专业术语、公式、数学概念).并且注意:由于论文内容较多,将分批次发送,每次发送完之后,你只需要回答“接受完成”"
            i_say = prefix + f'文件名是{file_name},文章内容第{i+1}部分是 ```{chunk}```'
            i_say_show_user = f'文件名是:\n{file_name},\n由于论文内容过长,将分批请求(共{len(file_content)}字符,将分为{split_group}批,每批{split_number}字符)。\n当前发送{i+1}/{split_group}部分'
        elif i == split_group-1:
            i_say = f'你只需要回答“所有论文接受完成,请进行下一步”。文章内容第{i+1}/{split_group}部分是 ```{chunk}```'
            i_say_show_user = f'当前发送{i+1}/{split_group}部分'
        else:
            i_say = f'你只需要回答“接受完成”。文章内容第{i+1}/{split_group}部分是 ```{chunk}```'
            i_say_show_user = f'当前发送{i+1}/{split_group}部分'
        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
        gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[])  # with timeout countdown
        # NOTE(review): this retry loop has no upper bound; if the model never
        # answers with "完成" it keeps re-sending the same piece.
        while "完成" not in gpt_say:
            i_say = f'你只需要回答“接受完成”。文章内容第{i+1}/{split_group}部分是 ```{chunk}```'
            i_say_show_user = f'出现error重新发送{i+1}/{split_group}部分'
            gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[])  # with timeout countdown
            time.sleep(1)
        chatbot[-1] = (i_say_show_user, gpt_say)
        history.append(i_say_show_user)
        history.append(gpt_say)
        yield chatbot, history, '正常'
        time.sleep(2)

    i_say = f'接下来请你扮演一名专业的学术教授利用你的所有知识并且结合这篇文章回答我的问题。请牢记1.直到我说“退出”你才能结束任务2.所有问题需要紧密围绕文章内容;3.如果有公式请使用tex渲染)'
    chatbot.append((i_say, "[Local Message] waiting gpt response."))
    yield chatbot, history, '正常'

    # ** gpt request **
    gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature, history=history)  # with timeout countdown
    chatbot[-1] = (i_say, gpt_say)
    history.append(i_say)
    history.append(gpt_say)
    yield chatbot, history, '正常'
@CatchException
def 理解PDF文档内容(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
    """
    Plugin entry: pick a PDF through a Tk file dialog and analyse it.

    The incoming ``txt`` is ignored; it is replaced by the path chosen in
    the dialog. This is a generator yielding ``(chatbot, history, '正常')``
    updates, and it delegates the actual work to ``解析PDF``.
    """
    # Basic information: what the plugin does and who contributed it
    chatbot.append([
        "函数插件功能?",
        "理解PDF论文内容并且将结合上下文内容进行学术解答。函数插件贡献者: Hanzoe。"])
    yield chatbot, history, '正常'

    import tkinter as tk
    from tkinter import filedialog
    root = tk.Tk()
    root.withdraw()
    try:
        txt = filedialog.askopenfilename()
    finally:
        # Release the hidden root window; otherwise one leaks per invocation.
        root.destroy()

    # The dialog returns an empty value when the user cancels it.
    if not txt:
        report_execption(chatbot, history,
            a = "解析项目: 未选择文件",
            b = "未选择任何PDF文件。")
        yield chatbot, history, '正常'
        return

    # Try to import the dependency; suggest how to install it when missing
    try:
        import fitz
    except Exception:
        report_execption(chatbot, history,
            a = f"解析项目: {txt}",
            b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf```。")
        yield chatbot, history, '正常'
        return

    # Clear the history so the input does not overflow
    history = []

    # Start the actual task
    yield from 解析PDF(txt, top_p, temperature, chatbot, history, systemPromptTxt)
@CatchException
def 理解PDF文档内容标准文件输入(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
    """
    Plugin entry: analyse a PDF located via the text given in ``txt``.

    ``txt`` may be a directory, which is searched recursively for ``*.pdf``
    and whose first hit is used, or the path of a single PDF file. This is
    a generator yielding ``(chatbot, history, '正常')`` updates, and it
    delegates the actual work to ``解析PDF``.
    """
    import glob
    import os

    # Basic information: what the plugin does and who contributed it
    chatbot.append([
        "函数插件功能?",
        "理解PDF论文内容并且将结合上下文内容进行学术解答。函数插件贡献者: Hanzoe。"])
    yield chatbot, history, '正常'

    # Try to import the dependency; suggest how to install it when missing
    try:
        import fitz
    except Exception:
        report_execption(chatbot, history,
            a = f"解析项目: {txt}",
            b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf```。")
        yield chatbot, history, '正常'
        return

    # Clear the history so the input does not overflow
    history = []

    # Validate the input; leave immediately when nothing usable was given
    if not os.path.exists(txt):
        if txt == "":
            txt = '空空如也的输入栏'
        report_execption(chatbot, history,
                         a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield chatbot, history, '正常'
        return

    if os.path.isfile(txt) and txt.lower().endswith('.pdf'):
        # A direct path to a PDF: globbing "<file>/**/*.pdf" would find nothing.
        file_manifest = [txt]
    else:
        # Collect the candidate files below the given folder
        file_manifest = glob.glob(f'{txt}/**/*.pdf', recursive=True)

    # Nothing found: only .pdf files are searched, so say exactly that
    if not file_manifest:
        report_execption(chatbot, history,
                         a=f"解析项目: {txt}", b=f"找不到任何.pdf文件: {txt}")
        yield chatbot, history, '正常'
        return
    txt = file_manifest[0]

    # Start the actual task
    yield from 解析PDF(txt, top_p, temperature, chatbot, history, systemPromptTxt)

View File

@@ -0,0 +1,106 @@
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from toolbox import CatchException, report_execption, write_results_to_file
def get_meta_information(url, chatbot, history):
    """
    Scrape a Google Scholar result page and collect metadata per paper.

    For each ``.gs_ri`` result the title, author line, citation text and
    snippet are read; the title is then looked up on arxiv and, when the
    best hit's title is more than 90% similar, the arxiv summary replaces
    the snippet.

    This is a generator: after each paper it updates ``chatbot[-1]`` and
    yields ``(chatbot, [], "正常")``. Its return value (available through
    ``yield from``) is a list of dicts with the keys ``title``, ``author``,
    ``citation``, ``abstract`` and ``is_paper_in_arxiv``.

    Args:
        url: Address of the Google Scholar search page.
        chatbot: Conversation list whose last entry is overwritten.
        history: Accepted for interface compatibility; unused here.
    """
    import requests
    import arxiv
    import difflib
    from bs4 import BeautifulSoup
    from toolbox import get_conf
    proxies, = get_conf('proxies')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
    }
    # Send the GET request
    response = requests.get(url, proxies=proxies, headers=headers)
    # Parse the page
    soup = BeautifulSoup(response.text, "html.parser")

    def string_similar(s1, s2):
        return difflib.SequenceMatcher(None, s1, s2).quick_ratio()

    profile = []
    # Walk over every listed article
    for result in soup.select(".gs_ri"):
        # NOTE(review): as written the second replace() is a no-op; presumably
        # it was meant to collapse double spaces - confirm against the repo.
        title = result.a.text.replace('\n', ' ').replace(' ', ' ')
        author = result.select_one(".gs_a").text
        try:
            # The citation count is the text of the "cites" link
            citation = result.select_one(".gs_fl > a[href*='cites']").text
        except AttributeError:
            # select_one() returned None: the entry has no citation link
            citation = 'cited by 0'
        # The snippet lives in .gs_rs; strip surrounding whitespace
        abstract = result.select_one(".gs_rs").text.strip()
        search = arxiv.Search(
            query = title,
            max_results = 1,
            sort_by = arxiv.SortCriterion.Relevance,
        )
        # Default of None: a bare next() on an empty result would raise
        # StopIteration, which inside a generator turns into RuntimeError.
        paper = next(search.results(), None)
        if paper is not None and string_similar(title, paper.title) > 0.90:
            # Same paper: use the full arxiv summary
            abstract = paper.summary.replace('\n', ' ')
            is_paper_in_arxiv = True
        else:
            # Different paper or no hit: keep the Scholar snippet
            is_paper_in_arxiv = False
        print(title)
        print(author)
        print(citation)
        profile.append({
            'title':title,
            'author':author,
            'citation':citation,
            'abstract':abstract,
            'is_paper_in_arxiv':is_paper_in_arxiv,
        })

        chatbot[-1] = [chatbot[-1][0], title + f'\n\n是否在arxiv中不在arxiv中无法获取完整摘要:{is_paper_in_arxiv}\n\n' + abstract]
        msg = "正常"
        yield chatbot, [], msg
    return profile
@CatchException
def 谷歌检索小助手(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
    """
    Plugin entry: summarise every paper on a Google Scholar result page.

    ``txt`` is the URL of the search page. The metadata gathered by
    ``get_meta_information`` is sent to GPT in batches of 10 papers, the
    replies are collected in ``history`` and finally written out with
    ``write_results_to_file``. This is a generator yielding
    ``(chatbot, history, msg)`` updates.
    """
    # Basic information: what the plugin does and who contributed it
    chatbot.append([
        "函数插件功能?",
        "分析用户提供的谷歌学术google scholar搜索页面中出现的所有文章: binary-husky插件初始化中..."])
    yield chatbot, history, '正常'

    # Try to import the dependencies; suggest how to install them when missing
    try:
        import arxiv
        from bs4 import BeautifulSoup
    except Exception:
        report_execption(chatbot, history,
            a = f"解析项目: {txt}",
            b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade beautifulsoup4 arxiv```。")
        yield chatbot, history, '正常'
        return

    # Clear the history so the input does not overflow
    history = []

    meta_paper_info_list = yield from get_meta_information(txt, chatbot, history)

    # Process every paper, 10 per request. Previously only the first 10 were
    # sent even though the list was sliced as if more batches would follow.
    batch_size = 10
    batch_no = 0
    while len(meta_paper_info_list) > 0:
        batch_no += 1
        batch = meta_paper_info_list[:batch_size]
        i_say = "下面是一些学术文献的数据,请从中提取出以下内容。" + \
            "1、英文题目2、中文题目翻译3、作者4、arxiv公开is_paper_in_arxiv4、引用数量cite5、中文摘要翻译。" + \
            f"以下是信息源:{str(batch)}"

        inputs_show_user = f"请分析此页面中出现的所有文章:{txt}"
        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
            inputs=i_say, inputs_show_user=inputs_show_user,
            top_p=top_p, temperature=temperature, chatbot=chatbot, history=[],
            sys_prompt="你是一个学术翻译请从数据中提取信息。你必须使用Markdown格式。你必须逐个文献进行处理。"
        )

        # The first label is kept unchanged for backward compatibility.
        batch_label = "第一批" if batch_no == 1 else f"第{batch_no}批"
        history.extend([batch_label, gpt_say])
        meta_paper_info_list = meta_paper_info_list[batch_size:]

    chatbot.append(["状态?", "已经全部完成"])
    msg = '正常'
    yield chatbot, history, msg
    res = write_results_to_file(history)
    chatbot.append(("完成了吗?", res))
    yield chatbot, history, msg

15
main.py
View File

@@ -11,8 +11,9 @@ proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT,
PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
if not AUTHENTICATION: AUTHENTICATION = None
from check_proxy import get_current_version
initial_prompt = "Serve me as a writing and programming assistant."
title_html = "<h1 align=\"center\">ChatGPT 学术优化</h1>"
title_html = f"<h1 align=\"center\">ChatGPT 学术优化 {get_current_version()}</h1>"
description = """代码开源和更新[地址🚀](https://github.com/binary-husky/chatgpt_academic),感谢热情的[开发者们❤️](https://github.com/binary-husky/chatgpt_academic/graphs/contributors)"""
# 问询记录, python 版本建议3.9+(越新越好)
@@ -49,7 +50,7 @@ if LAYOUT == "TOP-DOWN":
CHATBOT_HEIGHT /= 2
cancel_handles = []
with gr.Blocks(theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
gr.HTML(title_html)
with gr_L1():
with gr_L2(scale=2):
@@ -160,15 +161,13 @@ with gr.Blocks(theme=set_theme, analytics_enabled=False, css=advanced_css) as de
def auto_opentab_delay():
import threading, webbrowser, time
print(f"如果浏览器没有自动打开请复制并转到以下URL")
print(f"\t(亮色主: http://localhost:{PORT}")
print(f"\t(暗色主: http://localhost:{PORT}/?__dark-theme=true")
print(f"\t(亮色主: http://localhost:{PORT}")
print(f"\t(暗色主: http://localhost:{PORT}/?__dark-theme=true")
def open():
time.sleep(2)
try: auto_update() # 检查新版本
except: pass
time.sleep(2) # 打开浏览器
webbrowser.open_new_tab(f"http://localhost:{PORT}/?__dark-theme=true")
threading.Thread(target=open, name="open-browser", daemon=True).start()
threading.Thread(target=auto_update, name="self-upgrade", daemon=True).start()
auto_opentab_delay()
demo.title = "ChatGPT 学术优化"
demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", share=True, server_port=PORT, auth=AUTHENTICATION)

View File

@@ -104,7 +104,10 @@ def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_pr
result = ''
while True:
try: chunk = next(stream_response).decode()
except StopIteration: break
except StopIteration:
break
except requests.exceptions.ConnectionError:
chunk = next(stream_response).decode() # 失败了,重试一次?再失败就没办法了。
if len(chunk)==0: continue
if not chunk.startswith('data:'):
error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()

View File

@@ -1,8 +1,13 @@
gradio>=3.23
requests[socks]
mdtex2html
Markdown
latex2mathml
openai
transformers
python-markdown-math
beautifulsoup4
latex2mathml
mdtex2html
tiktoken
Markdown
pygments
pymupdf
openai
numpy

View File

@@ -1,5 +1,6 @@
import gradio as gr
from toolbox import get_conf
CODE_HIGHLIGHT, = get_conf('CODE_HIGHLIGHT')
# gradio可用颜色列表
# gr.themes.utils.colors.slate (石板色)
# gr.themes.utils.colors.gray (灰色)
@@ -154,3 +155,75 @@ advanced_css = """
margin: 1em 2em 1em 0.5em;
}
"""
if CODE_HIGHLIGHT:
advanced_css += """
.hll { background-color: #ffffcc }
.c { color: #3D7B7B; font-style: italic } /* Comment */
.err { border: 1px solid #FF0000 } /* Error */
.k { color: hsl(197, 94%, 51%); font-weight: bold } /* Keyword */
.o { color: #666666 } /* Operator */
.ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */
.cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */
.cp { color: #9C6500 } /* Comment.Preproc */
.cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */
.c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */
.cs { color: #3D7B7B; font-style: italic } /* Comment.Special */
.gd { color: #A00000 } /* Generic.Deleted */
.ge { font-style: italic } /* Generic.Emph */
.gr { color: #E40000 } /* Generic.Error */
.gh { color: #000080; font-weight: bold } /* Generic.Heading */
.gi { color: #008400 } /* Generic.Inserted */
.go { color: #717171 } /* Generic.Output */
.gp { color: #000080; font-weight: bold } /* Generic.Prompt */
.gs { font-weight: bold } /* Generic.Strong */
.gu { color: #800080; font-weight: bold } /* Generic.Subheading */
.gt { color: #a9dd00 } /* Generic.Traceback */
.kc { color: #008000; font-weight: bold } /* Keyword.Constant */
.kd { color: #008000; font-weight: bold } /* Keyword.Declaration */
.kn { color: #008000; font-weight: bold } /* Keyword.Namespace */
.kp { color: #008000 } /* Keyword.Pseudo */
.kr { color: #008000; font-weight: bold } /* Keyword.Reserved */
.kt { color: #B00040 } /* Keyword.Type */
.m { color: #666666 } /* Literal.Number */
.s { color: #BA2121 } /* Literal.String */
.na { color: #687822 } /* Name.Attribute */
.nb { color: #e5f8c3 } /* Name.Builtin */
.nc { color: #ffad65; font-weight: bold } /* Name.Class */
.no { color: #880000 } /* Name.Constant */
.nd { color: #AA22FF } /* Name.Decorator */
.ni { color: #717171; font-weight: bold } /* Name.Entity */
.ne { color: #CB3F38; font-weight: bold } /* Name.Exception */
.nf { color: #f9f978 } /* Name.Function */
.nl { color: #767600 } /* Name.Label */
.nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
.nt { color: #008000; font-weight: bold } /* Name.Tag */
.nv { color: #19177C } /* Name.Variable */
.ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
.w { color: #bbbbbb } /* Text.Whitespace */
.mb { color: #666666 } /* Literal.Number.Bin */
.mf { color: #666666 } /* Literal.Number.Float */
.mh { color: #666666 } /* Literal.Number.Hex */
.mi { color: #666666 } /* Literal.Number.Integer */
.mo { color: #666666 } /* Literal.Number.Oct */
.sa { color: #BA2121 } /* Literal.String.Affix */
.sb { color: #BA2121 } /* Literal.String.Backtick */
.sc { color: #BA2121 } /* Literal.String.Char */
.dl { color: #BA2121 } /* Literal.String.Delimiter */
.sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */
.s2 { color: #2bf840 } /* Literal.String.Double */
.se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */
.sh { color: #BA2121 } /* Literal.String.Heredoc */
.si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */
.sx { color: #008000 } /* Literal.String.Other */
.sr { color: #A45A77 } /* Literal.String.Regex */
.s1 { color: #BA2121 } /* Literal.String.Single */
.ss { color: #19177C } /* Literal.String.Symbol */
.bp { color: #008000 } /* Name.Builtin.Pseudo */
.fm { color: #0000FF } /* Name.Function.Magic */
.vc { color: #19177C } /* Name.Variable.Class */
.vg { color: #19177C } /* Name.Variable.Global */
.vi { color: #19177C } /* Name.Variable.Instance */
.vm { color: #19177C } /* Name.Variable.Magic */
.il { color: #666666 } /* Literal.Number.Integer.Long */
"""

View File

@@ -6,7 +6,7 @@ import traceback
import importlib
import inspect
import re
from show_math import convert as convert_math
from latex2mathml.converter import convert as tex2mathml
from functools import wraps, lru_cache
@@ -162,7 +162,13 @@ def CatchException(f):
def HotReload(f):
"""
装饰器函数,实现函数插件热更新
HotReload的装饰器函数用于实现Python函数插件热更新
函数热更新是指在不停止程序运行的情况下,更新函数代码,从而达到实时更新功能。
在装饰器内部使用wraps(f)来保留函数的元信息并定义了一个名为decorated的内部函数。
内部函数通过使用importlib模块的reload函数和inspect模块的getmodule函数来重新加载并获取函数模块
然后通过getattr函数获取函数名并在新模块中重新加载函数。
最后使用yield from语句返回重新加载过的函数并在被装饰的函数上执行。
最终,装饰器函数返回内部函数。这个内部函数可以将函数的原始定义更新为最新版本,并执行函数的新版本。
"""
@wraps(f)
def decorated(*args, **kwargs):
@@ -203,15 +209,76 @@ def markdown_convertion(txt):
"""
pre = '<div class="markdown-body">'
suf = '</div>'
if ('$' in txt) and ('```' not in txt):
return pre + markdown.markdown(txt, extensions=['fenced_code', 'tables']) + '<br><br>' + markdown.markdown(convert_math(txt, splitParagraphs=False), extensions=['fenced_code', 'tables']) + suf
markdown_extension_configs = {
'mdx_math': {
'enable_dollar_delimiter': True,
'use_gitlab_delimiters': False,
},
}
find_equation_pattern = r'<script type="math/tex(?:.*?)>(.*?)</script>'
def tex2mathml_catch_exception(content, *args, **kwargs):
try:
content = tex2mathml(content, *args, **kwargs)
except:
content = content
return content
def replace_math_no_render(match):
content = match.group(1)
if 'mode=display' in match.group(0):
content = content.replace('\n', '</br>')
return f"<font color=\"#00FF00\">$$</font><font color=\"#FF00FF\">{content}</font><font color=\"#00FF00\">$$</font>"
else:
return f"<font color=\"#00FF00\">$</font><font color=\"#FF00FF\">{content}</font><font color=\"#00FF00\">$</font>"
def replace_math_render(match):
content = match.group(1)
if 'mode=display' in match.group(0):
if '\\begin{aligned}' in content:
content = content.replace('\\begin{aligned}', '\\begin{array}')
content = content.replace('\\end{aligned}', '\\end{array}')
content = content.replace('&', ' ')
content = tex2mathml_catch_exception(content, display="block")
return content
else:
return tex2mathml_catch_exception(content)
def markdown_bug_hunt(content):
"""
解决一个mdx_math的bug单$包裹begin命令时多余<script>
"""
content = content.replace('<script type="math/tex">\n<script type="math/tex; mode=display">', '<script type="math/tex; mode=display">')
content = content.replace('</script>\n</script>', '</script>')
return content
if ('$' in txt) and ('```' not in txt): # 有$标识的公式符号,且没有代码段```的标识
# convert everything to html format
split = markdown.markdown(text='---')
convert_stage_1 = markdown.markdown(text=txt, extensions=['mdx_math', 'fenced_code', 'tables', 'sane_lists'], extension_configs=markdown_extension_configs)
convert_stage_1 = markdown_bug_hunt(convert_stage_1)
# re.DOTALL: Make the '.' special character match any character at all, including a newline; without this flag, '.' will match anything except a newline. Corresponds to the inline flag (?s).
# 1. convert to easy-to-copy tex (do not render math)
convert_stage_2_1, n = re.subn(find_equation_pattern, replace_math_no_render, convert_stage_1, flags=re.DOTALL)
# 2. convert to rendered equation
convert_stage_2_2, n = re.subn(find_equation_pattern, replace_math_render, convert_stage_1, flags=re.DOTALL)
# cat them together
return pre + convert_stage_2_1 + f'{split}' + convert_stage_2_2 + suf
else:
return pre + markdown.markdown(txt, extensions=['fenced_code', 'tables']) + suf
return pre + markdown.markdown(txt, extensions=['fenced_code', 'codehilite', 'tables', 'sane_lists']) + suf
def close_up_code_segment_during_stream(gpt_reply):
"""
在gpt输出代码的中途输出了前面的```,但还没输出完后面的```),补上后面的```
在gpt输出代码的中途输出了前面的```,但还没输出完后面的```),补上后面的```
Args:
gpt_reply (str): GPT模型返回的回复字符串。
Returns:
str: 返回一个新的字符串,将输出代码片段的“后面的```”补上。
"""
if '```' not in gpt_reply:
return gpt_reply
@@ -409,6 +476,15 @@ def clear_line_break(txt):
class DummyWith():
"""
这段代码定义了一个名为DummyWith的空上下文管理器
它的作用是……额……没用,即在代码结构不变得情况下取代其他的上下文管理器。
上下文管理器是一种Python对象用于与with语句一起使用
以确保一些资源在代码块执行期间得到正确的初始化和清理。
上下文管理器必须实现两个方法,分别为 __enter__()和 __exit__()。
在上下文执行开始的情况下__enter__()方法会在代码块被执行前被调用,
而在上下文执行结束时__exit__()方法则会被调用。
"""
def __enter__(self):
return self

View File

@@ -1,5 +1,5 @@
{
"version": 2.4,
"version": 2.5,
"show_feature": true,
"new_feature": "(1)新增PDF全文翻译功能; (2)新增输入区切换位置的功能; (3)新增垂直布局选项; (4)多线程函数插件优化。"
"new_feature": "新增一键更新程序<->高亮代码<->高亮公式<->新增垂直布局选项"
}