Compare commits
40 Commits
version2.4
...
version2.5
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8ddc1adae4 | ||
|
|
4e3f759d0c | ||
|
|
94ff62bdaa | ||
|
|
2cbb5dbdaa | ||
|
|
3b85a29f91 | ||
|
|
166daa1ea7 | ||
|
|
5c3ecd7477 | ||
|
|
d5b03377ff | ||
|
|
7cd11f2bbd | ||
|
|
f65cc8deea | ||
|
|
48ee620524 | ||
|
|
8a5be8fb8d | ||
|
|
f26b8e28e1 | ||
|
|
b005b84ad6 | ||
|
|
1edf7ef80d | ||
|
|
3fed08f65e | ||
|
|
fa8603d745 | ||
|
|
6b5c2538cf | ||
|
|
7f1c7ebd68 | ||
|
|
ff87aebc29 | ||
|
|
2c746056ff | ||
|
|
0e4cac29f8 | ||
|
|
8513d46398 | ||
|
|
b2495a6f7e | ||
|
|
5603d33d67 | ||
|
|
d06d4f3a6f | ||
|
|
b2adc77a73 | ||
|
|
1f6e2547b2 | ||
|
|
fd0e3fb5c4 | ||
|
|
a0b7ae6674 | ||
|
|
8ca232cda3 | ||
|
|
34e983c7a5 | ||
|
|
c0d096726c | ||
|
|
969e8c1d89 | ||
|
|
d4e3082db4 | ||
|
|
777e56882b | ||
|
|
4da7d75ad4 | ||
|
|
1538acaa5a | ||
|
|
b47f69978e | ||
|
|
823c136de4 |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -140,4 +140,5 @@ gpt_log
|
||||
private.md
|
||||
private_upload
|
||||
other_llms
|
||||
cradle.py
|
||||
cradle*
|
||||
debug*
|
||||
@@ -4,10 +4,10 @@ RUN echo '[global]' > /etc/pip.conf && \
|
||||
echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \
|
||||
echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf
|
||||
|
||||
RUN pip3 install gradio requests[socks] mdtex2html
|
||||
|
||||
COPY . /gpt
|
||||
WORKDIR /gpt
|
||||
RUN pip3 install -r requirements.txt
|
||||
|
||||
|
||||
CMD ["python3", "main.py"]
|
||||
@@ -33,6 +33,7 @@ If you like this project, please give it a Star. If you've come up with more use
|
||||
chat分析报告生成 | [函数插件] 运行后自动生成总结汇报
|
||||
[arxiv小助手](https://www.bilibili.com/video/BV1LM4y1279X) | [函数插件] 输入arxiv文章url即可一键翻译摘要+下载PDF
|
||||
[PDF论文全文翻译功能](https://www.bilibili.com/video/BV1KT411x7Wn) | [函数插件] PDF论文提取题目&摘要+翻译全文(多线程)
|
||||
[谷歌学术统合小助手](https://www.bilibili.com/video/BV19L411U7ia) (Version>=2.45) | [函数插件] 给定任意谷歌学术搜索页面URL,让gpt帮你选择有趣的文章
|
||||
公式显示 | 可以同时显示公式的tex形式和渲染形式
|
||||
图片显示 | 可以在markdown中显示图片
|
||||
多线程函数插件支持 | 支持多线程调用chatgpt,一键处理海量文本或程序
|
||||
@@ -69,10 +70,11 @@ huggingface免科学上网[在线体验](https://huggingface.co/spaces/qingxu98/
|
||||
|
||||
- 如果输出包含公式,会同时以tex形式和渲染形式显示,方便复制和阅读
|
||||
<div align="center">
|
||||
<img src="img/demo.jpg" width="500" >
|
||||
<img src="https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png" width="700" >
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
- 懒得看项目代码?整个工程直接给chatgpt炫嘴里
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/96192199/226935232-6b6a73ce-8900-4aee-93f9-733c7e6fef53.png" width="700" >
|
||||
@@ -260,11 +262,12 @@ python check_proxy.py
|
||||
|
||||
- version 3 (Todo):
|
||||
- - 支持gpt4和其他更多llm
|
||||
- version 2.3+ (Todo):
|
||||
- version 2.4+ (Todo):
|
||||
- - 总结大工程源代码时文本过长、token溢出的问题
|
||||
- - 实现项目打包部署
|
||||
- - 函数插件参数接口优化
|
||||
- - 自更新
|
||||
- version 2.4: (1)新增PDF全文翻译功能; (2)新增输入区切换位置的功能; (3)新增垂直布局选项; (4)多线程函数插件优化。
|
||||
- version 2.3: 增强多线程交互性
|
||||
- version 2.2: 函数插件支持热重载
|
||||
- version 2.1: 可折叠式布局
|
||||
|
||||
123
check_proxy.py
123
check_proxy.py
@@ -20,31 +20,110 @@ def check_proxy(proxies):
|
||||
return result
|
||||
|
||||
|
||||
def auto_update():
|
||||
def backup_and_download(current_version, remote_version):
|
||||
"""
|
||||
一键更新协议:备份和下载
|
||||
"""
|
||||
from toolbox import get_conf
|
||||
import shutil
|
||||
import os
|
||||
import requests
|
||||
import time
|
||||
import json
|
||||
import zipfile
|
||||
os.makedirs(f'./history', exist_ok=True)
|
||||
backup_dir = f'./history/backup-{current_version}/'
|
||||
new_version_dir = f'./history/new-version-{remote_version}/'
|
||||
if os.path.exists(new_version_dir):
|
||||
return new_version_dir
|
||||
os.makedirs(new_version_dir)
|
||||
shutil.copytree('./', backup_dir, ignore=lambda x, y: ['history'])
|
||||
proxies, = get_conf('proxies')
|
||||
response = requests.get("https://raw.githubusercontent.com/binary-husky/chatgpt_academic/master/version",
|
||||
proxies=proxies, timeout=1)
|
||||
remote_json_data = json.loads(response.text)
|
||||
remote_version = remote_json_data['version']
|
||||
if remote_json_data["show_feature"]:
|
||||
new_feature = "新功能:" + remote_json_data["new_feature"]
|
||||
else:
|
||||
new_feature = ""
|
||||
with open('./version', 'r', encoding='utf8') as f:
|
||||
current_version = f.read()
|
||||
current_version = json.loads(current_version)['version']
|
||||
if (remote_version - current_version) >= 0.05:
|
||||
print(
|
||||
f'\n新版本可用。新版本:{remote_version},当前版本:{current_version}。{new_feature}')
|
||||
print('Github更新地址:\nhttps://github.com/binary-husky/chatgpt_academic\n')
|
||||
time.sleep(3)
|
||||
return
|
||||
else:
|
||||
return
|
||||
r = requests.get(
|
||||
'https://github.com/binary-husky/chatgpt_academic/archive/refs/heads/master.zip', proxies=proxies, stream=True)
|
||||
zip_file_path = backup_dir+'/master.zip'
|
||||
with open(zip_file_path, 'wb+') as f:
|
||||
f.write(r.content)
|
||||
dst_path = new_version_dir
|
||||
with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
|
||||
for zip_info in zip_ref.infolist():
|
||||
dst_file_path = os.path.join(dst_path, zip_info.filename)
|
||||
if os.path.exists(dst_file_path):
|
||||
os.remove(dst_file_path)
|
||||
zip_ref.extract(zip_info, dst_path)
|
||||
return new_version_dir
|
||||
|
||||
|
||||
def patch_and_restart(path):
    """
    One-click update protocol: overwrite the working tree and restart.

    Copies the extracted release found in ``path + '/chatgpt_academic-master'``
    over the current directory and then replaces the running process with a
    fresh ``python main.py``.

    Args:
        path: Directory that contains the unpacked ``chatgpt_academic-master``
            folder.

    This function does not return on success: ``os.execl`` replaces the
    current process image.
    """
    import shutil
    import os
    import sys
    import time
    # If the user has no config_private.py, keep a copy of the current
    # config.py under that name so the overwrite below cannot lose settings.
    if not os.path.exists('config_private.py'):
        print('由于您没有设置config_private.py私密配置,现将您的现有配置移动至config_private.py以防止配置丢失,',
              '另外您可以随时在history子文件夹下找回旧版的程序。')
        shutil.copyfile('config.py', 'config_private.py')
    # shutil.copytree(dirs_exist_ok=True) merges into the existing tree.
    # The previous `import distutils; distutils.dir_util.copy_tree(...)` only
    # worked when some other module had already imported distutils.dir_util,
    # and distutils no longer exists on Python 3.12+.
    shutil.copytree(path+'/chatgpt_academic-master', './', dirs_exist_ok=True)
    print('更新完成,您可以随时在history子文件夹下找回旧版的程序,5s之后重启')
    # Visible countdown: 4, 3, 2, 1, 0.
    for i in reversed(range(5)):
        time.sleep(1)
        print(i)
    print(' ------------------------------ -----------------------------------')
    os.execl(sys.executable, 'python', 'main.py')
|
||||
|
||||
|
||||
def get_current_version():
    """
    Read the local version number.

    Returns:
        The value stored under the ``"version"`` key of the JSON file
        ``./version``, or an empty string when the file is missing,
        unreadable, not valid JSON, or lacks that key.
    """
    import json
    try:
        with open('./version', 'r', encoding='utf8') as f:
            current_version = json.load(f)['version']
    except Exception:
        # Best effort: any read/parse problem falls back to "".
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        current_version = ""
    return current_version
|
||||
|
||||
|
||||
def auto_update():
    """
    One-click update protocol: check the remote version and ask the user.

    Downloads the ``version`` JSON from the project's GitHub master branch,
    compares its ``version`` field with the one in the local ``./version``
    file and, when the remote is at least 0.05 ahead, prints the news and
    asks on stdin whether to update. On ``Y``/``y`` it calls
    ``backup_and_download`` and then ``patch_and_restart``.

    Any failure (import, network, parsing, download) is reported with a
    one-line message instead of being raised.
    """
    try:
        from toolbox import get_conf
        import requests
        import json
        proxies, = get_conf('proxies')
        response = requests.get(
            "https://raw.githubusercontent.com/binary-husky/chatgpt_academic/master/version", proxies=proxies, timeout=1)
        remote_json_data = json.loads(response.text)
        remote_version = remote_json_data['version']
        if remote_json_data["show_feature"]:
            new_feature = "新功能:" + remote_json_data["new_feature"]
        else:
            new_feature = ""
        with open('./version', 'r', encoding='utf8') as f:
            current_version = json.load(f)['version']
        # Round before comparing: in binary floating point 2.5 - 2.45 is
        # 0.04999999999999982, which would fail a plain `>= 0.05` test and
        # silently hide the new release.
        if round(remote_version - current_version, 2) >= 0.05:
            print(
                f'\n新版本可用。新版本:{remote_version},当前版本:{current_version}。{new_feature}')
            print('(1)Github更新地址:\nhttps://github.com/binary-husky/chatgpt_academic\n')
            user_instruction = input('(2)是否一键更新代码(Y/y+回车=确认,输入其他/无输入+回车=不更新)?')
            if user_instruction in ['Y', 'y']:
                path = backup_and_download(current_version, remote_version)
                try:
                    patch_and_restart(path)
                except Exception:
                    print('更新失败。')
            else:
                print('自动更新程序:已禁用')
    except Exception:
        # Deliberate best effort: the update check must never break start-up.
        print('自动更新程序:已禁用')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -24,6 +24,9 @@ else:
|
||||
# 对话窗的高度
|
||||
CHATBOT_HEIGHT = 1115
|
||||
|
||||
# 代码高亮
|
||||
CODE_HIGHLIGHT = True
|
||||
|
||||
# 窗口布局
|
||||
LAYOUT = "LEFT-RIGHT" # "LEFT-RIGHT"(左右布局) # "TOP-DOWN"(上下布局)
|
||||
|
||||
|
||||
@@ -65,6 +65,7 @@ def get_crazy_functions():
|
||||
# HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
|
||||
"Function": HotReload(高阶功能模板函数)
|
||||
},
|
||||
|
||||
}
|
||||
###################### 第二组插件 ###########################
|
||||
# [第二组插件]: 经过充分测试,但功能上距离达到完美状态还差一点点
|
||||
@@ -72,6 +73,9 @@ def get_crazy_functions():
|
||||
from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer
|
||||
from crazy_functions.总结word文档 import 总结word文档
|
||||
from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档
|
||||
from crazy_functions.谷歌检索小助手 import 谷歌检索小助手
|
||||
from crazy_functions.理解PDF文档内容 import 理解PDF文档内容
|
||||
from crazy_functions.理解PDF文档内容 import 理解PDF文档内容标准文件输入
|
||||
|
||||
function_plugins.update({
|
||||
"批量翻译PDF文档(多线程)": {
|
||||
@@ -90,10 +94,26 @@ def get_crazy_functions():
|
||||
"AsButton": False, # 加入下拉菜单中
|
||||
"Function": HotReload(批量总结PDF文档pdfminer)
|
||||
},
|
||||
"谷歌学术检索助手(输入谷歌学术搜索页url)": {
|
||||
"Color": "stop",
|
||||
"AsButton": False, # 加入下拉菜单中
|
||||
"Function": HotReload(谷歌检索小助手)
|
||||
},
|
||||
"批量总结Word文档": {
|
||||
"Color": "stop",
|
||||
"Function": HotReload(总结word文档)
|
||||
},
|
||||
"理解PDF文档内容(Tk文件选择接口,仅本地)": {
|
||||
# HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
|
||||
"AsButton": False, # 加入下拉菜单中
|
||||
"Function": HotReload(理解PDF文档内容)
|
||||
},
|
||||
"理解PDF文档内容(通用接口,读取文件输入区)": {
|
||||
# HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
|
||||
"Color": "stop",
|
||||
"AsButton": False, # 加入下拉菜单中
|
||||
"Function": HotReload(理解PDF文档内容标准文件输入)
|
||||
},
|
||||
})
|
||||
|
||||
###################### 第三组插件 ###########################
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
|
||||
import traceback
|
||||
|
||||
def request_gpt_model_in_new_thread_with_ui_alive(inputs, inputs_show_user, top_p, temperature, chatbot, history, sys_prompt, refresh_interval=0.2):
|
||||
import time
|
||||
@@ -43,10 +43,16 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inp
|
||||
mutable = [["", time.time()] for _ in range(n_frag)]
|
||||
|
||||
def _req_gpt(index, inputs, history, sys_prompt):
|
||||
gpt_say = predict_no_ui_long_connection(
|
||||
inputs=inputs, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt, observe_window=mutable[
|
||||
index]
|
||||
)
|
||||
try:
|
||||
gpt_say = predict_no_ui_long_connection(
|
||||
inputs=inputs, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt, observe_window=mutable[index]
|
||||
)
|
||||
except:
|
||||
# 收拾残局
|
||||
tb_str = '```\n' + traceback.format_exc() + '```'
|
||||
gpt_say = f"[Local Message] 线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
|
||||
if len(mutable[index][0]) > 0:
|
||||
gpt_say += "此线程失败前收到的回答:" + mutable[index][0]
|
||||
return gpt_say
|
||||
# 异步任务开始
|
||||
futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(
|
||||
|
||||
185
crazy_functions/理解PDF文档内容.py
Normal file
185
crazy_functions/理解PDF文档内容.py
Normal file
@@ -0,0 +1,185 @@
|
||||
from request_llm.bridge_chatgpt import predict_no_ui
|
||||
from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
|
||||
import re
|
||||
import unicodedata
|
||||
fast_debug = False
|
||||
|
||||
def is_paragraph_break(match):
    """
    Decide whether a matched newline separates two paragraphs.

    ``match`` must expose two groups: the character just before the newline
    and the character just after it. The newline counts as a paragraph break
    when the preceding character ends a sentence (``.``, ``!`` or ``?``),
    the following character is upper case, and more than 140 characters of
    the searched string come before the preceding character.

    Returns:
        ``"\n\n"`` for a paragraph break, otherwise a single space.
    """
    before, after = match.groups()

    # Sentence-ending punctuation.
    ends_sentence = before in ".!?"

    # Minimum amount of text that has to precede the break; the offset of
    # group 1 is exactly the length of the text in front of it.
    long_enough = match.start(1) > 140

    if ends_sentence and after.isupper() and long_enough:
        return "\n\n"
    return " "
|
||||
|
||||
def normalize_text(text):
    """
    Reduce text to plain ASCII.

    The text is first NFKD-normalized, which splits compatibility characters
    such as ligatures into their base characters (the single character "ﬁ"
    becomes "f" + "i"); every remaining run of non-ASCII characters is then
    removed.
    """
    # Decompose ligatures and other compatibility characters.
    decomposed = unicodedata.normalize("NFKD", text)

    # Drop whatever is still outside the ASCII range.
    return re.sub(r'[^\x00-\x7F]+', '', decomposed)
|
||||
|
||||
def clean_text(raw_text):
    """
    Clean up and reflow raw text extracted from a PDF.

    Steps:
    1. Normalize the text with ``normalize_text``.
    2. Re-join words hyphenated across a line break, e.g. "Espe-\ncially"
       becomes "Especially".
    3. Replace each newline that sits directly between two non-space
       characters by whatever ``is_paragraph_break`` chooses (a paragraph
       separator or a space).

    Returns:
        The processed text with leading and trailing whitespace stripped.
    """
    def _dehyphenate(m):
        # Remove the "-\n" that splits one word over two lines.
        return m.group(1).replace('-\n', '')

    def _rejoin(m):
        # Keep both neighbouring characters and swap only the newline.
        return m.group(1) + is_paragraph_break(m) + m.group(2)

    text = normalize_text(raw_text)
    text = re.sub(r'(\w+-\n\w+)', _dehyphenate, text)

    # A newline together with the characters immediately around it.
    newline_pattern = re.compile(r'(\S)\n(\S)')
    text = newline_pattern.sub(_rejoin, text)

    return text.strip()
|
||||
|
||||
def 解析PDF(file_name, top_p, temperature, chatbot, history, systemPromptTxt):
    """
    Feed one PDF to the model in batches, then switch it into Q&A mode.

    The text of every page is extracted with PyMuPDF (``fitz``), cleaned with
    ``clean_text`` and sent in batches of 10000 characters. After each batch
    the reply must contain "完成"; otherwise the batch is sent again until it
    does. Finally a role-setting prompt is sent together with the accumulated
    ``history``.

    This is a generator: it yields ``(chatbot, history, '正常')`` whenever the
    UI should refresh and delegates to ``predict_no_ui_but_counting_down``.

    Args:
        file_name: Path of the PDF file.
        top_p, temperature: Sampling parameters forwarded to the model call.
        chatbot: Chat display list, appended to and updated in place.
        history: Conversation history list, appended to in place.
        systemPromptTxt: Not used by this function.
    """
    import time, fitz
    print('begin analysis on:', file_name)

    with fitz.open(file_name) as doc:
        file_content = clean_text("".join(page.get_text() for page in doc))
    split_number = 10000
    # Ceiling division with a floor of one batch. The former `len // n + 1`
    # produced an extra, empty final batch whenever the text length was an
    # exact non-zero multiple of the batch size.
    split_group = max(1, -(-len(file_content) // split_number))
    for i in range(0, split_group):
        chunk = file_content[i*split_number:(i+1)*split_number]
        if i == 0:
            prefix = "接下来请你仔细分析下面的论文,学习里面的内容(专业术语、公式、数学概念).并且注意:由于论文内容较多,将分批次发送,每次发送完之后,你只需要回答“接受完成”"
            i_say = prefix + f'文件名是{file_name},文章内容第{i+1}部分是 ```{chunk}```'
            i_say_show_user = f'文件名是:\n{file_name},\n由于论文内容过长,将分批请求(共{len(file_content)}字符,将分为{split_group}批,每批{split_number}字符)。\n当前发送{i+1}/{split_group}部分'
        elif i == split_group-1:
            i_say = f'你只需要回答“所有论文接受完成,请进行下一步”。文章内容第{i+1}/{split_group}部分是 ```{file_content[i*split_number:]}```'
            i_say_show_user = f'当前发送{i+1}/{split_group}部分'
        else:
            i_say = f'你只需要回答“接受完成”。文章内容第{i+1}/{split_group}部分是 ```{chunk}```'
            i_say_show_user = f'当前发送{i+1}/{split_group}部分'
        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
        # Request with a timeout countdown; each batch is sent with an empty history.
        gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[])
        # Resend the batch until the reply acknowledges it.
        # NOTE(review): there is no retry limit, so a model that never answers
        # with "完成" keeps this loop running.
        while "完成" not in gpt_say:
            i_say = f'你只需要回答“接受完成”。文章内容第{i+1}/{split_group}部分是 ```{chunk}```'
            i_say_show_user = f'出现error,重新发送{i+1}/{split_group}部分'
            gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[])
            time.sleep(1)
        chatbot[-1] = (i_say_show_user, gpt_say)
        history.append(i_say_show_user); history.append(gpt_say)
        yield chatbot, history, '正常'
        time.sleep(2)

    i_say = f'接下来,请你扮演一名专业的学术教授,利用你的所有知识并且结合这篇文章,回答我的问题。(请牢记:1.直到我说“退出”,你才能结束任务;2.所有问题需要紧密围绕文章内容;3.如果有公式,请使用tex渲染)'
    chatbot.append((i_say, "[Local Message] waiting gpt response."))
    yield chatbot, history, '正常'

    # Final request: this one carries the accumulated history.
    gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature, history=history)
    chatbot[-1] = (i_say, gpt_say)
    history.append(i_say); history.append(gpt_say)
    yield chatbot, history, '正常'
|
||||
|
||||
|
||||
@CatchException
def 理解PDF文档内容(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
    """
    Plugin entry point: pick a PDF with a local Tk file dialog and analyse it.

    The incoming ``txt`` is ignored and replaced by the path chosen in the
    dialog; the incoming ``history`` is discarded before the analysis starts.
    Generator that yields ``(chatbot, history, '正常')`` for UI refreshes and
    delegates the real work to ``解析PDF``.
    """
    # Basic information: purpose and contributor.
    chatbot.append([
        "函数插件功能?",
        "理解PDF论文内容,并且将结合上下文内容,进行学术解答。函数插件贡献者: Hanzoe。"])
    yield chatbot, history, '正常'

    import tkinter as tk
    from tkinter import filedialog

    root = tk.Tk()
    root.withdraw()
    try:
        txt = filedialog.askopenfilename()
    finally:
        # Release the hidden root window; otherwise every call leaves one behind.
        root.destroy()

    # The dialog returns an empty value when the user cancels.
    if not txt:
        report_execption(chatbot, history,
            a = "解析项目: 未选择文件",
            b = "未选择任何PDF文件,任务已取消。")
        yield chatbot, history, '正常'
        return

    # Try to import the dependency; suggest how to install it when missing.
    try:
        import fitz
    except ImportError:
        report_execption(chatbot, history,
            a = f"解析项目: {txt}",
            b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf```。")
        yield chatbot, history, '正常'
        return

    # Clear the history so the input does not overflow.
    history = []

    # Start the actual task.
    yield from 解析PDF(txt, top_p, temperature, chatbot, history, systemPromptTxt)
|
||||
|
||||
|
||||
|
||||
@CatchException
def 理解PDF文档内容标准文件输入(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
    """
    Plugin entry point: analyse a PDF located via the text input area.

    ``txt`` may be a directory, which is searched recursively for ``*.pdf``,
    or the path of a single PDF file. Only the first PDF found is analysed.
    The incoming ``history`` is discarded before the analysis starts.
    Generator that yields ``(chatbot, history, '正常')`` for UI refreshes and
    delegates the real work to ``解析PDF``.
    """
    import glob, os

    # Basic information: purpose and contributor.
    chatbot.append([
        "函数插件功能?",
        "理解PDF论文内容,并且将结合上下文内容,进行学术解答。函数插件贡献者: Hanzoe。"])
    yield chatbot, history, '正常'

    # Try to import the dependency; suggest how to install it when missing.
    try:
        import fitz
    except ImportError:
        report_execption(chatbot, history,
            a = f"解析项目: {txt}",
            b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf```。")
        yield chatbot, history, '正常'
        return

    # Clear the history so the input does not overflow.
    history = []

    # Validate the input; stop when nothing usable was given.
    if not os.path.exists(txt):
        if txt == "":
            txt = '空空如也的输入栏'
        report_execption(chatbot, history,
                         a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield chatbot, history, '正常'
        return

    # Build the list of files to process. A path that already points at a
    # PDF is accepted directly; previously it was globbed as if it were a
    # directory and therefore always reported as "not found".
    if os.path.isfile(txt):
        file_manifest = [txt] if txt.lower().endswith('.pdf') else []
    else:
        file_manifest = glob.glob(f'{txt}/**/*.pdf', recursive=True)

    # Nothing found. Only .pdf files are searched, so the message no longer
    # mentions .tex.
    if len(file_manifest) == 0:
        report_execption(chatbot, history,
                         a=f"解析项目: {txt}", b=f"找不到任何.pdf文件: {txt}")
        yield chatbot, history, '正常'
        return

    # Only the first PDF is analysed.
    txt = file_manifest[0]
    # Start the actual task.
    yield from 解析PDF(txt, top_p, temperature, chatbot, history, systemPromptTxt)
|
||||
106
crazy_functions/谷歌检索小助手.py
Normal file
106
crazy_functions/谷歌检索小助手.py
Normal file
@@ -0,0 +1,106 @@
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from toolbox import CatchException, report_execption, write_results_to_file
|
||||
|
||||
def get_meta_information(url, chatbot, history):
    """
    Scrape a Google Scholar result page and collect metadata for each paper.

    For every ``.gs_ri`` entry the title, author line, citation link text and
    snippet are read from the page. The title is then looked up on arXiv;
    when the best hit's title is more than 90% similar, the arXiv summary
    replaces the snippet and the paper is flagged as available on arXiv.

    Generator: after each entry the last ``chatbot`` row is overwritten with
    the paper's title/abstract and ``(chatbot, [], "正常")`` is yielded. The
    generator's return value (obtained through ``yield from``) is a list of
    dicts with the keys ``title``, ``author``, ``citation``, ``abstract`` and
    ``is_paper_in_arxiv``.

    Args:
        url: Address of the Google Scholar search page.
        chatbot: Chat display list; its last row is updated in place.
        history: Not used by this function.
    """
    import requests
    import arxiv
    import difflib
    from bs4 import BeautifulSoup
    from toolbox import get_conf
    proxies, = get_conf('proxies')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
    }
    # Send the GET request.
    response = requests.get(url, proxies=proxies, headers=headers)

    # Parse the page.
    soup = BeautifulSoup(response.text, "html.parser")

    def string_similar(s1, s2):
        return difflib.SequenceMatcher(None, s1, s2).quick_ratio()

    profile = []
    # Collect title and authors of every listed article.
    for result in soup.select(".gs_ri"):
        # NOTE(review): as written the second replace swaps a space for a
        # space (a no-op); presumably it was meant to collapse double
        # spaces — confirm against the upstream file.
        title = result.a.text.replace('\n', ' ').replace(' ', ' ')
        author = result.select_one(".gs_a").text
        try:
            # The citation count is the text of the "cites" link.
            citation = result.select_one(".gs_fl > a[href*='cites']").text
        except AttributeError:
            # select_one returned None: the entry has no citation link.
            citation = 'cited by 0'
        # The snippet lives in .gs_rs; strip surrounding whitespace.
        abstract = result.select_one(".gs_rs").text.strip()
        search = arxiv.Search(
            query = title,
            max_results = 1,
            sort_by = arxiv.SortCriterion.Relevance,
        )
        # Use a default: a bare next() on an empty result would raise
        # StopIteration, which inside a generator surfaces as RuntimeError.
        paper = next(search.results(), None)
        if paper is not None and string_similar(title, paper.title) > 0.90:  # same paper
            abstract = paper.summary.replace('\n', ' ')
            is_paper_in_arxiv = True
        else:  # different paper, or nothing found: keep the Scholar snippet
            is_paper_in_arxiv = False
        print(title)
        print(author)
        print(citation)
        profile.append({
            'title':title,
            'author':author,
            'citation':citation,
            'abstract':abstract,
            'is_paper_in_arxiv':is_paper_in_arxiv,
        })

        chatbot[-1] = [chatbot[-1][0], title + f'\n\n是否在arxiv中(不在arxiv中无法获取完整摘要):{is_paper_in_arxiv}\n\n' + abstract]
        msg = "正常"
        yield chatbot, [], msg
    return profile
|
||||
|
||||
@CatchException
def 谷歌检索小助手(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
    """
    Plugin entry point: summarise every article on a Google Scholar page.

    ``txt`` is the URL of the search page. Paper metadata is gathered with
    ``get_meta_information`` and sent to the model in batches of 10; the
    replies are collected in a fresh history and written to a file with
    ``write_results_to_file``. The incoming ``history`` is discarded.
    Generator that yields ``(chatbot, history, '正常')`` for UI refreshes.
    """
    # Basic information: purpose and contributor.
    chatbot.append([
        "函数插件功能?",
        "分析用户提供的谷歌学术(google scholar)搜索页面中,出现的所有文章: binary-husky,插件初始化中..."])
    yield chatbot, history, '正常'

    # Try to import the dependencies; suggest how to install them when missing.
    try:
        import arxiv
        from bs4 import BeautifulSoup
    except ImportError:
        report_execption(chatbot, history,
            a = f"解析项目: {txt}",
            b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade beautifulsoup4 arxiv```。")
        yield chatbot, history, '正常'
        return

    # Clear the history so the input does not overflow.
    history = []

    meta_paper_info_list = yield from get_meta_information(txt, chatbot, history)

    # Process the papers ten at a time. This used to be a single `if`, so
    # everything after the first ten entries was silently dropped.
    batch_no = 0
    while len(meta_paper_info_list[:10]) > 0:
        batch_no += 1
        batch = meta_paper_info_list[:10]
        # The item numbers in the prompt were "1,2,3,4,4,5"; they now run 1-6.
        i_say = "下面是一些学术文献的数据,请从中提取出以下内容。" + \
            "1、英文题目;2、中文题目翻译;3、作者;4、arxiv公开(is_paper_in_arxiv);5、引用数量(cite);6、中文摘要翻译。" + \
            f"以下是信息源:{str(batch)}"

        inputs_show_user = f"请分析此页面中出现的所有文章:{txt}"
        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
            inputs=i_say, inputs_show_user=inputs_show_user,
            top_p=top_p, temperature=temperature, chatbot=chatbot, history=[],
            sys_prompt="你是一个学术翻译,请从数据中提取信息。你必须使用Markdown格式。你必须逐个文献进行处理。"
        )

        # The first batch keeps its original label; later ones are numbered.
        batch_label = "第一批" if batch_no == 1 else f"第{batch_no}批"
        history.extend([ batch_label, gpt_say ])
        meta_paper_info_list = meta_paper_info_list[10:]

    chatbot.append(["状态?", "已经全部完成"])
    msg = '正常'
    yield chatbot, history, msg
    res = write_results_to_file(history)
    chatbot.append(("完成了吗?", res))
    yield chatbot, history, msg
|
||||
15
main.py
15
main.py
@@ -11,8 +11,9 @@ proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT,
|
||||
PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
|
||||
if not AUTHENTICATION: AUTHENTICATION = None
|
||||
|
||||
from check_proxy import get_current_version
|
||||
initial_prompt = "Serve me as a writing and programming assistant."
|
||||
title_html = "<h1 align=\"center\">ChatGPT 学术优化</h1>"
|
||||
title_html = f"<h1 align=\"center\">ChatGPT 学术优化 {get_current_version()}</h1>"
|
||||
description = """代码开源和更新[地址🚀](https://github.com/binary-husky/chatgpt_academic),感谢热情的[开发者们❤️](https://github.com/binary-husky/chatgpt_academic/graphs/contributors)"""
|
||||
|
||||
# 问询记录, python 版本建议3.9+(越新越好)
|
||||
@@ -49,7 +50,7 @@ if LAYOUT == "TOP-DOWN":
|
||||
CHATBOT_HEIGHT /= 2
|
||||
|
||||
cancel_handles = []
|
||||
with gr.Blocks(theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
|
||||
with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
|
||||
gr.HTML(title_html)
|
||||
with gr_L1():
|
||||
with gr_L2(scale=2):
|
||||
@@ -160,15 +161,13 @@ with gr.Blocks(theme=set_theme, analytics_enabled=False, css=advanced_css) as de
|
||||
def auto_opentab_delay():
|
||||
import threading, webbrowser, time
|
||||
print(f"如果浏览器没有自动打开,请复制并转到以下URL:")
|
||||
print(f"\t(亮色主体): http://localhost:{PORT}")
|
||||
print(f"\t(暗色主体): http://localhost:{PORT}/?__dark-theme=true")
|
||||
print(f"\t(亮色主题): http://localhost:{PORT}")
|
||||
print(f"\t(暗色主题): http://localhost:{PORT}/?__dark-theme=true")
|
||||
def open():
|
||||
time.sleep(2)
|
||||
try: auto_update() # 检查新版本
|
||||
except: pass
|
||||
time.sleep(2) # 打开浏览器
|
||||
webbrowser.open_new_tab(f"http://localhost:{PORT}/?__dark-theme=true")
|
||||
threading.Thread(target=open, name="open-browser", daemon=True).start()
|
||||
threading.Thread(target=auto_update, name="self-upgrade", daemon=True).start()
|
||||
|
||||
auto_opentab_delay()
|
||||
demo.title = "ChatGPT 学术优化"
|
||||
demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", share=True, server_port=PORT, auth=AUTHENTICATION)
|
||||
|
||||
@@ -104,7 +104,10 @@ def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_pr
|
||||
result = ''
|
||||
while True:
|
||||
try: chunk = next(stream_response).decode()
|
||||
except StopIteration: break
|
||||
except StopIteration:
|
||||
break
|
||||
except requests.exceptions.ConnectionError:
|
||||
chunk = next(stream_response).decode() # 失败了,重试一次?再失败就没办法了。
|
||||
if len(chunk)==0: continue
|
||||
if not chunk.startswith('data:'):
|
||||
error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()
|
||||
|
||||
@@ -1,8 +1,13 @@
|
||||
gradio>=3.23
|
||||
requests[socks]
|
||||
mdtex2html
|
||||
Markdown
|
||||
latex2mathml
|
||||
openai
|
||||
transformers
|
||||
python-markdown-math
|
||||
beautifulsoup4
|
||||
latex2mathml
|
||||
mdtex2html
|
||||
tiktoken
|
||||
Markdown
|
||||
pygments
|
||||
pymupdf
|
||||
openai
|
||||
numpy
|
||||
75
theme.py
75
theme.py
@@ -1,5 +1,6 @@
|
||||
import gradio as gr
|
||||
|
||||
from toolbox import get_conf
|
||||
CODE_HIGHLIGHT, = get_conf('CODE_HIGHLIGHT')
|
||||
# gradio可用颜色列表
|
||||
# gr.themes.utils.colors.slate (石板色)
|
||||
# gr.themes.utils.colors.gray (灰色)
|
||||
@@ -154,3 +155,75 @@ advanced_css = """
|
||||
margin: 1em 2em 1em 0.5em;
|
||||
}
|
||||
"""
|
||||
|
||||
if CODE_HIGHLIGHT:
|
||||
advanced_css += """
|
||||
.hll { background-color: #ffffcc }
|
||||
.c { color: #3D7B7B; font-style: italic } /* Comment */
|
||||
.err { border: 1px solid #FF0000 } /* Error */
|
||||
.k { color: hsl(197, 94%, 51%); font-weight: bold } /* Keyword */
|
||||
.o { color: #666666 } /* Operator */
|
||||
.ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */
|
||||
.cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */
|
||||
.cp { color: #9C6500 } /* Comment.Preproc */
|
||||
.cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */
|
||||
.c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */
|
||||
.cs { color: #3D7B7B; font-style: italic } /* Comment.Special */
|
||||
.gd { color: #A00000 } /* Generic.Deleted */
|
||||
.ge { font-style: italic } /* Generic.Emph */
|
||||
.gr { color: #E40000 } /* Generic.Error */
|
||||
.gh { color: #000080; font-weight: bold } /* Generic.Heading */
|
||||
.gi { color: #008400 } /* Generic.Inserted */
|
||||
.go { color: #717171 } /* Generic.Output */
|
||||
.gp { color: #000080; font-weight: bold } /* Generic.Prompt */
|
||||
.gs { font-weight: bold } /* Generic.Strong */
|
||||
.gu { color: #800080; font-weight: bold } /* Generic.Subheading */
|
||||
.gt { color: #a9dd00 } /* Generic.Traceback */
|
||||
.kc { color: #008000; font-weight: bold } /* Keyword.Constant */
|
||||
.kd { color: #008000; font-weight: bold } /* Keyword.Declaration */
|
||||
.kn { color: #008000; font-weight: bold } /* Keyword.Namespace */
|
||||
.kp { color: #008000 } /* Keyword.Pseudo */
|
||||
.kr { color: #008000; font-weight: bold } /* Keyword.Reserved */
|
||||
.kt { color: #B00040 } /* Keyword.Type */
|
||||
.m { color: #666666 } /* Literal.Number */
|
||||
.s { color: #BA2121 } /* Literal.String */
|
||||
.na { color: #687822 } /* Name.Attribute */
|
||||
.nb { color: #e5f8c3 } /* Name.Builtin */
|
||||
.nc { color: #ffad65; font-weight: bold } /* Name.Class */
|
||||
.no { color: #880000 } /* Name.Constant */
|
||||
.nd { color: #AA22FF } /* Name.Decorator */
|
||||
.ni { color: #717171; font-weight: bold } /* Name.Entity */
|
||||
.ne { color: #CB3F38; font-weight: bold } /* Name.Exception */
|
||||
.nf { color: #f9f978 } /* Name.Function */
|
||||
.nl { color: #767600 } /* Name.Label */
|
||||
.nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
|
||||
.nt { color: #008000; font-weight: bold } /* Name.Tag */
|
||||
.nv { color: #19177C } /* Name.Variable */
|
||||
.ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
|
||||
.w { color: #bbbbbb } /* Text.Whitespace */
|
||||
.mb { color: #666666 } /* Literal.Number.Bin */
|
||||
.mf { color: #666666 } /* Literal.Number.Float */
|
||||
.mh { color: #666666 } /* Literal.Number.Hex */
|
||||
.mi { color: #666666 } /* Literal.Number.Integer */
|
||||
.mo { color: #666666 } /* Literal.Number.Oct */
|
||||
.sa { color: #BA2121 } /* Literal.String.Affix */
|
||||
.sb { color: #BA2121 } /* Literal.String.Backtick */
|
||||
.sc { color: #BA2121 } /* Literal.String.Char */
|
||||
.dl { color: #BA2121 } /* Literal.String.Delimiter */
|
||||
.sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */
|
||||
.s2 { color: #2bf840 } /* Literal.String.Double */
|
||||
.se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */
|
||||
.sh { color: #BA2121 } /* Literal.String.Heredoc */
|
||||
.si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */
|
||||
.sx { color: #008000 } /* Literal.String.Other */
|
||||
.sr { color: #A45A77 } /* Literal.String.Regex */
|
||||
.s1 { color: #BA2121 } /* Literal.String.Single */
|
||||
.ss { color: #19177C } /* Literal.String.Symbol */
|
||||
.bp { color: #008000 } /* Name.Builtin.Pseudo */
|
||||
.fm { color: #0000FF } /* Name.Function.Magic */
|
||||
.vc { color: #19177C } /* Name.Variable.Class */
|
||||
.vg { color: #19177C } /* Name.Variable.Global */
|
||||
.vi { color: #19177C } /* Name.Variable.Instance */
|
||||
.vm { color: #19177C } /* Name.Variable.Magic */
|
||||
.il { color: #666666 } /* Literal.Number.Integer.Long */
|
||||
"""
|
||||
|
||||
88
toolbox.py
88
toolbox.py
@@ -6,7 +6,7 @@ import traceback
|
||||
import importlib
|
||||
import inspect
|
||||
import re
|
||||
from show_math import convert as convert_math
|
||||
from latex2mathml.converter import convert as tex2mathml
|
||||
from functools import wraps, lru_cache
|
||||
|
||||
|
||||
@@ -162,7 +162,13 @@ def CatchException(f):
|
||||
|
||||
def HotReload(f):
|
||||
"""
|
||||
装饰器函数,实现函数插件热更新
|
||||
HotReload的装饰器函数,用于实现Python函数插件的热更新。
|
||||
函数热更新是指在不停止程序运行的情况下,更新函数代码,从而达到实时更新功能。
|
||||
在装饰器内部,使用wraps(f)来保留函数的元信息,并定义了一个名为decorated的内部函数。
|
||||
内部函数通过使用importlib模块的reload函数和inspect模块的getmodule函数来重新加载并获取函数模块,
|
||||
然后通过getattr函数获取函数名,并在新模块中重新加载函数。
|
||||
最后,使用yield from语句返回重新加载过的函数,并在被装饰的函数上执行。
|
||||
最终,装饰器函数返回内部函数。这个内部函数可以将函数的原始定义更新为最新版本,并执行函数的新版本。
|
||||
"""
|
||||
@wraps(f)
|
||||
def decorated(*args, **kwargs):
|
||||
@@ -203,15 +209,76 @@ def markdown_convertion(txt):
|
||||
"""
|
||||
pre = '<div class="markdown-body">'
|
||||
suf = '</div>'
|
||||
if ('$' in txt) and ('```' not in txt):
|
||||
return pre + markdown.markdown(txt, extensions=['fenced_code', 'tables']) + '<br><br>' + markdown.markdown(convert_math(txt, splitParagraphs=False), extensions=['fenced_code', 'tables']) + suf
|
||||
markdown_extension_configs = {
|
||||
'mdx_math': {
|
||||
'enable_dollar_delimiter': True,
|
||||
'use_gitlab_delimiters': False,
|
||||
},
|
||||
}
|
||||
find_equation_pattern = r'<script type="math/tex(?:.*?)>(.*?)</script>'
|
||||
|
||||
def tex2mathml_catch_exception(content, *args, **kwargs):
|
||||
try:
|
||||
content = tex2mathml(content, *args, **kwargs)
|
||||
except:
|
||||
content = content
|
||||
return content
|
||||
|
||||
def replace_math_no_render(match):
|
||||
content = match.group(1)
|
||||
if 'mode=display' in match.group(0):
|
||||
content = content.replace('\n', '</br>')
|
||||
return f"<font color=\"#00FF00\">$$</font><font color=\"#FF00FF\">{content}</font><font color=\"#00FF00\">$$</font>"
|
||||
else:
|
||||
return f"<font color=\"#00FF00\">$</font><font color=\"#FF00FF\">{content}</font><font color=\"#00FF00\">$</font>"
|
||||
|
||||
def replace_math_render(match):
|
||||
content = match.group(1)
|
||||
if 'mode=display' in match.group(0):
|
||||
if '\\begin{aligned}' in content:
|
||||
content = content.replace('\\begin{aligned}', '\\begin{array}')
|
||||
content = content.replace('\\end{aligned}', '\\end{array}')
|
||||
content = content.replace('&', ' ')
|
||||
content = tex2mathml_catch_exception(content, display="block")
|
||||
return content
|
||||
else:
|
||||
return tex2mathml_catch_exception(content)
|
||||
|
||||
def markdown_bug_hunt(content):
|
||||
"""
|
||||
解决一个mdx_math的bug(单$包裹begin命令时多余<script>)
|
||||
"""
|
||||
content = content.replace('<script type="math/tex">\n<script type="math/tex; mode=display">', '<script type="math/tex; mode=display">')
|
||||
content = content.replace('</script>\n</script>', '</script>')
|
||||
return content
|
||||
|
||||
|
||||
if ('$' in txt) and ('```' not in txt): # 有$标识的公式符号,且没有代码段```的标识
|
||||
# convert everything to html format
|
||||
split = markdown.markdown(text='---')
|
||||
convert_stage_1 = markdown.markdown(text=txt, extensions=['mdx_math', 'fenced_code', 'tables', 'sane_lists'], extension_configs=markdown_extension_configs)
|
||||
convert_stage_1 = markdown_bug_hunt(convert_stage_1)
|
||||
# re.DOTALL: Make the '.' special character match any character at all, including a newline; without this flag, '.' will match anything except a newline. Corresponds to the inline flag (?s).
|
||||
# 1. convert to easy-to-copy tex (do not render math)
|
||||
convert_stage_2_1, n = re.subn(find_equation_pattern, replace_math_no_render, convert_stage_1, flags=re.DOTALL)
|
||||
# 2. convert to rendered equation
|
||||
convert_stage_2_2, n = re.subn(find_equation_pattern, replace_math_render, convert_stage_1, flags=re.DOTALL)
|
||||
# cat them together
|
||||
return pre + convert_stage_2_1 + f'{split}' + convert_stage_2_2 + suf
|
||||
else:
|
||||
return pre + markdown.markdown(txt, extensions=['fenced_code', 'tables']) + suf
|
||||
return pre + markdown.markdown(txt, extensions=['fenced_code', 'codehilite', 'tables', 'sane_lists']) + suf
|
||||
|
||||
|
||||
def close_up_code_segment_during_stream(gpt_reply):
|
||||
"""
|
||||
在gpt输出代码的中途(输出了前面的```,但还没输出完后面的```),补上后面的```
|
||||
在gpt输出代码的中途(输出了前面的```,但还没输出完后面的```),补上后面的```
|
||||
|
||||
Args:
|
||||
gpt_reply (str): GPT模型返回的回复字符串。
|
||||
|
||||
Returns:
|
||||
str: 返回一个新的字符串,将输出代码片段的“后面的```”补上。
|
||||
|
||||
"""
|
||||
if '```' not in gpt_reply:
|
||||
return gpt_reply
|
||||
@@ -409,6 +476,15 @@ def clear_line_break(txt):
|
||||
|
||||
|
||||
class DummyWith():
|
||||
"""
|
||||
这段代码定义了一个名为DummyWith的空上下文管理器,
|
||||
它的作用是……额……没用,即在代码结构不变得情况下取代其他的上下文管理器。
|
||||
上下文管理器是一种Python对象,用于与with语句一起使用,
|
||||
以确保一些资源在代码块执行期间得到正确的初始化和清理。
|
||||
上下文管理器必须实现两个方法,分别为 __enter__()和 __exit__()。
|
||||
在上下文执行开始的情况下,__enter__()方法会在代码块被执行前被调用,
|
||||
而在上下文执行结束时,__exit__()方法则会被调用。
|
||||
"""
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
|
||||
Reference in New Issue
Block a user