version 2.5

移动参数位置
错别字
2023-04-08 22:27:02 +08:00 · 2023-04-08 22:16:33 +08:00 · 2023-04-08 22:15:33 +08:00 · 2023-04-08 22:14:05 +08:00 · 2023-04-08 02:48:35 +08:00 · 2023-04-08 02:39:54 +08:00
15 changed files with 612 additions and 53 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -140,4 +140,5 @@ gpt_log
 private.md
 private_upload
 other_llms
-cradle.py
+cradle*
+debug*
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,10 +4,10 @@ RUN echo '[global]' > /etc/pip.conf && \
    echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \
    echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf

-RUN pip3 install gradio requests[socks] mdtex2html

 COPY . /gpt
 WORKDIR /gpt
+RUN pip3 install -r requirements.txt


 CMD ["python3", "main.py"]
--- a/README.md
+++ b/README.md
@@ -33,6 +33,7 @@ If you like this project, please give it a Star. If you've come up with more use
 chat分析报告生成 | [函数插件] 运行后自动生成总结汇报
 [arxiv小助手](https://www.bilibili.com/video/BV1LM4y1279X) | [函数插件] 输入arxiv文章url即可一键翻译摘要+下载PDF
 [PDF论文全文翻译功能](https://www.bilibili.com/video/BV1KT411x7Wn) | [函数插件] PDF论文提取题目&摘要+翻译全文（多线程）
+[谷歌学术统合小助手](https://www.bilibili.com/video/BV19L411U7ia) (Version>=2.45) | [函数插件] 给定任意谷歌学术搜索页面URL，让gpt帮你选择有趣的文章
 公式显示 | 可以同时显示公式的tex形式和渲染形式
 图片显示 | 可以在markdown中显示图片
 多线程函数插件支持 | 支持多线调用chatgpt，一键处理海量文本或程序
@@ -69,10 +70,11 @@ huggingface免科学上网[在线体验](https://huggingface.co/spaces/qingxu98/

 - 如果输出包含公式，会同时以tex形式和渲染形式显示，方便复制和阅读
 <div align="center">
-<img src="img/demo.jpg" width="500" >
+<img src="https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png" width="700" >
 </div>


+
 - 懒得看项目代码？整个工程直接给chatgpt炫嘴里
 <div align="center">
 <img src="https://user-images.githubusercontent.com/96192199/226935232-6b6a73ce-8900-4aee-93f9-733c7e6fef53.png" width="700" >
@@ -260,11 +262,12 @@ python check_proxy.py

 - version 3 (Todo): 
 - - 支持gpt4和其他更多llm
- version 2.3+ (Todo): 
+- version 2.4+ (Todo): 
 - - 总结大工程源代码时文本过长、token溢出的问题
 - - 实现项目打包部署
 - - 函数插件参数接口优化
 - - 自更新
+- version 2.4: (1)新增PDF全文翻译功能; (2)新增输入区切换位置的功能; (3)新增垂直布局选项; (4)多线程函数插件优化。
 - version 2.3: 增强多线程交互性
 - version 2.2: 函数插件支持热重载
 - version 2.1: 可折叠式布局
--- a/check_proxy.py
+++ b/check_proxy.py
@@ -20,31 +20,110 @@ def check_proxy(proxies):
        return result


-def auto_update():
+def backup_and_download(current_version, remote_version):
+    """
+    一键更新协议：备份和下载
+    """
    from toolbox import get_conf
+    import shutil
+    import os
    import requests
-    import time
-    import json
+    import zipfile
+    os.makedirs(f'./history', exist_ok=True)
+    backup_dir = f'./history/backup-{current_version}/'
+    new_version_dir = f'./history/new-version-{remote_version}/'
+    if os.path.exists(new_version_dir):
+        return new_version_dir
+    os.makedirs(new_version_dir)
+    shutil.copytree('./', backup_dir, ignore=lambda x, y: ['history'])
    proxies, = get_conf('proxies')
-    response = requests.get("https://raw.githubusercontent.com/binary-husky/chatgpt_academic/master/version",
-                            proxies=proxies, timeout=1)
-    remote_json_data = json.loads(response.text)
-    remote_version = remote_json_data['version']
-    if remote_json_data["show_feature"]:
-        new_feature = "新功能：" + remote_json_data["new_feature"]
-    else:
-        new_feature = ""
-    with open('./version', 'r', encoding='utf8') as f:
-        current_version = f.read()
-        current_version = json.loads(current_version)['version']
-    if (remote_version - current_version) >= 0.05:
-        print(
-            f'\n新版本可用。新版本:{remote_version}，当前版本:{current_version}。{new_feature}')
-        print('Github更新地址:\nhttps://github.com/binary-husky/chatgpt_academic\n')
-        time.sleep(3)
-        return
-    else:
-        return
+    r = requests.get(
+        'https://github.com/binary-husky/chatgpt_academic/archive/refs/heads/master.zip', proxies=proxies, stream=True)
+    zip_file_path = backup_dir+'/master.zip'
+    with open(zip_file_path, 'wb+') as f:
+        f.write(r.content)
+    dst_path = new_version_dir
+    with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
+        for zip_info in zip_ref.infolist():
+            dst_file_path = os.path.join(dst_path, zip_info.filename)
+            if os.path.exists(dst_file_path):
+                os.remove(dst_file_path)
+            zip_ref.extract(zip_info, dst_path)
+    return new_version_dir
+
+
+def patch_and_restart(path):
+    """
+    一键更新协议：覆盖和重启
+    """
+    import distutils
+    import shutil
+    import os
+    import sys
+    import time
+    # if config_private.py does not exist, copy the original config.py to config_private.py
+    if not os.path.exists('config_private.py'):
+        print('由于您没有设置config_private.py私密配置，现将您的现有配置移动至config_private.py以防止配置丢失，',
+              '另外您可以随时在history子文件夹下找回旧版的程序。')
+        shutil.copyfile('config.py', 'config_private.py')
+    distutils.dir_util.copy_tree(path+'/chatgpt_academic-master', './')
+    print('更新完成，您可以随时在history子文件夹下找回旧版的程序，5s之后重启')
+    for i in reversed(range(5)):
+        time.sleep(1)
+        print(i)
+    print(' ------------------------------ -----------------------------------')
+    os.execl(sys.executable, 'python', 'main.py')
+
+
+def get_current_version():
+    import json
+    try:
+        with open('./version', 'r', encoding='utf8') as f:
+            current_version = json.loads(f.read())['version']
+    except:
+        current_version = ""
+    return current_version
+
+
+def auto_update():
+    """
+    一键更新协议：查询版本和用户意见
+    """
+    try:
+        from toolbox import get_conf
+        import requests
+        import time
+        import json
+        proxies, = get_conf('proxies')
+        response = requests.get(
+            "https://raw.githubusercontent.com/binary-husky/chatgpt_academic/master/version", proxies=proxies, timeout=1)
+        remote_json_data = json.loads(response.text)
+        remote_version = remote_json_data['version']
+        if remote_json_data["show_feature"]:
+            new_feature = "新功能：" + remote_json_data["new_feature"]
+        else:
+            new_feature = ""
+        with open('./version', 'r', encoding='utf8') as f:
+            current_version = f.read()
+            current_version = json.loads(current_version)['version']
+        if (remote_version - current_version) >= 0.05:
+            print(
+                f'\n新版本可用。新版本:{remote_version}，当前版本:{current_version}。{new_feature}')
+            print('（1）Github更新地址:\nhttps://github.com/binary-husky/chatgpt_academic\n')
+            user_instruction = input('（2）是否一键更新代码（Y/y+回车=确认，输入其他/无输入+回车=不更新）？')
+            if user_instruction in ['Y', 'y']:
+                path = backup_and_download(current_version, remote_version)
+                try:
+                    patch_and_restart(path)
+                except:
+                    print('更新失败。')
+            else:
+                print('自动更新程序：已禁用')
+                return
+        else:
+            return
+    except:
+        print('自动更新程序：已禁用')


 if __name__ == '__main__':
--- a/config.py
+++ b/config.py
@@ -24,6 +24,9 @@ else:
 # 对话窗的高度
 CHATBOT_HEIGHT = 1115

+# 代码高亮
+CODE_HIGHLIGHT = True
+
 # 窗口布局
 LAYOUT = "LEFT-RIGHT"  # "LEFT-RIGHT"（左右布局） # "TOP-DOWN"（上下布局）

--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -65,6 +65,7 @@ def get_crazy_functions():
            # HotReload 的意思是热更新，修改函数插件代码后，不需要重启程序，代码直接生效
            "Function": HotReload(高阶功能模板函数)
        },
+
    }
    ###################### 第二组插件 ###########################
    # [第二组插件]: 经过充分测试，但功能上距离达到完美状态还差一点点
@@ -72,6 +73,9 @@ def get_crazy_functions():
    from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer
    from crazy_functions.总结word文档 import 总结word文档
    from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档
+    from crazy_functions.谷歌检索小助手 import 谷歌检索小助手
+    from crazy_functions.理解PDF文档内容 import 理解PDF文档内容
+    from crazy_functions.理解PDF文档内容 import 理解PDF文档内容标准文件输入

    function_plugins.update({
        "批量翻译PDF文档（多线程）": {
@@ -90,10 +94,26 @@ def get_crazy_functions():
            "AsButton": False,  # 加入下拉菜单中
            "Function": HotReload(批量总结PDF文档pdfminer)
        },
+        "谷歌学术检索助手（输入谷歌学术搜索页url）": {
+            "Color": "stop",
+            "AsButton": False,  # 加入下拉菜单中
+            "Function": HotReload(谷歌检索小助手)
+        },
        "批量总结Word文档": {
            "Color": "stop",
            "Function": HotReload(总结word文档)
        },
+        "理解PDF文档内容（Tk文件选择接口，仅本地）": {
+            # HotReload 的意思是热更新，修改函数插件代码后，不需要重启程序，代码直接生效
+            "AsButton": False,  # 加入下拉菜单中
+            "Function": HotReload(理解PDF文档内容)
+        },
+        "理解PDF文档内容（通用接口，读取文件输入区）": {
+            # HotReload 的意思是热更新，修改函数插件代码后，不需要重启程序，代码直接生效
+            "Color": "stop",
+            "AsButton": False,  # 加入下拉菜单中
+            "Function": HotReload(理解PDF文档内容标准文件输入)
+        },
    })

    ###################### 第三组插件 ###########################
--- a/crazy_functions/crazy_utils.py
+++ b/crazy_functions/crazy_utils.py
@@ -1,4 +1,4 @@
-
+import traceback

 def request_gpt_model_in_new_thread_with_ui_alive(inputs, inputs_show_user, top_p, temperature, chatbot, history, sys_prompt, refresh_interval=0.2):
    import time
@@ -43,10 +43,16 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inp
    mutable = [["", time.time()] for _ in range(n_frag)]

    def _req_gpt(index, inputs, history, sys_prompt):
-        gpt_say = predict_no_ui_long_connection(
-            inputs=inputs, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt, observe_window=mutable[
-                index]
-        )
+        try:
+            gpt_say = predict_no_ui_long_connection(
+                inputs=inputs, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt, observe_window=mutable[index]
+            )
+        except:
+            # 收拾残局
+            tb_str = '```\n' + traceback.format_exc() + '```'
+            gpt_say = f"[Local Message] 线程{index}在执行过程中遭遇问题, Traceback：\n\n{tb_str}\n\n"
+            if len(mutable[index][0]) > 0:
+                gpt_say += "此线程失败前收到的回答：" + mutable[index][0]
        return gpt_say
    # 异步任务开始
    futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(
--- a/crazy_functions/理解PDF文档内容.py
+++ b/crazy_functions/理解PDF文档内容.py
@@ -0,0 +1,185 @@
+from request_llm.bridge_chatgpt import predict_no_ui
+from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
+import re
+import unicodedata
+fast_debug = False
+
+def is_paragraph_break(match):
+    """
+    根据给定的匹配结果来判断换行符是否表示段落分隔。
+    如果换行符前为句子结束标志（句号，感叹号，问号），且下一个字符为大写字母，则换行符更有可能表示段落分隔。
+    也可以根据之前的内容长度来判断段落是否已经足够长。
+    """
+    prev_char, next_char = match.groups()
+
+    # 句子结束标志
+    sentence_endings = ".!?"
+
+    # 设定一个最小段落长度阈值
+    min_paragraph_length = 140
+
+    if prev_char in sentence_endings and next_char.isupper() and len(match.string[:match.start(1)]) > min_paragraph_length:
+        return "\n\n" 
+    else:
+        return " "
+
+def normalize_text(text):
+    """
+    通过把连字（ligatures）等文本特殊符号转换为其基本形式来对文本进行归一化处理。
+    例如，将连字 "ﬁ" 转换为 "f" 和 "i"。
+    """
+    # 对文本进行归一化处理，分解连字
+    normalized_text = unicodedata.normalize("NFKD", text)
+
+    # 替换其他特殊字符
+    cleaned_text = re.sub(r'[^\x00-\x7F]+', '', normalized_text)
+
+    return cleaned_text
+
+def clean_text(raw_text):
+    """
+    对从 PDF 提取出的原始文本进行清洗和格式化处理。
+    1. 对原始文本进行归一化处理。
+    2. 替换跨行的连词，例如 “Espe-\ncially” 转换为 “Especially”。
+    3. 根据 heuristic 规则判断换行符是否是段落分隔，并相应地进行替换。
+    """
+    # 对文本进行归一化处理
+    normalized_text = normalize_text(raw_text)
+
+    # 替换跨行的连词
+    text = re.sub(r'(\w+-\n\w+)', lambda m: m.group(1).replace('-\n', ''), normalized_text)
+
+    # 根据前后相邻字符的特点，找到原文本中的换行符
+    newlines = re.compile(r'(\S)\n(\S)')
+
+    # 根据 heuristic 规则，用空格或段落分隔符替换原换行符
+    final_text = re.sub(newlines, lambda m: m.group(1) + is_paragraph_break(m) + m.group(2), text)
+
+    return final_text.strip()
+
+def 解析PDF(file_name, top_p, temperature, chatbot, history, systemPromptTxt):
+    import time, glob, os, fitz
+    print('begin analysis on:', file_name)
+
+    with fitz.open(file_name) as doc:
+        file_content = ""
+        for page in doc:
+            file_content += page.get_text()
+        file_content = clean_text(file_content)
+        # print(file_content)
+    split_number = 10000
+    split_group = (len(file_content)//split_number)+1
+    for i in range(0,split_group):
+        if i==0:
+            prefix = "接下来请你仔细分析下面的论文，学习里面的内容（专业术语、公式、数学概念）.并且注意：由于论文内容较多，将分批次发送，每次发送完之后，你只需要回答“接受完成”"
+            i_say = prefix + f'文件名是{file_name}，文章内容第{i+1}部分是 ```{file_content[i*split_number:(i+1)*split_number]}```'
+            i_say_show_user = f'文件名是：\n{file_name},\n由于论文内容过长，将分批请求（共{len(file_content)}字符，将分为{split_group}批，每批{split_number}字符）。\n当前发送{i+1}/{split_group}部分'
+        elif i==split_group-1:
+            i_say = f'你只需要回答“所有论文接受完成，请进行下一步”。文章内容第{i+1}/{split_group}部分是 ```{file_content[i*split_number:]}```'
+            i_say_show_user = f'当前发送{i+1}/{split_group}部分'
+        else:
+            i_say = f'你只需要回答“接受完成”。文章内容第{i+1}/{split_group}部分是 ```{file_content[i*split_number:(i+1)*split_number]}```'
+            i_say_show_user = f'当前发送{i+1}/{split_group}部分'
+        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
+        gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[])   # 带超时倒计时
+        while "完成" not in gpt_say:
+            i_say = f'你只需要回答“接受完成”。文章内容第{i+1}/{split_group}部分是 ```{file_content[i*split_number:(i+1)*split_number]}```'
+            i_say_show_user = f'出现error，重新发送{i+1}/{split_group}部分'
+            gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[])   # 带超时倒计时
+            time.sleep(1)
+        chatbot[-1] = (i_say_show_user, gpt_say)
+        history.append(i_say_show_user); history.append(gpt_say)
+        yield chatbot, history, '正常'
+        time.sleep(2)
+
+    i_say = f'接下来，请你扮演一名专业的学术教授，利用你的所有知识并且结合这篇文章，回答我的问题。（请牢记：1.直到我说“退出”，你才能结束任务；2.所有问题需要紧密围绕文章内容;3.如果有公式，请使用tex渲染)'
+    chatbot.append((i_say, "[Local Message] waiting gpt response."))
+    yield chatbot, history, '正常'
+
+    # ** gpt request **
+    gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature, history=history)   # 带超时倒计时
+    chatbot[-1] = (i_say, gpt_say)
+    history.append(i_say); history.append(gpt_say)
+    yield chatbot, history, '正常'
+
+
+@CatchException
+def 理解PDF文档内容(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
+    import glob, os
+
+    # 基本信息：功能、贡献者
+    chatbot.append([
+        "函数插件功能？",
+        "理解PDF论文内容，并且将结合上下文内容，进行学术解答。函数插件贡献者: Hanzoe。"])
+    yield chatbot, history, '正常'
+
+    import tkinter as tk
+    from tkinter import filedialog
+
+    root = tk.Tk()
+    root.withdraw()
+    txt = filedialog.askopenfilename()
+
+    # 尝试导入依赖，如果缺少依赖，则给出安装建议
+    try:
+        import fitz
+    except:
+        report_execption(chatbot, history, 
+            a = f"解析项目: {txt}", 
+            b = f"导入软件依赖失败。使用该模块需要额外依赖，安装方法```pip install --upgrade pymupdf```。")
+        yield chatbot, history, '正常'
+        return
+
+    # 清空历史，以免输入溢出
+    history = []
+
+    # 开始正式执行任务
+    yield from 解析PDF(txt, top_p, temperature, chatbot, history, systemPromptTxt)
+
+
+
+@CatchException
+def 理解PDF文档内容标准文件输入(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
+    import glob, os
+
+    # 基本信息：功能、贡献者
+    chatbot.append([
+        "函数插件功能？",
+        "理解PDF论文内容，并且将结合上下文内容，进行学术解答。函数插件贡献者: Hanzoe。"])
+    yield chatbot, history, '正常'
+
+    # 尝试导入依赖，如果缺少依赖，则给出安装建议
+    try:
+        import fitz
+    except:
+        report_execption(chatbot, history, 
+            a = f"解析项目: {txt}", 
+            b = f"导入软件依赖失败。使用该模块需要额外依赖，安装方法```pip install --upgrade pymupdf```。")
+        yield chatbot, history, '正常'
+        return
+
+    # 清空历史，以免输入溢出
+    history = []
+
+    # 检测输入参数，如没有给定输入参数，直接退出
+    if os.path.exists(txt):
+        project_folder = txt
+    else:
+        if txt == "":
+            txt = '空空如也的输入栏'
+        report_execption(chatbot, history,
+                         a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
+        yield chatbot, history, '正常'
+        return
+
+    # 搜索需要处理的文件清单
+    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.pdf', recursive=True)]
+    # 如果没找到任何文件
+    if len(file_manifest) == 0:
+        report_execption(chatbot, history,
+                         a=f"解析项目: {txt}", b=f"找不到任何.tex或.pdf文件: {txt}")
+        yield chatbot, history, '正常'
+        return
+    txt = file_manifest[0]
+    # 开始正式执行任务
+    yield from 解析PDF(txt, top_p, temperature, chatbot, history, systemPromptTxt)
--- a/crazy_functions/谷歌检索小助手.py
+++ b/crazy_functions/谷歌检索小助手.py
@@ -0,0 +1,106 @@
+from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+from toolbox import CatchException, report_execption, write_results_to_file
+
+def get_meta_information(url, chatbot, history):
+    import requests
+    import arxiv
+    import difflib
+    from bs4 import BeautifulSoup
+    from toolbox import get_conf
+    proxies, = get_conf('proxies')
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
+    }
+    # 发送 GET 请求
+    response = requests.get(url, proxies=proxies, headers=headers)
+
+    # 解析网页内容
+    soup = BeautifulSoup(response.text, "html.parser")
+
+    def string_similar(s1, s2):
+        return difflib.SequenceMatcher(None, s1, s2).quick_ratio()
+
+    profile = []
+    # 获取所有文章的标题和作者
+    for result in soup.select(".gs_ri"):
+        title = result.a.text.replace('\n', ' ').replace('  ', ' ')
+        author = result.select_one(".gs_a").text
+        try:
+            citation = result.select_one(".gs_fl > a[href*='cites']").text  # 引用次数是链接中的文本，直接取出来
+        except:
+            citation = 'cited by 0'
+        abstract = result.select_one(".gs_rs").text.strip()  # 摘要在 .gs_rs 中的文本，需要清除首尾空格
+        search = arxiv.Search(
+            query = title,
+            max_results = 1,
+            sort_by = arxiv.SortCriterion.Relevance,
+        )
+        paper = next(search.results())
+        if string_similar(title, paper.title) > 0.90: # same paper
+            abstract = paper.summary.replace('\n', ' ')
+            is_paper_in_arxiv = True
+        else:   # different paper
+            abstract = abstract
+            is_paper_in_arxiv = False
+        paper = next(search.results())
+        print(title)
+        print(author)
+        print(citation)
+        profile.append({
+            'title':title,
+            'author':author,
+            'citation':citation,
+            'abstract':abstract,
+            'is_paper_in_arxiv':is_paper_in_arxiv,
+        })
+
+        chatbot[-1] = [chatbot[-1][0], title + f'\n\n是否在arxiv中（不在arxiv中无法获取完整摘要）:{is_paper_in_arxiv}\n\n' + abstract]
+        msg = "正常"
+        yield chatbot, [], msg 
+    return profile
+
+@CatchException
+def 谷歌检索小助手(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
+    # 基本信息：功能、贡献者
+    chatbot.append([
+        "函数插件功能？",
+        "分析用户提供的谷歌学术（google scholar）搜索页面中，出现的所有文章: binary-husky，插件初始化中..."])
+    yield chatbot, history, '正常'
+
+    # 尝试导入依赖，如果缺少依赖，则给出安装建议
+    try:
+        import arxiv
+        from bs4 import BeautifulSoup
+    except:
+        report_execption(chatbot, history, 
+            a = f"解析项目: {txt}", 
+            b = f"导入软件依赖失败。使用该模块需要额外依赖，安装方法```pip install --upgrade beautifulsoup4 arxiv```。")
+        yield chatbot, history, '正常'
+        return
+
+    # 清空历史，以免输入溢出
+    history = []
+
+    meta_paper_info_list = yield from get_meta_information(txt, chatbot, history)
+
+    if len(meta_paper_info_list[:10]) > 0:
+        i_say = "下面是一些学术文献的数据，请从中提取出以下内容。" + \
+        "1、英文题目；2、中文题目翻译；3、作者；4、arxiv公开（is_paper_in_arxiv）；4、引用数量（cite）；5、中文摘要翻译。" + \
+        f"以下是信息源：{str(meta_paper_info_list[:10])}" 
+
+        inputs_show_user = f"请分析此页面中出现的所有文章：{txt}"
+        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
+            inputs=i_say, inputs_show_user=inputs_show_user, 
+            top_p=top_p, temperature=temperature, chatbot=chatbot, history=[], 
+            sys_prompt="你是一个学术翻译，请从数据中提取信息。你必须使用Markdown格式。你必须逐个文献进行处理。"
+        )
+
+        history.extend([ "第一批", gpt_say ])
+        meta_paper_info_list = meta_paper_info_list[10:]
+
+    chatbot.append(["状态？", "已经全部完成"])
+    msg = '正常'
+    yield chatbot, history, msg
+    res = write_results_to_file(history)
+    chatbot.append(("完成了吗？", res)); 
+    yield chatbot, history, msg
--- a/main.py
+++ b/main.py
@@ -11,8 +11,9 @@ proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT,
 PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
 if not AUTHENTICATION: AUTHENTICATION = None

+from check_proxy import get_current_version
 initial_prompt = "Serve me as a writing and programming assistant."
-title_html = "<h1 align=\"center\">ChatGPT 学术优化</h1>"
+title_html = f"<h1 align=\"center\">ChatGPT 学术优化 {get_current_version()}</h1>"
 description =  """代码开源和更新[地址🚀](https://github.com/binary-husky/chatgpt_academic)，感谢热情的[开发者们❤️](https://github.com/binary-husky/chatgpt_academic/graphs/contributors)"""

 # 问询记录, python 版本建议3.9+（越新越好）
@@ -49,7 +50,7 @@ if LAYOUT == "TOP-DOWN":
    CHATBOT_HEIGHT /= 2

 cancel_handles = []
-with gr.Blocks(theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
+with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
    gr.HTML(title_html)
    with gr_L1():
        with gr_L2(scale=2):
@@ -160,15 +161,13 @@ with gr.Blocks(theme=set_theme, analytics_enabled=False, css=advanced_css) as de
 def auto_opentab_delay():
    import threading, webbrowser, time
    print(f"如果浏览器没有自动打开，请复制并转到以下URL：")
-    print(f"\t（亮色主体）: http://localhost:{PORT}")
-    print(f"\t（暗色主体）: http://localhost:{PORT}/?__dark-theme=true")
+    print(f"\t（亮色主题）: http://localhost:{PORT}")
+    print(f"\t（暗色主题）: http://localhost:{PORT}/?__dark-theme=true")
    def open(): 
-        time.sleep(2)
-        try: auto_update()  # 检查新版本
-        except: pass
+        time.sleep(2)       # 打开浏览器
        webbrowser.open_new_tab(f"http://localhost:{PORT}/?__dark-theme=true")
    threading.Thread(target=open, name="open-browser", daemon=True).start()
+    threading.Thread(target=auto_update, name="self-upgrade", daemon=True).start()

 auto_opentab_delay()
-demo.title = "ChatGPT 学术优化"
 demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", share=True, server_port=PORT, auth=AUTHENTICATION)
--- a/request_llm/bridge_chatgpt.py
+++ b/request_llm/bridge_chatgpt.py
@@ -104,7 +104,10 @@ def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_pr
    result = ''
    while True:
        try: chunk = next(stream_response).decode()
-        except StopIteration: break
+        except StopIteration: 
+            break
+        except requests.exceptions.ConnectionError:
+            chunk = next(stream_response).decode() # 失败了，重试一次？再失败就没办法了。
        if len(chunk)==0: continue
        if not chunk.startswith('data:'): 
            error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,13 @@
 gradio>=3.23
 requests[socks]
-mdtex2html
-Markdown
-latex2mathml
-openai
 transformers
+python-markdown-math
+beautifulsoup4
+latex2mathml
+mdtex2html
+tiktoken
+Markdown
+pygments
+pymupdf
+openai
 numpy
--- a/theme.py
+++ b/theme.py
@@ -1,5 +1,6 @@
 import gradio as gr
-
+from toolbox import get_conf
+CODE_HIGHLIGHT, = get_conf('CODE_HIGHLIGHT')
 # gradio可用颜色列表
 # gr.themes.utils.colors.slate (石板色)
 # gr.themes.utils.colors.gray (灰色)
@@ -154,3 +155,75 @@ advanced_css = """
    margin: 1em 2em 1em 0.5em;
 }
 """
+
+if CODE_HIGHLIGHT:
+    advanced_css += """
+.hll { background-color: #ffffcc }
+.c { color: #3D7B7B; font-style: italic } /* Comment */
+.err { border: 1px solid #FF0000 } /* Error */
+.k { color: hsl(197, 94%, 51%); font-weight: bold } /* Keyword */
+.o { color: #666666 } /* Operator */
+.ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */
+.cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */
+.cp { color: #9C6500 } /* Comment.Preproc */
+.cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */
+.c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */
+.cs { color: #3D7B7B; font-style: italic } /* Comment.Special */
+.gd { color: #A00000 } /* Generic.Deleted */
+.ge { font-style: italic } /* Generic.Emph */
+.gr { color: #E40000 } /* Generic.Error */
+.gh { color: #000080; font-weight: bold } /* Generic.Heading */
+.gi { color: #008400 } /* Generic.Inserted */
+.go { color: #717171 } /* Generic.Output */
+.gp { color: #000080; font-weight: bold } /* Generic.Prompt */
+.gs { font-weight: bold } /* Generic.Strong */
+.gu { color: #800080; font-weight: bold } /* Generic.Subheading */
+.gt { color: #a9dd00 } /* Generic.Traceback */
+.kc { color: #008000; font-weight: bold } /* Keyword.Constant */
+.kd { color: #008000; font-weight: bold } /* Keyword.Declaration */
+.kn { color: #008000; font-weight: bold } /* Keyword.Namespace */
+.kp { color: #008000 } /* Keyword.Pseudo */
+.kr { color: #008000; font-weight: bold } /* Keyword.Reserved */
+.kt { color: #B00040 } /* Keyword.Type */
+.m { color: #666666 } /* Literal.Number */
+.s { color: #BA2121 } /* Literal.String */
+.na { color: #687822 } /* Name.Attribute */
+.nb { color: #e5f8c3 } /* Name.Builtin */
+.nc { color: #ffad65; font-weight: bold } /* Name.Class */
+.no { color: #880000 } /* Name.Constant */
+.nd { color: #AA22FF } /* Name.Decorator */
+.ni { color: #717171; font-weight: bold } /* Name.Entity */
+.ne { color: #CB3F38; font-weight: bold } /* Name.Exception */
+.nf { color: #f9f978 } /* Name.Function */
+.nl { color: #767600 } /* Name.Label */
+.nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
+.nt { color: #008000; font-weight: bold } /* Name.Tag */
+.nv { color: #19177C } /* Name.Variable */
+.ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
+.w { color: #bbbbbb } /* Text.Whitespace */
+.mb { color: #666666 } /* Literal.Number.Bin */
+.mf { color: #666666 } /* Literal.Number.Float */
+.mh { color: #666666 } /* Literal.Number.Hex */
+.mi { color: #666666 } /* Literal.Number.Integer */
+.mo { color: #666666 } /* Literal.Number.Oct */
+.sa { color: #BA2121 } /* Literal.String.Affix */
+.sb { color: #BA2121 } /* Literal.String.Backtick */
+.sc { color: #BA2121 } /* Literal.String.Char */
+.dl { color: #BA2121 } /* Literal.String.Delimiter */
+.sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */
+.s2 { color: #2bf840 } /* Literal.String.Double */
+.se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */
+.sh { color: #BA2121 } /* Literal.String.Heredoc */
+.si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */
+.sx { color: #008000 } /* Literal.String.Other */
+.sr { color: #A45A77 } /* Literal.String.Regex */
+.s1 { color: #BA2121 } /* Literal.String.Single */
+.ss { color: #19177C } /* Literal.String.Symbol */
+.bp { color: #008000 } /* Name.Builtin.Pseudo */
+.fm { color: #0000FF } /* Name.Function.Magic */
+.vc { color: #19177C } /* Name.Variable.Class */
+.vg { color: #19177C } /* Name.Variable.Global */
+.vi { color: #19177C } /* Name.Variable.Instance */
+.vm { color: #19177C } /* Name.Variable.Magic */
+.il { color: #666666 } /* Literal.Number.Integer.Long */
+"""
--- a/toolbox.py
+++ b/toolbox.py
@@ -6,7 +6,7 @@ import traceback
 import importlib
 import inspect
 import re
-from show_math import convert as convert_math
+from latex2mathml.converter import convert as tex2mathml
 from functools import wraps, lru_cache


@@ -162,7 +162,13 @@ def CatchException(f):

 def HotReload(f):
    """
-        装饰器函数，实现函数插件热更新
+    HotReload的装饰器函数，用于实现Python函数插件的热更新。
+    函数热更新是指在不停止程序运行的情况下，更新函数代码，从而达到实时更新功能。
+    在装饰器内部，使用wraps(f)来保留函数的元信息，并定义了一个名为decorated的内部函数。
+    内部函数通过使用importlib模块的reload函数和inspect模块的getmodule函数来重新加载并获取函数模块，
+    然后通过getattr函数获取函数名，并在新模块中重新加载函数。
+    最后，使用yield from语句返回重新加载过的函数，并在被装饰的函数上执行。
+    最终，装饰器函数返回内部函数。这个内部函数可以将函数的原始定义更新为最新版本，并执行函数的新版本。
    """
    @wraps(f)
    def decorated(*args, **kwargs):
@@ -203,15 +209,76 @@ def markdown_convertion(txt):
    """
    pre = '<div class="markdown-body">'
    suf = '</div>'
-    if ('$' in txt) and ('```' not in txt):
-        return pre + markdown.markdown(txt, extensions=['fenced_code', 'tables']) + '<br><br>' + markdown.markdown(convert_math(txt, splitParagraphs=False), extensions=['fenced_code', 'tables']) + suf
+    markdown_extension_configs = {
+        'mdx_math': {
+            'enable_dollar_delimiter': True,
+            'use_gitlab_delimiters': False,
+        },
+    }
+    find_equation_pattern = r'<script type="math/tex(?:.*?)>(.*?)</script>'
+
+    def tex2mathml_catch_exception(content, *args, **kwargs):
+        try:
+            content = tex2mathml(content, *args, **kwargs)
+        except:
+            content = content
+        return content
+
+    def replace_math_no_render(match):
+        content = match.group(1)
+        if 'mode=display' in match.group(0):
+            content = content.replace('\n', '</br>')
+            return f"<font color=\"#00FF00\">$$</font><font color=\"#FF00FF\">{content}</font><font color=\"#00FF00\">$$</font>"
+        else:
+            return f"<font color=\"#00FF00\">$</font><font color=\"#FF00FF\">{content}</font><font color=\"#00FF00\">$</font>"
+
+    def replace_math_render(match):
+        content = match.group(1)
+        if 'mode=display' in match.group(0):
+            if '\\begin{aligned}' in content:
+                content = content.replace('\\begin{aligned}', '\\begin{array}')
+                content = content.replace('\\end{aligned}', '\\end{array}')
+                content = content.replace('&', ' ')
+            content = tex2mathml_catch_exception(content, display="block")
+            return content
+        else:
+            return tex2mathml_catch_exception(content)
+        
+    def markdown_bug_hunt(content):
+        """
+        解决一个mdx_math的bug（单$包裹begin命令时多余<script>）
+        """
+        content = content.replace('<script type="math/tex">\n<script type="math/tex; mode=display">', '<script type="math/tex; mode=display">')
+        content = content.replace('</script>\n</script>', '</script>')
+        return content
+    
+
+    if ('$' in txt) and ('```' not in txt):  # 有$标识的公式符号，且没有代码段```的标识
+        # convert everything to html format
+        split = markdown.markdown(text='---')
+        convert_stage_1 = markdown.markdown(text=txt, extensions=['mdx_math', 'fenced_code', 'tables', 'sane_lists'], extension_configs=markdown_extension_configs)
+        convert_stage_1 = markdown_bug_hunt(convert_stage_1)
+        # re.DOTALL: Make the '.' special character match any character at all, including a newline; without this flag, '.' will match anything except a newline. Corresponds to the inline flag (?s).
+        # 1. convert to easy-to-copy tex (do not render math)
+        convert_stage_2_1, n = re.subn(find_equation_pattern, replace_math_no_render, convert_stage_1, flags=re.DOTALL)
+        # 2. convert to rendered equation
+        convert_stage_2_2, n = re.subn(find_equation_pattern, replace_math_render, convert_stage_1, flags=re.DOTALL)
+        # cat them together
+        return pre + convert_stage_2_1 + f'{split}' + convert_stage_2_2 + suf
    else:
-        return pre + markdown.markdown(txt, extensions=['fenced_code', 'tables']) + suf
+        return pre + markdown.markdown(txt, extensions=['fenced_code', 'codehilite', 'tables', 'sane_lists']) + suf


 def close_up_code_segment_during_stream(gpt_reply):
    """
-        在gpt输出代码的中途（输出了前面的```，但还没输出完后面的```），补上后面的```
+    在gpt输出代码的中途（输出了前面的```，但还没输出完后面的```），补上后面的```
+    
+    Args:
+        gpt_reply (str): GPT模型返回的回复字符串。
+
+    Returns:
+        str: 返回一个新的字符串，将输出代码片段的“后面的```”补上。
+
    """
    if '```' not in gpt_reply:
        return gpt_reply
@@ -409,6 +476,15 @@ def clear_line_break(txt):


 class DummyWith():
+    """
+    这段代码定义了一个名为DummyWith的空上下文管理器，
+    它的作用是……额……没用，即在代码结构不变的情况下取代其他的上下文管理器。
+    上下文管理器是一种Python对象，用于与with语句一起使用，
+    以确保一些资源在代码块执行期间得到正确的初始化和清理。
+    上下文管理器必须实现两个方法，分别为 __enter__()和 __exit__()。 
+    在上下文执行开始的情况下，__enter__()方法会在代码块被执行前被调用，
+    而在上下文执行结束时，__exit__()方法则会被调用。
+    """
    def __enter__(self):
        return self

--- a/version
+++ b/version
@@ -1,5 +1,5 @@
 {
-  "version": 2.4,
+  "version": 2.5,
  "show_feature": true,
-  "new_feature": "(1)新增PDF全文翻译功能; (2)新增输入区切换位置的功能; (3)新增垂直布局选项; (4)多线程函数插件优化。"
+  "new_feature": "新增一键更新程序<->高亮代码<->高亮公式<->新增垂直布局选项"
 }
Author	SHA1	Message	Date
qingxu fu	8ddc1adae4	version 2.5	2023-04-08 22:27:02 +08:00
qingxu fu	4e3f759d0c	移动参数位置	2023-04-08 22:16:33 +08:00
qingxu fu	94ff62bdaa	错别字	2023-04-08 22:15:33 +08:00
qingxu fu	2cbb5dbdaa	up	2023-04-08 22:14:05 +08:00
Your Name	3b85a29f91	加入自动更新协议	2023-04-08 02:48:35 +08:00
Your Name	166daa1ea7	显示版本	2023-04-08 02:39:54 +08:00
Your Name	5c3ecd7477	自动更新程序	2023-04-08 02:38:02 +08:00
Your Name	d5b03377ff	多种接口	2023-04-08 00:51:58 +08:00
Your Name	7cd11f2bbd	新插件移动到插件菜单中	2023-04-08 00:42:54 +08:00
Your Name	f65cc8deea	Merge branch 'master' of github.com:binary-husky/chatgpt_academic	2023-04-08 00:41:46 +08:00
Your Name	48ee620524	代码高亮开关	2023-04-08 00:41:39 +08:00
binary-husky	8a5be8fb8d	Merge pull request #366 from Hanzoe/master new function: 实现单篇PDF论文翻译理解	2023-04-08 00:41:03 +08:00
binary-husky	f26b8e28e1	Update README.md	2023-04-08 00:32:22 +08:00
Your Name	b005b84ad6	更新requirements.txt实现代码高亮必要	2023-04-08 00:23:26 +08:00
Your Name	1edf7ef80d	Fix dockerfile	2023-04-08 00:01:11 +08:00
Your Name	3fed08f65e	version 2.45	2023-04-07 23:58:10 +08:00
Your Name	fa8603d745	Merge branch 'master' into dev	2023-04-07 23:55:19 +08:00
Your Name	6b5c2538cf	新增谷歌学术统合小助手	2023-04-07 23:54:24 +08:00
Your Name	7f1c7ebd68	version 2.43	2023-04-07 22:08:05 +08:00
Your Name	ff87aebc29	处理多线程中出现的网络问题	2023-04-07 22:06:08 +08:00
Hanzoe	2c746056ff	Update crazy_functional.py	2023-04-07 21:35:36 +08:00
Hanzoe	0e4cac29f8	Add files via upload	2023-04-07 21:34:55 +08:00
Hanzoe	8513d46398	Merge pull request #1 from binary-husky/master 单篇论文翻译理解	2023-04-07 21:34:11 +08:00
Your Name	b2495a6f7e	Merge branch 'dev' of github.com:binary-husky/chatgpt_academic into dev	2023-04-07 21:09:43 +08:00
Your Name	5603d33d67	highlight	2023-04-07 21:09:37 +08:00
Your Name	d06d4f3a6f	highlight	2023-04-07 21:08:34 +08:00
Your Name	b2adc77a73	Merge branch 'dev' of github.com:binary-husky/chatgpt_academic into dev	2023-04-07 21:00:32 +08:00
Your Name	1f6e2547b2	Merge branch 'master' into dev	2023-04-07 20:59:35 +08:00
qingxu fu	fd0e3fb5c4	代码、公式高亮	2023-04-07 20:30:30 +08:00
qingxu fu	a0b7ae6674	Merge branch 'master' of https://github.com/binary-husky/chatgpt_academic into master	2023-04-07 19:26:20 +08:00
qingxu fu	8ca232cda3	修复小BUG	2023-04-07 19:26:17 +08:00
binary-husky	34e983c7a5	Update README.md	2023-04-07 19:09:18 +08:00
binary-husky	c0d096726c	Update README.md	2023-04-07 19:08:41 +08:00
qingxu fu	969e8c1d89	正确显示列表序号	2023-04-07 18:33:46 +08:00
binary-husky	d4e3082db4	Update toolbox.py	2023-04-07 18:27:52 +08:00
binary-husky	777e56882b	Update README.md	2023-04-07 18:21:13 +08:00
Your Name	4da7d75ad4	修复公式显示错误	2023-04-07 18:14:27 +08:00
qingxu fu	1538acaa5a	fix equation	2023-04-07 17:55:24 +08:00
qingxu fu	b47f69978e	更新requirements.txt	2023-04-07 12:45:47 +08:00
binary-husky	823c136de4	Update README.md	2023-04-06 19:24:37 +08:00