log user name during chat

Enhance archive extraction with error handling for tar and gzip formats
Adding support to new openai apikey format (#2030 )
2024-11-10 16:50:24 +00:00 · 2024-11-09 10:10:46 +00:00 · 2024-11-09 13:41:19 +08:00 · 2024-11-04 13:49:49 +00:00 · 2024-11-03 22:41:16 +08:00 · 2024-11-03 14:34:17 +00:00
34 changed files with 723 additions and 348 deletions
--- a/.github/workflows/build-with-latex-arm.yml
+++ b/.github/workflows/build-with-latex-arm.yml
@@ -46,6 +46,6 @@ jobs:
          context: .
          push: true
          platforms: linux/arm64
-          file: docs/GithubAction+NoLocal+Latex+Arm
+          file: docs/GithubAction+NoLocal+Latex
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
--- a/.gitignore
+++ b/.gitignore
@@ -160,4 +160,6 @@ test.*
 temp.*
 objdump*
 *.min.*.js
-TODO
+TODO
 experimental_mods
 search_results
--- a/check_proxy.py
+++ b/check_proxy.py
@@ -1,24 +1,36 @@
 from loguru import logger
 def check_proxy(proxies, return_ip=False):
    """
    检查代理配置并返回结果。
    Args:
        proxies (dict): 包含http和https代理配置的字典。
        return_ip (bool, optional): 是否返回代理的IP地址。默认为False。
    Returns:
        str or None: 检查的结果信息或代理的IP地址（如果`return_ip`为True）。
    """
    import requests
    proxies_https = proxies['https'] if proxies is not None else '无'
    ip = None
    try:
-        response = requests.get("https://ipapi.co/json/", proxies=proxies, timeout=4)
+        response = requests.get("https://ipapi.co/json/", proxies=proxies, timeout=4)  # ⭐ 执行GET请求以获取代理信息
        data = response.json()
        if 'country_name' in data:
            country = data['country_name']
            result = f"代理配置 {proxies_https}, 代理所在地：{country}"
-            if 'ip' in data: ip = data['ip']
+            if 'ip' in data:
                ip = data['ip']
        elif 'error' in data:
-            alternative, ip = _check_with_backup_source(proxies)
+            alternative, ip = _check_with_backup_source(proxies)  # ⭐ 调用备用方法检查代理配置
            if alternative is None:
                result = f"代理配置 {proxies_https}, 代理所在地：未知，IP查询频率受限"
            else:
                result = f"代理配置 {proxies_https}, 代理所在地：{alternative}"
        else:
            result = f"代理配置 {proxies_https}, 代理数据解析失败：{data}"
        if not return_ip:
            logger.warning(result)
            return result
@@ -33,17 +45,33 @@ def check_proxy(proxies, return_ip=False):
            return ip
 def _check_with_backup_source(proxies):
    """
    通过备份源检查代理，并获取相应信息。
    Args:
        proxies (dict): 包含代理信息的字典。
    Returns:
        tuple: 代理信息(geo)和IP地址(ip)的元组。
    """
    import random, string, requests
    random_string = ''.join(random.choices(string.ascii_letters + string.digits, k=32))
    try:
-        res_json = requests.get(f"http://{random_string}.edns.ip-api.com/json", proxies=proxies, timeout=4).json()
+        res_json = requests.get(f"http://{random_string}.edns.ip-api.com/json", proxies=proxies, timeout=4).json()  # ⭐ 执行代理检查和备份源请求
        return res_json['dns']['geo'], res_json['dns']['ip']
    except:
        return None, None
 def backup_and_download(current_version, remote_version):
    """
-    一键更新协议：备份和下载
+    一键更新协议：备份当前版本，下载远程版本并解压缩。
    Args:
        current_version (str): 当前版本号。
        remote_version (str): 远程版本号。
    Returns:
        str: 新版本目录的路径。
    """
    from toolbox import get_conf
    import shutil
@@ -60,7 +88,7 @@ def backup_and_download(current_version, remote_version):
    proxies = get_conf('proxies')
    try:    r = requests.get('https://github.com/binary-husky/chatgpt_academic/archive/refs/heads/master.zip', proxies=proxies, stream=True)
    except: r = requests.get('https://public.agent-matrix.com/publish/master.zip', proxies=proxies, stream=True)
-    zip_file_path = backup_dir+'/master.zip'
+    zip_file_path = backup_dir+'/master.zip'  # ⭐ 保存备份文件的路径
    with open(zip_file_path, 'wb+') as f:
        f.write(r.content)
    dst_path = new_version_dir
@@ -76,6 +104,17 @@ def backup_and_download(current_version, remote_version):
 def patch_and_restart(path):
    """
    一键更新协议：覆盖和重启
    Args:
        path (str): 新版本代码所在的路径
    注意事项:
        如果您的程序没有使用config_private.py私密配置文件，则会将config.py重命名为config_private.py以避免配置丢失。
    更新流程:
        - 复制最新版本代码到当前目录
        - 更新pip包依赖
        - 如果更新失败，则提示手动安装依赖库并重启
    """
    from distutils import dir_util
    import shutil
@@ -84,32 +123,43 @@ def patch_and_restart(path):
    import time
    import glob
    from shared_utils.colorful import log亮黄, log亮绿, log亮红
-    # if not using config_private, move origin config.py as config_private.py
+
    if not os.path.exists('config_private.py'):
        log亮黄('由于您没有设置config_private.py私密配置，现将您的现有配置移动至config_private.py以防止配置丢失，',
              '另外您可以随时在history子文件夹下找回旧版的程序。')
        shutil.copyfile('config.py', 'config_private.py')
    path_new_version = glob.glob(path + '/*-master')[0]
-    dir_util.copy_tree(path_new_version, './')
+    dir_util.copy_tree(path_new_version, './')  # ⭐ 将最新版本代码复制到当前目录
    log亮绿('代码已经更新，即将更新pip包依赖……')
    for i in reversed(range(5)): time.sleep(1); log亮绿(i)
    try:
        import subprocess
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt'])
    except:
        log亮红('pip包依赖安装出现问题，需要手动安装新增的依赖库 `python -m pip install -r requirements.txt`，然后在用常规的`python main.py`的方式启动。')
    log亮绿('更新完成，您可以随时在history子文件夹下找回旧版的程序，5s之后重启')
    log亮红('假如重启失败，您可能需要手动安装新增的依赖库 `python -m pip install -r requirements.txt`，然后在用常规的`python main.py`的方式启动。')
    log亮绿(' ------------------------------ -----------------------------------')
    for i in reversed(range(8)): time.sleep(1); log亮绿(i)
-    os.execl(sys.executable, sys.executable, *sys.argv)
+    os.execl(sys.executable, sys.executable, *sys.argv)  # 重启程序
 def get_current_version():
    """
    获取当前的版本号。
    Returns:
        str: 当前的版本号。如果无法获取版本号，则返回空字符串。
    """
    import json
    try:
        with open('./version', 'r', encoding='utf8') as f:
-            current_version = json.loads(f.read())['version']
+            current_version = json.loads(f.read())['version']  # ⭐ 从读取的json数据中提取版本号
    except:
        current_version = ""
    return current_version
@@ -118,6 +168,12 @@ def get_current_version():
 def auto_update(raise_error=False):
    """
    一键更新协议：查询版本和用户意见
    Args:
        raise_error (bool, optional): 是否在出错时抛出错误。默认为 False。
    Returns:
        None
    """
    try:
        from toolbox import get_conf
@@ -137,13 +193,13 @@ def auto_update(raise_error=False):
            current_version = json.loads(current_version)['version']
        if (remote_version - current_version) >= 0.01-1e-5:
            from shared_utils.colorful import log亮黄
-            log亮黄(f'\n新版本可用。新版本:{remote_version}，当前版本:{current_version}。{new_feature}')
+            log亮黄(f'\n新版本可用。新版本:{remote_version}，当前版本:{current_version}。{new_feature}')  # ⭐ 在控制台打印新版本信息
            logger.info('（1）Github更新地址:\nhttps://github.com/binary-husky/chatgpt_academic\n')
            user_instruction = input('（2）是否一键更新代码（Y+回车=确认，输入其他/无输入+回车=不更新）？')
            if user_instruction in ['Y', 'y']:
-                path = backup_and_download(current_version, remote_version)
+                path = backup_and_download(current_version, remote_version)  # ⭐ 备份并下载文件
                try:
-                    patch_and_restart(path)
+                    patch_and_restart(path)  # ⭐ 执行覆盖并重启操作
                except:
                    msg = '更新失败。'
                    if raise_error:
@@ -163,6 +219,9 @@ def auto_update(raise_error=False):
        logger.info(msg)
 def warm_up_modules():
    """
    预热模块，加载特定模块并执行预热操作。
    """
    logger.info('正在执行一些模块的预热 ...')
    from toolbox import ProxyNetworkActivate
    from request_llms.bridge_all import model_info
@@ -173,6 +232,16 @@ def warm_up_modules():
        enc.encode("模块预热", disallowed_special=())
 def warm_up_vectordb():
    """
    执行一些模块的预热操作。
    本函数主要用于执行一些模块的预热操作，确保在后续的流程中能够顺利运行。
    ⭐ 关键作用：预热模块
    Returns:
        None
    """
    logger.info('正在执行一些模块的预热 ...')
    from toolbox import ProxyNetworkActivate
    with ProxyNetworkActivate("Warmup_Modules"):
@@ -185,4 +254,4 @@ if __name__ == '__main__':
    os.environ['no_proxy'] = '*'  # 避免代理网络产生意外污染
    from toolbox import get_conf
    proxies = get_conf('proxies')
-    check_proxy(proxies)
+    check_proxy(proxies)
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -49,6 +49,7 @@ def get_crazy_functions():
    from crazy_functions.Image_Generate import 图片生成_DALLE2, 图片生成_DALLE3, 图片修改_DALLE2
    from crazy_functions.Image_Generate_Wrap import ImageGen_Wrap
    from crazy_functions.SourceCode_Comment import 注释Python项目
    from crazy_functions.SourceCode_Comment_Wrap import SourceCodeComment_Wrap
    function_plugins = {
        "虚空终端": {
@@ -71,6 +72,7 @@ def get_crazy_functions():
            "AsButton": False,
            "Info": "上传一系列python源文件(或者压缩包), 为这些代码添加docstring | 输入参数为路径",
            "Function": HotReload(注释Python项目),
            "Class": SourceCodeComment_Wrap,
        },
        "载入对话历史存档（先上传存档或输入路径）": {
            "Group": "对话",
--- a/crazy_functions/Latex_Function.py
+++ b/crazy_functions/Latex_Function.py
@@ -3,7 +3,7 @@ from toolbox import CatchException, report_exception, update_ui_lastest_msg, zip
 from functools import partial
 from loguru import logger
-import glob, os, requests, time, json, tarfile
+import glob, os, requests, time, json, tarfile, threading
 pj = os.path.join
 ARXIV_CACHE_DIR = get_conf("ARXIV_CACHE_DIR")
@@ -338,11 +338,17 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
    # <-------------- more requirements ------------->
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    more_req = plugin_kwargs.get("advanced_arg", "")
-    no_cache = more_req.startswith("--no-cache")
+
-    if no_cache: more_req.lstrip("--no-cache")
+    no_cache = ("--no-cache" in more_req)
    if no_cache: more_req = more_req.replace("--no-cache", "").strip()
    allow_gptac_cloud_io = ("--allow-cloudio" in more_req)  # 从云端下载翻译结果，以及上传翻译结果到云端
    if allow_gptac_cloud_io: more_req = more_req.replace("--allow-cloudio", "").strip()
    allow_cache = not no_cache
    _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)
    # <-------------- check deps ------------->
    try:
        import glob, os, time, subprocess
@@ -369,6 +375,20 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
        yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
        return
    # #################################################################
    if allow_gptac_cloud_io and arxiv_id:
        # 访问 GPTAC学术云，查询云端是否存在该论文的翻译版本
        from crazy_functions.latex_fns.latex_actions import check_gptac_cloud
        success, downloaded = check_gptac_cloud(arxiv_id, chatbot)
        if success:
            chatbot.append([
                f"检测到GPTAC云端存在翻译版本, 如果不满意翻译结果, 请禁用云端分享, 然后重新执行。", 
                None
            ])
            yield from update_ui(chatbot=chatbot, history=history)
            return
    #################################################################
    if os.path.exists(txt):
        project_folder = txt
    else:
@@ -406,14 +426,21 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
    # <-------------- zip PDF ------------->
    zip_res = zip_result(project_folder)
    if success:
        if allow_gptac_cloud_io and arxiv_id:
            # 如果用户允许，我们将翻译好的arxiv论文PDF上传到GPTAC学术云
            from crazy_functions.latex_fns.latex_actions import upload_to_gptac_cloud_if_user_allow
            threading.Thread(target=upload_to_gptac_cloud_if_user_allow, 
                args=(chatbot, arxiv_id), daemon=True).start()
        chatbot.append((f"成功啦", '请查收结果（压缩包）...'))
-        yield from update_ui(chatbot=chatbot, history=history);
+        yield from update_ui(chatbot=chatbot, history=history)
        time.sleep(1)  # 刷新界面
        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
    else:
        chatbot.append((f"失败了",
                        '虽然PDF生成失败了, 但请查收结果（压缩包）, 内含已经翻译的Tex文档, 您可以到Github Issue区, 用该压缩包进行反馈。如系统是Linux，请检查系统字体（见Github wiki） ...'))
-        yield from update_ui(chatbot=chatbot, history=history);
+        yield from update_ui(chatbot=chatbot, history=history)
        time.sleep(1)  # 刷新界面
        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
--- a/crazy_functions/Latex_Function_Wrap.py
+++ b/crazy_functions/Latex_Function_Wrap.py
@@ -30,6 +30,8 @@ class Arxiv_Localize(GptAcademicPluginTemplate):
                            default_value="", type="string").model_dump_json(), # 高级参数输入区，自动同步
            "allow_cache":
                ArgProperty(title="是否允许从缓存中调取结果", options=["允许缓存", "从头执行"], default_value="允许缓存", description="无", type="dropdown").model_dump_json(),
            "allow_cloudio":
                ArgProperty(title="是否允许从GPTAC学术云下载(或者上传)翻译结果(仅针对Arxiv论文)", options=["允许", "禁止"], default_value="禁止", description="共享文献，互助互利", type="dropdown").model_dump_json(),
        }
        return gui_definition
@@ -38,9 +40,14 @@ class Arxiv_Localize(GptAcademicPluginTemplate):
        执行插件
        """
        allow_cache = plugin_kwargs["allow_cache"]
        allow_cloudio = plugin_kwargs["allow_cloudio"]
        advanced_arg = plugin_kwargs["advanced_arg"]
        if allow_cache == "从头执行": plugin_kwargs["advanced_arg"] = "--no-cache " + plugin_kwargs["advanced_arg"]
        # 从云端下载翻译结果，以及上传翻译结果到云端；人人为我，我为人人。
        if allow_cloudio == "允许": plugin_kwargs["advanced_arg"] = "--allow-cloudio " + plugin_kwargs["advanced_arg"]
        yield from Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)
--- a/crazy_functions/Markdown_Translate.py
+++ b/crazy_functions/Markdown_Translate.py
@@ -65,7 +65,7 @@ def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
            pfg.file_contents.append(file_content)
    #  <-------- 拆分过长的Markdown文件 ---------->
-    pfg.run_file_split(max_token_limit=2048)
+    pfg.run_file_split(max_token_limit=1024)
    n_split = len(pfg.sp_file_contents)
    #  <-------- 多线程翻译开始 ---------->
--- a/crazy_functions/SourceCode_Comment.py
+++ b/crazy_functions/SourceCode_Comment.py
@@ -6,7 +6,10 @@ from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_ver
 from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
 from crazy_functions.agent_fns.python_comment_agent import PythonCodeComment
 from crazy_functions.diagram_fns.file_tree import FileNode
 from crazy_functions.agent_fns.watchdog import WatchDog
 from shared_utils.advanced_markdown_format import markdown_convertion_for_file
 from loguru import logger
 def 注释源代码(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
@@ -24,12 +27,13 @@ def 注释源代码(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
        file_tree_struct.add_file(file_path, file_path)
    # <第一步，逐个文件分析，多线程>
    lang = "" if not plugin_kwargs["use_chinese"] else " (you must use Chinese)"
    for index, fp in enumerate(file_manifest):
        # 读取文件
        with open(fp, 'r', encoding='utf-8', errors='replace') as f:
            file_content = f.read()
        prefix = ""
-        i_say = prefix + f'Please conclude the following source code at {os.path.relpath(fp, project_folder)} with only one sentence, the code is:\n```{file_content}```'
+        i_say = prefix + f'Please conclude the following source code at {os.path.relpath(fp, project_folder)} with only one sentence{lang}, the code is:\n```{file_content}```'
        i_say_show_user = prefix + f'[{index+1}/{len(file_manifest)}] 请用一句话对下面的程序文件做一个整体概述: {fp}'
        # 装载请求内容
        MAX_TOKEN_SINGLE_FILE = 2560
@@ -37,7 +41,7 @@ def 注释源代码(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
        inputs_array.append(i_say)
        inputs_show_user_array.append(i_say_show_user)
        history_array.append([])
-        sys_prompt_array.append("You are a software architecture analyst analyzing a source code project. Do not dig into details, tell me what the code is doing in general. Your answer must be short, simple and clear.")
+        sys_prompt_array.append(f"You are a software architecture analyst analyzing a source code project. Do not dig into details, tell me what the code is doing in general. Your answer must be short, simple and clear{lang}.")
    # 文件读取完成，对每一个源代码文件，生成一个请求线程，发送到大模型进行分析
    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array = inputs_array,
@@ -50,10 +54,20 @@ def 注释源代码(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
    )
    # <第二步，逐个文件分析，生成带注释文件>
    tasks = ["" for _ in range(len(file_manifest))]
    def bark_fn(tasks):
        for i in range(len(tasks)): tasks[i] = "watchdog is dead"
    wd = WatchDog(timeout=10, bark_fn=lambda: bark_fn(tasks), interval=3, msg="ThreadWatcher timeout")
    wd.begin_watch()
    from concurrent.futures import ThreadPoolExecutor
    executor = ThreadPoolExecutor(max_workers=get_conf('DEFAULT_WORKER_NUM'))
-    def _task_multi_threading(i_say, gpt_say, fp, file_tree_struct):
+    def _task_multi_threading(i_say, gpt_say, fp, file_tree_struct, index):
-        pcc = PythonCodeComment(llm_kwargs, language='English')
+        language = 'Chinese' if plugin_kwargs["use_chinese"] else 'English'
        def observe_window_update(x):
            if tasks[index] == "watchdog is dead":
                raise TimeoutError("ThreadWatcher: watchdog is dead")
            tasks[index] = x
        pcc = PythonCodeComment(llm_kwargs, plugin_kwargs, language=language, observe_window_update=observe_window_update)
        pcc.read_file(path=fp, brief=gpt_say)
        revised_path, revised_content = pcc.begin_comment_source_code(None, None)
        file_tree_struct.manifest[fp].revised_path = revised_path
@@ -65,7 +79,8 @@ def 注释源代码(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
        with open("crazy_functions/agent_fns/python_comment_compare.html", 'r', encoding='utf-8') as f:
            html_template = f.read()
        warp = lambda x: "```python\n\n" + x + "\n\n```"
-        from themes.theme import advanced_css
+        from themes.theme import load_dynamic_theme
        _, advanced_css, _, _ = load_dynamic_theme("Default")
        html_template = html_template.replace("ADVANCED_CSS", advanced_css)
        html_template = html_template.replace("REPLACE_CODE_FILE_LEFT", pcc.get_markdown_block_in_html(markdown_convertion_for_file(warp(pcc.original_content))))
        html_template = html_template.replace("REPLACE_CODE_FILE_RIGHT", pcc.get_markdown_block_in_html(markdown_convertion_for_file(warp(revised_content))))
@@ -73,17 +88,21 @@ def 注释源代码(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
        file_tree_struct.manifest[fp].compare_html = compare_html_path
        with open(compare_html_path, 'w', encoding='utf-8') as f:
            f.write(html_template)
-        # print('done 1')
+        tasks[index] = ""
    chatbot.append([None, f"正在处理:"])
    futures = []
    index = 0
    for i_say, gpt_say, fp in zip(gpt_response_collection[0::2], gpt_response_collection[1::2], file_manifest):
-        future = executor.submit(_task_multi_threading, i_say, gpt_say, fp, file_tree_struct)
+        future = executor.submit(_task_multi_threading, i_say, gpt_say, fp, file_tree_struct, index)
        index += 1
        futures.append(future)
    # <第三步，等待任务完成>
    cnt = 0
    while True:
        cnt += 1
        wd.feed()
        time.sleep(3)
        worker_done = [h.done() for h in futures]
        remain = len(worker_done) - sum(worker_done)
@@ -92,14 +111,18 @@ def 注释源代码(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
        preview_html_list = []
        for done, fp in zip(worker_done, file_manifest):
            if not done: continue
-            preview_html_list.append(file_tree_struct.manifest[fp].compare_html)
+            if hasattr(file_tree_struct.manifest[fp], 'compare_html'):
                preview_html_list.append(file_tree_struct.manifest[fp].compare_html)
            else:
                logger.error(f"文件: {fp} 的注释结果未能成功")
        file_links = generate_file_link(preview_html_list)
        yield from update_ui_lastest_msg(
-            f"剩余源文件数量: {remain}.\n\n" + 
+            f"当前任务: <br/>{'<br/>'.join(tasks)}.<br/>" + 
-            f"已完成的文件: {sum(worker_done)}.\n\n" + 
+            f"剩余源文件数量: {remain}.<br/>" + 
            f"已完成的文件: {sum(worker_done)}.<br/>" + 
            file_links +
-            "\n\n" +
+            "<br/>" +
            ''.join(['.']*(cnt % 10 + 1)
        ), chatbot=chatbot, history=history, delay=0)
        yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
@@ -120,6 +143,7 @@ def 注释源代码(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
@CatchException
 def 注释Python项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
    history = []    # 清空历史，以免输入溢出
    plugin_kwargs["use_chinese"] = plugin_kwargs.get("use_chinese", False)
    import glob, os
    if os.path.exists(txt):
        project_folder = txt
--- a/crazy_functions/SourceCode_Comment_Wrap.py
+++ b/crazy_functions/SourceCode_Comment_Wrap.py
@@ -0,0 +1,36 @@
 from toolbox import get_conf, update_ui
 from crazy_functions.plugin_template.plugin_class_template import GptAcademicPluginTemplate, ArgProperty
 from crazy_functions.SourceCode_Comment import 注释Python项目
 class SourceCodeComment_Wrap(GptAcademicPluginTemplate):
    def __init__(self):
        """
        请注意`execute`会执行在不同的线程中，因此您在定义和使用类变量时，应当慎之又慎！
        """
        pass
    def define_arg_selection_menu(self):
        """
        定义插件的二级选项菜单
        """
        gui_definition = {
            "main_input":
                ArgProperty(title="路径", description="程序路径（上传文件后自动填写）", default_value="", type="string").model_dump_json(), # 主输入，自动从输入框同步
            "use_chinese":
                ArgProperty(title="注释语言", options=["英文", "中文"], default_value="英文", description="无", type="dropdown").model_dump_json(),
            # "use_emoji":
                # ArgProperty(title="在注释中使用emoji", options=["禁止", "允许"], default_value="禁止", description="无", type="dropdown").model_dump_json(),
        }
        return gui_definition
    def execute(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
        """
        执行插件
        """
        if plugin_kwargs["use_chinese"] == "中文": 
            plugin_kwargs["use_chinese"] = True
        else: 
            plugin_kwargs["use_chinese"] = False
        yield from 注释Python项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)
--- a/crazy_functions/agent_fns/python_comment_agent.py
+++ b/crazy_functions/agent_fns/python_comment_agent.py
@@ -68,6 +68,7 @@ Be aware:
 1. You must NOT modify the indent of code.
 2. You are NOT authorized to change or translate non-comment code, and you are NOT authorized to add empty lines either, toggle qu.
 3. Use {LANG} to add comments and docstrings. Do NOT translate Chinese that is already in the code.
 4. Besides adding a docstring, use the ⭐ symbol to annotate the most core and important line of code within the function, explaining its role.
 ------------------ Example ------------------
 INPUT:
@@ -116,10 +117,66 @@ def zip_result(folder):
 '''
 revise_funtion_prompt_chinese = '''
 您需要阅读以下代码，并根据以下说明修订源代码({FILE_BASENAME}):
 1. 如果源代码中包含函数的话, 你应该分析给定函数实现了什么功能
 2. 如果源代码中包含函数的话, 你需要为函数添加docstring, docstring必须使用中文
 请注意：
 1. 你不得修改代码的缩进
 2. 你无权更改或翻译代码中的非注释部分，也不允许添加空行
 3. 使用 {LANG} 添加注释和文档字符串。不要翻译代码中已有的中文
 4. 除了添加docstring之外, 使用⭐符号给该函数中最核心、最重要的一行代码添加注释，并说明其作用
 ------------------ 示例 ------------------
 INPUT:
 ```
 L0000 |
 L0001 |def zip_result(folder):
 L0002 |    t = gen_time_str()
 L0003 |    zip_folder(folder, get_log_folder(), f"result.zip")
 L0004 |    return os.path.join(get_log_folder(), f"result.zip")
 L0005 |
 L0006 |
 ```
 OUTPUT:
 <instruction_1_purpose>
 该函数用于压缩指定文件夹，并返回生成的`zip`文件的路径。
 </instruction_1_purpose>
 <instruction_2_revised_code>
 ```
 def zip_result(folder):
    """
    该函数将指定的文件夹压缩成ZIP文件, 并将其存储在日志文件夹中。
    输入参数:
        folder (str): 需要压缩的文件夹的路径。
    返回值:
        str: 日志文件夹中创建的ZIP文件的路径。
    """
    t = gen_time_str()
    zip_folder(folder, get_log_folder(), f"result.zip")  # ⭐ 执行文件夹的压缩
    return os.path.join(get_log_folder(), f"result.zip")
 ```
 </instruction_2_revised_code>
 ------------------ End of Example ------------------
 ------------------ the real INPUT you need to process NOW ({FILE_BASENAME}) ------------------
 ```
 {THE_CODE}
 ```
 {INDENT_REMINDER}
 {BRIEF_REMINDER}
 {HINT_REMINDER}
 '''
 class PythonCodeComment():
-    def __init__(self, llm_kwargs, language) -> None:
+    def __init__(self, llm_kwargs, plugin_kwargs, language, observe_window_update) -> None:
        self.original_content = ""
        self.full_context = []
        self.full_context_with_line_no = []
@@ -127,7 +184,13 @@ class PythonCodeComment():
        self.page_limit = 100 # 100 lines of code each page
        self.ignore_limit = 20
        self.llm_kwargs = llm_kwargs
        self.plugin_kwargs = plugin_kwargs
        self.language = language
        self.observe_window_update = observe_window_update
        if self.language == "chinese":
            self.core_prompt = revise_funtion_prompt_chinese
        else:
            self.core_prompt = revise_funtion_prompt
        self.path = None
        self.file_basename = None
        self.file_brief = ""
@@ -258,7 +321,7 @@ class PythonCodeComment():
        hint_reminder = "" if hint is None else f"(Reminder: do not ignore or modify code such as `{hint}`, provide complete code in the OUTPUT.)"
        self.llm_kwargs['temperature'] = 0
        result = predict_no_ui_long_connection(
-            inputs=revise_funtion_prompt.format(
+            inputs=self.core_prompt.format(
                LANG=self.language, 
                FILE_BASENAME=self.file_basename, 
                THE_CODE=code, 
@@ -348,6 +411,7 @@ class PythonCodeComment():
            try:
                # yield from update_ui_lastest_msg(f"({self.file_basename}) 正在读取下一段代码片段:\n", chatbot=chatbot, history=history, delay=0)
                next_batch, line_no_start, line_no_end = self.get_next_batch()
                self.observe_window_update(f"正在处理{self.file_basename} - {line_no_start}/{len(self.full_context)}\n")
                # yield from update_ui_lastest_msg(f"({self.file_basename}) 处理代码片段:\n\n{next_batch}", chatbot=chatbot, history=history, delay=0)
                hint = None
--- a/crazy_functions/ast_fns/comment_remove.py
+++ b/crazy_functions/ast_fns/comment_remove.py
@@ -1,39 +1,47 @@
-import ast
+import token
 import tokenize
 import copy
 import io
 class CommentRemover(ast.NodeTransformer):
    def visit_FunctionDef(self, node):
        # 移除函数的文档字符串
        if (node.body and isinstance(node.body[0], ast.Expr) and
                isinstance(node.body[0].value, ast.Str)):
            node.body = node.body[1:]
        self.generic_visit(node)
        return node
-    def visit_ClassDef(self, node):
+def remove_python_comments(input_source: str) -> str:
-        # 移除类的文档字符串
+    source_flag = copy.copy(input_source)
-        if (node.body and isinstance(node.body[0], ast.Expr) and
+    source = io.StringIO(input_source)
-                isinstance(node.body[0].value, ast.Str)):
+    ls = input_source.split('\n')
-            node.body = node.body[1:]
+    prev_toktype = token.INDENT
-        self.generic_visit(node)
+    readline = source.readline
        return node
-    def visit_Module(self, node):
+    def get_char_index(lineno, col):
-        # 移除模块的文档字符串
+        # find the index of the char in the source code
-        if (node.body and isinstance(node.body[0], ast.Expr) and
+        if lineno == 1:
-                isinstance(node.body[0].value, ast.Str)):
+            return len('\n'.join(ls[:(lineno-1)])) + col
-            node.body = node.body[1:]
+        else:
-        self.generic_visit(node)
+            return len('\n'.join(ls[:(lineno-1)])) + col + 1
-        return node
+
-    
+    def replace_char_between(start_lineno, start_col, end_lineno, end_col, source, replace_char, ls):
        # replace char between start_lineno, start_col and end_lineno, end_col with replace_char, but keep '\n' and ' '
        b = get_char_index(start_lineno, start_col)
        e = get_char_index(end_lineno, end_col)
        for i in range(b, e):
            if source[i] == '\n':
                source = source[:i] + '\n' + source[i+1:]
            elif source[i] == ' ':
                source = source[:i] + ' ' + source[i+1:]
            else:
                source = source[:i] + replace_char + source[i+1:]
        return source
    tokgen = tokenize.generate_tokens(readline)
    for toktype, ttext, (slineno, scol), (elineno, ecol), ltext in tokgen:
        if toktype == token.STRING and (prev_toktype == token.INDENT):
            source_flag = replace_char_between(slineno, scol, elineno, ecol, source_flag, ' ', ls)
        elif toktype == token.STRING and (prev_toktype == token.NEWLINE):
            source_flag = replace_char_between(slineno, scol, elineno, ecol, source_flag, ' ', ls)
        elif toktype == tokenize.COMMENT:
            source_flag = replace_char_between(slineno, scol, elineno, ecol, source_flag, ' ', ls)
        prev_toktype = toktype
    return source_flag
 def remove_python_comments(source_code):
    # 解析源代码为 AST
    tree = ast.parse(source_code)
    # 移除注释
    transformer = CommentRemover()
    tree = transformer.visit(tree)
    # 将处理后的 AST 转换回源代码
    return ast.unparse(tree)
 # 示例使用
 if __name__ == "__main__":
--- a/crazy_functions/latex_fns/latex_actions.py
+++ b/crazy_functions/latex_fns/latex_actions.py
@@ -3,7 +3,7 @@ import re
 import shutil
 import numpy as np
 from loguru import logger
-from toolbox import update_ui, update_ui_lastest_msg, get_log_folder
+from toolbox import update_ui, update_ui_lastest_msg, get_log_folder, gen_time_str
 from toolbox import get_conf, promote_file_to_downloadzone
 from crazy_functions.latex_fns.latex_toolbox import PRESERVE, TRANSFORM
 from crazy_functions.latex_fns.latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
@@ -468,3 +468,70 @@ def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
    except:
        from toolbox import trimmed_format_exc
        logger.error('writing html result failed:', trimmed_format_exc())
 def upload_to_gptac_cloud_if_user_allow(chatbot, arxiv_id):
    try:
        # 如果用户允许，我们将arxiv论文PDF上传到GPTAC学术云
        from toolbox import map_file_to_sha256
        # 检查是否顺利，如果没有生成预期的文件，则跳过
        is_result_good = False
        for file_path in chatbot._cookies.get("files_to_promote", []):
            if file_path.endswith('translate_zh.pdf'):
                is_result_good = True
        if not is_result_good:
            return
        # 上传文件
        for file_path in chatbot._cookies.get("files_to_promote", []):
            align_name = None
            # normalized name
            for name in ['translate_zh.pdf', 'comparison.pdf']:
                if file_path.endswith(name): align_name = name
            # if match any align name
            if align_name:
                logger.info(f'Uploading to GPTAC cloud as the user has set `allow_cloud_io`: {file_path}')
                with open(file_path, 'rb') as f:
                    import requests
                    url = 'https://cloud-2.agent-matrix.com/arxiv_tf_paper_normal_upload'
                    files = {'file': (align_name, f, 'application/octet-stream')}
                    data = {
                        'arxiv_id': arxiv_id,
                        'file_hash': map_file_to_sha256(file_path),
                        'language': 'zh',
                        'trans_prompt': 'to_be_implemented',
                        'llm_model': 'to_be_implemented',
                        'llm_model_param': 'to_be_implemented',
                    }
                    resp = requests.post(url=url, files=files, data=data, timeout=30)
                logger.info(f'Uploading terminate ({resp.status_code})`: {file_path}')
    except:
        # 如果上传失败，不会中断程序，因为这是次要功能
        pass
 def check_gptac_cloud(arxiv_id, chatbot):
    import requests
    success = False
    downloaded = []
    try:
        for pdf_target in ['translate_zh.pdf', 'comparison.pdf']:
            url = 'https://cloud-2.agent-matrix.com/arxiv_tf_paper_normal_exist'
            data = {
                'arxiv_id': arxiv_id,
                'name': pdf_target,
            }
            resp = requests.post(url=url, data=data)
            cache_hit_result = resp.text.strip('"')
            if cache_hit_result.startswith("http"):
                url = cache_hit_result
                logger.info(f'Downloading from GPTAC cloud: {url}')
                resp = requests.get(url=url, timeout=30)
                target = os.path.join(get_log_folder(plugin_name='gptac_cloud'), gen_time_str(), pdf_target)
                os.makedirs(os.path.dirname(target), exist_ok=True)
                with open(target, 'wb') as f:
                    f.write(resp.content)
                new_path = promote_file_to_downloadzone(target, chatbot=chatbot)
                success = True
                downloaded.append(new_path)
    except:
        pass
    return success, downloaded
--- a/crazy_functions/latex_fns/latex_pickle_io.py
+++ b/crazy_functions/latex_fns/latex_pickle_io.py
@@ -6,12 +6,16 @@ class SafeUnpickler(pickle.Unpickler):
    def get_safe_classes(self):
        from crazy_functions.latex_fns.latex_actions import LatexPaperFileGroup, LatexPaperSplit
        from crazy_functions.latex_fns.latex_toolbox import LinkedListNode
        from numpy.core.multiarray import scalar
        from numpy import dtype
        # 定义允许的安全类
        safe_classes = {
            # 在这里添加其他安全的类
            'LatexPaperFileGroup': LatexPaperFileGroup,
            'LatexPaperSplit': LatexPaperSplit,
            'LinkedListNode': LinkedListNode,
            'scalar': scalar,
            'dtype': dtype,
        }
        return safe_classes
@@ -22,8 +26,6 @@ class SafeUnpickler(pickle.Unpickler):
        for class_name in self.safe_classes.keys():
            if (class_name in f'{module}.{name}'):
                match_class_name = class_name
        if module == 'numpy' or module.startswith('numpy.'):
            return super().find_class(module, name)
        if match_class_name is not None:
            return self.safe_classes[match_class_name]
        # 如果尝试加载未授权的类，则抛出异常
--- a/crazy_functions/latex_fns/latex_toolbox.py
+++ b/crazy_functions/latex_fns/latex_toolbox.py
@@ -712,134 +712,137 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
                                    # 内部链接：跳转到文档中的某个页面
                                    dest = action.get("/D")  # 目标页或目标位置
                                    # if dest and annot.idnum in page2_annot_id:
-                                    if dest in pdf2_reader.named_destinations:
+                                    # if dest in pdf2_reader.named_destinations:
-                                        # 获取原始文件中跳转信息，包括跳转页面
+                                    if dest and page2.annotations:
-                                        destination = pdf2_reader.named_destinations[
+                                        if annot in page2.annotations:
-                                            dest
+                                            # 获取原始文件中跳转信息，包括跳转页面
-                                        ]
+                                            destination = pdf2_reader.named_destinations[
-                                        page_number = (
+                                                dest
                                            pdf2_reader.get_destination_page_number(
                                                destination
                                            )
                                        )
                                        # 更新跳转信息，跳转到对应的页面和，指定坐标 (100, 150)，缩放比例为 100%
                                        # “/D”:[10,'/XYZ',100,100,0]
                                        if destination.dest_array[1] == "/XYZ":
                                            annot_obj["/A"].update(
                                                {
                                                    NameObject("/D"): ArrayObject(
                                                        [
                                                            NumberObject(page_number),
                                                            destination.dest_array[1],
                                                            FloatObject(
                                                                destination.dest_array[
                                                                    2
                                                                ]
                                                                + int(
                                                                    page1.mediaBox.getWidth()
                                                                )
                                                            ),
                                                            destination.dest_array[3],
                                                            destination.dest_array[4],
                                                        ]
                                                    )  # 确保键和值是 PdfObject
                                                }
                                            )
                                        else:
                                            annot_obj["/A"].update(
                                                {
                                                    NameObject("/D"): ArrayObject(
                                                        [
                                                            NumberObject(page_number),
                                                            destination.dest_array[1],
                                                        ]
                                                    )  # 确保键和值是 PdfObject
                                                }
                                            )
                                        rect = annot_obj.get("/Rect")
                                        # 更新点击坐标
                                        rect = ArrayObject(
                                            [
                                                FloatObject(
                                                    rect[0]
                                                    + int(page1.mediaBox.getWidth())
                                                ),
                                                rect[1],
                                                FloatObject(
                                                    rect[2]
                                                    + int(page1.mediaBox.getWidth())
                                                ),
                                                rect[3],
                                            ]
-                                        )
+                                            page_number = (
-                                        annot_obj.update(
+                                                pdf2_reader.get_destination_page_number(
-                                            {
+                                                    destination
-                                                NameObject(
+                                                )
-                                                    "/Rect"
+                                            )
-                                                ): rect  # 确保键和值是 PdfObject
+                                            # 更新跳转信息，跳转到对应的页面和，指定坐标 (100, 150)，缩放比例为 100%
-                                            }
+                                            # “/D”:[10,'/XYZ',100,100,0]
-                                        )
+                                            if destination.dest_array[1] == "/XYZ":
                                                annot_obj["/A"].update(
                                                    {
                                                        NameObject("/D"): ArrayObject(
                                                            [
                                                                NumberObject(page_number),
                                                                destination.dest_array[1],
                                                                FloatObject(
                                                                    destination.dest_array[
                                                                        2
                                                                    ]
                                                                    + int(
                                                                        page1.mediaBox.getWidth()
                                                                    )
                                                                ),
                                                                destination.dest_array[3],
                                                                destination.dest_array[4],
                                                            ]
                                                        )  # 确保键和值是 PdfObject
                                                    }
                                                )
                                            else:
                                                annot_obj["/A"].update(
                                                    {
                                                        NameObject("/D"): ArrayObject(
                                                            [
                                                                NumberObject(page_number),
                                                                destination.dest_array[1],
                                                            ]
                                                        )  # 确保键和值是 PdfObject
                                                    }
                                                )
                                            rect = annot_obj.get("/Rect")
                                            # 更新点击坐标
                                            rect = ArrayObject(
                                                [
                                                    FloatObject(
                                                        rect[0]
                                                        + int(page1.mediaBox.getWidth())
                                                    ),
                                                    rect[1],
                                                    FloatObject(
                                                        rect[2]
                                                        + int(page1.mediaBox.getWidth())
                                                    ),
                                                    rect[3],
                                                ]
                                            )
                                            annot_obj.update(
                                                {
                                                    NameObject(
                                                        "/Rect"
                                                    ): rect  # 确保键和值是 PdfObject
                                                }
                                            )
                                    # if dest and annot.idnum in page1_annot_id:
-                                    if dest in pdf1_reader.named_destinations:
+                                    # if dest in pdf1_reader.named_destinations:
-
+                                    if dest and page1.annotations:
-                                        # 获取原始文件中跳转信息，包括跳转页面
+                                        if annot in page1.annotations:
-                                        destination = pdf1_reader.named_destinations[
+                                            # 获取原始文件中跳转信息，包括跳转页面
-                                            dest
+                                            destination = pdf1_reader.named_destinations[
-                                        ]
+                                                dest
                                        page_number = (
                                            pdf1_reader.get_destination_page_number(
                                                destination
                                            )
                                        )
                                        # 更新跳转信息，跳转到对应的页面和，指定坐标 (100, 150)，缩放比例为 100%
                                        # “/D”:[10,'/XYZ',100,100,0]
                                        if destination.dest_array[1] == "/XYZ":
                                            annot_obj["/A"].update(
                                                {
                                                    NameObject("/D"): ArrayObject(
                                                        [
                                                            NumberObject(page_number),
                                                            destination.dest_array[1],
                                                            FloatObject(
                                                                destination.dest_array[
                                                                    2
                                                                ]
                                                            ),
                                                            destination.dest_array[3],
                                                            destination.dest_array[4],
                                                        ]
                                                    )  # 确保键和值是 PdfObject
                                                }
                                            )
                                        else:
                                            annot_obj["/A"].update(
                                                {
                                                    NameObject("/D"): ArrayObject(
                                                        [
                                                            NumberObject(page_number),
                                                            destination.dest_array[1],
                                                        ]
                                                    )  # 确保键和值是 PdfObject
                                                }
                                            )
                                        rect = annot_obj.get("/Rect")
                                        rect = ArrayObject(
                                            [
                                                FloatObject(rect[0]),
                                                rect[1],
                                                FloatObject(rect[2]),
                                                rect[3],
                                            ]
-                                        )
+                                            page_number = (
-                                        annot_obj.update(
+                                                pdf1_reader.get_destination_page_number(
-                                            {
+                                                    destination
-                                                NameObject(
+                                                )
-                                                    "/Rect"
+                                            )
-                                                ): rect  # 确保键和值是 PdfObject
+                                            # 更新跳转信息，跳转到对应的页面和，指定坐标 (100, 150)，缩放比例为 100%
-                                            }
+                                            # “/D”:[10,'/XYZ',100,100,0]
-                                        )
+                                            if destination.dest_array[1] == "/XYZ":
                                                annot_obj["/A"].update(
                                                    {
                                                        NameObject("/D"): ArrayObject(
                                                            [
                                                                NumberObject(page_number),
                                                                destination.dest_array[1],
                                                                FloatObject(
                                                                    destination.dest_array[
                                                                        2
                                                                    ]
                                                                ),
                                                                destination.dest_array[3],
                                                                destination.dest_array[4],
                                                            ]
                                                        )  # 确保键和值是 PdfObject
                                                    }
                                                )
                                            else:
                                                annot_obj["/A"].update(
                                                    {
                                                        NameObject("/D"): ArrayObject(
                                                            [
                                                                NumberObject(page_number),
                                                                destination.dest_array[1],
                                                            ]
                                                        )  # 确保键和值是 PdfObject
                                                    }
                                                )
                                            rect = annot_obj.get("/Rect")
                                            rect = ArrayObject(
                                                [
                                                    FloatObject(rect[0]),
                                                    rect[1],
                                                    FloatObject(rect[2]),
                                                    rect[3],
                                                ]
                                            )
                                            annot_obj.update(
                                                {
                                                    NameObject(
                                                        "/Rect"
                                                    ): rect  # 确保键和值是 PdfObject
                                                }
                                            )
                                elif "/S" in action and action["/S"] == "/URI":
                                    # 外部链接：跳转到某个URI
--- a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
+++ b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
@@ -4,7 +4,9 @@ from toolbox import promote_file_to_downloadzone, extract_archive
 from toolbox import generate_file_link, zip_folder
 from crazy_functions.crazy_utils import get_files_from_everything
 from shared_utils.colorful import *
 from loguru import logger
 import os
 import time
 def refresh_key(doc2x_api_key):
    import requests, json
@@ -22,105 +24,140 @@ def refresh_key(doc2x_api_key):
        raise RuntimeError(format("[ERROR] status code: %d, body: %s" % (res.status_code, res.text)))
    return doc2x_api_key
 def 解析PDF_DOC2X_转Latex(pdf_file_path):
    zip_file_path, unzipped_folder = 解析PDF_DOC2X(pdf_file_path, format='tex')
    return unzipped_folder
 def 解析PDF_DOC2X(pdf_file_path, format='tex'):
    """
        format: 'tex', 'md', 'docx'
    """
    import requests, json, os
    DOC2X_API_KEY = get_conf('DOC2X_API_KEY')
    latex_dir = get_log_folder(plugin_name="pdf_ocr_latex")
    markdown_dir = get_log_folder(plugin_name="pdf_ocr")
    doc2x_api_key = DOC2X_API_KEY
    if doc2x_api_key.startswith('sk-'):
        url = "https://api.doc2x.noedgeai.com/api/v1/pdf"
    else:
        doc2x_api_key = refresh_key(doc2x_api_key)
        url = "https://api.doc2x.noedgeai.com/api/platform/pdf"
    # < ------ 第1步：上传 ------ >
    logger.info("Doc2x 第1步：上传")
    with open(pdf_file_path, 'rb') as file:
        res = requests.post(
            "https://v2.doc2x.noedgeai.com/api/v2/parse/pdf",
            headers={"Authorization": "Bearer " + doc2x_api_key},
            data=file
        )
    # res_json = []
    if res.status_code == 200:
        res_json = res.json()
    else:
        raise RuntimeError(f"Doc2x return an error: {res.json()}")
    uuid = res_json['data']['uid']
    # < ------ 第2步：轮询等待 ------ >
    logger.info("Doc2x 第2步：轮询等待")
    params = {'uid': uuid}
    while True:
        res = requests.get(
            'https://v2.doc2x.noedgeai.com/api/v2/parse/status',
            headers={"Authorization": "Bearer " + doc2x_api_key},
            params=params
        )
        res_json = res.json()
        if res_json['data']['status'] == "success":
            break
        elif res_json['data']['status'] == "processing":
            time.sleep(3)
            logger.info(f"Doc2x is processing at {res_json['data']['progress']}%")
        elif res_json['data']['status'] == "failed":
            raise RuntimeError(f"Doc2x return an error: {res_json}")
    # < ------ 第3步：提交转化 ------ >
    logger.info("Doc2x 第3步：提交转化")
    data = {
        "uid": uuid,
        "to": format,
        "formula_mode": "dollar",
        "filename": "output"
    }
    res = requests.post(
-        url,
+        'https://v2.doc2x.noedgeai.com/api/v2/convert/parse',
-        files={"file": open(pdf_file_path, "rb")},
+        headers={"Authorization": "Bearer " + doc2x_api_key},
-        data={"ocr": "1"},
+        json=data
        headers={"Authorization": "Bearer " + doc2x_api_key}
    )
    res_json = []
    if res.status_code == 200:
-        decoded = res.content.decode("utf-8")
+        res_json = res.json()
        for z_decoded in decoded.split('\n'):
            if len(z_decoded) == 0: continue
            assert z_decoded.startswith("data: ")
            z_decoded = z_decoded[len("data: "):]
            decoded_json = json.loads(z_decoded)
            res_json.append(decoded_json)
    else:
-        raise RuntimeError(format("[ERROR] status code: %d, body: %s" % (res.status_code, res.text)))
+        raise RuntimeError(f"Doc2x return an error: {res.json()}")
    uuid = res_json[0]['uuid']
    to = "latex" # latex, md, docx
    url = "https://api.doc2x.noedgeai.com/api/export"+"?request_id="+uuid+"&to="+to
-    res = requests.get(url, headers={"Authorization": "Bearer " + doc2x_api_key})
+    # < ------ 第4步：等待结果 ------ >
-    latex_zip_path = os.path.join(latex_dir, gen_time_str() + '.zip')
+    logger.info("Doc2x 第4步：等待结果")
-    latex_unzip_path = os.path.join(latex_dir, gen_time_str())
+    params = {'uid': uuid}
-    if res.status_code == 200:
+    while True:
-        with open(latex_zip_path, "wb") as f: f.write(res.content)
+        res = requests.get(
-    else:
+            'https://v2.doc2x.noedgeai.com/api/v2/convert/parse/result',
-        raise RuntimeError(format("[ERROR] status code: %d, body: %s" % (res.status_code, res.text)))
+            headers={"Authorization": "Bearer " + doc2x_api_key},
            params=params
        )
        res_json = res.json()
        if res_json['data']['status'] == "success":
            break
        elif res_json['data']['status'] == "processing":
            time.sleep(3)
            logger.info(f"Doc2x still processing")
        elif res_json['data']['status'] == "failed":
            raise RuntimeError(f"Doc2x return an error: {res_json}")
    # < ------ 第5步：最后的处理 ------ >
    logger.info("Doc2x 第5步：最后的处理")
    if format=='tex':
        target_path = latex_dir
    if format=='md':
        target_path = markdown_dir
    os.makedirs(target_path, exist_ok=True)
    max_attempt = 3
    # < ------ 下载 ------ >
    for attempt in range(max_attempt):
        try:
            result_url = res_json['data']['url']
            res = requests.get(result_url)
            zip_path = os.path.join(target_path, gen_time_str() + '.zip')
            unzip_path = os.path.join(target_path, gen_time_str())
            if res.status_code == 200:
                with open(zip_path, "wb") as f: f.write(res.content)
            else:
                raise RuntimeError(f"Doc2x return an error: {res.json()}")
        except Exception as e:
            if attempt < max_attempt - 1:
                logger.error(f"Failed to download latex file, retrying... {e}")
                time.sleep(3)
                continue
            else:
                raise e
    # < ------ 解压 ------ >
    import zipfile
-    with zipfile.ZipFile(latex_zip_path, 'r') as zip_ref:
+    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
-        zip_ref.extractall(latex_unzip_path)
+        zip_ref.extractall(unzip_path)
-
+    return zip_path, unzip_path
    return latex_unzip_path
 def 解析PDF_DOC2X_单文件(fp, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, DOC2X_API_KEY, user_request):
    def pdf2markdown(filepath):
-        import requests, json, os
+        chatbot.append((None, f"Doc2x 解析中"))
        markdown_dir = get_log_folder(plugin_name="pdf_ocr")
        doc2x_api_key = DOC2X_API_KEY
        if doc2x_api_key.startswith('sk-'):
            url = "https://api.doc2x.noedgeai.com/api/v1/pdf"
        else:
            doc2x_api_key = refresh_key(doc2x_api_key)
            url = "https://api.doc2x.noedgeai.com/api/platform/pdf"
        chatbot.append((None, "加载PDF文件，发送至DOC2X解析..."))
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        res = requests.post(
+        md_zip_path, unzipped_folder = 解析PDF_DOC2X(filepath, format='md')
            url,
            files={"file": open(filepath, "rb")},
            data={"ocr": "1"},
            headers={"Authorization": "Bearer " + doc2x_api_key}
        )
        res_json = []
        if res.status_code == 200:
            decoded = res.content.decode("utf-8")
            for z_decoded in decoded.split('\n'):
                if len(z_decoded) == 0: continue
                assert z_decoded.startswith("data: ")
                z_decoded = z_decoded[len("data: "):]
                decoded_json = json.loads(z_decoded)
                res_json.append(decoded_json)
            if 'limit exceeded' in decoded_json.get('status', ''):
                raise RuntimeError("Doc2x API 页数受限，请联系 Doc2x 方面，并更换新的 API 秘钥。")
        else:
            raise RuntimeError(format("[ERROR] status code: %d, body: %s" % (res.status_code, res.text)))
        uuid = res_json[0]['uuid']
        to = "md" # latex, md, docx
        url = "https://api.doc2x.noedgeai.com/api/export"+"?request_id="+uuid+"&to="+to
        chatbot.append((None, f"读取解析: {url} ..."))
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        res = requests.get(url, headers={"Authorization": "Bearer " + doc2x_api_key})
        md_zip_path = os.path.join(markdown_dir, gen_time_str() + '.zip')
        if res.status_code == 200:
            with open(md_zip_path, "wb") as f: f.write(res.content)
        else:
            raise RuntimeError(format("[ERROR] status code: %d, body: %s" % (res.status_code, res.text)))
        promote_file_to_downloadzone(md_zip_path, chatbot=chatbot)
        chatbot.append((None, f"完成解析 {md_zip_path} ..."))
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -180,6 +180,7 @@ version: '3'
 services:
  gpt_academic_with_latex:
    image: ghcr.io/binary-husky/gpt_academic_with_latex:master  # (Auto Built by Dockerfile: docs/GithubAction+NoLocal+Latex)
    # 对于ARM64设备，请将以上镜像名称替换为 ghcr.io/binary-husky/gpt_academic_with_latex_arm:master
    environment:
      # 请查阅 `config.py` 以查看所有的配置信息
      API_KEY:                  '    sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx                              '
--- a/docs/GithubAction+NoLocal+Latex
+++ b/docs/GithubAction+NoLocal+Latex
@@ -1,35 +1,34 @@
-# 此Dockerfile适用于“无本地模型”的环境构建，如果需要使用chatglm等本地模型，请参考 docs/Dockerfile+ChatGLM
+# 此Dockerfile适用于"无本地模型"的环境构建，如果需要使用chatglm等本地模型，请参考 docs/Dockerfile+ChatGLM
 # - 1 修改 `config.py`
 # - 2 构建 docker build -t gpt-academic-nolocal-latex -f docs/GithubAction+NoLocal+Latex .
 # - 3 运行 docker run -v /home/fuqingxu/arxiv_cache:/root/arxiv_cache --rm -it --net=host gpt-academic-nolocal-latex
-FROM fuqingxu/python311_texlive_ctex:latest
+FROM menghuan1918/ubuntu_uv_ctex:latest
-ENV PATH "$PATH:/usr/local/texlive/2022/bin/x86_64-linux"
+ENV DEBIAN_FRONTEND=noninteractive
-ENV PATH "$PATH:/usr/local/texlive/2023/bin/x86_64-linux"
+SHELL ["/bin/bash", "-c"]
 ENV PATH "$PATH:/usr/local/texlive/2024/bin/x86_64-linux"
 ENV PATH "$PATH:/usr/local/texlive/2025/bin/x86_64-linux"
 ENV PATH "$PATH:/usr/local/texlive/2026/bin/x86_64-linux"
 # 指定路径
 WORKDIR /gpt
-RUN pip3 install openai numpy arxiv rich
+# 先复制依赖文件
-RUN pip3 install colorama Markdown pygments pymupdf
+COPY requirements.txt .
 RUN pip3 install python-docx pdfminer
 RUN pip3 install nougat-ocr
 # 装载项目文件
 COPY . .
 # 安装依赖
-RUN pip3 install -r requirements.txt
+RUN pip install --break-system-packages openai numpy arxiv rich colorama Markdown pygments pymupdf python-docx pdfminer \
    && pip install --break-system-packages -r requirements.txt \
    && if [ "$(uname -m)" = "x86_64" ]; then \
    pip install --break-system-packages nougat-ocr; \
    fi \
    && pip cache purge \
    && rm -rf /root/.cache/pip/*
-# edge-tts需要的依赖
+# 创建非root用户
-RUN apt update && apt install ffmpeg -y
+RUN useradd -m gptuser && chown -R gptuser /gpt
 USER gptuser
 # 最后才复制代码文件,这样代码更新时只需重建最后几层，可以大幅减少docker pull所需的大小
 COPY --chown=gptuser:gptuser . .
 # 可选步骤，用于预热模块
-RUN python3  -c 'from check_proxy import warm_up_modules; warm_up_modules()'
+RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
 # 启动
 CMD ["python3", "-u", "main.py"]
--- a/docs/GithubAction+NoLocal+Latex+Arm
+++ b/docs/GithubAction+NoLocal+Latex+Arm
@@ -1,25 +0,0 @@
 # 此Dockerfile适用于“无本地模型”的环境构建，如果需要使用chatglm等本地模型，请参考 docs/Dockerfile+ChatGLM
 # - 1 修改 `config.py`
 # - 2 构建 docker build -t gpt-academic-nolocal-latex -f docs/GithubAction+NoLocal+Latex .
 # - 3 运行 docker run -v /home/fuqingxu/arxiv_cache:/root/arxiv_cache --rm -it --net=host gpt-academic-nolocal-latex
 FROM menghuan1918/ubuntu_uv_ctex:latest
 ENV DEBIAN_FRONTEND=noninteractive
 SHELL ["/bin/bash", "-c"]
 WORKDIR /gpt
 COPY . .
 RUN /root/.cargo/bin/uv venv --seed \
    && source .venv/bin/activate \
    && /root/.cargo/bin/uv pip install openai numpy arxiv rich colorama Markdown pygments pymupdf python-docx pdfminer \
    && /root/.cargo/bin/uv pip install -r requirements.txt \
    && /root/.cargo/bin/uv clean
 # 对齐python3
 RUN rm -f /usr/bin/python3 && ln -s /gpt/.venv/bin/python /usr/bin/python3
 RUN rm -f /usr/bin/python && ln -s /gpt/.venv/bin/python /usr/bin/python
 # 可选步骤，用于预热模块
 RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
 # 启动
 CMD ["python3", "-u", "main.py"]
--- a/request_llms/bridge_all.py
+++ b/request_llms/bridge_all.py
@@ -385,6 +385,14 @@ model_info = {
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },
    "glm-4-plus":{
        "fn_with_ui": zhipu_ui,
        "fn_without_ui": zhipu_noui,
        "endpoint": None,
        "max_token": 10124 * 8,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },
    # api_2d (此后不需要在此处添加api2d的接口了，因为下面的代码会自动添加)
    "api2d-gpt-4": {
--- a/request_llms/bridge_chatgpt.py
+++ b/request_llms/bridge_chatgpt.py
@@ -341,7 +341,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
                    # 前者是API2D的结束条件，后者是OPENAI的结束条件
                    if ('data: [DONE]' in chunk_decoded) or (len(chunkjson['choices'][0]["delta"]) == 0):
                        # 判定为数据流的结束，gpt_replying_buffer也写完了
-                        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
+                        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer, user_name=chatbot.get_user())
                        break
                    # 处理数据流的主体
                    status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}"
@@ -375,7 +375,7 @@ def handle_o1_model_special(response, inputs, llm_kwargs, chatbot, history):
    try:
        chunkjson = json.loads(response.content.decode())
        gpt_replying_buffer = chunkjson['choices'][0]["message"]["content"]
-        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
+        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer, user_name=chatbot.get_user())
        history[-1] = gpt_replying_buffer
        chatbot[-1] = (history[-2], history[-1])
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
--- a/request_llms/bridge_chatgpt_vision.py
+++ b/request_llms/bridge_chatgpt_vision.py
@@ -184,7 +184,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
                        # 判定为数据流的结束，gpt_replying_buffer也写完了
                        lastmsg = chatbot[-1][-1] + f"\n\n\n\n「{llm_kwargs['llm_model']}调用结束，该模型不具备上下文对话能力，如需追问，请及时切换模型。」"
                        yield from update_ui_lastest_msg(lastmsg, chatbot, history, delay=1)
-                        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
+                        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer, user_name=chatbot.get_user())
                        break
                    # 处理数据流的主体
                    status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}"
--- a/request_llms/bridge_claude.py
+++ b/request_llms/bridge_claude.py
@@ -216,7 +216,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
                if need_to_pass:
                    pass
                elif is_last_chunk:
-                    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
+                    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer, user_name=chatbot.get_user())
                    # logger.info(f'[response] {gpt_replying_buffer}')
                    break
                else:
--- a/request_llms/bridge_cohere.py
+++ b/request_llms/bridge_cohere.py
@@ -223,7 +223,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
                        chatbot[-1] = (history[-2], history[-1])
                        yield from update_ui(chatbot=chatbot, history=history, msg="正常") # 刷新界面
                    if chunkjson['event_type'] == 'stream-end':
-                        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
+                        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer, user_name=chatbot.get_user())
                        history[-1] = gpt_replying_buffer
                        chatbot[-1] = (history[-2], history[-1])
                        yield from update_ui(chatbot=chatbot, history=history, msg="正常") # 刷新界面
--- a/request_llms/bridge_google_gemini.py
+++ b/request_llms/bridge_google_gemini.py
@@ -109,7 +109,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
            gpt_replying_buffer += paraphrase['text']    # 使用 json 解析库进行处理
            chatbot[-1] = (inputs, gpt_replying_buffer)
            history[-1] = gpt_replying_buffer
-            log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
+            log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer, user_name=chatbot.get_user())
            yield from update_ui(chatbot=chatbot, history=history)
        if error_match:
            history = history[-2]  # 错误的不纳入对话
--- a/request_llms/bridge_moonshot.py
+++ b/request_llms/bridge_moonshot.py
@@ -166,7 +166,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
            history = history[:-2]
            yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
            break
-    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_bro_result)
+    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_bro_result, user_name=chatbot.get_user())
 def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None,
                                  console_slience=False):
--- a/request_llms/bridge_openrouter.py
+++ b/request_llms/bridge_openrouter.py
@@ -337,7 +337,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
                    # 前者是API2D的结束条件，后者是OPENAI的结束条件
                    if ('data: [DONE]' in chunk_decoded) or (len(chunkjson['choices'][0]["delta"]) == 0):
                        # 判定为数据流的结束，gpt_replying_buffer也写完了
-                        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
+                        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer, user_name=chatbot.get_user())
                        break
                    # 处理数据流的主体
                    status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}"
@@ -371,7 +371,7 @@ def handle_o1_model_special(response, inputs, llm_kwargs, chatbot, history):
    try:
        chunkjson = json.loads(response.content.decode())
        gpt_replying_buffer = chunkjson['choices'][0]["message"]["content"]
-        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
+        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer, user_name=chatbot.get_user())
        history[-1] = gpt_replying_buffer
        chatbot[-1] = (history[-2], history[-1])
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
--- a/request_llms/bridge_qwen.py
+++ b/request_llms/bridge_qwen.py
@@ -59,7 +59,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
        chatbot[-1] = (inputs, response)
        yield from update_ui(chatbot=chatbot, history=history)
-    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=response)
+    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=response, user_name=chatbot.get_user())
    # 总结输出
    if response == f"[Local Message] 等待{model_name}响应中 ...":
        response = f"[Local Message] {model_name}响应异常 ..."
--- a/request_llms/bridge_taichu.py
+++ b/request_llms/bridge_taichu.py
@@ -68,5 +68,5 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
        chatbot[-1] = [inputs, response]
        yield from update_ui(chatbot=chatbot, history=history)
    history.extend([inputs, response])
-    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=response)
+    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=response, user_name=chatbot.get_user())
    yield from update_ui(chatbot=chatbot, history=history)
--- a/request_llms/bridge_zhipu.py
+++ b/request_llms/bridge_zhipu.py
@@ -97,5 +97,5 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
        chatbot[-1] = [inputs, response]
        yield from update_ui(chatbot=chatbot, history=history)
    history.extend([inputs, response])
-    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=response)
+    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=response, user_name=chatbot.get_user())
    yield from update_ui(chatbot=chatbot, history=history)
--- a/shared_utils/fastapi_server.py
+++ b/shared_utils/fastapi_server.py
@@ -138,7 +138,9 @@ def start_app(app_block, CONCURRENT_COUNT, AUTHENTICATION, PORT, SSL_KEYFILE, SS
    app_block.is_sagemaker = False
    gradio_app = App.create_app(app_block)
-
+    for route in list(gradio_app.router.routes):
        if route.path == "/proxy={url_path:path}":
            gradio_app.router.routes.remove(route)
    # --- --- replace gradio endpoint to forbid access to sensitive files --- ---
    if len(AUTHENTICATION) > 0:
        dependencies = []
@@ -154,9 +156,13 @@ def start_app(app_block, CONCURRENT_COUNT, AUTHENTICATION, PORT, SSL_KEYFILE, SS
        @gradio_app.head("/file={path_or_url:path}", dependencies=dependencies)
        @gradio_app.get("/file={path_or_url:path}", dependencies=dependencies)
        async def file(path_or_url: str, request: fastapi.Request):
-            if len(AUTHENTICATION) > 0:
+            if not _authorize_user(path_or_url, request, gradio_app):
-                if not _authorize_user(path_or_url, request, gradio_app):
+                return "越权访问!"
-                    return "越权访问!"
+            stripped = path_or_url.lstrip().lower()
            if stripped.startswith("https://") or stripped.startswith("http://"):
                return "账户密码授权模式下, 禁止链接!"
            if '../' in stripped:
                return "非法路径!"
            return await endpoint(path_or_url, request)
        from fastapi import Request, status
@@ -167,6 +173,26 @@ def start_app(app_block, CONCURRENT_COUNT, AUTHENTICATION, PORT, SSL_KEYFILE, SS
            response.delete_cookie('access-token')
            response.delete_cookie('access-token-unsecure')
            return response
    else:
        dependencies = []
        endpoint = None
        for route in list(gradio_app.router.routes):
            if route.path == "/file/{path:path}":
                gradio_app.router.routes.remove(route)
            if route.path == "/file={path_or_url:path}":
                dependencies = route.dependencies
                endpoint = route.endpoint
                gradio_app.router.routes.remove(route)
        @gradio_app.get("/file/{path:path}", dependencies=dependencies)
        @gradio_app.head("/file={path_or_url:path}", dependencies=dependencies)
        @gradio_app.get("/file={path_or_url:path}", dependencies=dependencies)
        async def file(path_or_url: str, request: fastapi.Request):
            stripped = path_or_url.lstrip().lower()
            if stripped.startswith("https://") or stripped.startswith("http://"):
                return "账户密码授权模式下, 禁止链接!"
            if '../' in stripped:
                return "非法路径!"
            return await endpoint(path_or_url, request)
    # --- --- enable TTS (text-to-speech) functionality --- ---
    TTS_TYPE = get_conf("TTS_TYPE")
--- a/shared_utils/handle_upload.py
+++ b/shared_utils/handle_upload.py
@@ -104,17 +104,27 @@ def extract_archive(file_path, dest_dir):
            logger.info("Successfully extracted zip archive to {}".format(dest_dir))
    elif file_extension in [".tar", ".gz", ".bz2"]:
-        with tarfile.open(file_path, "r:*") as tarobj:
+        try:
-            # 清理提取路径，移除任何不安全的元素
+            with tarfile.open(file_path, "r:*") as tarobj:
-            for member in tarobj.getmembers():
+                # 清理提取路径，移除任何不安全的元素
-                member_path = os.path.normpath(member.name)
+                for member in tarobj.getmembers():
-                full_path = os.path.join(dest_dir, member_path)
+                    member_path = os.path.normpath(member.name)
-                full_path = os.path.abspath(full_path)
+                    full_path = os.path.join(dest_dir, member_path)
-                if not full_path.startswith(os.path.abspath(dest_dir) + os.sep):
+                    full_path = os.path.abspath(full_path)
-                    raise Exception(f"Attempted Path Traversal in {member.name}")
+                    if not full_path.startswith(os.path.abspath(dest_dir) + os.sep):
                        raise Exception(f"Attempted Path Traversal in {member.name}")
-            tarobj.extractall(path=dest_dir)
+                tarobj.extractall(path=dest_dir)
-            logger.info("Successfully extracted tar archive to {}".format(dest_dir))
+                logger.info("Successfully extracted tar archive to {}".format(dest_dir))
        except tarfile.ReadError as e:
            if file_extension == ".gz":
                # 一些特别奇葩的项目，是一个gz文件，里面不是tar，只有一个tex文件
                import gzip
                with gzip.open(file_path, 'rb') as f_in:
                    with open(os.path.join(dest_dir, 'main.tex'), 'wb') as f_out:
                        f_out.write(f_in.read())
            else:
                raise e
    # 第三方库，需要预先pip install rarfile
    # 此外，Windows上还需要安装winrar软件，配置其Path环境变量，如"C:\Program Files\WinRAR"才可以
--- a/shared_utils/key_pattern_manager.py
+++ b/shared_utils/key_pattern_manager.py
@@ -14,6 +14,7 @@ openai_regex = re.compile(
    r"sk-[a-zA-Z0-9_-]{92}$|" +
    r"sk-proj-[a-zA-Z0-9_-]{48}$|"+
    r"sk-proj-[a-zA-Z0-9_-]{124}$|"+
    r"sk-proj-[a-zA-Z0-9_-]{156}$|"+ #新版apikey位数不匹配故修改此正则表达式
    r"sess-[a-zA-Z0-9]{40}$"
 )
 def is_openai_api_key(key):
--- a/tests/test_doc2x.py
+++ b/tests/test_doc2x.py
@@ -0,0 +1,7 @@
 import init_test
 from crazy_functions.pdf_fns.parse_pdf_via_doc2x import 解析PDF_DOC2X_转Latex
 # 解析PDF_DOC2X_转Latex("gpt_log/arxiv_cache_old/2410.10819/workfolder/merge.pdf")
 # 解析PDF_DOC2X_转Latex("gpt_log/arxiv_cache_ooo/2410.07095/workfolder/merge.pdf")
 解析PDF_DOC2X_转Latex("2410.11190v2.pdf")
--- a/toolbox.py
+++ b/toolbox.py
@@ -1029,7 +1029,7 @@ def check_repeat_upload(new_pdf_path, pdf_hash):
    # 如果所有页的内容都相同，返回 True
    return False, None
-def log_chat(llm_model: str, input_str: str, output_str: str):
+def log_chat(llm_model: str, input_str: str, output_str: str, user_name: str=default_user_name):
    try:
        if output_str and input_str and llm_model:
            uid = str(uuid.uuid4().hex)
@@ -1038,8 +1038,8 @@ def log_chat(llm_model: str, input_str: str, output_str: str):
            logger.bind(chat_msg=True).info(dedent(
            """
            ╭──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
-            [UID]
+            [UID/USER]
-            {uid}
+            {uid}/{user_name}
            [Model]
            {llm_model}
            [Query]
@@ -1047,6 +1047,6 @@ def log_chat(llm_model: str, input_str: str, output_str: str):
            [Response]
            {output_str}
            ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
-            """).format(uid=uid, llm_model=llm_model, input_str=input_str, output_str=output_str))
+            """).format(uid=uid, user_name=user_name, llm_model=llm_model, input_str=input_str, output_str=output_str))
    except:
        logger.error(trimmed_format_exc())
Author	SHA1	Message	Date
binary-husky	82e125d439	log user name during chat	2024-11-10 16:50:24 +00:00
binary-husky	197287fc30	Enhance archive extraction with error handling for tar and gzip formats	2024-11-09 10:10:46 +00:00
Bingchen Jiang	c37fcc9299	Adding support to new openai apikey format (#2030 )	2024-11-09 13:41:19 +08:00
binary-husky	91f5e6b8f7	resolve pickle security issue	2024-11-04 13:49:49 +00:00
hcy2206	4f0851f703	增加了对于glm-4-plus的支持 (#2014 ) * 增加对于讯飞星火大模型Spark4.0的支持 * Create github action sync.yml * 增加对于智谱glm-4-plus的支持 * feat: change arxiv io param * catch comment source code exception * upgrade auto comment * add security patch --------- Co-authored-by: GH Action - Upstream Sync <action@github.com> Co-authored-by: binary-husky <qingxu.fu@outlook.com>	2024-11-03 22:41:16 +08:00
binary-husky	2821f27756	add security patch	2024-11-03 14:34:17 +00:00
binary-husky	180550b8f0	upgrade auto comment	2024-10-30 13:37:35 +00:00
binary-husky	7497dcb852	catch comment source code exception	2024-10-30 11:40:47 +00:00
binary-husky	23ef2ffb22	feat: change arxiv io param	2024-10-27 16:54:29 +00:00
binary-husky	848d0f65c7	share paper network beta	2024-10-27 16:08:25 +00:00
Menghuan1918	f0b0364f74	修复并改进build with latex的Docker构建 (#2020 ) * 改进构建文件 * 修复问题 * 更改docker注释，同时测试拉取大小	2024-10-27 23:17:03 +08:00
binary-husky	69f3755682	adjust max_token_limit for pdf translation plugin	2024-10-21 14:31:11 +00:00
binary-husky	4727113243	update doc2x functions	2024-10-21 14:05:42 +00:00
wsg1873	310122f5a7	solve the concatenate error. (#2011 )	2024-10-16 00:56:24 +08:00