Merge Frontier, Update to Version 3.72 (#1553)

* Zhipu sdk update 适配最新的智谱SDK，支持GLM4v (#1502) * 适配 google gemini 优化为从用户input中提取文件 * 适配最新的智谱SDK、支持glm-4v * requirements.txt fix * pending history check --------- Co-authored-by: binary-husky <qingxu.fu@outlook.com> * Update "生成多种Mermaid图表" plugin: Separate out the file reading function (#1520) * Update crazy_functional.py with new functionality deal with PDF * Update crazy_functional.py and Mermaid.py for plugin_kwargs * Update crazy_functional.py with new chart type: mind map * Update SELECT_PROMPT and i_say_show_user messages * Update ArgsReminder message in get_crazy_functions() function * Update with read md file and update PROMPTS * Return the PROMPTS as the test found that the initial version worked best * Update Mermaid chart generation function * version 3.71 * 解决issues #1510 * Remove unnecessary text from sys_prompt in 解析历史输入 function * Remove sys_prompt message in 解析历史输入 function * Update bridge_all.py: supports gpt-4-turbo-preview (#1517) * Update bridge_all.py: supports gpt-4-turbo-preview supports gpt-4-turbo-preview * Update bridge_all.py --------- Co-authored-by: binary-husky <96192199+binary-husky@users.noreply.github.com> * Update config.py: supports gpt-4-turbo-preview (#1516) * Update config.py: supports gpt-4-turbo-preview supports gpt-4-turbo-preview * Update config.py --------- Co-authored-by: binary-husky <96192199+binary-husky@users.noreply.github.com> * Refactor 解析历史输入 function to handle file input * Update Mermaid chart generation functionality * rename files and functions --------- Co-authored-by: binary-husky <qingxu.fu@outlook.com> Co-authored-by: hongyi-zhao <hongyi.zhao@gmail.com> Co-authored-by: binary-husky <96192199+binary-husky@users.noreply.github.com> * 接入mathpix ocr功能 (#1468) * Update Latex输出PDF结果.py 借助mathpix实现了PDF翻译中文并重新编译PDF * Update config.py add mathpix appid & appkey * Add 'PDF翻译中文并重新编译PDF' feature to plugins. 
--------- Co-authored-by: binary-husky <96192199+binary-husky@users.noreply.github.com> * fix zhipuai * check picture * remove glm-4 due to bug * 修改config * 检查MATHPIX_APPID * Remove unnecessary code and update function_plugins dictionary * capture non-standard token overflow * bug fix #1524 * change mermaid style * 支持mermaid 滚动放大缩小重置,鼠标滚动和拖拽 (#1530) * 支持mermaid 滚动放大缩小重置,鼠标滚动和拖拽 * 微调未果先stage一下 * update --------- Co-authored-by: binary-husky <qingxu.fu@outlook.com> Co-authored-by: binary-husky <96192199+binary-husky@users.noreply.github.com> * ver 3.72 * change live2d * save the status of `clear btn` in cookie * 前端选择保持 * js ui bug fix * reset btn bug fix * update live2d tips * fix missing get_token_num method * fix live2d toggle switch * fix persistent custom btn with cookie * fix zhipuai feedback with core functionality * Refactor button update and clean up functions --------- Co-authored-by: XIao <46100050+Kilig947@users.noreply.github.com> Co-authored-by: Menghuan1918 <menghuan2003@outlook.com> Co-authored-by: hongyi-zhao <hongyi.zhao@gmail.com> Co-authored-by: Hao Ma <893017927@qq.com> Co-authored-by: zeyuan huang <599012428@qq.com>
2024-02-14 18:35:09 +08:00
parent e0c5859cf9
commit 2e9b4a5770
42 changed files with 1171 additions and 9635 deletions
--- a/crazy_functions/生成多种Mermaid图表.py
+++ b/crazy_functions/生成多种Mermaid图表.py
@@ -1,6 +1,5 @@
 from toolbox import CatchException, update_ui, report_exception
 from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
-from .crazy_utils import read_and_clean_pdf_text
 import datetime

 #以下是每类图表的PROMPT
@@ -162,7 +161,7 @@ mindmap
 ```
 """

-def 解析历史输入(history,llm_kwargs,chatbot,plugin_kwargs):
+def 解析历史输入(history,llm_kwargs,file_manifest,chatbot,plugin_kwargs):
    ############################## <第 0 步，切割输入> ##################################
    # 借用PDF切割中的函数对文本进行切割
    TOKEN_LIMIT_PER_FRAGMENT = 2500
@@ -170,8 +169,6 @@ def 解析历史输入(history,llm_kwargs,chatbot,plugin_kwargs):
    from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
    txt = breakdown_text_to_satisfy_token_limit(txt=txt, limit=TOKEN_LIMIT_PER_FRAGMENT, llm_model=llm_kwargs['llm_model'])
    ############################## <第 1 步，迭代地历遍整个文章，提取精炼信息> ##################################
-    i_say_show_user = f'首先你从历史记录或文件中提取摘要。'; gpt_say = "[Local Message] 收到。"   # 用户提示
-    chatbot.append([i_say_show_user, gpt_say]); yield from update_ui(chatbot=chatbot, history=history)    # 更新UI
    results = []
    MAX_WORD_TOTAL = 4096
    n_txt = len(txt)
@@ -179,7 +176,7 @@ def 解析历史输入(history,llm_kwargs,chatbot,plugin_kwargs):
    if n_txt >= 20: print('文章极长，不能达到预期效果')
    for i in range(n_txt):
        NUM_OF_WORD = MAX_WORD_TOTAL // n_txt
-        i_say = f"Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words: {txt[i]}"
+        i_say = f"Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words in Chinese: {txt[i]}"
        i_say_show_user = f"[{i+1}/{n_txt}] Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words: {txt[i][:200]} ...."
        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user,  # i_say=真正给chatgpt的提问， i_say_show_user=给用户看的提问
                                                                           llm_kwargs, chatbot, 
@@ -232,34 +229,10 @@ def 解析历史输入(history,llm_kwargs,chatbot,plugin_kwargs):
        inputs=i_say,
        inputs_show_user=i_say_show_user,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=[], 
-        sys_prompt="你精通使用mermaid语法来绘制图表,首先确保语法正确,其次避免在mermaid语法中使用不允许的字符,此外也应当分考虑图表的可读性。"
+        sys_prompt=""
    )
    history.append(gpt_say)
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
-
-def 输入区文件处理(txt):
-    if txt == "": return False, txt
-    success = True
-    import glob
-    from .crazy_utils import get_files_from_everything
-    file_pdf,pdf_manifest,folder_pdf = get_files_from_everything(txt, '.pdf')
-    file_md,md_manifest,folder_md = get_files_from_everything(txt, '.md')
-    if len(pdf_manifest) == 0 and len(md_manifest) == 0:
-        return False, txt   #如输入区内容不是文件则直接返回输入区内容
-    
-    final_result = ""
-    if file_pdf:
-        for index, fp in enumerate(pdf_manifest):
-            file_content, page_one = read_and_clean_pdf_text(fp) # （尝试）按照章节切割PDF
-            file_content = file_content.encode('utf-8', 'ignore').decode()   # avoid reading non-utf8 chars
-            final_result += "\n" + file_content
-    if file_md:
-        for index, fp in enumerate(md_manifest):
-            with open(fp, 'r', encoding='utf-8', errors='replace') as f:
-                file_content = f.read()
-            file_content = file_content.encode('utf-8', 'ignore').decode()
-            final_result += "\n" + file_content
-    return True, final_result
    
@CatchException
 def 生成多种Mermaid图表(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
@@ -277,26 +250,47 @@ def 生成多种Mermaid图表(txt, llm_kwargs, plugin_kwargs, chatbot, history,
    # 基本信息：功能、贡献者
    chatbot.append([
        "函数插件功能？", 
-        "根据当前聊天历史或文件中(文件内容优先)绘制多种mermaid图表，将会由对话模型首先判断适合的图表类型，随后绘制图表。\
+        "根据当前聊天历史或指定的路径文件(文件内容优先)绘制多种mermaid图表，将会由对话模型首先判断适合的图表类型，随后绘制图表。\
        \n您也可以使用插件参数指定绘制的图表类型,函数插件贡献者: Menghuan1918"])
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-
-    # 尝试导入依赖，如果缺少依赖，则给出安装建议
-    try:
-        import fitz
-    except:
-        report_exception(chatbot, history, 
-            a = f"解析项目: {txt}", 
-            b = f"导入软件依赖失败。使用该模块需要额外依赖，安装方法```pip install --upgrade pymupdf```。")
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
    
    if os.path.exists(txt):     #如输入区无内容则直接解析历史记录
-        file_exist, txt = 输入区文件处理(txt)
+        from crazy_functions.pdf_fns.parse_word import extract_text_from_files
+        file_exist, final_result, page_one, file_manifest, excption = extract_text_from_files(txt, chatbot, history)
    else:
        file_exist = False
+        excption = ""
+        file_manifest = []

-    if file_exist : history = []    #如输入区内容为文件则清空历史记录
-    history.append(txt)     #将解析后的txt传递加入到历史中
-    
-    yield from 解析历史输入(history,llm_kwargs,chatbot,plugin_kwargs)  
+    if excption != "":
+        if excption == "word":
+            report_exception(chatbot, history, 
+                a = f"解析项目: {txt}", 
+                b = f"找到了.doc文件，但是该文件格式不被支持，请先转化为.docx格式。")
+            
+        elif excption == "pdf":
+            report_exception(chatbot, history, 
+                a = f"解析项目: {txt}", 
+                b = f"导入软件依赖失败。使用该模块需要额外依赖，安装方法```pip install --upgrade pymupdf```。")
+        
+        elif excption == "word_pip":
+                report_exception(chatbot, history,
+                    a=f"解析项目: {txt}",
+                    b=f"导入软件依赖失败。使用该模块需要额外依赖，安装方法```pip install --upgrade python-docx pywin32```。")
+
+        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+
+    else:
+        if not file_exist:
+            history.append(txt)     #如输入区不是文件则将输入区内容加入历史记录
+            i_say_show_user = f'首先你从历史记录中提取摘要。'; gpt_say = "[Local Message] 收到。"   # 用户提示
+            chatbot.append([i_say_show_user, gpt_say]); yield from update_ui(chatbot=chatbot, history=history)    # 更新UI
+            yield from 解析历史输入(history,llm_kwargs,file_manifest,chatbot,plugin_kwargs)
+        else:
+            file_num = len(file_manifest)
+            for i in range(file_num):     #依次处理文件
+                i_say_show_user = f"[{i+1}/{file_num}]处理文件{file_manifest[i]}"; gpt_say = "[Local Message] 收到。"   # 用户提示
+                chatbot.append([i_say_show_user, gpt_say]); yield from update_ui(chatbot=chatbot, history=history)    # 更新UI
+                history = []    #如输入区内容为文件则清空历史记录
+                history.append(final_result[i])
+                yield from 解析历史输入(history,llm_kwargs,file_manifest,chatbot,plugin_kwargs)