up

Default prompt word count control
2024-11-06 00:47:56 +08:00 · 2024-11-05 02:08:12 +08:00 · 2024-11-03 23:05:02 +08:00 · 2024-11-03 22:54:19 +08:00 · 2024-11-03 22:49:29 +08:00 · 2024-11-03 14:19:16 +00:00
3 changed files with 32 additions and 157 deletions
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -21,7 +21,6 @@ def get_crazy_functions():
    from crazy_functions.询问多个大语言模型 import 同时问询
    from crazy_functions.SourceCode_Analyse import 解析一个Lua项目
    from crazy_functions.SourceCode_Analyse import 解析一个CSharp项目
-    from crazy_functions.总结word文档 import 总结word文档
    from crazy_functions.解析JupyterNotebook import 解析ipynb文件
    from crazy_functions.Conversation_To_File import 载入对话历史存档
    from crazy_functions.Conversation_To_File import 对话历史存档
@@ -240,7 +239,7 @@ def get_crazy_functions():
            "AsButton": True,  # 加入下拉菜单中
            # "Info": "连接网络回答问题（需要访问谷歌）| 输入参数是一个问题",
            "Function": HotReload(连接网络回答问题),
-            # "Class": NetworkGPT_Wrap     # 新一代插件需要注册Class
+            "Class": NetworkGPT_Wrap     # 新一代插件需要注册Class
        },
        "历史上的今天": {
            "Group": "对话",
--- a/crazy_functions/总结word文档.py
+++ b/crazy_functions/总结word文档.py
@@ -1,127 +0,0 @@
-from toolbox import update_ui
-from toolbox import CatchException, report_exception
-from toolbox import write_history_to_file, promote_file_to_downloadzone
-from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
-fast_debug = False
-
-
-def 解析docx(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
-    import time, os
-    # pip install python-docx 用于docx格式，跨平台
-    # pip install pywin32 用于doc格式，仅支持Win平台
-    for index, fp in enumerate(file_manifest):
-        if fp.split(".")[-1] == "docx":
-            from docx import Document
-            doc = Document(fp)
-            file_content = "\n".join([para.text for para in doc.paragraphs])
-        else:
-            try:
-                import win32com.client
-                word = win32com.client.Dispatch("Word.Application")
-                word.visible = False
-                # 打开文件
-                doc = word.Documents.Open(os.getcwd() + '/' + fp)
-                # file_content = doc.Content.Text
-                doc = word.ActiveDocument
-                file_content = doc.Range().Text
-                doc.Close()
-                word.Quit()
-            except:
-                raise RuntimeError('请先将.doc文档转换为.docx文档。')
-
-        # private_upload里面的文件名在解压zip后容易出现乱码（rar和7z格式正常），故可以只分析文章内容，不输入文件名
-        from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
-        from request_llms.bridge_all import model_info
-        max_token = model_info[llm_kwargs['llm_model']]['max_token']
-        TOKEN_LIMIT_PER_FRAGMENT = max_token * 3 // 4
-        paper_fragments = breakdown_text_to_satisfy_token_limit(txt=file_content, limit=TOKEN_LIMIT_PER_FRAGMENT, llm_model=llm_kwargs['llm_model'])
-        this_paper_history = []
-        for i, paper_frag in enumerate(paper_fragments):
-            i_say = f'请对下面的文章片段用中文做概述，文件名是{os.path.relpath(fp, project_folder)}，文章内容是 ```{paper_frag}```'
-            i_say_show_user = f'请对下面的文章片段做概述: {os.path.abspath(fp)}的第{i+1}/{len(paper_fragments)}个片段。'
-            gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
-                inputs=i_say,
-                inputs_show_user=i_say_show_user,
-                llm_kwargs=llm_kwargs,
-                chatbot=chatbot,
-                history=[],
-                sys_prompt="总结文章。"
-            )
-
-            chatbot[-1] = (i_say_show_user, gpt_say)
-            history.extend([i_say_show_user,gpt_say])
-            this_paper_history.extend([i_say_show_user,gpt_say])
-
-        # 已经对该文章的所有片段总结完毕，如果文章被切分了，
-        if len(paper_fragments) > 1:
-            i_say = f"根据以上的对话，总结文章{os.path.abspath(fp)}的主要内容。"
-            gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
-                inputs=i_say,
-                inputs_show_user=i_say,
-                llm_kwargs=llm_kwargs,
-                chatbot=chatbot,
-                history=this_paper_history,
-                sys_prompt="总结文章。"
-            )
-
-            history.extend([i_say,gpt_say])
-            this_paper_history.extend([i_say,gpt_say])
-
-        res = write_history_to_file(history)
-        promote_file_to_downloadzone(res, chatbot=chatbot)
-        chatbot.append(("完成了吗？", res))
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-
-    res = write_history_to_file(history)
-    promote_file_to_downloadzone(res, chatbot=chatbot)
-    chatbot.append(("所有文件都总结完成了吗？", res))
-    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-
-
-@CatchException
-def 总结word文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
-    import glob, os
-
-    # 基本信息：功能、贡献者
-    chatbot.append([
-        "函数插件功能？",
-        "批量总结Word文档。函数插件贡献者: JasonGuo1。注意, 如果是.doc文件, 请先转化为.docx格式。"])
-    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-
-    # 尝试导入依赖，如果缺少依赖，则给出安装建议
-    try:
-        from docx import Document
-    except:
-        report_exception(chatbot, history,
-                         a=f"解析项目: {txt}",
-                         b=f"导入软件依赖失败。使用该模块需要额外依赖，安装方法```pip install --upgrade python-docx pywin32```。")
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
-
-    # 清空历史，以免输入溢出
-    history = []
-
-    # 检测输入参数，如没有给定输入参数，直接退出
-    if os.path.exists(txt):
-        project_folder = txt
-    else:
-        if txt == "": txt = '空空如也的输入栏'
-        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
-
-    # 搜索需要处理的文件清单
-    if txt.endswith('.docx') or txt.endswith('.doc'):
-        file_manifest = [txt]
-    else:
-        file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.docx', recursive=True)] + \
-                        [f for f in glob.glob(f'{project_folder}/**/*.doc', recursive=True)]
-
-    # 如果没找到任何文件
-    if len(file_manifest) == 0:
-        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.docx或doc文件: {txt}")
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
-
-    # 开始正式执行任务
-    yield from 解析docx(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
--- a/crazy_functions/批量文件询问.py
+++ b/crazy_functions/批量文件询问.py
@@ -52,7 +52,7 @@ class BatchDocumentSummarizer:
                         f'文件内容是 ```{frag.content}```')
                i_say_show_user = (f'正在处理 {frag.rel_path} (片段 {frag.fragment_index + 1}/{frag.total_fragments})')
            else:
-                i_say = (f'请对下面的内容用中文做概述，文件名是{os.path.basename(frag.file_path)}，'
+                i_say = (f'请对下面的内容用中文做总结，不超过500字，文件名是{os.path.basename(frag.file_path)}，'
                         f'内容是 ```{frag.content}```')
                i_say_show_user = f'正在处理 {frag.rel_path} (片段 {frag.fragment_index + 1}/{frag.total_fragments})'

@@ -355,8 +355,8 @@ class BatchDocumentSummarizer:
            if len(summaries) > 1:  # 多片段文件需要生成整体总结
                sorted_summaries = sorted(summaries, key=lambda x: x['index'])
                if self.plugin_kwargs.get("advanced_arg"):
-                    i_say = (f"根据以下内容，按要求：{self.plugin_kwargs['advanced_arg']}，"
-                             f"总结文件 {os.path.basename(rel_path)} 的主要内容。")
+
+                    i_say = f'请按照用户要求对文件内容进行处理，用户要求为：{self.plugin_kwargs["advanced_arg"]}：'
                else:
                    i_say = f"请总结文件 {os.path.basename(rel_path)} 的主要内容，不超过500字。"

@@ -364,11 +364,11 @@ class BatchDocumentSummarizer:
                    summary_texts = [s['summary'] for s in sorted_summaries]
                    response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
                        inputs_array=[i_say],
-                        inputs_show_user_array=[f"生成 {rel_path} 的总结"],
+                        inputs_show_user_array=[f"生成 {rel_path} 的处理结果"],
                        llm_kwargs=self.llm_kwargs,
                        chatbot=self.chatbot,
                        history_array=[summary_texts],
-                        sys_prompt_array=["总结文件内容。"],
+                        sys_prompt_array=["你是一个优秀的助手，"],
                    )
                    self.file_summaries_map[rel_path] = response_collection[1]
                except Exception as e:
@@ -378,32 +378,35 @@ class BatchDocumentSummarizer:
                self.file_summaries_map[rel_path] = summaries[0]['summary']

        # 4. 生成最终总结
-        try:
-            # 收集所有文件的总结用于生成最终总结
-            file_summaries_for_final = []
-            for rel_path, summary in self.file_summaries_map.items():
-                file_summaries_for_final.append(f"文件 {rel_path} 的总结：\n{summary}")
+        if total_files ==1:
+            return "文件数为1，此时不调用总结模块"
+        else:
+            try:
+                # 收集所有文件的总结用于生成最终总结
+                file_summaries_for_final = []
+                for rel_path, summary in self.file_summaries_map.items():
+                    file_summaries_for_final.append(f"文件 {rel_path} 的总结：\n{summary}")

-            if self.plugin_kwargs.get("advanced_arg"):
-                final_summary_prompt = ("根据以下所有文件的总结内容，按要求进行综合处理：" +
-                                        self.plugin_kwargs['advanced_arg'])
-            else:
-                final_summary_prompt = "请根据以下所有文件的总结内容，生成最终的总结报告。"
+                if self.plugin_kwargs.get("advanced_arg"):
+                    final_summary_prompt = ("根据以下所有文件的总结内容，按要求进行综合处理：" +
+                                            self.plugin_kwargs['advanced_arg'])
+                else:
+                    final_summary_prompt = "请根据以下所有文件的总结内容，生成最终的总结报告。"

-            response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
-                inputs_array=[final_summary_prompt],
-                inputs_show_user_array=["生成最终总结报告"],
-                llm_kwargs=self.llm_kwargs,
-                chatbot=self.chatbot,
-                history_array=[file_summaries_for_final],
-                sys_prompt_array=["总结所有文件内容。"],
-                max_workers=1
-            )
+                response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
+                    inputs_array=[final_summary_prompt],
+                    inputs_show_user_array=["生成最终总结报告"],
+                    llm_kwargs=self.llm_kwargs,
+                    chatbot=self.chatbot,
+                    history_array=[file_summaries_for_final],
+                    sys_prompt_array=["总结所有文件内容。"],
+                    max_workers=1
+                )

-            return response_collection[1] if len(response_collection) > 1 else "生成总结失败"
-        except Exception as e:
-            self.chatbot.append(["错误", f"最终总结生成失败：{str(e)}"])
-            return "生成总结失败"
+                return response_collection[1] if len(response_collection) > 1 else "生成总结失败"
+            except Exception as e:
+                self.chatbot.append(["错误", f"最终总结生成失败：{str(e)}"])
+                return "生成总结失败"

    def save_results(self, final_summary: str):
        """保存结果到文件"""
Author	SHA1	Message	Date
lbykkkk	61676d0536	up	2024-11-06 00:47:56 +08:00
lbykkkk	df2ef7940c	up	2024-11-05 02:08:12 +08:00
lbykkkk	c10f2b45e5	Default prompt word count control	2024-11-03 23:05:02 +08:00
lbykkkk	7e2ede2d12	up	2024-11-03 22:54:19 +08:00
lbykkkk	ec10e2a3ac	Merge branch 'refs/heads/batch-file-query' into boyin_summary # Conflicts: # crazy_functional.py	2024-11-03 22:49:29 +08:00
binary-husky	7474d43433	stage connection	2024-11-03 14:19:16 +00:00
binary-husky	83489f9acf	Merge remote-tracking branch 'origin/boyin_summary'	2024-11-03 14:12:04 +00:00
lbykkkk	5dab7b2290	refine	2024-10-29 23:54:55 +08:00
lbykkkk	89dc6c7265	refine	2024-10-21 22:58:04 +08:00
lbykkkk	21111d3bd0	refine	2024-10-21 00:57:29 +08:00
lbykkkk	701018f48c	up	2024-10-21 00:30:18 +08:00
lbykkkk	8733c4e1e9	file type support	2024-10-20 01:33:00 +08:00
lbykkkk	8498ddf6bf	up	2024-10-19 17:31:30 +00:00
lbykkkk	3c3293818d	Change the word document summary function to document summary function	2024-10-20 01:14:42 +08:00