From 48e10fb10a052ed1f878330ba4fd84f32310e49a Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Mon, 10 Jun 2024 22:22:04 +0800
Subject: [PATCH 1/3] Update README.md
---
README.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 8eed1fee..97da208f 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
> [!IMPORTANT]
+> 2024.6.1: 版本3.80加入插件二级菜单功能(详见wiki)
> 2024.5.1: 加入Doc2x翻译PDF论文的功能,[查看详情](https://github.com/binary-husky/gpt_academic/wiki/Doc2x)
-> 2024.4.30: 3.75版本引入Edge-TTS和SoVits语音克隆模块,[查看详情](https://www.bilibili.com/video/BV1Rp421S7tF/)
-> 2024.3.11: 恭迎Claude3和Moonshot,全力支持Qwen、GLM、DeepseekCoder等中文大语言模型!
+> 2024.3.11: 全力支持Qwen、GLM、DeepseekCoder等中文大语言模型! SoVits语音克隆模块,[查看详情](https://www.bilibili.com/video/BV1Rp421S7tF/)
> 2024.1.17: 安装依赖时,请选择`requirements.txt`中**指定的版本**。 安装命令:`pip install -r requirements.txt`。本项目完全开源免费,您可通过订阅[在线服务](https://github.com/binary-husky/gpt_academic/wiki/online)的方式鼓励本项目的发展。
From 2ff1a1fb0bbd7d3c5fee734f6064e51159c57f34 Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Wed, 12 Jun 2024 09:34:05 +0000
Subject: [PATCH 2/3] update translation matrix
---
docs/translate_english.json | 238 +++++++++++++++++++++++++++++++++++-
docs/translate_std.json | 12 +-
2 files changed, 240 insertions(+), 10 deletions(-)
diff --git a/docs/translate_english.json b/docs/translate_english.json
index e0e16fea..f82bfecb 100644
--- a/docs/translate_english.json
+++ b/docs/translate_english.json
@@ -36,15 +36,12 @@
"总结word文档": "SummarizingWordDocuments",
"解析ipynb文件": "ParsingIpynbFiles",
"解析JupyterNotebook": "ParsingJupyterNotebook",
- "Conversation_To_File": "ConversationHistoryArchive",
"载入Conversation_To_File": "LoadConversationHistoryArchive",
"删除所有本地对话历史记录": "DeleteAllLocalConversationHistoryRecords",
"Markdown英译中": "TranslateMarkdownFromEnglishToChinese",
- "Markdown_Translate": "BatchTranslateMarkdown",
"批量总结PDF文档": "BatchSummarizePDFDocuments",
"批量总结PDF文档pdfminer": "BatchSummarizePDFDocumentsUsingPdfminer",
"批量翻译PDF文档": "BatchTranslatePDFDocuments",
- "PDF_Translate": "BatchTranslatePDFDocuments_MultiThreaded",
"谷歌检索小助手": "GoogleSearchAssistant",
"理解PDF文档内容标准文件输入": "UnderstandPdfDocumentContentStandardFileInput",
"理解PDF文档内容": "UnderstandPdfDocumentContent",
@@ -1668,7 +1665,6 @@
"Markdown翻译指定语言": "TranslateMarkdownToSpecifiedLanguage",
"Langchain知识库": "LangchainKnowledgeBase",
"Latex英文纠错加PDF对比": "CorrectEnglishInLatexWithPDFComparison",
- "Latex_Function": "OutputPDFFromLatex",
"Latex翻译中文并重新编译PDF": "TranslateChineseToEnglishInLatexAndRecompilePDF",
"sprint亮靛": "SprintIndigo",
"寻找Latex主文件": "FindLatexMainFile",
@@ -3748,5 +3744,237 @@
"中文省略号": "Chinese Ellipsis",
"则不生效": "Will not take effect",
"目前是两位小数": "Currently is two decimal places",
- "Incorrect API key. Cohere以提供了不正确的API_KEY为由": "Incorrect API key. Cohere reports an incorrect API_KEY."
+ "Incorrect API key. Cohere以提供了不正确的API_KEY为由": "Incorrect API key. Cohere reports an incorrect API_KEY.",
+ "应当慎之又慎!": "Should be extremely cautious!",
+ "、后端setter": "backend setter",
+ "对于 Run1 的数据": "data for Run1",
+ "另一种更简单的setter方法": "another simpler setter method",
+ "完成解析": "complete parsing",
+ "自动同步": "automatic synchronization",
+ "**表8**": "**Table 8**",
+ "安装方法见": "Installation method see",
+ "通过更严格的 PID 选择对π介子和 K 介子进行过滤以减少主要鉴别为 π 介子的 K 介子等峰背景的污染": "Filtering π mesons and K mesons with a stricter PID to reduce contamination of K mesons mainly identified as π mesons",
+ "并且占据高质量边带的候选体会被拒绝": "And candidates occupying high-quality sidebands are rejected",
+ "GPT-SOVITS 文本转语音服务的运行地址": "Operating address of GPT-SOVITS text-to-speech service",
+ "PDF文件路径": "PDF file path",
+ "注意图片大约占用1": "Note that the image takes up about 1",
+ "以便可以研究BDT输入": "So that BDT inputs can be studied",
+ "是否自动打开浏览器页面": "Whether to automatically open the browser page",
+ "中此模型的APIKEY的名字": "The name of the APIKEY for this model",
+ "{0.8} $ 和 $ \\operatorname{ProbNNk}\\left": "{0.8} $ and $ \\operatorname{ProbNNk}\\left",
+ "请检测终端输出": "Please check the terminal output",
+ "注册账号并获取API KEY": "Register an account and get an API KEY",
+ "-=-=-=-=-=-=-= 👇 以下是多模型路由切换函数 -=-=-=-=-=-=-=": "-=-=-=-=-=-=-= 👇 The following is a multi-model route switching function -=-=-=-=-=-=-=",
+ "如不设置": "If not set",
+ "如果只询问“一个”大语言模型": "If only asking about 'one' large language model",
+ "并非为了计算权重而专门施加了附加选择": "Not specifically applying additional selection for weight calculation",
+ "DOC2X的PDF解析服务": "PDF parsing service of DOC2X",
+ "两兄弟": "Two brothers",
+ "相同的切割也用于Run2和Run1数据": "The same segmentation is also used for Run2 and Run1 data",
+ "返回的数据流第一次为空": "The returned data stream is empty for the first time",
+ "对于光子 PID": "For photon PID",
+ "例如chatglm&gpt-3.5-turbo&gpt-4": "For example chatglm&gpt-3.5-turbo&gpt-4",
+ "第二种方法": "The second method",
+ "BDT 模型的系统性误差使用通过拟合通过和未通过所选 BDT 截断值的 $ B $ 候选体质量分布的异构同位旋对称模式进行评估": "The systematic error of the BDT model is evaluated using the heterogeneous isospin symmetry mode of the candidate body mass distribution of $ B $ selected by fitting through and not through the selected BDT truncation value",
+ "通过比较模拟和真实的 $ {B}^{ + } \\rightarrow J/\\psi {K}^{* + } $ 衰变样本来计算权重": "Calculate weights by comparing simulated and real $ {B}^{ + } \\rightarrow J/\\psi {K}^{* + } $ decay samples",
+ "上下文长度超过glm-4v上限2000tokens": "The context length exceeds the upper limit of 2000 tokens for glm-4v",
+ "通过为每个模拟信号候选分配权重来校正模拟和碰撞数据之间的一些差异": "Correct some differences between simulated and collision data by assigning weights to each simulated signal candidate",
+ "2016 年上磁场数据集中通过松散选择": "Loose selection in the 2016 upper magnetic field data set",
+ "定义history的一个孪生的前端存储区": "Define a twin front-end storage area for history",
+ "为默认值;": "For the default value;",
+ "一个带二级菜单的插件": "A plugin with a secondary menu",
+ "用于": "Used for",
+ "每次请求的最大token数量": "Maximum token count for each request",
+ "输入Arxiv的ID或者网址": "Enter the Arxiv ID or URL",
+ "采用哪种方法执行转换": "Which method to use for transformation",
+ "定义history_cache-": "Define history_cache-",
+ "再点击该插件": "Click the plugin again",
+ "隐藏": "Hide",
+ "第三个参数": "The third parameter",
+ "声明这是一个文本框": "Declare this as a text box",
+ "其准则为拒绝已知 $ {B}^{ + } $ 质量内 $ \\pm {50}\\mathrm{{MeV}}/{c}^{2} $ 范围内的候选体": "Its criterion is to reject candidates within $ \\pm {50}\\mathrm{{MeV}}/{c}^{2} $ of the known $ {B}^{ + } $ mass",
+ "第一种方法": "The first method",
+ "正在尝试GROBID": "Trying GROBID",
+ "定义新一代插件的高级参数区": "Define the advanced parameter area for the new generation of plugins",
+ "047个tokens": "47 tokens",
+ "PDF解析方法": "PDF parsing method",
+ "缺失 DOC2X_API_KEY": "Missing DOC2X_API_KEY",
+ "第二个参数": "The second parameter",
+ "将只取第一张图片进行处理": "Only the first image will be processed",
+ "请检查配置文件的": "Please check the configuration file",
+ "此函数已经弃用!!新函数位于": "This function has been deprecated!! The new function is located at",
+ "同样地": "Similarly",
+ "的 $ J/\\psi {K}^{ + }{\\pi }^{0} $ 和 $ J/\\psi {K}^{ + } $ 质量的分布": "The distribution of the masses of $ J/\\psi {K}^{ + }{\\pi }^{0} $ and $ J/\\psi {K}^{ + } $",
+ "取消": "Cancel",
+ "3.8 对 BDT 系统误差的严格 PID 选择": "3.8 Strict PID selection for BDT systematic errors",
+ "发送至DOC2X解析": "Send to DOC2X for parsing",
+ "在触发这个按钮时": "When triggering this button",
+ "例如对于01万物的yi-34b-chat-200k": "For example, for 01.AI's yi-34b-chat-200k",
+ "继续等待": "Continue waiting",
+ "留空则使用时间作为文件名": "Leave blank to use time as the file name",
+ "获得以下报错信息": "Get the following error message",
+ "ollama模型": "Ollama model",
+ "要求如下": "Requirements are as follows",
+ "不包括思维导图": "Excluding mind maps",
+ "则用指定模型覆盖全局模型": "Then override the global model with the specified model",
+ "DOC2X服务不可用": "DOC2X service is not available",
+ "则抛出异常": "Then throw an exception",
+ "幻方-深度求索大模型在线API -=-=-=-=-=-=-": "High-Flyer DeepSeek LLM online API -=-=-=-=-=-=-",
+ "详见 themes/common.js": "See themes/common.js",
+ "如果尝试加载未授权的类": "If trying to load unauthorized class",
+ "因此真实样本包含一定比例的背景": "Therefore, real samples contain a certain proportion of background",
+ "热更新Prompt & ModelOverride": "Hot update Prompt & ModelOverride",
+ "可能的原因是": "Possible reasons are",
+ "因此仅BDT进入相应的选择": "So only BDT enters the corresponding selection",
+ "⚠️请不要与模型的最大token数量相混淆": "⚠️ Do not confuse with the maximum token number of the model",
+ "为openai格式的API生成响应函数": "Generate response function for OpenAI format API",
+ "API异常": "API exception",
+ "调用Markdown插件": "Call Markdown plugin",
+ "报告已经添加到右侧“文件下载区”": "The report has been added to the right 'File Download Area'",
+ "把PDF文件拖入对话": "Drag the PDF file into the dialogue",
+ "根据基础功能区 ModelOverride 参数调整模型类型": "Adjust the model type according to the ModelOverride parameter in the basic function area",
+ "vllm 对齐支持 -=-=-=-=-=-=-": "VLLM alignment support -=-=-=-=-=-=-",
+ "强制点击此基础功能按钮时": "When forcing to click this basic function button",
+ "请上传文件后": "Please upload the file first",
+ "解析错误": "Parsing error",
+ "APIKEY为空": "APIKEY is empty",
+ "效果最好": "Best effect",
+ "未来5天": "Next 5 days",
+ "会先执行js代码更新history_cache": "Will first execute js code to update history_cache",
+ "下拉菜单的选项为": "The options in the dropdown menu are",
+ "额外的翻译提示词": "Additional translation prompts",
+ "这三个切割也用于选择 $ {B}^{ + } \\rightarrow J/\\psi {K}^{* + } $ 衰变": "These three cuts are also used to select $ {B}^{ + } \\rightarrow J/\\psi {K}^{* + } $ decay",
+ "借鉴自同目录下的bridge_chatgpt.py": "Inspired by bridge_chatgpt.py in the same directory",
+ "其中质量从 DTF 四维向量重新计算以改善测量的线形": "Recalculate the mass from the DTF four-vector to improve the linearity of the measurement",
+ "移除任何不安全的元素": "Remove any unsafe elements",
+ "默认返回原参数": "Return the original parameters by default",
+ "三兄弟": "Three brothers",
+ "为下拉菜单默认值;": "As the default value for the dropdown menu;",
+ "翻译后的带图文档.zip": "Translated document with images.zip",
+ "是否使用代理": "Whether to use a proxy",
+ "新一代插件的高级参数区确认按钮": "Confirmation button for the advanced parameter area of the new generation plugin",
+ "声明这是一个下拉菜单": "Declare that this is a dropdown menu",
+ "ffmpeg未安装": "FFmpeg not installed",
+ "围绕 $ {K}^{* + } $ 的质量窗口从 $ \\pm {100} $ 缩小至 $ \\pm {75}\\mathrm{{MeV}}/{c}^{2} $": "Narrow the mass window around $ {K}^{* + } $ from $ \\pm {100} $ to $ \\pm {75}\\mathrm{{MeV}}/{c}^{2} $",
+ "保存文件名": "Save file name",
+ "第三种方法": "The third method",
+ "$ 缩减到 $ \\left\\lbrack {{75}": "$ Reduced to $ \\left\\lbrack {{75}",
+ "清理提取路径": "Clean up the extraction path",
+ "history的更新方法": "Method to update the history",
+ "定义history的后端state": "Define the backend state of the history",
+ "生成包含图片的压缩包": "Generate a compressed package containing images",
+ "执行插件": "Execute the plugin",
+ "使用指定的模型": "Use the specified model",
+ "只允许特定的类进行反序列化": "Only allow specific classes to be deserialized",
+ "是否允许从缓存中调取结果": "Whether to allow fetching results from the cache",
+ "效果不理想": "The effect is not ideal",
+ "这计算是在不需要BDT要求的情况下进行的": "This calculation is done without the need for BDT requirements",
+ "生成在线预览": "Generate online preview",
+ "主输入": "Primary input",
+ "定义允许的安全类": "Define allowed security classes",
+ "其最大请求数为4096": "Its maximum request number is 4096",
+ "在线预览翻译": "Online preview translation",
+ "其中传入参数": "Among the incoming parameters",
+ "下载Gradio主题时出现异常": "An exception occurred when downloading the Gradio theme",
+ "修正一些公式问题": "Correcting some formula issues",
+ "对专有名词、翻译语气等方面的要求": "Requirements for proper nouns, translation tone, etc.",
+ "替换成$$": "Replace with $$",
+ "主要用途": "Main purpose",
+ "允许 $ {\\pi }^{0} $ 候选体的质量范围从 $ \\left\\lbrack {0": "Allow the mass range of the $ {\\pi }^{0} $ candidate from $ \\left\\lbrack {0",
+ "$ {B}^{ + } $ 衰变到 $ J/\\psi {K}^{ + } $": "$ {B}^{ + } $ decays to $ J/\\psi {K}^{ + } $",
+ "未指定路径": "Path not specified",
+ "True为不使用": "True means not in use",
+ "尝试获取完整的错误信息": "Attempt to get the complete error message",
+ "仅今天": "Only today",
+ "图 12": "Figure 12",
+ "效果次优": "Effect is suboptimal",
+ "绘制的Mermaid图表类型": "Types of Mermaid charts drawn",
+ "vllm模型": "VLLM model",
+ "文本框上方显示": "Displayed above the text box",
+ "未来3天": "Next 3 days",
+ "在这里添加其他安全的类": "Add other secure classes here",
+ "额外提示词": "Additional prompt words",
+ "由于在等离子体共轭模式中没有光子": "Due to no photons in the plasma conjugate mode",
+ "将公式中的\\": "Escape the backslash in the formula",
+ "插件功能": "Plugin function",
+ "设置5秒不准咬人": "Disallow biting for 5 seconds",
+ "定义cookies的后端state": "Define the backend state of cookies",
+ "选择其他类型时将直接绘制指定的图表类型": "Directly draw the specified chart type when selecting another type",
+ "替换成$": "Replace with $",
+ "自动从输入框同步": "Automatically sync from the input box",
+ "第一个参数": "The first parameter",
+ "注意需要使用双引号将内容括起来": "Note that you need to enclose the content in double quotes",
+ "下拉菜单上方显示": "Display above the dropdown menu",
+ "把history转存history_cache备用": "Transfer history to history_cache for backup",
+ "从头执行": "Execute from the beginning",
+ "选择插件参数": "Select plugin parameters",
+ "您还可以在接入one-api/vllm/ollama时": "When connecting to one-api/vllm/ollama, you can also",
+ "输入对话存档文件名": "Enter the dialogue archive file name",
+ "但是需要DOC2X服务": "But DOC2X service is required",
+ "相反": "On the contrary",
+ "你好👋": "Hello👋",
+ "生成在线预览html": "Generate online preview HTML",
+ "为简化拟合模型": "To simplify the fitting model",
+ "、前端": "Front end",
+ "定义插件的二级选项菜单": "Define the secondary option menu of the plugin",
+ "未选定任何插件": "No plugin selected",
+ "以上三种方法都试一遍": "Try all three methods above once",
+ "一个非常简单的插件": "A very simple plugin",
+ "为了更灵活地接入ollama多模型管理界面": "In order to more flexibly access the ollama multi-model management interface",
+ "文本框内部显示": "Text box internal display",
+ "☝️ 以上是模型路由 -=-=-=-=-=-=-=-=-=": "☝️ The above is the model route -=-=-=-=-=-=-=-=-=",
+ "则使用当前全局模型;如设置": "Then use the current global model; if set",
+ "由LLM决定": "Decided by LLM",
+ "4 对模拟的修正": "4 corrections to the simulation",
+ "glm-4v只支持一张图片": "glm-4v only supports one image",
+ "这个并发量稍微大一点": "This concurrency is slightly larger",
+ "无法处理EdgeTTS音频": "Unable to handle EdgeTTS audio",
+ "早期代码": "Early code",
+ "您可以调用下拉菜单中的“LoadChatHistoryArchive”还原当下的对话": "You can use the 'LoadChatHistoryArchive' in the drop-down menu to restore the current conversation",
+ "因此您在定义和使用类变量时": "So when you define and use class variables",
+ "这将通过sPlot方法进行减除": "This will be subtracted through the sPlot method",
+ "然后再执行python代码更新history": "Then execute python code to update history",
+ "新一代插件需要注册Class": "The new generation plugin needs to register Class",
+ "请选择": "Please select",
+ "旧插件的高级参数区确认按钮": "Confirm button in the advanced parameter area of the old plugin",
+ "多数情况": "In most cases",
+ "ollama 对齐支持 -=-=-=-=-=-=-": "ollama alignment support -=-=-=-=-=-=-",
+ "用该压缩包+Conversation_To_File进行反馈": "Use this compressed package + Conversation_To_File for feedback",
+ "名称": "Name",
+ "错误处理部分": "Error handling section",
+ "False为使用": "False for use",
+ "详细方法见第4节": "See Section 4 for detailed methods",
+ "在应用元组裁剪后": "After applying tuple clipping",
+ "深度求索": "DeepSeek",
+ "绘制脑图的Demo": "Demo for Drawing Mind Maps",
+ "需要在表格前加上一个emoji": "Need to add an emoji in front of the table",
+ "批量Markdown翻译": "Batch Markdown Translation",
+ "将语言模型的生成文本朗读出来": "Read aloud the generated text of the language model",
+ "Function旧接口仅会在“VoidTerminal”中起作用": "The old interface of Function only works in 'VoidTerminal'",
+ "请配置 DOC2X_API_KEY": "Please configure DOC2X_API_KEY",
+ "如果同时询问“多个”大语言模型": "If inquiring about 'multiple' large language models at the same time",
+ "3.7 用于MC校正的宽松选择": "3.7 Loose selection for MC correction",
+ "咬的也不是人": "Not biting humans either",
+ "定义 后端state": "Define backend state",
+ "这个隐藏textbox负责装入当前弹出插件的属性": "This hidden textbox is responsible for loading the properties of the current pop-up plugin",
+ "会执行在不同的线程中": "Will be executed in different threads",
+ "定义cookies的一个孪生的前端存储区": "Define a twin front-end storage area for cookies",
+ "模型选择": "Model selection",
+ "应用于信号、标准化和等离子体共轭模式的最终切割": "Final cutting applied to signal, normalization, and plasma conjugate modes",
+ "确认参数并执行": "Confirm parameters and execute",
+ "请先上传文件": "Please upload the file first",
+ "以便公式渲染": "For formula rendering",
+ "加载PDF文件": "Load PDF file",
+ "LoadChatHistoryArchive | 输入参数为路径": "Load Chat History Archive | Input parameter is the path",
+ "日期选择": "Date selection",
+ "除 $ {B}^{ + } \\rightarrow J/\\psi {K}^{ + } $ 否决": "Veto except for $ {B}^{ + } \\rightarrow J/\\psi {K}^{ + } $",
+ "使用 0.2 的截断值会获得类似的效率": "Using a truncation value of 0.2 will achieve similar efficiency",
+ "请输入": "Please enter",
+ "当注册Class后": "After registering the Class",
+ "Markdown中使用不标准的表格": "Using non-standard tables in Markdown",
+ "采用非常宽松的截断值": "Using very loose truncation values",
+ "为了更灵活地接入vllm多模型管理界面": "To more flexibly access the vllm multi-model management interface",
+ "读取解析": "Read and parse",
+ "允许缓存": "Allow caching",
+ "Run2 中对 Kaon 鉴别的要求被收紧为 $ \\operatorname{ProbNNk}\\left": "The requirement for Kaon discrimination in Run2 has been tightened to $ \\operatorname{ProbNNk}\\left"
}
\ No newline at end of file
diff --git a/docs/translate_std.json b/docs/translate_std.json
index 1c00355a..0b3faf42 100644
--- a/docs/translate_std.json
+++ b/docs/translate_std.json
@@ -6,17 +6,14 @@
"Latex英文纠错加PDF对比": "CorrectEnglishInLatexWithPDFComparison",
"下载arxiv论文并翻译摘要": "DownloadArxivPaperAndTranslateAbstract",
"Markdown翻译指定语言": "TranslateMarkdownToSpecifiedLanguage",
- "PDF_Translate": "BatchTranslatePDFDocuments_MultiThreaded",
"下载arxiv论文翻译摘要": "DownloadArxivPaperTranslateAbstract",
"解析一个Python项目": "ParsePythonProject",
"解析一个Golang项目": "ParseGolangProject",
"代码重写为全英文_多线程": "RewriteCodeToEnglish_MultiThreaded",
"解析一个CSharp项目": "ParsingCSharpProject",
"删除所有本地对话历史记录": "DeleteAllLocalConversationHistoryRecords",
- "Markdown_Translate": "BatchTranslateMarkdown",
"连接bing搜索回答问题": "ConnectBingSearchAnswerQuestion",
"Langchain知识库": "LangchainKnowledgeBase",
- "Latex_Function": "OutputPDFFromLatex",
"把字符太少的块清除为回车": "ClearBlocksWithTooFewCharactersToNewline",
"Latex精细分解与转化": "DecomposeAndConvertLatex",
"解析一个C项目的头文件": "ParseCProjectHeaderFiles",
@@ -70,7 +67,6 @@
"读文章写摘要": "ReadArticleWriteSummary",
"生成函数注释": "GenerateFunctionComments",
"解析项目本身": "ParseProjectItself",
- "Conversation_To_File": "ConversationHistoryArchive",
"专业词汇声明": "ProfessionalTerminologyDeclaration",
"解析docx": "ParseDocx",
"解析源代码新": "ParsingSourceCodeNew",
@@ -104,5 +100,11 @@
"随机小游戏": "RandomMiniGame",
"互动小游戏": "InteractiveMiniGame",
"解析历史输入": "ParseHistoricalInput",
- "高阶功能模板函数示意图": "HighOrderFunctionTemplateDiagram"
+ "高阶功能模板函数示意图": "HighOrderFunctionTemplateDiagram",
+ "载入对话历史存档": "LoadChatHistoryArchive",
+ "对话历史存档": "ChatHistoryArchive",
+ "解析PDF_DOC2X_转Latex": "ParsePDF_DOC2X_toLatex",
+ "解析PDF_基于DOC2X": "ParsePDF_basedDOC2X",
+ "解析PDF_简单拆解": "ParsePDF_simpleDecomposition",
+ "解析PDF_DOC2X_单文件": "ParsePDF_DOC2X_singleFile"
}
\ No newline at end of file
From 12aebf9707b9cab9381b05da6a7d808b42319ab0 Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Sun, 16 Jun 2024 12:12:57 +0000
Subject: [PATCH 3/3] searxng based information gathering
---
crazy_functions/Internet_GPT.py | 122 ++++++++++++++++++++++++++++++++
tests/test_plugins.py | 4 +-
2 files changed, 125 insertions(+), 1 deletion(-)
create mode 100644 crazy_functions/Internet_GPT.py
diff --git a/crazy_functions/Internet_GPT.py b/crazy_functions/Internet_GPT.py
new file mode 100644
index 00000000..25a651f8
--- /dev/null
+++ b/crazy_functions/Internet_GPT.py
@@ -0,0 +1,122 @@
+from toolbox import CatchException, update_ui
+from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
+import requests
+from bs4 import BeautifulSoup
+from request_llms.bridge_all import model_info
+import urllib.request
+from functools import lru_cache
+
+
+@lru_cache
+def get_auth_ip():
+    """Return the external IPv4 address reported by v4.ident.me, or '114.114.114.114' if the lookup fails (result is cached)."""
+    try:  # the timeout bounds the wait so an unreachable lookup service cannot hang the caller
+        return urllib.request.urlopen('https://v4.ident.me/', timeout=8).read().decode('utf8')
+    except Exception:  # best-effort lookup; unlike a bare except, Ctrl-C / SystemExit still propagate
+        return '114.114.114.114'
+
+def searxng_request(query, proxies):
+    """Search the SearXNG endpoint for `query` and return the hits as a list of dicts.
+
+    Each dict has the keys "title", "content" and "link". `proxies` is passed through to
+    `requests.post`. Raises ValueError when the HTTP status code is not 200.
+    """
+    url = 'https://cloud-1.agent-matrix.com/'  # replace with the actual API URL
+    params = {
+        'q': query,  # search query
+        'format': 'json',  # output format is JSON
+        'language': 'zh',  # search language
+    }
+    client_ip = get_auth_ip()
+    headers = {
+        'Accept-Language': 'zh-CN,zh;q=0.9',
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
+        'X-Forwarded-For': client_ip,
+        'X-Real-IP': client_ip,
+    }
+    response = requests.post(url, params=params, headers=headers, proxies=proxies, timeout=30)  # bounded wait instead of blocking forever
+    if response.status_code != 200:
+        raise ValueError("搜索失败,状态码: " + str(response.status_code) + '\t' + response.content.decode('utf-8', errors='replace'))
+    # .get(): a hit lacking "title" or "content" yields an empty string instead of a KeyError
+    return [
+        {"title": hit.get("title", ""), "content": hit.get("content", ""), "link": hit["url"]}
+        for hit in response.json()['results']
+    ]
+
+def scrape_text(url, proxies) -> str:
+    """Scrape the visible text from a webpage.
+
+    Args:
+        url (str): The URL to scrape text from
+        proxies: Proxy configuration passed through to `requests.get`
+
+    Returns:
+        str: The page text without <script>/<style> content, or "无法连接到该网页" if the request fails
+    """
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
+        'Content-Type': 'text/plain',
+    }
+    try:
+        response = requests.get(url, headers=headers, proxies=proxies, timeout=8)
+        if response.encoding == "ISO-8859-1": response.encoding = response.apparent_encoding
+    except Exception:  # best-effort fetch; unlike a bare except, Ctrl-C / SystemExit still propagate
+        return "无法连接到该网页"
+    soup = BeautifulSoup(response.text, "html.parser")
+    for tag in soup(["script", "style"]):
+        tag.extract()
+    lines = (line.strip() for line in soup.get_text().splitlines())
+    # NOTE(review): splitting on a single space puts every word on its own output line — confirm this is intended
+    chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
+    return "\n".join(chunk for chunk in chunks if chunk)
+
+@CatchException
+def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
+    """Answer the user's question from web search results; a generator that yields UI refreshes.
+
+    txt            text from the input box; used here as both the search query and the question
+    llm_kwargs     LLM parameters such as temperature and top_p; passed through unchanged
+    plugin_kwargs  plugin parameters; not used here
+    chatbot        handle of the chat display, used to show messages to the user
+    history        chat history; discarded on entry
+    system_prompt  silent system prompt for the model; not used here
+    user_request   information about the current user request (IP address etc.); not used here
+    """
+    history = []  # clear the history so the input does not overflow
+    chatbot.append((f"请结合互联网信息回答以下问题:{txt}",
+                    "[Local Message] 请注意,您正在调用一个[函数插件]的模板,该模板可以实现ChatGPT联网信息综合。该函数面向希望实现更多有趣功能的开发者,它可以作为创建新功能函数的模板。您若希望分享新的功能模组,请不吝PR!"))
+    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI now: the steps below take a while
+
+    # ------------- < Step 1: fetch the search engine results > -------------
+    from toolbox import get_conf
+    proxies = get_conf('proxies')
+    search_results = searxng_request(txt, proxies)
+    if not search_results:
+        chatbot.append((f"结论:{txt}",
+                        "[Local Message] 搜索引擎未返回任何结果,无法获取互联网信息!"))
+        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
+        return
+    # ------------- < Step 2: visit the result pages one by one > -------------
+    max_search_result = 5  # maximum number of pages to include
+    for index, hit in enumerate(search_results[:max_search_result]):
+        res = scrape_text(hit['link'], proxies)
+        history.extend([f"第{index}份搜索结果:", res])
+        chatbot.append([f"第{index}份搜索结果:", res[:500]+"......"])
+        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI after each page
+
+    # ------------- < Step 3: let the LLM synthesize an answer > -------------
+    i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}"
+    i_say, history = input_clipping(  # clip the input, starting from the longest entry, to stay within the token limit
+        inputs=i_say,
+        history=history,
+        max_token_limit=model_info[llm_kwargs['llm_model']]['max_token']*3//4
+    )
+    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
+        inputs=i_say, inputs_show_user=i_say,
+        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
+        sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。"
+    )
+    chatbot[-1] = (i_say, gpt_say)
+    history.extend([i_say, gpt_say])
+    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
+
diff --git a/tests/test_plugins.py b/tests/test_plugins.py
index 6d1c4856..0c4d9251 100644
--- a/tests/test_plugins.py
+++ b/tests/test_plugins.py
@@ -18,11 +18,13 @@ validate_path() # 返回项目根路径
if __name__ == "__main__":
from tests.test_utils import plugin_test
+ plugin_test(plugin='crazy_functions.Internet_GPT->连接网络回答问题', main_input="谁是应急食品?")
+
# plugin_test(plugin='crazy_functions.函数动态生成->函数动态生成', main_input='交换图像的蓝色通道和红色通道', advanced_arg={"file_path_arg": "./build/ants.jpg"})
# plugin_test(plugin='crazy_functions.Latex_Function->Latex翻译中文并重新编译PDF', main_input="2307.07522")
- plugin_test(plugin='crazy_functions.PDF_Translate->批量翻译PDF文档', main_input='build/pdf/t1.pdf')
+ # plugin_test(plugin='crazy_functions.PDF_Translate->批量翻译PDF文档', main_input='build/pdf/t1.pdf')
# plugin_test(
# plugin="crazy_functions.Latex_Function->Latex翻译中文并重新编译PDF",