This commit is contained in:
青轩
2024-11-24 23:18:33 +08:00
parent e62decac21
commit 16567d3a44
128 changed files with 6 additions and 29560 deletions

View File

@@ -1,6 +1,6 @@
from toolbox import get_conf, update_ui
from crazy_functions.Image_Generate import 图片生成_DALLE2, 图片生成_DALLE3, 图片修改_DALLE2
from crazy_functions.AntFin import 图片生成_DALLE2, 图片生成_DALLE3, 图片修改_DALLE2
from crazy_functions.plugin_template.plugin_class_template import GptAcademicPluginTemplate, ArgProperty

View File

@@ -1,220 +0,0 @@
from toolbox import CatchException, update_ui, promote_file_to_downloadzone, get_log_folder, get_user
from crazy_functions.plugin_template.plugin_class_template import GptAcademicPluginTemplate, ArgProperty
import re
f_prefix = 'GPT-Academic对话存档'
def write_chat_to_file(chatbot, history=None, file_name=None):
"""
将对话记录history以Markdown格式写入文件中。如果没有指定文件名则使用当前时间生成文件名。
"""
import os
import time
from themes.theme import advanced_css
if file_name is None:
file_name = f_prefix + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.html'
fp = os.path.join(get_log_folder(get_user(chatbot), plugin_name='chat_history'), file_name)
with open(fp, 'w', encoding='utf8') as f:
from textwrap import dedent
form = dedent("""
<!DOCTYPE html><head><meta charset="utf-8"><title>对话存档</title><style>{CSS}</style></head>
<body>
<div class="test_temp1" style="width:10%; height: 500px; float:left;"></div>
<div class="test_temp2" style="width:80%;padding: 40px;float:left;padding-left: 20px;padding-right: 20px;box-shadow: rgba(0, 0, 0, 0.2) 0px 0px 8px 8px;border-radius: 10px;">
<div class="chat-body" style="display: flex;justify-content: center;flex-direction: column;align-items: center;flex-wrap: nowrap;">
{CHAT_PREVIEW}
<div></div>
<div></div>
<div style="text-align: center;width:80%;padding: 0px;float:left;padding-left:20px;padding-right:20px;box-shadow: rgba(0, 0, 0, 0.05) 0px 0px 1px 2px;border-radius: 1px;">对话(原始数据)</div>
{HISTORY_PREVIEW}
</div>
</div>
<div class="test_temp3" style="width:10%; height: 500px; float:left;"></div>
</body>
""")
qa_from = dedent("""
<div class="QaBox" style="width:80%;padding: 20px;margin-bottom: 20px;box-shadow: rgb(0 255 159 / 50%) 0px 0px 1px 2px;border-radius: 4px;">
<div class="Question" style="border-radius: 2px;">{QUESTION}</div>
<hr color="blue" style="border-top: dotted 2px #ccc;">
<div class="Answer" style="border-radius: 2px;">{ANSWER}</div>
</div>
""")
history_from = dedent("""
<div class="historyBox" style="width:80%;padding: 0px;float:left;padding-left:20px;padding-right:20px;box-shadow: rgba(0, 0, 0, 0.05) 0px 0px 1px 2px;border-radius: 1px;">
<div class="entry" style="border-radius: 2px;">{ENTRY}</div>
</div>
""")
CHAT_PREVIEW_BUF = ""
for i, contents in enumerate(chatbot):
question, answer = contents[0], contents[1]
if question is None: question = ""
try: question = str(question)
except: question = ""
if answer is None: answer = ""
try: answer = str(answer)
except: answer = ""
CHAT_PREVIEW_BUF += qa_from.format(QUESTION=question, ANSWER=answer)
HISTORY_PREVIEW_BUF = ""
for h in history:
HISTORY_PREVIEW_BUF += history_from.format(ENTRY=h)
html_content = form.format(CHAT_PREVIEW=CHAT_PREVIEW_BUF, HISTORY_PREVIEW=HISTORY_PREVIEW_BUF, CSS=advanced_css)
f.write(html_content)
promote_file_to_downloadzone(fp, rename_file=file_name, chatbot=chatbot)
return '对话历史写入:' + fp
def gen_file_preview(file_name):
try:
with open(file_name, 'r', encoding='utf8') as f:
file_content = f.read()
# pattern to match the text between <head> and </head>
pattern = re.compile(r'<head>.*?</head>', flags=re.DOTALL)
file_content = re.sub(pattern, '', file_content)
html, history = file_content.split('<hr color="blue"> \n\n 对话数据 (无渲染):\n')
history = history.strip('<code>')
history = history.strip('</code>')
history = history.split("\n>>>")
return list(filter(lambda x:x!="", history))[0][:100]
except:
return ""
def read_file_to_chat(chatbot, history, file_name):
with open(file_name, 'r', encoding='utf8') as f:
file_content = f.read()
from bs4 import BeautifulSoup
soup = BeautifulSoup(file_content, 'lxml')
# 提取QaBox信息
chatbot.clear()
qa_box_list = []
qa_boxes = soup.find_all("div", class_="QaBox")
for box in qa_boxes:
question = box.find("div", class_="Question").get_text(strip=False)
answer = box.find("div", class_="Answer").get_text(strip=False)
qa_box_list.append({"Question": question, "Answer": answer})
chatbot.append([question, answer])
# 提取historyBox信息
history_box_list = []
history_boxes = soup.find_all("div", class_="historyBox")
for box in history_boxes:
entry = box.find("div", class_="entry").get_text(strip=False)
history_box_list.append(entry)
history = history_box_list
chatbot.append([None, f"[Local Message] 载入对话{len(qa_box_list)}条,上下文{len(history)}条。"])
return chatbot, history
@CatchException
def 对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数如温度和top_p等一般原样传递下去就行
plugin_kwargs 插件模型的参数,暂时没有用武之地
chatbot 聊天显示框的句柄,用于显示给用户
history 聊天历史,前情提要
system_prompt 给gpt的静默提醒
user_request 当前用户的请求信息IP地址等
"""
file_name = plugin_kwargs.get("file_name", None)
if (file_name is not None) and (file_name != "") and (not file_name.endswith('.html')): file_name += '.html'
else: file_name = None
chatbot.append((None, f"[Local Message] {write_chat_to_file(chatbot, history, file_name)},您可以调用下拉菜单中的“载入对话历史存档”还原当下的对话。"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
class Conversation_To_File_Wrap(GptAcademicPluginTemplate):
def __init__(self):
"""
请注意`execute`会执行在不同的线程中,因此您在定义和使用类变量时,应当慎之又慎!
"""
pass
def define_arg_selection_menu(self):
"""
定义插件的二级选项菜单
第一个参数,名称`file_name`,参数`type`声明这是一个文本框,文本框上方显示`title`,文本框内部显示`description``default_value`为默认值;
"""
gui_definition = {
"file_name": ArgProperty(title="保存文件名", description="输入对话存档文件名,留空则使用时间作为文件名", default_value="", type="string").model_dump_json(), # 主输入,自动从输入框同步
}
return gui_definition
def execute(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
执行插件
"""
yield from 对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)
def hide_cwd(str):
import os
current_path = os.getcwd()
replace_path = "."
return str.replace(current_path, replace_path)
@CatchException
def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数如温度和top_p等一般原样传递下去就行
plugin_kwargs 插件模型的参数,暂时没有用武之地
chatbot 聊天显示框的句柄,用于显示给用户
history 聊天历史,前情提要
system_prompt 给gpt的静默提醒
user_request 当前用户的请求信息IP地址等
"""
from crazy_functions.crazy_utils import get_files_from_everything
success, file_manifest, _ = get_files_from_everything(txt, type='.html')
if not success:
if txt == "": txt = '空空如也的输入栏'
import glob
local_history = "<br/>".join([
"`"+hide_cwd(f)+f" ({gen_file_preview(f)})"+"`"
for f in glob.glob(
f'{get_log_folder(get_user(chatbot), plugin_name="chat_history")}/**/{f_prefix}*.html',
recursive=True
)])
chatbot.append([f"正在查找对话历史文件html格式: {txt}", f"找不到任何html文件: {txt}。但本地存储了以下历史文件,您可以将任意一个文件路径粘贴到输入区,然后重试:<br/>{local_history}"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
try:
chatbot, history = read_file_to_chat(chatbot, history, file_manifest[0])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
except:
chatbot.append([f"载入对话历史文件", f"对话历史文件损坏!"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
@CatchException
def 删除所有本地对话历史记录(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数如温度和top_p等一般原样传递下去就行
plugin_kwargs 插件模型的参数,暂时没有用武之地
chatbot 聊天显示框的句柄,用于显示给用户
history 聊天历史,前情提要
system_prompt 给gpt的静默提醒
user_request 当前用户的请求信息IP地址等
"""
import glob, os
local_history = "<br/>".join([
"`"+hide_cwd(f)+"`"
for f in glob.glob(
f'{get_log_folder(get_user(chatbot), plugin_name="chat_history")}/**/{f_prefix}*.html', recursive=True
)])
for f in glob.glob(f'{get_log_folder(get_user(chatbot), plugin_name="chat_history")}/**/{f_prefix}*.html', recursive=True):
os.remove(f)
chatbot.append([f"删除所有历史对话文件", f"已删除<br/>{local_history}"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return

View File

@@ -1,278 +0,0 @@
import requests
import random
import time
import re
import json
from bs4 import BeautifulSoup
from functools import lru_cache
from itertools import zip_longest
from check_proxy import check_proxy
from toolbox import CatchException, update_ui, get_conf
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
from request_llms.bridge_all import model_info
from request_llms.bridge_all import predict_no_ui_long_connection
from crazy_functions.prompts.internet import SearchOptimizerPrompt, SearchAcademicOptimizerPrompt
def search_optimizer(
query,
proxies,
history,
llm_kwargs,
optimizer=1,
categories="general",
searxng_url=None,
engines=None,
):
# ------------- < 第1步尝试进行搜索优化 > -------------
# * 增强优化,会尝试结合历史记录进行搜索优化
if optimizer == 2:
his = " "
if len(history) == 0:
pass
else:
for i, h in enumerate(history):
if i % 2 == 0:
his += f"Q: {h}\n"
else:
his += f"A: {h}\n"
if categories == "general":
sys_prompt = SearchOptimizerPrompt.format(query=query, history=his, num=4)
elif categories == "science":
sys_prompt = SearchAcademicOptimizerPrompt.format(query=query, history=his, num=4)
else:
his = " "
if categories == "general":
sys_prompt = SearchOptimizerPrompt.format(query=query, history=his, num=3)
elif categories == "science":
sys_prompt = SearchAcademicOptimizerPrompt.format(query=query, history=his, num=3)
mutable = ["", time.time(), ""]
llm_kwargs["temperature"] = 0.8
try:
querys_json = predict_no_ui_long_connection(
inputs=query,
llm_kwargs=llm_kwargs,
history=[],
sys_prompt=sys_prompt,
observe_window=mutable,
)
except Exception:
querys_json = "1234"
#* 尝试解码优化后的搜索结果
querys_json = re.sub(r"```json|```", "", querys_json)
try:
querys = json.loads(querys_json)
except Exception:
#* 如果解码失败,降低温度再试一次
try:
llm_kwargs["temperature"] = 0.4
querys_json = predict_no_ui_long_connection(
inputs=query,
llm_kwargs=llm_kwargs,
history=[],
sys_prompt=sys_prompt,
observe_window=mutable,
)
querys_json = re.sub(r"```json|```", "", querys_json)
querys = json.loads(querys_json)
except Exception:
#* 如果再次失败,直接返回原始问题
querys = [query]
links = []
success = 0
Exceptions = ""
for q in querys:
try:
link = searxng_request(q, proxies, categories, searxng_url, engines=engines)
if len(link) > 0:
links.append(link[:-5])
success += 1
except Exception:
Exceptions = Exception
pass
if success == 0:
raise ValueError(f"在线搜索失败!\n{Exceptions}")
# * 清洗搜索结果,依次放入每组第一,第二个搜索结果,并清洗重复的搜索结果
seen_links = set()
result = []
for tuple in zip_longest(*links, fillvalue=None):
for item in tuple:
if item is not None:
link = item["link"]
if link not in seen_links:
seen_links.add(link)
result.append(item)
return result
@lru_cache
def get_auth_ip():
ip = check_proxy(None, return_ip=True)
if ip is None:
return '114.114.114.' + str(random.randint(1, 10))
return ip
def searxng_request(query, proxies, categories='general', searxng_url=None, engines=None):
if searxng_url is None:
url = get_conf("SEARXNG_URL")
else:
url = searxng_url
if engines == "Mixed":
engines = None
if categories == 'general':
params = {
'q': query, # 搜索查询
'format': 'json', # 输出格式为JSON
'language': 'zh', # 搜索语言
'engines': engines,
}
elif categories == 'science':
params = {
'q': query, # 搜索查询
'format': 'json', # 输出格式为JSON
'language': 'zh', # 搜索语言
'categories': 'science'
}
else:
raise ValueError('不支持的检索类型')
headers = {
'Accept-Language': 'zh-CN,zh;q=0.9',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
'X-Forwarded-For': get_auth_ip(),
'X-Real-IP': get_auth_ip()
}
results = []
response = requests.post(url, params=params, headers=headers, proxies=proxies, timeout=30)
if response.status_code == 200:
json_result = response.json()
for result in json_result['results']:
item = {
"title": result.get("title", ""),
"source": result.get("engines", "unknown"),
"content": result.get("content", ""),
"link": result["url"],
}
results.append(item)
return results
else:
if response.status_code == 429:
raise ValueError("Searxng在线搜索服务当前使用人数太多请稍后。")
else:
raise ValueError("在线搜索失败,状态码: " + str(response.status_code) + '\t' + response.content.decode('utf-8'))
def scrape_text(url, proxies) -> str:
"""Scrape text from a webpage
Args:
url (str): The URL to scrape text from
Returns:
str: The scraped text
"""
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
'Content-Type': 'text/plain',
}
try:
response = requests.get(url, headers=headers, proxies=proxies, timeout=8)
if response.encoding == "ISO-8859-1": response.encoding = response.apparent_encoding
except:
return "无法连接到该网页"
soup = BeautifulSoup(response.text, "html.parser")
for script in soup(["script", "style"]):
script.extract()
text = soup.get_text()
lines = (line.strip() for line in text.splitlines())
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
text = "\n".join(chunk for chunk in chunks if chunk)
return text
@CatchException
def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
optimizer_history = history[:-8]
history = [] # 清空历史,以免输入溢出
chatbot.append((f"请结合互联网信息回答以下问题:{txt}", "检索中..."))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# ------------- < 第1步爬取搜索引擎的结果 > -------------
from toolbox import get_conf
proxies = get_conf('proxies')
categories = plugin_kwargs.get('categories', 'general')
searxng_url = plugin_kwargs.get('searxng_url', None)
engines = plugin_kwargs.get('engine', None)
optimizer = plugin_kwargs.get('optimizer', "关闭")
if optimizer == "关闭":
urls = searxng_request(txt, proxies, categories, searxng_url, engines=engines)
else:
urls = search_optimizer(txt, proxies, optimizer_history, llm_kwargs, optimizer, categories, searxng_url, engines)
history = []
if len(urls) == 0:
chatbot.append((f"结论:{txt}",
"[Local Message] 受到限制无法从searxng获取信息请尝试更换搜索引擎。"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# ------------- < 第2步依次访问网页 > -------------
max_search_result = 5 # 最多收纳多少个网页的结果
if optimizer == "开启(增强)":
max_search_result = 8
chatbot.append(["联网检索中 ...", None])
for index, url in enumerate(urls[:max_search_result]):
res = scrape_text(url['link'], proxies)
prefix = f"{index}份搜索结果 [源自{url['source'][0]}搜索] {url['title'][:25]}"
history.extend([prefix, res])
res_squeeze = res.replace('\n', '...')
chatbot[-1] = [prefix + "\n\n" + res_squeeze[:500] + "......", None]
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# ------------- < 第3步ChatGPT综合 > -------------
if (optimizer != "开启(增强)"):
i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}"
i_say, history = input_clipping( # 裁剪输入从最长的条目开始裁剪防止爆token
inputs=i_say,
history=history,
max_token_limit=min(model_info[llm_kwargs['llm_model']]['max_token']*3//4, 8192)
)
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user=i_say,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。"
)
chatbot[-1] = (i_say, gpt_say)
history.append(i_say);history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
#* 或者使用搜索优化器,这样可以保证后续问答能读取到有效的历史记录
else:
i_say = f"从以上搜索结果中抽取与问题:{txt} 相关的信息:"
i_say, history = input_clipping( # 裁剪输入从最长的条目开始裁剪防止爆token
inputs=i_say,
history=history,
max_token_limit=min(model_info[llm_kwargs['llm_model']]['max_token']*3//4, 8192)
)
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user=i_say,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的三个搜索结果进行总结"
)
chatbot[-1] = (i_say, gpt_say)
history = []
history.append(i_say);history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
# ------------- < 第4步根据综合回答问题 > -------------
i_say = f"请根据以上搜索结果回答问题:{txt}"
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user=i_say,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
sys_prompt="请根据给定的若干条搜索结果回答问题"
)
chatbot[-1] = (i_say, gpt_say)
history.append(i_say);history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history)

View File

@@ -1,45 +0,0 @@
from toolbox import get_conf
from crazy_functions.Internet_GPT import 连接网络回答问题
from crazy_functions.plugin_template.plugin_class_template import GptAcademicPluginTemplate, ArgProperty
class NetworkGPT_Wrap(GptAcademicPluginTemplate):
def __init__(self):
"""
请注意`execute`会执行在不同的线程中,因此您在定义和使用类变量时,应当慎之又慎!
"""
pass
def define_arg_selection_menu(self):
"""
定义插件的二级选项菜单
第一个参数,名称`main_input`,参数`type`声明这是一个文本框,文本框上方显示`title`,文本框内部显示`description``default_value`为默认值;
第二个参数,名称`advanced_arg`,参数`type`声明这是一个文本框,文本框上方显示`title`,文本框内部显示`description``default_value`为默认值;
第三个参数,名称`allow_cache`,参数`type`声明这是一个下拉菜单,下拉菜单上方显示`title`+`description`,下拉菜单的选项为`options``default_value`为下拉菜单默认值;
"""
gui_definition = {
"main_input":
ArgProperty(title="输入问题", description="待通过互联网检索的问题,会自动读取输入框内容", default_value="", type="string").model_dump_json(), # 主输入,自动从输入框同步
"categories":
ArgProperty(title="搜索分类", options=["网页", "学术论文"], default_value="网页", description="", type="dropdown").model_dump_json(),
"engine":
ArgProperty(title="选择搜索引擎", options=["Mixed", "bing", "google", "duckduckgo"], default_value="google", description="", type="dropdown").model_dump_json(),
"optimizer":
ArgProperty(title="搜索优化", options=["关闭", "开启", "开启(增强)"], default_value="关闭", description="是否使用搜索增强。注意这可能会消耗较多token", type="dropdown").model_dump_json(),
"searxng_url":
ArgProperty(title="Searxng服务地址", description="输入Searxng的地址", default_value=get_conf("SEARXNG_URL"), type="string").model_dump_json(), # 主输入,自动从输入框同步
}
return gui_definition
def execute(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
执行插件
"""
if plugin_kwargs["categories"] == "网页": plugin_kwargs["categories"] = "general"
if plugin_kwargs["categories"] == "学术论文": plugin_kwargs["categories"] = "science"
yield from 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)

View File

@@ -1,595 +0,0 @@
from toolbox import update_ui, trimmed_format_exc, get_conf, get_log_folder, promote_file_to_downloadzone, check_repeat_upload, map_file_to_sha256
from toolbox import CatchException, report_exception, update_ui_lastest_msg, zip_result, gen_time_str
from functools import partial
from loguru import logger
import glob, os, requests, time, json, tarfile, threading
pj = os.path.join
ARXIV_CACHE_DIR = get_conf("ARXIV_CACHE_DIR")
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- 工具函数 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# 专业词汇声明 = 'If the term "agent" is used in this section, it should be translated to "智能体". '
def switch_prompt(pfg, mode, more_requirement):
"""
Generate prompts and system prompts based on the mode for proofreading or translating.
Args:
- pfg: Proofreader or Translator instance.
- mode: A string specifying the mode, either 'proofread' or 'translate_zh'.
Returns:
- inputs_array: A list of strings containing prompts for users to respond to.
- sys_prompt_array: A list of strings containing prompts for system prompts.
"""
n_split = len(pfg.sp_file_contents)
if mode == 'proofread_en':
inputs_array = [r"Below is a section from an academic paper, proofread this section." +
r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + more_requirement +
r"Answer me only with the revised text:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
elif mode == 'translate_zh':
inputs_array = [
r"Below is a section from an English academic paper, translate it into Chinese. " + more_requirement +
r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
r"Answer me only with the translated text:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
sys_prompt_array = ["You are a professional translator." for _ in range(n_split)]
else:
assert False, "未知指令"
return inputs_array, sys_prompt_array
def desend_to_extracted_folder_if_exist(project_folder):
"""
Descend into the extracted folder if it exists, otherwise return the original folder.
Args:
- project_folder: A string specifying the folder path.
Returns:
- A string specifying the path to the extracted folder, or the original folder if there is no extracted folder.
"""
maybe_dir = [f for f in glob.glob(f'{project_folder}/*') if os.path.isdir(f)]
if len(maybe_dir) == 0: return project_folder
if maybe_dir[0].endswith('.extract'): return maybe_dir[0]
return project_folder
def move_project(project_folder, arxiv_id=None):
"""
Create a new work folder and copy the project folder to it.
Args:
- project_folder: A string specifying the folder path of the project.
Returns:
- A string specifying the path to the new work folder.
"""
import shutil, time
time.sleep(2) # avoid time string conflict
if arxiv_id is not None:
new_workfolder = pj(ARXIV_CACHE_DIR, arxiv_id, 'workfolder')
else:
new_workfolder = f'{get_log_folder()}/{gen_time_str()}'
try:
shutil.rmtree(new_workfolder)
except:
pass
# align subfolder if there is a folder wrapper
items = glob.glob(pj(project_folder, '*'))
items = [item for item in items if os.path.basename(item) != '__MACOSX']
if len(glob.glob(pj(project_folder, '*.tex'))) == 0 and len(items) == 1:
if os.path.isdir(items[0]): project_folder = items[0]
shutil.copytree(src=project_folder, dst=new_workfolder)
return new_workfolder
def arxiv_download(chatbot, history, txt, allow_cache=True):
def check_cached_translation_pdf(arxiv_id):
translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'translation')
if not os.path.exists(translation_dir):
os.makedirs(translation_dir)
target_file = pj(translation_dir, 'translate_zh.pdf')
if os.path.exists(target_file):
promote_file_to_downloadzone(target_file, rename_file=None, chatbot=chatbot)
target_file_compare = pj(translation_dir, 'comparison.pdf')
if os.path.exists(target_file_compare):
promote_file_to_downloadzone(target_file_compare, rename_file=None, chatbot=chatbot)
return target_file
return False
def is_float(s):
try:
float(s)
return True
except ValueError:
return False
if txt.startswith('https://arxiv.org/pdf/'):
arxiv_id = txt.split('/')[-1] # 2402.14207v2.pdf
txt = arxiv_id.split('v')[0] # 2402.14207
if ('.' in txt) and ('/' not in txt) and is_float(txt): # is arxiv ID
txt = 'https://arxiv.org/abs/' + txt.strip()
if ('.' in txt) and ('/' not in txt) and is_float(txt[:10]): # is arxiv ID
txt = 'https://arxiv.org/abs/' + txt[:10]
if not txt.startswith('https://arxiv.org'):
return txt, None # 是本地文件,跳过下载
# <-------------- inspect format ------------->
chatbot.append([f"检测到arxiv文档连接", '尝试下载 ...'])
yield from update_ui(chatbot=chatbot, history=history)
time.sleep(1) # 刷新界面
url_ = txt # https://arxiv.org/abs/1707.06690
if not txt.startswith('https://arxiv.org/abs/'):
msg = f"解析arxiv网址失败, 期望格式例如: https://arxiv.org/abs/1707.06690。实际得到格式: {url_}"
yield from update_ui_lastest_msg(msg, chatbot=chatbot, history=history) # 刷新界面
return msg, None
# <-------------- set format ------------->
arxiv_id = url_.split('/abs/')[-1]
if 'v' in arxiv_id: arxiv_id = arxiv_id[:10]
cached_translation_pdf = check_cached_translation_pdf(arxiv_id)
if cached_translation_pdf and allow_cache: return cached_translation_pdf, arxiv_id
extract_dst = pj(ARXIV_CACHE_DIR, arxiv_id, 'extract')
translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'e-print')
dst = pj(translation_dir, arxiv_id + '.tar')
os.makedirs(translation_dir, exist_ok=True)
# <-------------- download arxiv source file ------------->
def fix_url_and_download():
# for url_tar in [url_.replace('/abs/', '/e-print/'), url_.replace('/abs/', '/src/')]:
for url_tar in [url_.replace('/abs/', '/src/'), url_.replace('/abs/', '/e-print/')]:
proxies = get_conf('proxies')
r = requests.get(url_tar, proxies=proxies)
if r.status_code == 200:
with open(dst, 'wb+') as f:
f.write(r.content)
return True
return False
if os.path.exists(dst) and allow_cache:
yield from update_ui_lastest_msg(f"调用缓存 {arxiv_id}", chatbot=chatbot, history=history) # 刷新界面
success = True
else:
yield from update_ui_lastest_msg(f"开始下载 {arxiv_id}", chatbot=chatbot, history=history) # 刷新界面
success = fix_url_and_download()
yield from update_ui_lastest_msg(f"下载完成 {arxiv_id}", chatbot=chatbot, history=history) # 刷新界面
if not success:
yield from update_ui_lastest_msg(f"下载失败 {arxiv_id}", chatbot=chatbot, history=history)
raise tarfile.ReadError(f"论文下载失败 {arxiv_id}")
# <-------------- extract file ------------->
from toolbox import extract_archive
try:
extract_archive(file_path=dst, dest_dir=extract_dst)
except tarfile.ReadError:
os.remove(dst)
raise tarfile.ReadError(f"论文下载失败")
return extract_dst, arxiv_id
def pdf2tex_project(pdf_file_path, plugin_kwargs):
if plugin_kwargs["method"] == "MATHPIX":
# Mathpix API credentials
app_id, app_key = get_conf('MATHPIX_APPID', 'MATHPIX_APPKEY')
headers = {"app_id": app_id, "app_key": app_key}
# Step 1: Send PDF file for processing
options = {
"conversion_formats": {"tex.zip": True},
"math_inline_delimiters": ["$", "$"],
"rm_spaces": True
}
response = requests.post(url="https://api.mathpix.com/v3/pdf",
headers=headers,
data={"options_json": json.dumps(options)},
files={"file": open(pdf_file_path, "rb")})
if response.ok:
pdf_id = response.json()["pdf_id"]
logger.info(f"PDF processing initiated. PDF ID: {pdf_id}")
# Step 2: Check processing status
while True:
conversion_response = requests.get(f"https://api.mathpix.com/v3/pdf/{pdf_id}", headers=headers)
conversion_data = conversion_response.json()
if conversion_data["status"] == "completed":
logger.info("PDF processing completed.")
break
elif conversion_data["status"] == "error":
logger.info("Error occurred during processing.")
else:
logger.info(f"Processing status: {conversion_data['status']}")
time.sleep(5) # wait for a few seconds before checking again
# Step 3: Save results to local files
output_dir = os.path.join(os.path.dirname(pdf_file_path), 'mathpix_output')
if not os.path.exists(output_dir):
os.makedirs(output_dir)
url = f"https://api.mathpix.com/v3/pdf/{pdf_id}.tex"
response = requests.get(url, headers=headers)
file_name_wo_dot = '_'.join(os.path.basename(pdf_file_path).split('.')[:-1])
output_name = f"{file_name_wo_dot}.tex.zip"
output_path = os.path.join(output_dir, output_name)
with open(output_path, "wb") as output_file:
output_file.write(response.content)
logger.info(f"tex.zip file saved at: {output_path}")
import zipfile
unzip_dir = os.path.join(output_dir, file_name_wo_dot)
with zipfile.ZipFile(output_path, 'r') as zip_ref:
zip_ref.extractall(unzip_dir)
return unzip_dir
else:
logger.error(f"Error sending PDF for processing. Status code: {response.status_code}")
return None
else:
from crazy_functions.pdf_fns.parse_pdf_via_doc2x import 解析PDF_DOC2X_转Latex
unzip_dir = 解析PDF_DOC2X_转Latex(pdf_file_path)
return unzip_dir
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= 插件主程序1 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
@CatchException
def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
# <-------------- information about this plugin ------------->
chatbot.append(["函数插件功能?",
"对整个Latex项目进行纠错, 用latex编译为PDF对修正处做高亮。函数插件贡献者: Binary-Husky。注意事项: 目前对机器学习类文献转化效果最好其他类型文献转化效果未知。仅在Windows系统进行了测试其他操作系统表现未知。"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# <-------------- more requirements ------------->
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
more_req = plugin_kwargs.get("advanced_arg", "")
_switch_prompt_ = partial(switch_prompt, more_requirement=more_req)
# <-------------- check deps ------------->
try:
import glob, os, time, subprocess
subprocess.Popen(['pdflatex', '-version'])
from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
except Exception as e:
chatbot.append([f"解析项目: {txt}",
f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# <-------------- clear history and read input ------------->
history = []
if os.path.exists(txt):
project_folder = txt
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# <-------------- if is a zip/tar file ------------->
project_folder = desend_to_extracted_folder_if_exist(project_folder)
# <-------------- move latex project away from temp folder ------------->
from shared_utils.fastapi_server import validate_path_safety
validate_path_safety(project_folder, chatbot.get_user())
project_folder = move_project(project_folder, arxiv_id=None)
# <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
if not os.path.exists(project_folder + '/merge_proofread_en.tex'):
yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
chatbot, history, system_prompt, mode='proofread_en',
switch_prompt=_switch_prompt_)
# <-------------- compile PDF ------------->
success = yield from 编译Latex(chatbot, history, main_file_original='merge',
main_file_modified='merge_proofread_en',
work_folder_original=project_folder, work_folder_modified=project_folder,
work_folder=project_folder)
# <-------------- zip PDF ------------->
zip_res = zip_result(project_folder)
if success:
chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
yield from update_ui(chatbot=chatbot, history=history);
time.sleep(1) # 刷新界面
promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
else:
chatbot.append((f"失败了",
'虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+Conversation_To_File进行反馈 ...'))
yield from update_ui(chatbot=chatbot, history=history);
time.sleep(1) # 刷新界面
promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
# <-------------- we are done ------------->
return success
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= 插件主程序2 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
@CatchException
def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
# <-------------- information about this plugin ------------->
chatbot.append([
"函数插件功能?",
"对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 此插件Windows支持最佳Linux下必须使用Docker安装详见项目主README.md。目前对机器学习类文献转化效果最好其他类型文献转化效果未知。"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# <-------------- more requirements ------------->
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
more_req = plugin_kwargs.get("advanced_arg", "")
no_cache = ("--no-cache" in more_req)
if no_cache: more_req = more_req.replace("--no-cache", "").strip()
allow_gptac_cloud_io = ("--allow-cloudio" in more_req) # 从云端下载翻译结果,以及上传翻译结果到云端
if allow_gptac_cloud_io: more_req = more_req.replace("--allow-cloudio", "").strip()
allow_cache = not no_cache
_switch_prompt_ = partial(switch_prompt, more_requirement=more_req)
# <-------------- check deps ------------->
try:
import glob, os, time, subprocess
subprocess.Popen(['pdflatex', '-version'])
from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
except Exception as e:
chatbot.append([f"解析项目: {txt}",
f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# <-------------- clear history and read input ------------->
history = []
try:
txt, arxiv_id = yield from arxiv_download(chatbot, history, txt, allow_cache)
except tarfile.ReadError as e:
yield from update_ui_lastest_msg(
"无法自动下载该论文的Latex源码请前往arxiv打开此论文下载页面点other Formats然后download source手动下载latex源码包。接下来调用本地Latex翻译插件即可。",
chatbot=chatbot, history=history)
return
if txt.endswith('.pdf'):
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"发现已经存在翻译好的PDF文档")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# #################################################################
if allow_gptac_cloud_io and arxiv_id:
# 访问 GPTAC学术云查询云端是否存在该论文的翻译版本
from crazy_functions.latex_fns.latex_actions import check_gptac_cloud
success, downloaded = check_gptac_cloud(arxiv_id, chatbot)
if success:
chatbot.append([
f"检测到GPTAC云端存在翻译版本, 如果不满意翻译结果, 请禁用云端分享, 然后重新执行。",
None
])
yield from update_ui(chatbot=chatbot, history=history)
return
#################################################################
if os.path.exists(txt):
project_folder = txt
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无法处理: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# <-------------- if is a zip/tar file ------------->
project_folder = desend_to_extracted_folder_if_exist(project_folder)
# <-------------- move latex project away from temp folder ------------->
from shared_utils.fastapi_server import validate_path_safety
validate_path_safety(project_folder, chatbot.get_user())
project_folder = move_project(project_folder, arxiv_id)
# <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
if not os.path.exists(project_folder + '/merge_translate_zh.tex'):
yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
chatbot, history, system_prompt, mode='translate_zh',
switch_prompt=_switch_prompt_)
# <-------------- compile PDF ------------->
success = yield from 编译Latex(chatbot, history, main_file_original='merge',
main_file_modified='merge_translate_zh', mode='translate_zh',
work_folder_original=project_folder, work_folder_modified=project_folder,
work_folder=project_folder)
# <-------------- zip PDF ------------->
zip_res = zip_result(project_folder)
if success:
if allow_gptac_cloud_io and arxiv_id:
# 如果用户允许我们将翻译好的arxiv论文PDF上传到GPTAC学术云
from crazy_functions.latex_fns.latex_actions import upload_to_gptac_cloud_if_user_allow
threading.Thread(target=upload_to_gptac_cloud_if_user_allow,
args=(chatbot, arxiv_id), daemon=True).start()
chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
yield from update_ui(chatbot=chatbot, history=history)
time.sleep(1) # 刷新界面
promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
else:
chatbot.append((f"失败了",
'虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 您可以到Github Issue区, 用该压缩包进行反馈。如系统是Linux请检查系统字体见Github wiki ...'))
yield from update_ui(chatbot=chatbot, history=history)
time.sleep(1) # 刷新界面
promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
# <-------------- we are done ------------->
return success
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- 插件主程序3 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
@CatchException
def PDF翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
# <-------------- information about this plugin ------------->
chatbot.append([
"函数插件功能?",
"将PDF转换为Latex项目翻译为中文后重新编译为PDF。函数插件贡献者: Marroh。注意事项: 此插件Windows支持最佳Linux下必须使用Docker安装详见项目主README.md。目前对机器学习类文献转化效果最好其他类型文献转化效果未知。"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# <-------------- more requirements ------------->
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
more_req = plugin_kwargs.get("advanced_arg", "")
no_cache = more_req.startswith("--no-cache")
if no_cache: more_req.lstrip("--no-cache")
allow_cache = not no_cache
_switch_prompt_ = partial(switch_prompt, more_requirement=more_req)
# <-------------- check deps ------------->
try:
import glob, os, time, subprocess
subprocess.Popen(['pdflatex', '-version'])
from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
except Exception as e:
chatbot.append([f"解析项目: {txt}",
f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# <-------------- clear history and read input ------------->
if os.path.exists(txt):
project_folder = txt
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无法处理: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.pdf', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.pdf文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
if len(file_manifest) != 1:
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"不支持同时处理多个pdf文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
if plugin_kwargs.get("method", "") == 'MATHPIX':
app_id, app_key = get_conf('MATHPIX_APPID', 'MATHPIX_APPKEY')
if len(app_id) == 0 or len(app_key) == 0:
report_exception(chatbot, history, a="缺失 MATHPIX_APPID 和 MATHPIX_APPKEY。", b=f"请配置 MATHPIX_APPID 和 MATHPIX_APPKEY")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
if plugin_kwargs.get("method", "") == 'DOC2X':
app_id, app_key = "", ""
DOC2X_API_KEY = get_conf('DOC2X_API_KEY')
if len(DOC2X_API_KEY) == 0:
report_exception(chatbot, history, a="缺失 DOC2X_API_KEY。", b=f"请配置 DOC2X_API_KEY")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
hash_tag = map_file_to_sha256(file_manifest[0])
# # <-------------- check repeated pdf ------------->
# chatbot.append([f"检查PDF是否被重复上传", "正在检查..."])
# yield from update_ui(chatbot=chatbot, history=history)
# repeat, project_folder = check_repeat_upload(file_manifest[0], hash_tag)
# if repeat:
# yield from update_ui_lastest_msg(f"发现重复上传,请查收结果(压缩包)...", chatbot=chatbot, history=history)
# try:
# translate_pdf = [f for f in glob.glob(f'{project_folder}/**/merge_translate_zh.pdf', recursive=True)][0]
# promote_file_to_downloadzone(translate_pdf, rename_file=None, chatbot=chatbot)
# comparison_pdf = [f for f in glob.glob(f'{project_folder}/**/comparison.pdf', recursive=True)][0]
# promote_file_to_downloadzone(comparison_pdf, rename_file=None, chatbot=chatbot)
# zip_res = zip_result(project_folder)
# promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
# return
# except:
# report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"发现重复上传,但是无法找到相关文件")
# yield from update_ui(chatbot=chatbot, history=history)
# else:
# yield from update_ui_lastest_msg(f"未发现重复上传", chatbot=chatbot, history=history)
# <-------------- convert pdf into tex ------------->
chatbot.append([f"解析项目: {txt}", "正在将PDF转换为tex项目请耐心等待..."])
yield from update_ui(chatbot=chatbot, history=history)
project_folder = pdf2tex_project(file_manifest[0], plugin_kwargs)
if project_folder is None:
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"PDF转换为tex项目失败")
yield from update_ui(chatbot=chatbot, history=history)
return False
# <-------------- translate latex file into Chinese ------------->
yield from update_ui_lastest_msg("正在tex项目将翻译为中文...", chatbot=chatbot, history=history)
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# <-------------- if is a zip/tar file ------------->
project_folder = desend_to_extracted_folder_if_exist(project_folder)
# <-------------- move latex project away from temp folder ------------->
from shared_utils.fastapi_server import validate_path_safety
validate_path_safety(project_folder, chatbot.get_user())
project_folder = move_project(project_folder)
# <-------------- set a hash tag for repeat-checking ------------->
with open(pj(project_folder, hash_tag + '.tag'), 'w', encoding='utf8') as f:
f.write(hash_tag)
f.close()
# <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
if not os.path.exists(project_folder + '/merge_translate_zh.tex'):
yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
chatbot, history, system_prompt, mode='translate_zh',
switch_prompt=_switch_prompt_)
# <-------------- compile PDF ------------->
yield from update_ui_lastest_msg("正在将翻译好的项目tex项目编译为PDF...", chatbot=chatbot, history=history)
success = yield from 编译Latex(chatbot, history, main_file_original='merge',
main_file_modified='merge_translate_zh', mode='translate_zh',
work_folder_original=project_folder, work_folder_modified=project_folder,
work_folder=project_folder)
# <-------------- zip PDF ------------->
zip_res = zip_result(project_folder)
if success:
chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
yield from update_ui(chatbot=chatbot, history=history);
time.sleep(1) # 刷新界面
promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
else:
chatbot.append((f"失败了",
'虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 您可以到Github Issue区, 用该压缩包进行反馈。如系统是Linux请检查系统字体见Github wiki ...'))
yield from update_ui(chatbot=chatbot, history=history);
time.sleep(1) # 刷新界面
promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
# <-------------- we are done ------------->
return success

View File

@@ -1,85 +0,0 @@
from crazy_functions.Latex_Function import Latex翻译中文并重新编译PDF, PDF翻译中文并重新编译PDF
from crazy_functions.plugin_template.plugin_class_template import GptAcademicPluginTemplate, ArgProperty
class Arxiv_Localize(GptAcademicPluginTemplate):
def __init__(self):
"""
请注意`execute`会执行在不同的线程中,因此您在定义和使用类变量时,应当慎之又慎!
"""
pass
def define_arg_selection_menu(self):
"""
定义插件的二级选项菜单
第一个参数,名称`main_input`,参数`type`声明这是一个文本框,文本框上方显示`title`,文本框内部显示`description``default_value`为默认值;
第二个参数,名称`advanced_arg`,参数`type`声明这是一个文本框,文本框上方显示`title`,文本框内部显示`description``default_value`为默认值;
第三个参数,名称`allow_cache`,参数`type`声明这是一个下拉菜单,下拉菜单上方显示`title`+`description`,下拉菜单的选项为`options``default_value`为下拉菜单默认值;
"""
gui_definition = {
"main_input":
ArgProperty(title="ArxivID", description="输入Arxiv的ID或者网址", default_value="", type="string").model_dump_json(), # 主输入,自动从输入框同步
"advanced_arg":
ArgProperty(title="额外的翻译提示词",
description=r"如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "
r"例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: "
r'If the term "agent" is used in this section, it should be translated to "智能体". ',
default_value="", type="string").model_dump_json(), # 高级参数输入区,自动同步
"allow_cache":
ArgProperty(title="是否允许从缓存中调取结果", options=["允许缓存", "从头执行"], default_value="允许缓存", description="", type="dropdown").model_dump_json(),
"allow_cloudio":
ArgProperty(title="是否允许从GPTAC学术云下载(或者上传)翻译结果(仅针对Arxiv论文)", options=["允许", "禁止"], default_value="禁止", description="共享文献,互助互利", type="dropdown").model_dump_json(),
}
return gui_definition
def execute(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
执行插件
"""
allow_cache = plugin_kwargs["allow_cache"]
allow_cloudio = plugin_kwargs["allow_cloudio"]
advanced_arg = plugin_kwargs["advanced_arg"]
if allow_cache == "从头执行": plugin_kwargs["advanced_arg"] = "--no-cache " + plugin_kwargs["advanced_arg"]
# 从云端下载翻译结果,以及上传翻译结果到云端;人人为我,我为人人。
if allow_cloudio == "允许": plugin_kwargs["advanced_arg"] = "--allow-cloudio " + plugin_kwargs["advanced_arg"]
yield from Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)
class PDF_Localize(GptAcademicPluginTemplate):
def __init__(self):
"""
请注意`execute`会执行在不同的线程中,因此您在定义和使用类变量时,应当慎之又慎!
"""
pass
def define_arg_selection_menu(self):
"""
定义插件的二级选项菜单
"""
gui_definition = {
"main_input":
ArgProperty(title="PDF文件路径", description="未指定路径,请上传文件后,再点击该插件", default_value="", type="string").model_dump_json(), # 主输入,自动从输入框同步
"advanced_arg":
ArgProperty(title="额外的翻译提示词",
description=r"如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "
r"例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: "
r'If the term "agent" is used in this section, it should be translated to "智能体". ',
default_value="", type="string").model_dump_json(), # 高级参数输入区,自动同步
"method":
ArgProperty(title="采用哪种方法执行转换", options=["MATHPIX", "DOC2X"], default_value="DOC2X", description="", type="dropdown").model_dump_json(),
}
return gui_definition
def execute(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
执行插件
"""
yield from PDF翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)

View File

@@ -1,245 +0,0 @@
from toolbox import update_ui, trimmed_format_exc, promote_file_to_downloadzone, get_log_folder
from toolbox import CatchException, report_exception, write_history_to_file, zip_folder
from loguru import logger
class PaperFileGroup():
def __init__(self):
self.file_paths = []
self.file_contents = []
self.sp_file_contents = []
self.sp_file_index = []
self.sp_file_tag = []
# count_token
from request_llms.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
self.get_token_num = get_token_num
def run_file_split(self, max_token_limit=1900):
"""
将长文本分离开来
"""
for index, file_content in enumerate(self.file_contents):
if self.get_token_num(file_content) < max_token_limit:
self.sp_file_contents.append(file_content)
self.sp_file_index.append(index)
self.sp_file_tag.append(self.file_paths[index])
else:
from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
for j, segment in enumerate(segments):
self.sp_file_contents.append(segment)
self.sp_file_index.append(index)
self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")
logger.info('Segmentation: done')
def merge_result(self):
self.file_result = ["" for _ in range(len(self.file_paths))]
for r, k in zip(self.sp_file_result, self.sp_file_index):
self.file_result[k] += r
def write_result(self):
manifest = []
for path, res in zip(self.file_paths, self.file_result):
with open(path + '.polish.tex', 'w', encoding='utf8') as f:
manifest.append(path + '.polish.tex')
f.write(res)
return manifest
def zip_result(self):
import os, time
folder = os.path.dirname(self.file_paths[0])
t = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
zip_folder(folder, get_log_folder(), f'{t}-polished.zip')
def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='polish'):
import time, os, re
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
# <-------- 读取Latex文件删除其中的所有注释 ---------->
pfg = PaperFileGroup()
for index, fp in enumerate(file_manifest):
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
file_content = f.read()
# 定义注释的正则表达式
comment_pattern = r'(?<!\\)%.*'
# 使用正则表达式查找注释,并替换为空字符串
clean_tex_content = re.sub(comment_pattern, '', file_content)
# 记录删除注释后的文本
pfg.file_paths.append(fp)
pfg.file_contents.append(clean_tex_content)
# <-------- 拆分过长的latex文件 ---------->
pfg.run_file_split(max_token_limit=1024)
n_split = len(pfg.sp_file_contents)
# <-------- 多线程润色开始 ---------->
if language == 'en':
if mode == 'polish':
inputs_array = [r"Below is a section from an academic paper, polish this section to meet the academic standard, " +
r"improve the grammar, clarity and overall readability, do not modify any latex command such as \section, \cite and equations:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
else:
inputs_array = [r"Below is a section from an academic paper, proofread this section." +
r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
r"Answer me only with the revised text:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
inputs_show_user_array = [f"Polish {f}" for f in pfg.sp_file_tag]
sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
elif language == 'zh':
if mode == 'polish':
inputs_array = [r"以下是一篇学术论文中的一段内容请将此部分润色以满足学术标准提高语法、清晰度和整体可读性不要修改任何LaTeX命令例如\section\cite和方程式" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
else:
inputs_array = [r"以下是一篇学术论文中的一段内容请对这部分内容进行语法矫正。不要修改任何LaTeX命令例如\section\cite和方程式" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
inputs_show_user_array = [f"润色 {f}" for f in pfg.sp_file_tag]
sys_prompt_array=["你是一位专业的中文学术论文作家。" for _ in range(n_split)]
gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
inputs_array=inputs_array,
inputs_show_user_array=inputs_show_user_array,
llm_kwargs=llm_kwargs,
chatbot=chatbot,
history_array=[[""] for _ in range(n_split)],
sys_prompt_array=sys_prompt_array,
# max_workers=5, # 并行任务数量限制最多同时执行5个其他的排队等待
scroller_max_len = 80
)
# <-------- 文本碎片重组为完整的tex文件整理结果为压缩包 ---------->
try:
pfg.sp_file_result = []
for i_say, gpt_say in zip(gpt_response_collection[0::2], gpt_response_collection[1::2]):
pfg.sp_file_result.append(gpt_say)
pfg.merge_result()
pfg.write_result()
pfg.zip_result()
except:
logger.error(trimmed_format_exc())
# <-------- 整理结果,退出 ---------->
create_report_file_name = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + f"-chatgpt.polish.md"
res = write_history_to_file(gpt_response_collection, file_basename=create_report_file_name)
promote_file_to_downloadzone(res, chatbot=chatbot)
history = gpt_response_collection
chatbot.append((f"{fp}完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
@CatchException
def Latex英文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
# 基本信息:功能、贡献者
chatbot.append([
"函数插件功能?",
"对整个Latex项目进行润色。函数插件贡献者: Binary-Husky。注意此插件不调用Latex如果有Latex环境请使用「Latex英文纠错+高亮修正位置(需Latex)插件」"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
import tiktoken
except:
report_exception(chatbot, history,
a=f"解析项目: {txt}",
b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
history = [] # 清空历史,以免输入溢出
import glob, os
if os.path.exists(txt):
project_folder = txt
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en')
@CatchException
def Latex中文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
# 基本信息:功能、贡献者
chatbot.append([
"函数插件功能?",
"对整个Latex项目进行润色。函数插件贡献者: Binary-Husky"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
import tiktoken
except:
report_exception(chatbot, history,
a=f"解析项目: {txt}",
b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
history = [] # 清空历史,以免输入溢出
import glob, os
if os.path.exists(txt):
project_folder = txt
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='zh')
@CatchException
def Latex英文纠错(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
# 基本信息:功能、贡献者
chatbot.append([
"函数插件功能?",
"对整个Latex项目进行纠错。函数插件贡献者: Binary-Husky"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
import tiktoken
except:
report_exception(chatbot, history,
a=f"解析项目: {txt}",
b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
history = [] # 清空历史,以免输入溢出
import glob, os
if os.path.exists(txt):
project_folder = txt
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='proofread')

View File

@@ -1,176 +0,0 @@
from toolbox import update_ui, promote_file_to_downloadzone
from toolbox import CatchException, report_exception, write_history_to_file
from loguru import logger
class PaperFileGroup():
def __init__(self):
self.file_paths = []
self.file_contents = []
self.sp_file_contents = []
self.sp_file_index = []
self.sp_file_tag = []
# count_token
from request_llms.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
self.get_token_num = get_token_num
def run_file_split(self, max_token_limit=1900):
"""
将长文本分离开来
"""
for index, file_content in enumerate(self.file_contents):
if self.get_token_num(file_content) < max_token_limit:
self.sp_file_contents.append(file_content)
self.sp_file_index.append(index)
self.sp_file_tag.append(self.file_paths[index])
else:
from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
for j, segment in enumerate(segments):
self.sp_file_contents.append(segment)
self.sp_file_index.append(index)
self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")
logger.info('Segmentation: done')
def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en'):
import time, os, re
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
# <-------- 读取Latex文件删除其中的所有注释 ---------->
pfg = PaperFileGroup()
for index, fp in enumerate(file_manifest):
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
file_content = f.read()
# 定义注释的正则表达式
comment_pattern = r'(?<!\\)%.*'
# 使用正则表达式查找注释,并替换为空字符串
clean_tex_content = re.sub(comment_pattern, '', file_content)
# 记录删除注释后的文本
pfg.file_paths.append(fp)
pfg.file_contents.append(clean_tex_content)
# <-------- 拆分过长的latex文件 ---------->
pfg.run_file_split(max_token_limit=1024)
n_split = len(pfg.sp_file_contents)
# <-------- 抽取摘要 ---------->
# if language == 'en':
# abs_extract_inputs = f"Please write an abstract for this paper"
# # 单线获取文章meta信息
# paper_meta_info = yield from request_gpt_model_in_new_thread_with_ui_alive(
# inputs=abs_extract_inputs,
# inputs_show_user=f"正在抽取摘要信息。",
# llm_kwargs=llm_kwargs,
# chatbot=chatbot, history=[],
# sys_prompt="Your job is to collect information from materials。",
# )
# <-------- 多线程润色开始 ---------->
if language == 'en->zh':
inputs_array = ["Below is a section from an English academic paper, translate it into Chinese, do not modify any latex command such as \section, \cite and equations:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
inputs_show_user_array = [f"翻译 {f}" for f in pfg.sp_file_tag]
sys_prompt_array = ["You are a professional academic paper translator." for _ in range(n_split)]
elif language == 'zh->en':
inputs_array = [f"Below is a section from a Chinese academic paper, translate it into English, do not modify any latex command such as \section, \cite and equations:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
inputs_show_user_array = [f"翻译 {f}" for f in pfg.sp_file_tag]
sys_prompt_array = ["You are a professional academic paper translator." for _ in range(n_split)]
gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
inputs_array=inputs_array,
inputs_show_user_array=inputs_show_user_array,
llm_kwargs=llm_kwargs,
chatbot=chatbot,
history_array=[[""] for _ in range(n_split)],
sys_prompt_array=sys_prompt_array,
# max_workers=5, # OpenAI所允许的最大并行过载
scroller_max_len = 80
)
# <-------- 整理结果,退出 ---------->
create_report_file_name = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + f"-chatgpt.polish.md"
res = write_history_to_file(gpt_response_collection, create_report_file_name)
promote_file_to_downloadzone(res, chatbot=chatbot)
history = gpt_response_collection
chatbot.append((f"{fp}完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
@CatchException
def Latex英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
# 基本信息:功能、贡献者
chatbot.append([
"函数插件功能?",
"对整个Latex项目进行翻译。函数插件贡献者: Binary-Husky"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
import tiktoken
except:
report_exception(chatbot, history,
a=f"解析项目: {txt}",
b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
history = [] # 清空历史,以免输入溢出
import glob, os
if os.path.exists(txt):
project_folder = txt
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en->zh')
@CatchException
def Latex中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
# 基本信息:功能、贡献者
chatbot.append([
"函数插件功能?",
"对整个Latex项目进行翻译。函数插件贡献者: Binary-Husky"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
import tiktoken
except:
report_exception(chatbot, history,
a=f"解析项目: {txt}",
b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
history = [] # 清空历史,以免输入溢出
import glob, os
if os.path.exists(txt):
project_folder = txt
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='zh->en')

View File

@@ -1,264 +0,0 @@
import glob, shutil, os, re
from loguru import logger
from toolbox import update_ui, trimmed_format_exc, gen_time_str
from toolbox import CatchException, report_exception, get_log_folder
from toolbox import write_history_to_file, promote_file_to_downloadzone
fast_debug = False
class PaperFileGroup():
def __init__(self):
self.file_paths = []
self.file_contents = []
self.sp_file_contents = []
self.sp_file_index = []
self.sp_file_tag = []
# count_token
from request_llms.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
self.get_token_num = get_token_num
def run_file_split(self, max_token_limit=2048):
"""
将长文本分离开来
"""
for index, file_content in enumerate(self.file_contents):
if self.get_token_num(file_content) < max_token_limit:
self.sp_file_contents.append(file_content)
self.sp_file_index.append(index)
self.sp_file_tag.append(self.file_paths[index])
else:
from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
for j, segment in enumerate(segments):
self.sp_file_contents.append(segment)
self.sp_file_index.append(index)
self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.md")
logger.info('Segmentation: done')
def merge_result(self):
self.file_result = ["" for _ in range(len(self.file_paths))]
for r, k in zip(self.sp_file_result, self.sp_file_index):
self.file_result[k] += r
def write_result(self, language):
manifest = []
for path, res in zip(self.file_paths, self.file_result):
dst_file = os.path.join(get_log_folder(), f'{gen_time_str()}.md')
with open(dst_file, 'w', encoding='utf8') as f:
manifest.append(dst_file)
f.write(res)
return manifest
def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en'):
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
# <-------- 读取Markdown文件删除其中的所有注释 ---------->
pfg = PaperFileGroup()
for index, fp in enumerate(file_manifest):
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
file_content = f.read()
# 记录删除注释后的文本
pfg.file_paths.append(fp)
pfg.file_contents.append(file_content)
# <-------- 拆分过长的Markdown文件 ---------->
pfg.run_file_split(max_token_limit=1024)
n_split = len(pfg.sp_file_contents)
# <-------- 多线程翻译开始 ---------->
if language == 'en->zh':
inputs_array = ["This is a Markdown file, translate it into Chinese, do NOT modify any existing Markdown commands, do NOT use code wrapper (```), ONLY answer me with translated results:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
inputs_show_user_array = [f"翻译 {f}" for f in pfg.sp_file_tag]
sys_prompt_array = ["You are a professional academic paper translator." + plugin_kwargs.get("additional_prompt", "") for _ in range(n_split)]
elif language == 'zh->en':
inputs_array = [f"This is a Markdown file, translate it into English, do NOT modify any existing Markdown commands, do NOT use code wrapper (```), ONLY answer me with translated results:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
inputs_show_user_array = [f"翻译 {f}" for f in pfg.sp_file_tag]
sys_prompt_array = ["You are a professional academic paper translator." + plugin_kwargs.get("additional_prompt", "") for _ in range(n_split)]
else:
inputs_array = [f"This is a Markdown file, translate it into {language}, do NOT modify any existing Markdown commands, do NOT use code wrapper (```), ONLY answer me with translated results:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
inputs_show_user_array = [f"翻译 {f}" for f in pfg.sp_file_tag]
sys_prompt_array = ["You are a professional academic paper translator." + plugin_kwargs.get("additional_prompt", "") for _ in range(n_split)]
gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
inputs_array=inputs_array,
inputs_show_user_array=inputs_show_user_array,
llm_kwargs=llm_kwargs,
chatbot=chatbot,
history_array=[[""] for _ in range(n_split)],
sys_prompt_array=sys_prompt_array,
# max_workers=5, # OpenAI所允许的最大并行过载
scroller_max_len = 80
)
try:
pfg.sp_file_result = []
for i_say, gpt_say in zip(gpt_response_collection[0::2], gpt_response_collection[1::2]):
pfg.sp_file_result.append(gpt_say)
pfg.merge_result()
output_file_arr = pfg.write_result(language)
for output_file in output_file_arr:
promote_file_to_downloadzone(output_file, chatbot=chatbot)
if 'markdown_expected_output_path' in plugin_kwargs:
expected_f_name = plugin_kwargs['markdown_expected_output_path']
shutil.copyfile(output_file, expected_f_name)
except:
logger.error(trimmed_format_exc())
# <-------- 整理结果,退出 ---------->
create_report_file_name = gen_time_str() + f"-chatgpt.md"
res = write_history_to_file(gpt_response_collection, file_basename=create_report_file_name)
promote_file_to_downloadzone(res, chatbot=chatbot)
history = gpt_response_collection
chatbot.append((f"{fp}完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
def get_files_from_everything(txt, preference=''):
if txt == "": return False, None, None
success = True
if txt.startswith('http'):
import requests
from toolbox import get_conf
proxies = get_conf('proxies')
# 网络的远程文件
if preference == 'Github':
logger.info('正在从github下载资源 ...')
if not txt.endswith('.md'):
# Make a request to the GitHub API to retrieve the repository information
url = txt.replace("https://github.com/", "https://api.github.com/repos/") + '/readme'
response = requests.get(url, proxies=proxies)
txt = response.json()['download_url']
else:
txt = txt.replace("https://github.com/", "https://raw.githubusercontent.com/")
txt = txt.replace("/blob/", "/")
r = requests.get(txt, proxies=proxies)
download_local = f'{get_log_folder(plugin_name="批量Markdown翻译")}/raw-readme-{gen_time_str()}.md'
project_folder = f'{get_log_folder(plugin_name="批量Markdown翻译")}'
with open(download_local, 'wb+') as f: f.write(r.content)
file_manifest = [download_local]
elif txt.endswith('.md'):
# 直接给定文件
file_manifest = [txt]
project_folder = os.path.dirname(txt)
elif os.path.exists(txt):
# 本地路径,递归搜索
project_folder = txt
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.md', recursive=True)]
else:
project_folder = None
file_manifest = []
success = False
return success, file_manifest, project_folder
@CatchException
def Markdown英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
# 基本信息:功能、贡献者
chatbot.append([
"函数插件功能?",
"对整个Markdown项目进行翻译。函数插件贡献者: Binary-Husky"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
import tiktoken
except:
report_exception(chatbot, history,
a=f"解析项目: {txt}",
b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
history = [] # 清空历史,以免输入溢出
success, file_manifest, project_folder = get_files_from_everything(txt, preference="Github")
if not success:
# 什么都没有
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
if len(file_manifest) == 0:
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.md文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en->zh')
@CatchException
def Markdown中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
# 基本信息:功能、贡献者
chatbot.append([
"函数插件功能?",
"对整个Markdown项目进行翻译。函数插件贡献者: Binary-Husky"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
import tiktoken
except:
report_exception(chatbot, history,
a=f"解析项目: {txt}",
b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
history = [] # 清空历史,以免输入溢出
success, file_manifest, project_folder = get_files_from_everything(txt)
if not success:
# 什么都没有
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
if len(file_manifest) == 0:
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.md文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='zh->en')
@CatchException
def Markdown翻译指定语言(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
# 基本信息:功能、贡献者
chatbot.append([
"函数插件功能?",
"对整个Markdown项目进行翻译。函数插件贡献者: Binary-Husky"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
import tiktoken
except:
report_exception(chatbot, history,
a=f"解析项目: {txt}",
b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
history = [] # 清空历史,以免输入溢出
success, file_manifest, project_folder = get_files_from_everything(txt)
if not success:
# 什么都没有
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
if len(file_manifest) == 0:
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.md文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
language = plugin_kwargs.get("advanced_arg", 'Chinese')
yield from 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language=language)

View File

@@ -1,83 +0,0 @@
from toolbox import CatchException, check_packages, get_conf
from toolbox import update_ui, update_ui_lastest_msg, disable_auto_promotion
from toolbox import trimmed_format_exc_markdown
from crazy_functions.crazy_utils import get_files_from_everything
from crazy_functions.pdf_fns.parse_pdf import get_avail_grobid_url
from crazy_functions.pdf_fns.parse_pdf_via_doc2x import 解析PDF_基于DOC2X
from crazy_functions.pdf_fns.parse_pdf_legacy import 解析PDF_简单拆解
from crazy_functions.pdf_fns.parse_pdf_grobid import 解析PDF_基于GROBID
from shared_utils.colorful import *
@CatchException
def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
disable_auto_promotion(chatbot)
# 基本信息:功能、贡献者
chatbot.append([None, "插件功能批量翻译PDF文档。函数插件贡献者: Binary-Husky"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
check_packages(["fitz", "tiktoken", "scipdf"])
except:
chatbot.append([None, f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf tiktoken scipdf_parser```。"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 清空历史,以免输入溢出
history = []
success, file_manifest, project_folder = get_files_from_everything(txt, type='.pdf')
# 检测输入参数,如没有给定输入参数,直接退出
if (not success) and txt == "": txt = '空空如也的输入栏。提示请先上传文件把PDF文件拖入对话'
# 如果没找到任何文件
if len(file_manifest) == 0:
chatbot.append([None, f"找不到任何.pdf拓展名的文件: {txt}"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 开始正式执行任务
method = plugin_kwargs.get("pdf_parse_method", None)
if method == "DOC2X":
# ------- 第一种方法效果最好但是需要DOC2X服务 -------
DOC2X_API_KEY = get_conf("DOC2X_API_KEY")
if len(DOC2X_API_KEY) != 0:
try:
yield from 解析PDF_基于DOC2X(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, DOC2X_API_KEY, user_request)
return
except:
chatbot.append([None, f"DOC2X服务不可用现在将执行效果稍差的旧版代码。{trimmed_format_exc_markdown()}"])
yield from update_ui(chatbot=chatbot, history=history)
if method == "GROBID":
# ------- 第二种方法,效果次优 -------
grobid_url = get_avail_grobid_url()
if grobid_url is not None:
yield from 解析PDF_基于GROBID(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, grobid_url)
return
if method == "ClASSIC":
# ------- 第三种方法,早期代码,效果不理想 -------
yield from update_ui_lastest_msg("GROBID服务不可用请检查config中的GROBID_URL。作为替代现在将执行效果稍差的旧版代码。", chatbot, history, delay=3)
yield from 解析PDF_简单拆解(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
return
if method is None:
# ------- 以上三种方法都试一遍 -------
DOC2X_API_KEY = get_conf("DOC2X_API_KEY")
if len(DOC2X_API_KEY) != 0:
try:
yield from 解析PDF_基于DOC2X(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, DOC2X_API_KEY, user_request)
return
except:
chatbot.append([None, f"DOC2X服务不可用正在尝试GROBID。{trimmed_format_exc_markdown()}"])
yield from update_ui(chatbot=chatbot, history=history)
grobid_url = get_avail_grobid_url()
if grobid_url is not None:
yield from 解析PDF_基于GROBID(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, grobid_url)
return
yield from update_ui_lastest_msg("GROBID服务不可用请检查config中的GROBID_URL。作为替代现在将执行效果稍差的旧版代码。", chatbot, history, delay=3)
yield from 解析PDF_简单拆解(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
return

View File

@@ -1,33 +0,0 @@
from crazy_functions.plugin_template.plugin_class_template import GptAcademicPluginTemplate, ArgProperty
from .PDF_Translate import 批量翻译PDF文档
class PDF_Tran(GptAcademicPluginTemplate):
def __init__(self):
"""
请注意`execute`会执行在不同的线程中,因此您在定义和使用类变量时,应当慎之又慎!
"""
pass
def define_arg_selection_menu(self):
"""
定义插件的二级选项菜单
"""
gui_definition = {
"main_input":
ArgProperty(title="PDF文件路径", description="未指定路径,请上传文件后,再点击该插件", default_value="", type="string").model_dump_json(), # 主输入,自动从输入框同步
"additional_prompt":
ArgProperty(title="额外提示词", description="例如:对专有名词、翻译语气等方面的要求", default_value="", type="string").model_dump_json(), # 高级参数输入区,自动同步
"pdf_parse_method":
ArgProperty(title="PDF解析方法", options=["DOC2X", "GROBID", "ClASSIC"], description="", default_value="GROBID", type="dropdown").model_dump_json(),
}
return gui_definition
def execute(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
执行插件
"""
main_input = plugin_kwargs["main_input"]
additional_prompt = plugin_kwargs["additional_prompt"]
pdf_parse_method = plugin_kwargs["pdf_parse_method"]
yield from 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)

View File

@@ -1,153 +0,0 @@
import os,glob
from typing import List
from shared_utils.fastapi_server import validate_path_safety
from toolbox import report_exception
from toolbox import CatchException, update_ui, get_conf, get_log_folder, update_ui_lastest_msg
from shared_utils.fastapi_server import validate_path_safety
from crazy_functions.crazy_utils import input_clipping
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
RAG_WORKER_REGISTER = {}
MAX_HISTORY_ROUND = 5
MAX_CONTEXT_TOKEN_LIMIT = 4096
REMEMBER_PREVIEW = 1000
@CatchException
def handle_document_upload(files: List[str], llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request, rag_worker):
"""
Handles document uploads by extracting text and adding it to the vector store.
"""
from llama_index.core import Document
from crazy_functions.rag_fns.rag_file_support import extract_text, supports_format
user_name = chatbot.get_user()
checkpoint_dir = get_log_folder(user_name, plugin_name='experimental_rag')
for file_path in files:
try:
validate_path_safety(file_path, user_name)
text = extract_text(file_path)
if text is None:
chatbot.append(
[f"上传文件: {os.path.basename(file_path)}", f"文件解析失败无法提取文本内容请更换文件。失败原因可能为1.文档格式过于复杂2. 不支持的文件格式,支持的文件格式后缀有:" + ", ".join(supports_format)])
else:
chatbot.append(
[f"上传文件: {os.path.basename(file_path)}", f"上传文件前50个字符为:{text[:50]}"])
document = Document(text=text, metadata={"source": file_path})
rag_worker.add_documents_to_vector_store([document])
chatbot.append([f"上传文件: {os.path.basename(file_path)}", "文件已成功添加到知识库。"])
except Exception as e:
report_exception(chatbot, history, a=f"处理文件: {file_path}", b=str(e))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# Main Q&A function with document upload support
@CatchException
def Rag问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
# import vector store lib
VECTOR_STORE_TYPE = "Milvus"
if VECTOR_STORE_TYPE == "Milvus":
try:
from crazy_functions.rag_fns.milvus_worker import MilvusRagWorker as LlamaIndexRagWorker
except:
VECTOR_STORE_TYPE = "Simple"
if VECTOR_STORE_TYPE == "Simple":
from crazy_functions.rag_fns.llama_index_worker import LlamaIndexRagWorker
# 1. we retrieve rag worker from global context
user_name = chatbot.get_user()
checkpoint_dir = get_log_folder(user_name, plugin_name='experimental_rag')
if user_name in RAG_WORKER_REGISTER:
rag_worker = RAG_WORKER_REGISTER[user_name]
else:
rag_worker = RAG_WORKER_REGISTER[user_name] = LlamaIndexRagWorker(
user_name,
llm_kwargs,
checkpoint_dir=checkpoint_dir,
auto_load_checkpoint=True
)
current_context = f"{VECTOR_STORE_TYPE} @ {checkpoint_dir}"
tip = "提示输入“清空向量数据库”可以清空RAG向量数据库"
# 2. Handle special commands
if os.path.exists(txt) and os.path.isdir(txt):
project_folder = txt
validate_path_safety(project_folder, chatbot.get_user())
# Extract file paths from the user input
# Assuming the user inputs file paths separated by commas after the command
file_paths = [f for f in glob.glob(f'{project_folder}/**/*', recursive=True)]
chatbot.append([txt, f'正在处理上传的文档 ({current_context}) ...'])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
yield from handle_document_upload(file_paths, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request, rag_worker)
return
elif txt == "清空向量数据库":
chatbot.append([txt, f'正在清空 ({current_context}) ...'])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
rag_worker.purge_vector_store()
yield from update_ui_lastest_msg('已清空', chatbot, history, delay=0) # 刷新界面
return
# 3. Normal Q&A processing
chatbot.append([txt, f'正在召回知识 ({current_context}) ...'])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 4. Clip history to reduce token consumption
txt_origin = txt
if len(history) > MAX_HISTORY_ROUND * 2:
history = history[-(MAX_HISTORY_ROUND * 2):]
txt_clip, history, flags = input_clipping(txt, history, max_token_limit=MAX_CONTEXT_TOKEN_LIMIT, return_clip_flags=True)
input_is_clipped_flag = (flags["original_input_len"] != flags["clipped_input_len"])
# 5. If input is clipped, add input to vector store before retrieve
if input_is_clipped_flag:
yield from update_ui_lastest_msg('检测到长输入, 正在向量化 ...', chatbot, history, delay=0) # 刷新界面
# Save input to vector store
rag_worker.add_text_to_vector_store(txt_origin)
yield from update_ui_lastest_msg('向量化完成 ...', chatbot, history, delay=0) # 刷新界面
if len(txt_origin) > REMEMBER_PREVIEW:
HALF = REMEMBER_PREVIEW // 2
i_say_to_remember = txt[:HALF] + f" ...\n...(省略{len(txt_origin)-REMEMBER_PREVIEW}字)...\n... " + txt[-HALF:]
if (flags["original_input_len"] - flags["clipped_input_len"]) > HALF:
txt_clip = txt_clip + f" ...\n...(省略{len(txt_origin)-len(txt_clip)-HALF}字)...\n... " + txt[-HALF:]
else:
i_say_to_remember = i_say = txt_clip
else:
i_say_to_remember = i_say = txt_clip
# 6. Search vector store and build prompts
nodes = rag_worker.retrieve_from_store_with_query(i_say)
prompt = rag_worker.build_prompt(query=i_say, nodes=nodes)
# 7. Query language model
if len(chatbot) != 0:
chatbot.pop(-1) # Pop temp chat, because we are going to add them again inside `request_gpt_model_in_new_thread_with_ui_alive`
model_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=prompt,
inputs_show_user=i_say,
llm_kwargs=llm_kwargs,
chatbot=chatbot,
history=history,
sys_prompt=system_prompt,
retry_times_at_unknown_error=0
)
# 8. Remember Q&A
yield from update_ui_lastest_msg(
model_say + '</br></br>' + f'对话记忆中, 请稍等 ({current_context}) ...',
chatbot, history, delay=0.5
)
rag_worker.remember_qa(i_say_to_remember, model_say)
history.extend([i_say, model_say])
# 9. Final UI Update
yield from update_ui_lastest_msg(model_say, chatbot, history, delay=0, msg=tip)

View File

@@ -1,167 +0,0 @@
import pickle, os, random
from toolbox import CatchException, update_ui, get_conf, get_log_folder, update_ui_lastest_msg
from crazy_functions.crazy_utils import input_clipping
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from request_llms.bridge_all import predict_no_ui_long_connection
from crazy_functions.json_fns.select_tool import structure_output, select_tool
from pydantic import BaseModel, Field
from loguru import logger
from typing import List
SOCIAL_NETWOK_WORKER_REGISTER = {}
class SocialNetwork():
def __init__(self):
self.people = []
class SaveAndLoad():
def __init__(self, user_name, llm_kwargs, auto_load_checkpoint=True, checkpoint_dir=None) -> None:
self.user_name = user_name
self.checkpoint_dir = checkpoint_dir
if auto_load_checkpoint:
self.social_network = self.load_from_checkpoint(checkpoint_dir)
else:
self.social_network = SocialNetwork()
def does_checkpoint_exist(self, checkpoint_dir=None):
import os, glob
if checkpoint_dir is None: checkpoint_dir = self.checkpoint_dir
if not os.path.exists(checkpoint_dir): return False
if len(glob.glob(os.path.join(checkpoint_dir, "social_network.pkl"))) == 0: return False
return True
def save_to_checkpoint(self, checkpoint_dir=None):
if checkpoint_dir is None: checkpoint_dir = self.checkpoint_dir
with open(os.path.join(checkpoint_dir, 'social_network.pkl'), "wb+") as f:
pickle.dump(self.social_network, f)
return
def load_from_checkpoint(self, checkpoint_dir=None):
if checkpoint_dir is None: checkpoint_dir = self.checkpoint_dir
if self.does_checkpoint_exist(checkpoint_dir=checkpoint_dir):
with open(os.path.join(checkpoint_dir, 'social_network.pkl'), "rb") as f:
social_network = pickle.load(f)
return social_network
else:
return SocialNetwork()
class Friend(BaseModel):
friend_name: str = Field(description="name of a friend")
friend_description: str = Field(description="description of a friend (everything about this friend)")
friend_relationship: str = Field(description="The relationship with a friend (e.g. friend, family, colleague)")
class FriendList(BaseModel):
friends_list: List[Friend] = Field(description="The list of friends")
class SocialNetworkWorker(SaveAndLoad):
def ai_socail_advice(self, prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, run_gpt_fn, intention_type):
pass
def ai_remove_friend(self, prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, run_gpt_fn, intention_type):
pass
def ai_list_friends(self, prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, run_gpt_fn, intention_type):
pass
def ai_add_multi_friends(self, prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, run_gpt_fn, intention_type):
friend, err_msg = structure_output(
txt=prompt,
prompt="根据提示, 解析多个联系人的身份信息\n\n",
err_msg=f"不能理解该联系人",
run_gpt_fn=run_gpt_fn,
pydantic_cls=FriendList
)
if friend.friends_list:
for f in friend.friends_list:
self.add_friend(f)
msg = f"成功添加{len(friend.friends_list)}个联系人: {str(friend.friends_list)}"
yield from update_ui_lastest_msg(lastmsg=msg, chatbot=chatbot, history=history, delay=0)
def run(self, txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
prompt = txt
run_gpt_fn = lambda inputs, sys_prompt: predict_no_ui_long_connection(inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])
self.tools_to_select = {
"SocialAdvice":{
"explain_to_llm": "如果用户希望获取社交指导调用SocialAdvice生成一些社交建议",
"callback": self.ai_socail_advice,
},
"AddFriends":{
"explain_to_llm": "如果用户给出了联系人调用AddMultiFriends把联系人添加到数据库",
"callback": self.ai_add_multi_friends,
},
"RemoveFriend":{
"explain_to_llm": "如果用户希望移除某个联系人调用RemoveFriend",
"callback": self.ai_remove_friend,
},
"ListFriends":{
"explain_to_llm": "如果用户列举联系人调用ListFriends",
"callback": self.ai_list_friends,
}
}
try:
Explaination = '\n'.join([f'{k}: {v["explain_to_llm"]}' for k, v in self.tools_to_select.items()])
class UserSociaIntention(BaseModel):
intention_type: str = Field(
description=
f"The type of user intention. You must choose from {self.tools_to_select.keys()}.\n\n"
f"Explaination:\n{Explaination}",
default="SocialAdvice"
)
pydantic_cls_instance, err_msg = select_tool(
prompt=txt,
run_gpt_fn=run_gpt_fn,
pydantic_cls=UserSociaIntention
)
except Exception as e:
yield from update_ui_lastest_msg(
lastmsg=f"无法理解用户意图 {err_msg}",
chatbot=chatbot,
history=history,
delay=0
)
return
intention_type = pydantic_cls_instance.intention_type
intention_callback = self.tools_to_select[pydantic_cls_instance.intention_type]['callback']
yield from intention_callback(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, run_gpt_fn, intention_type)
def add_friend(self, friend):
# check whether the friend is already in the social network
for f in self.social_network.people:
if f.friend_name == friend.friend_name:
f.friend_description = friend.friend_description
f.friend_relationship = friend.friend_relationship
logger.info(f"Repeated friend, update info: {friend}")
return
logger.info(f"Add a new friend: {friend}")
self.social_network.people.append(friend)
return
@CatchException
def I人助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
# 1. we retrieve worker from global context
user_name = chatbot.get_user()
checkpoint_dir=get_log_folder(user_name, plugin_name='experimental_rag')
if user_name in SOCIAL_NETWOK_WORKER_REGISTER:
social_network_worker = SOCIAL_NETWOK_WORKER_REGISTER[user_name]
else:
social_network_worker = SOCIAL_NETWOK_WORKER_REGISTER[user_name] = SocialNetworkWorker(
user_name,
llm_kwargs,
checkpoint_dir=checkpoint_dir,
auto_load_checkpoint=True
)
# 2. save
yield from social_network_worker.run(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)
social_network_worker.save_to_checkpoint(checkpoint_dir)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

View File

@@ -1,389 +0,0 @@
from toolbox import update_ui, promote_file_to_downloadzone
from toolbox import CatchException, report_exception, write_history_to_file
from shared_utils.fastapi_server import validate_path_safety
from crazy_functions.crazy_utils import input_clipping
def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
import os, copy
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
summary_batch_isolation = True
inputs_array = []
inputs_show_user_array = []
history_array = []
sys_prompt_array = []
report_part_1 = []
assert len(file_manifest) <= 512, "源文件太多超过512个, 请缩减输入文件的数量。或者您也可以选择删除此行警告并修改代码拆分file_manifest列表从而实现分批次处理。"
############################## <第一步,逐个文件分析,多线程> ##################################
for index, fp in enumerate(file_manifest):
# 读取文件
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
file_content = f.read()
prefix = "接下来请你逐文件分析下面的工程" if index==0 else ""
i_say = prefix + f'请对下面的程序文件做一个概述文件名是{os.path.relpath(fp, project_folder)},文件代码是 ```{file_content}```'
i_say_show_user = prefix + f'[{index+1}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {fp}'
# 装载请求内容
inputs_array.append(i_say)
inputs_show_user_array.append(i_say_show_user)
history_array.append([])
sys_prompt_array.append("你是一个程序架构分析师,正在分析一个源代码项目。你的回答必须简单明了。")
# 文件读取完成对每一个源代码文件生成一个请求线程发送到chatgpt进行分析
gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
inputs_array = inputs_array,
inputs_show_user_array = inputs_show_user_array,
history_array = history_array,
sys_prompt_array = sys_prompt_array,
llm_kwargs = llm_kwargs,
chatbot = chatbot,
show_user_at_complete = True
)
# 全部文件解析完成,结果写入文件,准备对工程源代码进行汇总分析
report_part_1 = copy.deepcopy(gpt_response_collection)
history_to_return = report_part_1
res = write_history_to_file(report_part_1)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append(("完成?", "逐个文件分析已完成。" + res + "\n\n正在开始汇总。"))
yield from update_ui(chatbot=chatbot, history=history_to_return) # 刷新界面
############################## <第二步,综合,单线程,分组+迭代处理> ##################################
batchsize = 16 # 10个文件为一组
report_part_2 = []
previous_iteration_files = []
last_iteration_result = ""
while True:
if len(file_manifest) == 0: break
this_iteration_file_manifest = file_manifest[:batchsize]
this_iteration_gpt_response_collection = gpt_response_collection[:batchsize*2]
file_rel_path = [os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)]
# 把“请对下面的程序文件做一个概述” 替换成 精简的 "文件名:{all_file[index]}"
for index, content in enumerate(this_iteration_gpt_response_collection):
if index%2==0: this_iteration_gpt_response_collection[index] = f"{file_rel_path[index//2]}" # 只保留文件名节省token
this_iteration_files = [os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)]
previous_iteration_files.extend(this_iteration_files)
previous_iteration_files_string = ', '.join(previous_iteration_files)
current_iteration_focus = ', '.join(this_iteration_files)
if summary_batch_isolation: focus = current_iteration_focus
else: focus = previous_iteration_files_string
i_say = f'用一张Markdown表格简要描述以下文件的功能{focus}。根据以上分析,用一句话概括程序的整体功能。'
if last_iteration_result != "":
sys_prompt_additional = "已知某些代码的局部作用是:" + last_iteration_result + "\n请继续分析其他源代码,从而更全面地理解项目的整体功能。"
else:
sys_prompt_additional = ""
inputs_show_user = f'根据以上分析,对程序的整体功能和构架重新做出概括,由于输入长度限制,可能需要分组处理,本组文件为 {current_iteration_focus} + 已经汇总的文件组。'
this_iteration_history = copy.deepcopy(this_iteration_gpt_response_collection)
this_iteration_history.append(last_iteration_result)
# 裁剪input
inputs, this_iteration_history_feed = input_clipping(inputs=i_say, history=this_iteration_history, max_token_limit=2560)
result = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=inputs, inputs_show_user=inputs_show_user, llm_kwargs=llm_kwargs, chatbot=chatbot,
history=this_iteration_history_feed, # 迭代之前的分析
sys_prompt="你是一个程序架构分析师,正在分析一个项目的源代码。" + sys_prompt_additional)
diagram_code = make_diagram(this_iteration_files, result, this_iteration_history_feed)
summary = "请用一句话概括这些文件的整体功能。\n\n" + diagram_code
summary_result = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=summary,
inputs_show_user=summary,
llm_kwargs=llm_kwargs,
chatbot=chatbot,
history=[i_say, result], # 迭代之前的分析
sys_prompt="你是一个程序架构分析师,正在分析一个项目的源代码。" + sys_prompt_additional)
report_part_2.extend([i_say, result])
last_iteration_result = summary_result
file_manifest = file_manifest[batchsize:]
gpt_response_collection = gpt_response_collection[batchsize*2:]
############################## <END> ##################################
history_to_return.extend(report_part_2)
res = write_history_to_file(history_to_return)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append(("完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history_to_return) # 刷新界面
def make_diagram(this_iteration_files, result, this_iteration_history_feed):
from crazy_functions.diagram_fns.file_tree import build_file_tree_mermaid_diagram
return build_file_tree_mermaid_diagram(this_iteration_history_feed[0::2], this_iteration_history_feed[1::2], "项目示意图")
@CatchException
def 解析项目本身(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
history = [] # 清空历史,以免输入溢出
import glob
file_manifest = [f for f in glob.glob('./*.py')] + \
[f for f in glob.glob('./*/*.py')]
project_folder = './'
if len(file_manifest) == 0:
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何python文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
@CatchException
def 解析一个Python项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
history = [] # 清空历史,以免输入溢出
import glob, os
if os.path.exists(txt):
project_folder = txt
validate_path_safety(project_folder, chatbot.get_user())
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.py', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何python文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
@CatchException
def 解析一个Matlab项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
history = [] # 清空历史,以免输入溢出
import glob, os
if os.path.exists(txt):
project_folder = txt
validate_path_safety(project_folder, chatbot.get_user())
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a = f"解析Matlab项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.m', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a = f"解析Matlab项目: {txt}", b = f"找不到任何`.m`源文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
@CatchException
def 解析一个C项目的头文件(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
history = [] # 清空历史,以免输入溢出
import glob, os
if os.path.exists(txt):
project_folder = txt
validate_path_safety(project_folder, chatbot.get_user())
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.h', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.hpp', recursive=True)] #+ \
# [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.h头文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
@CatchException
def 解析一个C项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
history = [] # 清空历史,以免输入溢出
import glob, os
if os.path.exists(txt):
project_folder = txt
validate_path_safety(project_folder, chatbot.get_user())
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.h', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.hpp', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.h头文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
@CatchException
def 解析一个Java项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
history = [] # 清空历史,以免输入溢出
import glob, os
if os.path.exists(txt):
project_folder = txt
validate_path_safety(project_folder, chatbot.get_user())
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.java', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.jar', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.xml', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.sh', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何java文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
@CatchException
def 解析一个前端项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
history = [] # 清空历史,以免输入溢出
import glob, os
if os.path.exists(txt):
project_folder = txt
validate_path_safety(project_folder, chatbot.get_user())
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.ts', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.tsx', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.json', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.js', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.vue', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.less', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.sass', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.wxml', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.wxss', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.css', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.jsx', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何前端相关文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
@CatchException
def 解析一个Golang项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
history = [] # 清空历史,以免输入溢出
import glob, os
if os.path.exists(txt):
project_folder = txt
validate_path_safety(project_folder, chatbot.get_user())
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.go', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/go.mod', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/go.sum', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/go.work', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何golang文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
@CatchException
def 解析一个Rust项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
history = [] # 清空历史,以免输入溢出
import glob, os
if os.path.exists(txt):
project_folder = txt
validate_path_safety(project_folder, chatbot.get_user())
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.rs', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.toml', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.lock', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何golang文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
@CatchException
def 解析一个Lua项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
history = [] # 清空历史,以免输入溢出
import glob, os
if os.path.exists(txt):
project_folder = txt
validate_path_safety(project_folder, chatbot.get_user())
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.lua', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.xml', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.json', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.toml', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何lua文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
@CatchException
def 解析一个CSharp项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
history = [] # 清空历史,以免输入溢出
import glob, os
if os.path.exists(txt):
project_folder = txt
validate_path_safety(project_folder, chatbot.get_user())
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.cs', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.csproj', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何CSharp文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
@CatchException
def 解析任意code项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
txt_pattern = plugin_kwargs.get("advanced_arg")
txt_pattern = txt_pattern.replace("", ",")
# 将要匹配的模式(例如: *.c, *.cpp, *.py, config.toml)
pattern_include = [_.lstrip(" ,").rstrip(" ,") for _ in txt_pattern.split(",") if _ != "" and not _.strip().startswith("^")]
if not pattern_include: pattern_include = ["*"] # 不输入即全部匹配
# 将要忽略匹配的文件后缀(例如: ^*.c, ^*.cpp, ^*.py)
pattern_except_suffix = [_.lstrip(" ^*.,").rstrip(" ,") for _ in txt_pattern.split(" ") if _ != "" and _.strip().startswith("^*.")]
pattern_except_suffix += ['zip', 'rar', '7z', 'tar', 'gz'] # 避免解析压缩文件
# 将要忽略匹配的文件名(例如: ^README.md)
pattern_except_name = [_.lstrip(" ^*,").rstrip(" ,").replace(".", r"\.") # 移除左边通配符,移除右侧逗号,转义点号
for _ in txt_pattern.split(" ") # 以空格分割
if (_ != "" and _.strip().startswith("^") and not _.strip().startswith("^*.")) # ^开始,但不是^*.开始
]
# 生成正则表达式
pattern_except = r'/[^/]+\.(' + "|".join(pattern_except_suffix) + ')$'
pattern_except += '|/(' + "|".join(pattern_except_name) + ')$' if pattern_except_name != [] else ''
history.clear()
import glob, os, re
if os.path.exists(txt):
project_folder = txt
validate_path_safety(project_folder, chatbot.get_user())
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 若上传压缩文件, 先寻找到解压的文件夹路径, 从而避免解析压缩文件
maybe_dir = [f for f in glob.glob(f'{project_folder}/*') if os.path.isdir(f)]
if len(maybe_dir)>0 and maybe_dir[0].endswith('.extract'):
extract_folder_path = maybe_dir[0]
else:
extract_folder_path = project_folder
# 按输入的匹配模式寻找上传的非压缩文件和已解压的文件
file_manifest = [f for pattern in pattern_include for f in glob.glob(f'{extract_folder_path}/**/{pattern}', recursive=True) if "" != extract_folder_path and \
os.path.isfile(f) and (not re.search(pattern_except, f) or pattern.endswith('.' + re.search(pattern_except, f).group().split('.')[-1]))]
if len(file_manifest) == 0:
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)

View File

@@ -1,162 +0,0 @@
import os, copy, time
from toolbox import CatchException, report_exception, update_ui, zip_result, promote_file_to_downloadzone, update_ui_lastest_msg, get_conf, generate_file_link
from shared_utils.fastapi_server import validate_path_safety
from crazy_functions.crazy_utils import input_clipping
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.agent_fns.python_comment_agent import PythonCodeComment
from crazy_functions.diagram_fns.file_tree import FileNode
from crazy_functions.agent_fns.watchdog import WatchDog
from shared_utils.advanced_markdown_format import markdown_convertion_for_file
from loguru import logger
def 注释源代码(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
summary_batch_isolation = True
inputs_array = []
inputs_show_user_array = []
history_array = []
sys_prompt_array = []
assert len(file_manifest) <= 512, "源文件太多超过512个, 请缩减输入文件的数量。或者您也可以选择删除此行警告并修改代码拆分file_manifest列表从而实现分批次处理。"
# 建立文件树
file_tree_struct = FileNode("root", build_manifest=True)
for file_path in file_manifest:
file_tree_struct.add_file(file_path, file_path)
# <第一步,逐个文件分析,多线程>
lang = "" if not plugin_kwargs["use_chinese"] else " (you must use Chinese)"
for index, fp in enumerate(file_manifest):
# 读取文件
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
file_content = f.read()
prefix = ""
i_say = prefix + f'Please conclude the following source code at {os.path.relpath(fp, project_folder)} with only one sentence{lang}, the code is:\n```{file_content}```'
i_say_show_user = prefix + f'[{index+1}/{len(file_manifest)}] 请用一句话对下面的程序文件做一个整体概述: {fp}'
# 装载请求内容
MAX_TOKEN_SINGLE_FILE = 2560
i_say, _ = input_clipping(inputs=i_say, history=[], max_token_limit=MAX_TOKEN_SINGLE_FILE)
inputs_array.append(i_say)
inputs_show_user_array.append(i_say_show_user)
history_array.append([])
sys_prompt_array.append(f"You are a software architecture analyst analyzing a source code project. Do not dig into details, tell me what the code is doing in general. Your answer must be short, simple and clear{lang}.")
# 文件读取完成,对每一个源代码文件,生成一个请求线程,发送到大模型进行分析
gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
inputs_array = inputs_array,
inputs_show_user_array = inputs_show_user_array,
history_array = history_array,
sys_prompt_array = sys_prompt_array,
llm_kwargs = llm_kwargs,
chatbot = chatbot,
show_user_at_complete = True
)
# <第二步,逐个文件分析,生成带注释文件>
tasks = ["" for _ in range(len(file_manifest))]
def bark_fn(tasks):
for i in range(len(tasks)): tasks[i] = "watchdog is dead"
wd = WatchDog(timeout=10, bark_fn=lambda: bark_fn(tasks), interval=3, msg="ThreadWatcher timeout")
wd.begin_watch()
from concurrent.futures import ThreadPoolExecutor
executor = ThreadPoolExecutor(max_workers=get_conf('DEFAULT_WORKER_NUM'))
def _task_multi_threading(i_say, gpt_say, fp, file_tree_struct, index):
language = 'Chinese' if plugin_kwargs["use_chinese"] else 'English'
def observe_window_update(x):
if tasks[index] == "watchdog is dead":
raise TimeoutError("ThreadWatcher: watchdog is dead")
tasks[index] = x
pcc = PythonCodeComment(llm_kwargs, plugin_kwargs, language=language, observe_window_update=observe_window_update)
pcc.read_file(path=fp, brief=gpt_say)
revised_path, revised_content = pcc.begin_comment_source_code(None, None)
file_tree_struct.manifest[fp].revised_path = revised_path
file_tree_struct.manifest[fp].revised_content = revised_content
# <将结果写回源文件>
with open(fp, 'w', encoding='utf-8') as f:
f.write(file_tree_struct.manifest[fp].revised_content)
# <生成对比html>
with open("crazy_functions/agent_fns/python_comment_compare.html", 'r', encoding='utf-8') as f:
html_template = f.read()
warp = lambda x: "```python\n\n" + x + "\n\n```"
from themes.theme import load_dynamic_theme
_, advanced_css, _, _ = load_dynamic_theme("Default")
html_template = html_template.replace("ADVANCED_CSS", advanced_css)
html_template = html_template.replace("REPLACE_CODE_FILE_LEFT", pcc.get_markdown_block_in_html(markdown_convertion_for_file(warp(pcc.original_content))))
html_template = html_template.replace("REPLACE_CODE_FILE_RIGHT", pcc.get_markdown_block_in_html(markdown_convertion_for_file(warp(revised_content))))
compare_html_path = fp + '.compare.html'
file_tree_struct.manifest[fp].compare_html = compare_html_path
with open(compare_html_path, 'w', encoding='utf-8') as f:
f.write(html_template)
tasks[index] = ""
chatbot.append([None, f"正在处理:"])
futures = []
index = 0
for i_say, gpt_say, fp in zip(gpt_response_collection[0::2], gpt_response_collection[1::2], file_manifest):
future = executor.submit(_task_multi_threading, i_say, gpt_say, fp, file_tree_struct, index)
index += 1
futures.append(future)
# <第三步,等待任务完成>
cnt = 0
while True:
cnt += 1
wd.feed()
time.sleep(3)
worker_done = [h.done() for h in futures]
remain = len(worker_done) - sum(worker_done)
# <展示已经完成的部分>
preview_html_list = []
for done, fp in zip(worker_done, file_manifest):
if not done: continue
if hasattr(file_tree_struct.manifest[fp], 'compare_html'):
preview_html_list.append(file_tree_struct.manifest[fp].compare_html)
else:
logger.error(f"文件: {fp} 的注释结果未能成功")
file_links = generate_file_link(preview_html_list)
yield from update_ui_lastest_msg(
f"当前任务: <br/>{'<br/>'.join(tasks)}.<br/>" +
f"剩余源文件数量: {remain}.<br/>" +
f"已完成的文件: {sum(worker_done)}.<br/>" +
file_links +
"<br/>" +
''.join(['.']*(cnt % 10 + 1)
), chatbot=chatbot, history=history, delay=0)
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
if all(worker_done):
executor.shutdown()
break
# <第四步,压缩结果>
zip_res = zip_result(project_folder)
promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
# <END>
chatbot.append((None, "所有源文件均已处理完毕。"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
@CatchException
def 注释Python项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
history = [] # 清空历史,以免输入溢出
plugin_kwargs["use_chinese"] = plugin_kwargs.get("use_chinese", False)
import glob, os
if os.path.exists(txt):
project_folder = txt
validate_path_safety(project_folder, chatbot.get_user())
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.py', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何python文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 注释源代码(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)

View File

@@ -1,36 +0,0 @@
from toolbox import get_conf, update_ui
from crazy_functions.plugin_template.plugin_class_template import GptAcademicPluginTemplate, ArgProperty
from crazy_functions.SourceCode_Comment import 注释Python项目
class SourceCodeComment_Wrap(GptAcademicPluginTemplate):
def __init__(self):
"""
请注意`execute`会执行在不同的线程中,因此您在定义和使用类变量时,应当慎之又慎!
"""
pass
def define_arg_selection_menu(self):
"""
定义插件的二级选项菜单
"""
gui_definition = {
"main_input":
ArgProperty(title="路径", description="程序路径(上传文件后自动填写)", default_value="", type="string").model_dump_json(), # 主输入,自动从输入框同步
"use_chinese":
ArgProperty(title="注释语言", options=["英文", "中文"], default_value="英文", description="", type="dropdown").model_dump_json(),
# "use_emoji":
# ArgProperty(title="在注释中使用emoji", options=["禁止", "允许"], default_value="禁止", description="无", type="dropdown").model_dump_json(),
}
return gui_definition
def execute(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
执行插件
"""
if plugin_kwargs["use_chinese"] == "中文":
plugin_kwargs["use_chinese"] = True
else:
plugin_kwargs["use_chinese"] = False
yield from 注释Python项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)

View File

@@ -1,147 +0,0 @@
import requests
import random
import time
import re
import json
from bs4 import BeautifulSoup
from functools import lru_cache
from itertools import zip_longest
from check_proxy import check_proxy
from toolbox import CatchException, update_ui, get_conf, promote_file_to_downloadzone, update_ui_lastest_msg, generate_file_link
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
from request_llms.bridge_all import model_info
from request_llms.bridge_all import predict_no_ui_long_connection
from crazy_functions.prompts.internet import SearchOptimizerPrompt, SearchAcademicOptimizerPrompt
from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
from textwrap import dedent
from loguru import logger
from pydantic import BaseModel, Field
class Query(BaseModel):
search_keyword: str = Field(description="search query for video resource")
class VideoResource(BaseModel):
thought: str = Field(description="analysis of the search results based on the user's query")
title: str = Field(description="title of the video")
author: str = Field(description="author/uploader of the video")
bvid: str = Field(description="unique ID of the video")
def get_video_resource(search_keyword):
from crazy_functions.media_fns.get_media import search_videos
# Search for videos and return the first result
videos = search_videos(
search_keyword
)
# Return the first video if results exist, otherwise return None
return videos
def download_video(bvid, user_name, chatbot, history):
# from experimental_mods.get_bilibili_resource import download_bilibili
from crazy_functions.media_fns.get_media import download_video
# pause a while
tic_time = 8
for i in range(tic_time):
yield from update_ui_lastest_msg(
lastmsg=f"即将下载音频。等待{tic_time-i}秒后自动继续, 点击“停止”键取消此操作。",
chatbot=chatbot, history=[], delay=1)
# download audio
chatbot.append((None, "下载音频, 请稍等...")); yield from update_ui(chatbot=chatbot, history=history)
downloaded_files = yield from download_video(bvid, only_audio=True, user_name=user_name, chatbot=chatbot, history=history)
# preview
preview_list = [promote_file_to_downloadzone(fp) for fp in downloaded_files]
file_links = generate_file_link(preview_list)
yield from update_ui_lastest_msg(f"已完成的文件: <br/>" + file_links, chatbot=chatbot, history=history, delay=0)
chatbot.append((None, f"即将下载视频。"))
# pause a while
tic_time = 16
for i in range(tic_time):
yield from update_ui_lastest_msg(
lastmsg=f"即将下载视频。等待{tic_time-i}秒后自动继续, 点击“停止”键取消此操作。",
chatbot=chatbot, history=[], delay=1)
# download video
chatbot.append((None, "下载视频, 请稍等...")); yield from update_ui(chatbot=chatbot, history=history)
downloaded_files_part2 = yield from download_video(bvid, only_audio=False, user_name=user_name, chatbot=chatbot, history=history)
# preview
preview_list = [promote_file_to_downloadzone(fp) for fp in downloaded_files_part2]
file_links = generate_file_link(preview_list)
yield from update_ui_lastest_msg(f"已完成的文件: <br/>" + file_links, chatbot=chatbot, history=history, delay=0)
# return
return downloaded_files + downloaded_files_part2
@CatchException
def 多媒体任务(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
user_wish: str = txt
# 搜索
chatbot.append((txt, "检索中, 请稍等..."))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 结构化生成
rf_req = dedent(f"""
The user wish to get the following resource:
{user_wish}
Generate reseach keywords (less than 5 keywords) accordingly.
""")
gpt_json_io = GptJsonIO(Query)
inputs = rf_req + gpt_json_io.format_instructions
run_gpt_fn = lambda inputs, sys_prompt: predict_no_ui_long_connection(inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])
analyze_res = run_gpt_fn(inputs, "")
logger.info(analyze_res)
query: Query = gpt_json_io.generate_output_auto_repair(analyze_res, run_gpt_fn)
# 关键词展示
chatbot.append((None, f"检索关键词已确认: {query.search_keyword}。筛选中, 请稍等..."))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 获取候选资源
candadate_dictionary: dict = get_video_resource(query.search_keyword)
candadate_dictionary_as_str = json.dumps(candadate_dictionary, ensure_ascii=False, indent=4)
# 展示候选资源
candadate_display = "\n".join([f"{i+1}. {it['title']}" for i, it in enumerate(candadate_dictionary)])
chatbot.append((None, f"候选:\n\n{candadate_display}"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 结构化生成
rf_req_2 = dedent(f"""
The user wish to get the following resource:
{user_wish}
Select the most relevant and suitable video resource from the following search results:
{candadate_dictionary_as_str}
Note:
1. The first several search video results are more likely to satisfy the user's wish.
2. The time duration of the video should be less than 10 minutes.
3. You should analyze the search results first, before giving your answer.
4. Use Chinese if possible.
""")
gpt_json_io = GptJsonIO(VideoResource)
inputs = rf_req_2 + gpt_json_io.format_instructions
run_gpt_fn = lambda inputs, sys_prompt: predict_no_ui_long_connection(inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])
analyze_res = run_gpt_fn(inputs, "")
logger.info(analyze_res)
video_resource: VideoResource = gpt_json_io.generate_output_auto_repair(analyze_res, run_gpt_fn)
# Display
chatbot.append(
(None,
f"分析:{video_resource.thought}" "<br/>"
f"选择: `{video_resource.title}`。" "<br/>"
f"作者:{video_resource.author}"
)
)
chatbot.append((None, f"下载中, 请稍等..."))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
if video_resource and video_resource.bvid:
logger.info(video_resource)
yield from download_video(video_resource.bvid, chatbot.get_user(), chatbot, history)

View File

@@ -92,7 +92,7 @@ class MiniGame_ResumeStory(GptAcademicGameBaseState):
def generate_story_image(self, story_paragraph):
try:
from crazy_functions.Image_Generate import gen_image
from crazy_functions.AntFin import gen_image
prompt_ = predict_no_ui_long_connection(inputs=story_paragraph, llm_kwargs=self.llm_kwargs, history=[], sys_prompt='你需要根据用户给出的小说段落进行简短的环境描写。要求80字以内。')
image_url, image_path = gen_image(self.llm_kwargs, prompt_, '512x512', model="dall-e-2", quality='standard', style='natural')
return f'<br/><div align="center"><img src="file={image_path}"></div>'

View File

@@ -1,180 +0,0 @@
import re, requests, unicodedata, os
from toolbox import update_ui, get_log_folder
from toolbox import write_history_to_file, promote_file_to_downloadzone
from toolbox import CatchException, report_exception, get_conf
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from loguru import logger
def download_arxiv_(url_pdf):
if 'arxiv.org' not in url_pdf:
if ('.' in url_pdf) and ('/' not in url_pdf):
new_url = 'https://arxiv.org/abs/'+url_pdf
logger.info('下载编号:', url_pdf, '自动定位:', new_url)
# download_arxiv_(new_url)
return download_arxiv_(new_url)
else:
logger.info('不能识别的URL')
return None
if 'abs' in url_pdf:
url_pdf = url_pdf.replace('abs', 'pdf')
url_pdf = url_pdf + '.pdf'
url_abs = url_pdf.replace('.pdf', '').replace('pdf', 'abs')
title, other_info = get_name(_url_=url_abs)
paper_id = title.split()[0] # '[1712.00559]'
if '2' in other_info['year']:
title = other_info['year'] + ' ' + title
known_conf = ['NeurIPS', 'NIPS', 'Nature', 'Science', 'ICLR', 'AAAI']
for k in known_conf:
if k in other_info['comment']:
title = k + ' ' + title
download_dir = get_log_folder(plugin_name='arxiv')
os.makedirs(download_dir, exist_ok=True)
title_str = title.replace('?', '')\
.replace(':', '')\
.replace('\"', '')\
.replace('\n', '')\
.replace(' ', ' ')\
.replace(' ', ' ')
requests_pdf_url = url_pdf
file_path = download_dir+title_str
logger.info('下载中')
proxies = get_conf('proxies')
r = requests.get(requests_pdf_url, proxies=proxies)
with open(file_path, 'wb+') as f:
f.write(r.content)
logger.info('下载完成')
x = "%s %s %s.bib" % (paper_id, other_info['year'], other_info['authors'])
x = x.replace('?', '')\
.replace(':', '')\
.replace('\"', '')\
.replace('\n', '')\
.replace(' ', ' ')\
.replace(' ', ' ')
return file_path, other_info
def get_name(_url_):
from bs4 import BeautifulSoup
logger.info('正在获取文献名!')
logger.info(_url_)
proxies = get_conf('proxies')
res = requests.get(_url_, proxies=proxies)
bs = BeautifulSoup(res.text, 'html.parser')
other_details = {}
# get year
try:
year = bs.find_all(class_='dateline')[0].text
year = re.search(r'(\d{4})', year, re.M | re.I).group(1)
other_details['year'] = year
abstract = bs.find_all(class_='abstract mathjax')[0].text
other_details['abstract'] = abstract
except:
other_details['year'] = ''
logger.info('年份获取失败')
# get author
try:
authors = bs.find_all(class_='authors')[0].text
authors = authors.split('Authors:')[1]
other_details['authors'] = authors
except:
other_details['authors'] = ''
logger.info('authors获取失败')
# get comment
try:
comment = bs.find_all(class_='metatable')[0].text
real_comment = None
for item in comment.replace('\n', ' ').split(' '):
if 'Comments' in item:
real_comment = item
if real_comment is not None:
other_details['comment'] = real_comment
else:
other_details['comment'] = ''
except:
other_details['comment'] = ''
logger.info('年份获取失败')
title_str = BeautifulSoup(
res.text, 'html.parser').find('title').contents[0]
logger.info('获取成功:', title_str)
# arxiv_recall[_url_] = (title_str+'.pdf', other_details)
# with open('./arxiv_recall.pkl', 'wb') as f:
# pickle.dump(arxiv_recall, f)
return title_str+'.pdf', other_details
@CatchException
def 下载arxiv论文并翻译摘要(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
CRAZY_FUNCTION_INFO = "下载arxiv论文并翻译摘要函数插件作者[binary-husky]。正在提取摘要并下载PDF文档……"
import glob
import os
# 基本信息:功能、贡献者
chatbot.append(["函数插件功能?", CRAZY_FUNCTION_INFO])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
import bs4
except:
report_exception(chatbot, history,
a = f"解析项目: {txt}",
b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade beautifulsoup4```。")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 清空历史,以免输入溢出
history = []
# 提取摘要下载PDF文档
try:
pdf_path, info = download_arxiv_(txt)
except:
report_exception(chatbot, history,
a = f"解析项目: {txt}",
b = f"下载pdf文件未成功")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 翻译摘要等
i_say = f"请你阅读以下学术论文相关的材料,提取摘要,翻译为中文。材料如下:{str(info)}"
i_say_show_user = f'请你阅读以下学术论文相关的材料,提取摘要,翻译为中文。论文:{pdf_path}'
chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
msg = '正常'
# ** gpt request **
# 单线获取文章meta信息
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say,
inputs_show_user=i_say_show_user,
llm_kwargs=llm_kwargs,
chatbot=chatbot, history=[],
sys_prompt="Your job is to collect information from materials and translate to Chinese。",
)
chatbot[-1] = (i_say_show_user, gpt_say)
history.append(i_say_show_user); history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
res = write_history_to_file(history)
promote_file_to_downloadzone(res, chatbot=chatbot)
promote_file_to_downloadzone(pdf_path, chatbot=chatbot)
chatbot.append(("完成了吗?", res + "\n\nPDF文件也已经下载"))
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面

View File

@@ -1,40 +0,0 @@
from toolbox import CatchException, update_ui, update_ui_lastest_msg
from crazy_functions.multi_stage.multi_stage_utils import GptAcademicGameBaseState
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from request_llms.bridge_all import predict_no_ui_long_connection
from crazy_functions.game_fns.game_utils import get_code_block, is_same_thing
@CatchException
def 随机小游戏(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
from crazy_functions.game_fns.game_interactive_story import MiniGame_ResumeStory
# 清空历史
history = []
# 选择游戏
cls = MiniGame_ResumeStory
# 如果之前已经初始化了游戏实例,则继续该实例;否则重新初始化
state = cls.sync_state(chatbot,
llm_kwargs,
cls,
plugin_name='MiniGame_ResumeStory',
callback_fn='crazy_functions.互动小游戏->随机小游戏',
lock_plugin=True
)
yield from state.continue_game(prompt, chatbot, history)
@CatchException
def 随机小游戏1(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
from crazy_functions.game_fns.game_ascii_art import MiniGame_ASCII_Art
# 清空历史
history = []
# 选择游戏
cls = MiniGame_ASCII_Art
# 如果之前已经初始化了游戏实例,则继续该实例;否则重新初始化
state = cls.sync_state(chatbot,
llm_kwargs,
cls,
plugin_name='MiniGame_ASCII_Art',
callback_fn='crazy_functions.互动小游戏->随机小游戏1',
lock_plugin=True
)
yield from state.continue_game(prompt, chatbot, history)

View File

@@ -1,62 +0,0 @@
from toolbox import CatchException, update_ui
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
@CatchException
def 交互功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数, 如温度和top_p等, 一般原样传递下去就行
plugin_kwargs 插件模型的参数, 如温度和top_p等, 一般原样传递下去就行
chatbot 聊天显示框的句柄,用于显示给用户
history 聊天历史,前情提要
system_prompt 给gpt的静默提醒
user_request 当前用户的请求信息IP地址等
"""
history = [] # 清空历史,以免输入溢出
chatbot.append(("这是什么功能?", "交互功能函数模板。在执行完成之后, 可以将自身的状态存储到cookie中, 等待用户的再次调用。"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
state = chatbot._cookies.get('plugin_state_0001', None) # 初始化插件状态
if state is None:
chatbot._cookies['lock_plugin'] = 'crazy_functions.交互功能函数模板->交互功能模板函数' # 赋予插件锁定 锁定插件回调路径,当下一次用户提交时,会直接转到该函数
chatbot._cookies['plugin_state_0001'] = 'wait_user_keyword' # 赋予插件状态
chatbot.append(("第一次调用:", "请输入关键词, 我将为您查找相关壁纸, 建议使用英文单词, 插件锁定中,请直接提交即可。"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
if state == 'wait_user_keyword':
chatbot._cookies['lock_plugin'] = None # 解除插件锁定,避免遗忘导致死锁
chatbot._cookies['plugin_state_0001'] = None # 解除插件状态,避免遗忘导致死锁
# 解除插件锁定
chatbot.append((f"获取关键词:{txt}", ""))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
page_return = get_image_page_by_keyword(txt)
inputs=inputs_show_user=f"Extract all image urls in this html page, pick the first 5 images and show them with markdown format: \n\n {page_return}"
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=inputs, inputs_show_user=inputs_show_user,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
sys_prompt="When you want to show an image, use markdown format. e.g. ![image_description](image_url). If there are no image url provided, answer 'no image url provided'"
)
chatbot[-1] = [chatbot[-1][0], gpt_say]
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# ---------------------------------------------------------------------------------
def get_image_page_by_keyword(keyword):
import requests
from bs4 import BeautifulSoup
response = requests.get(f'https://wallhaven.cc/search?q={keyword}', timeout=2)
res = "image urls: \n"
for image_element in BeautifulSoup(response.content, 'html.parser').findAll("img"):
try:
res += image_element["data-src"]
res += "\n"
except:
pass
return res

View File

@@ -1,252 +0,0 @@
# 本源代码中, ⭐ = 关键步骤
"""
测试:
- 裁剪图像,保留下半部分
- 交换图像的蓝色通道和红色通道
- 将图像转为灰度图像
- 将csv文件转excel表格
Testing:
- Crop the image, keeping the bottom half.
- Swap the blue channel and red channel of the image.
- Convert the image to grayscale.
- Convert the CSV file to an Excel spreadsheet.
"""
from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, is_the_upload_folder
from toolbox import promote_file_to_downloadzone, get_log_folder, update_ui_lastest_msg
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_plugin_arg
from crazy_functions.crazy_utils import input_clipping, try_install_deps
from crazy_functions.gen_fns.gen_fns_shared import is_function_successfully_generated
from crazy_functions.gen_fns.gen_fns_shared import get_class_name
from crazy_functions.gen_fns.gen_fns_shared import subprocess_worker
from crazy_functions.gen_fns.gen_fns_shared import try_make_module
import os
import time
import glob
import multiprocessing
templete = """
```python
import ... # Put dependencies here, e.g. import numpy as np.
class TerminalFunction(object): # Do not change the name of the class, The name of the class must be `TerminalFunction`
def run(self, path): # The name of the function must be `run`, it takes only a positional argument.
# rewrite the function you have just written here
...
return generated_file_path
```
"""
def inspect_dependency(chatbot, history):
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return True
def get_code_block(reply):
import re
pattern = r"```([\s\S]*?)```" # regex pattern to match code blocks
matches = re.findall(pattern, reply) # find all code blocks in text
if len(matches) == 1:
return matches[0].strip('python') # code block
for match in matches:
if 'class TerminalFunction' in match:
return match.strip('python') # code block
raise RuntimeError("GPT is not generating proper code.")
def gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history):
# 输入
prompt_compose = [
f'Your job:\n'
f'1. write a single Python function, which takes a path of a `{file_type}` file as the only argument and returns a `string` containing the result of analysis or the path of generated files. \n',
f"2. You should write this function to perform following task: " + txt + "\n",
f"3. Wrap the output python function with markdown codeblock."
]
i_say = "".join(prompt_compose)
demo = []
# 第一步
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user=i_say,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=demo,
sys_prompt= r"You are a world-class programmer."
)
history.extend([i_say, gpt_say])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
# 第二步
prompt_compose = [
"If previous stage is successful, rewrite the function you have just written to satisfy following templete: \n",
templete
]
i_say = "".join(prompt_compose); inputs_show_user = "If previous stage is successful, rewrite the function you have just written to satisfy executable templete. "
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user=inputs_show_user,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
sys_prompt= r"You are a programmer. You need to replace `...` with valid packages, do not give `...` in your answer!"
)
code_to_return = gpt_say
history.extend([i_say, gpt_say])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
# # 第三步
# i_say = "Please list to packages to install to run the code above. Then show me how to use `try_install_deps` function to install them."
# i_say += 'For instance. `try_install_deps(["opencv-python", "scipy", "numpy"])`'
# installation_advance = yield from request_gpt_model_in_new_thread_with_ui_alive(
# inputs=i_say, inputs_show_user=inputs_show_user,
# llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
# sys_prompt= r"You are a programmer."
# )
# # # 第三步
# i_say = "Show me how to use `pip` to install packages to run the code above. "
# i_say += 'For instance. `pip install -r opencv-python scipy numpy`'
# installation_advance = yield from request_gpt_model_in_new_thread_with_ui_alive(
# inputs=i_say, inputs_show_user=i_say,
# llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
# sys_prompt= r"You are a programmer."
# )
installation_advance = ""
return code_to_return, installation_advance, txt, file_type, llm_kwargs, chatbot, history
def for_immediate_show_off_when_possible(file_type, fp, chatbot):
if file_type in ['png', 'jpg']:
image_path = os.path.abspath(fp)
chatbot.append(['这是一张图片, 展示如下:',
f'本地文件地址: <br/>`{image_path}`<br/>'+
f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
])
return chatbot
def have_any_recent_upload_files(chatbot):
_5min = 5 * 60
if not chatbot: return False # chatbot is None
most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
if not most_recent_uploaded: return False # most_recent_uploaded is None
if time.time() - most_recent_uploaded["time"] < _5min: return True # most_recent_uploaded is new
else: return False # most_recent_uploaded is too old
def get_recent_file_prompt_support(chatbot):
most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
path = most_recent_uploaded['path']
return path
@CatchException
def 函数动态生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数如温度和top_p等一般原样传递下去就行
plugin_kwargs 插件模型的参数,暂时没有用武之地
chatbot 聊天显示框的句柄,用于显示给用户
history 聊天历史,前情提要
system_prompt 给gpt的静默提醒
user_request 当前用户的请求信息IP地址等
"""
# 清空历史
history = []
# 基本信息:功能、贡献者
chatbot.append(["正在启动: 插件动态生成插件", "插件动态生成, 执行开始, 作者Binary-Husky."])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# ⭐ 文件上传区是否有东西
# 1. 如果有文件: 作为函数参数
# 2. 如果没有文件需要用GPT提取参数 (太懒了,以后再写,虚空终端已经实现了类似的代码)
file_list = []
if get_plugin_arg(plugin_kwargs, key="file_path_arg", default=False):
file_path = get_plugin_arg(plugin_kwargs, key="file_path_arg", default=None)
file_list.append(file_path)
yield from update_ui_lastest_msg(f"当前文件: {file_path}", chatbot, history, 1)
elif have_any_recent_upload_files(chatbot):
file_dir = get_recent_file_prompt_support(chatbot)
file_list = glob.glob(os.path.join(file_dir, '**/*'), recursive=True)
yield from update_ui_lastest_msg(f"当前文件处理列表: {file_list}", chatbot, history, 1)
else:
chatbot.append(["文件检索", "没有发现任何近期上传的文件。"])
yield from update_ui_lastest_msg("没有发现任何近期上传的文件。", chatbot, history, 1)
return # 2. 如果没有文件
if len(file_list) == 0:
chatbot.append(["文件检索", "没有发现任何近期上传的文件。"])
yield from update_ui_lastest_msg("没有发现任何近期上传的文件。", chatbot, history, 1)
return # 2. 如果没有文件
# 读取文件
file_type = file_list[0].split('.')[-1]
# 粗心检查
if is_the_upload_folder(txt):
yield from update_ui_lastest_msg(f"请在输入框内填写需求, 然后再次点击该插件! 至于您的文件,不用担心, 文件路径 {txt} 已经被记忆. ", chatbot, history, 1)
return
# 开始干正事
MAX_TRY = 3
for j in range(MAX_TRY): # 最多重试5次
traceback = ""
try:
# ⭐ 开始啦
code, installation_advance, txt, file_type, llm_kwargs, chatbot, history = \
yield from gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history)
chatbot.append(["代码生成阶段结束", ""])
yield from update_ui_lastest_msg(f"正在验证上述代码的有效性 ...", chatbot, history, 1)
# ⭐ 分离代码块
code = get_code_block(code)
# ⭐ 检查模块
ok, traceback = try_make_module(code, chatbot)
# 搞定代码生成
if ok: break
except Exception as e:
if not traceback: traceback = trimmed_format_exc()
# 处理异常
if not traceback: traceback = trimmed_format_exc()
yield from update_ui_lastest_msg(f"{j+1}/{MAX_TRY} 次代码生成尝试, 失败了~ 别担心, 我们5秒后再试一次... \n\n此次我们的错误追踪是\n```\n{traceback}\n```\n", chatbot, history, 5)
# 代码生成结束, 开始执行
TIME_LIMIT = 15
yield from update_ui_lastest_msg(f"开始创建新进程并执行代码! 时间限制 {TIME_LIMIT} 秒. 请等待任务完成... ", chatbot, history, 1)
manager = multiprocessing.Manager()
return_dict = manager.dict()
# ⭐ 到最后一步了,开始逐个文件进行处理
for file_path in file_list:
if os.path.exists(file_path):
chatbot.append([f"正在处理文件: {file_path}", f"请稍等..."])
chatbot = for_immediate_show_off_when_possible(file_type, file_path, chatbot)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
else:
continue
# ⭐⭐⭐ subprocess_worker ⭐⭐⭐
p = multiprocessing.Process(target=subprocess_worker, args=(code, file_path, return_dict))
# ⭐ 开始执行时间限制TIME_LIMIT
p.start(); p.join(timeout=TIME_LIMIT)
if p.is_alive(): p.terminate(); p.join()
p.close()
res = return_dict['result']
success = return_dict['success']
traceback = return_dict['traceback']
if not success:
if not traceback: traceback = trimmed_format_exc()
chatbot.append(["执行失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
# chatbot.append(["如果是缺乏依赖,请参考以下建议", installation_advance])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 顺利完成,收尾
res = str(res)
if os.path.exists(res):
chatbot.append(["执行成功了,结果是一个有效文件", "结果:" + res])
new_file_path = promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot = for_immediate_show_off_when_possible(file_type, new_file_path, chatbot)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
else:
chatbot.append(["执行成功了,结果是一个字符串", "结果:" + res])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新

View File

@@ -1,31 +0,0 @@
from toolbox import CatchException, update_ui, gen_time_str
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.crazy_utils import input_clipping
import copy, json
@CatchException
def 命令行助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
txt 输入栏用户输入的文本, 例如需要翻译的一段话, 再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数, 如温度和top_p等, 一般原样传递下去就行
plugin_kwargs 插件模型的参数, 暂时没有用武之地
chatbot 聊天显示框的句柄, 用于显示给用户
history 聊天历史, 前情提要
system_prompt 给gpt的静默提醒
user_request 当前用户的请求信息IP地址等
"""
# 清空历史, 以免输入溢出
history = []
# 输入
i_say = "请写bash命令实现以下功能" + txt
# 开始
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user=txt,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
sys_prompt="你是一个Linux大师级用户。注意当我要求你写bash命令时尽可能地仅用一行命令解决我的要求。"
)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新

View File

@@ -1,102 +0,0 @@
# 本源代码中, ⭐ = 关键步骤
"""
测试:
- show me the solution of $x^2=cos(x)$, solve this problem with figure, and plot and save image to t.jpg
"""
import time
from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, ProxyNetworkActivate
from toolbox import get_conf, select_api_key, update_ui_lastest_msg, Singleton
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_plugin_arg
from crazy_functions.crazy_utils import input_clipping, try_install_deps
from crazy_functions.agent_fns.persistent import GradioMultiuserManagerForPersistentClasses
from crazy_functions.agent_fns.auto_agent import AutoGenMath
from loguru import logger
def remove_model_prefix(llm):
if llm.startswith('api2d-'): llm = llm.replace('api2d-', '')
if llm.startswith('azure-'): llm = llm.replace('azure-', '')
return llm
@CatchException
def 多智能体终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数如温度和top_p等一般原样传递下去就行
plugin_kwargs 插件模型的参数
chatbot 聊天显示框的句柄,用于显示给用户
history 聊天历史,前情提要
system_prompt 给gpt的静默提醒
user_request 当前用户的请求信息IP地址等
"""
# 检查当前的模型是否符合要求
supported_llms = [
"gpt-3.5-turbo-16k",
'gpt-3.5-turbo-1106',
"gpt-4",
"gpt-4-32k",
'gpt-4-1106-preview',
"azure-gpt-3.5-turbo-16k",
"azure-gpt-3.5-16k",
"azure-gpt-4",
"azure-gpt-4-32k",
]
from request_llms.bridge_all import model_info
if model_info[llm_kwargs['llm_model']]["max_token"] < 8000: # 至少是8k上下文的模型
chatbot.append([f"处理任务: {txt}", f"当前插件只支持{str(supported_llms)}, 当前模型{llm_kwargs['llm_model']}的最大上下文长度太短, 不能支撑AutoGen运行。"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
if model_info[llm_kwargs['llm_model']]["endpoint"] is not None: # 如果不是本地模型加载API_KEY
llm_kwargs['api_key'] = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
import autogen
if get_conf("AUTOGEN_USE_DOCKER"):
import docker
except:
chatbot.append([ f"处理任务: {txt}",
f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pyautogen docker```。"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
import autogen
import glob, os, time, subprocess
if get_conf("AUTOGEN_USE_DOCKER"):
subprocess.Popen(["docker", "--version"])
except:
chatbot.append([f"处理任务: {txt}", f"缺少docker运行环境"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 解锁插件
chatbot.get_cookies()['lock_plugin'] = None
persistent_class_multi_user_manager = GradioMultiuserManagerForPersistentClasses()
user_uuid = chatbot.get_cookies().get('uuid')
persistent_key = f"{user_uuid}->多智能体终端"
if persistent_class_multi_user_manager.already_alive(persistent_key):
# 当已经存在一个正在运行的多智能体终端时,直接将用户输入传递给它,而不是再次启动一个新的多智能体终端
logger.info('[debug] feed new user input')
executor = persistent_class_multi_user_manager.get(persistent_key)
exit_reason = yield from executor.main_process_ui_control(txt, create_or_resume="resume")
else:
# 运行多智能体终端 (首次)
logger.info('[debug] create new executor instance')
history = []
chatbot.append(["正在启动: 多智能体终端", "插件动态生成, 执行开始, 作者 Microsoft & Binary-Husky."])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
executor = AutoGenMath(llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)
persistent_class_multi_user_manager.set(persistent_key, executor)
exit_reason = yield from executor.main_process_ui_control(txt, create_or_resume="create")
if exit_reason == "wait_feedback":
# 当用户点击了“等待反馈”按钮时将executor存储到cookie中等待用户的再次调用
executor.chatbot.get_cookies()['lock_plugin'] = 'crazy_functions.多智能体->多智能体终端'
else:
executor.chatbot.get_cookies()['lock_plugin'] = None
yield from update_ui(chatbot=executor.chatbot, history=executor.history) # 更新状态

View File

@@ -1,127 +0,0 @@
from toolbox import update_ui
from toolbox import CatchException, report_exception
from toolbox import write_history_to_file, promote_file_to_downloadzone
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
fast_debug = False
def 解析docx(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
import time, os
# pip install python-docx 用于docx格式跨平台
# pip install pywin32 用于doc格式仅支持Win平台
for index, fp in enumerate(file_manifest):
if fp.split(".")[-1] == "docx":
from docx import Document
doc = Document(fp)
file_content = "\n".join([para.text for para in doc.paragraphs])
else:
try:
import win32com.client
word = win32com.client.Dispatch("Word.Application")
word.visible = False
# 打开文件
doc = word.Documents.Open(os.getcwd() + '/' + fp)
# file_content = doc.Content.Text
doc = word.ActiveDocument
file_content = doc.Range().Text
doc.Close()
word.Quit()
except:
raise RuntimeError('请先将.doc文档转换为.docx文档。')
# private_upload里面的文件名在解压zip后容易出现乱码rar和7z格式正常故可以只分析文章内容不输入文件名
from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
from request_llms.bridge_all import model_info
max_token = model_info[llm_kwargs['llm_model']]['max_token']
TOKEN_LIMIT_PER_FRAGMENT = max_token * 3 // 4
paper_fragments = breakdown_text_to_satisfy_token_limit(txt=file_content, limit=TOKEN_LIMIT_PER_FRAGMENT, llm_model=llm_kwargs['llm_model'])
this_paper_history = []
for i, paper_frag in enumerate(paper_fragments):
i_say = f'请对下面的文章片段用中文做概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{paper_frag}```'
i_say_show_user = f'请对下面的文章片段做概述: {os.path.abspath(fp)}的第{i+1}/{len(paper_fragments)}个片段。'
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say,
inputs_show_user=i_say_show_user,
llm_kwargs=llm_kwargs,
chatbot=chatbot,
history=[],
sys_prompt="总结文章。"
)
chatbot[-1] = (i_say_show_user, gpt_say)
history.extend([i_say_show_user,gpt_say])
this_paper_history.extend([i_say_show_user,gpt_say])
# 已经对该文章的所有片段总结完毕,如果文章被切分了,
if len(paper_fragments) > 1:
i_say = f"根据以上的对话,总结文章{os.path.abspath(fp)}的主要内容。"
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say,
inputs_show_user=i_say,
llm_kwargs=llm_kwargs,
chatbot=chatbot,
history=this_paper_history,
sys_prompt="总结文章。"
)
history.extend([i_say,gpt_say])
this_paper_history.extend([i_say,gpt_say])
res = write_history_to_file(history)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append(("完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
res = write_history_to_file(history)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append(("所有文件都总结完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
@CatchException
def 总结word文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
import glob, os
# 基本信息:功能、贡献者
chatbot.append([
"函数插件功能?",
"批量总结Word文档。函数插件贡献者: JasonGuo1。注意, 如果是.doc文件, 请先转化为.docx格式。"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
from docx import Document
except:
report_exception(chatbot, history,
a=f"解析项目: {txt}",
b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade python-docx pywin32```。")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 清空历史,以免输入溢出
history = []
# 检测输入参数,如没有给定输入参数,直接退出
if os.path.exists(txt):
project_folder = txt
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 搜索需要处理的文件清单
if txt.endswith('.docx') or txt.endswith('.doc'):
file_manifest = [txt]
else:
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.docx', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.doc', recursive=True)]
# 如果没找到任何文件
if len(file_manifest) == 0:
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.docx或doc文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 开始正式执行任务
yield from 解析docx(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)

View File

@@ -1,186 +0,0 @@
from toolbox import CatchException, report_exception, select_api_key, update_ui, get_conf
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from toolbox import write_history_to_file, promote_file_to_downloadzone, get_log_folder
def split_audio_file(filename, split_duration=1000):
"""
根据给定的切割时长将音频文件切割成多个片段。
Args:
filename (str): 需要被切割的音频文件名。
split_duration (int, optional): 每个切割音频片段的时长以秒为单位。默认值为1000。
Returns:
filelist (list): 一个包含所有切割音频片段文件路径的列表。
"""
from moviepy.editor import AudioFileClip
import os
os.makedirs(f"{get_log_folder(plugin_name='audio')}/mp3/cut/", exist_ok=True) # 创建存储切割音频的文件夹
# 读取音频文件
audio = AudioFileClip(filename)
# 计算文件总时长和切割点
total_duration = audio.duration
split_points = list(range(0, int(total_duration), split_duration))
split_points.append(int(total_duration))
filelist = []
# 切割音频文件
for i in range(len(split_points) - 1):
start_time = split_points[i]
end_time = split_points[i + 1]
split_audio = audio.subclip(start_time, end_time)
split_audio.write_audiofile(f"{get_log_folder(plugin_name='audio')}/mp3/cut/{filename[0]}_{i}.mp3")
filelist.append(f"{get_log_folder(plugin_name='audio')}/mp3/cut/{filename[0]}_{i}.mp3")
audio.close()
return filelist
def AnalyAudio(parse_prompt, file_manifest, llm_kwargs, chatbot, history):
import os, requests
from moviepy.editor import AudioFileClip
from request_llms.bridge_all import model_info
# 设置OpenAI密钥和模型
api_key = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])
chat_endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
whisper_endpoint = chat_endpoint.replace('chat/completions', 'audio/transcriptions')
url = whisper_endpoint
headers = {
'Authorization': f"Bearer {api_key}"
}
os.makedirs(f"{get_log_folder(plugin_name='audio')}/mp3/", exist_ok=True)
for index, fp in enumerate(file_manifest):
audio_history = []
# 提取文件扩展名
ext = os.path.splitext(fp)[1]
# 提取视频中的音频
if ext not in [".mp3", ".wav", ".m4a", ".mpga"]:
audio_clip = AudioFileClip(fp)
audio_clip.write_audiofile(f"{get_log_folder(plugin_name='audio')}/mp3/output{index}.mp3")
fp = f"{get_log_folder(plugin_name='audio')}/mp3/output{index}.mp3"
# 调用whisper模型音频转文字
voice = split_audio_file(fp)
for j, i in enumerate(voice):
with open(i, 'rb') as f:
file_content = f.read() # 读取文件内容到内存
files = {
'file': (os.path.basename(i), file_content),
}
data = {
"model": "whisper-1",
"prompt": parse_prompt,
'response_format': "text"
}
chatbot.append([f"{i} 发送到openai音频解析终端 (whisper),当前参数:{parse_prompt}", "正在处理 ..."])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
proxies = get_conf('proxies')
response = requests.post(url, headers=headers, files=files, data=data, proxies=proxies).text
chatbot.append(["音频解析结果", response])
history.extend(["音频解析结果", response])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
i_say = f'请对下面的音频片段做概述,音频内容是 ```{response}```'
i_say_show_user = f'{index + 1}段音频的第{j + 1} / {len(voice)}片段。'
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say,
inputs_show_user=i_say_show_user,
llm_kwargs=llm_kwargs,
chatbot=chatbot,
history=[],
sys_prompt=f"总结音频。音频文件名{fp}"
)
chatbot[-1] = (i_say_show_user, gpt_say)
history.extend([i_say_show_user, gpt_say])
audio_history.extend([i_say_show_user, gpt_say])
# 已经对该文章的所有片段总结完毕,如果文章被切分了
result = "".join(audio_history)
if len(audio_history) > 1:
i_say = f"根据以上的对话,使用中文总结音频“{result}”的主要内容。"
i_say_show_user = f'{index + 1}段音频的主要内容:'
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say,
inputs_show_user=i_say_show_user,
llm_kwargs=llm_kwargs,
chatbot=chatbot,
history=audio_history,
sys_prompt="总结文章。"
)
history.extend([i_say, gpt_say])
audio_history.extend([i_say, gpt_say])
res = write_history_to_file(history)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append((f"{index + 1}段音频完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 删除中间文件夹
import shutil
shutil.rmtree(f"{get_log_folder(plugin_name='audio')}/mp3")
res = write_history_to_file(history)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append(("所有音频都总结完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history)
@CatchException
def 总结音视频(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, WEB_PORT):
import glob, os
# 基本信息:功能、贡献者
chatbot.append([
"函数插件功能?",
"总结音视频内容,函数插件贡献者: dalvqw & BinaryHusky"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
try:
from moviepy.editor import AudioFileClip
except:
report_exception(chatbot, history,
a=f"解析项目: {txt}",
b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade moviepy```。")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 清空历史,以免输入溢出
history = []
# 检测输入参数,如没有给定输入参数,直接退出
if os.path.exists(txt):
project_folder = txt
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 搜索需要处理的文件清单
extensions = ['.mp4', '.m4a', '.wav', '.mpga', '.mpeg', '.mp3', '.avi', '.mkv', '.flac', '.aac']
if txt.endswith(tuple(extensions)):
file_manifest = [txt]
else:
file_manifest = []
for extension in extensions:
file_manifest.extend(glob.glob(f'{project_folder}/**/*{extension}', recursive=True))
# 如果没找到任何文件
if len(file_manifest) == 0:
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何音频或视频文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 开始正式执行任务
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
parse_prompt = plugin_kwargs.get("advanced_arg", '将音频解析为简体中文')
yield from AnalyAudio(parse_prompt, file_manifest, llm_kwargs, chatbot, history)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

View File

@@ -1,147 +0,0 @@
from loguru import logger
from toolbox import update_ui, promote_file_to_downloadzone, gen_time_str
from toolbox import CatchException, report_exception
from toolbox import write_history_to_file, promote_file_to_downloadzone
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.crazy_utils import read_and_clean_pdf_text
from crazy_functions.crazy_utils import input_clipping
def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
file_write_buffer = []
for file_name in file_manifest:
logger.info('begin analysis on:', file_name)
############################## <第 0 步切割PDF> ##################################
# 递归地切割PDF文件每一块尽量是完整的一个section比如introductionexperiment等必要时再进行切割
# 的长度必须小于 2500 个 Token
file_content, page_one = read_and_clean_pdf_text(file_name) # 尝试按照章节切割PDF
file_content = file_content.encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars
page_one = str(page_one).encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars
TOKEN_LIMIT_PER_FRAGMENT = 2500
from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
paper_fragments = breakdown_text_to_satisfy_token_limit(txt=file_content, limit=TOKEN_LIMIT_PER_FRAGMENT, llm_model=llm_kwargs['llm_model'])
page_one_fragments = breakdown_text_to_satisfy_token_limit(txt=str(page_one), limit=TOKEN_LIMIT_PER_FRAGMENT//4, llm_model=llm_kwargs['llm_model'])
# 为了更好的效果我们剥离Introduction之后的部分如果有
paper_meta = page_one_fragments[0].split('introduction')[0].split('Introduction')[0].split('INTRODUCTION')[0]
############################## <第 1 步从摘要中提取高价值信息放到history中> ##################################
final_results = []
final_results.append(paper_meta)
############################## <第 2 步,迭代地历遍整个文章,提取精炼信息> ##################################
i_say_show_user = f'首先你在中文语境下通读整篇论文。'; gpt_say = "[Local Message] 收到。" # 用户提示
chatbot.append([i_say_show_user, gpt_say]); yield from update_ui(chatbot=chatbot, history=[]) # 更新UI
iteration_results = []
last_iteration_result = paper_meta # 初始值是摘要
MAX_WORD_TOTAL = 4096 * 0.7
n_fragment = len(paper_fragments)
if n_fragment >= 20: logger.warning('文章极长,不能达到预期效果')
for i in range(n_fragment):
NUM_OF_WORD = MAX_WORD_TOTAL // n_fragment
i_say = f"Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} Chinese characters: {paper_fragments[i]}"
i_say_show_user = f"[{i+1}/{n_fragment}] Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} Chinese characters: {paper_fragments[i][:200]}"
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, # i_say=真正给chatgpt的提问 i_say_show_user=给用户看的提问
llm_kwargs, chatbot,
history=["The main idea of the previous section is?", last_iteration_result], # 迭代上一次的结果
sys_prompt="Extract the main idea of this section with Chinese." # 提示
)
iteration_results.append(gpt_say)
last_iteration_result = gpt_say
############################## <第 3 步整理history提取总结> ##################################
final_results.extend(iteration_results)
final_results.append(f'Please conclude this paper discussed above。')
# This prompt is from https://github.com/kaixindelele/ChatPaper/blob/main/chat_paper.py
NUM_OF_WORD = 1000
i_say = """
1. Mark the title of the paper (with Chinese translation)
2. list all the authors' names (use English)
3. mark the first author's affiliation (output Chinese translation only)
4. mark the keywords of this article (use English)
5. link to the paper, Github code link (if available, fill in Github:None if not)
6. summarize according to the following four points.Be sure to use Chinese answers (proper nouns need to be marked in English)
- (1):What is the research background of this article?
- (2):What are the past methods? What are the problems with them? Is the approach well motivated?
- (3):What is the research methodology proposed in this paper?
- (4):On what task and what performance is achieved by the methods in this paper? Can the performance support their goals?
Follow the format of the output that follows:
1. Title: xxx\n\n
2. Authors: xxx\n\n
3. Affiliation: xxx\n\n
4. Keywords: xxx\n\n
5. Urls: xxx or xxx , xxx \n\n
6. Summary: \n\n
- (1):xxx;\n
- (2):xxx;\n
- (3):xxx;\n
- (4):xxx.\n\n
Be sure to use Chinese answers (proper nouns need to be marked in English), statements as concise and academic as possible,
do not have too much repetitive information, numerical values using the original numbers.
"""
# This prompt is from https://github.com/kaixindelele/ChatPaper/blob/main/chat_paper.py
file_write_buffer.extend(final_results)
i_say, final_results = input_clipping(i_say, final_results, max_token_limit=2000)
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user='开始最终总结',
llm_kwargs=llm_kwargs, chatbot=chatbot, history=final_results,
sys_prompt= f"Extract the main idea of this paper with less than {NUM_OF_WORD} Chinese characters"
)
final_results.append(gpt_say)
file_write_buffer.extend([i_say, gpt_say])
############################## <第 4 步设置一个token上限> ##################################
_, final_results = input_clipping("", final_results, max_token_limit=3200)
yield from update_ui(chatbot=chatbot, history=final_results) # 注意这里的历史记录被替代了
res = write_history_to_file(file_write_buffer)
promote_file_to_downloadzone(res, chatbot=chatbot)
yield from update_ui(chatbot=chatbot, history=final_results) # 刷新界面
@CatchException
def 批量总结PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
import glob, os
# 基本信息:功能、贡献者
chatbot.append([
"函数插件功能?",
"批量总结PDF文档。函数插件贡献者: ValeriaWongEralien"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
import fitz
except:
report_exception(chatbot, history,
a = f"解析项目: {txt}",
b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf```。")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 清空历史,以免输入溢出
history = []
# 检测输入参数,如没有给定输入参数,直接退出
if os.path.exists(txt):
project_folder = txt
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 搜索需要处理的文件清单
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.pdf', recursive=True)]
# 如果没找到任何文件
if len(file_manifest) == 0:
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex或.pdf文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 开始正式执行任务
yield from 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)

View File

@@ -1,162 +0,0 @@
from loguru import logger
from toolbox import update_ui
from toolbox import CatchException, report_exception
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from toolbox import write_history_to_file, promote_file_to_downloadzone
fast_debug = False
def readPdf(pdfPath):
"""
读取pdf文件返回文本内容
"""
import pdfminer
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfpage import PDFPage, PDFTextExtractionNotAllowed
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.pdfdevice import PDFDevice
from pdfminer.layout import LAParams
from pdfminer.converter import PDFPageAggregator
fp = open(pdfPath, 'rb')
# Create a PDF parser object associated with the file object
parser = PDFParser(fp)
# Create a PDF document object that stores the document structure.
# Password for initialization as 2nd parameter
document = PDFDocument(parser)
# Check if the document allows text extraction. If not, abort.
if not document.is_extractable:
raise PDFTextExtractionNotAllowed
# Create a PDF resource manager object that stores shared resources.
rsrcmgr = PDFResourceManager()
# Create a PDF device object.
# device = PDFDevice(rsrcmgr)
# BEGIN LAYOUT ANALYSIS.
# Set parameters for analysis.
laparams = LAParams(
char_margin=10.0,
line_margin=0.2,
boxes_flow=0.2,
all_texts=False,
)
# Create a PDF page aggregator object.
device = PDFPageAggregator(rsrcmgr, laparams=laparams)
# Create a PDF interpreter object.
interpreter = PDFPageInterpreter(rsrcmgr, device)
# loop over all pages in the document
outTextList = []
for page in PDFPage.create_pages(document):
# read the page into a layout object
interpreter.process_page(page)
layout = device.get_result()
for obj in layout._objs:
if isinstance(obj, pdfminer.layout.LTTextBoxHorizontal):
outTextList.append(obj.get_text())
return outTextList
def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
import time, glob, os
from bs4 import BeautifulSoup
logger.info('begin analysis on:', file_manifest)
for index, fp in enumerate(file_manifest):
if ".tex" in fp:
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
file_content = f.read()
if ".pdf" in fp.lower():
file_content = readPdf(fp)
file_content = BeautifulSoup(''.join(file_content), features="lxml").body.text.encode('gbk', 'ignore').decode('gbk')
prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else ""
i_say = prefix + f'请对下面的文章片段用中文做一个概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{file_content}```'
i_say_show_user = prefix + f'[{index+1}/{len(file_manifest)}] 请对下面的文章片段做一个概述: {os.path.abspath(fp)}'
chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
if not fast_debug:
msg = '正常'
# ** gpt request **
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say,
inputs_show_user=i_say_show_user,
llm_kwargs=llm_kwargs,
chatbot=chatbot,
history=[],
sys_prompt="总结文章。"
) # 带超时倒计时
chatbot[-1] = (i_say_show_user, gpt_say)
history.append(i_say_show_user); history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
if not fast_debug: time.sleep(2)
all_file = ', '.join([os.path.relpath(fp, project_folder) for index, fp in enumerate(file_manifest)])
i_say = f'根据以上你自己的分析,对全文进行概括,用学术性语言写一段中文摘要,然后再写一段英文摘要(包括{all_file})。'
chatbot.append((i_say, "[Local Message] waiting gpt response."))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
if not fast_debug:
msg = '正常'
# ** gpt request **
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say,
inputs_show_user=i_say,
llm_kwargs=llm_kwargs,
chatbot=chatbot,
history=history,
sys_prompt="总结文章。"
) # 带超时倒计时
chatbot[-1] = (i_say, gpt_say)
history.append(i_say); history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
res = write_history_to_file(history)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append(("完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
@CatchException
def 批量总结PDF文档pdfminer(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
history = [] # 清空历史,以免输入溢出
import glob, os
# 基本信息:功能、贡献者
chatbot.append([
"函数插件功能?",
"批量总结PDF文档此版本使用pdfminer插件带token约简功能。函数插件贡献者: Euclid-Jie。"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
import pdfminer, bs4
except:
report_exception(chatbot, history,
a = f"解析项目: {txt}",
b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pdfminer beautifulsoup4```。")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
if os.path.exists(txt):
project_folder = txt
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.pdf', recursive=True)] # + \
# [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
# [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex或pdf文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)

View File

@@ -1,125 +0,0 @@
from toolbox import CatchException, report_exception, get_log_folder, gen_time_str
from toolbox import update_ui, promote_file_to_downloadzone, update_ui_lastest_msg, disable_auto_promotion
from toolbox import write_history_to_file, promote_file_to_downloadzone
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from crazy_functions.crazy_utils import read_and_clean_pdf_text
from .pdf_fns.parse_pdf import parse_pdf, get_avail_grobid_url, translate_pdf
from shared_utils.colorful import *
import copy
import os
import math
import logging
def markdown_to_dict(article_content):
import markdown
from bs4 import BeautifulSoup
cur_t = ""
cur_c = ""
results = {}
for line in article_content:
if line.startswith('#'):
if cur_t!="":
if cur_t not in results:
results.update({cur_t:cur_c.lstrip('\n')})
else:
# 处理重名的章节
results.update({cur_t + " " + gen_time_str():cur_c.lstrip('\n')})
cur_t = line.rstrip('\n')
cur_c = ""
else:
cur_c += line
results_final = {}
for k in list(results.keys()):
if k.startswith('# '):
results_final['title'] = k.split('# ')[-1]
results_final['authors'] = results.pop(k).lstrip('\n')
if k.startswith('###### Abstract'):
results_final['abstract'] = results.pop(k).lstrip('\n')
results_final_sections = []
for k,v in results.items():
results_final_sections.append({
'heading':k.lstrip("# "),
'text':v if len(v) > 0 else f"The beginning of {k.lstrip('# ')} section."
})
results_final['sections'] = results_final_sections
return results_final
@CatchException
def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
disable_auto_promotion(chatbot)
# 基本信息:功能、贡献者
chatbot.append([
"函数插件功能?",
"批量翻译PDF文档。函数插件贡献者: Binary-Husky"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 清空历史,以免输入溢出
history = []
from crazy_functions.crazy_utils import get_files_from_everything
success, file_manifest, project_folder = get_files_from_everything(txt, type='.pdf')
if len(file_manifest) > 0:
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
import nougat
import tiktoken
except:
report_exception(chatbot, history,
a=f"解析项目: {txt}",
b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade nougat-ocr tiktoken```。")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
success_mmd, file_manifest_mmd, _ = get_files_from_everything(txt, type='.mmd')
success = success or success_mmd
file_manifest += file_manifest_mmd
chatbot.append(["文件列表:", ", ".join([e.split('/')[-1] for e in file_manifest])]);
yield from update_ui( chatbot=chatbot, history=history)
# 检测输入参数,如没有给定输入参数,直接退出
if not success:
if txt == "": txt = '空空如也的输入栏'
# 如果没找到任何文件
if len(file_manifest) == 0:
report_exception(chatbot, history,
a=f"解析项目: {txt}", b=f"找不到任何.pdf拓展名的文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 开始正式执行任务
yield from 解析PDF_基于NOUGAT(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
def 解析PDF_基于NOUGAT(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
import copy
import tiktoken
TOKEN_LIMIT_PER_FRAGMENT = 1024
generated_conclusion_files = []
generated_html_files = []
DST_LANG = "中文"
from crazy_functions.crazy_utils import nougat_interface
from crazy_functions.pdf_fns.report_gen_html import construct_html
nougat_handle = nougat_interface()
for index, fp in enumerate(file_manifest):
if fp.endswith('pdf'):
chatbot.append(["当前进度:", f"正在解析论文请稍候。第一次运行时需要花费较长时间下载NOUGAT参数"]); yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
fpp = yield from nougat_handle.NOUGAT_parse_pdf(fp, chatbot, history)
promote_file_to_downloadzone(fpp, rename_file=os.path.basename(fpp)+'.nougat.mmd', chatbot=chatbot)
else:
chatbot.append(["当前论文无需解析:", fp]); yield from update_ui( chatbot=chatbot, history=history)
fpp = fp
with open(fpp, 'r', encoding='utf8') as f:
article_content = f.readlines()
article_dict = markdown_to_dict(article_content)
logging.info(article_dict)
yield from translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_files, TOKEN_LIMIT_PER_FRAGMENT, DST_LANG)
chatbot.append(("给出输出文件清单", str(generated_conclusion_files + generated_html_files)))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

View File

@@ -1,192 +0,0 @@
import os
from loguru import logger
from toolbox import CatchException, update_ui, gen_time_str, promote_file_to_downloadzone
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.crazy_utils import input_clipping
def inspect_dependency(chatbot, history):
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
import manim
return True
except:
chatbot.append(["导入依赖失败", "使用该模块需要额外依赖,安装方法:```pip install manim manimgl```"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return False
def eval_manim(code):
import subprocess, sys, os, shutil
with open('gpt_log/MyAnimation.py', 'w', encoding='utf8') as f:
f.write(code)
def get_class_name(class_string):
import re
# Use regex to extract the class name
class_name = re.search(r'class (\w+)\(', class_string).group(1)
return class_name
class_name = get_class_name(code)
try:
time_str = gen_time_str()
subprocess.check_output([sys.executable, '-c', f"from gpt_log.MyAnimation import {class_name}; {class_name}().render()"])
shutil.move(f'media/videos/1080p60/{class_name}.mp4', f'gpt_log/{class_name}-{time_str}.mp4')
return f'gpt_log/{time_str}.mp4'
except subprocess.CalledProcessError as e:
output = e.output.decode()
logger.error(f"Command returned non-zero exit status {e.returncode}: {output}.")
return f"Evaluating python script failed: {e.output}."
except:
logger.error('generating mp4 failed')
return "Generating mp4 failed."
def get_code_block(reply):
import re
pattern = r"```([\s\S]*?)```" # regex pattern to match code blocks
matches = re.findall(pattern, reply) # find all code blocks in text
if len(matches) != 1:
raise RuntimeError("GPT is not generating proper code.")
return matches[0].strip('python') # code block
@CatchException
def 动画生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数如温度和top_p等一般原样传递下去就行
plugin_kwargs 插件模型的参数,暂时没有用武之地
chatbot 聊天显示框的句柄,用于显示给用户
history 聊天历史,前情提要
system_prompt 给gpt的静默提醒
user_request 当前用户的请求信息IP地址等
"""
# 清空历史,以免输入溢出
history = []
# 基本信息:功能、贡献者
chatbot.append([
"函数插件功能?",
"生成数学动画, 此插件处于开发阶段, 建议暂时不要使用, 作者: binary-husky, 插件初始化中 ..."
])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 尝试导入依赖, 如果缺少依赖, 则给出安装建议
dep_ok = yield from inspect_dependency(chatbot=chatbot, history=history) # 刷新界面
if not dep_ok: return
# 输入
i_say = f'Generate a animation to show: ' + txt
demo = ["Here is some examples of manim", examples_of_manim()]
_, demo = input_clipping(inputs="", history=demo, max_token_limit=2560)
# 开始
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user=i_say,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=demo,
sys_prompt=
r"Write a animation script with 3blue1brown's manim. "+
r"Please begin with `from manim import *`. " +
r"Answer me with a code block wrapped by ```."
)
chatbot.append(["开始生成动画", "..."])
history.extend([i_say, gpt_say])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
# 将代码转为动画
code = get_code_block(gpt_say)
res = eval_manim(code)
chatbot.append(("生成的视频文件路径", res))
if os.path.exists(res):
promote_file_to_downloadzone(res, chatbot=chatbot)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
# 在这里放一些网上搜集的demo辅助gpt生成代码
def examples_of_manim():
return r"""
```
class MovingGroupToDestination(Scene):
def construct(self):
group = VGroup(Dot(LEFT), Dot(ORIGIN), Dot(RIGHT, color=RED), Dot(2 * RIGHT)).scale(1.4)
dest = Dot([4, 3, 0], color=YELLOW)
self.add(group, dest)
self.play(group.animate.shift(dest.get_center() - group[2].get_center()))
self.wait(0.5)
```
```
class LatexWithMovingFramebox(Scene):
def construct(self):
text=MathTex(
"\\frac{d}{dx}f(x)g(x)=","f(x)\\frac{d}{dx}g(x)","+",
"g(x)\\frac{d}{dx}f(x)"
)
self.play(Write(text))
framebox1 = SurroundingRectangle(text[1], buff = .1)
framebox2 = SurroundingRectangle(text[3], buff = .1)
self.play(
Create(framebox1),
)
self.wait()
self.play(
ReplacementTransform(framebox1,framebox2),
)
self.wait()
```
```
class PointWithTrace(Scene):
def construct(self):
path = VMobject()
dot = Dot()
path.set_points_as_corners([dot.get_center(), dot.get_center()])
def update_path(path):
previous_path = path.copy()
previous_path.add_points_as_corners([dot.get_center()])
path.become(previous_path)
path.add_updater(update_path)
self.add(path, dot)
self.play(Rotating(dot, radians=PI, about_point=RIGHT, run_time=2))
self.wait()
self.play(dot.animate.shift(UP))
self.play(dot.animate.shift(LEFT))
self.wait()
```
```
# do not use get_graph, this funciton is deprecated
class ExampleFunctionGraph(Scene):
def construct(self):
cos_func = FunctionGraph(
lambda t: np.cos(t) + 0.5 * np.cos(7 * t) + (1 / 7) * np.cos(14 * t),
color=RED,
)
sin_func_1 = FunctionGraph(
lambda t: np.sin(t) + 0.5 * np.sin(7 * t) + (1 / 7) * np.sin(14 * t),
color=BLUE,
)
sin_func_2 = FunctionGraph(
lambda t: np.sin(t) + 0.5 * np.sin(7 * t) + (1 / 7) * np.sin(14 * t),
x_range=[-4, 4],
color=GREEN,
).move_to([0, 1, 0])
self.add(cos_func, sin_func_1, sin_func_2)
```
"""

View File

@@ -1,108 +0,0 @@
from loguru import logger
from toolbox import update_ui
from toolbox import CatchException, report_exception
from crazy_functions.crazy_utils import read_and_clean_pdf_text
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
logger.info('begin analysis on:', file_name)
############################## <第 0 步切割PDF> ##################################
# 递归地切割PDF文件每一块尽量是完整的一个section比如introductionexperiment等必要时再进行切割
# 的长度必须小于 2500 个 Token
file_content, page_one = read_and_clean_pdf_text(file_name) # 尝试按照章节切割PDF
file_content = file_content.encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars
page_one = str(page_one).encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars
TOKEN_LIMIT_PER_FRAGMENT = 2500
from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
paper_fragments = breakdown_text_to_satisfy_token_limit(txt=file_content, limit=TOKEN_LIMIT_PER_FRAGMENT, llm_model=llm_kwargs['llm_model'])
page_one_fragments = breakdown_text_to_satisfy_token_limit(txt=str(page_one), limit=TOKEN_LIMIT_PER_FRAGMENT//4, llm_model=llm_kwargs['llm_model'])
# 为了更好的效果我们剥离Introduction之后的部分如果有
paper_meta = page_one_fragments[0].split('introduction')[0].split('Introduction')[0].split('INTRODUCTION')[0]
############################## <第 1 步从摘要中提取高价值信息放到history中> ##################################
final_results = []
final_results.append(paper_meta)
############################## <第 2 步,迭代地历遍整个文章,提取精炼信息> ##################################
i_say_show_user = f'首先你在英文语境下通读整篇论文。'; gpt_say = "[Local Message] 收到。" # 用户提示
chatbot.append([i_say_show_user, gpt_say]); yield from update_ui(chatbot=chatbot, history=[]) # 更新UI
iteration_results = []
last_iteration_result = paper_meta # 初始值是摘要
MAX_WORD_TOTAL = 4096
n_fragment = len(paper_fragments)
if n_fragment >= 20: logger.warning('文章极长,不能达到预期效果')
for i in range(n_fragment):
NUM_OF_WORD = MAX_WORD_TOTAL // n_fragment
i_say = f"Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words: {paper_fragments[i]}"
i_say_show_user = f"[{i+1}/{n_fragment}] Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words: {paper_fragments[i][:200]} ...."
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, # i_say=真正给chatgpt的提问 i_say_show_user=给用户看的提问
llm_kwargs, chatbot,
history=["The main idea of the previous section is?", last_iteration_result], # 迭代上一次的结果
sys_prompt="Extract the main idea of this section, answer me with Chinese." # 提示
)
iteration_results.append(gpt_say)
last_iteration_result = gpt_say
############################## <第 3 步整理history> ##################################
final_results.extend(iteration_results)
final_results.append(f'接下来,你是一名专业的学术教授,利用以上信息,使用中文回答我的问题。')
# 接下来两句话只显示在界面上,不起实际作用
i_say_show_user = f'接下来,你是一名专业的学术教授,利用以上信息,使用中文回答我的问题。'; gpt_say = "[Local Message] 收到。"
chatbot.append([i_say_show_user, gpt_say])
############################## <第 4 步设置一个token上限防止回答时Token溢出> ##################################
from crazy_functions.crazy_utils import input_clipping
_, final_results = input_clipping("", final_results, max_token_limit=3200)
yield from update_ui(chatbot=chatbot, history=final_results) # 注意这里的历史记录被替代了
@CatchException
def 理解PDF文档内容标准文件输入(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
import glob, os
# 基本信息:功能、贡献者
chatbot.append([
"函数插件功能?",
"理解PDF论文内容并且将结合上下文内容进行学术解答。函数插件贡献者: Hanzoe, binary-husky"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
import fitz
except:
report_exception(chatbot, history,
a = f"解析项目: {txt}",
b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf```。")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 清空历史,以免输入溢出
history = []
# 检测输入参数,如没有给定输入参数,直接退出
if os.path.exists(txt):
project_folder = txt
else:
if txt == "":
txt = '空空如也的输入栏'
report_exception(chatbot, history,
a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 搜索需要处理的文件清单
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.pdf', recursive=True)]
# 如果没找到任何文件
if len(file_manifest) == 0:
report_exception(chatbot, history,
a=f"解析项目: {txt}", b=f"找不到任何.tex或.pdf文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
txt = file_manifest[0]
# 开始正式执行任务
yield from 解析PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)

View File

@@ -1,54 +0,0 @@
from loguru import logger
from toolbox import update_ui
from toolbox import CatchException, report_exception
from toolbox import write_history_to_file, promote_file_to_downloadzone
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
def 生成函数注释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
import time, os
logger.info('begin analysis on:', file_manifest)
for index, fp in enumerate(file_manifest):
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
file_content = f.read()
i_say = f'请对下面的程序文件做一个概述并对文件中的所有函数生成注释使用markdown表格输出结果文件名是{os.path.relpath(fp, project_folder)},文件内容是 ```{file_content}```'
i_say_show_user = f'[{index+1}/{len(file_manifest)}] 请对下面的程序文件做一个概述,并对文件中的所有函数生成注释: {os.path.abspath(fp)}'
chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
msg = '正常'
# ** gpt request **
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt=system_prompt) # 带超时倒计时
chatbot[-1] = (i_say_show_user, gpt_say)
history.append(i_say_show_user); history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
time.sleep(2)
res = write_history_to_file(history)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append(("完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
@CatchException
def 批量生成函数注释(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
history = [] # 清空历史,以免输入溢出
import glob, os
if os.path.exists(txt):
project_folder = txt
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.py', recursive=True)] + \
[f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 生成函数注释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)

View File

@@ -1,437 +0,0 @@
from toolbox import CatchException, update_ui, report_exception
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.plugin_template.plugin_class_template import (
GptAcademicPluginTemplate,
)
from crazy_functions.plugin_template.plugin_class_template import ArgProperty
# 以下是每类图表的PROMPT
SELECT_PROMPT = """
{subject}
=============
以上是从文章中提取的摘要,将会使用这些摘要绘制图表。请你选择一个合适的图表类型:
1 流程图
2 序列图
3 类图
4 饼图
5 甘特图
6 状态图
7 实体关系图
8 象限提示图
不需要解释原因,仅需要输出单个不带任何标点符号的数字。
"""
# 没有思维导图!!!测试发现模型始终会优先选择思维导图
# 流程图
PROMPT_1 = """
请你给出围绕“{subject}”的逻辑关系图使用mermaid语法注意需要使用双引号将内容括起来。
mermaid语法举例
```mermaid
graph TD
P("编程") --> L1("Python")
P("编程") --> L2("C")
P("编程") --> L3("C++")
P("编程") --> L4("Javascipt")
P("编程") --> L5("PHP")
```
"""
# 序列图
PROMPT_2 = """
请你给出围绕“{subject}”的序列图使用mermaid语法。
mermaid语法举例
```mermaid
sequenceDiagram
participant A as 用户
participant B as 系统
A->>B: 登录请求
B->>A: 登录成功
A->>B: 获取数据
B->>A: 返回数据
```
"""
# 类图
PROMPT_3 = """
请你给出围绕“{subject}”的类图使用mermaid语法。
mermaid语法举例
```mermaid
classDiagram
Class01 <|-- AveryLongClass : Cool
Class03 *-- Class04
Class05 o-- Class06
Class07 .. Class08
Class09 --> C2 : Where am i?
Class09 --* C3
Class09 --|> Class07
Class07 : equals()
Class07 : Object[] elementData
Class01 : size()
Class01 : int chimp
Class01 : int gorilla
Class08 <--> C2: Cool label
```
"""
# 饼图
PROMPT_4 = """
请你给出围绕“{subject}”的饼图使用mermaid语法注意需要使用双引号将内容括起来。
mermaid语法举例
```mermaid
pie title Pets adopted by volunteers
"" : 386
"" : 85
"兔子" : 15
```
"""
# 甘特图
PROMPT_5 = """
请你给出围绕“{subject}”的甘特图使用mermaid语法注意需要使用双引号将内容括起来。
mermaid语法举例
```mermaid
gantt
title "项目开发流程"
dateFormat YYYY-MM-DD
section "设计"
"需求分析" :done, des1, 2024-01-06,2024-01-08
"原型设计" :active, des2, 2024-01-09, 3d
"UI设计" : des3, after des2, 5d
section "开发"
"前端开发" :2024-01-20, 10d
"后端开发" :2024-01-20, 10d
```
"""
# 状态图
PROMPT_6 = """
请你给出围绕“{subject}”的状态图使用mermaid语法注意需要使用双引号将内容括起来。
mermaid语法举例
```mermaid
stateDiagram-v2
[*] --> "Still"
"Still" --> [*]
"Still" --> "Moving"
"Moving" --> "Still"
"Moving" --> "Crash"
"Crash" --> [*]
```
"""
# 实体关系图
PROMPT_7 = """
请你给出围绕“{subject}”的实体关系图使用mermaid语法。
mermaid语法举例
```mermaid
erDiagram
CUSTOMER ||--o{ ORDER : places
ORDER ||--|{ LINE-ITEM : contains
CUSTOMER {
string name
string id
}
ORDER {
string orderNumber
date orderDate
string customerID
}
LINE-ITEM {
number quantity
string productID
}
```
"""
# 象限提示图
PROMPT_8 = """
请你给出围绕“{subject}”的象限图使用mermaid语法注意需要使用双引号将内容括起来。
mermaid语法举例
```mermaid
graph LR
A["Hard skill"] --> B("Programming")
A["Hard skill"] --> C("Design")
D["Soft skill"] --> E("Coordination")
D["Soft skill"] --> F("Communication")
```
"""
# 思维导图
PROMPT_9 = """
{subject}
==========
请给出上方内容的思维导图充分考虑其之间的逻辑使用mermaid语法注意需要使用双引号将内容括起来。
mermaid语法举例
```mermaid
mindmap
root((mindmap))
("Origins")
("Long history")
::icon(fa fa-book)
("Popularisation")
("British popular psychology author Tony Buzan")
::icon(fa fa-user)
("Research")
("On effectiveness<br/>and features")
::icon(fa fa-search)
("On Automatic creation")
::icon(fa fa-robot)
("Uses")
("Creative techniques")
::icon(fa fa-lightbulb-o)
("Strategic planning")
::icon(fa fa-flag)
("Argument mapping")
::icon(fa fa-comments)
("Tools")
("Pen and paper")
::icon(fa fa-pencil)
("Mermaid")
::icon(fa fa-code)
```
"""
def 解析历史输入(history, llm_kwargs, file_manifest, chatbot, plugin_kwargs):
############################## <第 0 步,切割输入> ##################################
# 借用PDF切割中的函数对文本进行切割
TOKEN_LIMIT_PER_FRAGMENT = 2500
txt = (
str(history).encode("utf-8", "ignore").decode()
) # avoid reading non-utf8 chars
from crazy_functions.pdf_fns.breakdown_txt import (
breakdown_text_to_satisfy_token_limit,
)
txt = breakdown_text_to_satisfy_token_limit(
txt=txt, limit=TOKEN_LIMIT_PER_FRAGMENT, llm_model=llm_kwargs["llm_model"]
)
############################## <第 1 步,迭代地历遍整个文章,提取精炼信息> ##################################
results = []
MAX_WORD_TOTAL = 4096
n_txt = len(txt)
last_iteration_result = "从以下文本中提取摘要。"
for i in range(n_txt):
NUM_OF_WORD = MAX_WORD_TOTAL // n_txt
i_say = f"Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words in Chinese: {txt[i]}"
i_say_show_user = f"[{i+1}/{n_txt}] Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words: {txt[i][:200]} ...."
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
i_say,
i_say_show_user, # i_say=真正给chatgpt的提问 i_say_show_user=给用户看的提问
llm_kwargs,
chatbot,
history=[
"The main content of the previous section is?",
last_iteration_result,
], # 迭代上一次的结果
sys_prompt="Extracts the main content from the text section where it is located for graphing purposes, answer me with Chinese.", # 提示
)
results.append(gpt_say)
last_iteration_result = gpt_say
############################## <第 2 步,根据整理的摘要选择图表类型> ##################################
gpt_say = str(plugin_kwargs) # 将图表类型参数赋值为插件参数
results_txt = "\n".join(results) # 合并摘要
if gpt_say not in [
"1",
"2",
"3",
"4",
"5",
"6",
"7",
"8",
"9",
]: # 如插件参数不正确则使用对话模型判断
i_say_show_user = (
f"接下来将判断适合的图表类型,如连续3次判断失败将会使用流程图进行绘制"
)
gpt_say = "[Local Message] 收到。" # 用户提示
chatbot.append([i_say_show_user, gpt_say])
yield from update_ui(chatbot=chatbot, history=[]) # 更新UI
i_say = SELECT_PROMPT.format(subject=results_txt)
i_say_show_user = f'请判断适合使用的流程图类型,其中数字对应关系为:1-流程图,2-序列图,3-类图,4-饼图,5-甘特图,6-状态图,7-实体关系图,8-象限提示图。由于不管提供文本是什么,模型大概率认为"思维导图"最合适,因此思维导图仅能通过参数调用。'
for i in range(3):
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say,
inputs_show_user=i_say_show_user,
llm_kwargs=llm_kwargs,
chatbot=chatbot,
history=[],
sys_prompt="",
)
if gpt_say in [
"1",
"2",
"3",
"4",
"5",
"6",
"7",
"8",
"9",
]: # 判断返回是否正确
break
if gpt_say not in ["1", "2", "3", "4", "5", "6", "7", "8", "9"]:
gpt_say = "1"
############################## <第 3 步,根据选择的图表类型绘制图表> ##################################
if gpt_say == "1":
i_say = PROMPT_1.format(subject=results_txt)
elif gpt_say == "2":
i_say = PROMPT_2.format(subject=results_txt)
elif gpt_say == "3":
i_say = PROMPT_3.format(subject=results_txt)
elif gpt_say == "4":
i_say = PROMPT_4.format(subject=results_txt)
elif gpt_say == "5":
i_say = PROMPT_5.format(subject=results_txt)
elif gpt_say == "6":
i_say = PROMPT_6.format(subject=results_txt)
elif gpt_say == "7":
i_say = PROMPT_7.replace("{subject}", results_txt) # 由于实体关系图用到了{}符号
elif gpt_say == "8":
i_say = PROMPT_8.format(subject=results_txt)
elif gpt_say == "9":
i_say = PROMPT_9.format(subject=results_txt)
i_say_show_user = f"请根据判断结果绘制相应的图表。如需绘制思维导图请使用参数调用,同时过大的图表可能需要复制到在线编辑器中进行渲染。"
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say,
inputs_show_user=i_say_show_user,
llm_kwargs=llm_kwargs,
chatbot=chatbot,
history=[],
sys_prompt="",
)
history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
@CatchException
def 生成多种Mermaid图表(
txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port
):
"""
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数如温度和top_p等一般原样传递下去就行
plugin_kwargs 插件模型的参数,用于灵活调整复杂功能的各种参数
chatbot 聊天显示框的句柄,用于显示给用户
history 聊天历史,前情提要
system_prompt 给gpt的静默提醒
web_port 当前软件运行的端口号
"""
import os
# 基本信息:功能、贡献者
chatbot.append(
[
"函数插件功能?",
"根据当前聊天历史或指定的路径文件(文件内容优先)绘制多种mermaid图表将会由对话模型首先判断适合的图表类型随后绘制图表。\
\n您也可以使用插件参数指定绘制的图表类型,函数插件贡献者: Menghuan1918",
]
)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
if os.path.exists(txt): # 如输入区无内容则直接解析历史记录
from crazy_functions.pdf_fns.parse_word import extract_text_from_files
file_exist, final_result, page_one, file_manifest, excption = (
extract_text_from_files(txt, chatbot, history)
)
else:
file_exist = False
excption = ""
file_manifest = []
if excption != "":
if excption == "word":
report_exception(
chatbot,
history,
a=f"解析项目: {txt}",
b=f"找到了.doc文件但是该文件格式不被支持请先转化为.docx格式。",
)
elif excption == "pdf":
report_exception(
chatbot,
history,
a=f"解析项目: {txt}",
b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf```。",
)
elif excption == "word_pip":
report_exception(
chatbot,
history,
a=f"解析项目: {txt}",
b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade python-docx pywin32```。",
)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
else:
if not file_exist:
history.append(txt) # 如输入区不是文件则将输入区内容加入历史记录
i_say_show_user = f"首先你从历史记录中提取摘要。"
gpt_say = "[Local Message] 收到。" # 用户提示
chatbot.append([i_say_show_user, gpt_say])
yield from update_ui(chatbot=chatbot, history=history) # 更新UI
yield from 解析历史输入(
history, llm_kwargs, file_manifest, chatbot, plugin_kwargs
)
else:
file_num = len(file_manifest)
for i in range(file_num): # 依次处理文件
i_say_show_user = f"[{i+1}/{file_num}]处理文件{file_manifest[i]}"
gpt_say = "[Local Message] 收到。" # 用户提示
chatbot.append([i_say_show_user, gpt_say])
yield from update_ui(chatbot=chatbot, history=history) # 更新UI
history = [] # 如输入区内容为文件则清空历史记录
history.append(final_result[i])
yield from 解析历史输入(
history, llm_kwargs, file_manifest, chatbot, plugin_kwargs
)
class Mermaid_Gen(GptAcademicPluginTemplate):
def __init__(self):
pass
def define_arg_selection_menu(self):
gui_definition = {
"Type_of_Mermaid": ArgProperty(
title="绘制的Mermaid图表类型",
options=[
"由LLM决定",
"流程图",
"序列图",
"类图",
"饼图",
"甘特图",
"状态图",
"实体关系图",
"象限提示图",
"思维导图",
],
default_value="由LLM决定",
description="选择'由LLM决定'时将由对话模型判断适合的图表类型(不包括思维导图),选择其他类型时将直接绘制指定的图表类型。",
type="dropdown",
).model_dump_json(),
}
return gui_definition
def execute(
txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request
):
options = [
"由LLM决定",
"流程图",
"序列图",
"类图",
"饼图",
"甘特图",
"状态图",
"实体关系图",
"象限提示图",
"思维导图",
]
plugin_kwargs = options.index(plugin_kwargs['Type_of_Mermaid'])
yield from 生成多种Mermaid图表(
txt,
llm_kwargs,
plugin_kwargs,
chatbot,
history,
system_prompt,
user_request,
)

View File

@@ -1,117 +0,0 @@
from toolbox import CatchException, update_ui, ProxyNetworkActivate, update_ui_lastest_msg, get_log_folder, get_user
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_files_from_everything
from loguru import logger
install_msg ="""
1. python -m pip install torch --index-url https://download.pytorch.org/whl/cpu
2. python -m pip install transformers protobuf langchain sentence-transformers faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk --upgrade
3. python -m pip install unstructured[all-docs] --upgrade
4. python -c 'import nltk; nltk.download("punkt")'
"""
@CatchException
def 知识库文件注入(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数, 如温度和top_p等, 一般原样传递下去就行
plugin_kwargs 插件模型的参数,暂时没有用武之地
chatbot 聊天显示框的句柄,用于显示给用户
history 聊天历史,前情提要
system_prompt 给gpt的静默提醒
user_request 当前用户的请求信息IP地址等
"""
history = [] # 清空历史,以免输入溢出
# < --------------------读取参数--------------- >
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
kai_id = plugin_kwargs.get("advanced_arg", 'default')
chatbot.append((f"向`{kai_id}`知识库中添加文件。", "[Local Message] 从一批文件(txt, md, tex)中读取数据构建知识库, 然后进行问答。"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# resolve deps
try:
# from zh_langchain import construct_vector_store
# from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from crazy_functions.vector_fns.vector_database import knowledge_archive_interface
except Exception as e:
chatbot.append(["依赖不足", f"{str(e)}\n\n导入依赖失败。请用以下命令安装" + install_msg])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# from crazy_functions.crazy_utils import try_install_deps
# try_install_deps(['zh_langchain==0.2.1', 'pypinyin'], reload_m=['pypinyin', 'zh_langchain'])
# yield from update_ui_lastest_msg("安装完成,您可以再次重试。", chatbot, history)
return
# < --------------------读取文件--------------- >
file_manifest = []
spl = ["txt", "doc", "docx", "email", "epub", "html", "json", "md", "msg", "pdf", "ppt", "pptx", "rtf"]
for sp in spl:
_, file_manifest_tmp, _ = get_files_from_everything(txt, type=f'.{sp}')
file_manifest += file_manifest_tmp
if len(file_manifest) == 0:
chatbot.append(["没有找到任何可读取文件", "当前支持的格式包括: txt, md, docx, pptx, pdf, json等"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# < -------------------预热文本向量化模组--------------- >
chatbot.append(['<br/>'.join(file_manifest), "正在预热文本向量化模组, 如果是第一次运行, 将消耗较长时间下载中文向量化模型..."])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
logger.info('Checking Text2vec ...')
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
with ProxyNetworkActivate('Download_LLM'): # 临时地激活代理网络
HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
# < -------------------构建知识库--------------- >
chatbot.append(['<br/>'.join(file_manifest), "正在构建知识库..."])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
logger.info('Establishing knowledge archive ...')
with ProxyNetworkActivate('Download_LLM'): # 临时地激活代理网络
kai = knowledge_archive_interface()
vs_path = get_log_folder(user=get_user(chatbot), plugin_name='vec_store')
kai.feed_archive(file_manifest=file_manifest, vs_path=vs_path, id=kai_id)
kai_files = kai.get_loaded_file(vs_path=vs_path)
kai_files = '<br/>'.join(kai_files)
# chatbot.append(['知识库构建成功', "正在将知识库存储至cookie中"])
# yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# chatbot._cookies['langchain_plugin_embedding'] = kai.get_current_archive_id()
# chatbot._cookies['lock_plugin'] = 'crazy_functions.知识库文件注入->读取知识库作答'
# chatbot.append(['完成', "“根据知识库作答”函数插件已经接管问答系统, 提问吧! 但注意, 您接下来不能再使用其他插件了,刷新页面即可以退出知识库问答模式。"])
chatbot.append(['构建完成', f"当前知识库内的有效文件:\n\n---\n\n{kai_files}\n\n---\n\n请切换至“知识库问答”插件进行知识库访问, 或者使用此插件继续上传更多文件。"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
@CatchException
def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request=-1):
# resolve deps
try:
# from zh_langchain import construct_vector_store
# from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from crazy_functions.vector_fns.vector_database import knowledge_archive_interface
except Exception as e:
chatbot.append(["依赖不足", f"{str(e)}\n\n导入依赖失败。请用以下命令安装" + install_msg])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# from crazy_functions.crazy_utils import try_install_deps
# try_install_deps(['zh_langchain==0.2.1', 'pypinyin'], reload_m=['pypinyin', 'zh_langchain'])
# yield from update_ui_lastest_msg("安装完成,您可以再次重试。", chatbot, history)
return
# < ------------------- --------------- >
kai = knowledge_archive_interface()
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
kai_id = plugin_kwargs.get("advanced_arg", 'default')
vs_path = get_log_folder(user=get_user(chatbot), plugin_name='vec_store')
resp, prompt = kai.answer_with_archive_by_id(txt, kai_id, vs_path)
chatbot.append((txt, f'[知识库 {kai_id}] ' + prompt))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=prompt, inputs_show_user=txt,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
sys_prompt=system_prompt
)
history.extend((prompt, gpt_say))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新

View File

@@ -1,106 +0,0 @@
from toolbox import CatchException, update_ui
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
import requests
from bs4 import BeautifulSoup
from request_llms.bridge_all import model_info
def google(query, proxies):
query = query # 在此处替换您要搜索的关键词
url = f"https://www.google.com/search?q={query}"
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'}
response = requests.get(url, headers=headers, proxies=proxies)
soup = BeautifulSoup(response.content, 'html.parser')
results = []
for g in soup.find_all('div', class_='g'):
anchors = g.find_all('a')
if anchors:
link = anchors[0]['href']
if link.startswith('/url?q='):
link = link[7:]
if not link.startswith('http'):
continue
title = g.find('h3').text
item = {'title': title, 'link': link}
results.append(item)
# for r in results:
# print(r['link'])
return results
def scrape_text(url, proxies) -> str:
"""Scrape text from a webpage
Args:
url (str): The URL to scrape text from
Returns:
str: The scraped text
"""
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
'Content-Type': 'text/plain',
}
try:
response = requests.get(url, headers=headers, proxies=proxies, timeout=8)
if response.encoding == "ISO-8859-1": response.encoding = response.apparent_encoding
except:
return "无法连接到该网页"
soup = BeautifulSoup(response.text, "html.parser")
for script in soup(["script", "style"]):
script.extract()
text = soup.get_text()
lines = (line.strip() for line in text.splitlines())
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
text = "\n".join(chunk for chunk in chunks if chunk)
return text
@CatchException
def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数如温度和top_p等一般原样传递下去就行
plugin_kwargs 插件模型的参数,暂时没有用武之地
chatbot 聊天显示框的句柄,用于显示给用户
history 聊天历史,前情提要
system_prompt 给gpt的静默提醒
user_request 当前用户的请求信息IP地址等
"""
history = [] # 清空历史,以免输入溢出
chatbot.append((f"请结合互联网信息回答以下问题:{txt}",
"[Local Message] 请注意,您正在调用一个[函数插件]的模板该模板可以实现ChatGPT联网信息综合。该函数面向希望实现更多有趣功能的开发者它可以作为创建新功能函数的模板。您若希望分享新的功能模组请不吝PR"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
# ------------- < 第1步爬取搜索引擎的结果 > -------------
from toolbox import get_conf
proxies = get_conf('proxies')
urls = google(txt, proxies)
history = []
if len(urls) == 0:
chatbot.append((f"结论:{txt}",
"[Local Message] 受到google限制无法从google获取信息"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
return
# ------------- < 第2步依次访问网页 > -------------
max_search_result = 5 # 最多收纳多少个网页的结果
for index, url in enumerate(urls[:max_search_result]):
res = scrape_text(url['link'], proxies)
history.extend([f"{index}份搜索结果:", res])
chatbot.append([f"{index}份搜索结果:", res[:500]+"......"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
# ------------- < 第3步ChatGPT综合 > -------------
i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}"
i_say, history = input_clipping( # 裁剪输入从最长的条目开始裁剪防止爆token
inputs=i_say,
history=history,
max_token_limit=model_info[llm_kwargs['llm_model']]['max_token']*3//4
)
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user=i_say,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。"
)
chatbot[-1] = (i_say, gpt_say)
history.append(i_say);history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新

View File

@@ -1,106 +0,0 @@
from toolbox import CatchException, update_ui
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
import requests
from bs4 import BeautifulSoup
from request_llms.bridge_all import model_info
def bing_search(query, proxies=None):
query = query
url = f"https://cn.bing.com/search?q={query}"
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'}
response = requests.get(url, headers=headers, proxies=proxies)
soup = BeautifulSoup(response.content, 'html.parser')
results = []
for g in soup.find_all('li', class_='b_algo'):
anchors = g.find_all('a')
if anchors:
link = anchors[0]['href']
if not link.startswith('http'):
continue
title = g.find('h2').text
item = {'title': title, 'link': link}
results.append(item)
# for r in results:
# print(r['link'])
return results
def scrape_text(url, proxies) -> str:
"""Scrape text from a webpage
Args:
url (str): The URL to scrape text from
Returns:
str: The scraped text
"""
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
'Content-Type': 'text/plain',
}
try:
response = requests.get(url, headers=headers, proxies=proxies, timeout=8)
if response.encoding == "ISO-8859-1": response.encoding = response.apparent_encoding
except:
return "无法连接到该网页"
soup = BeautifulSoup(response.text, "html.parser")
for script in soup(["script", "style"]):
script.extract()
text = soup.get_text()
lines = (line.strip() for line in text.splitlines())
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
text = "\n".join(chunk for chunk in chunks if chunk)
return text
@CatchException
def 连接bing搜索回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数如温度和top_p等一般原样传递下去就行
plugin_kwargs 插件模型的参数,暂时没有用武之地
chatbot 聊天显示框的句柄,用于显示给用户
history 聊天历史,前情提要
system_prompt 给gpt的静默提醒
user_request 当前用户的请求信息IP地址等
"""
history = [] # 清空历史,以免输入溢出
chatbot.append((f"请结合互联网信息回答以下问题:{txt}",
"[Local Message] 请注意,您正在调用一个[函数插件]的模板该模板可以实现ChatGPT联网信息综合。该函数面向希望实现更多有趣功能的开发者它可以作为创建新功能函数的模板。您若希望分享新的功能模组请不吝PR"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
# ------------- < 第1步爬取搜索引擎的结果 > -------------
from toolbox import get_conf
proxies = get_conf('proxies')
urls = bing_search(txt, proxies)
history = []
if len(urls) == 0:
chatbot.append((f"结论:{txt}",
"[Local Message] 受到bing限制无法从bing获取信息"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
return
# ------------- < 第2步依次访问网页 > -------------
max_search_result = 8 # 最多收纳多少个网页的结果
for index, url in enumerate(urls[:max_search_result]):
res = scrape_text(url['link'], proxies)
history.extend([f"{index}份搜索结果:", res])
chatbot.append([f"{index}份搜索结果:", res[:500]+"......"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
# ------------- < 第3步ChatGPT综合 > -------------
i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}"
i_say, history = input_clipping( # 裁剪输入从最长的条目开始裁剪防止爆token
inputs=i_say,
history=history,
max_token_limit=model_info[llm_kwargs['llm_model']]['max_token']*3//4
)
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user=i_say,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。"
)
chatbot[-1] = (i_say, gpt_say)
history.append(i_say);history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新

View File

@@ -1,180 +0,0 @@
"""
Explanation of the Void Terminal Plugin:
Please describe in natural language what you want to do.
1. You can open the plugin's dropdown menu to explore various capabilities of this project, and then describe your needs in natural language, for example:
- "Please call the plugin to translate a PDF paper for me. I just uploaded the paper to the upload area."
- "Please use the plugin to translate a PDF paper, with the address being https://www.nature.com/articles/s41586-019-1724-z.pdf."
- "Generate an image with blooming flowers and lush green grass using the plugin."
- "Translate the README using the plugin. The GitHub URL is https://github.com/facebookresearch/co-tracker."
- "Translate an Arxiv paper for me. The Arxiv ID is 1812.10695. Remember to use the plugin and don't do it manually!"
- "I don't like the current interface color. Modify the configuration and change the theme to THEME="High-Contrast"."
- "Could you please explain the structure of the Transformer network?"
2. If you use keywords like "call the plugin xxx", "modify the configuration xxx", "please", etc., your intention can be recognized more accurately.
3. Your intention can be recognized more accurately when using powerful models like GPT4. This plugin is relatively new, so please feel free to provide feedback on GitHub.
4. Now, if you need to process a file, please upload the file (drag the file to the file upload area) or describe the path to the file.
5. If you don't need to upload a file, you can simply repeat your command again.
"""
explain_msg = """
## 虚空终端插件说明:
1. 请用**自然语言**描述您需要做什么。例如:
- 「请调用插件为我翻译PDF论文论文我刚刚放到上传区了」
- 「请调用插件翻译PDF论文地址为https://openreview.net/pdf?id=rJl0r3R9KX」
- 「把Arxiv论文翻译成中文PDFarxiv论文的ID是1812.10695,记得用插件!」
- 「生成一张图片,图中鲜花怒放,绿草如茵,用插件实现」
- 「用插件翻译READMEGithub网址是https://github.com/facebookresearch/co-tracker」
- 「我不喜欢当前的界面颜色修改配置把主题THEME更换为THEME="High-Contrast"
- 「请调用插件解析python源代码项目代码我刚刚打包拖到上传区了」
- 「请问Transformer网络的结构是怎样的
2. 您可以打开插件下拉菜单以了解本项目的各种能力。
3. 如果您使用「调用插件xxx」、「修改配置xxx」、「请问」等关键词您的意图可以被识别的更准确。
4. 建议使用 GPT3.5 或更强的模型弱模型可能无法理解您的想法。该插件诞生时间不长欢迎您前往Github反馈问题。
5. 现在,如果需要处理文件,请您上传文件(将文件拖动到文件上传区),或者描述文件所在的路径。
6. 如果不需要上传文件,现在您只需要再次重复一次您的指令即可。
"""
from pydantic import BaseModel, Field
from typing import List
from toolbox import CatchException, update_ui, is_the_upload_folder
from toolbox import update_ui_lastest_msg, disable_auto_promotion
from request_llms.bridge_all import predict_no_ui_long_connection
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.crazy_utils import input_clipping
from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
from crazy_functions.vt_fns.vt_state import VoidTerminalState
from crazy_functions.vt_fns.vt_modify_config import modify_configuration_hot
from crazy_functions.vt_fns.vt_modify_config import modify_configuration_reboot
from crazy_functions.vt_fns.vt_call_plugin import execute_plugin
class UserIntention(BaseModel):
user_prompt: str = Field(description="the content of user input", default="")
intention_type: str = Field(description="the type of user intention, choose from ['ModifyConfiguration', 'ExecutePlugin', 'Chat']", default="ExecutePlugin")
user_provide_file: bool = Field(description="whether the user provides a path to a file", default=False)
user_provide_url: bool = Field(description="whether the user provides a url", default=False)
def chat(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention):
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=txt, inputs_show_user=txt,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
sys_prompt=system_prompt
)
chatbot[-1] = [txt, gpt_say]
history.extend([txt, gpt_say])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
pass
explain_intention_to_user = {
'Chat': "聊天对话",
'ExecutePlugin': "调用插件",
'ModifyConfiguration': "修改配置",
}
def analyze_intention_with_simple_rules(txt):
user_intention = UserIntention()
user_intention.user_prompt = txt
is_certain = False
if '请问' in txt:
is_certain = True
user_intention.intention_type = 'Chat'
if '用插件' in txt:
is_certain = True
user_intention.intention_type = 'ExecutePlugin'
if '修改配置' in txt:
is_certain = True
user_intention.intention_type = 'ModifyConfiguration'
return is_certain, user_intention
@CatchException
def 虚空终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
disable_auto_promotion(chatbot=chatbot)
# 获取当前虚空终端状态
state = VoidTerminalState.get_state(chatbot)
appendix_msg = ""
# 用简单的关键词检测用户意图
is_certain, _ = analyze_intention_with_simple_rules(txt)
if is_the_upload_folder(txt):
state.set_state(chatbot=chatbot, key='has_provided_explaination', value=False)
appendix_msg = "\n\n**很好,您已经上传了文件**,现在请您描述您的需求。"
if is_certain or (state.has_provided_explaination):
# 如果意图明确,跳过提示环节
state.set_state(chatbot=chatbot, key='has_provided_explaination', value=True)
state.unlock_plugin(chatbot=chatbot)
yield from update_ui(chatbot=chatbot, history=history)
yield from 虚空终端主路由(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)
return
else:
# 如果意图模糊,提示
state.set_state(chatbot=chatbot, key='has_provided_explaination', value=True)
state.lock_plugin(chatbot=chatbot)
chatbot.append(("虚空终端状态:", explain_msg+appendix_msg))
yield from update_ui(chatbot=chatbot, history=history)
return
def 虚空终端主路由(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
history = []
chatbot.append(("虚空终端状态: ", f"正在执行任务: {txt}"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# ⭐ ⭐ ⭐ 分析用户意图
is_certain, user_intention = analyze_intention_with_simple_rules(txt)
if not is_certain:
yield from update_ui_lastest_msg(
lastmsg=f"正在执行任务: {txt}\n\n分析用户意图中", chatbot=chatbot, history=history, delay=0)
gpt_json_io = GptJsonIO(UserIntention)
rf_req = "\nchoose from ['ModifyConfiguration', 'ExecutePlugin', 'Chat']"
inputs = "Analyze the intention of the user according to following user input: \n\n" + \
">> " + (txt+rf_req).rstrip('\n').replace('\n','\n>> ') + '\n\n' + gpt_json_io.format_instructions
run_gpt_fn = lambda inputs, sys_prompt: predict_no_ui_long_connection(
inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])
analyze_res = run_gpt_fn(inputs, "")
try:
user_intention = gpt_json_io.generate_output_auto_repair(analyze_res, run_gpt_fn)
lastmsg=f"正在执行任务: {txt}\n\n用户意图理解: 意图={explain_intention_to_user[user_intention.intention_type]}",
except JsonStringError as e:
yield from update_ui_lastest_msg(
lastmsg=f"正在执行任务: {txt}\n\n用户意图理解: 失败 当前语言模型({llm_kwargs['llm_model']})不能理解您的意图", chatbot=chatbot, history=history, delay=0)
return
else:
pass
yield from update_ui_lastest_msg(
lastmsg=f"正在执行任务: {txt}\n\n用户意图理解: 意图={explain_intention_to_user[user_intention.intention_type]}",
chatbot=chatbot, history=history, delay=0)
# 用户意图: 修改本项目的配置
if user_intention.intention_type == 'ModifyConfiguration':
yield from modify_configuration_reboot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention)
# 用户意图: 调度插件
if user_intention.intention_type == 'ExecutePlugin':
yield from execute_plugin(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention)
# 用户意图: 聊天
if user_intention.intention_type == 'Chat':
yield from chat(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention)
return

View File

@@ -1,146 +0,0 @@
from toolbox import update_ui
from toolbox import CatchException, report_exception
from toolbox import write_history_to_file, promote_file_to_downloadzone
fast_debug = True
class PaperFileGroup():
def __init__(self):
self.file_paths = []
self.file_contents = []
self.sp_file_contents = []
self.sp_file_index = []
self.sp_file_tag = []
# count_token
from request_llms.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
self.get_token_num = get_token_num
def run_file_split(self, max_token_limit=1900):
"""
将长文本分离开来
"""
for index, file_content in enumerate(self.file_contents):
if self.get_token_num(file_content) < max_token_limit:
self.sp_file_contents.append(file_content)
self.sp_file_index.append(index)
self.sp_file_tag.append(self.file_paths[index])
else:
from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
for j, segment in enumerate(segments):
self.sp_file_contents.append(segment)
self.sp_file_index.append(index)
self.sp_file_tag.append(
self.file_paths[index] + f".part-{j}.txt")
def parseNotebook(filename, enable_markdown=1):
import json
CodeBlocks = []
with open(filename, 'r', encoding='utf-8', errors='replace') as f:
notebook = json.load(f)
for cell in notebook['cells']:
if cell['cell_type'] == 'code' and cell['source']:
# remove blank lines
cell['source'] = [line for line in cell['source'] if line.strip()
!= '']
CodeBlocks.append("".join(cell['source']))
elif enable_markdown and cell['cell_type'] == 'markdown' and cell['source']:
cell['source'] = [line for line in cell['source'] if line.strip()
!= '']
CodeBlocks.append("Markdown:"+"".join(cell['source']))
Code = ""
for idx, code in enumerate(CodeBlocks):
Code += f"This is {idx+1}th code block: \n"
Code += code+"\n"
return Code
def ipynb解释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
enable_markdown = plugin_kwargs.get("advanced_arg", "1")
try:
enable_markdown = int(enable_markdown)
except ValueError:
enable_markdown = 1
pfg = PaperFileGroup()
for fp in file_manifest:
file_content = parseNotebook(fp, enable_markdown=enable_markdown)
pfg.file_paths.append(fp)
pfg.file_contents.append(file_content)
# <-------- 拆分过长的IPynb文件 ---------->
pfg.run_file_split(max_token_limit=1024)
n_split = len(pfg.sp_file_contents)
inputs_array = [r"This is a Jupyter Notebook file, tell me about Each Block in Chinese. Focus Just On Code." +
r"If a block starts with `Markdown` which means it's a markdown block in ipynbipynb. " +
r"Start a new line for a block and block num use Chinese." +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
inputs_show_user_array = [f"{f}的分析如下" for f in pfg.sp_file_tag]
sys_prompt_array = ["You are a professional programmer."] * n_split
gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
inputs_array=inputs_array,
inputs_show_user_array=inputs_show_user_array,
llm_kwargs=llm_kwargs,
chatbot=chatbot,
history_array=[[""] for _ in range(n_split)],
sys_prompt_array=sys_prompt_array,
# max_workers=5, # OpenAI所允许的最大并行过载
scroller_max_len=80
)
# <-------- 整理结果,退出 ---------->
block_result = " \n".join(gpt_response_collection)
chatbot.append(("解析的结果如下", block_result))
history.extend(["解析的结果如下", block_result])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# <-------- 写入文件,退出 ---------->
res = write_history_to_file(history)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append(("完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
@CatchException
def 解析ipynb文件(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
chatbot.append([
"函数插件功能?",
"对IPynb文件进行解析。Contributor: codycjy."])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
history = [] # 清空历史
import glob
import os
if os.path.exists(txt):
project_folder = txt
else:
if txt == "":
txt = '空空如也的输入栏'
report_exception(chatbot, history,
a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
if txt.endswith('.ipynb'):
file_manifest = [txt]
else:
file_manifest = [f for f in glob.glob(
f'{project_folder}/**/*.ipynb', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history,
a=f"解析项目: {txt}", b=f"找不到任何.ipynb文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from ipynb解释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, )

View File

@@ -1,63 +0,0 @@
from toolbox import CatchException, update_ui, get_conf
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
import datetime
@CatchException
def 同时问询(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数如温度和top_p等一般原样传递下去就行
plugin_kwargs 插件模型的参数,用于灵活调整复杂功能的各种参数
chatbot 聊天显示框的句柄,用于显示给用户
history 聊天历史,前情提要
system_prompt 给gpt的静默提醒
user_request 当前用户的请求信息IP地址等
"""
history = [] # 清空历史,以免输入溢出
MULTI_QUERY_LLM_MODELS = get_conf('MULTI_QUERY_LLM_MODELS')
chatbot.append((txt, "正在同时咨询" + MULTI_QUERY_LLM_MODELS))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
# llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo&api2d-gpt-3.5-turbo' # 支持任意数量的llm接口用&符号分隔
llm_kwargs['llm_model'] = MULTI_QUERY_LLM_MODELS # 支持任意数量的llm接口用&符号分隔
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=txt, inputs_show_user=txt,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
sys_prompt=system_prompt,
retry_times_at_unknown_error=0
)
history.append(txt)
history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
@CatchException
def 同时问询_指定模型(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数如温度和top_p等一般原样传递下去就行
plugin_kwargs 插件模型的参数,用于灵活调整复杂功能的各种参数
chatbot 聊天显示框的句柄,用于显示给用户
history 聊天历史,前情提要
system_prompt 给gpt的静默提醒
user_request 当前用户的请求信息IP地址等
"""
history = [] # 清空历史,以免输入溢出
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
# llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo&api2d-gpt-3.5-turbo' # 支持任意数量的llm接口用&符号分隔
llm_kwargs['llm_model'] = plugin_kwargs.get("advanced_arg", 'chatglm&gpt-3.5-turbo') # 'chatglm&gpt-3.5-turbo' # 支持任意数量的llm接口用&符号分隔
chatbot.append((txt, f"正在同时咨询{llm_kwargs['llm_model']}"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=txt, inputs_show_user=txt,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
sys_prompt=system_prompt,
retry_times_at_unknown_error=0
)
history.append(txt)
history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新

View File

@@ -1,194 +0,0 @@
from toolbox import update_ui
from toolbox import CatchException, get_conf, markdown_convertion
from request_llms.bridge_all import predict_no_ui_long_connection
from crazy_functions.crazy_utils import input_clipping
from crazy_functions.agent_fns.watchdog import WatchDog
from crazy_functions.live_audio.aliyunASR import AliyunASR
from loguru import logger
import threading, time
import numpy as np
import json
import re
def chatbot2history(chatbot):
history = []
for c in chatbot:
for q in c:
if q in ["[ 请讲话 ]", "[ 等待GPT响应 ]", "[ 正在等您说完问题 ]"]:
continue
elif q.startswith("[ 正在等您说完问题 ]"):
continue
else:
history.append(q.strip('<div class="markdown-body">').strip('</div>').strip('<p>').strip('</p>'))
return history
def visualize_audio(chatbot, audio_shape):
if len(chatbot) == 0: chatbot.append(["[ 请讲话 ]", "[ 正在等您说完问题 ]"])
chatbot[-1] = list(chatbot[-1])
p1 = ''
p2 = ''
chatbot[-1][-1] = re.sub(p1+r'(.*)'+p2, '', chatbot[-1][-1])
chatbot[-1][-1] += (p1+f"`{audio_shape}`"+p2)
class AsyncGptTask():
def __init__(self) -> None:
self.observe_future = []
self.observe_future_chatbot_index = []
def gpt_thread_worker(self, i_say, llm_kwargs, history, sys_prompt, observe_window, index):
try:
MAX_TOKEN_ALLO = 2560
i_say, history = input_clipping(i_say, history, max_token_limit=MAX_TOKEN_ALLO)
gpt_say_partial = predict_no_ui_long_connection(inputs=i_say, llm_kwargs=llm_kwargs, history=history, sys_prompt=sys_prompt,
observe_window=observe_window[index], console_slience=True)
except ConnectionAbortedError as token_exceed_err:
logger.error('至少一个线程任务Token溢出而失败', e)
except Exception as e:
logger.error('至少一个线程任务意外失败', e)
def add_async_gpt_task(self, i_say, chatbot_index, llm_kwargs, history, system_prompt):
self.observe_future.append([""])
self.observe_future_chatbot_index.append(chatbot_index)
cur_index = len(self.observe_future)-1
th_new = threading.Thread(target=self.gpt_thread_worker, args=(i_say, llm_kwargs, history, system_prompt, self.observe_future, cur_index))
th_new.daemon = True
th_new.start()
def update_chatbot(self, chatbot):
for of, ofci in zip(self.observe_future, self.observe_future_chatbot_index):
try:
chatbot[ofci] = list(chatbot[ofci])
chatbot[ofci][1] = markdown_convertion(of[0])
except:
self.observe_future = []
self.observe_future_chatbot_index = []
return chatbot
class InterviewAssistant(AliyunASR):
def __init__(self):
self.capture_interval = 0.5 # second
self.stop = False
self.parsed_text = "" # 下个句子中已经说完的部分, 由 test_on_result_chg() 写入
self.parsed_sentence = "" # 某段话的整个句子, 由 test_on_sentence_end() 写入
self.buffered_sentence = "" #
self.audio_shape = "" # 音频的可视化表现, 由 audio_convertion_thread() 写入
self.event_on_result_chg = threading.Event()
self.event_on_entence_end = threading.Event()
self.event_on_commit_question = threading.Event()
def __del__(self):
self.stop = True
self.stop_msg = ""
self.commit_wd.kill_dog = True
self.plugin_wd.kill_dog = True
def init(self, chatbot):
# 初始化音频采集线程
self.captured_audio = np.array([])
self.keep_latest_n_second = 10
self.commit_after_pause_n_second = 2.0
self.ready_audio_flagment = None
self.stop = False
self.plugin_wd = WatchDog(timeout=5, bark_fn=self.__del__, msg="程序终止")
self.aut = threading.Thread(target=self.audio_convertion_thread, args=(chatbot._cookies['uuid'],))
self.aut.daemon = True
self.aut.start()
# th2 = threading.Thread(target=self.audio2txt_thread, args=(chatbot._cookies['uuid'],))
# th2.daemon = True
# th2.start()
def no_audio_for_a_while(self):
if len(self.buffered_sentence) < 7: # 如果一句话小于7个字暂不提交
self.commit_wd.begin_watch()
else:
self.event_on_commit_question.set()
def begin(self, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
# main plugin function
self.init(chatbot)
chatbot.append(["[ 请讲话 ]", "[ 正在等您说完问题 ]"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
self.plugin_wd.begin_watch()
self.agt = AsyncGptTask()
self.commit_wd = WatchDog(timeout=self.commit_after_pause_n_second, bark_fn=self.no_audio_for_a_while, interval=0.2)
self.commit_wd.begin_watch()
while not self.stop:
self.event_on_result_chg.wait(timeout=0.25) # run once every 0.25 second
chatbot = self.agt.update_chatbot(chatbot) # 将子线程的gpt结果写入chatbot
history = chatbot2history(chatbot)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
self.plugin_wd.feed()
if self.event_on_result_chg.is_set():
# called when some words have finished
self.event_on_result_chg.clear()
chatbot[-1] = list(chatbot[-1])
chatbot[-1][0] = self.buffered_sentence + self.parsed_text
history = chatbot2history(chatbot)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
self.commit_wd.feed()
if self.event_on_entence_end.is_set():
# called when a sentence has ended
self.event_on_entence_end.clear()
self.parsed_text = self.parsed_sentence
self.buffered_sentence += self.parsed_text
chatbot[-1] = list(chatbot[-1])
chatbot[-1][0] = self.buffered_sentence
history = chatbot2history(chatbot)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
if self.event_on_commit_question.is_set():
# called when a question should be commited
self.event_on_commit_question.clear()
if len(self.buffered_sentence) == 0: raise RuntimeError
self.commit_wd.begin_watch()
chatbot[-1] = list(chatbot[-1])
chatbot[-1] = [self.buffered_sentence, "[ 等待GPT响应 ]"]
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# add gpt task 创建子线程请求gpt避免线程阻塞
history = chatbot2history(chatbot)
self.agt.add_async_gpt_task(self.buffered_sentence, len(chatbot)-1, llm_kwargs, history, system_prompt)
self.buffered_sentence = ""
chatbot.append(["[ 请讲话 ]", "[ 正在等您说完问题 ]"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
if not self.event_on_result_chg.is_set() and not self.event_on_entence_end.is_set() and not self.event_on_commit_question.is_set():
visualize_audio(chatbot, self.audio_shape)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
if len(self.stop_msg) != 0:
raise RuntimeError(self.stop_msg)
@CatchException
def 语音助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
# pip install -U openai-whisper
chatbot.append(["对话助手函数插件:使用时,双手离开鼠标键盘吧", "音频助手, 正在听您讲话(点击“停止”键可终止程序)..."])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
import nls
from scipy import io
except:
chatbot.append(["导入依赖失败", "使用该模块需要额外依赖, 安装方法:```pip install --upgrade aliyun-python-sdk-core==2.13.3 pyOpenSSL webrtcvad scipy git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git```"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
APPKEY = get_conf('ALIYUN_APPKEY')
if APPKEY == "":
chatbot.append(["导入依赖失败", "没有阿里云语音识别APPKEY和TOKEN, 详情见https://help.aliyun.com/document_detail/450255.html"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
ia = InterviewAssistant()
yield from ia.begin(llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)

View File

@@ -1,63 +0,0 @@
from toolbox import update_ui
from toolbox import CatchException, report_exception
from toolbox import write_history_to_file, promote_file_to_downloadzone
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
import time, glob, os
for index, fp in enumerate(file_manifest):
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
file_content = f.read()
prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else ""
i_say = prefix + f'请对下面的文章片段用中文做一个概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{file_content}```'
i_say_show_user = prefix + f'[{index+1}/{len(file_manifest)}] 请对下面的文章片段做一个概述: {os.path.abspath(fp)}'
chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
msg = '正常'
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt=system_prompt) # 带超时倒计时
chatbot[-1] = (i_say_show_user, gpt_say)
history.append(i_say_show_user); history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
time.sleep(2)
all_file = ', '.join([os.path.relpath(fp, project_folder) for index, fp in enumerate(file_manifest)])
i_say = f'根据以上你自己的分析,对全文进行概括,用学术性语言写一段中文摘要,然后再写一段英文摘要(包括{all_file})。'
chatbot.append((i_say, "[Local Message] waiting gpt response."))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
msg = '正常'
# ** gpt request **
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say, llm_kwargs, chatbot, history=history, sys_prompt=system_prompt) # 带超时倒计时
chatbot[-1] = (i_say, gpt_say)
history.append(i_say); history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
res = write_history_to_file(history)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append(("完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
@CatchException
def 读文章写摘要(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
history = [] # 清空历史,以免输入溢出
import glob, os
if os.path.exists(txt):
project_folder = txt
else:
if txt == "": txt = '空空如也的输入栏'
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] # + \
# [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
# [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
if len(file_manifest) == 0:
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)

View File

@@ -1,185 +0,0 @@
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from toolbox import CatchException, report_exception, promote_file_to_downloadzone
from toolbox import update_ui, update_ui_lastest_msg, disable_auto_promotion, write_history_to_file
import logging
import requests
import time
import random
ENABLE_ALL_VERSION_SEARCH = True
def get_meta_information(url, chatbot, history):
import arxiv
import difflib
import re
from bs4 import BeautifulSoup
from toolbox import get_conf
from urllib.parse import urlparse
session = requests.session()
proxies = get_conf('proxies')
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
'Cache-Control':'max-age=0',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Connection': 'keep-alive'
}
try:
session.proxies.update(proxies)
except:
report_exception(chatbot, history,
a=f"获取代理失败 无代理状态下很可能无法访问OpenAI家族的模型及谷歌学术 建议检查USE_PROXY选项是否修改。",
b=f"尝试直接连接")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
session.headers.update(headers)
response = session.get(url)
# 解析网页内容
soup = BeautifulSoup(response.text, "html.parser")
def string_similar(s1, s2):
return difflib.SequenceMatcher(None, s1, s2).quick_ratio()
if ENABLE_ALL_VERSION_SEARCH:
def search_all_version(url):
time.sleep(random.randint(1,5)) # 睡一会防止触发google反爬虫
response = session.get(url)
soup = BeautifulSoup(response.text, "html.parser")
for result in soup.select(".gs_ri"):
try:
url = result.select_one(".gs_rt").a['href']
except:
continue
arxiv_id = extract_arxiv_id(url)
if not arxiv_id:
continue
search = arxiv.Search(
id_list=[arxiv_id],
max_results=1,
sort_by=arxiv.SortCriterion.Relevance,
)
try: paper = next(search.results())
except: paper = None
return paper
return None
def extract_arxiv_id(url):
# 返回给定的url解析出的arxiv_id如url未成功匹配返回None
pattern = r'arxiv.org/abs/([^/]+)'
match = re.search(pattern, url)
if match:
return match.group(1)
else:
return None
profile = []
# 获取所有文章的标题和作者
for result in soup.select(".gs_ri"):
title = result.a.text.replace('\n', ' ').replace(' ', ' ')
author = result.select_one(".gs_a").text
try:
citation = result.select_one(".gs_fl > a[href*='cites']").text # 引用次数是链接中的文本,直接取出来
except:
citation = 'cited by 0'
abstract = result.select_one(".gs_rs").text.strip() # 摘要在 .gs_rs 中的文本,需要清除首尾空格
# 首先在arxiv上搜索获取文章摘要
search = arxiv.Search(
query = title,
max_results = 1,
sort_by = arxiv.SortCriterion.Relevance,
)
try: paper = next(search.results())
except: paper = None
is_match = paper is not None and string_similar(title, paper.title) > 0.90
# 如果在Arxiv上匹配失败检索文章的历史版本的题目
if not is_match and ENABLE_ALL_VERSION_SEARCH:
other_versions_page_url = [tag['href'] for tag in result.select_one('.gs_flb').select('.gs_nph') if 'cluster' in tag['href']]
if len(other_versions_page_url) > 0:
other_versions_page_url = other_versions_page_url[0]
paper = search_all_version('http://' + urlparse(url).netloc + other_versions_page_url)
is_match = paper is not None and string_similar(title, paper.title) > 0.90
if is_match:
# same paper
abstract = paper.summary.replace('\n', ' ')
is_paper_in_arxiv = True
else:
# different paper
abstract = abstract
is_paper_in_arxiv = False
logging.info('[title]:' + title)
logging.info('[author]:' + author)
logging.info('[citation]:' + citation)
profile.append({
'title': title,
'author': author,
'citation': citation,
'abstract': abstract,
'is_paper_in_arxiv': is_paper_in_arxiv,
})
chatbot[-1] = [chatbot[-1][0], title + f'\n\n是否在arxiv中不在arxiv中无法获取完整摘要:{is_paper_in_arxiv}\n\n' + abstract]
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
return profile
@CatchException
def 谷歌检索小助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
disable_auto_promotion(chatbot=chatbot)
# 基本信息:功能、贡献者
chatbot.append([
"函数插件功能?",
"分析用户提供的谷歌学术google scholar搜索页面中出现的所有文章: binary-husky插件初始化中..."])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
import arxiv
import math
from bs4 import BeautifulSoup
except:
report_exception(chatbot, history,
a = f"解析项目: {txt}",
b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade beautifulsoup4 arxiv```。")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 清空历史,以免输入溢出
history = []
meta_paper_info_list = yield from get_meta_information(txt, chatbot, history)
if len(meta_paper_info_list) == 0:
yield from update_ui_lastest_msg(lastmsg='获取文献失败可能触发了google反爬虫机制。',chatbot=chatbot, history=history, delay=0)
return
batchsize = 5
for batch in range(math.ceil(len(meta_paper_info_list)/batchsize)):
if len(meta_paper_info_list[:batchsize]) > 0:
i_say = "下面是一些学术文献的数据,提取出以下内容:" + \
"1、英文题目2、中文题目翻译3、作者4、arxiv公开is_paper_in_arxiv4、引用数量cite5、中文摘要翻译。" + \
f"以下是信息源:{str(meta_paper_info_list[:batchsize])}"
inputs_show_user = f"请分析此页面中出现的所有文章:{txt},这是第{batch+1}"
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user=inputs_show_user,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
sys_prompt="你是一个学术翻译请从数据中提取信息。你必须使用Markdown表格。你必须逐个文献进行处理。"
)
history.extend([ f"{batch+1}", gpt_say ])
meta_paper_info_list = meta_paper_info_list[batchsize:]
chatbot.append(["状态?",
"已经全部完成您可以试试让AI写一个Related Works例如您可以继续输入Write a \"Related Works\" section about \"你搜索的研究领域\" for me."])
msg = '正常'
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
path = write_history_to_file(history)
promote_file_to_downloadzone(path, chatbot=chatbot)
chatbot.append(("完成了吗?", path));
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面

View File

@@ -1,54 +0,0 @@
# encoding: utf-8
# @Time : 2023/4/19
# @Author : Spike
# @Descr :
from toolbox import update_ui, get_conf, get_user
from toolbox import CatchException
from toolbox import default_user_name
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
import shutil
import os
@CatchException
def 猜你想问(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
if txt:
show_say = txt
prompt = txt+'\n回答完问题后,再列出用户可能提出的三个问题。'
else:
prompt = history[-1]+"\n分析上述回答,再列出用户可能提出的三个问题。"
show_say = '分析上述回答,再列出用户可能提出的三个问题。'
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=prompt,
inputs_show_user=show_say,
llm_kwargs=llm_kwargs,
chatbot=chatbot,
history=history,
sys_prompt=system_prompt
)
chatbot[-1] = (show_say, gpt_say)
history.extend([show_say, gpt_say])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
@CatchException
def 清除缓存(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
chatbot.append(['清除本地缓存数据', '执行中. 删除数据'])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
def _get_log_folder(user=default_user_name):
PATH_LOGGING = get_conf('PATH_LOGGING')
_dir = os.path.join(PATH_LOGGING, user)
if not os.path.exists(_dir): os.makedirs(_dir)
return _dir
def _get_upload_folder(user=default_user_name):
PATH_PRIVATE_UPLOAD = get_conf('PATH_PRIVATE_UPLOAD')
_dir = os.path.join(PATH_PRIVATE_UPLOAD, user)
return _dir
shutil.rmtree(_get_log_folder(get_user(chatbot)), ignore_errors=True)
shutil.rmtree(_get_upload_folder(get_user(chatbot)), ignore_errors=True)
chatbot.append(['清除本地缓存数据', '执行完成'])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

View File

@@ -1,153 +0,0 @@
from toolbox import CatchException, update_ui
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
import datetime
####################################################################################################################
# Demo 1: 一个非常简单的插件 #########################################################################################
####################################################################################################################
高阶功能模板函数示意图 = f"""
```mermaid
flowchart TD
%% <gpt_academic_hide_mermaid_code> 一个特殊标记用于在生成mermaid图表时隐藏代码块
subgraph 函数调用["函数调用过程"]
AA["输入栏用户输入的文本(txt)"] --> BB["gpt模型参数(llm_kwargs)"]
BB --> CC["插件模型参数(plugin_kwargs)"]
CC --> DD["对话显示框的句柄(chatbot)"]
DD --> EE["对话历史(history)"]
EE --> FF["系统提示词(system_prompt)"]
FF --> GG["当前用户信息(web_port)"]
A["开始(查询5天历史事件)"]
A --> B["获取当前月份和日期"]
B --> C["生成历史事件查询提示词"]
C --> D["调用大模型"]
D --> E["更新界面"]
E --> F["记录历史"]
F --> |"下一天"| B
end
```
"""
@CatchException
def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request, num_day=5):
"""
# 高阶功能模板函数示意图https://mermaid.live/edit#pako:eNptk1tvEkEYhv8KmattQpvlvOyFCcdeeaVXuoYssBwie8gyhCIlqVoLhrbbtAWNUpEGUkyMEDW2Fmn_DDOL_8LZHdOwxrnamX3f7_3mmZk6yKhZCfAgV1KrmYKoQ9fDuKC4yChX0nld1Aou1JzjznQ5fWmejh8LYHW6vG2a47YAnlCLNSIRolnenKBXI_zRIBrcuqRT890u7jZx7zMDt-AaMbnW1--5olGiz2sQjwfoQxsZL0hxplSSU0-rop4vrzmKR6O2JxYjHmwcL2Y_HDatVMkXlf86YzHbGY9bO5j8XE7O8Nsbc3iNB3ukL2SMcH-XIQBgWoVOZzxuOxOJOyc63EPGV6ZQLENVrznViYStTiaJ2vw2M2d9bByRnOXkgCnXylCSU5quyto_IcmkbdvctELmJ-j1ASW3uB3g5xOmKqVTmqr_Na3AtuS_dtBFm8H90XJyHkDDT7S9xXWb4HGmRChx64AOL5HRpUm411rM5uh4H78Z4V7fCZzytjZz2seto9XaNPFue07clLaVZF8UNLygJ-VES8lah_n-O-5Ozc7-77NzJ0-K0yr0ZYrmHdqAk50t2RbA4qq9uNohBASw7YpSgaRkLWCCAtxAlnRZLGbJba9bPwUAC5IsCYAnn1kpJ1ZKUACC0iBSsQLVBzUlA3ioVyQ3qGhZEUrxokiehAz4nFgqk1VNVABfB1uAD_g2_AGPl-W8nMcbCvsDblADfNCz4feyobDPy3rYEMtxwYYbPFNVUoHdCPmDHBv2cP4AMfrCbiBli-Q-3afv0X6WdsIjW2-10fgDy1SAig
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数如温度和top_p等一般原样传递下去就行
plugin_kwargs 插件模型的参数,用于灵活调整复杂功能的各种参数
chatbot 聊天显示框的句柄,用于显示给用户
history 聊天历史,前情提要
system_prompt 给gpt的静默提醒
user_request 当前用户的请求信息IP地址等
"""
history = [] # 清空历史,以免输入溢出
chatbot.append((
"您正在调用插件:历史上的今天",
"[Local Message] 请注意,您正在调用一个[函数插件]的模板该函数面向希望实现更多有趣功能的开发者它可以作为创建新功能函数的模板该函数只有20多行代码。此外我们也提供可同步处理大量文件的多线程Demo供您参考。您若希望分享新的功能模组请不吝PR" + 高阶功能模板函数示意图))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
for i in range(int(num_day)):
currentMonth = (datetime.date.today() + datetime.timedelta(days=i)).month
currentDay = (datetime.date.today() + datetime.timedelta(days=i)).day
i_say = f'历史中哪些事件发生在{currentMonth}{currentDay}列举两条并发送相关图片。发送图片时请使用Markdown将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述该事件的一个最重要的单词。'
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user=i_say,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
sys_prompt="当你想发送一张照片时请使用Markdown, 并且不要有反斜线, 不要用代码块。使用 Unsplash API (https://source.unsplash.com/1280x720/? < PUT_YOUR_QUERY_HERE >)。"
)
chatbot[-1] = (i_say, gpt_say)
history.append(i_say);history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
####################################################################################################################
# Demo 2: 一个带二级菜单的插件 #######################################################################################
####################################################################################################################
from crazy_functions.plugin_template.plugin_class_template import GptAcademicPluginTemplate, ArgProperty
class Demo_Wrap(GptAcademicPluginTemplate):
def __init__(self):
"""
请注意`execute`会执行在不同的线程中,因此您在定义和使用类变量时,应当慎之又慎!
"""
pass
def define_arg_selection_menu(self):
"""
定义插件的二级选项菜单
"""
gui_definition = {
"num_day":
ArgProperty(title="日期选择", options=["仅今天", "未来3天", "未来5天"], default_value="未来3天", description="", type="dropdown").model_dump_json(),
}
return gui_definition
def execute(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
执行插件
"""
num_day = plugin_kwargs["num_day"]
if num_day == "仅今天": num_day = 1
if num_day == "未来3天": num_day = 3
if num_day == "未来5天": num_day = 5
yield from 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request, num_day=num_day)
####################################################################################################################
# Demo 3: 绘制脑图的Demo ############################################################################################
####################################################################################################################
PROMPT = """
请你给出围绕“{subject}”的逻辑关系图使用mermaid语法mermaid语法举例
```mermaid
graph TD
P(编程) --> L1(Python)
P(编程) --> L2(C)
P(编程) --> L3(C++)
P(编程) --> L4(Javascipt)
P(编程) --> L5(PHP)
```
"""
@CatchException
def 测试图表渲染(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数如温度和top_p等一般原样传递下去就行
plugin_kwargs 插件模型的参数,用于灵活调整复杂功能的各种参数
chatbot 聊天显示框的句柄,用于显示给用户
history 聊天历史,前情提要
system_prompt 给gpt的静默提醒
user_request 当前用户的请求信息IP地址等
"""
history = [] # 清空历史,以免输入溢出
chatbot.append(("这是什么功能?", "一个测试mermaid绘制图表的功能您可以在输入框中输入一些关键词然后使用mermaid+llm绘制图表。"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
if txt == "": txt = "空白的输入栏" # 调皮一下
i_say_show_user = f'请绘制有关“{txt}”的逻辑关系图。'
i_say = PROMPT.format(subject=txt)
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say,
inputs_show_user=i_say_show_user,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
sys_prompt=""
)
history.append(i_say); history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新