Merge branch 'master' into jsz14897502-master
crazy_functions/CodeInterpreter.py · 231 lines · new file
@@ -0,0 +1,231 @@
from collections.abc import Callable, Iterable, Mapping
from typing import Any
from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, promote_file_to_downloadzone, clear_file_downloadzone
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from .crazy_utils import input_clipping, try_install_deps
from multiprocessing import Process, Pipe
import os
import time

templete = """
```python
import ...  # Put dependencies here, e.g. import numpy as np

class TerminalFunction(object):  # Do not change the name of the class; it must be `TerminalFunction`

    def run(self, path):  # The name of the method must be `run`; it takes only one positional argument
        # rewrite the function you have just written here
        ...
        return generated_file_path
```
"""

def inspect_dependency(chatbot, history):
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
    return True

def get_code_block(reply):
    import re
    pattern = r"```([\s\S]*?)```"  # regex pattern to match code blocks
    matches = re.findall(pattern, reply)  # find all code blocks in the text
    if len(matches) == 1:
        return matches[0].strip('python')  # code block
    for match in matches:
        if 'class TerminalFunction' in match:
            return match.strip('python')  # code block
    raise RuntimeError("GPT is not generating proper code.")
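A quick standalone check (not part of the diff; the reply text is invented) of how get_code_block() behaves on a typical model reply. Note that str.strip('python') strips those six characters from the ends of the match, not the literal word:

reply_demo = """Here is the function you asked for:
```python
class TerminalFunction(object):
    def run(self, path):
        return path
```
Hope this helps."""
print(get_code_block(reply_demo))
# prints the fenced block's content with the leading 'python' tag stripped:
#
# class TerminalFunction(object):
#     def run(self, path):
#         return path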

def gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history):
    # inputs
    prompt_compose = [
        f'Your job:\n'
        f'1. write a single Python function, which takes a path of a `{file_type}` file as the only argument and returns a `string` containing the result of analysis or the path of generated files. \n',
        f"2. You should write this function to perform following task: " + txt + "\n",
        f"3. Wrap the output python function with markdown codeblock."
    ]
    i_say = "".join(prompt_compose)
    demo = []

    # step 1
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=i_say, inputs_show_user=i_say,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=demo,
        sys_prompt= r"You are a programmer."
    )
    history.extend([i_say, gpt_say])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # step 2
    prompt_compose = [
        "If previous stage is successful, rewrite the function you have just written to satisfy following templete: \n",
        templete
    ]
    i_say = "".join(prompt_compose); inputs_show_user = "If previous stage is successful, rewrite the function you have just written to satisfy executable templete. "
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=i_say, inputs_show_user=inputs_show_user,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
        sys_prompt= r"You are a programmer."
    )
    code_to_return = gpt_say
    history.extend([i_say, gpt_say])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # # step 3
    # i_say = "Please list to packages to install to run the code above. Then show me how to use `try_install_deps` function to install them."
    # i_say += 'For instance. `try_install_deps(["opencv-python", "scipy", "numpy"])`'
    # installation_advance = yield from request_gpt_model_in_new_thread_with_ui_alive(
    #     inputs=i_say, inputs_show_user=inputs_show_user,
    #     llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
    #     sys_prompt= r"You are a programmer."
    # )

    # # # step 3
    # i_say = "Show me how to use `pip` to install packages to run the code above. "
    # i_say += 'For instance. `pip install -r opencv-python scipy numpy`'
    # installation_advance = yield from request_gpt_model_in_new_thread_with_ui_alive(
    #     inputs=i_say, inputs_show_user=i_say,
    #     llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
    #     sys_prompt= r"You are a programmer."
    # )
    installation_advance = ""

    return code_to_return, installation_advance, txt, file_type, llm_kwargs, chatbot, history

def make_module(code):
    module_file = 'gpt_fn_' + gen_time_str().replace('-','_')
    with open(f'gpt_log/{module_file}.py', 'w', encoding='utf8') as f:
        f.write(code)

    def get_class_name(class_string):
        import re
        # Use regex to extract the class name
        class_name = re.search(r'class (\w+)\(', class_string).group(1)
        return class_name

    class_name = get_class_name(code)
    return f"gpt_log.{module_file}->{class_name}"

def init_module_instance(module):
    import importlib
    module_, class_ = module.split('->')
    init_f = getattr(importlib.import_module(module_), class_)
    return init_f()
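Standalone sketch (not from the diff) of the write-then-import mechanism that make_module() and init_module_instance() implement; the module name gpt_fn_demo and the sample class are made up, and the current directory is placed on sys.path so the freshly written gpt_log folder resolves as a namespace package:

import importlib, os, sys, textwrap

os.makedirs('gpt_log', exist_ok=True)
sys.path.insert(0, os.getcwd())  # make the gpt_log folder importable

generated_code = textwrap.dedent('''
    class TerminalFunction(object):
        def run(self, path):
            return "processed " + path
''')

# make_module(): persist the generated code as an importable module file
with open('gpt_log/gpt_fn_demo.py', 'w', encoding='utf8') as f:
    f.write(generated_code)

# init_module_instance(): resolve a "package.module->ClassName" string back into an object
module_, class_ = 'gpt_log.gpt_fn_demo->TerminalFunction'.split('->')
instance = getattr(importlib.import_module(module_), class_)()
print(instance.run('input.csv'))  # -> processed input.csv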

def for_immediate_show_off_when_possible(file_type, fp, chatbot):
    if file_type in ['png', 'jpg']:
        image_path = os.path.abspath(fp)
        chatbot.append(['这是一张图片, 展示如下:',
            f'本地文件地址: <br/>`{image_path}`<br/>'+
            f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
        ])
    return chatbot

def subprocess_worker(instance, file_path, return_dict):
    return_dict['result'] = instance.run(file_path)

def have_any_recent_upload_files(chatbot):
    _5min = 5 * 60
    if not chatbot: return False                            # chatbot is None
    most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
    if not most_recent_uploaded: return False               # most_recent_uploaded is None
    if time.time() - most_recent_uploaded["time"] < _5min: return True  # most_recent_uploaded is new
    else: return False                                      # most_recent_uploaded is too old

def get_recent_file_prompt_support(chatbot):
    most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
    path = most_recent_uploaded['path']
    return path

@CatchException
def 虚空终端CodeInterpreter(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    txt             text entered by the user in the input box, e.g. a paragraph to translate or a path to a file to be processed
    llm_kwargs      GPT model parameters such as temperature and top_p, normally passed through unchanged
    plugin_kwargs   plugin parameters, currently unused
    chatbot         handle of the chat display box, used to show output to the user
    history         chat history, i.e. the preceding context
    system_prompt   silent system prompt for GPT
    web_port        port on which the software is currently running
    """
    raise NotImplementedError

    # clear history to avoid overflowing the input
    history = []; clear_file_downloadzone(chatbot)

    # basic information: features and contributors
    chatbot.append([
        "函数插件功能?",
        "CodeInterpreter开源版, 此插件处于开发阶段, 建议暂时不要使用, 插件初始化中 ..."
    ])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    if have_any_recent_upload_files(chatbot):
        file_path = get_recent_file_prompt_support(chatbot)
    else:
        chatbot.append(["文件检索", "没有发现任何近期上传的文件。"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # read the file
    if ("recently_uploaded_files" in plugin_kwargs) and (plugin_kwargs["recently_uploaded_files"] == ""): plugin_kwargs.pop("recently_uploaded_files")
    recently_uploaded_files = plugin_kwargs.get("recently_uploaded_files", None)
    file_path = recently_uploaded_files[-1]
    file_type = file_path.split('.')[-1]

    # sanity check for careless input
    if 'private_upload' in txt:
        chatbot.append([
            "...",
            f"请在输入框内填写需求,然后再次点击该插件(文件路径 {file_path} 已经被记忆)"
        ])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # get to work
    for j in range(5):  # retry at most 5 times
        try:
            code, installation_advance, txt, file_type, llm_kwargs, chatbot, history = \
                yield from gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history)
            code = get_code_block(code)
            res = make_module(code)
            instance = init_module_instance(res)
            break
        except Exception as e:
            chatbot.append([f"第{j}次代码生成尝试,失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
            yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # code generation finished; start executing
    try:
        import multiprocessing
        manager = multiprocessing.Manager()
        return_dict = manager.dict()

        p = multiprocessing.Process(target=subprocess_worker, args=(instance, file_path, return_dict))
        # only has 10 seconds to run
        p.start(); p.join(timeout=10)
        if p.is_alive(): p.terminate(); p.join()
        p.close()
        res = return_dict['result']
        # res = instance.run(file_path)
    except Exception as e:
        chatbot.append(["执行失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
        # chatbot.append(["如果是缺乏依赖,请参考以下建议", installation_advance])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # finished successfully; wrap up
    res = str(res)
    if os.path.exists(res):
        chatbot.append(["执行成功了,结果是一个有效文件", "结果:" + res])
        new_file_path = promote_file_to_downloadzone(res, chatbot=chatbot)
        chatbot = for_immediate_show_off_when_possible(file_type, new_file_path, chatbot)
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
    else:
        chatbot.append(["执行成功了,结果是一个字符串", "结果:" + res])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

"""
Test prompts:
    Crop the image and keep only the lower half
    Swap the blue and red channels of the image
    Convert the image to grayscale
    Convert a csv file into an excel spreadsheet
"""
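For reference, the execution step in the plugin above boils down to the following pattern: run the just-generated, untrusted code in a child process and give it a hard 10-second budget. A self-contained sketch with invented function and file names:

import multiprocessing, time

def worker(fn, arg, return_dict):
    return_dict['result'] = fn(arg)

def slow_run(path):            # stand-in for the generated TerminalFunction.run
    time.sleep(2)
    return path.upper()

if __name__ == '__main__':
    manager = multiprocessing.Manager()
    return_dict = manager.dict()
    p = multiprocessing.Process(target=worker, args=(slow_run, 'demo.txt', return_dict))
    p.start(); p.join(timeout=10)          # give the child at most 10 seconds
    if p.is_alive(): p.terminate(); p.join()
    p.close()
    print(return_dict.get('result'))       # 'DEMO.TXT', or None if it timed out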

@@ -6,7 +6,7 @@ pj = os.path.join
 ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")

 # =================================== utility functions ===============================================
-专业词汇声明 = 'If the term "agent" is used in this section, it should be translated to "智能体". '
+# 专业词汇声明 = 'If the term "agent" is used in this section, it should be translated to "智能体". '
 def switch_prompt(pfg, mode, more_requirement):
     """
     Generate prompts and system prompts based on the mode for proofreading or translating.

@@ -109,7 +109,7 @@ def arxiv_download(chatbot, history, txt):

     url_ = txt   # https://arxiv.org/abs/1707.06690
     if not txt.startswith('https://arxiv.org/abs/'):
-        msg = f"解析arxiv网址失败, 期望格式例如: https://arxiv.org/abs/1707.06690。实际得到格式: {url_}"
+        msg = f"解析arxiv网址失败, 期望格式例如: https://arxiv.org/abs/1707.06690。实际得到格式: {url_}。"
         yield from update_ui_lastest_msg(msg, chatbot=chatbot, history=history)  # refresh the UI
         return msg, None
     # <-------------- set format ------------->

@@ -255,7 +255,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
         project_folder = txt
     else:
         if txt == "": txt = '空空如也的输入栏'
-        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无法处理: {txt}")
         yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
         return

@@ -291,7 +291,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
         yield from update_ui(chatbot=chatbot, history=history); time.sleep(1)  # refresh the UI
         promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
     else:
-        chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
+        chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 您可以到Github Issue区, 用该压缩包进行反馈。如系统是Linux,请检查系统字体(见Github wiki) ...'))
         yield from update_ui(chatbot=chatbot, history=history); time.sleep(1)  # refresh the UI
         promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)

@@ -591,11 +591,16 @@ def get_files_from_everything(txt, type): # type='.md'
         # a remote file on the web
         import requests
         from toolbox import get_conf
+        from toolbox import get_log_folder, gen_time_str
         proxies, = get_conf('proxies')
-        r = requests.get(txt, proxies=proxies)
-        with open('./gpt_log/temp'+type, 'wb+') as f: f.write(r.content)
-        project_folder = './gpt_log/'
-        file_manifest = ['./gpt_log/temp'+type]
+        try:
+            r = requests.get(txt, proxies=proxies)
+        except:
+            raise ConnectionRefusedError(f"无法下载资源{txt},请检查。")
+        path = os.path.join(get_log_folder(plugin_name='web_download'), gen_time_str()+type)
+        with open(path, 'wb+') as f: f.write(r.content)
+        project_folder = get_log_folder(plugin_name='web_download')
+        file_manifest = [path]
     elif txt.endswith(type):
         # a file path given directly
         file_manifest = [txt]

@@ -37,10 +37,19 @@ Here is the output schema:
 {schema}
 ```"""

+
+PYDANTIC_FORMAT_INSTRUCTIONS_SIMPLE = """The output should be formatted as a JSON instance that conforms to the JSON schema below.
+```
+{schema}
+```"""
+
+class JsonStringError(Exception): ...
+
 class GptJsonIO():

-    def __init__(self, schema):
+    def __init__(self, schema, example_instruction=True):
         self.pydantic_object = schema
+        self.example_instruction = example_instruction
         self.format_instructions = self.generate_format_instructions()

     def generate_format_instructions(self):

@@ -53,9 +62,11 @@ class GptJsonIO():
         if "type" in reduced_schema:
             del reduced_schema["type"]
         # Ensure json in context is well-formed with double quotes.
-        schema_str = json.dumps(reduced_schema)
-
-        return PYDANTIC_FORMAT_INSTRUCTIONS.format(schema=schema_str)
+        if self.example_instruction:
+            schema_str = json.dumps(reduced_schema)
+            return PYDANTIC_FORMAT_INSTRUCTIONS.format(schema=schema_str)
+        else:
+            return PYDANTIC_FORMAT_INSTRUCTIONS_SIMPLE.format(schema=schema_str)

     def generate_output(self, text):
         # Greedy search for 1st json candidate.

@@ -95,6 +106,6 @@ class GptJsonIO():
         except Exception as e:
             # nothing more we can do; give up trying to repair
             logging.info('Repaire json fail.')
-            raise RuntimeError('Cannot repair json.', str(e))
+            raise JsonStringError('Cannot repair json.', str(e))
         return result
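For orientation, a rough standalone sketch (assumptions: pydantic v1-style .schema(), and the Plugin model below is invented) of what the new SIMPLE instruction variant roughly produces for a small schema:

import json
from pydantic import BaseModel, Field

class Plugin(BaseModel):
    plugin_selection: str = Field(description="The most related plugin.", default="F_0000")
    plugin_arg: str = Field(description="The argument of the plugin.", default="")

reduced_schema = Plugin.schema()
reduced_schema.pop("title", None); reduced_schema.pop("type", None)  # mirrors the reduced_schema step above
PYDANTIC_FORMAT_INSTRUCTIONS_SIMPLE = (
    "The output should be formatted as a JSON instance that conforms to the JSON schema below.\n"
    "```\n{schema}\n```"
)
print(PYDANTIC_FORMAT_INSTRUCTIONS_SIMPLE.format(schema=json.dumps(reduced_schema)))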

@@ -1,4 +1,4 @@
-import time, threading, json
+import time, logging, json


 class AliyunASR():

@@ -12,14 +12,14 @@ class AliyunASR():
         message = json.loads(message)
         self.parsed_sentence = message['payload']['result']
         self.event_on_entence_end.set()
-        print(self.parsed_sentence)
+        # print(self.parsed_sentence)

     def test_on_start(self, message, *args):
         # print("test_on_start:{}".format(message))
         pass

     def test_on_error(self, message, *args):
-        print("on_error args=>{}".format(args))
+        logging.error("on_error args=>{}".format(args))
         pass

     def test_on_close(self, *args):

@@ -36,7 +36,6 @@ class AliyunASR():
         # print("on_completed:args=>{} message=>{}".format(args, message))
         pass

-
     def audio_convertion_thread(self, uuid):
         # capture audio in an asynchronous thread
         import nls # pip install git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git

@@ -20,6 +20,11 @@ def get_avail_grobid_url():
 def parse_pdf(pdf_path, grobid_url):
     import scipdf   # pip install scipdf_parser
     if grobid_url.endswith('/'): grobid_url = grobid_url.rstrip('/')
-    article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url)
+    try:
+        article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url)
+    except GROBID_OFFLINE_EXCEPTION:
+        raise GROBID_OFFLINE_EXCEPTION("GROBID服务不可用,请修改config中的GROBID_URL,可修改成本地GROBID服务。")
+    except:
+        raise RuntimeError("解析PDF失败,请检查PDF是否损坏。")
     return article_dict

@@ -1,9 +1,9 @@
 from pydantic import BaseModel, Field
 from typing import List
-from toolbox import update_ui_lastest_msg, get_conf
+from toolbox import update_ui_lastest_msg, disable_auto_promotion
 from request_llm.bridge_all import predict_no_ui_long_connection
-from crazy_functions.json_fns.pydantic_io import GptJsonIO
-import copy, json, pickle, os, sys
+from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
+import copy, json, pickle, os, sys, time


 def read_avail_plugin_enum():
@@ -11,37 +11,85 @@ def read_avail_plugin_enum():
     plugin_arr = get_crazy_functions()
     # remove plugins without explanation
     plugin_arr = {k:v for k, v in plugin_arr.items() if 'Info' in v}
-    plugin_arr_info = {"F{:04d}".format(i):v["Info"] for i, v in enumerate(plugin_arr.values(), start=1)}
-    plugin_arr_dict = {"F{:04d}".format(i):v for i, v in enumerate(plugin_arr.values(), start=1)}
+    plugin_arr_info = {"F_{:04d}".format(i):v["Info"] for i, v in enumerate(plugin_arr.values(), start=1)}
+    plugin_arr_dict = {"F_{:04d}".format(i):v for i, v in enumerate(plugin_arr.values(), start=1)}
+    plugin_arr_dict_parse = {"F_{:04d}".format(i):v for i, v in enumerate(plugin_arr.values(), start=1)}
+    plugin_arr_dict_parse.update({f"F_{i}":v for i, v in enumerate(plugin_arr.values(), start=1)})
     prompt = json.dumps(plugin_arr_info, ensure_ascii=False, indent=2)
     prompt = "\n\nThe defination of PluginEnum:\nPluginEnum=" + prompt
-    return prompt, plugin_arr_dict
+    return prompt, plugin_arr_dict, plugin_arr_dict_parse

+def wrap_code(txt):
+    txt = txt.replace('```','')
+    return f"\n```\n{txt}\n```\n"
+
+def have_any_recent_upload_files(chatbot):
+    _5min = 5 * 60
+    if not chatbot: return False                            # chatbot is None
+    most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
+    if not most_recent_uploaded: return False               # most_recent_uploaded is None
+    if time.time() - most_recent_uploaded["time"] < _5min: return True  # most_recent_uploaded is new
+    else: return False                                      # most_recent_uploaded is too old
+
+def get_recent_file_prompt_support(chatbot):
+    most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
+    path = most_recent_uploaded['path']
+    prompt = "\nAdditional Information:\n"
+    prompt = "In case that this plugin requires a path or a file as argument,"
+    prompt += f"it is important for you to know that the user has recently uploaded a file, located at: `{path}`"
+    prompt += f"Only use it when necessary, otherwise, you can ignore this file."
+    return prompt
+
+def get_inputs_show_user(inputs, plugin_arr_enum_prompt):
+    # remove plugin_arr_enum_prompt from inputs string
+    inputs_show_user = inputs.replace(plugin_arr_enum_prompt, "")
+    inputs_show_user += plugin_arr_enum_prompt[:200] + '...'
+    inputs_show_user += '\n...\n'
+    inputs_show_user += '...\n'
+    inputs_show_user += '...}'
+    return inputs_show_user

 def execute_plugin(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention):
-    plugin_arr_enum_prompt, plugin_arr_dict = read_avail_plugin_enum()
+    plugin_arr_enum_prompt, plugin_arr_dict, plugin_arr_dict_parse = read_avail_plugin_enum()
     class Plugin(BaseModel):
-        plugin_selection: str = Field(description="The most related plugin from one of the PluginEnum.", default="F0000000000000")
-        plugin_arg: str = Field(description="The argument of the plugin. A path or url or empty.", default="")
-
+        plugin_selection: str = Field(description="The most related plugin from one of the PluginEnum.", default="F_0000")
+        reason_of_selection: str = Field(description="The reason why you should select this plugin.", default="This plugin satisfy user requirement most")
     # ⭐ ⭐ ⭐ select the plugin
     yield from update_ui_lastest_msg(lastmsg=f"正在执行任务: {txt}\n\n查找可用插件中...", chatbot=chatbot, history=history, delay=0)
     gpt_json_io = GptJsonIO(Plugin)
+    gpt_json_io.format_instructions = "The format of your output should be a json that can be parsed by json.loads.\n"
+    gpt_json_io.format_instructions += """Output example: {"plugin_selection":"F_1234", "reason_of_selection":"F_1234 plugin satisfy user requirement most"}\n"""
+    gpt_json_io.format_instructions += "The plugins you are authorized to use are listed below:\n"
+    gpt_json_io.format_instructions += plugin_arr_enum_prompt
-    inputs = "Choose the correct plugin and extract plugin_arg, the user requirement is: \n\n" + \
-             ">> " + txt.rstrip('\n').replace('\n','\n>> ') + '\n\n' + \
-             gpt_json_io.format_instructions
+    inputs = "Choose the correct plugin according to user requirements, the user requirement is: \n\n" + \
+             ">> " + txt.rstrip('\n').replace('\n','\n>> ') + '\n\n' + gpt_json_io.format_instructions

     run_gpt_fn = lambda inputs, sys_prompt: predict_no_ui_long_connection(
         inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])
-    plugin_sel = gpt_json_io.generate_output_auto_repair(run_gpt_fn(inputs, ""), run_gpt_fn)
-
-    if plugin_sel.plugin_selection not in plugin_arr_dict:
-        msg = f'找不到合适插件执行该任务'
+    try:
+        gpt_reply = run_gpt_fn(inputs, "")
+        plugin_sel = gpt_json_io.generate_output_auto_repair(gpt_reply, run_gpt_fn)
+    except JsonStringError:
+        msg = f"抱歉, {llm_kwargs['llm_model']}无法理解您的需求。"
+        msg += "请求的Prompt为:\n" + wrap_code(get_inputs_show_user(inputs, plugin_arr_enum_prompt))
+        msg += "语言模型回复为:\n" + wrap_code(gpt_reply)
+        msg += "\n但您可以尝试再试一次\n"
         yield from update_ui_lastest_msg(lastmsg=msg, chatbot=chatbot, history=history, delay=2)
         return
+    if plugin_sel.plugin_selection not in plugin_arr_dict_parse:
+        msg = f"抱歉, 找不到合适插件执行该任务, 或者{llm_kwargs['llm_model']}无法理解您的需求。"
+        msg += f"语言模型{llm_kwargs['llm_model']}选择了不存在的插件:\n" + wrap_code(gpt_reply)
+        msg += "\n但您可以尝试再试一次\n"
+        yield from update_ui_lastest_msg(lastmsg=msg, chatbot=chatbot, history=history, delay=2)
+        return

     # ⭐ ⭐ ⭐ confirm the plugin arguments
-    plugin = plugin_arr_dict[plugin_sel.plugin_selection]
+    if not have_any_recent_upload_files(chatbot):
+        appendix_info = ""
+    else:
+        appendix_info = get_recent_file_prompt_support(chatbot)
+
+    plugin = plugin_arr_dict_parse[plugin_sel.plugin_selection]
     yield from update_ui_lastest_msg(lastmsg=f"正在执行任务: {txt}\n\n提取插件参数...", chatbot=chatbot, history=history, delay=0)
     class PluginExplicit(BaseModel):
         plugin_selection: str = plugin_sel.plugin_selection

@@ -50,7 +98,7 @@ def execute_plugin(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prom
     gpt_json_io.format_instructions += "The information about this plugin is:" + plugin["Info"]
     inputs = f"A plugin named {plugin_sel.plugin_selection} is selected, " + \
              "you should extract plugin_arg from the user requirement, the user requirement is: \n\n" + \
-             ">> " + txt.rstrip('\n').replace('\n','\n>> ') + '\n\n' + \
+             ">> " + (txt + appendix_info).rstrip('\n').replace('\n','\n>> ') + '\n\n' + \
              gpt_json_io.format_instructions
     run_gpt_fn = lambda inputs, sys_prompt: predict_no_ui_long_connection(
         inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])

@@ -60,7 +108,7 @@ def execute_plugin(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prom
     # ⭐ ⭐ ⭐ run the plugin
     fn = plugin['Function']
     fn_name = fn.__name__
-    msg = f'正在调用插件: {fn_name}\n\n插件说明:{plugin["Info"]}\n\n插件参数:{plugin_sel.plugin_arg}'
+    msg = f'{llm_kwargs["llm_model"]}为您选择了插件: `{fn_name}`\n\n插件说明:{plugin["Info"]}\n\n插件参数:{plugin_sel.plugin_arg}\n\n假如偏离了您的要求,按停止键终止。'
    yield from update_ui_lastest_msg(lastmsg=msg, chatbot=chatbot, history=history, delay=2)
     yield from fn(plugin_sel.plugin_arg, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, -1)
     return

crazy_functions/vt_fns/vt_state.py · 28 lines · new file
@@ -0,0 +1,28 @@
import pickle

class VoidTerminalState():
    def __init__(self):
        self.reset_state()

    def reset_state(self):
        self.has_provided_explaination = False

    def lock_plugin(self, chatbot):
        chatbot._cookies['lock_plugin'] = 'crazy_functions.虚空终端->虚空终端'
        chatbot._cookies['plugin_state'] = pickle.dumps(self)

    def unlock_plugin(self, chatbot):
        self.reset_state()
        chatbot._cookies['lock_plugin'] = None
        chatbot._cookies['plugin_state'] = pickle.dumps(self)

    def set_state(self, chatbot, key, value):
        setattr(self, key, value)
        chatbot._cookies['plugin_state'] = pickle.dumps(self)

    def get_state(chatbot):
        state = chatbot._cookies.get('plugin_state', None)
        if state is not None: state = pickle.loads(state)
        else: state = VoidTerminalState()
        state.chatbot = chatbot
        return state
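Illustrative round trip through the state helper above (not from the diff); FakeChatbot is a stand-in that only provides the _cookies dict the class relies on:

from crazy_functions.vt_fns.vt_state import VoidTerminalState

class FakeChatbot:
    def __init__(self):
        self._cookies = {}

chatbot = FakeChatbot()
state = VoidTerminalState.get_state(chatbot)              # fresh state on first call
state.set_state(chatbot, key='has_provided_explaination', value=True)
state.lock_plugin(chatbot)                                # route the user's next submit back to the plugin
print(chatbot._cookies['lock_plugin'])                    # crazy_functions.虚空终端->虚空终端

restored = VoidTerminalState.get_state(chatbot)           # round-trips through the pickled cookie
print(restored.has_provided_explaination)                 # True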

crazy_functions/批量翻译PDF文档_NOUGAT.py · 271 lines · new file
@@ -0,0 +1,271 @@
from toolbox import CatchException, report_execption, gen_time_str
from toolbox import update_ui, promote_file_to_downloadzone, update_ui_lastest_msg, disable_auto_promotion
from toolbox import write_history_to_file, get_log_folder
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from .crazy_utils import read_and_clean_pdf_text
from .pdf_fns.parse_pdf import parse_pdf, get_avail_grobid_url
from colorful import *
import os
import math
import logging

def markdown_to_dict(article_content):
    import markdown
    from bs4 import BeautifulSoup
    cur_t = ""
    cur_c = ""
    results = {}
    for line in article_content:
        if line.startswith('#'):
            if cur_t!="":
                if cur_t not in results:
                    results.update({cur_t:cur_c.lstrip('\n')})
                else:
                    # handle sections whose headings collide
                    results.update({cur_t + " " + gen_time_str():cur_c.lstrip('\n')})
            cur_t = line.rstrip('\n')
            cur_c = ""
        else:
            cur_c += line
    results_final = {}
    for k in list(results.keys()):
        if k.startswith('# '):
            results_final['title'] = k.split('# ')[-1]
            results_final['authors'] = results.pop(k).lstrip('\n')
        if k.startswith('###### Abstract'):
            results_final['abstract'] = results.pop(k).lstrip('\n')

    results_final_sections = []
    for k,v in results.items():
        results_final_sections.append({
            'heading':k.lstrip("# "),
            'text':v if len(v) > 0 else f"The beginning of {k.lstrip('# ')} section."
        })
    results_final['sections'] = results_final_sections
    return results_final
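A quick check of markdown_to_dict() above with a minimal, invented Nougat-style .mmd fragment (note that the trailing section before end-of-file is never flushed by the loop, so a closing heading is included here):

sample = [
    "# A Hypothetical Paper\n",
    "Alice Example, Bob Sample\n",
    "###### Abstract\n",
    "We study nothing in particular.\n",
    "## 1 Introduction\n",
    "Body text of the introduction.\n",
    "## 2 Conclusion\n",
    "Nothing to conclude.\n",
]
d = markdown_to_dict(sample)
print(d['title'])                    # A Hypothetical Paper
print(d['abstract'].strip())         # We study nothing in particular.
print(d['sections'][0]['heading'])   # 1 Introduction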

@CatchException
def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):

    disable_auto_promotion(chatbot)
    # basic information: features and contributors
    chatbot.append([
        "函数插件功能?",
        "批量翻译PDF文档。函数插件贡献者: Binary-Husky"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # try to import the dependencies; if any are missing, suggest how to install them
    try:
        import nougat
        import tiktoken
    except:
        report_execption(chatbot, history,
                         a=f"解析项目: {txt}",
                         b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade nougat-ocr tiktoken```。")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # clear history to avoid overflowing the input
    history = []

    from .crazy_utils import get_files_from_everything
    success, file_manifest, project_folder = get_files_from_everything(txt, type='.pdf')
    # validate the input; if nothing was given, exit directly
    if not success:
        if txt == "": txt = '空空如也的输入栏'

    # if no files were found
    if len(file_manifest) == 0:
        report_execption(chatbot, history,
                         a=f"解析项目: {txt}", b=f"找不到任何.tex或.pdf文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # start the actual task
    yield from 解析PDF_基于NOUGAT(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)


def nougat_with_timeout(command, cwd, timeout=3600):
    import subprocess
    process = subprocess.Popen(command, shell=True, cwd=cwd)
    try:
        stdout, stderr = process.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        process.kill()
        stdout, stderr = process.communicate()
        print("Process timed out!")
        return False
    return True


def NOUGAT_parse_pdf(fp):
    import glob
    from toolbox import get_log_folder, gen_time_str
    dst = os.path.join(get_log_folder(plugin_name='nougat'), gen_time_str())
    os.makedirs(dst)
    nougat_with_timeout(f'nougat --out "{os.path.abspath(dst)}" "{os.path.abspath(fp)}"', os.getcwd())
    res = glob.glob(os.path.join(dst,'*.mmd'))
    if len(res) == 0:
        raise RuntimeError("Nougat解析论文失败。")
    return res[0]


def 解析PDF_基于NOUGAT(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
    import copy
    import tiktoken
    TOKEN_LIMIT_PER_FRAGMENT = 1280
    generated_conclusion_files = []
    generated_html_files = []
    DST_LANG = "中文"
    for index, fp in enumerate(file_manifest):
        chatbot.append(["当前进度:", f"正在解析论文,请稍候。(第一次运行时,需要花费较长时间下载NOUGAT参数)"]); yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        fpp = NOUGAT_parse_pdf(fp)

        with open(fpp, 'r', encoding='utf8') as f:
            article_content = f.readlines()
        article_dict = markdown_to_dict(article_content)
        logging.info(article_dict)

        prompt = "以下是一篇学术论文的基本信息:\n"
        # title
        title = article_dict.get('title', '无法获取 title'); prompt += f'title:{title}\n\n'
        # authors
        authors = article_dict.get('authors', '无法获取 authors'); prompt += f'authors:{authors}\n\n'
        # abstract
        abstract = article_dict.get('abstract', '无法获取 abstract'); prompt += f'abstract:{abstract}\n\n'
        # command
        prompt += f"请将题目和摘要翻译为{DST_LANG}。"
        meta = [f'# Title:\n\n', title, f'# Abstract:\n\n', abstract ]

        # single thread: fetch the paper's meta information
        paper_meta_info = yield from request_gpt_model_in_new_thread_with_ui_alive(
            inputs=prompt,
            inputs_show_user=prompt,
            llm_kwargs=llm_kwargs,
            chatbot=chatbot, history=[],
            sys_prompt="You are an academic paper reader。",
        )

        # multiple threads: translate
        inputs_array = []
        inputs_show_user_array = []

        # get_token_num
        from request_llm.bridge_all import model_info
        enc = model_info[llm_kwargs['llm_model']]['tokenizer']
        def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
        from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf

        def break_down(txt):
            raw_token_num = get_token_num(txt)
            if raw_token_num <= TOKEN_LIMIT_PER_FRAGMENT:
                return [txt]
            else:
                # raw_token_num > TOKEN_LIMIT_PER_FRAGMENT
                # find a smooth token limit to achieve even separation
                count = int(math.ceil(raw_token_num / TOKEN_LIMIT_PER_FRAGMENT))
                token_limit_smooth = raw_token_num // count + count
                return breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn=get_token_num, limit=token_limit_smooth)

        for section in article_dict.get('sections'):
            if len(section['text']) == 0: continue
            section_frags = break_down(section['text'])
            for i, fragment in enumerate(section_frags):
                heading = section['heading']
                if len(section_frags) > 1: heading += f' Part-{i+1}'
                inputs_array.append(
                    f"你需要翻译{heading}章节,内容如下: \n\n{fragment}"
                )
                inputs_show_user_array.append(
                    f"# {heading}\n\n{fragment}"
                )

        gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
            inputs_array=inputs_array,
            inputs_show_user_array=inputs_show_user_array,
            llm_kwargs=llm_kwargs,
            chatbot=chatbot,
            history_array=[meta for _ in inputs_array],
            sys_prompt_array=[
                "请你作为一个学术翻译,负责把学术论文准确翻译成中文。注意文章中的每一句话都要翻译。" for _ in inputs_array],
        )
        res_path = write_history_to_file(meta + ["# Meta Translation" , paper_meta_info] + gpt_response_collection, file_basename=None, file_fullname=None)
        promote_file_to_downloadzone(res_path, rename_file=os.path.basename(fp)+'.md', chatbot=chatbot)
        generated_conclusion_files.append(res_path)

        ch = construct_html()
        orig = ""
        trans = ""
        gpt_response_collection_html = copy.deepcopy(gpt_response_collection)
        for i,k in enumerate(gpt_response_collection_html):
            if i%2==0:
                gpt_response_collection_html[i] = inputs_show_user_array[i//2]
            else:
                gpt_response_collection_html[i] = gpt_response_collection_html[i]

        final = ["", "", "一、论文概况", "", "Abstract", paper_meta_info, "二、论文翻译", ""]
        final.extend(gpt_response_collection_html)
        for i, k in enumerate(final):
            if i%2==0:
                orig = k
            if i%2==1:
                trans = k
                ch.add_row(a=orig, b=trans)
        create_report_file_name = f"{os.path.basename(fp)}.trans.html"
        html_file = ch.save_file(create_report_file_name)
        generated_html_files.append(html_file)
        promote_file_to_downloadzone(html_file, rename_file=os.path.basename(html_file), chatbot=chatbot)

    chatbot.append(("给出输出文件清单", str(generated_conclusion_files + generated_html_files)))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
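The "smooth" limit computed in break_down() above just relaxes the hard per-fragment budget so the pieces come out roughly even instead of leaving a tiny tail; a standalone illustration with made-up numbers:

import math

TOKEN_LIMIT_PER_FRAGMENT = 1280
raw_token_num = 3000                                              # pretend the section has 3000 tokens

count = int(math.ceil(raw_token_num / TOKEN_LIMIT_PER_FRAGMENT))  # 3 fragments
token_limit_smooth = raw_token_num // count + count               # 1003 tokens per fragment
print(count, token_limit_smooth)                                  # 3 1003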


class construct_html():
    def __init__(self) -> None:
        self.css = """
            .row {
                display: flex;
                flex-wrap: wrap;
            }

            .column {
                flex: 1;
                padding: 10px;
            }

            .table-header {
                font-weight: bold;
                border-bottom: 1px solid black;
            }

            .table-row {
                border-bottom: 1px solid lightgray;
            }

            .table-cell {
                padding: 5px;
            }
        """
        self.html_string = f'<!DOCTYPE html><head><meta charset="utf-8"><title>翻译结果</title><style>{self.css}</style></head>'

    def add_row(self, a, b):
        tmp = """
            <div class="row table-row">
                <div class="column table-cell">REPLACE_A</div>
                <div class="column table-cell">REPLACE_B</div>
            </div>
        """
        from toolbox import markdown_convertion
        tmp = tmp.replace('REPLACE_A', markdown_convertion(a))
        tmp = tmp.replace('REPLACE_B', markdown_convertion(b))
        self.html_string += tmp

    def save_file(self, file_name):
        with open(os.path.join(get_log_folder(), file_name), 'w', encoding='utf8') as f:
            f.write(self.html_string.encode('utf-8', 'ignore').decode())
        return os.path.join(get_log_folder(), file_name)
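Hypothetical usage of construct_html above within the repo (the strings are placeholders; markdown_convertion and get_log_folder come from toolbox):

ch = construct_html()
ch.add_row(a="# Section 1\n\nOriginal English text.", b="# 第一节\n\n翻译后的中文文本。")
ch.add_row(a="Second paragraph.", b="第二段。")
html_path = ch.save_file("demo.trans.html")   # written under the toolbox log folder
print(html_path)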

@@ -24,10 +24,11 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
     try:
         import fitz
         import tiktoken
+        import scipdf
     except:
         report_execption(chatbot, history,
                          a=f"解析项目: {txt}",
-                         b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf tiktoken```。")
+                         b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf tiktoken scipdf_parser```。")
         yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
         return

@@ -58,7 +59,6 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst

 def 解析PDF_基于GROBID(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, grobid_url):
     import copy
-    import tiktoken
     TOKEN_LIMIT_PER_FRAGMENT = 1280
     generated_conclusion_files = []
     generated_html_files = []

@@ -66,7 +66,7 @@ def 解析PDF_基于GROBID(file_manifest, project_folder, llm_kwargs, plugin_kwa
     for index, fp in enumerate(file_manifest):
         chatbot.append(["当前进度:", f"正在连接GROBID服务,请稍候: {grobid_url}\n如果等待时间过长,请修改config中的GROBID_URL,可修改成本地GROBID服务。"]); yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
         article_dict = parse_pdf(fp, grobid_url)
-        print(article_dict)
+        if article_dict is None: raise RuntimeError("解析PDF失败,请检查PDF是否损坏。")
         prompt = "以下是一篇学术论文的基本信息:\n"
         # title
         title = article_dict.get('title', '无法获取 title'); prompt += f'title:{title}\n\n'

@@ -75,7 +75,11 @@ def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
     proxies, = get_conf('proxies')
     urls = google(txt, proxies)
     history = []

+    if len(urls) == 0:
+        chatbot.append((f"结论:{txt}",
+                        "[Local Message] 受到google限制,无法从google获取信息!"))
+        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI early, since the GPT request will take a while
+        return
     # ------------- < step 2: visit the pages one by one > -------------
     max_search_result = 5  # maximum number of pages to include
     for index, url in enumerate(urls[:max_search_result]):

@@ -75,7 +75,11 @@ def 连接bing搜索回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, histor
     proxies, = get_conf('proxies')
     urls = bing_search(txt, proxies)
     history = []

+    if len(urls) == 0:
+        chatbot.append((f"结论:{txt}",
+                        "[Local Message] 受到bing限制,无法从bing获取信息!"))
+        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI early, since the GPT request will take a while
+        return
     # ------------- < step 2: visit the pages one by one > -------------
     max_search_result = 8  # maximum number of pages to include
     for index, url in enumerate(urls[:max_search_result]):

@@ -1,24 +1,68 @@
+"""
+Explanation of the Void Terminal Plugin:
+
+Please describe in natural language what you want to do.
+
+1. You can open the plugin's dropdown menu to explore various capabilities of this project, and then describe your needs in natural language, for example:
+- "Please call the plugin to translate a PDF paper for me. I just uploaded the paper to the upload area."
+- "Please use the plugin to translate a PDF paper, with the address being https://www.nature.com/articles/s41586-019-1724-z.pdf."
+- "Generate an image with blooming flowers and lush green grass using the plugin."
+- "Translate the README using the plugin. The GitHub URL is https://github.com/facebookresearch/co-tracker."
+- "Translate an Arxiv paper for me. The Arxiv ID is 1812.10695. Remember to use the plugin and don't do it manually!"
+- "I don't like the current interface color. Modify the configuration and change the theme to THEME="High-Contrast"."
+- "Could you please explain the structure of the Transformer network?"
+
+2. If you use keywords like "call the plugin xxx", "modify the configuration xxx", "please", etc., your intention can be recognized more accurately.
+
+3. Your intention can be recognized more accurately when using powerful models like GPT4. This plugin is relatively new, so please feel free to provide feedback on GitHub.
+
+4. Now, if you need to process a file, please upload the file (drag the file to the file upload area) or describe the path to the file.
+
+5. If you don't need to upload a file, you can simply repeat your command again.
+"""
+explain_msg = """
+## 虚空终端插件说明:
+
+1. 请用**自然语言**描述您需要做什么。例如:
+- 「请调用插件,为我翻译PDF论文,论文我刚刚放到上传区了」
+- 「请调用插件翻译PDF论文,地址为https://aaa/bbb/ccc.pdf」
+- 「把Arxiv论文翻译成中文PDF,arxiv论文的ID是1812.10695,记得用插件!」
+- 「生成一张图片,图中鲜花怒放,绿草如茵,用插件实现」
+- 「用插件翻译README,Github网址是https://github.com/facebookresearch/co-tracker」
+- 「我不喜欢当前的界面颜色,修改配置,把主题THEME更换为THEME="High-Contrast"」
+- 「请问Transformer网络的结构是怎样的?」
+
+2. 您可以打开插件下拉菜单以了解本项目的各种能力。
+
+3. 如果您使用「调用插件xxx」、「修改配置xxx」、「请问」等关键词,您的意图可以被识别的更准确。
+
+4. 建议使用 GPT3.5 或更强的模型,弱模型可能无法理解您的想法。该插件诞生时间不长,欢迎您前往Github反馈问题。
+
+5. 现在,如果需要处理文件,请您上传文件(将文件拖动到文件上传区),或者描述文件所在的路径。
+
+6. 如果不需要上传文件,现在您只需要再次重复一次您的指令即可。
+"""
+
 from pydantic import BaseModel, Field
 from typing import List
 from toolbox import CatchException, update_ui, gen_time_str
-from toolbox import update_ui_lastest_msg
+from toolbox import update_ui_lastest_msg, disable_auto_promotion
 from request_llm.bridge_all import predict_no_ui_long_connection
 from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
 from crazy_functions.crazy_utils import input_clipping
-from crazy_functions.json_fns.pydantic_io import GptJsonIO
+from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
+from crazy_functions.vt_fns.vt_state import VoidTerminalState
 from crazy_functions.vt_fns.vt_modify_config import modify_configuration_hot
 from crazy_functions.vt_fns.vt_modify_config import modify_configuration_reboot
 from crazy_functions.vt_fns.vt_call_plugin import execute_plugin
 from enum import Enum
 import copy, json, pickle, os, sys


 class UserIntention(BaseModel):
     user_prompt: str = Field(description="the content of user input", default="")
-    intention_type: str = Field(description="the type of user intention, choose from ['ModifyConfiguration', 'ExecutePlugin', 'Chat']", default="Chat")
+    intention_type: str = Field(description="the type of user intention, choose from ['ModifyConfiguration', 'ExecutePlugin', 'Chat']", default="ExecutePlugin")
     user_provide_file: bool = Field(description="whether the user provides a path to a file", default=False)
     user_provide_url: bool = Field(description="whether the user provides a url", default=False)


 def chat(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention):
     gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
         inputs=txt, inputs_show_user=txt,

@@ -30,12 +74,24 @@ def chat(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_i
     yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
     pass

-def analyze_with_rule(txt):
+
+explain_intention_to_user = {
+    'Chat': "聊天对话",
+    'ExecutePlugin': "调用插件",
+    'ModifyConfiguration': "修改配置",
+}
+
+
+def analyze_intention_with_simple_rules(txt):
     user_intention = UserIntention()
     user_intention.user_prompt = txt
     is_certain = False

-    if '调用插件' in txt:
+    if '请问' in txt:
         is_certain = True
+        user_intention.intention_type = 'Chat'
+
+    if '用插件' in txt:
+        is_certain = True
         user_intention.intention_type = 'ExecutePlugin'

@@ -45,43 +101,68 @@ def analyze_with_rule(txt):

     return is_certain, user_intention


 @CatchException
-def 自动终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
-    """
-    txt             输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
-    llm_kwargs      gpt模型参数, 如温度和top_p等, 一般原样传递下去就行
-    plugin_kwargs   插件模型的参数, 如温度和top_p等, 一般原样传递下去就行
-    chatbot         聊天显示框的句柄,用于显示给用户
-    history         聊天历史,前情提要
-    system_prompt   给gpt的静默提醒
-    web_port        当前软件运行的端口号
-    """
-    history = []    # 清空历史,以免输入溢出
-    chatbot.append(("自动终端状态: ", f"正在执行任务: {txt}"))
+def 虚空终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    disable_auto_promotion(chatbot=chatbot)
+    # fetch the current Void Terminal state
+    state = VoidTerminalState.get_state(chatbot)
+    appendix_msg = ""
+
+    # detect the user's intention with simple keyword rules
+    is_certain, _ = analyze_intention_with_simple_rules(txt)
+    if txt.startswith('private_upload/') and len(txt) == 34:
+        state.set_state(chatbot=chatbot, key='has_provided_explaination', value=False)
+        appendix_msg = "\n\n**很好,您已经上传了文件**,现在请您描述您的需求。"
+
+    if is_certain or (state.has_provided_explaination):
+        # the intention is clear; skip the explanation step
+        state.set_state(chatbot=chatbot, key='has_provided_explaination', value=True)
+        state.unlock_plugin(chatbot=chatbot)
+        yield from update_ui(chatbot=chatbot, history=history)
+        yield from 虚空终端主路由(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
+        return
+    else:
+        # the intention is vague; show the explanation first
+        state.set_state(chatbot=chatbot, key='has_provided_explaination', value=True)
+        state.lock_plugin(chatbot=chatbot)
+        chatbot.append(("虚空终端状态:", explain_msg+appendix_msg))
+        yield from update_ui(chatbot=chatbot, history=history)
+        return
+
+
+def 虚空终端主路由(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    history = []
+    chatbot.append(("虚空终端状态: ", f"正在执行任务: {txt}"))
     yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

-    # 初始化插件状态
-    state = chatbot._cookies.get('plugin_state', None)
-    if state is not None: state = pickle.loads(state)
-    else: state = {}
-
-    def update_vt_state():
-        # 赋予插件锁定 锁定插件回调路径,当下一次用户提交时,会直接转到该函数
-        chatbot._cookies['lock_plugin'] = 'crazy_functions.虚空终端->自动终端'
-        chatbot._cookies['vt_state'] = pickle.dumps(state)
-
     # ⭐ ⭐ ⭐ analyze the user's intention
-    is_certain, user_intention = analyze_with_rule(txt)
+    is_certain, user_intention = analyze_intention_with_simple_rules(txt)
     if not is_certain:
-        yield from update_ui_lastest_msg(lastmsg=f"正在执行任务: {txt}\n\n分析用户意图中", chatbot=chatbot, history=history, delay=0)
+        yield from update_ui_lastest_msg(
+            lastmsg=f"正在执行任务: {txt}\n\n分析用户意图中", chatbot=chatbot, history=history, delay=0)
         gpt_json_io = GptJsonIO(UserIntention)
-        inputs = "Analyze the intention of the user according to following user input: \n\n" + txt + '\n\n' + gpt_json_io.format_instructions
+        rf_req = "\nchoose from ['ModifyConfiguration', 'ExecutePlugin', 'Chat']"
+        inputs = "Analyze the intention of the user according to following user input: \n\n" + \
+                 ">> " + (txt+rf_req).rstrip('\n').replace('\n','\n>> ') + '\n\n' + gpt_json_io.format_instructions
         run_gpt_fn = lambda inputs, sys_prompt: predict_no_ui_long_connection(
             inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])
-        user_intention = gpt_json_io.generate_output_auto_repair(run_gpt_fn(inputs, ""), run_gpt_fn)
+        analyze_res = run_gpt_fn(inputs, "")
+        try:
+            user_intention = gpt_json_io.generate_output_auto_repair(analyze_res, run_gpt_fn)
+            lastmsg=f"正在执行任务: {txt}\n\n用户意图理解: 意图={explain_intention_to_user[user_intention.intention_type]}",
+        except JsonStringError as e:
+            yield from update_ui_lastest_msg(
+                lastmsg=f"正在执行任务: {txt}\n\n用户意图理解: 失败 当前语言模型({llm_kwargs['llm_model']})不能理解您的意图", chatbot=chatbot, history=history, delay=0)
+            return
+    else:
+        pass

+    yield from update_ui_lastest_msg(
+        lastmsg=f"正在执行任务: {txt}\n\n用户意图理解: 意图={explain_intention_to_user[user_intention.intention_type]}",
+        chatbot=chatbot, history=history, delay=0)

     # the user's intention: modify this project's configuration
     if user_intention.intention_type == 'ModifyConfiguration':
         yield from modify_configuration_reboot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention)

@@ -96,23 +177,3 @@ def 自动终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt

     return

-
-
-
-    # # if state == 'wait_user_keyword':
-    # #     chatbot._cookies['lock_plugin'] = None          # 解除插件锁定,避免遗忘导致死锁
-    # #     chatbot._cookies['plugin_state_0001'] = None    # 解除插件状态,避免遗忘导致死锁
-
-    # #     # 解除插件锁定
-    # #     chatbot.append((f"获取关键词:{txt}", ""))
-    # #     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-    # #     inputs=inputs_show_user=f"Extract all image urls in this html page, pick the first 5 images and show them with markdown format: \n\n {page_return}"
-    # #     gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
-    # #         inputs=inputs, inputs_show_user=inputs_show_user,
-    # #         llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
-    # #         sys_prompt="When you want to show an image, use markdown format. e.g. . If there are no image url provided, answer 'no image url provided'"
-    # #     )
-    # #     chatbot[-1] = [chatbot[-1][0], gpt_say]
-    # yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-    # return

@@ -80,9 +80,9 @@ class InterviewAssistant(AliyunASR):
     def __init__(self):
         self.capture_interval = 0.5 # second
         self.stop = False
-        self.parsed_text = ""
-        self.parsed_sentence = ""
-        self.buffered_sentence = ""
+        self.parsed_text = ""        # the part of the current sentence already spoken, written by test_on_result_chg()
+        self.parsed_sentence = ""    # the full sentence of an utterance, written by test_on_sentence_end()
+        self.buffered_sentence = ""  #
         self.event_on_result_chg = threading.Event()
         self.event_on_entence_end = threading.Event()
         self.event_on_commit_question = threading.Event()

@@ -132,7 +132,7 @@ class InterviewAssistant(AliyunASR):
             self.plugin_wd.feed()

             if self.event_on_result_chg.is_set():
-                # update audio decode result
+                # called when some words have finished
                 self.event_on_result_chg.clear()
                 chatbot[-1] = list(chatbot[-1])
                 chatbot[-1][0] = self.buffered_sentence + self.parsed_text

@@ -144,7 +144,11 @@ class InterviewAssistant(AliyunASR):
                 # called when a sentence has ended
                 self.event_on_entence_end.clear()
-                self.buffered_sentence += self.parsed_sentence
+                self.parsed_text = self.parsed_sentence
+                self.buffered_sentence += self.parsed_text
+                chatbot[-1] = list(chatbot[-1])
+                chatbot[-1][0] = self.buffered_sentence
+                history = chatbot2history(chatbot)
+                yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

             if self.event_on_commit_question.is_set():
                 # called when a question should be committed