diff --git a/crazy_functional.py b/crazy_functional.py index df6a2e32..609c0a15 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -5,21 +5,21 @@ from toolbox import trimmed_format_exc def get_crazy_functions(): from crazy_functions.读文章写摘要 import 读文章写摘要 from crazy_functions.生成函数注释 import 批量生成函数注释 - from crazy_functions.解析项目源代码 import 解析项目本身 - from crazy_functions.解析项目源代码 import 解析一个Python项目 - from crazy_functions.解析项目源代码 import 解析一个Matlab项目 - from crazy_functions.解析项目源代码 import 解析一个C项目的头文件 - from crazy_functions.解析项目源代码 import 解析一个C项目 - from crazy_functions.解析项目源代码 import 解析一个Golang项目 - from crazy_functions.解析项目源代码 import 解析一个Rust项目 - from crazy_functions.解析项目源代码 import 解析一个Java项目 - from crazy_functions.解析项目源代码 import 解析一个前端项目 + from crazy_functions.SourceCode_Analyse import 解析项目本身 + from crazy_functions.SourceCode_Analyse import 解析一个Python项目 + from crazy_functions.SourceCode_Analyse import 解析一个Matlab项目 + from crazy_functions.SourceCode_Analyse import 解析一个C项目的头文件 + from crazy_functions.SourceCode_Analyse import 解析一个C项目 + from crazy_functions.SourceCode_Analyse import 解析一个Golang项目 + from crazy_functions.SourceCode_Analyse import 解析一个Rust项目 + from crazy_functions.SourceCode_Analyse import 解析一个Java项目 + from crazy_functions.SourceCode_Analyse import 解析一个前端项目 from crazy_functions.高级功能函数模板 import 高阶功能模板函数 from crazy_functions.高级功能函数模板 import Demo_Wrap from crazy_functions.Latex全文润色 import Latex英文润色 from crazy_functions.询问多个大语言模型 import 同时问询 - from crazy_functions.解析项目源代码 import 解析一个Lua项目 - from crazy_functions.解析项目源代码 import 解析一个CSharp项目 + from crazy_functions.SourceCode_Analyse import 解析一个Lua项目 + from crazy_functions.SourceCode_Analyse import 解析一个CSharp项目 from crazy_functions.总结word文档 import 总结word文档 from crazy_functions.解析JupyterNotebook import 解析ipynb文件 from crazy_functions.Conversation_To_File import 载入对话历史存档 @@ -448,7 +448,7 @@ def get_crazy_functions(): # print("Load function plugin failed") try: - from crazy_functions.解析项目源代码 import 解析任意code项目 + from crazy_functions.SourceCode_Analyse import 解析任意code项目 function_plugins.update( { diff --git a/crazy_functions/解析项目源代码.py b/crazy_functions/SourceCode_Analyse.py similarity index 99% rename from crazy_functions/解析项目源代码.py rename to crazy_functions/SourceCode_Analyse.py index 65e78249..53814df2 100644 --- a/crazy_functions/解析项目源代码.py +++ b/crazy_functions/SourceCode_Analyse.py @@ -1,4 +1,4 @@ -from toolbox import update_ui, promote_file_to_downloadzone, disable_auto_promotion +from toolbox import update_ui, promote_file_to_downloadzone from toolbox import CatchException, report_exception, write_history_to_file from shared_utils.fastapi_server import validate_path_safety from crazy_functions.crazy_utils import input_clipping @@ -7,7 +7,6 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, import os, copy from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive - disable_auto_promotion(chatbot=chatbot) summary_batch_isolation = True inputs_array = [] diff --git a/crazy_functions/SourceCode_Comment.py b/crazy_functions/SourceCode_Comment.py new file mode 100644 index 00000000..c15966de --- /dev/null +++ b/crazy_functions/SourceCode_Comment.py @@ -0,0 +1,96 @@ +import os, copy +from toolbox import CatchException, report_exception, update_ui, zip_result, promote_file_to_downloadzone, update_ui_lastest_msg +from shared_utils.fastapi_server import validate_path_safety +from crazy_functions.crazy_utils import input_clipping +from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency +from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive +from crazy_functions.agent_fns.python_comment_agent import PythonCodeComment +from crazy_functions.diagram_fns.file_tree import FileNode + +def 注释源代码(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt): + + summary_batch_isolation = True + inputs_array = [] + inputs_show_user_array = [] + history_array = [] + sys_prompt_array = [] + + assert len(file_manifest) <= 512, "源文件太多(超过512个), 请缩减输入文件的数量。或者,您也可以选择删除此行警告,并修改代码拆分file_manifest列表,从而实现分批次处理。" + + # 建立文件树 + file_tree_struct = FileNode("root", build_manifest=True) + for file_path in file_manifest: + file_tree_struct.add_file(file_path, file_path) + + # <第一步,逐个文件分析,多线程> + for index, fp in enumerate(file_manifest): + # 读取文件 + with open(fp, 'r', encoding='utf-8', errors='replace') as f: + file_content = f.read() + prefix = "" + i_say = prefix + f'Please analyse the following source code at {os.path.relpath(fp, project_folder)}, the code is:\n```{file_content}```' + i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请用一句话对下面的程序文件做一个整体概述: {fp}' + # 装载请求内容 + MAX_TOKEN_SINGLE_FILE = 2560 + i_say, _ = input_clipping(inputs=i_say, history=[], max_token_limit=MAX_TOKEN_SINGLE_FILE) + inputs_array.append(i_say) + inputs_show_user_array.append(i_say_show_user) + history_array.append([]) + sys_prompt_array.append("You are a software architecture analyst analyzing a source code project. Do not dig into details, tell me what the code is doing in general. Your answer must be short, simple and clear.") + # 文件读取完成,对每一个源代码文件,生成一个请求线程,发送到大模型进行分析 + gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( + inputs_array = inputs_array, + inputs_show_user_array = inputs_show_user_array, + history_array = history_array, + sys_prompt_array = sys_prompt_array, + llm_kwargs = llm_kwargs, + chatbot = chatbot, + show_user_at_complete = True + ) + + # <第二步,逐个文件分析,生成带注释文件> + chatbot.append([None, f"正在处理:"]) + for i_say, gpt_say, fp in zip(gpt_response_collection[0::2], gpt_response_collection[1::2], file_manifest): + with open(fp, 'r', encoding='utf-8', errors='replace') as f: + file_content = f.read() + yield from update_ui_lastest_msg(f"正在处理: {fp}", chatbot=chatbot, history=history, delay=0) + pcc = PythonCodeComment(llm_kwargs, language='English') + pcc.read_file(path=fp, brief=gpt_say) + revised_path, revised_content = yield from pcc.begin_comment_source_code(chatbot, history) + file_tree_struct.manifest[fp].revised_path = revised_path + file_tree_struct.manifest[fp].revised_content = revised_content + + # <第三步,将结果写回源文件> + for i_say, gpt_say, fp in zip(gpt_response_collection[0::2], gpt_response_collection[1::2], file_manifest): + with open(fp, 'w', encoding='utf-8') as f: + f.write(file_tree_struct.manifest[fp].revised_content) + + # <第四步,压缩结果> + zip_res = zip_result(project_folder) + promote_file_to_downloadzone(file=zip_res, chatbot=chatbot) + + # + chatbot.append((None, "所有源文件均已处理完毕。")) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + + + +@CatchException +def 注释Python项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request): + history = [] # 清空历史,以免输入溢出 + import glob, os + if os.path.exists(txt): + project_folder = txt + validate_path_safety(project_folder, chatbot.get_user()) + else: + if txt == "": txt = '空空如也的输入栏' + report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.py', recursive=True)] + if len(file_manifest) == 0: + report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何python文件: {txt}") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + yield from 注释源代码(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt) diff --git a/crazy_functions/agent_fns/python_comment_agent.py b/crazy_functions/agent_fns/python_comment_agent.py new file mode 100644 index 00000000..a6bdbd81 --- /dev/null +++ b/crazy_functions/agent_fns/python_comment_agent.py @@ -0,0 +1,379 @@ +from toolbox import CatchException, update_ui +from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive +from request_llms.bridge_all import predict_no_ui_long_connection +import datetime +import re +import os +from textwrap import dedent +# TODO: 解决缩进问题 + +find_function_end_prompt = ''' +Below is a page of code that you need to read. This page may not yet complete, you job is to split this page to sperate functions, class functions etc. +- Provide the line number where the first visible function ends. +- Provide the line number where the next visible function begins. +- If there are no other functions in this page, you should simply return the line number of the last line. +- Only focus on functions declared by `def` keyword. Ignore inline functions. Ignore function calls. + +------------------ Example ------------------ +INPUT: + + ``` + L0000 |import sys + L0001 |import re + L0002 | + L0003 |def trimmed_format_exc(): + L0004 | import os + L0005 | import traceback + L0006 | str = traceback.format_exc() + L0007 | current_path = os.getcwd() + L0008 | replace_path = "." + L0009 | return str.replace(current_path, replace_path) + L0010 | + L0011 | + L0012 |def trimmed_format_exc_markdown(): + L0013 | ... + L0014 | ... + ``` + +OUTPUT: + + ``` + L0009 + L0012 + ``` + +------------------ End of Example ------------------ + + +------------------ the real INPUT you need to process NOW ------------------ +``` +{THE_TAGGED_CODE} +``` +''' + + + + + + + +revise_funtion_prompt = ''' +You need to read the following code, and revise the source code ({FILE_BASENAME}) according to following instructions: +1. You should analyze the purpose of the functions (if there are any). +2. You need to add docstring for the provided functions (if there are any). + +Be aware: +1. You must NOT modify the indent of code. +2. You are NOT authorized to change or translate non-comment code, and you are NOT authorized to add empty lines either, toggle qu. +3. Use {LANG} to add comments and docstrings. Do NOT translate Chinese that is already in the code. + +------------------ Example ------------------ +INPUT: +``` +L0000 | +L0001 |def zip_result(folder): +L0002 | t = gen_time_str() +L0003 | zip_folder(folder, get_log_folder(), f"result.zip") +L0004 | return os.path.join(get_log_folder(), f"result.zip") +L0005 | +L0006 | +``` + +OUTPUT: + + +This function compresses a given folder, and return the path of the resulting `zip` file. + + +``` +def zip_result(folder): + """ + Compresses the specified folder into a zip file and stores it in the log folder. + + Args: + folder (str): The path to the folder that needs to be compressed. + + Returns: + str: The path to the created zip file in the log folder. + """ + t = gen_time_str() + zip_folder(folder, get_log_folder(), f"result.zip") # ⭐ Execute the zipping of folder + return os.path.join(get_log_folder(), f"result.zip") +``` + +------------------ End of Example ------------------ + + +------------------ the real INPUT you need to process NOW ({FILE_BASENAME}) ------------------ +``` +{THE_CODE} +``` +{INDENT_REMINDER} +{BRIEF_REMINDER} +{HINT_REMINDER} +''' + + + +class PythonCodeComment(): + + def __init__(self, llm_kwargs, language) -> None: + self.full_context = [] + self.full_context_with_line_no = [] + self.current_page_start = 0 + self.page_limit = 100 # 100 lines of code each page + self.ignore_limit = 20 + self.llm_kwargs = llm_kwargs + self.language = language + self.path = None + self.file_basename = None + self.file_brief = "" + + def generate_tagged_code_from_full_context(self): + for i, code in enumerate(self.full_context): + number = i + padded_number = f"{number:04}" + result = f"L{padded_number}" + self.full_context_with_line_no.append(f"{result} | {code}") + return self.full_context_with_line_no + + def read_file(self, path, brief): + with open(path, 'r', encoding='utf8') as f: + self.full_context = f.readlines() + self.file_basename = os.path.basename(path) + self.file_brief = brief + self.full_context_with_line_no = self.generate_tagged_code_from_full_context() + self.path = path + + def find_next_function_begin(self, tagged_code:list, begin_and_end): + begin, end = begin_and_end + THE_TAGGED_CODE = ''.join(tagged_code) + self.llm_kwargs['temperature'] = 0 + result = predict_no_ui_long_connection( + inputs=find_function_end_prompt.format(THE_TAGGED_CODE=THE_TAGGED_CODE), + llm_kwargs=self.llm_kwargs, + history=[], + sys_prompt="", + observe_window=[], + console_slience=True + ) + + def extract_number(text): + # 使用正则表达式匹配模式 + match = re.search(r'L(\d+)', text) + if match: + # 提取匹配的数字部分并转换为整数 + return int(match.group(1)) + return None + + line_no = extract_number(result) + if line_no is not None: + return line_no + else: + raise RuntimeError + return end + + def _get_next_window(self): + # + current_page_start = self.current_page_start + + if self.current_page_start == len(self.full_context) + 1: + raise StopIteration + + # 如果剩余的行数非常少,一鼓作气处理掉 + if len(self.full_context) - self.current_page_start < self.ignore_limit: + future_page_start = len(self.full_context) + 1 + self.current_page_start = future_page_start + return current_page_start, future_page_start + + + tagged_code = self.full_context_with_line_no[ self.current_page_start: self.current_page_start + self.page_limit] + line_no = self.find_next_function_begin(tagged_code, [self.current_page_start, self.current_page_start + self.page_limit]) + + if line_no > len(self.full_context) - 5: + line_no = len(self.full_context) + 1 + + future_page_start = line_no + self.current_page_start = future_page_start + + # ! consider eof + return current_page_start, future_page_start + + def dedent(self, text): + """Remove any common leading whitespace from every line in `text`. + """ + # Look for the longest leading string of spaces and tabs common to + # all lines. + margin = None + _whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE) + _leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE) + text = _whitespace_only_re.sub('', text) + indents = _leading_whitespace_re.findall(text) + for indent in indents: + if margin is None: + margin = indent + + # Current line more deeply indented than previous winner: + # no change (previous winner is still on top). + elif indent.startswith(margin): + pass + + # Current line consistent with and no deeper than previous winner: + # it's the new winner. + elif margin.startswith(indent): + margin = indent + + # Find the largest common whitespace between current line and previous + # winner. + else: + for i, (x, y) in enumerate(zip(margin, indent)): + if x != y: + margin = margin[:i] + break + + # sanity check (testing/debugging only) + if 0 and margin: + for line in text.split("\n"): + assert not line or line.startswith(margin), \ + "line = %r, margin = %r" % (line, margin) + + if margin: + text = re.sub(r'(?m)^' + margin, '', text) + return text, len(margin) + else: + return text, 0 + + def get_next_batch(self): + current_page_start, future_page_start = self._get_next_window() + return ''.join(self.full_context[current_page_start: future_page_start]), current_page_start, future_page_start + + def tag_code(self, fn, hint): + code = fn + _, n_indent = self.dedent(code) + indent_reminder = "" if n_indent == 0 else "(Reminder: as you can see, this piece of code has indent made up with {n_indent} whitespace, please preseve them in the OUTPUT.)" + brief_reminder = "" if self.file_brief == "" else f"({self.file_basename} abstract: {self.file_brief})" + hint_reminder = "" if hint is None else f"(Reminder: do not ignore or modify code such as `{hint}`, provide complete code in the OUTPUT.)" + self.llm_kwargs['temperature'] = 0 + result = predict_no_ui_long_connection( + inputs=revise_funtion_prompt.format( + LANG=self.language, + FILE_BASENAME=self.file_basename, + THE_CODE=code, + INDENT_REMINDER=indent_reminder, + BRIEF_REMINDER=brief_reminder, + HINT_REMINDER=hint_reminder + ), + llm_kwargs=self.llm_kwargs, + history=[], + sys_prompt="", + observe_window=[], + console_slience=True + ) + + def get_code_block(reply): + import re + pattern = r"```([\s\S]*?)```" # regex pattern to match code blocks + matches = re.findall(pattern, reply) # find all code blocks in text + if len(matches) == 1: + return matches[0].strip('python') # code block + return None + + code_block = get_code_block(result) + if code_block is not None: + code_block = self.sync_and_patch(original=code, revised=code_block) + return code_block + else: + return code + + def sync_and_patch(self, original, revised): + """Ensure the number of pre-string empty lines in revised matches those in original.""" + + def count_leading_empty_lines(s, reverse=False): + """Count the number of leading empty lines in a string.""" + lines = s.split('\n') + if reverse: lines = list(reversed(lines)) + count = 0 + for line in lines: + if line.strip() == '': + count += 1 + else: + break + return count + + original_empty_lines = count_leading_empty_lines(original) + revised_empty_lines = count_leading_empty_lines(revised) + + if original_empty_lines > revised_empty_lines: + additional_lines = '\n' * (original_empty_lines - revised_empty_lines) + revised = additional_lines + revised + elif original_empty_lines < revised_empty_lines: + lines = revised.split('\n') + revised = '\n'.join(lines[revised_empty_lines - original_empty_lines:]) + + original_empty_lines = count_leading_empty_lines(original, reverse=True) + revised_empty_lines = count_leading_empty_lines(revised, reverse=True) + + if original_empty_lines > revised_empty_lines: + additional_lines = '\n' * (original_empty_lines - revised_empty_lines) + revised = revised + additional_lines + elif original_empty_lines < revised_empty_lines: + lines = revised.split('\n') + revised = '\n'.join(lines[:-(revised_empty_lines - original_empty_lines)]) + + return revised + + def begin_comment_source_code(self, chatbot, history): + from toolbox import update_ui_lastest_msg + assert self.path is not None + assert '.py' in self.path # must be python source code + # write_target = self.path + '.revised.py' + + write_content = "" + # with open(self.path + '.revised.py', 'w+', encoding='utf8') as f: + while True: + try: + yield from update_ui_lastest_msg(f"({self.file_basename}) 正在读取下一段代码片段:\n", chatbot=chatbot, history=history, delay=0) + next_batch, line_no_start, line_no_end = self.get_next_batch() + yield from update_ui_lastest_msg(f"({self.file_basename}) 处理代码片段:\n\n{next_batch}", chatbot=chatbot, history=history, delay=0) + + hint = None + MAX_ATTEMPT = 2 + for attempt in range(MAX_ATTEMPT): + result = self.tag_code(next_batch, hint) + try: + successful, hint = self.verify_successful(next_batch, result) + except Exception as e: + print('ignored exception:\n' + str(e)) + break + if successful: + break + if attempt == MAX_ATTEMPT - 1: + # cannot deal with this, give up + result = next_batch + break + + # f.write(result) + write_content += result + except StopIteration: + next_batch, line_no_start, line_no_end = [], -1, -1 + return None, write_content + + def verify_successful(self, original, revised): + """ Determine whether the revised code contains every line that already exists + """ + from crazy_functions.ast_fns.comment_remove import remove_python_comments + original = remove_python_comments(original) + original_lines = original.split('\n') + revised_lines = revised.split('\n') + + for l in original_lines: + l = l.strip() + if '\'' in l or '\"' in l: continue # ast sometimes toggle " to ' + found = False + for lt in revised_lines: + if l in lt: + found = True + break + if not found: + return False, l + return True, None \ No newline at end of file diff --git a/crazy_functions/ast_fns/comment_remove.py b/crazy_functions/ast_fns/comment_remove.py new file mode 100644 index 00000000..1c482afd --- /dev/null +++ b/crazy_functions/ast_fns/comment_remove.py @@ -0,0 +1,46 @@ +import ast + +class CommentRemover(ast.NodeTransformer): + def visit_FunctionDef(self, node): + # 移除函数的文档字符串 + if (node.body and isinstance(node.body[0], ast.Expr) and + isinstance(node.body[0].value, ast.Str)): + node.body = node.body[1:] + self.generic_visit(node) + return node + + def visit_ClassDef(self, node): + # 移除类的文档字符串 + if (node.body and isinstance(node.body[0], ast.Expr) and + isinstance(node.body[0].value, ast.Str)): + node.body = node.body[1:] + self.generic_visit(node) + return node + + def visit_Module(self, node): + # 移除模块的文档字符串 + if (node.body and isinstance(node.body[0], ast.Expr) and + isinstance(node.body[0].value, ast.Str)): + node.body = node.body[1:] + self.generic_visit(node) + return node + + +def remove_python_comments(source_code): + # 解析源代码为 AST + tree = ast.parse(source_code) + # 移除注释 + transformer = CommentRemover() + tree = transformer.visit(tree) + # 将处理后的 AST 转换回源代码 + return ast.unparse(tree) + +# 示例使用 +if __name__ == "__main__": + with open("source.py", "r", encoding="utf-8") as f: + source_code = f.read() + + cleaned_code = remove_python_comments(source_code) + + with open("cleaned_source.py", "w", encoding="utf-8") as f: + f.write(cleaned_code) \ No newline at end of file diff --git a/crazy_functions/diagram_fns/file_tree.py b/crazy_functions/diagram_fns/file_tree.py index d00ad13a..e1800ee9 100644 --- a/crazy_functions/diagram_fns/file_tree.py +++ b/crazy_functions/diagram_fns/file_tree.py @@ -2,7 +2,7 @@ import os from textwrap import indent class FileNode: - def __init__(self, name): + def __init__(self, name, build_manifest=False): self.name = name self.children = [] self.is_leaf = False @@ -10,6 +10,8 @@ class FileNode: self.parenting_ship = [] self.comment = "" self.comment_maxlen_show = 50 + self.build_manifest = build_manifest + self.manifest = {} @staticmethod def add_linebreaks_at_spaces(string, interval=10): @@ -29,6 +31,7 @@ class FileNode: level = 1 if directory_names == "": new_node = FileNode(file_name) + self.manifest[file_path] = new_node current_node.children.append(new_node) new_node.is_leaf = True new_node.comment = self.sanitize_comment(file_comment) @@ -50,6 +53,7 @@ class FileNode: new_node.level = level - 1 current_node = new_node term = FileNode(file_name) + self.manifest[file_path] = term term.level = level term.comment = self.sanitize_comment(file_comment) term.is_leaf = True diff --git a/shared_utils/fastapi_server.py b/shared_utils/fastapi_server.py index 45363d8d..6c9b1d1c 100644 --- a/shared_utils/fastapi_server.py +++ b/shared_utils/fastapi_server.py @@ -57,7 +57,7 @@ def validate_path_safety(path_or_url, user): sensitive_path = PATH_LOGGING elif path_or_url.startswith(PATH_PRIVATE_UPLOAD): # 用户的上传目录(按用户划分) sensitive_path = PATH_PRIVATE_UPLOAD - elif path_or_url.startswith('tests'): # 一个常用的测试目录 + elif path_or_url.startswith('tests') or path_or_url.startswith('build'): # 一个常用的测试目录 return True else: raise FriendlyException(f"输入文件的路径 ({path_or_url}) 存在,但位置非法。请将文件上传后再执行该任务。") # return False diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 0c4d9251..02f013ce 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -2,23 +2,16 @@ 对项目中的各个插件进行测试。运行方法:直接运行 python tests/test_plugins.py """ - +import init_test import os, sys -def validate_path(): - dir_name = os.path.dirname(__file__) - root_dir_assume = os.path.abspath(dir_name + "/..") - os.chdir(root_dir_assume) - sys.path.append(root_dir_assume) - - -validate_path() # 返回项目根路径 - if __name__ == "__main__": - from tests.test_utils import plugin_test + from test_utils import plugin_test - plugin_test(plugin='crazy_functions.Internet_GPT->连接网络回答问题', main_input="谁是应急食品?") + plugin_test(plugin='crazy_functions.SourceCode_Comment->注释Python项目', main_input="build/test/python_comment") + + # plugin_test(plugin='crazy_functions.Internet_GPT->连接网络回答问题', main_input="谁是应急食品?") # plugin_test(plugin='crazy_functions.函数动态生成->函数动态生成', main_input='交换图像的蓝色通道和红色通道', advanced_arg={"file_path_arg": "./build/ants.jpg"}) @@ -39,9 +32,9 @@ if __name__ == "__main__": # plugin_test(plugin='crazy_functions.命令行助手->命令行助手', main_input='查看当前的docker容器列表') - # plugin_test(plugin='crazy_functions.解析项目源代码->解析一个Python项目', main_input="crazy_functions/test_project/python/dqn") + # plugin_test(plugin='crazy_functions.SourceCode_Analyse->解析一个Python项目', main_input="crazy_functions/test_project/python/dqn") - # plugin_test(plugin='crazy_functions.解析项目源代码->解析一个C项目', main_input="crazy_functions/test_project/cpp/cppipc") + # plugin_test(plugin='crazy_functions.SourceCode_Analyse->解析一个C项目', main_input="crazy_functions/test_project/cpp/cppipc") # plugin_test(plugin='crazy_functions.Latex全文润色->Latex英文润色', main_input="crazy_functions/test_project/latex/attention") diff --git a/tests/test_python_auto_docstring.py b/tests/test_python_auto_docstring.py index 0e65cf15..6bfab23e 100644 --- a/tests/test_python_auto_docstring.py +++ b/tests/test_python_auto_docstring.py @@ -5,7 +5,7 @@ from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_ from request_llms.bridge_all import predict_no_ui_long_connection import datetime import re - +from textwrap import dedent # TODO: 解决缩进问题 find_function_end_prompt = ''' @@ -191,13 +191,56 @@ class ContextWindowManager(): # ! consider eof return current_page_start, future_page_start + def dedent(self, text): + """Remove any common leading whitespace from every line in `text`. + """ + # Look for the longest leading string of spaces and tabs common to + # all lines. + margin = None + _whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE) + _leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE) + text = _whitespace_only_re.sub('', text) + indents = _leading_whitespace_re.findall(text) + for indent in indents: + if margin is None: + margin = indent + + # Current line more deeply indented than previous winner: + # no change (previous winner is still on top). + elif indent.startswith(margin): + pass + + # Current line consistent with and no deeper than previous winner: + # it's the new winner. + elif margin.startswith(indent): + margin = indent + + # Find the largest common whitespace between current line and previous + # winner. + else: + for i, (x, y) in enumerate(zip(margin, indent)): + if x != y: + margin = margin[:i] + break + + # sanity check (testing/debugging only) + if 0 and margin: + for line in text.split("\n"): + assert not line or line.startswith(margin), \ + "line = %r, margin = %r" % (line, margin) + + if margin: + text = re.sub(r'(?m)^' + margin, '', text) + return text, len(margin) + def get_next_batch(self): current_page_start, future_page_start = self._get_next_window() return self.full_context[current_page_start: future_page_start], current_page_start, future_page_start def tag_code(self, fn): code = ''.join(fn) - indent_reminder = "" + _, n_indent = self.dedent(code) + indent_reminder = "" if n_indent == 0 else "(Reminder: as you can see, this piece of code has indent made up with {n_indent} whitespace, please preseve them in the OUTPUT.)" self.llm_kwargs['temperature'] = 0 result = predict_no_ui_long_connection( inputs=revise_funtion_prompt.format(THE_CODE=code, INDENT_REMINDER=indent_reminder), @@ -264,6 +307,7 @@ class ContextWindowManager(): from toolbox import get_plugin_default_kwargs llm_kwargs = get_plugin_default_kwargs()["llm_kwargs"] cwm = ContextWindowManager(llm_kwargs) +cwm.read_file(path="./test.py") output_buf = "" with open('temp.py', 'w+', encoding='utf8') as f: while True: diff --git a/toolbox.py b/toolbox.py index a4b0821b..a40f71d0 100644 --- a/toolbox.py +++ b/toolbox.py @@ -220,9 +220,10 @@ def CatchException(f): try: yield from f(main_input, llm_kwargs, plugin_kwargs, chatbot_with_cookie, history, *args, **kwargs) except FriendlyException as e: + tb_str = '```\n' + trimmed_format_exc() + '```' if len(chatbot_with_cookie) == 0: chatbot_with_cookie.clear() - chatbot_with_cookie.append(["插件调度异常", None]) + chatbot_with_cookie.append(["插件调度异常:\n" + tb_str, None]) chatbot_with_cookie[-1] = [chatbot_with_cookie[-1][0], e.generate_error_html()] yield from update_ui(chatbot=chatbot_with_cookie, history=history, msg=f'异常') # 刷新界面 except Exception as e: