def validate_path():
    """Switch the working directory to the assumed project root and make it importable.

    The project root is assumed to be the parent of the directory that
    contains this file. The function changes the current working directory
    to that root and adds the root to ``sys.path`` so that top-level project
    modules can be imported when a test is launched from another directory.

    Calling the function more than once is harmless: the root is added to
    ``sys.path`` only if it is not already listed.
    """
    import os
    import sys

    # Resolve the file location first. When ``__file__`` has no directory
    # part, ``os.path.dirname(__file__) + "/.."`` would collapse to "/.."
    # (the filesystem root) instead of the parent of the current directory.
    tests_dir = os.path.dirname(os.path.abspath(__file__))
    root_dir_assume = os.path.dirname(tests_dir)

    os.chdir(root_dir_assume)
    if root_dir_assume not in sys.path:
        sys.path.append(root_dir_assume)


validate_path()  # validate path so you can run from base directory
+- Provide the line number where the next visible function begins. +- If there are no other functions in this page, you should simply return the line number of the last line. +- Only focus on functions declared by `def` keyword. Ignore inline functions. Ignore function calls. + +------------------ Example ------------------ +INPUT: + + ``` + L0000 |import sys + L0001 |import re + L0002 | + L0003 |def trimmed_format_exc(): + L0004 | import os + L0005 | import traceback + L0006 | str = traceback.format_exc() + L0007 | current_path = os.getcwd() + L0008 | replace_path = "." + L0009 | return str.replace(current_path, replace_path) + L0010 | + L0011 | + L0012 |def trimmed_format_exc_markdown(): + L0013 | ... + L0014 | ... + ``` + +OUTPUT: + + ``` + L0009 + L0012 + ``` + +------------------ End of Example ------------------ + + +------------------ the real INPUT you need to process NOW ------------------ +``` +{THE_TAGGED_CODE} +``` +''' + + + + + + + +revise_funtion_prompt = ''' +You need to read the following code, and revise the code according to following instructions: +1. You should analyze the purpose of the functions (if there are any). +2. You need to add docstring for the provided functions (if there are any). + +Be aware: +1. You must NOT modify the indent of code. +2. You are NOT authorized to change or translate non-comment code, and you are NOT authorized to add empty lines either. +3. Use English to add comments and docstrings. Do NOT translate Chinese that is already in the code. + +------------------ Example ------------------ +INPUT: +``` +L0000 | +L0001 |def zip_result(folder): +L0002 | t = gen_time_str() +L0003 | zip_folder(folder, get_log_folder(), f"result.zip") +L0004 | return os.path.join(get_log_folder(), f"result.zip") +L0005 | +L0006 | +``` + +OUTPUT: + + +This function compresses a given folder, and return the path of the resulting `zip` file. 
class ContextWindowManager():
    """Walk through a source file page by page and ask an LLM to add docstrings.

    The file is kept as a list of raw lines. Each call to
    :meth:`get_next_batch` returns the next contiguous slice of lines (a
    "page"); the page boundary is chosen by asking the LLM where the next
    function begins. :meth:`tag_code` then sends one page to the LLM and
    returns the revised text.
    """

    def __init__(self, llm_kwargs) -> None:
        """Store the LLM settings and initialise an empty context.

        Args:
            llm_kwargs: Mapping of LLM options that is forwarded to
                ``predict_no_ui_long_connection``. It is never modified here.
        """
        self.full_context = []               # raw lines of the file, line endings kept
        self.full_context_with_line_no = []  # the same lines prefixed with "L0000 | "
        self.current_page_start = 0          # index of the first line of the next page
        self.page_limit = 100                # 100 lines of code each page
        self.ignore_limit = 20               # a shorter remainder is returned in one go
        self.llm_kwargs = llm_kwargs

    def generate_tagged_code_from_full_context(self):
        """Rebuild and return the line-numbered copy of ``full_context``.

        Every line is prefixed with ``L`` and its zero-based, four-digit
        index, followed by ``" | "``. The list is rebuilt from scratch on
        every call so that repeated calls do not accumulate duplicates.

        Returns:
            list: The tagged lines (also stored on the instance).
        """
        self.full_context_with_line_no = [
            f"L{index:04} | {code}" for index, code in enumerate(self.full_context)
        ]
        return self.full_context_with_line_no

    def read_file(self, path):
        """Load ``path`` (UTF-8) as the context and restart paging from line 0.

        Args:
            path: Path of the source file to read.
        """
        with open(path, 'r', encoding='utf8') as f:
            self.full_context = f.readlines()
        self.full_context_with_line_no = self.generate_tagged_code_from_full_context()
        # A freshly loaded file must be paged from its beginning.
        self.current_page_start = 0

    @staticmethod
    def _extract_next_start(reply):
        """Pick the "next function begins" line number out of an LLM reply.

        The prompt asks for two ``Lxxxx`` tags: where the first function ends
        and where the next one begins. The second tag is the page boundary.
        When the reply holds a single tag, that tag is used.

        Returns:
            int or None: The parsed line number, or ``None`` if no tag exists.
        """
        numbers = re.findall(r'L(\d+)', reply)
        if not numbers:
            return None
        chosen = numbers[1] if len(numbers) >= 2 else numbers[0]
        return int(chosen)

    def find_next_function_begin(self, tagged_code:list, begin_and_end):
        """Ask the LLM where the next function starts inside a tagged page.

        Args:
            tagged_code: Line-numbered lines of the current page.
            begin_and_end: ``[begin, end]`` line indices of the current page.

        Returns:
            int: Line index at which the next page should start. If the LLM
            names a line outside ``(begin, end]`` the page limit ``end`` is
            used, so that paging always moves forward.

        Raises:
            RuntimeError: If the reply contains no ``Lxxxx`` tag at all.
        """
        begin, end = begin_and_end
        THE_TAGGED_CODE = ''.join(tagged_code)
        reply = predict_no_ui_long_connection(
            inputs=find_function_end_prompt.format(THE_TAGGED_CODE=THE_TAGGED_CODE),
            # Work on a copy so the caller's settings are left untouched.
            llm_kwargs=dict(self.llm_kwargs, temperature=0),
            history=[],
            sys_prompt="",
            observe_window=[],
            console_slience=True
        )

        line_no = self._extract_next_start(reply)
        if line_no is None:
            raise RuntimeError("no line tag of the form L<number> found in the LLM reply")
        if not begin < line_no <= end:
            # A boundary at or before the page start would never advance.
            return end
        return line_no

    def _get_next_window(self):
        """Compute the next page and advance the internal cursor.

        Returns:
            tuple: ``(page_start, next_page_start)`` as line indices.

        Raises:
            StopIteration: When the whole file has already been handed out.
        """
        page_start = self.current_page_start
        total_lines = len(self.full_context)
        eof_marker = total_lines + 1  # cursor value meaning "everything consumed"

        if page_start >= eof_marker:
            raise StopIteration

        # When only a few lines remain, hand them all out at once.
        if total_lines - page_start < self.ignore_limit:
            self.current_page_start = eof_marker
            return page_start, eof_marker

        page_end = page_start + self.page_limit
        tagged_code = self.full_context_with_line_no[page_start:page_end]
        line_no = self.find_next_function_begin(tagged_code, [page_start, page_end])

        # A boundary within the last five lines is treated as end of file.
        if line_no > total_lines - 5:
            line_no = eof_marker

        self.current_page_start = line_no
        return page_start, line_no

    def get_next_batch(self):
        """Return the next page of raw lines together with its boundaries.

        Returns:
            tuple: ``(lines, page_start, next_page_start)``.

        Raises:
            StopIteration: When no lines are left.
        """
        current_page_start, future_page_start = self._get_next_window()
        return self.full_context[current_page_start: future_page_start], current_page_start, future_page_start

    @staticmethod
    def _extract_code_block(reply):
        """Return the content of the single fenced code block in ``reply``.

        A leading ``python`` language tag is removed as a prefix. (Using
        ``str.strip('python')`` would also eat matching characters from the
        end of the code, e.g. turn ``path`` into ``pa``.)

        Returns:
            str or None: The block content, or ``None`` unless the reply
            contains exactly one fenced block.
        """
        matches = re.findall(r"```([\s\S]*?)```", reply)
        if len(matches) != 1:
            return None
        block = matches[0]
        if block.startswith('python'):
            block = block[len('python'):]
        return block

    def tag_code(self, fn):
        """Ask the LLM to add docstrings and comments to one page of code.

        Args:
            fn: List of raw source lines that make up the page.

        Returns:
            str: The revised code, with leading and trailing blank lines
            aligned to the input. The joined input is returned unchanged
            when it is blank or when the reply does not contain exactly one
            fenced code block.
        """
        code = ''.join(fn)
        if not code.strip():
            # Nothing to document; skip the LLM round trip.
            return code

        indent_reminder = ""
        result = predict_no_ui_long_connection(
            inputs=revise_funtion_prompt.format(THE_CODE=code, INDENT_REMINDER=indent_reminder),
            # Work on a copy so the caller's settings are left untouched.
            llm_kwargs=dict(self.llm_kwargs, temperature=0),
            history=[],
            sys_prompt="",
            observe_window=[],
            console_slience=True
        )

        code_block = self._extract_code_block(result)
        if code_block is None:
            return code
        return self.sync_and_patch(original=code, revised=code_block)

    def sync_and_patch(self, original, revised):
        """Make ``revised`` start and end with as many blank lines as ``original``.

        Args:
            original: The text that was sent to the LLM.
            revised: The text that came back.

        Returns:
            str: ``revised`` with blank lines added or removed at both ends.
        """

        def count_edge_empty_lines(s, reverse=False):
            """Count blank lines at the start (or, with ``reverse``, the end) of ``s``."""
            lines = s.split('\n')
            if reverse:
                lines = lines[::-1]
            count = 0
            for line in lines:
                if line.strip() != '':
                    break
                count += 1
            return count

        # Align the leading blank lines.
        surplus = count_edge_empty_lines(revised) - count_edge_empty_lines(original)
        if surplus < 0:
            revised = '\n' * (-surplus) + revised
        elif surplus > 0:
            revised = '\n'.join(revised.split('\n')[surplus:])

        # Align the trailing blank lines.
        surplus = (count_edge_empty_lines(revised, reverse=True)
                   - count_edge_empty_lines(original, reverse=True))
        if surplus < 0:
            revised = revised + '\n' * (-surplus)
        elif surplus > 0:
            revised = '\n'.join(revised.split('\n')[:-surplus])

        return revised


if __name__ == "__main__":
    from toolbox import get_plugin_default_kwargs
    llm_kwargs = get_plugin_default_kwargs()["llm_kwargs"]
    cwm = ContextWindowManager(llm_kwargs)
    # NOTE(review): cwm.read_file(...) is never called before the loop, so the
    # context is empty here - confirm which file this script is meant to process.
    output_buf = ""
    with open('temp.py', 'w+', encoding='utf8') as f:
        while True:
            try:
                next_batch, line_no_start, line_no_end = cwm.get_next_batch()
                result = cwm.tag_code(next_batch)
                f.write(result)
                output_buf += result
            except StopIteration:
                next_batch, line_no_start, line_no_end = [], -1, -1
                break
            print('-------------------------------------------')
            print(''.join(next_batch))
            print('-------------------------------------------')

    print(cwm)