stashed commit
This commit is contained in:
@@ -1,9 +1,10 @@
|
||||
from toolbox import update_ui, update_ui_lastest_msg, get_log_folder
|
||||
from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone
|
||||
from toolbox import get_conf, objdump, objload, promote_file_to_downloadzone
|
||||
from .latex_toolbox import PRESERVE, TRANSFORM
|
||||
from .latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
|
||||
from .latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process
|
||||
from .latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout
|
||||
from .latex_toolbox import find_title_and_abs
|
||||
|
||||
import os, shutil
|
||||
import re
|
||||
@@ -90,7 +91,15 @@ class LatexPaperSplit():
|
||||
"项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
|
||||
# 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加REAME中的QQ联系开发者)
|
||||
self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"
|
||||
self.title = "unknown"
|
||||
self.abstract = "unknown"
|
||||
|
||||
def read_title_and_abstract(self, txt):
|
||||
title, abstract = find_title_and_abs(txt)
|
||||
if title is not None:
|
||||
self.title = title.replace('\n', ' ').replace('\\\\', ' ').replace(' ', '').replace(' ', '')
|
||||
if abstract is not None:
|
||||
self.abstract = abstract.replace('\n', ' ').replace('\\\\', ' ').replace(' ', '').replace(' ', '')
|
||||
|
||||
def merge_result(self, arr, mode, msg, buggy_lines=[], buggy_line_surgery_n_lines=10):
|
||||
"""
|
||||
@@ -234,8 +243,8 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
|
||||
chatbot.append((f"Latex文件融合完成", f'[Local Message] 正在精细切分latex文件,这需要一段时间计算,文档越长耗时越长,请耐心等待。'))
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
lps = LatexPaperSplit()
|
||||
lps.read_title_and_abstract(merged_content)
|
||||
res = lps.split(merged_content, project_folder, opts) # 消耗时间的函数
|
||||
|
||||
# <-------- 拆分过长的latex片段 ---------->
|
||||
pfg = LatexPaperFileGroup()
|
||||
for index, r in enumerate(res):
|
||||
@@ -256,12 +265,19 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
|
||||
|
||||
else:
|
||||
# <-------- gpt 多线程请求 ---------->
|
||||
LATEX_EXPERIMENTAL, = get_conf('LATEX_EXPERIMENTAL')
|
||||
history_array = [[""] for _ in range(n_split)]
|
||||
if LATEX_EXPERIMENTAL:
|
||||
paper_meta = f"The paper you processing is `{lps.title}`, a part of the abstraction is `{lps.abstract}`"
|
||||
paper_meta_max_len = 888
|
||||
history_array = [[ paper_meta[:paper_meta_max_len] + '...', "Understand, what should I do?"] for _ in range(n_split)]
|
||||
|
||||
gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
||||
inputs_array=inputs_array,
|
||||
inputs_show_user_array=inputs_show_user_array,
|
||||
llm_kwargs=llm_kwargs,
|
||||
chatbot=chatbot,
|
||||
history_array=[[""] for _ in range(n_split)],
|
||||
history_array=history_array,
|
||||
sys_prompt_array=sys_prompt_array,
|
||||
# max_workers=5, # 并行任务数量限制, 最多同时执行5个, 其他的排队等待
|
||||
scroller_max_len = 40
|
||||
|
||||
@@ -318,6 +318,40 @@ def merge_tex_files_(project_foler, main_file, mode):
|
||||
main_file = main_file[:s.span()[0]] + c + main_file[s.span()[1]:]
|
||||
return main_file
|
||||
|
||||
|
||||
def find_title_and_abs(main_file):
|
||||
|
||||
def extract_abstract_1(text):
|
||||
pattern = r"\\abstract\{(.*?)\}"
|
||||
match = re.search(pattern, text, re.DOTALL)
|
||||
if match:
|
||||
return match.group(1)
|
||||
else:
|
||||
return None
|
||||
|
||||
def extract_abstract_2(text):
|
||||
pattern = r"\\begin\{abstract\}(.*?)\\end\{abstract\}"
|
||||
match = re.search(pattern, text, re.DOTALL)
|
||||
if match:
|
||||
return match.group(1)
|
||||
else:
|
||||
return None
|
||||
|
||||
def extract_title(string):
|
||||
pattern = r"\\title\{(.*?)\}"
|
||||
match = re.search(pattern, string, re.DOTALL)
|
||||
|
||||
if match:
|
||||
return match.group(1)
|
||||
else:
|
||||
return None
|
||||
|
||||
abstract = extract_abstract_1(main_file)
|
||||
if abstract is None:
|
||||
abstract = extract_abstract_2(main_file)
|
||||
title = extract_title(main_file)
|
||||
return title, abstract
|
||||
|
||||
def merge_tex_files(project_foler, main_file, mode):
|
||||
"""
|
||||
Merge Tex project recrusively
|
||||
|
||||
Reference in New Issue
Block a user