logging -> loguru: final stage

This commit is contained in:
binary-husky
2024-09-15 15:51:51 +00:00
parent bbf9e9f868
commit 2f343179a2
55 changed files with 237 additions and 529 deletions

View File

@@ -171,7 +171,7 @@ def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
system_prompt 给gpt的静默提醒
user_request 当前用户的请求信息IP地址等
"""
from .crazy_utils import get_files_from_everything
from crazy_functions.crazy_utils import get_files_from_everything
success, file_manifest, _ = get_files_from_everything(txt, type='.html')
if not success:

View File

@@ -56,7 +56,7 @@ class PaperFileGroup():
def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='polish'):
import time, os, re
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
# <-------- 读取Latex文件删除其中的所有注释 ---------->

View File

@@ -37,7 +37,7 @@ class PaperFileGroup():
def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en'):
import time, os, re
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
# <-------- 读取Latex文件删除其中的所有注释 ---------->
pfg = PaperFileGroup()

View File

@@ -52,7 +52,7 @@ class PaperFileGroup():
return manifest
def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en'):
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
# <-------- 读取Markdown文件删除其中的所有注释 ---------->
pfg = PaperFileGroup()

View File

@@ -5,8 +5,8 @@ from crazy_functions.crazy_utils import input_clipping
def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
import os, copy
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
summary_batch_isolation = True
inputs_array = []

View File

@@ -1,4 +1,5 @@
from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
from loguru import logger
class EchoDemo(PluginMultiprocessManager):
def subprocess_worker(self, child_conn):
@@ -16,4 +17,4 @@ class EchoDemo(PluginMultiprocessManager):
elif msg.cmd == "terminate":
self.child_conn.send(PipeCom("done", ""))
break
print('[debug] subprocess_worker terminated')
logger.info('[debug] subprocess_worker terminated')

View File

@@ -1,5 +1,6 @@
from toolbox import get_log_folder, update_ui, gen_time_str, get_conf, promote_file_to_downloadzone
from crazy_functions.agent_fns.watchdog import WatchDog
from loguru import logger
import time, os
class PipeCom:
@@ -47,7 +48,7 @@ class PluginMultiprocessManager:
def terminate(self):
self.p.terminate()
self.alive = False
print("[debug] instance terminated")
logger.info("[debug] instance terminated")
def subprocess_worker(self, child_conn):
# ⭐⭐ run in subprocess

View File

@@ -1,10 +1,12 @@
from toolbox import CatchException, update_ui
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from request_llms.bridge_all import predict_no_ui_long_connection
import datetime
import re
import os
from loguru import logger
from textwrap import dedent
from toolbox import CatchException, update_ui
from request_llms.bridge_all import predict_no_ui_long_connection
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
# TODO: 解决缩进问题
find_function_end_prompt = '''
@@ -355,7 +357,7 @@ class PythonCodeComment():
try:
successful, hint = self.verify_successful(next_batch, result)
except Exception as e:
print('ignored exception:\n' + str(e))
logger.error('ignored exception:\n' + str(e))
break
if successful:
break

View File

@@ -1,4 +1,5 @@
import threading, time
from loguru import logger
class WatchDog():
def __init__(self, timeout, bark_fn, interval=3, msg="") -> None:
@@ -13,7 +14,7 @@ class WatchDog():
while True:
if self.kill_dog: break
if time.time() - self.last_feed > self.timeout:
if len(self.msg) > 0: print(self.msg)
if len(self.msg) > 0: logger.info(self.msg)
self.bark_fn()
break
time.sleep(self.interval)

View File

@@ -1,5 +1,5 @@
from toolbox import CatchException, update_ui, promote_file_to_downloadzone
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
import datetime, json
def fetch_items(list_of_items, batch_size):

View File

@@ -1,5 +1,6 @@
import os
from textwrap import indent
from loguru import logger
class FileNode:
def __init__(self, name, build_manifest=False):
@@ -60,7 +61,7 @@ class FileNode:
current_node.children.append(term)
def print_files_recursively(self, level=0, code="R0"):
print(' '*level + self.name + ' ' + str(self.is_leaf) + ' ' + str(self.level))
logger.info(' '*level + self.name + ' ' + str(self.is_leaf) + ' ' + str(self.level))
for j, child in enumerate(self.children):
child.print_files_recursively(level=level+1, code=code+str(j))
self.parenting_ship.extend(child.parenting_ship)
@@ -123,4 +124,4 @@ if __name__ == "__main__":
"用于加载和分割文件中的文本的通用文件加载器用于加载和分割文件中的文本的通用文件加载器用于加载和分割文件中的文本的通用文件加载器",
"包含了用于构建和管理向量数据库的函数和类包含了用于构建和管理向量数据库的函数和类包含了用于构建和管理向量数据库的函数和类",
]
print(build_file_tree_mermaid_diagram(file_manifest, file_comments, "项目文件树"))
logger.info(build_file_tree_mermaid_diagram(file_manifest, file_comments, "项目文件树"))

View File

@@ -1,15 +1,17 @@
import os
import re
import shutil
import numpy as np
from loguru import logger
from toolbox import update_ui, update_ui_lastest_msg, get_log_folder
from toolbox import get_conf, promote_file_to_downloadzone
from .latex_toolbox import PRESERVE, TRANSFORM
from .latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
from .latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process
from .latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout
from .latex_toolbox import find_title_and_abs
from .latex_pickle_io import objdump, objload
from crazy_functions.latex_fns.latex_toolbox import PRESERVE, TRANSFORM
from crazy_functions.latex_fns.latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
from crazy_functions.latex_fns.latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process
from crazy_functions.latex_fns.latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout
from crazy_functions.latex_fns.latex_toolbox import find_title_and_abs
from crazy_functions.latex_fns.latex_pickle_io import objdump, objload
import os, shutil
import re
import numpy as np
pj = os.path.join
@@ -323,7 +325,7 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work
buggy_lines = [int(l) for l in buggy_lines]
buggy_lines = sorted(buggy_lines)
buggy_line = buggy_lines[0]-1
print("reversing tex line that has errors", buggy_line)
logger.warning("reversing tex line that has errors", buggy_line)
# 重组,逆转出错的段落
if buggy_line not in fixed_line:
@@ -337,7 +339,7 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work
return True, f"{tex_name_pure}_fix_{n_fix}", buggy_lines
except:
print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
logger.error("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
return False, -1, [-1]
@@ -380,7 +382,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
if mode!='translate_zh':
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
print( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
logger.info( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex', os.getcwd())
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
@@ -419,7 +421,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
shutil.copyfile(concat_pdf, pj(work_folder, '..', 'translation', 'comparison.pdf'))
promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
except Exception as e:
print(e)
logger.error(e)
pass
return True # 成功啦
else:
@@ -465,4 +467,4 @@ def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
promote_file_to_downloadzone(file=res, chatbot=chatbot)
except:
from toolbox import trimmed_format_exc
print('writing html result failed:', trimmed_format_exc())
logger.error('writing html result failed:', trimmed_format_exc())

View File

@@ -1,6 +1,8 @@
import os, shutil
import os
import re
import shutil
import numpy as np
from loguru import logger
PRESERVE = 0
TRANSFORM = 1
@@ -55,7 +57,7 @@ def post_process(root):
str_stack.append("{")
elif c == "}":
if len(str_stack) == 1:
print("stack fix")
logger.warning("fixing brace error")
return i
str_stack.pop(-1)
else:
@@ -601,7 +603,7 @@ def compile_latex_with_timeout(command, cwd, timeout=60):
except subprocess.TimeoutExpired:
process.kill()
stdout, stderr = process.communicate()
print("Process timed out!")
logger.error("Process timed out (compile_latex_with_timeout)!")
return False
return True

View File

@@ -107,18 +107,14 @@ def is_speaker_speaking(vad, data, sample_rate):
class AliyunASR():
def test_on_sentence_begin(self, message, *args):
# print("test_on_sentence_begin:{}".format(message))
pass
def test_on_sentence_end(self, message, *args):
# print("test_on_sentence_end:{}".format(message))
message = json.loads(message)
self.parsed_sentence = message['payload']['result']
self.event_on_entence_end.set()
# print(self.parsed_sentence)
def test_on_start(self, message, *args):
# print("test_on_start:{}".format(message))
pass
def test_on_error(self, message, *args):
@@ -130,13 +126,11 @@ class AliyunASR():
pass
def test_on_result_chg(self, message, *args):
# print("test_on_chg:{}".format(message))
message = json.loads(message)
self.parsed_text = message['payload']['result']
self.event_on_result_chg.set()
def test_on_completed(self, message, *args):
# print("on_completed:args=>{} message=>{}".format(args, message))
pass
def audio_convertion_thread(self, uuid):
@@ -249,14 +243,14 @@ class AliyunASR():
try:
response = client.do_action_with_exception(request)
print(response)
logging.info(response)
jss = json.loads(response)
if 'Token' in jss and 'Id' in jss['Token']:
token = jss['Token']['Id']
expireTime = jss['Token']['ExpireTime']
print("token = " + token)
print("expireTime = " + str(expireTime))
logging.info("token = " + token)
logging.info("expireTime = " + str(expireTime))
except Exception as e:
print(e)
logging.error(e)
return token

View File

@@ -1,4 +1,5 @@
from crazy_functions.ipc_fns.mp import run_in_subprocess_with_timeout
from loguru import logger
def force_breakdown(txt, limit, get_token_fn):
""" 当无法用标点、空行分割时,我们用最暴力的方法切割
@@ -76,7 +77,7 @@ def cut(limit, get_token_fn, txt_tocut, must_break_at_empty_line, break_anyway=F
remain_txt_to_cut = post
remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage)
process = fin_len/total_len
print(f'正在文本切分 {int(process*100)}%')
logger.info(f'正在文本切分 {int(process*100)}%')
if len(remain_txt_to_cut.strip()) == 0:
break
return res
@@ -119,7 +120,7 @@ if __name__ == '__main__':
for i in range(5):
file_content += file_content
print(len(file_content))
logger.info(len(file_content))
TOKEN_LIMIT_PER_FRAGMENT = 2500
res = breakdown_text_to_satisfy_token_limit(file_content, TOKEN_LIMIT_PER_FRAGMENT)

View File

@@ -5,6 +5,7 @@ from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from crazy_functions.crazy_utils import read_and_clean_pdf_text
from shared_utils.colorful import *
from loguru import logger
import os
def 解析PDF_简单拆解(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
@@ -93,7 +94,7 @@ def 解析PDF_简单拆解(file_manifest, project_folder, llm_kwargs, plugin_kwa
generated_html_files.append(ch.save_file(create_report_file_name))
except:
from toolbox import trimmed_format_exc
print('writing html result failed:', trimmed_format_exc())
logger.error('writing html result failed:', trimmed_format_exc())
# 准备文件的下载
for pdf_path in generated_conclusion_files:

View File

@@ -1,6 +1,7 @@
import llama_index
import os
import atexit
from loguru import logger
from typing import List
from llama_index.core import Document
from llama_index.core.schema import TextNode
@@ -41,14 +42,14 @@ class SaveLoad():
return True
def save_to_checkpoint(self, checkpoint_dir=None):
print(f'saving vector store to: {checkpoint_dir}')
logger.info(f'saving vector store to: {checkpoint_dir}')
if checkpoint_dir is None: checkpoint_dir = self.checkpoint_dir
self.vs_index.storage_context.persist(persist_dir=checkpoint_dir)
def load_from_checkpoint(self, checkpoint_dir=None):
if checkpoint_dir is None: checkpoint_dir = self.checkpoint_dir
if self.does_checkpoint_exist(checkpoint_dir=checkpoint_dir):
print('loading checkpoint from disk')
logger.info('loading checkpoint from disk')
from llama_index.core import StorageContext, load_index_from_storage
storage_context = StorageContext.from_defaults(persist_dir=checkpoint_dir)
self.vs_index = load_index_from_storage(storage_context, embed_model=self.embed_model)
@@ -85,9 +86,9 @@ class LlamaIndexRagWorker(SaveLoad):
self.vs_index.storage_context.index_store.to_dict()
docstore = self.vs_index.storage_context.docstore.docs
vector_store_preview = "\n".join([ f"{_id} | {tn.text}" for _id, tn in docstore.items() ])
print('\n++ --------inspect_vector_store begin--------')
print(vector_store_preview)
print('oo --------inspect_vector_store end--------')
logger.info('\n++ --------inspect_vector_store begin--------')
logger.info(vector_store_preview)
logger.info('oo --------inspect_vector_store end--------')
return vector_store_preview
def add_documents_to_vector_store(self, document_list):
@@ -125,5 +126,5 @@ class LlamaIndexRagWorker(SaveLoad):
def generate_node_array_preview(self, nodes):
buf = "\n".join(([f"(No.{i+1} | score {n.score:.3f}): {n.text}" for i, n in enumerate(nodes)]))
if self.debug_mode: print(buf)
if self.debug_mode: logger.info(buf)
return buf

View File

@@ -2,6 +2,7 @@ import llama_index
import os
import atexit
from typing import List
from loguru import logger
from llama_index.core import Document
from llama_index.core.schema import TextNode
from request_llms.embed_models.openai_embed import OpenAiEmbeddingModel
@@ -44,14 +45,14 @@ class MilvusSaveLoad():
return True
def save_to_checkpoint(self, checkpoint_dir=None):
print(f'saving vector store to: {checkpoint_dir}')
logger.info(f'saving vector store to: {checkpoint_dir}')
# if checkpoint_dir is None: checkpoint_dir = self.checkpoint_dir
# self.vs_index.storage_context.persist(persist_dir=checkpoint_dir)
def load_from_checkpoint(self, checkpoint_dir=None):
if checkpoint_dir is None: checkpoint_dir = self.checkpoint_dir
if self.does_checkpoint_exist(checkpoint_dir=checkpoint_dir):
print('loading checkpoint from disk')
logger.info('loading checkpoint from disk')
from llama_index.core import StorageContext, load_index_from_storage
storage_context = StorageContext.from_defaults(persist_dir=checkpoint_dir)
try:
@@ -101,7 +102,7 @@ class MilvusRagWorker(MilvusSaveLoad, LlamaIndexRagWorker):
vector_store_preview = "\n".join(
[f"{node.id_} | {node.text}" for node in dummy_retrieve_res]
)
print('\n++ --------inspect_vector_store begin--------')
print(vector_store_preview)
print('oo --------inspect_vector_store end--------')
logger.info('\n++ --------inspect_vector_store begin--------')
logger.info(vector_store_preview)
logger.info('oo --------inspect_vector_store end--------')
return vector_store_preview

View File

@@ -1,16 +1,17 @@
# From project chatglm-langchain
import threading
from toolbox import Singleton
import os
import shutil
import os
import uuid
import tqdm
import shutil
import threading
import numpy as np
from toolbox import Singleton
from loguru import logger
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document
from typing import List, Tuple
import numpy as np
from crazy_functions.vector_fns.general_file_loader import load_file
embedding_model_dict = {
@@ -150,17 +151,17 @@ class LocalDocQA:
failed_files = []
if isinstance(filepath, str):
if not os.path.exists(filepath):
print("路径不存在")
logger.error("路径不存在")
return None
elif os.path.isfile(filepath):
file = os.path.split(filepath)[-1]
try:
docs = load_file(filepath, SENTENCE_SIZE)
print(f"{file} 已成功加载")
logger.info(f"{file} 已成功加载")
loaded_files.append(filepath)
except Exception as e:
print(e)
print(f"{file} 未能成功加载")
logger.error(e)
logger.error(f"{file} 未能成功加载")
return None
elif os.path.isdir(filepath):
docs = []
@@ -170,23 +171,23 @@ class LocalDocQA:
docs += load_file(fullfilepath, SENTENCE_SIZE)
loaded_files.append(fullfilepath)
except Exception as e:
print(e)
logger.error(e)
failed_files.append(file)
if len(failed_files) > 0:
print("以下文件未能成功加载:")
logger.error("以下文件未能成功加载:")
for file in failed_files:
print(f"{file}\n")
logger.error(f"{file}\n")
else:
docs = []
for file in filepath:
docs += load_file(file, SENTENCE_SIZE)
print(f"{file} 已成功加载")
logger.info(f"{file} 已成功加载")
loaded_files.append(file)
if len(docs) > 0:
print("文件加载完毕,正在生成向量库")
logger.info("文件加载完毕,正在生成向量库")
if vs_path and os.path.isdir(vs_path):
try:
self.vector_store = FAISS.load_local(vs_path, text2vec)
@@ -233,7 +234,7 @@ class LocalDocQA:
prompt += "\n\n".join([f"({k}): " + doc.page_content for k, doc in enumerate(related_docs_with_score)])
prompt += "\n\n---\n\n"
prompt = prompt.encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars
# print(prompt)
# logger.info(prompt)
response = {"query": query, "source_documents": related_docs_with_score}
return response, prompt
@@ -262,7 +263,7 @@ def construct_vector_store(vs_id, vs_path, files, sentence_size, history, one_co
else:
pass
# file_status = "文件未成功加载,请重新上传文件"
# print(file_status)
# logger.info(file_status)
return local_doc_qa, vs_path
@Singleton
@@ -278,7 +279,7 @@ class knowledge_archive_interface():
if self.text2vec_large_chinese is None:
# < -------------------预热文本向量化模组--------------- >
from toolbox import ProxyNetworkActivate
print('Checking Text2vec ...')
logger.info('Checking Text2vec ...')
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
with ProxyNetworkActivate('Download_LLM'): # 临时地激活代理网络
self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")

View File

@@ -1,17 +1,19 @@
import re, requests, unicodedata, os
from toolbox import update_ui, get_log_folder
from toolbox import write_history_to_file, promote_file_to_downloadzone
from toolbox import CatchException, report_exception, get_conf
import re, requests, unicodedata, os
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from loguru import logger
def download_arxiv_(url_pdf):
if 'arxiv.org' not in url_pdf:
if ('.' in url_pdf) and ('/' not in url_pdf):
new_url = 'https://arxiv.org/abs/'+url_pdf
print('下载编号:', url_pdf, '自动定位:', new_url)
logger.info('下载编号:', url_pdf, '自动定位:', new_url)
# download_arxiv_(new_url)
return download_arxiv_(new_url)
else:
print('不能识别的URL')
logger.info('不能识别的URL')
return None
if 'abs' in url_pdf:
url_pdf = url_pdf.replace('abs', 'pdf')
@@ -42,15 +44,12 @@ def download_arxiv_(url_pdf):
requests_pdf_url = url_pdf
file_path = download_dir+title_str
print('下载中')
logger.info('下载中')
proxies = get_conf('proxies')
r = requests.get(requests_pdf_url, proxies=proxies)
with open(file_path, 'wb+') as f:
f.write(r.content)
print('下载完成')
# print('输出下载命令:','aria2c -o \"%s\" %s'%(title_str,url_pdf))
# subprocess.call('aria2c --all-proxy=\"172.18.116.150:11084\" -o \"%s\" %s'%(download_dir+title_str,url_pdf), shell=True)
logger.info('下载完成')
x = "%s %s %s.bib" % (paper_id, other_info['year'], other_info['authors'])
x = x.replace('?', '')\
@@ -63,19 +62,9 @@ def download_arxiv_(url_pdf):
def get_name(_url_):
import os
from bs4 import BeautifulSoup
print('正在获取文献名!')
print(_url_)
# arxiv_recall = {}
# if os.path.exists('./arxiv_recall.pkl'):
# with open('./arxiv_recall.pkl', 'rb') as f:
# arxiv_recall = pickle.load(f)
# if _url_ in arxiv_recall:
# print('在缓存中')
# return arxiv_recall[_url_]
logger.info('正在获取文献名!')
logger.info(_url_)
proxies = get_conf('proxies')
res = requests.get(_url_, proxies=proxies)
@@ -92,7 +81,7 @@ def get_name(_url_):
other_details['abstract'] = abstract
except:
other_details['year'] = ''
print('年份获取失败')
logger.info('年份获取失败')
# get author
try:
@@ -101,7 +90,7 @@ def get_name(_url_):
other_details['authors'] = authors
except:
other_details['authors'] = ''
print('authors获取失败')
logger.info('authors获取失败')
# get comment
try:
@@ -116,11 +105,11 @@ def get_name(_url_):
other_details['comment'] = ''
except:
other_details['comment'] = ''
print('年份获取失败')
logger.info('年份获取失败')
title_str = BeautifulSoup(
res.text, 'html.parser').find('title').contents[0]
print('获取成功:', title_str)
logger.info('获取成功:', title_str)
# arxiv_recall[_url_] = (title_str+'.pdf', other_details)
# with open('./arxiv_recall.pkl', 'wb') as f:
# pickle.dump(arxiv_recall, f)

View File

@@ -1,5 +1,5 @@
from toolbox import CatchException, update_ui
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
@CatchException
def 交互功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):

View File

@@ -16,8 +16,8 @@ Testing:
from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, is_the_upload_folder
from toolbox import promote_file_to_downloadzone, get_log_folder, update_ui_lastest_msg
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_plugin_arg
from .crazy_utils import input_clipping, try_install_deps
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_plugin_arg
from crazy_functions.crazy_utils import input_clipping, try_install_deps
from crazy_functions.gen_fns.gen_fns_shared import is_function_successfully_generated
from crazy_functions.gen_fns.gen_fns_shared import get_class_name
from crazy_functions.gen_fns.gen_fns_shared import subprocess_worker

View File

@@ -1,6 +1,6 @@
from toolbox import CatchException, update_ui, gen_time_str
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from .crazy_utils import input_clipping
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.crazy_utils import input_clipping
import copy, json
@CatchException

View File

@@ -6,13 +6,14 @@
"""
import time
from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, ProxyNetworkActivate
from toolbox import get_conf, select_api_key, update_ui_lastest_msg, Singleton
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_plugin_arg
from crazy_functions.crazy_utils import input_clipping, try_install_deps
from crazy_functions.agent_fns.persistent import GradioMultiuserManagerForPersistentClasses
from crazy_functions.agent_fns.auto_agent import AutoGenMath
import time
from loguru import logger
def remove_model_prefix(llm):
if llm.startswith('api2d-'): llm = llm.replace('api2d-', '')
@@ -80,12 +81,12 @@ def 多智能体终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_
persistent_key = f"{user_uuid}->多智能体终端"
if persistent_class_multi_user_manager.already_alive(persistent_key):
# 当已经存在一个正在运行的多智能体终端时,直接将用户输入传递给它,而不是再次启动一个新的多智能体终端
print('[debug] feed new user input')
logger.info('[debug] feed new user input')
executor = persistent_class_multi_user_manager.get(persistent_key)
exit_reason = yield from executor.main_process_ui_control(txt, create_or_resume="resume")
else:
# 运行多智能体终端 (首次)
print('[debug] create new executor instance')
logger.info('[debug] create new executor instance')
history = []
chatbot.append(["正在启动: 多智能体终端", "插件动态生成, 执行开始, 作者 Microsoft & Binary-Husky."])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

View File

@@ -1,7 +1,7 @@
from toolbox import update_ui
from toolbox import CatchException, report_exception
from toolbox import write_history_to_file, promote_file_to_downloadzone
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
fast_debug = False

View File

@@ -1,5 +1,5 @@
from toolbox import CatchException, report_exception, select_api_key, update_ui, get_conf
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from toolbox import write_history_to_file, promote_file_to_downloadzone, get_log_folder
def split_audio_file(filename, split_duration=1000):

View File

@@ -1,16 +1,18 @@
from loguru import logger
from toolbox import update_ui, promote_file_to_downloadzone, gen_time_str
from toolbox import CatchException, report_exception
from toolbox import write_history_to_file, promote_file_to_downloadzone
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from .crazy_utils import read_and_clean_pdf_text
from .crazy_utils import input_clipping
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.crazy_utils import read_and_clean_pdf_text
from crazy_functions.crazy_utils import input_clipping
def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
file_write_buffer = []
for file_name in file_manifest:
print('begin analysis on:', file_name)
logger.info('begin analysis on:', file_name)
############################## <第 0 步切割PDF> ##################################
# 递归地切割PDF文件每一块尽量是完整的一个section比如introductionexperiment等必要时再进行切割
# 的长度必须小于 2500 个 Token
@@ -38,7 +40,7 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
last_iteration_result = paper_meta # 初始值是摘要
MAX_WORD_TOTAL = 4096 * 0.7
n_fragment = len(paper_fragments)
if n_fragment >= 20: print('文章极长,不能达到预期效果')
if n_fragment >= 20: logger.warning('文章极长,不能达到预期效果')
for i in range(n_fragment):
NUM_OF_WORD = MAX_WORD_TOTAL // n_fragment
i_say = f"Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} Chinese characters: {paper_fragments[i]}"

View File

@@ -1,6 +1,7 @@
from loguru import logger
from toolbox import update_ui
from toolbox import CatchException, report_exception
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from toolbox import write_history_to_file, promote_file_to_downloadzone
fast_debug = False
@@ -57,7 +58,6 @@ def readPdf(pdfPath):
layout = device.get_result()
for obj in layout._objs:
if isinstance(obj, pdfminer.layout.LTTextBoxHorizontal):
# print(obj.get_text())
outTextList.append(obj.get_text())
return outTextList
@@ -66,7 +66,7 @@ def readPdf(pdfPath):
def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
import time, glob, os
from bs4 import BeautifulSoup
print('begin analysis on:', file_manifest)
logger.info('begin analysis on:', file_manifest)
for index, fp in enumerate(file_manifest):
if ".tex" in fp:
with open(fp, 'r', encoding='utf-8', errors='replace') as f:

View File

@@ -1,9 +1,9 @@
from toolbox import CatchException, report_exception, get_log_folder, gen_time_str
from toolbox import update_ui, promote_file_to_downloadzone, update_ui_lastest_msg, disable_auto_promotion
from toolbox import write_history_to_file, promote_file_to_downloadzone
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from .crazy_utils import read_and_clean_pdf_text
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from crazy_functions.crazy_utils import read_and_clean_pdf_text
from .pdf_fns.parse_pdf import parse_pdf, get_avail_grobid_url, translate_pdf
from shared_utils.colorful import *
import copy
@@ -60,7 +60,7 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
# 清空历史,以免输入溢出
history = []
from .crazy_utils import get_files_from_everything
from crazy_functions.crazy_utils import get_files_from_everything
success, file_manifest, project_folder = get_files_from_everything(txt, type='.pdf')
if len(file_manifest) > 0:
# 尝试导入依赖,如果缺少依赖,则给出安装建议

View File

@@ -1,4 +1,5 @@
import os
from loguru import logger
from toolbox import CatchException, update_ui, gen_time_str, promote_file_to_downloadzone
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.crazy_utils import input_clipping
@@ -34,10 +35,10 @@ def eval_manim(code):
return f'gpt_log/{time_str}.mp4'
except subprocess.CalledProcessError as e:
output = e.output.decode()
print(f"Command returned non-zero exit status {e.returncode}: {output}.")
logger.error(f"Command returned non-zero exit status {e.returncode}: {output}.")
return f"Evaluating python script failed: {e.output}."
except:
print('generating mp4 failed')
logger.error('generating mp4 failed')
return "Generating mp4 failed."

View File

@@ -1,13 +1,12 @@
from loguru import logger
from toolbox import update_ui
from toolbox import CatchException, report_exception
from .crazy_utils import read_and_clean_pdf_text
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
fast_debug = False
from crazy_functions.crazy_utils import read_and_clean_pdf_text
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
import tiktoken
print('begin analysis on:', file_name)
logger.info('begin analysis on:', file_name)
############################## <第 0 步切割PDF> ##################################
# 递归地切割PDF文件每一块尽量是完整的一个section比如introductionexperiment等必要时再进行切割
@@ -36,7 +35,7 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
last_iteration_result = paper_meta # 初始值是摘要
MAX_WORD_TOTAL = 4096
n_fragment = len(paper_fragments)
if n_fragment >= 20: print('文章极长,不能达到预期效果')
if n_fragment >= 20: logger.warning('文章极长,不能达到预期效果')
for i in range(n_fragment):
NUM_OF_WORD = MAX_WORD_TOTAL // n_fragment
i_say = f"Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words: {paper_fragments[i]}"
@@ -57,7 +56,7 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
chatbot.append([i_say_show_user, gpt_say])
############################## <第 4 步设置一个token上限防止回答时Token溢出> ##################################
from .crazy_utils import input_clipping
from crazy_functions.crazy_utils import input_clipping
_, final_results = input_clipping("", final_results, max_token_limit=3200)
yield from update_ui(chatbot=chatbot, history=final_results) # 注意这里的历史记录被替代了

View File

@@ -1,12 +1,12 @@
from loguru import logger
from toolbox import update_ui
from toolbox import CatchException, report_exception
from toolbox import write_history_to_file, promote_file_to_downloadzone
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
fast_debug = False
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
def 生成函数注释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
import time, os
print('begin analysis on:', file_manifest)
logger.info('begin analysis on:', file_manifest)
for index, fp in enumerate(file_manifest):
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
file_content = f.read()
@@ -16,22 +16,20 @@ def 生成函数注释(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
if not fast_debug:
msg = '正常'
# ** gpt request **
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt=system_prompt) # 带超时倒计时
msg = '正常'
# ** gpt request **
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt=system_prompt) # 带超时倒计时
chatbot[-1] = (i_say_show_user, gpt_say)
history.append(i_say_show_user); history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
if not fast_debug: time.sleep(2)
if not fast_debug:
res = write_history_to_file(history)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append(("完成了吗?", res))
chatbot[-1] = (i_say_show_user, gpt_say)
history.append(i_say_show_user); history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
time.sleep(2)
res = write_history_to_file(history)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append(("完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面

View File

@@ -1,5 +1,5 @@
from toolbox import CatchException, update_ui, report_exception
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.plugin_template.plugin_class_template import (
GptAcademicPluginTemplate,
)
@@ -201,8 +201,7 @@ def 解析历史输入(history, llm_kwargs, file_manifest, chatbot, plugin_kwarg
MAX_WORD_TOTAL = 4096
n_txt = len(txt)
last_iteration_result = "从以下文本中提取摘要。"
if n_txt >= 20:
print("文章极长,不能达到预期效果")
for i in range(n_txt):
NUM_OF_WORD = MAX_WORD_TOTAL // n_txt
i_say = f"Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words in Chinese: {txt[i]}"

View File

@@ -1,6 +1,6 @@
from toolbox import CatchException, update_ui, ProxyNetworkActivate, update_ui_lastest_msg, get_log_folder, get_user
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_files_from_everything
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_files_from_everything
from loguru import logger
install_msg ="""
1. python -m pip install torch --index-url https://download.pytorch.org/whl/cpu
@@ -40,7 +40,7 @@ def 知识库文件注入(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
except Exception as e:
chatbot.append(["依赖不足", f"{str(e)}\n\n导入依赖失败。请用以下命令安装" + install_msg])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# from .crazy_utils import try_install_deps
# from crazy_functions.crazy_utils import try_install_deps
# try_install_deps(['zh_langchain==0.2.1', 'pypinyin'], reload_m=['pypinyin', 'zh_langchain'])
# yield from update_ui_lastest_msg("安装完成,您可以再次重试。", chatbot, history)
return
@@ -60,7 +60,7 @@ def 知识库文件注入(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
# < -------------------预热文本向量化模组--------------- >
chatbot.append(['<br/>'.join(file_manifest), "正在预热文本向量化模组, 如果是第一次运行, 将消耗较长时间下载中文向量化模型..."])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
print('Checking Text2vec ...')
logger.info('Checking Text2vec ...')
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
with ProxyNetworkActivate('Download_LLM'): # 临时地激活代理网络
HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
@@ -68,7 +68,7 @@ def 知识库文件注入(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
# < -------------------构建知识库--------------- >
chatbot.append(['<br/>'.join(file_manifest), "正在构建知识库..."])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
print('Establishing knowledge archive ...')
logger.info('Establishing knowledge archive ...')
with ProxyNetworkActivate('Download_LLM'): # 临时地激活代理网络
kai = knowledge_archive_interface()
vs_path = get_log_folder(user=get_user(chatbot), plugin_name='vec_store')
@@ -93,7 +93,7 @@ def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
except Exception as e:
chatbot.append(["依赖不足", f"{str(e)}\n\n导入依赖失败。请用以下命令安装" + install_msg])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# from .crazy_utils import try_install_deps
# from crazy_functions.crazy_utils import try_install_deps
# try_install_deps(['zh_langchain==0.2.1', 'pypinyin'], reload_m=['pypinyin', 'zh_langchain'])
# yield from update_ui_lastest_msg("安装完成,您可以再次重试。", chatbot, history)
return

View File

@@ -1,5 +1,5 @@
from toolbox import CatchException, update_ui
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
import requests
from bs4 import BeautifulSoup
from request_llms.bridge_all import model_info
@@ -23,8 +23,8 @@ def google(query, proxies):
item = {'title': title, 'link': link}
results.append(item)
for r in results:
print(r['link'])
# for r in results:
# print(r['link'])
return results
def scrape_text(url, proxies) -> str:

View File

@@ -1,5 +1,5 @@
from toolbox import CatchException, update_ui
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
import requests
from bs4 import BeautifulSoup
from request_llms.bridge_all import model_info
@@ -22,8 +22,8 @@ def bing_search(query, proxies=None):
item = {'title': title, 'link': link}
results.append(item)
for r in results:
print(r['link'])
# for r in results:
# print(r['link'])
return results

View File

@@ -64,7 +64,7 @@ def parseNotebook(filename, enable_markdown=1):
def ipynb解释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
enable_markdown = plugin_kwargs.get("advanced_arg", "1")

View File

@@ -1,5 +1,5 @@
from toolbox import CatchException, update_ui, get_conf
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
import datetime
@CatchException
def 同时问询(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):

View File

@@ -1,11 +1,13 @@
from toolbox import update_ui
from toolbox import CatchException, get_conf, markdown_convertion
from request_llms.bridge_all import predict_no_ui_long_connection
from crazy_functions.crazy_utils import input_clipping
from crazy_functions.agent_fns.watchdog import WatchDog
from request_llms.bridge_all import predict_no_ui_long_connection
from crazy_functions.live_audio.aliyunASR import AliyunASR
from loguru import logger
import threading, time
import numpy as np
from .live_audio.aliyunASR import AliyunASR
import json
import re
@@ -42,9 +44,9 @@ class AsyncGptTask():
gpt_say_partial = predict_no_ui_long_connection(inputs=i_say, llm_kwargs=llm_kwargs, history=history, sys_prompt=sys_prompt,
observe_window=observe_window[index], console_slience=True)
except ConnectionAbortedError as token_exceed_err:
print('至少一个线程任务Token溢出而失败', e)
logger.error('至少一个线程任务Token溢出而失败', e)
except Exception as e:
print('至少一个线程任务意外失败', e)
logger.error('至少一个线程任务意外失败', e)
def add_async_gpt_task(self, i_say, chatbot_index, llm_kwargs, history, system_prompt):
self.observe_future.append([""])

View File

@@ -1,12 +1,11 @@
from toolbox import update_ui
from toolbox import CatchException, report_exception
from toolbox import write_history_to_file, promote_file_to_downloadzone
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
import time, glob, os
print('begin analysis on:', file_manifest)
for index, fp in enumerate(file_manifest):
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
file_content = f.read()

View File

@@ -1,4 +1,4 @@
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from toolbox import CatchException, report_exception, promote_file_to_downloadzone
from toolbox import update_ui, update_ui_lastest_msg, disable_auto_promotion, write_history_to_file
import logging