logging -> loguru: final stage

Author: binary-husky
Date:   2024-09-15 15:51:51 +00:00
Parent: bbf9e9f868
Commit: 2f343179a2
55 changed files with 237 additions and 529 deletions
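
The pattern repeated across the 55 files is mechanical: bare `print` calls become leveled loguru calls (`logger.info`, `logger.warning`, or `logger.error`, depending on the code path), and package-relative imports such as `from .crazy_utils import ...` become absolute `from crazy_functions.crazy_utils import ...`, which keeps the modules importable regardless of entry point. A minimal sketch of the replacement pattern (the `analyze` function below is illustrative, not a file from this commit):

from loguru import logger

# The commit also rewrites package-relative imports as absolute ones, e.g.
#   from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
# becomes
#   from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive

def analyze(file_name):
    # before: print('begin analysis on:', file_name)
    logger.info(f'begin analysis on: {file_name}')  # plain prints become info-level records
    try:
        raise RuntimeError('demo failure')
    except Exception as e:
        # error paths that previously printed now carry an explicit severity
        logger.error('ignored exception:\n' + str(e))

analyze('paper.tex')

One caveat when reading the hunks below: loguru treats extra positional arguments as `str.format` parameters rather than print-style fragments, so comma-form calls that survive the migration (e.g. `logger.info('begin analysis on:', file_name)`) drop the second value from the rendered message; the f-string form above is the reliable replacement.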

View File

@@ -171,7 +171,7 @@ def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
     system_prompt 给gpt的静默提醒
     user_request 当前用户的请求信息(IP地址等)
     """
-    from .crazy_utils import get_files_from_everything
+    from crazy_functions.crazy_utils import get_files_from_everything
     success, file_manifest, _ = get_files_from_everything(txt, type='.html')
     if not success:

View File

@@ -56,7 +56,7 @@ class PaperFileGroup():
 def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='polish'):
     import time, os, re
-    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
+    from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
     # <-------- 读取Latex文件,删除其中的所有注释 ---------->

View File

@@ -37,7 +37,7 @@ class PaperFileGroup():
 def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en'):
     import time, os, re
-    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
+    from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
     # <-------- 读取Latex文件,删除其中的所有注释 ---------->
     pfg = PaperFileGroup()

View File

@@ -52,7 +52,7 @@ class PaperFileGroup():
         return manifest

 def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en'):
-    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
+    from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
     # <-------- 读取Markdown文件,删除其中的所有注释 ---------->
     pfg = PaperFileGroup()

View File

@@ -5,8 +5,8 @@ from crazy_functions.crazy_utils import input_clipping
 def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
     import os, copy
-    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
-    from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+    from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
+    from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
     summary_batch_isolation = True
     inputs_array = []

View File

@@ -1,4 +1,5 @@
 from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
+from loguru import logger

 class EchoDemo(PluginMultiprocessManager):
     def subprocess_worker(self, child_conn):
@@ -16,4 +17,4 @@ class EchoDemo(PluginMultiprocessManager):
             elif msg.cmd == "terminate":
                 self.child_conn.send(PipeCom("done", ""))
                 break
-        print('[debug] subprocess_worker terminated')
+        logger.info('[debug] subprocess_worker terminated')

View File

@@ -1,5 +1,6 @@
 from toolbox import get_log_folder, update_ui, gen_time_str, get_conf, promote_file_to_downloadzone
 from crazy_functions.agent_fns.watchdog import WatchDog
+from loguru import logger
 import time, os

 class PipeCom:
@@ -47,7 +48,7 @@ class PluginMultiprocessManager:
     def terminate(self):
         self.p.terminate()
         self.alive = False
-        print("[debug] instance terminated")
+        logger.info("[debug] instance terminated")

     def subprocess_worker(self, child_conn):
         # ⭐⭐ run in subprocess

View File

@@ -1,10 +1,12 @@
-from toolbox import CatchException, update_ui
-from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
-from request_llms.bridge_all import predict_no_ui_long_connection
 import datetime
 import re
 import os
+from loguru import logger
 from textwrap import dedent
+from toolbox import CatchException, update_ui
+from request_llms.bridge_all import predict_no_ui_long_connection
+from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive

 # TODO: 解决缩进问题
 find_function_end_prompt = '''
@@ -355,7 +357,7 @@ class PythonCodeComment():
             try:
                 successful, hint = self.verify_successful(next_batch, result)
             except Exception as e:
-                print('ignored exception:\n' + str(e))
+                logger.error('ignored exception:\n' + str(e))
                 break
             if successful:
                 break

View File

@@ -1,4 +1,5 @@
 import threading, time
+from loguru import logger

 class WatchDog():
     def __init__(self, timeout, bark_fn, interval=3, msg="") -> None:
@@ -13,7 +14,7 @@ class WatchDog():
         while True:
             if self.kill_dog: break
             if time.time() - self.last_feed > self.timeout:
-                if len(self.msg) > 0: print(self.msg)
+                if len(self.msg) > 0: logger.info(self.msg)
                 self.bark_fn()
                 break
             time.sleep(self.interval)

View File

@@ -1,5 +1,5 @@
 from toolbox import CatchException, update_ui, promote_file_to_downloadzone
-from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
+from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
 import datetime, json

 def fetch_items(list_of_items, batch_size):

View File

@@ -1,5 +1,6 @@
 import os
 from textwrap import indent
+from loguru import logger

 class FileNode:
     def __init__(self, name, build_manifest=False):
@@ -60,7 +61,7 @@ class FileNode:
             current_node.children.append(term)

     def print_files_recursively(self, level=0, code="R0"):
-        print(' '*level + self.name + ' ' + str(self.is_leaf) + ' ' + str(self.level))
+        logger.info(' '*level + self.name + ' ' + str(self.is_leaf) + ' ' + str(self.level))
         for j, child in enumerate(self.children):
             child.print_files_recursively(level=level+1, code=code+str(j))
             self.parenting_ship.extend(child.parenting_ship)
@@ -123,4 +124,4 @@ if __name__ == "__main__":
"用于加载和分割文件中的文本的通用文件加载器用于加载和分割文件中的文本的通用文件加载器用于加载和分割文件中的文本的通用文件加载器", "用于加载和分割文件中的文本的通用文件加载器用于加载和分割文件中的文本的通用文件加载器用于加载和分割文件中的文本的通用文件加载器",
"包含了用于构建和管理向量数据库的函数和类包含了用于构建和管理向量数据库的函数和类包含了用于构建和管理向量数据库的函数和类", "包含了用于构建和管理向量数据库的函数和类包含了用于构建和管理向量数据库的函数和类包含了用于构建和管理向量数据库的函数和类",
] ]
print(build_file_tree_mermaid_diagram(file_manifest, file_comments, "项目文件树")) logger.info(build_file_tree_mermaid_diagram(file_manifest, file_comments, "项目文件树"))

View File

@@ -1,15 +1,17 @@
+import os
+import re
+import shutil
+import numpy as np
+from loguru import logger
 from toolbox import update_ui, update_ui_lastest_msg, get_log_folder
 from toolbox import get_conf, promote_file_to_downloadzone
-from .latex_toolbox import PRESERVE, TRANSFORM
-from .latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
-from .latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process
-from .latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout
-from .latex_toolbox import find_title_and_abs
-from .latex_pickle_io import objdump, objload
+from crazy_functions.latex_fns.latex_toolbox import PRESERVE, TRANSFORM
+from crazy_functions.latex_fns.latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
+from crazy_functions.latex_fns.latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process
+from crazy_functions.latex_fns.latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout
+from crazy_functions.latex_fns.latex_toolbox import find_title_and_abs
+from crazy_functions.latex_fns.latex_pickle_io import objdump, objload
-import os, shutil
-import re
-import numpy as np

 pj = os.path.join
@@ -323,7 +325,7 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work
         buggy_lines = [int(l) for l in buggy_lines]
         buggy_lines = sorted(buggy_lines)
         buggy_line = buggy_lines[0]-1
-        print("reversing tex line that has errors", buggy_line)
+        logger.warning("reversing tex line that has errors", buggy_line)

         # 重组,逆转出错的段落
         if buggy_line not in fixed_line:
@@ -337,7 +339,7 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work
         return True, f"{tex_name_pure}_fix_{n_fix}", buggy_lines
     except:
-        print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
+        logger.error("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
         return False, -1, [-1]
@@ -380,7 +382,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
         if mode!='translate_zh':
             yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
-            print( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
+            logger.info( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
             ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex', os.getcwd())
             yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
@@ -419,7 +421,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
                 shutil.copyfile(concat_pdf, pj(work_folder, '..', 'translation', 'comparison.pdf'))
                 promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
             except Exception as e:
-                print(e)
+                logger.error(e)
                 pass
             return True # 成功啦
         else:
@@ -465,4 +467,4 @@ def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
         promote_file_to_downloadzone(file=res, chatbot=chatbot)
     except:
         from toolbox import trimmed_format_exc
-        print('writing html result failed:', trimmed_format_exc())
+        logger.error('writing html result failed:', trimmed_format_exc())

View File

@@ -1,6 +1,8 @@
-import os, shutil
+import os
 import re
+import shutil
 import numpy as np
+from loguru import logger

 PRESERVE = 0
 TRANSFORM = 1
@@ -55,7 +57,7 @@ def post_process(root):
             str_stack.append("{")
         elif c == "}":
             if len(str_stack) == 1:
-                print("stack fix")
+                logger.warning("fixing brace error")
                 return i
             str_stack.pop(-1)
         else:
@@ -601,7 +603,7 @@ def compile_latex_with_timeout(command, cwd, timeout=60):
     except subprocess.TimeoutExpired:
         process.kill()
         stdout, stderr = process.communicate()
-        print("Process timed out!")
+        logger.error("Process timed out (compile_latex_with_timeout)!")
         return False
     return True

View File

@@ -107,18 +107,14 @@ def is_speaker_speaking(vad, data, sample_rate):
 class AliyunASR():

     def test_on_sentence_begin(self, message, *args):
-        # print("test_on_sentence_begin:{}".format(message))
         pass

     def test_on_sentence_end(self, message, *args):
-        # print("test_on_sentence_end:{}".format(message))
         message = json.loads(message)
         self.parsed_sentence = message['payload']['result']
         self.event_on_entence_end.set()
-        # print(self.parsed_sentence)

     def test_on_start(self, message, *args):
-        # print("test_on_start:{}".format(message))
         pass

     def test_on_error(self, message, *args):
@@ -130,13 +126,11 @@ class AliyunASR():
         pass

     def test_on_result_chg(self, message, *args):
-        # print("test_on_chg:{}".format(message))
         message = json.loads(message)
         self.parsed_text = message['payload']['result']
         self.event_on_result_chg.set()

     def test_on_completed(self, message, *args):
-        # print("on_completed:args=>{} message=>{}".format(args, message))
         pass

     def audio_convertion_thread(self, uuid):
@@ -249,14 +243,14 @@ class AliyunASR():
         try:
             response = client.do_action_with_exception(request)
-            print(response)
+            logging.info(response)
             jss = json.loads(response)
             if 'Token' in jss and 'Id' in jss['Token']:
                 token = jss['Token']['Id']
                 expireTime = jss['Token']['ExpireTime']
-                print("token = " + token)
-                print("expireTime = " + str(expireTime))
+                logging.info("token = " + token)
+                logging.info("expireTime = " + str(expireTime))
         except Exception as e:
-            print(e)
+            logging.error(e)
         return token

View File

@@ -1,4 +1,5 @@
 from crazy_functions.ipc_fns.mp import run_in_subprocess_with_timeout
+from loguru import logger

 def force_breakdown(txt, limit, get_token_fn):
     """ 当无法用标点、空行分割时,我们用最暴力的方法切割
@@ -76,7 +77,7 @@ def cut(limit, get_token_fn, txt_tocut, must_break_at_empty_line, break_anyway=F
             remain_txt_to_cut = post
             remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage)
             process = fin_len/total_len
-            print(f'正在文本切分 {int(process*100)}%')
+            logger.info(f'正在文本切分 {int(process*100)}%')
             if len(remain_txt_to_cut.strip()) == 0:
                 break
     return res
@@ -119,7 +120,7 @@ if __name__ == '__main__':
     for i in range(5):
         file_content += file_content
-    print(len(file_content))
+    logger.info(len(file_content))
     TOKEN_LIMIT_PER_FRAGMENT = 2500
     res = breakdown_text_to_satisfy_token_limit(file_content, TOKEN_LIMIT_PER_FRAGMENT)

View File

@@ -5,6 +5,7 @@ from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_
 from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
 from crazy_functions.crazy_utils import read_and_clean_pdf_text
 from shared_utils.colorful import *
+from loguru import logger
 import os

 def 解析PDF_简单拆解(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
@@ -93,7 +94,7 @@ def 解析PDF_简单拆解(file_manifest, project_folder, llm_kwargs, plugin_kwa
             generated_html_files.append(ch.save_file(create_report_file_name))
         except:
             from toolbox import trimmed_format_exc
-            print('writing html result failed:', trimmed_format_exc())
+            logger.error('writing html result failed:', trimmed_format_exc())

     # 准备文件的下载
     for pdf_path in generated_conclusion_files:

View File

@@ -1,6 +1,7 @@
 import llama_index
 import os
 import atexit
+from loguru import logger
 from typing import List
 from llama_index.core import Document
 from llama_index.core.schema import TextNode
@@ -41,14 +42,14 @@ class SaveLoad():
         return True

     def save_to_checkpoint(self, checkpoint_dir=None):
-        print(f'saving vector store to: {checkpoint_dir}')
+        logger.info(f'saving vector store to: {checkpoint_dir}')
         if checkpoint_dir is None: checkpoint_dir = self.checkpoint_dir
         self.vs_index.storage_context.persist(persist_dir=checkpoint_dir)

     def load_from_checkpoint(self, checkpoint_dir=None):
         if checkpoint_dir is None: checkpoint_dir = self.checkpoint_dir
         if self.does_checkpoint_exist(checkpoint_dir=checkpoint_dir):
-            print('loading checkpoint from disk')
+            logger.info('loading checkpoint from disk')
             from llama_index.core import StorageContext, load_index_from_storage
             storage_context = StorageContext.from_defaults(persist_dir=checkpoint_dir)
             self.vs_index = load_index_from_storage(storage_context, embed_model=self.embed_model)
@@ -85,9 +86,9 @@ class LlamaIndexRagWorker(SaveLoad):
         self.vs_index.storage_context.index_store.to_dict()
         docstore = self.vs_index.storage_context.docstore.docs
         vector_store_preview = "\n".join([ f"{_id} | {tn.text}" for _id, tn in docstore.items() ])
-        print('\n++ --------inspect_vector_store begin--------')
-        print(vector_store_preview)
-        print('oo --------inspect_vector_store end--------')
+        logger.info('\n++ --------inspect_vector_store begin--------')
+        logger.info(vector_store_preview)
+        logger.info('oo --------inspect_vector_store end--------')
         return vector_store_preview

     def add_documents_to_vector_store(self, document_list):
@@ -125,5 +126,5 @@ class LlamaIndexRagWorker(SaveLoad):
     def generate_node_array_preview(self, nodes):
         buf = "\n".join(([f"(No.{i+1} | score {n.score:.3f}): {n.text}" for i, n in enumerate(nodes)]))
-        if self.debug_mode: print(buf)
+        if self.debug_mode: logger.info(buf)
         return buf

View File

@@ -2,6 +2,7 @@ import llama_index
 import os
 import atexit
 from typing import List
+from loguru import logger
 from llama_index.core import Document
 from llama_index.core.schema import TextNode
 from request_llms.embed_models.openai_embed import OpenAiEmbeddingModel
@@ -44,14 +45,14 @@ class MilvusSaveLoad():
         return True

     def save_to_checkpoint(self, checkpoint_dir=None):
-        print(f'saving vector store to: {checkpoint_dir}')
+        logger.info(f'saving vector store to: {checkpoint_dir}')
         # if checkpoint_dir is None: checkpoint_dir = self.checkpoint_dir
         # self.vs_index.storage_context.persist(persist_dir=checkpoint_dir)

     def load_from_checkpoint(self, checkpoint_dir=None):
         if checkpoint_dir is None: checkpoint_dir = self.checkpoint_dir
         if self.does_checkpoint_exist(checkpoint_dir=checkpoint_dir):
-            print('loading checkpoint from disk')
+            logger.info('loading checkpoint from disk')
             from llama_index.core import StorageContext, load_index_from_storage
             storage_context = StorageContext.from_defaults(persist_dir=checkpoint_dir)
             try:
@@ -101,7 +102,7 @@ class MilvusRagWorker(MilvusSaveLoad, LlamaIndexRagWorker):
         vector_store_preview = "\n".join(
             [f"{node.id_} | {node.text}" for node in dummy_retrieve_res]
         )
-        print('\n++ --------inspect_vector_store begin--------')
-        print(vector_store_preview)
-        print('oo --------inspect_vector_store end--------')
+        logger.info('\n++ --------inspect_vector_store begin--------')
+        logger.info(vector_store_preview)
+        logger.info('oo --------inspect_vector_store end--------')
         return vector_store_preview

View File

@@ -1,16 +1,17 @@
 # From project chatglm-langchain

-import threading
-from toolbox import Singleton
 import os
-import shutil
-import os
 import uuid
 import tqdm
+import shutil
+import threading
+import numpy as np
+from toolbox import Singleton
+from loguru import logger
 from langchain.vectorstores import FAISS
 from langchain.docstore.document import Document
 from typing import List, Tuple
-import numpy as np
 from crazy_functions.vector_fns.general_file_loader import load_file
embedding_model_dict = { embedding_model_dict = {
@@ -150,17 +151,17 @@ class LocalDocQA:
         failed_files = []
         if isinstance(filepath, str):
             if not os.path.exists(filepath):
-                print("路径不存在")
+                logger.error("路径不存在")
                 return None
             elif os.path.isfile(filepath):
                 file = os.path.split(filepath)[-1]
                 try:
                     docs = load_file(filepath, SENTENCE_SIZE)
-                    print(f"{file} 已成功加载")
+                    logger.info(f"{file} 已成功加载")
                     loaded_files.append(filepath)
                 except Exception as e:
-                    print(e)
-                    print(f"{file} 未能成功加载")
+                    logger.error(e)
+                    logger.error(f"{file} 未能成功加载")
                     return None
             elif os.path.isdir(filepath):
                 docs = []
@@ -170,23 +171,23 @@ class LocalDocQA:
                         docs += load_file(fullfilepath, SENTENCE_SIZE)
                         loaded_files.append(fullfilepath)
                     except Exception as e:
-                        print(e)
+                        logger.error(e)
                         failed_files.append(file)

                 if len(failed_files) > 0:
-                    print("以下文件未能成功加载:")
+                    logger.error("以下文件未能成功加载:")
                     for file in failed_files:
-                        print(f"{file}\n")
+                        logger.error(f"{file}\n")

         else:
             docs = []
             for file in filepath:
                 docs += load_file(file, SENTENCE_SIZE)
-                print(f"{file} 已成功加载")
+                logger.info(f"{file} 已成功加载")
                 loaded_files.append(file)

         if len(docs) > 0:
-            print("文件加载完毕,正在生成向量库")
+            logger.info("文件加载完毕,正在生成向量库")
             if vs_path and os.path.isdir(vs_path):
                 try:
                     self.vector_store = FAISS.load_local(vs_path, text2vec)
@@ -233,7 +234,7 @@ class LocalDocQA:
         prompt += "\n\n".join([f"({k}): " + doc.page_content for k, doc in enumerate(related_docs_with_score)])
         prompt += "\n\n---\n\n"
         prompt = prompt.encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars
-        # print(prompt)
+        # logger.info(prompt)
         response = {"query": query, "source_documents": related_docs_with_score}
         return response, prompt
@@ -262,7 +263,7 @@ def construct_vector_store(vs_id, vs_path, files, sentence_size, history, one_co
     else:
         pass
         # file_status = "文件未成功加载,请重新上传文件"
-        # print(file_status)
+        # logger.info(file_status)
     return local_doc_qa, vs_path

 @Singleton
@@ -278,7 +279,7 @@ class knowledge_archive_interface():
         if self.text2vec_large_chinese is None:
             # < -------------------预热文本向量化模组--------------- >
             from toolbox import ProxyNetworkActivate
-            print('Checking Text2vec ...')
+            logger.info('Checking Text2vec ...')
             from langchain.embeddings.huggingface import HuggingFaceEmbeddings
             with ProxyNetworkActivate('Download_LLM'):    # 临时地激活代理网络
                 self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")

View File

@@ -1,17 +1,19 @@
+import re, requests, unicodedata, os
 from toolbox import update_ui, get_log_folder
 from toolbox import write_history_to_file, promote_file_to_downloadzone
 from toolbox import CatchException, report_exception, get_conf
-import re, requests, unicodedata, os
-from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+from loguru import logger

 def download_arxiv_(url_pdf):
     if 'arxiv.org' not in url_pdf:
         if ('.' in url_pdf) and ('/' not in url_pdf):
             new_url = 'https://arxiv.org/abs/'+url_pdf
-            print('下载编号:', url_pdf, '自动定位:', new_url)
+            logger.info('下载编号:', url_pdf, '自动定位:', new_url)
             # download_arxiv_(new_url)
             return download_arxiv_(new_url)
         else:
-            print('不能识别的URL')
+            logger.info('不能识别的URL')
             return None
     if 'abs' in url_pdf:
         url_pdf = url_pdf.replace('abs', 'pdf')
@@ -42,15 +44,12 @@ def download_arxiv_(url_pdf):
         requests_pdf_url = url_pdf
         file_path = download_dir+title_str

-    print('下载中')
+    logger.info('下载中')
     proxies = get_conf('proxies')
     r = requests.get(requests_pdf_url, proxies=proxies)
     with open(file_path, 'wb+') as f:
         f.write(r.content)
-    print('下载完成')
-    # print('输出下载命令:','aria2c -o \"%s\" %s'%(title_str,url_pdf))
-    # subprocess.call('aria2c --all-proxy=\"172.18.116.150:11084\" -o \"%s\" %s'%(download_dir+title_str,url_pdf), shell=True)
+    logger.info('下载完成')

     x = "%s %s %s.bib" % (paper_id, other_info['year'], other_info['authors'])
     x = x.replace('?', '')\
@@ -63,19 +62,9 @@ def download_arxiv_(url_pdf):
 def get_name(_url_):
-    import os
     from bs4 import BeautifulSoup
-    print('正在获取文献名!')
-    print(_url_)
+    logger.info('正在获取文献名!')
+    logger.info(_url_)

-    # arxiv_recall = {}
-    # if os.path.exists('./arxiv_recall.pkl'):
-    #     with open('./arxiv_recall.pkl', 'rb') as f:
-    #         arxiv_recall = pickle.load(f)

-    # if _url_ in arxiv_recall:
-    #     print('在缓存中')
-    #     return arxiv_recall[_url_]

     proxies = get_conf('proxies')
     res = requests.get(_url_, proxies=proxies)
@@ -92,7 +81,7 @@ def get_name(_url_):
         other_details['abstract'] = abstract
     except:
         other_details['year'] = ''
-        print('年份获取失败')
+        logger.info('年份获取失败')

     # get author
     try:
@@ -101,7 +90,7 @@ def get_name(_url_):
         other_details['authors'] = authors
     except:
         other_details['authors'] = ''
-        print('authors获取失败')
+        logger.info('authors获取失败')

     # get comment
     try:
@@ -116,11 +105,11 @@ def get_name(_url_):
             other_details['comment'] = ''
     except:
         other_details['comment'] = ''
-        print('年份获取失败')
+        logger.info('年份获取失败')

     title_str = BeautifulSoup(
         res.text, 'html.parser').find('title').contents[0]
-    print('获取成功:', title_str)
+    logger.info('获取成功:', title_str)
     # arxiv_recall[_url_] = (title_str+'.pdf', other_details)
     # with open('./arxiv_recall.pkl', 'wb') as f:
     #     pickle.dump(arxiv_recall, f)

View File

@@ -1,5 +1,5 @@
 from toolbox import CatchException, update_ui
-from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive

 @CatchException
 def 交互功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):

View File

@@ -16,8 +16,8 @@ Testing:
 from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, is_the_upload_folder
 from toolbox import promote_file_to_downloadzone, get_log_folder, update_ui_lastest_msg
-from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_plugin_arg
-from .crazy_utils import input_clipping, try_install_deps
+from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_plugin_arg
+from crazy_functions.crazy_utils import input_clipping, try_install_deps
 from crazy_functions.gen_fns.gen_fns_shared import is_function_successfully_generated
 from crazy_functions.gen_fns.gen_fns_shared import get_class_name
 from crazy_functions.gen_fns.gen_fns_shared import subprocess_worker

View File

@@ -1,6 +1,6 @@
 from toolbox import CatchException, update_ui, gen_time_str
-from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
-from .crazy_utils import input_clipping
+from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+from crazy_functions.crazy_utils import input_clipping
 import copy, json

 @CatchException

View File

@@ -6,13 +6,14 @@
""" """
import time
from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, ProxyNetworkActivate from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, ProxyNetworkActivate
from toolbox import get_conf, select_api_key, update_ui_lastest_msg, Singleton from toolbox import get_conf, select_api_key, update_ui_lastest_msg, Singleton
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_plugin_arg from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_plugin_arg
from crazy_functions.crazy_utils import input_clipping, try_install_deps from crazy_functions.crazy_utils import input_clipping, try_install_deps
from crazy_functions.agent_fns.persistent import GradioMultiuserManagerForPersistentClasses from crazy_functions.agent_fns.persistent import GradioMultiuserManagerForPersistentClasses
from crazy_functions.agent_fns.auto_agent import AutoGenMath from crazy_functions.agent_fns.auto_agent import AutoGenMath
import time from loguru import logger
def remove_model_prefix(llm): def remove_model_prefix(llm):
if llm.startswith('api2d-'): llm = llm.replace('api2d-', '') if llm.startswith('api2d-'): llm = llm.replace('api2d-', '')
@@ -80,12 +81,12 @@ def 多智能体终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_
     persistent_key = f"{user_uuid}->多智能体终端"
     if persistent_class_multi_user_manager.already_alive(persistent_key):
         # 当已经存在一个正在运行的多智能体终端时,直接将用户输入传递给它,而不是再次启动一个新的多智能体终端
-        print('[debug] feed new user input')
+        logger.info('[debug] feed new user input')
         executor = persistent_class_multi_user_manager.get(persistent_key)
         exit_reason = yield from executor.main_process_ui_control(txt, create_or_resume="resume")
     else:
         # 运行多智能体终端 (首次)
-        print('[debug] create new executor instance')
+        logger.info('[debug] create new executor instance')
         history = []
         chatbot.append(["正在启动: 多智能体终端", "插件动态生成, 执行开始, 作者 Microsoft & Binary-Husky."])
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

View File

@@ -1,7 +1,7 @@
 from toolbox import update_ui
 from toolbox import CatchException, report_exception
 from toolbox import write_history_to_file, promote_file_to_downloadzone
-from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive

 fast_debug = False

View File

@@ -1,5 +1,5 @@
 from toolbox import CatchException, report_exception, select_api_key, update_ui, get_conf
-from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
 from toolbox import write_history_to_file, promote_file_to_downloadzone, get_log_folder

 def split_audio_file(filename, split_duration=1000):

View File

@@ -1,16 +1,18 @@
+from loguru import logger
 from toolbox import update_ui, promote_file_to_downloadzone, gen_time_str
 from toolbox import CatchException, report_exception
 from toolbox import write_history_to_file, promote_file_to_downloadzone
-from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
-from .crazy_utils import read_and_clean_pdf_text
-from .crazy_utils import input_clipping
+from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+from crazy_functions.crazy_utils import read_and_clean_pdf_text
+from crazy_functions.crazy_utils import input_clipping

 def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
     file_write_buffer = []
     for file_name in file_manifest:
-        print('begin analysis on:', file_name)
+        logger.info('begin analysis on:', file_name)
         ############################## <第 0 步,切割PDF> ##################################
         # 递归地切割PDF文件,每一块(尽量是完整的一个section,比如introduction,experiment等,必要时再进行切割)
         # 的长度必须小于 2500 个 Token
@@ -38,7 +40,7 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
         last_iteration_result = paper_meta  # 初始值是摘要
         MAX_WORD_TOTAL = 4096 * 0.7
         n_fragment = len(paper_fragments)
-        if n_fragment >= 20: print('文章极长,不能达到预期效果')
+        if n_fragment >= 20: logger.warning('文章极长,不能达到预期效果')
         for i in range(n_fragment):
             NUM_OF_WORD = MAX_WORD_TOTAL // n_fragment
             i_say = f"Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} Chinese characters: {paper_fragments[i]}"

View File

@@ -1,6 +1,7 @@
+from loguru import logger
 from toolbox import update_ui
 from toolbox import CatchException, report_exception
-from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
 from toolbox import write_history_to_file, promote_file_to_downloadzone

 fast_debug = False
@@ -57,7 +58,6 @@ def readPdf(pdfPath):
     layout = device.get_result()
     for obj in layout._objs:
         if isinstance(obj, pdfminer.layout.LTTextBoxHorizontal):
-            # print(obj.get_text())
             outTextList.append(obj.get_text())

     return outTextList
@@ -66,7 +66,7 @@ def readPdf(pdfPath):
 def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
     import time, glob, os
     from bs4 import BeautifulSoup
-    print('begin analysis on:', file_manifest)
+    logger.info('begin analysis on:', file_manifest)
     for index, fp in enumerate(file_manifest):
         if ".tex" in fp:
             with open(fp, 'r', encoding='utf-8', errors='replace') as f:

View File

@@ -1,9 +1,9 @@
 from toolbox import CatchException, report_exception, get_log_folder, gen_time_str
 from toolbox import update_ui, promote_file_to_downloadzone, update_ui_lastest_msg, disable_auto_promotion
 from toolbox import write_history_to_file, promote_file_to_downloadzone
-from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
-from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
-from .crazy_utils import read_and_clean_pdf_text
+from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
+from crazy_functions.crazy_utils import read_and_clean_pdf_text
 from .pdf_fns.parse_pdf import parse_pdf, get_avail_grobid_url, translate_pdf
 from shared_utils.colorful import *
 import copy
@@ -60,7 +60,7 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
     # 清空历史,以免输入溢出
     history = []

-    from .crazy_utils import get_files_from_everything
+    from crazy_functions.crazy_utils import get_files_from_everything
     success, file_manifest, project_folder = get_files_from_everything(txt, type='.pdf')
     if len(file_manifest) > 0:
         # 尝试导入依赖,如果缺少依赖,则给出安装建议

View File

@@ -1,4 +1,5 @@
 import os
+from loguru import logger
 from toolbox import CatchException, update_ui, gen_time_str, promote_file_to_downloadzone
 from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
 from crazy_functions.crazy_utils import input_clipping
@@ -34,10 +35,10 @@ def eval_manim(code):
         return f'gpt_log/{time_str}.mp4'
     except subprocess.CalledProcessError as e:
         output = e.output.decode()
-        print(f"Command returned non-zero exit status {e.returncode}: {output}.")
+        logger.error(f"Command returned non-zero exit status {e.returncode}: {output}.")
         return f"Evaluating python script failed: {e.output}."
     except:
-        print('generating mp4 failed')
+        logger.error('generating mp4 failed')
         return "Generating mp4 failed."

View File

@@ -1,13 +1,12 @@
+from loguru import logger
 from toolbox import update_ui
 from toolbox import CatchException, report_exception
-from .crazy_utils import read_and_clean_pdf_text
-from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
-
-fast_debug = False
+from crazy_functions.crazy_utils import read_and_clean_pdf_text
+from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive

 def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
-    import tiktoken
-    print('begin analysis on:', file_name)
+    logger.info('begin analysis on:', file_name)
     ############################## <第 0 步,切割PDF> ##################################
     # 递归地切割PDF文件,每一块(尽量是完整的一个section,比如introduction,experiment等,必要时再进行切割)
@@ -36,7 +35,7 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
     last_iteration_result = paper_meta  # 初始值是摘要
     MAX_WORD_TOTAL = 4096
     n_fragment = len(paper_fragments)
-    if n_fragment >= 20: print('文章极长,不能达到预期效果')
+    if n_fragment >= 20: logger.warning('文章极长,不能达到预期效果')
     for i in range(n_fragment):
         NUM_OF_WORD = MAX_WORD_TOTAL // n_fragment
         i_say = f"Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words: {paper_fragments[i]}"
@@ -57,7 +56,7 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
         chatbot.append([i_say_show_user, gpt_say])
     ############################## <第 4 步,设置一个token上限,防止回答时Token溢出> ##################################
-    from .crazy_utils import input_clipping
+    from crazy_functions.crazy_utils import input_clipping
     _, final_results = input_clipping("", final_results, max_token_limit=3200)
     yield from update_ui(chatbot=chatbot, history=final_results) # 注意这里的历史记录被替代了

View File

@@ -1,12 +1,12 @@
+from loguru import logger
 from toolbox import update_ui
 from toolbox import CatchException, report_exception
 from toolbox import write_history_to_file, promote_file_to_downloadzone
-from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive

-fast_debug = False

 def 生成函数注释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
     import time, os
-    print('begin analysis on:', file_manifest)
+    logger.info('begin analysis on:', file_manifest)
     for index, fp in enumerate(file_manifest):
         with open(fp, 'r', encoding='utf-8', errors='replace') as f:
             file_content = f.read()
@@ -16,22 +16,20 @@ def 生成函数注释(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
         chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

-        if not fast_debug:
-            msg = '正常'
-            # ** gpt request **
-            gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
-                i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt=system_prompt)   # 带超时倒计时
+        msg = '正常'
+        # ** gpt request **
+        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
+            i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt=system_prompt)   # 带超时倒计时

-            chatbot[-1] = (i_say_show_user, gpt_say)
-            history.append(i_say_show_user); history.append(gpt_say)
-            yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
-            time.sleep(2)
+        chatbot[-1] = (i_say_show_user, gpt_say)
+        history.append(i_say_show_user); history.append(gpt_say)
+        yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
+        time.sleep(2)

-        if not fast_debug:
-            res = write_history_to_file(history)
-            promote_file_to_downloadzone(res, chatbot=chatbot)
-            chatbot.append(("完成了吗?", res))
-            yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
+        res = write_history_to_file(history)
+        promote_file_to_downloadzone(res, chatbot=chatbot)
+        chatbot.append(("完成了吗?", res))
+        yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面

View File

@@ -1,5 +1,5 @@
 from toolbox import CatchException, update_ui, report_exception
-from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
 from crazy_functions.plugin_template.plugin_class_template import (
     GptAcademicPluginTemplate,
 )
@@ -201,8 +201,7 @@ def 解析历史输入(history, llm_kwargs, file_manifest, chatbot, plugin_kwarg
     MAX_WORD_TOTAL = 4096
     n_txt = len(txt)
     last_iteration_result = "从以下文本中提取摘要。"
-    if n_txt >= 20:
-        print("文章极长,不能达到预期效果")
     for i in range(n_txt):
         NUM_OF_WORD = MAX_WORD_TOTAL // n_txt
         i_say = f"Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words in Chinese: {txt[i]}"

View File

@@ -1,6 +1,6 @@
 from toolbox import CatchException, update_ui, ProxyNetworkActivate, update_ui_lastest_msg, get_log_folder, get_user
-from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_files_from_everything
+from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_files_from_everything
+from loguru import logger

 install_msg ="""
 1. python -m pip install torch --index-url https://download.pytorch.org/whl/cpu
@@ -40,7 +40,7 @@ def 知识库文件注入(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
     except Exception as e:
         chatbot.append(["依赖不足", f"{str(e)}\n\n导入依赖失败。请用以下命令安装" + install_msg])
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        # from .crazy_utils import try_install_deps
+        # from crazy_functions.crazy_utils import try_install_deps
         # try_install_deps(['zh_langchain==0.2.1', 'pypinyin'], reload_m=['pypinyin', 'zh_langchain'])
         # yield from update_ui_lastest_msg("安装完成,您可以再次重试。", chatbot, history)
         return
@@ -60,7 +60,7 @@ def 知识库文件注入(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
     # < -------------------预热文本向量化模组--------------- >
     chatbot.append(['<br/>'.join(file_manifest), "正在预热文本向量化模组, 如果是第一次运行, 将消耗较长时间下载中文向量化模型..."])
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-    print('Checking Text2vec ...')
+    logger.info('Checking Text2vec ...')
     from langchain.embeddings.huggingface import HuggingFaceEmbeddings
     with ProxyNetworkActivate('Download_LLM'):    # 临时地激活代理网络
         HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
@@ -68,7 +68,7 @@ def 知识库文件注入(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
     # < -------------------构建知识库--------------- >
     chatbot.append(['<br/>'.join(file_manifest), "正在构建知识库..."])
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-    print('Establishing knowledge archive ...')
+    logger.info('Establishing knowledge archive ...')
     with ProxyNetworkActivate('Download_LLM'):    # 临时地激活代理网络
         kai = knowledge_archive_interface()
         vs_path = get_log_folder(user=get_user(chatbot), plugin_name='vec_store')
@@ -93,7 +93,7 @@ def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
     except Exception as e:
         chatbot.append(["依赖不足", f"{str(e)}\n\n导入依赖失败。请用以下命令安装" + install_msg])
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        # from .crazy_utils import try_install_deps
+        # from crazy_functions.crazy_utils import try_install_deps
         # try_install_deps(['zh_langchain==0.2.1', 'pypinyin'], reload_m=['pypinyin', 'zh_langchain'])
         # yield from update_ui_lastest_msg("安装完成,您可以再次重试。", chatbot, history)
         return

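Note on the import rewrite in this file, repeated across the commit: a relative import such as "from .crazy_utils import ..." only resolves while the module is imported as part of its package; loaded or run any other way, it fails. A minimal, self-contained sketch of the failure mode — the module name is hypothetical:

    # Simulate a plugin file executed as a top-level script (python chat_plugin.py),
    # where Python sets __package__ to an empty string.
    namespace = {"__package__": "", "__name__": "chat_plugin"}
    try:
        exec("from .crazy_utils import get_files_from_everything", namespace)
    except ImportError as e:
        print(e)  # attempted relative import with no known parent package

The absolute form "from crazy_functions.crazy_utils import ..." resolves identically however the module is loaded, which is why every file below gets the same treatment.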
View File

@@ -1,5 +1,5 @@
from toolbox import CatchException, update_ui from toolbox import CatchException, update_ui
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from request_llms.bridge_all import model_info from request_llms.bridge_all import model_info
@@ -23,8 +23,8 @@ def google(query, proxies):
item = {'title': title, 'link': link} item = {'title': title, 'link': link}
results.append(item) results.append(item)
for r in results: # for r in results:
print(r['link']) # print(r['link'])
return results return results
def scrape_text(url, proxies) -> str: def scrape_text(url, proxies) -> str:

View File

@@ -1,5 +1,5 @@
from toolbox import CatchException, update_ui from toolbox import CatchException, update_ui
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from request_llms.bridge_all import model_info from request_llms.bridge_all import model_info
@@ -22,8 +22,8 @@ def bing_search(query, proxies=None):
item = {'title': title, 'link': link} item = {'title': title, 'link': link}
results.append(item) results.append(item)
for r in results: # for r in results:
print(r['link']) # print(r['link'])
return results return results

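Both search plugins above comment their link dumps out rather than delete them. Now that loguru is in place, a debug-level record is a lighter-weight option: silent in normal runs, but still reachable for troubleshooting. A sketch with stand-in data, not the real search results:

    from loguru import logger

    results = [{"title": "example", "link": "https://example.com"}]  # stand-in data
    for r in results:
        logger.debug(r["link"])  # emitted only by sinks configured at DEBUG level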
View File

@@ -64,7 +64,7 @@ def parseNotebook(filename, enable_markdown=1):
def ipynb解释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt): def ipynb解释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
enable_markdown = plugin_kwargs.get("advanced_arg", "1") enable_markdown = plugin_kwargs.get("advanced_arg", "1")

View File

@@ -1,5 +1,5 @@
from toolbox import CatchException, update_ui, get_conf from toolbox import CatchException, update_ui, get_conf
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
import datetime import datetime
@CatchException @CatchException
def 同时问询(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request): def 同时问询(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):

View File

@@ -1,11 +1,13 @@
from toolbox import update_ui from toolbox import update_ui
from toolbox import CatchException, get_conf, markdown_convertion from toolbox import CatchException, get_conf, markdown_convertion
from request_llms.bridge_all import predict_no_ui_long_connection
from crazy_functions.crazy_utils import input_clipping from crazy_functions.crazy_utils import input_clipping
from crazy_functions.agent_fns.watchdog import WatchDog from crazy_functions.agent_fns.watchdog import WatchDog
from request_llms.bridge_all import predict_no_ui_long_connection from crazy_functions.live_audio.aliyunASR import AliyunASR
from loguru import logger
import threading, time import threading, time
import numpy as np import numpy as np
from .live_audio.aliyunASR import AliyunASR
import json import json
import re import re
@@ -42,9 +44,9 @@ class AsyncGptTask():
gpt_say_partial = predict_no_ui_long_connection(inputs=i_say, llm_kwargs=llm_kwargs, history=history, sys_prompt=sys_prompt, gpt_say_partial = predict_no_ui_long_connection(inputs=i_say, llm_kwargs=llm_kwargs, history=history, sys_prompt=sys_prompt,
observe_window=observe_window[index], console_slience=True) observe_window=observe_window[index], console_slience=True)
except ConnectionAbortedError as token_exceed_err: except ConnectionAbortedError as token_exceed_err:
print('至少一个线程任务Token溢出而失败', e) logger.error(f'至少一个线程任务Token溢出而失败: {token_exceed_err}')
except Exception as e: except Exception as e:
print('至少一个线程任务意外失败', e) logger.error(f'至少一个线程任务意外失败: {e}')
def add_async_gpt_task(self, i_say, chatbot_index, llm_kwargs, history, system_prompt): def add_async_gpt_task(self, i_say, chatbot_index, llm_kwargs, history, system_prompt):
self.observe_future.append([""]) self.observe_future.append([""])

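The two logger.error rewrites above interpolate the exception into the message deliberately. Loguru feeds extra positional arguments to str.format(), so the literal translation logger.error('...失败', e) would silently drop the exception whenever the message carries no {} placeholder — the old print at least showed it (and the first except clause binds token_exceed_err, not e). A minimal sketch of the spellings that do work:

    from loguru import logger

    try:
        raise ConnectionAbortedError("token overflow")
    except ConnectionAbortedError as err:
        logger.error(f"至少一个线程任务Token溢出而失败: {err}")  # interpolate explicitly
        logger.exception("至少一个线程任务Token溢出而失败")       # or let loguru attach the traceback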
View File

@@ -1,12 +1,11 @@
from toolbox import update_ui from toolbox import update_ui
from toolbox import CatchException, report_exception from toolbox import CatchException, report_exception
from toolbox import write_history_to_file, promote_file_to_downloadzone from toolbox import write_history_to_file, promote_file_to_downloadzone
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt): def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
import time, glob, os import time, glob, os
print('begin analysis on:', file_manifest)
for index, fp in enumerate(file_manifest): for index, fp in enumerate(file_manifest):
with open(fp, 'r', encoding='utf-8', errors='replace') as f: with open(fp, 'r', encoding='utf-8', errors='replace') as f:
file_content = f.read() file_content = f.read()

View File

@@ -1,4 +1,4 @@
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from toolbox import CatchException, report_exception, promote_file_to_downloadzone from toolbox import CatchException, report_exception, promote_file_to_downloadzone
from toolbox import update_ui, update_ui_lastest_msg, disable_auto_promotion, write_history_to_file from toolbox import update_ui, update_ui_lastest_msg, disable_auto_promotion, write_history_to_file
import logging import logging

View File

@@ -1022,7 +1022,7 @@ for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]:
# 如果是已知模型,则尝试获取其信息 # 如果是已知模型,则尝试获取其信息
original_model_info = model_info.get(origin_model_name.replace("one-api-", "", 1), None) original_model_info = model_info.get(origin_model_name.replace("one-api-", "", 1), None)
except: except:
print(f"one-api模型 {model} 的 max_token 配置不是整数,请检查配置文件。") logger.error(f"one-api模型 {model} 的 max_token 配置不是整数,请检查配置文件。")
continue continue
this_model_info = { this_model_info = {
"fn_with_ui": chatgpt_ui, "fn_with_ui": chatgpt_ui,
@@ -1053,7 +1053,7 @@ for model in [m for m in AVAIL_LLM_MODELS if m.startswith("vllm-")]:
try: try:
_, max_token_tmp = read_one_api_model_name(model) _, max_token_tmp = read_one_api_model_name(model)
except: except:
print(f"vllm模型 {model} 的 max_token 配置不是整数,请检查配置文件。") logger.error(f"vllm模型 {model} 的 max_token 配置不是整数,请检查配置文件。")
continue continue
model_info.update({ model_info.update({
model: { model: {
@@ -1080,7 +1080,7 @@ for model in [m for m in AVAIL_LLM_MODELS if m.startswith("ollama-")]:
try: try:
_, max_token_tmp = read_one_api_model_name(model) _, max_token_tmp = read_one_api_model_name(model)
except: except:
print(f"ollama模型 {model} 的 max_token 配置不是整数,请检查配置文件。") logger.error(f"ollama模型 {model} 的 max_token 配置不是整数,请检查配置文件。")
continue continue
model_info.update({ model_info.update({
model: { model: {

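For context on what trips these three except branches: one-api, vllm and ollama entries are expected to carry their token limit inside the configured model name, and read_one_api_model_name (defined elsewhere in this file) splits it off. A hedged sketch of that contract — the exact pattern and error message are illustrative assumptions, not a quote of the repo:

    import re

    def read_one_api_model_name(model: str):
        # e.g. "one-api-gpt-4(max_token=32000)" -> ("one-api-gpt-4", 32000)
        match = re.search(r"\(max_token=(\d+)\)", model)
        if match is None:
            raise ValueError(f"{model} 未标注 max_token")
        return model[: match.start()], int(match.group(1))

    print(read_one_api_model_name("one-api-gpt-4(max_token=32000)"))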
View File

@@ -1,12 +1,13 @@
from transformers import AutoModel, AutoTokenizer from transformers import AutoModel, AutoTokenizer
from loguru import logger
from toolbox import update_ui, get_conf
from multiprocessing import Process, Pipe
import time import time
import os import os
import json import json
import threading import threading
import importlib import importlib
from toolbox import update_ui, get_conf
from multiprocessing import Process, Pipe
load_message = "ChatGLMFT尚未加载加载需要一段时间。注意取决于`config.py`的配置ChatGLMFT消耗大量的内存CPU或显存GPU也许会导致低配计算机卡死 ……" load_message = "ChatGLMFT尚未加载加载需要一段时间。注意取决于`config.py`的配置ChatGLMFT消耗大量的内存CPU或显存GPU也许会导致低配计算机卡死 ……"
@@ -78,7 +79,7 @@ class GetGLMFTHandle(Process):
config.pre_seq_len = model_args['pre_seq_len'] config.pre_seq_len = model_args['pre_seq_len']
config.prefix_projection = model_args['prefix_projection'] config.prefix_projection = model_args['prefix_projection']
print(f"Loading prefix_encoder weight from {CHATGLM_PTUNING_CHECKPOINT}") logger.info(f"Loading prefix_encoder weight from {CHATGLM_PTUNING_CHECKPOINT}")
model = AutoModel.from_pretrained(model_args['model_name_or_path'], config=config, trust_remote_code=True) model = AutoModel.from_pretrained(model_args['model_name_or_path'], config=config, trust_remote_code=True)
prefix_state_dict = torch.load(os.path.join(CHATGLM_PTUNING_CHECKPOINT, "pytorch_model.bin")) prefix_state_dict = torch.load(os.path.join(CHATGLM_PTUNING_CHECKPOINT, "pytorch_model.bin"))
new_prefix_state_dict = {} new_prefix_state_dict = {}
@@ -88,7 +89,7 @@ class GetGLMFTHandle(Process):
model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict) model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)
if model_args['quantization_bit'] is not None and model_args['quantization_bit'] != 0: if model_args['quantization_bit'] is not None and model_args['quantization_bit'] != 0:
print(f"Quantized to {model_args['quantization_bit']} bit") logger.info(f"Quantized to {model_args['quantization_bit']} bit")
model = model.quantize(model_args['quantization_bit']) model = model.quantize(model_args['quantization_bit'])
model = model.cuda() model = model.cuda()
if model_args['pre_seq_len'] is not None: if model_args['pre_seq_len'] is not None:

View File

@@ -16,6 +16,8 @@ import traceback
import requests import requests
import random import random
from loguru import logger
# config_private.py放自己的秘密如API和代理网址 # config_private.py放自己的秘密如API和代理网址
# 读取时首先看是否存在私密的config_private配置文件不受git管控如果有则覆盖原config文件 # 读取时首先看是否存在私密的config_private配置文件不受git管控如果有则覆盖原config文件
from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history
@@ -146,7 +148,7 @@ def predict_no_ui_long_connection(inputs:str, llm_kwargs:dict, history:list=[],
retry += 1 retry += 1
traceback.print_exc() traceback.print_exc()
if retry > MAX_RETRY: raise TimeoutError if retry > MAX_RETRY: raise TimeoutError
if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……') if MAX_RETRY!=0: logger.error(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
stream_response = response.iter_lines() stream_response = response.iter_lines()
result = '' result = ''
@@ -179,7 +181,7 @@ def predict_no_ui_long_connection(inputs:str, llm_kwargs:dict, history:list=[],
if (not has_content) and (not has_role): continue # raise RuntimeError("发现不标准的第三方接口:"+delta) if (not has_content) and (not has_role): continue # raise RuntimeError("发现不标准的第三方接口:"+delta)
if has_content: # has_role = True/False if has_content: # has_role = True/False
result += delta["content"] result += delta["content"]
if not console_slience: print(delta["content"], end='') if not console_slience: logger.info(delta["content"], end='')
if observe_window is not None: if observe_window is not None:
# 观测窗,把已经获取的数据显示出去 # 观测窗,把已经获取的数据显示出去
if len(observe_window) >= 1: if len(observe_window) >= 1:
@@ -342,7 +344,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
error_msg = chunk_decoded error_msg = chunk_decoded
chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg) chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面 yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
print(error_msg) logger.error(error_msg)
return return
def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg): def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):
@@ -493,10 +495,7 @@ def generate_payload(inputs:str, llm_kwargs:dict, history:list, system_prompt:st
"n": 1, "n": 1,
"stream": stream, "stream": stream,
} }
# try:
# print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........")
# except:
# print('输入中可能存在乱码。')
return headers,payload return headers,payload

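A recurring pattern in this file, and in the claude, cohere and ollama bridges further down: print(token, end='') used to stream tokens inline, but logger.info has no end parameter — the keyword is swallowed by str.format(), and each token becomes a separate, fully decorated record. Two sane replacements, sketched with an illustrative helper name:

    import sys
    from loguru import logger

    def echo_stream(tokens):
        buf = ""
        for tok in tokens:
            buf += tok
            sys.stdout.write(tok)  # raw inline echo, bypassing the logger entirely
            sys.stdout.flush()
        logger.info(f"[response] {buf}")  # one clean record for the whole reply

    echo_stream(["Hel", "lo, ", "world"])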
View File

@@ -14,6 +14,7 @@ import time
import requests import requests
import base64 import base64
import glob import glob
from loguru import logger
from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, is_the_upload_folder, \ from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, is_the_upload_folder, \
update_ui_lastest_msg, get_max_token, encode_image, have_any_recent_upload_image_files, log_chat update_ui_lastest_msg, get_max_token, encode_image, have_any_recent_upload_image_files, log_chat
@@ -208,7 +209,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
error_msg = chunk_decoded error_msg = chunk_decoded
chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg, api_key) chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg, api_key)
yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面 yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
print(error_msg) logger.error(error_msg)
return return
def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg, api_key=""): def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg, api_key=""):
@@ -299,10 +300,7 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, image_paths):
"presence_penalty": 0, "presence_penalty": 0,
"frequency_penalty": 0, "frequency_penalty": 0,
} }
try:
print(f" {llm_kwargs['llm_model']} : {inputs[:100]} ..........")
except:
print('输入中可能存在乱码。')
return headers, payload, api_key return headers, payload, api_key

View File

@@ -1,281 +0,0 @@
# 借鉴了 https://github.com/GaiZhenbiao/ChuanhuChatGPT 项目
"""
该文件中主要包含三个函数
不具备多线程能力的函数:
1. predict: 正常对话时使用,具备完备的交互功能,不可多线程
具备多线程调用能力的函数
2. predict_no_ui_long_connection支持多线程
"""
import json
import time
import gradio as gr
import traceback
import requests
import importlib
from loguru import logger as logging
# config_private.py放自己的秘密如API和代理网址
# 读取时首先看是否存在私密的config_private配置文件不受git管控如果有则覆盖原config文件
from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc
proxies, TIMEOUT_SECONDS, MAX_RETRY, API_ORG = \
get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'API_ORG')
timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
'网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'
def get_full_error(chunk, stream_response):
"""
获取完整的从Openai返回的报错
"""
while True:
try:
chunk += next(stream_response)
except:
break
return chunk
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
"""
发送至chatGPT等待回复一次性完成不显示中间过程。但内部用stream的方法避免中途网线被掐。
inputs
是本次问询的输入
sys_prompt:
系统静默prompt
llm_kwargs
chatGPT的内部调优参数
history
是之前的对话列表
observe_window = None
用于负责跨越线程传递已经输出的部分大部分时候仅仅为了fancy的视觉效果留空即可。observe_window[0]观测窗。observe_window[1]:看门狗
"""
watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可
headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True)
retry = 0
while True:
try:
# make a POST request to the API endpoint, stream=False
from .bridge_all import model_info
endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
response = requests.post(endpoint, headers=headers, proxies=proxies,
json=payload, stream=True, timeout=TIMEOUT_SECONDS); break
except requests.exceptions.ReadTimeout as e:
retry += 1
traceback.print_exc()
if retry > MAX_RETRY: raise TimeoutError
if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
stream_response = response.iter_lines()
result = ''
while True:
try: chunk = next(stream_response).decode()
except StopIteration:
break
except requests.exceptions.ConnectionError:
chunk = next(stream_response).decode() # 失败了,重试一次?再失败就没办法了。
if len(chunk)==0: continue
if not chunk.startswith('data:'):
error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()
if "reduce the length" in error_msg:
raise ConnectionAbortedError("OpenAI拒绝了请求:" + error_msg)
else:
raise RuntimeError("OpenAI拒绝了请求" + error_msg)
if ('data: [DONE]' in chunk): break # api2d 正常完成
json_data = json.loads(chunk.lstrip('data:'))['choices'][0]
delta = json_data["delta"]
if len(delta) == 0: break
if "role" in delta: continue
if "content" in delta:
result += delta["content"]
if not console_slience: print(delta["content"], end='')
if observe_window is not None:
# 观测窗,把已经获取的数据显示出去
if len(observe_window) >= 1: observe_window[0] += delta["content"]
# 看门狗,如果超过期限没有喂狗,则终止
if len(observe_window) >= 2:
if (time.time()-observe_window[1]) > watch_dog_patience:
raise RuntimeError("用户取消了程序。")
else: raise RuntimeError("意外Json结构"+delta)
if json_data['finish_reason'] == 'content_filter':
raise RuntimeError("由于提问含不合规内容被Azure过滤。")
if json_data['finish_reason'] == 'length':
raise ConnectionAbortedError("正常结束但显示Token不足导致输出不完整请削减单次输入的文本量。")
return result
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
"""
发送至chatGPT流式获取输出。
用于基础的对话功能。
inputs 是本次问询的输入
top_p, temperature是chatGPT的内部调优参数
history 是之前的对话列表注意无论是inputs还是history内容太长了都会触发token数量溢出的错误
chatbot 为WebUI中显示的对话列表修改它然后yeild出去可以直接修改对话界面内容
additional_fn代表点击的哪个按钮按钮见functional.py
"""
if additional_fn is not None:
from core_functional import handle_core_functionality
inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
raw_input = inputs
logging.info(f'[raw_input] {raw_input}')
chatbot.append((inputs, ""))
yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面
try:
headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt, stream)
except RuntimeError as e:
chatbot[-1] = (inputs, f"您提供的api-key不满足要求不包含任何可用于{llm_kwargs['llm_model']}的api-key。您可能选择了错误的模型或请求源。")
yield from update_ui(chatbot=chatbot, history=history, msg="api-key不满足要求") # 刷新界面
return
history.append(inputs); history.append("")
retry = 0
while True:
try:
# make a POST request to the API endpoint, stream=True
from .bridge_all import model_info
endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
response = requests.post(endpoint, headers=headers, proxies=proxies,
json=payload, stream=True, timeout=TIMEOUT_SECONDS);break
except:
retry += 1
chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg))
retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else ""
yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # 刷新界面
if retry > MAX_RETRY: raise TimeoutError
gpt_replying_buffer = ""
is_head_of_the_stream = True
if stream:
stream_response = response.iter_lines()
while True:
try:
chunk = next(stream_response)
except StopIteration:
# 非OpenAI官方接口的出现这样的报错OpenAI和API2D不会走这里
chunk_decoded = chunk.decode()
error_msg = chunk_decoded
chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
yield from update_ui(chatbot=chatbot, history=history, msg="非Openai官方接口返回了错误:" + chunk.decode()) # 刷新界面
return
# print(chunk.decode()[6:])
if is_head_of_the_stream and (r'"object":"error"' not in chunk.decode()):
# 数据流的第一帧不携带content
is_head_of_the_stream = False; continue
if chunk:
try:
chunk_decoded = chunk.decode()
# 前者是API2D的结束条件后者是OPENAI的结束条件
if 'data: [DONE]' in chunk_decoded:
# 判定为数据流的结束gpt_replying_buffer也写完了
logging.info(f'[response] {gpt_replying_buffer}')
break
# 处理数据流的主体
chunkjson = json.loads(chunk_decoded[6:])
status_text = f"finish_reason: {chunkjson['choices'][0]['finish_reason']}"
delta = chunkjson['choices'][0]["delta"]
if "content" in delta:
gpt_replying_buffer = gpt_replying_buffer + delta["content"]
history[-1] = gpt_replying_buffer
chatbot[-1] = (history[-2], history[-1])
yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # 刷新界面
except Exception as e:
yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # 刷新界面
chunk = get_full_error(chunk, stream_response)
chunk_decoded = chunk.decode()
error_msg = chunk_decoded
chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
print(error_msg)
return
def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):
from .bridge_all import model_info
openai_website = ' 请登录OpenAI查看详情 https://platform.openai.com/signup'
if "reduce the length" in error_msg:
if len(history) >= 2: history[-1] = ""; history[-2] = "" # 清除当前溢出的输入history[-2] 是本次输入, history[-1] 是本次输出
history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])) # history至少释放二分之一
chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
# history = [] # 清除历史
elif "does not exist" in error_msg:
chatbot[-1] = (chatbot[-1][0], f"[Local Message] Model {llm_kwargs['llm_model']} does not exist. 模型不存在, 或者您没有获得体验资格.")
elif "Incorrect API key" in error_msg:
chatbot[-1] = (chatbot[-1][0], "[Local Message] Incorrect API key. OpenAI以提供了不正确的API_KEY为由, 拒绝服务. " + openai_website)
elif "exceeded your current quota" in error_msg:
chatbot[-1] = (chatbot[-1][0], "[Local Message] You exceeded your current quota. OpenAI以账户额度不足为由, 拒绝服务." + openai_website)
elif "account is not active" in error_msg:
chatbot[-1] = (chatbot[-1][0], "[Local Message] Your account is not active. OpenAI以账户失效为由, 拒绝服务." + openai_website)
elif "associated with a deactivated account" in error_msg:
chatbot[-1] = (chatbot[-1][0], "[Local Message] You are associated with a deactivated account. OpenAI以账户失效为由, 拒绝服务." + openai_website)
elif "bad forward key" in error_msg:
chatbot[-1] = (chatbot[-1][0], "[Local Message] Bad forward key. API2D账户额度不足.")
elif "Not enough point" in error_msg:
chatbot[-1] = (chatbot[-1][0], "[Local Message] Not enough point. API2D账户点数不足.")
else:
from toolbox import regular_txt_to_markdown
tb_str = '```\n' + trimmed_format_exc() + '```'
chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}")
return chatbot, history
def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
"""
整合所有信息选择LLM模型生成http请求为发送请求做准备
"""
if not is_any_api_key(llm_kwargs['api_key']):
raise AssertionError("你提供了错误的API_KEY。\n\n1. 临时解决方案直接在输入区键入api_key然后回车提交。\n\n2. 长效解决方案在config.py中配置。")
headers = {
"Content-Type": "application/json",
}
conversation_cnt = len(history) // 2
messages = [{"role": "system", "content": system_prompt}]
if conversation_cnt:
for index in range(0, 2*conversation_cnt, 2):
what_i_have_asked = {}
what_i_have_asked["role"] = "user"
what_i_have_asked["content"] = history[index]
what_gpt_answer = {}
what_gpt_answer["role"] = "assistant"
what_gpt_answer["content"] = history[index+1]
if what_i_have_asked["content"] != "":
if what_gpt_answer["content"] == "": continue
if what_gpt_answer["content"] == timeout_bot_msg: continue
messages.append(what_i_have_asked)
messages.append(what_gpt_answer)
else:
messages[-1]['content'] = what_gpt_answer['content']
what_i_ask_now = {}
what_i_ask_now["role"] = "user"
what_i_ask_now["content"] = inputs
messages.append(what_i_ask_now)
payload = {
"model": llm_kwargs['llm_model'].strip('api2d-'),
"messages": messages,
"temperature": llm_kwargs['temperature'], # 1.0,
"top_p": llm_kwargs['top_p'], # 1.0,
"n": 1,
"stream": stream,
"presence_penalty": 0,
"frequency_penalty": 0,
}
try:
print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........")
except:
print('输入中可能存在乱码。')
return headers,payload

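Before the deleted bridge vanishes entirely, one line of its payload builder is worth recording: "model": llm_kwargs['llm_model'].strip('api2d-') relies on str.strip, which removes a character set rather than a prefix, and only yields the right answer for model names that happen to avoid those characters at both ends:

    assert "api2d-gpt-4".strip("api2d-") == "gpt-4"              # happens to work
    assert "api2d-davinci".strip("api2d-") == "vinc"             # silently mangled
    assert "api2d-davinci".removeprefix("api2d-") == "davinci"   # prefix-safe, Python 3.9+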
View File

@@ -14,7 +14,7 @@ import time
import traceback import traceback
import json import json
import requests import requests
from loguru import logger as logging from loguru import logger
from toolbox import get_conf, update_ui, trimmed_format_exc, encode_image, every_image_file_in_path, log_chat from toolbox import get_conf, update_ui, trimmed_format_exc, encode_image, every_image_file_in_path, log_chat
picture_system_prompt = "\n当回复图像时,必须说明正在回复哪张图像。所有图像仅在最后一个问题中提供,即使它们在历史记录中被提及。请使用'这是第X张图像:'的格式来指明您正在描述的是哪张图像。" picture_system_prompt = "\n当回复图像时,必须说明正在回复哪张图像。所有图像仅在最后一个问题中提供,即使它们在历史记录中被提及。请使用'这是第X张图像:'的格式来指明您正在描述的是哪张图像。"
@@ -102,7 +102,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
retry += 1 retry += 1
traceback.print_exc() traceback.print_exc()
if retry > MAX_RETRY: raise TimeoutError if retry > MAX_RETRY: raise TimeoutError
if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……') if MAX_RETRY!=0: logger.error(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
stream_response = response.iter_lines() stream_response = response.iter_lines()
result = '' result = ''
while True: while True:
@@ -117,12 +117,11 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
if need_to_pass: if need_to_pass:
pass pass
elif is_last_chunk: elif is_last_chunk:
# logging.info(f'[response] {result}') # logger.info(f'[response] {result}')
break break
else: else:
if chunkjson and chunkjson['type'] == 'content_block_delta': if chunkjson and chunkjson['type'] == 'content_block_delta':
result += chunkjson['delta']['text'] result += chunkjson['delta']['text']
print(chunkjson['delta']['text'], end='')
if observe_window is not None: if observe_window is not None:
# 观测窗,把已经获取的数据显示出去 # 观测窗,把已经获取的数据显示出去
if len(observe_window) >= 1: if len(observe_window) >= 1:
@@ -135,7 +134,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
chunk = get_full_error(chunk, stream_response) chunk = get_full_error(chunk, stream_response)
chunk_decoded = chunk.decode() chunk_decoded = chunk.decode()
error_msg = chunk_decoded error_msg = chunk_decoded
print(error_msg) logger.error(error_msg)
raise RuntimeError("Json解析不合常规") raise RuntimeError("Json解析不合常规")
return result return result
@@ -201,7 +200,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
retry += 1 retry += 1
traceback.print_exc() traceback.print_exc()
if retry > MAX_RETRY: raise TimeoutError if retry > MAX_RETRY: raise TimeoutError
if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……') if MAX_RETRY!=0: logger.error(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
stream_response = response.iter_lines() stream_response = response.iter_lines()
gpt_replying_buffer = "" gpt_replying_buffer = ""
@@ -218,7 +217,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
pass pass
elif is_last_chunk: elif is_last_chunk:
log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer) log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
# logging.info(f'[response] {gpt_replying_buffer}') # logger.info(f'[response] {gpt_replying_buffer}')
break break
else: else:
if chunkjson and chunkjson['type'] == 'content_block_delta': if chunkjson and chunkjson['type'] == 'content_block_delta':
@@ -231,7 +230,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
chunk = get_full_error(chunk, stream_response) chunk = get_full_error(chunk, stream_response)
chunk_decoded = chunk.decode() chunk_decoded = chunk.decode()
error_msg = chunk_decoded error_msg = chunk_decoded
print(error_msg) logger.error(error_msg)
raise RuntimeError("Json解析不合常规") raise RuntimeError("Json解析不合常规")
def multiple_picture_types(image_paths): def multiple_picture_types(image_paths):

View File

@@ -15,7 +15,7 @@ import time
import gradio as gr import gradio as gr
import traceback import traceback
import requests import requests
from loguru import logger as logging from loguru import logger
# config_private.py放自己的秘密如API和代理网址 # config_private.py放自己的秘密如API和代理网址
# 读取时首先看是否存在私密的config_private配置文件不受git管控如果有则覆盖原config文件 # 读取时首先看是否存在私密的config_private配置文件不受git管控如果有则覆盖原config文件
@@ -96,7 +96,7 @@ def predict_no_ui_long_connection(inputs:str, llm_kwargs:dict, history:list=[],
retry += 1 retry += 1
traceback.print_exc() traceback.print_exc()
if retry > MAX_RETRY: raise TimeoutError if retry > MAX_RETRY: raise TimeoutError
if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……') if MAX_RETRY!=0: logger.error(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
stream_response = response.iter_lines() stream_response = response.iter_lines()
result = '' result = ''
@@ -111,7 +111,7 @@ def predict_no_ui_long_connection(inputs:str, llm_kwargs:dict, history:list=[],
if chunkjson['event_type'] == 'stream-start': continue if chunkjson['event_type'] == 'stream-start': continue
if chunkjson['event_type'] == 'text-generation': if chunkjson['event_type'] == 'text-generation':
result += chunkjson["text"] result += chunkjson["text"]
if not console_slience: print(chunkjson["text"], end='') if not console_slience: logger.info(chunkjson["text"], end='')
if observe_window is not None: if observe_window is not None:
# 观测窗,把已经获取的数据显示出去 # 观测窗,把已经获取的数据显示出去
if len(observe_window) >= 1: if len(observe_window) >= 1:
@@ -151,7 +151,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot) inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
raw_input = inputs raw_input = inputs
# logging.info(f'[raw_input] {raw_input}') # logger.info(f'[raw_input] {raw_input}')
chatbot.append((inputs, "")) chatbot.append((inputs, ""))
yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面 yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面
@@ -235,7 +235,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
error_msg = chunk_decoded error_msg = chunk_decoded
chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg) chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面 yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
print(error_msg) logger.error(error_msg)
return return
def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg): def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):

View File

@@ -1,12 +1,13 @@
model_name = "deepseek-coder-6.7b-instruct" model_name = "deepseek-coder-6.7b-instruct"
cmd_to_install = "未知" # "`pip install -r request_llms/requirements_qwen.txt`" cmd_to_install = "未知" # "`pip install -r request_llms/requirements_qwen.txt`"
import os
from toolbox import ProxyNetworkActivate from toolbox import ProxyNetworkActivate
from toolbox import get_conf from toolbox import get_conf
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns from request_llms.local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
from threading import Thread from threading import Thread
from loguru import logger
import torch import torch
import os
def download_huggingface_model(model_name, max_retry, local_dir): def download_huggingface_model(model_name, max_retry, local_dir):
from huggingface_hub import snapshot_download from huggingface_hub import snapshot_download
@@ -15,7 +16,7 @@ def download_huggingface_model(model_name, max_retry, local_dir):
snapshot_download(repo_id=model_name, local_dir=local_dir, resume_download=True) snapshot_download(repo_id=model_name, local_dir=local_dir, resume_download=True)
break break
except Exception as e: except Exception as e:
print(f'\n\n下载失败,重试第{i}次中...\n\n') logger.error(f'下载失败,重试第{i}次中... ({e})')
return local_dir return local_dir
# ------------------------------------------------------------------------------------------------------------------------ # ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 Local Model # 🔌💻 Local Model
@@ -112,7 +113,6 @@ class GetCoderLMHandle(LocalLLMHandle):
generated_text = "" generated_text = ""
for new_text in self._streamer: for new_text in self._streamer:
generated_text += new_text generated_text += new_text
# print(generated_text)
yield generated_text yield generated_text

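The download retry loop above logs each failure and retries immediately. A hedged variant that includes the caught exception and a bounded backoff — same names as the hunk, but the sleep policy is an assumption, not repo code:

    import time
    from loguru import logger
    from huggingface_hub import snapshot_download

    def download_huggingface_model(model_name, max_retry, local_dir):
        for i in range(1, max_retry + 1):
            try:
                snapshot_download(repo_id=model_name, local_dir=local_dir, resume_download=True)
                break
            except Exception as e:
                logger.error(f'下载失败,重试第{i}次中... ({e})')
                time.sleep(min(2 ** i, 30))  # back off instead of hammering the hub
        return local_dir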
View File

@@ -17,7 +17,7 @@ import traceback
import requests import requests
import importlib import importlib
import random import random
from loguru import logger as logging from loguru import logger
# config_private.py放自己的秘密如API和代理网址 # config_private.py放自己的秘密如API和代理网址
# 读取时首先看是否存在私密的config_private配置文件不受git管控如果有则覆盖原config文件 # 读取时首先看是否存在私密的config_private配置文件不受git管控如果有则覆盖原config文件
@@ -81,7 +81,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
retry += 1 retry += 1
traceback.print_exc() traceback.print_exc()
if retry > MAX_RETRY: raise TimeoutError if retry > MAX_RETRY: raise TimeoutError
if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……') if MAX_RETRY!=0: logger.error(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
stream_response = response.iter_lines() stream_response = response.iter_lines()
result = '' result = ''
@@ -96,10 +96,10 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
try: try:
if is_last_chunk: if is_last_chunk:
# 判定为数据流的结束gpt_replying_buffer也写完了 # 判定为数据流的结束gpt_replying_buffer也写完了
logging.info(f'[response] {result}') logger.info(f'[response] {result}')
break break
result += chunkjson['message']["content"] result += chunkjson['message']["content"]
if not console_slience: print(chunkjson['message']["content"], end='') if not console_slience: logger.info(chunkjson['message']["content"])
if observe_window is not None: if observe_window is not None:
# 观测窗,把已经获取的数据显示出去 # 观测窗,把已经获取的数据显示出去
if len(observe_window) >= 1: if len(observe_window) >= 1:
@@ -112,7 +112,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
chunk = get_full_error(chunk, stream_response) chunk = get_full_error(chunk, stream_response)
chunk_decoded = chunk.decode() chunk_decoded = chunk.decode()
error_msg = chunk_decoded error_msg = chunk_decoded
print(error_msg) logger.error(error_msg)
raise RuntimeError("Json解析不合常规") raise RuntimeError("Json解析不合常规")
return result return result
@@ -134,7 +134,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot) inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
raw_input = inputs raw_input = inputs
logging.info(f'[raw_input] {raw_input}') logger.info(f'[raw_input] {raw_input}')
chatbot.append((inputs, "")) chatbot.append((inputs, ""))
yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面 yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面
@@ -183,7 +183,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
try: try:
if is_last_chunk: if is_last_chunk:
# 判定为数据流的结束gpt_replying_buffer也写完了 # 判定为数据流的结束gpt_replying_buffer也写完了
logging.info(f'[response] {gpt_replying_buffer}') logger.info(f'[response] {gpt_replying_buffer}')
break break
# 处理数据流的主体 # 处理数据流的主体
try: try:
@@ -202,7 +202,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
error_msg = chunk_decoded error_msg = chunk_decoded
chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg) chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面 yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
print(error_msg) logger.error(error_msg)
return return
def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg): def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):
@@ -265,8 +265,5 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
"messages": messages, "messages": messages,
"options": options, "options": options,
} }
try:
print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........")
except:
print('输入中可能存在乱码。')
return headers,payload return headers,payload

View File

@@ -218,5 +218,3 @@ class GoogleChatInit:
if __name__ == "__main__": if __name__ == "__main__":
google = GoogleChatInit() google = GoogleChatInit()
# print(gootle.generate_message_payload('你好呀', {}, ['123123', '3123123'], ''))
# gootle.input_encode_handle('123123[123123](./123123), ![53425](./asfafa/fff.jpg)')

View File

@@ -1,17 +1,18 @@
from toolbox import get_conf, get_pictures_list, encode_image
import base64 import base64
import datetime import datetime
import hashlib import hashlib
import hmac import hmac
import json import json
from urllib.parse import urlparse
import ssl import ssl
import websocket
import threading
from toolbox import get_conf, get_pictures_list, encode_image
from loguru import logger
from urllib.parse import urlparse
from datetime import datetime from datetime import datetime
from time import mktime from time import mktime
from urllib.parse import urlencode from urllib.parse import urlencode
from wsgiref.handlers import format_date_time from wsgiref.handlers import format_date_time
import websocket
import threading, time
timeout_bot_msg = '[Local Message] Request timeout. Network error.' timeout_bot_msg = '[Local Message] Request timeout. Network error.'
@@ -104,7 +105,7 @@ class SparkRequestInstance():
if llm_kwargs['most_recent_uploaded'].get('path'): if llm_kwargs['most_recent_uploaded'].get('path'):
file_manifest = get_pictures_list(llm_kwargs['most_recent_uploaded']['path']) file_manifest = get_pictures_list(llm_kwargs['most_recent_uploaded']['path'])
if len(file_manifest) > 0: if len(file_manifest) > 0:
print('正在使用讯飞图片理解API') logger.info('正在使用讯飞图片理解API')
gpt_url = self.gpt_url_img gpt_url = self.gpt_url_img
wsParam = Ws_Param(self.appid, self.api_key, self.api_secret, gpt_url) wsParam = Ws_Param(self.appid, self.api_key, self.api_secret, gpt_url)
websocket.enableTrace(False) websocket.enableTrace(False)
@@ -123,7 +124,7 @@ class SparkRequestInstance():
data = json.loads(message) data = json.loads(message)
code = data['header']['code'] code = data['header']['code']
if code != 0: if code != 0:
print(f'请求错误: {code}, {data}') logger.error(f'请求错误: {code}, {data}')
self.result_buf += str(data) self.result_buf += str(data)
ws.close() ws.close()
self.time_to_exit_event.set() self.time_to_exit_event.set()
@@ -140,7 +141,7 @@ class SparkRequestInstance():
# 收到websocket错误的处理 # 收到websocket错误的处理
def on_error(ws, error): def on_error(ws, error):
print("error:", error) logger.error("error:", error)
self.time_to_exit_event.set() self.time_to_exit_event.set()
# 收到websocket关闭的处理 # 收到websocket关闭的处理

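The on_error fix above folds the error object into a single formatted message (the two-argument form would have dropped it, as noted earlier). If callbacks in this class ever raise on their own, loguru's catch decorator records the traceback instead of letting websocket-client swallow it — a suggestion, not something this commit does:

    from loguru import logger

    @logger.catch(reraise=False)
    def on_error(ws, error):
        # any exception raised in here is logged with a full traceback
        logger.error(f"error: {error}")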
View File

@@ -5,7 +5,8 @@
from toolbox import get_conf from toolbox import get_conf
from zhipuai import ZhipuAI from zhipuai import ZhipuAI
from toolbox import get_conf, encode_image, get_pictures_list from toolbox import get_conf, encode_image, get_pictures_list
import logging, os from loguru import logger
import os
def input_encode_handler(inputs:str, llm_kwargs:dict): def input_encode_handler(inputs:str, llm_kwargs:dict):
@@ -24,7 +25,7 @@ class ZhipuChatInit:
def __init__(self): def __init__(self):
ZHIPUAI_API_KEY, ZHIPUAI_MODEL = get_conf("ZHIPUAI_API_KEY", "ZHIPUAI_MODEL") ZHIPUAI_API_KEY, ZHIPUAI_MODEL = get_conf("ZHIPUAI_API_KEY", "ZHIPUAI_MODEL")
if len(ZHIPUAI_MODEL) > 0: if len(ZHIPUAI_MODEL) > 0:
logging.error('ZHIPUAI_MODEL 配置项选项已经弃用请在LLM_MODEL中配置') logger.error('ZHIPUAI_MODEL 配置项选项已经弃用请在LLM_MODEL中配置')
self.zhipu_bro = ZhipuAI(api_key=ZHIPUAI_API_KEY) self.zhipu_bro = ZhipuAI(api_key=ZHIPUAI_API_KEY)
self.model = '' self.model = ''
@@ -37,8 +38,7 @@ class ZhipuChatInit:
what_i_have_asked['content'].append({"type": 'text', "text": user_input}) what_i_have_asked['content'].append({"type": 'text', "text": user_input})
if encode_img: if encode_img:
if len(encode_img) > 1: if len(encode_img) > 1:
logging.warning("glm-4v只支持一张图片,将只取第一张图片进行处理") logger.warning("glm-4v只支持一张图片,将只取第一张图片进行处理")
print("glm-4v只支持一张图片,将只取第一张图片进行处理")
img_d = {"type": "image_url", img_d = {"type": "image_url",
"image_url": { "image_url": {
"url": encode_img[0]['data'] "url": encode_img[0]['data']

View File

@@ -5,6 +5,7 @@ from toolbox import ChatBotWithCookies
from multiprocessing import Process, Pipe from multiprocessing import Process, Pipe
from contextlib import redirect_stdout from contextlib import redirect_stdout
from request_llms.queued_pipe import create_queue_pipe from request_llms.queued_pipe import create_queue_pipe
from loguru import logger
class ThreadLock(object): class ThreadLock(object):
def __init__(self): def __init__(self):
@@ -51,7 +52,7 @@ def reset_tqdm_output():
getattr(sys.stdout, 'flush', lambda: None)() getattr(sys.stdout, 'flush', lambda: None)()
def fp_write(s): def fp_write(s):
print(s) logger.info(s)
last_len = [0] last_len = [0]
def print_status(s): def print_status(s):
@@ -199,7 +200,7 @@ class LocalLLMHandle(Process):
if res.startswith(self.std_tag): if res.startswith(self.std_tag):
new_output = res[len(self.std_tag):] new_output = res[len(self.std_tag):]
std_out = std_out[:std_out_clip_len] std_out = std_out[:std_out_clip_len]
print(new_output, end='') logger.info(new_output)
std_out = new_output + std_out std_out = new_output + std_out
yield self.std_tag + '\n```\n' + std_out + '\n```\n' yield self.std_tag + '\n```\n' + std_out + '\n```\n'
elif res == '[Finish]': elif res == '[Finish]':

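fp_write above is handed to tqdm as its output stream, so progress lines now land in the log. The shim works as long as tqdm's carriage-return and blank writes are filtered out first; a self-contained sketch with an illustrative class name:

    import time
    from loguru import logger
    from tqdm import tqdm

    class LoguruWriter:
        # file-like shim: tqdm calls write()/flush(); forward non-empty lines only
        def write(self, s):
            s = s.strip()
            if s:
                logger.info(s)
        def flush(self):
            pass

    for _ in tqdm(range(3), file=LoguruWriter()):
        time.sleep(0.01)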
View File

@@ -2,7 +2,7 @@ import json
import time import time
import traceback import traceback
import requests import requests
from loguru import logger as logging from loguru import logger
# config_private.py放自己的秘密如API和代理网址 # config_private.py放自己的秘密如API和代理网址
# 读取时首先看是否存在私密的config_private配置文件不受git管控如果有则覆盖原config文件 # 读取时首先看是否存在私密的config_private配置文件不受git管控如果有则覆盖原config文件
@@ -106,10 +106,7 @@ def generate_message(input, model, key, history, max_output_token, system_prompt
"stream": True, "stream": True,
"max_tokens": max_output_token, "max_tokens": max_output_token,
} }
try:
print(f" {model} : {conversation_cnt} : {input[:100]} ..........")
except:
print("输入中可能存在乱码。")
return headers, playload return headers, playload
@@ -196,7 +193,7 @@ def get_predict_function(
if retry > MAX_RETRY: if retry > MAX_RETRY:
raise TimeoutError raise TimeoutError
if MAX_RETRY != 0: if MAX_RETRY != 0:
print(f"请求超时,正在重试 ({retry}/{MAX_RETRY}) ……") logger.error(f"请求超时,正在重试 ({retry}/{MAX_RETRY}) ……")
stream_response = response.iter_lines() stream_response = response.iter_lines()
result = "" result = ""
@@ -219,18 +216,17 @@ def get_predict_function(
): ):
chunk = get_full_error(chunk, stream_response) chunk = get_full_error(chunk, stream_response)
chunk_decoded = chunk.decode() chunk_decoded = chunk.decode()
print(chunk_decoded) logger.error(chunk_decoded)
raise RuntimeError( raise RuntimeError(
f"API异常,请检测终端输出。可能的原因是:{finish_reason}" f"API异常,请检测终端输出。可能的原因是:{finish_reason}"
) )
if chunk: if chunk:
try: try:
if finish_reason == "stop": if finish_reason == "stop":
logging.info(f"[response] {result}") if not console_slience:
logger.info(f"[response] {result}")
break break
result += response_text result += response_text
if not console_slience:
print(response_text, end="")
if observe_window is not None: if observe_window is not None:
# 观测窗,把已经获取的数据显示出去 # 观测窗,把已经获取的数据显示出去
if len(observe_window) >= 1: if len(observe_window) >= 1:
@@ -243,7 +239,7 @@ def get_predict_function(
chunk = get_full_error(chunk, stream_response) chunk = get_full_error(chunk, stream_response)
chunk_decoded = chunk.decode() chunk_decoded = chunk.decode()
error_msg = chunk_decoded error_msg = chunk_decoded
print(error_msg) logger.error(error_msg)
raise RuntimeError("Json解析不合常规") raise RuntimeError("Json解析不合常规")
return result return result
@@ -276,7 +272,7 @@ def get_predict_function(
inputs, history = handle_core_functionality( inputs, history = handle_core_functionality(
additional_fn, inputs, history, chatbot additional_fn, inputs, history, chatbot
) )
logging.info(f"[raw_input] {inputs}") logger.info(f"[raw_input] {inputs}")
chatbot.append((inputs, "")) chatbot.append((inputs, ""))
yield from update_ui( yield from update_ui(
chatbot=chatbot, history=history, msg="等待响应" chatbot=chatbot, history=history, msg="等待响应"
@@ -376,11 +372,11 @@ def get_predict_function(
history=history, history=history,
msg="API异常:" + chunk_decoded, msg="API异常:" + chunk_decoded,
) # 刷新界面 ) # 刷新界面
print(chunk_decoded) logger.error(chunk_decoded)
return return
if finish_reason == "stop": if finish_reason == "stop":
logging.info(f"[response] {gpt_replying_buffer}") logger.info(f"[response] {gpt_replying_buffer}")
break break
status_text = f"finish_reason: {finish_reason}" status_text = f"finish_reason: {finish_reason}"
gpt_replying_buffer += response_text gpt_replying_buffer += response_text
@@ -403,7 +399,7 @@ def get_predict_function(
yield from update_ui( yield from update_ui(
chatbot=chatbot, history=history, msg="Json异常" + chunk_decoded chatbot=chatbot, history=history, msg="Json异常" + chunk_decoded
) # 刷新界面 ) # 刷新界面
print(chunk_decoded) logger.error(chunk_decoded)
return return
return predict_no_ui_long_connection, predict return predict_no_ui_long_connection, predict
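Since this commit is billed as the final stage of the migration, the one piece none of the hunks show is sink configuration: every logger.info and logger.error above goes wherever the shared logger is pointed. A hedged sketch of a typical setup — paths, levels and rotation are illustrative, not this repo's actual config:

    import sys
    from loguru import logger

    logger.remove()                         # drop the default stderr sink
    logger.add(sys.stderr, level="INFO")    # console output
    logger.add("logs/app_{time:YYYY-MM-DD}.log",
               rotation="00:00", retention="7 days", level="DEBUG")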