Files
gpt_academic/crazy_functions/Arxiv_论文对话.py
lbykkkk 68aa846a89 up
2024-11-10 15:06:50 +08:00

83 lines
3.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os.path
from toolbox import CatchException, update_ui
from crazy_functions.rag_essay_fns.paper_processing import ArxivPaperProcessor
import asyncio
@CatchException
def Rag论文对话(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
"""
txt: 用户输入通常是arxiv论文链接
功能RAG论文总结和对话
"""
if_project, if_arxiv = False, False
if os.path.exists(txt):
from crazy_functions.rag_essay_fns.document_splitter import SmartDocumentSplitter
splitter = SmartDocumentSplitter(
char_range=(1000, 1200),
max_workers=32 # 可选默认会根据CPU核心数自动设置
)
if_project = True
else:
from crazy_functions.rag_essay_fns.arxiv_splitter import SmartArxivSplitter
splitter = SmartArxivSplitter(
char_range=(1000, 1200),
root_dir="gpt_log/arxiv_cache"
)
if_arxiv = True
for fragment in splitter.process(txt):
pass
# 初始化处理器
processor = ArxivPaperProcessor()
rag_handler = RagHandler()
# Step 1: 下载和提取论文
download_result = processor.download_and_extract(txt, chatbot, history)
project_folder, arxiv_id = None, None
for result in download_result:
if isinstance(result, tuple) and len(result) == 2:
project_folder, arxiv_id = result
break
if not project_folder or not arxiv_id:
return
# Step 2: 合并TEX文件
paper_content = processor.merge_tex_files(project_folder, chatbot, history)
if not paper_content:
return
# Step 3: RAG处理
chatbot.append(["正在构建知识图谱...", "处理中..."])
yield from update_ui(chatbot=chatbot, history=history)
# 处理论文内容
rag_handler.process_paper_content(paper_content)
# 生成初始摘要
summary = rag_handler.query("请总结这篇论文的主要内容,包括研究目的、方法、结果和结论。")
chatbot.append(["论文摘要", summary])
yield from update_ui(chatbot=chatbot, history=history)
# 交互式问答
chatbot.append(["知识图谱构建完成", "您可以开始提问了。支持以下类型的问题:\n1. 论文的具体内容\n2. 研究方法的细节\n3. 实验结果分析\n4. 与其他工作的比较"])
yield from update_ui(chatbot=chatbot, history=history)
# 等待用户提问并回答
while True:
question = yield from wait_user_input()
if not question:
break
# 根据问题类型选择不同的查询模式
if "比较" in question or "关系" in question:
mode = "global" # 使用全局模式处理比较类问题
elif "具体" in question or "细节" in question:
mode = "local" # 使用局部模式处理细节问题
else:
mode = "hybrid" # 默认使用混合模式
response = rag_handler.query(question, mode=mode)
chatbot.append([question, response])
yield from update_ui(chatbot=chatbot, history=history)