Master 4.0 (#2210)
* stage academic conversation * stage document conversation * fix buggy gradio version * file dynamic load * merge more academic plugins * accelerate nltk * feat: 为predict函数添加文件和URL读取功能 - 添加URL检测和网页内容提取功能,支持自动提取网页文本 - 添加文件路径识别和文件内容读取功能,支持private_upload路径格式 - 集成WebTextExtractor处理网页内容提取 - 集成TextContentLoader处理本地文件读取 - 支持文件路径与问题组合的智能处理 * back * block unstable --------- Co-authored-by: XiaoBoAI <liuboyin2019@ia.ac.cn>
This commit is contained in:
344
crazy_functions/review_fns/handlers/paper_handler.py
Normal file
344
crazy_functions/review_fns/handlers/paper_handler.py
Normal file
@@ -0,0 +1,344 @@
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
from .base_handler import BaseHandler
|
||||
from crazy_functions.review_fns.query_analyzer import SearchCriteria
|
||||
import asyncio
|
||||
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency as request_gpt
|
||||
|
||||
class 单篇论文分析功能(BaseHandler):
    """Handler that analyses one specific paper.

    The paper is located from the ``SearchCriteria`` (arXiv ID, DOI, title,
    or -- as a last resort -- the main topic used as a title) and turned
    into a prompt asking the LLM for a structured review.
    """

    def __init__(self, arxiv, semantic, llm_kwargs=None):
        """Forward the data sources and LLM settings to ``BaseHandler``."""
        super().__init__(arxiv, semantic, llm_kwargs)

    async def handle(
        self,
        criteria: SearchCriteria,
        chatbot: List[List[str]],
        history: List[List[str]],
        system_prompt: str,
        llm_kwargs: Dict[str, Any],
        plugin_kwargs: Dict[str, Any],
    ) -> str:
        """Handle a paper-analysis request and return the final prompt.

        Args:
            criteria: Parsed query; ``paper_source`` / ``paper_id`` (and,
                inside ``_get_paper_details``, ``paper_title`` /
                ``main_topic``) are read from it.
            chatbot: Not used by this method.
            history: Not used by this method.
            system_prompt: Not used by this method.
            llm_kwargs: Not used by this method.
            plugin_kwargs: Not used by this method.

        Returns:
            The analysis prompt, or the apology prompt produced by
            ``_generate_apology_prompt`` when no paper could be found.
        """
        # 1. Fetch the paper details.
        paper = await self._get_paper_details(criteria)
        if not paper:
            return self._generate_apology_prompt(criteria)

        # Stored as ``ranked_papers`` so this handler exposes the same
        # attribute as the multi-paper handlers.
        self.ranked_papers = [paper]

        # 2. Build the final prompt.
        current_time = self._get_current_time()

        # Collect the paper metadata, tolerating missing attributes.
        title = getattr(paper, "title", "Unknown Title")
        authors = getattr(paper, "authors", [])
        year = getattr(paper, "year", "Unknown Year")
        abstract = getattr(paper, "abstract", "No abstract available")
        citations = getattr(paper, "citations", "N/A")

        # Venue fields get the same defensive treatment as the fields
        # above, so a paper object lacking them (or carrying
        # ``venue_info = None``) no longer raises AttributeError.
        venue_name = getattr(paper, "venue_name", "Unknown")
        venue_type = getattr(paper, "venue_type", "Unknown")
        venue_info = getattr(paper, "venue_info", None) or {}
        publisher = venue_info.get('publisher')
        journal_ref = venue_info.get('journal_ref')
        # Each optional line collapses to an empty line when absent.
        publisher_line = f"Publisher: {publisher}" if publisher else ""
        journal_ref_line = f"Journal Reference: {journal_ref}" if journal_ref else ""

        # Identifier line, only for ID-based lookups.
        paper_id = ""
        if criteria.paper_source == "arxiv":
            paper_id = f"arXiv ID: {criteria.paper_id}\n"
        elif criteria.paper_source == "doi":
            paper_id = f"DOI: {criteria.paper_id}\n"

        # Format the author list; entries are stringified so that
        # non-string author objects cannot break ``str.join``.
        if isinstance(authors, list):
            authors_str = ', '.join(str(author) for author in authors)
        else:
            authors_str = authors

        final_prompt = f"""Current time: {current_time}

Please provide a comprehensive analysis of the following paper:

{paper_id}Title: {title}
Authors: {authors_str}
Year: {year}
Citations: {citations}
Publication Venue: {venue_name} ({venue_type})
{publisher_line}
{journal_ref_line}
Abstract: {abstract}

Please provide:
1. Publication Context
- Publication venue analysis and impact factor (if available)
- Paper type (journal article, conference paper, preprint)
- Publication timeline and peer review status
- Publisher reputation and venue prestige

2. Research Context
- Field positioning and significance
- Historical context and prior work
- Related research streams
- Cross-venue impact analysis

3. Technical Analysis
- Detailed methodology review
- Implementation details
- Experimental setup and results
- Technical innovations

4. Impact Analysis
- Citation patterns and influence
- Cross-venue recognition
- Industry vs. academic impact
- Practical applications

5. Critical Review
- Methodological rigor assessment
- Result reliability and reproducibility
- Venue-appropriate evaluation standards
- Limitations and potential improvements

IMPORTANT:
- Strictly use ONLY the information provided above about the paper
- Do not make ANY assumptions or inferences beyond the given data
- If certain information is not provided, explicitly state that it is unknown
- For any unclear or missing details, acknowledge the limitation rather than speculating
- When discussing methodology or results, only describe what is explicitly stated in the abstract
- Never fabricate or assume any details about:
* Publication venues or status
* Implementation details not mentioned
* Results or findings not stated
* Impact or influence not supported by the citation count
* Authors' affiliations or backgrounds
* Future work or implications not mentioned
- You can find the paper's download options in the 📥 PDF Downloads section
- Available download formats include arXiv PDF, DOI links, and source URLs

Format your response in markdown with clear sections.

Language requirement:
- If the query explicitly specifies a language, use that language
- Otherwise, match the language of the original user query
"""

        return final_prompt

    async def _search_first_by_title(self, criteria: SearchCriteria, title):
        """Search every source for ``title`` and return the first hit.

        Overwrites ``criteria.arxiv_params``, ``criteria.semantic_params``
        and ``criteria.pubmed_params`` with single-result title queries
        before delegating to ``_search_all_sources``.

        Returns:
            The first paper returned, or ``None`` when nothing was found.
        """
        search_params = {
            'max_papers': 1,
            'min_year': 1900,  # effectively no lower bound on the year
            'search_multiplier': 1
        }

        criteria.arxiv_params = {
            "search_type": "basic",
            "query": f'ti:"{title}"',
            "limit": 1
        }
        criteria.semantic_params = {
            "query": title,
            "limit": 1
        }
        criteria.pubmed_params = {
            "search_type": "basic",
            "query": f'"{title}"[Title]',
            "limit": 1
        }

        papers = await self._search_all_sources(criteria, search_params)
        return papers[0] if papers else None

    async def _get_paper_details(self, criteria: SearchCriteria):
        """Resolve the paper described by ``criteria``.

        Lookup strategy by ``criteria.paper_source``:

        * ``"arxiv"`` -- ``self.arxiv.search_by_id``.
        * ``"doi"`` -- ``self.semantic.get_paper_by_doi``, falling back to
          a ``[doi]`` query on ``self.pubmed``.
        * ``"title"`` -- title search across all sources.
        * anything else -- if neither a title nor an ID is set,
          ``criteria.main_topic`` is tried as a title.

        Returns:
            The paper object, or ``None`` when nothing is found or any
            exception is raised (the error is printed, not propagated).
        """
        try:
            source = criteria.paper_source

            if source == "arxiv":
                # Look up by arXiv ID.
                papers = await self.arxiv.search_by_id(criteria.paper_id)
                return papers[0] if papers else None

            if source == "doi":
                paper = await self.semantic.get_paper_by_doi(criteria.paper_id)
                if paper:
                    return paper
                # Semantic Scholar had nothing; try PubMed.
                # NOTE(review): ``self.pubmed`` is not set in this class --
                # presumably provided by BaseHandler; confirm. If it is
                # missing, the AttributeError is caught below.
                papers = await self.pubmed.search(
                    f"{criteria.paper_id}[doi]",
                    limit=1
                )
                return papers[0] if papers else paper

            if source == "title":
                return await self._search_first_by_title(criteria, criteria.paper_title)

            # No recognised source: fall back to main_topic as the title,
            # but only when no explicit title or ID was given.
            if not criteria.paper_title and not criteria.paper_id:
                return await self._search_first_by_title(criteria, criteria.main_topic)

            return None

        except Exception as e:
            # Deliberate best-effort: a failed lookup yields the apology
            # prompt upstream instead of aborting the request.
            print(f"获取论文详情时出错: {str(e)}")
            return None

    async def _get_citation_context(self, paper: Any, plugin_kwargs: Dict) -> Tuple[List, List]:
        """Fetch related papers used as citation context.

        Runs a Semantic Scholar search and an arXiv search concurrently,
        both keyed on the paper's title.

        NOTE(review): both results come from title searches, so whether
        they represent actual citing/cited papers depends on the search
        helpers -- confirm before relying on the names.

        Returns:
            ``(citations, references)`` -- the Semantic Scholar result
            followed by the arXiv result.
        """
        search_params = self._get_search_params(plugin_kwargs)
        paper_title = getattr(paper, "title", "")

        # Build the per-source search parameters from the paper title.
        arxiv_params = {
            "query": f'ti:"{paper_title}"',
            "limit": search_params['max_papers'],
            "search_type": "basic",
            "sort_by": "relevance",
            "sort_order": "descending"
        }
        semantic_params = {
            "query": paper_title,
            "limit": search_params['max_papers']
        }

        citations, references = await asyncio.gather(
            self._search_semantic(
                semantic_params,
                limit_multiplier=search_params['search_multiplier'],
                min_year=search_params['min_year']
            ),
            self._search_arxiv(
                arxiv_params,
                limit_multiplier=search_params['search_multiplier'],
                min_year=search_params['min_year']
            )
        )

        return citations, references

    async def _generate_analysis(
        self,
        paper: Any,
        citations: List,
        references: List,
        chatbot: List[List[str]],
        history: List[List[str]],
        system_prompt: str,
        llm_kwargs: Dict[str, Any]
    ) -> List[List[str]]:
        """Generate the paper analysis and append it to ``chatbot``.

        Two LLM requests are issued through ``request_gpt``: a general
        analysis (with ``history`` and ``system_prompt``) and a technical
        deep dive (empty history, fixed system prompt).

        Returns:
            The same ``chatbot`` list, with one extra row holding either
            the combined analysis or a failure message.
        """
        # Build the general-analysis prompt.
        analysis_prompt = f"""Please provide a comprehensive analysis of the following paper:

Paper details:
{self._format_paper(paper)}

Key references (papers cited by this paper):
{self._format_papers(references)}

Important citations (papers that cite this paper):
{self._format_papers(citations)}

Please provide:
1. Paper Overview
- Main research question/objective
- Key methodology/approach
- Main findings/contributions

2. Technical Analysis
- Detailed methodology review
- Technical innovations
- Implementation details
- Experimental setup and results

3. Impact Analysis
- Significance in the field
- Influence on subsequent research (based on citing papers)
- Relationship to prior work (based on cited papers)
- Practical applications

4. Critical Review
- Strengths and limitations
- Potential improvements
- Open questions and future directions
- Alternative approaches

5. Related Research Context
- How it builds on previous work
- How it has influenced subsequent research
- Comparison with alternative approaches

Format your response in markdown with clear sections."""

        # Generate the overview and the technical analysis together.
        # The generator is drained synchronously; intermediate chunks are
        # not needed here.
        # NOTE(review): the generator's return value is discarded and the
        # answers are read back from the tail of ``chatbot`` -- confirm
        # that request_gpt leaves one chatbot row per input.
        for _ in request_gpt(
            inputs_array=[
                analysis_prompt,
                self._get_technical_prompt(paper)
            ],
            inputs_show_user_array=[
                "Generating paper analysis...",
                "Analyzing technical details..."
            ],
            llm_kwargs=llm_kwargs,
            chatbot=chatbot,
            history_array=[history, []],
            sys_prompt_array=[
                system_prompt,
                "You are an expert at analyzing technical details in research papers."
            ]
        ):
            pass  # wait for generation to finish

        # Pick up the last two answers.
        if chatbot and len(chatbot) >= 2:
            analysis = chatbot[-2][1]
            technical = chatbot[-1][1]
            # getattr keeps title access consistent with the rest of the
            # class and tolerates paper objects without a ``title``.
            paper_title = getattr(paper, "title", "Unknown Title")
            full_analysis = f"""# Paper Analysis: {paper_title}

## General Analysis
{analysis}

## Technical Deep Dive
{technical}
"""
            chatbot.append(["Here is the paper analysis:", full_analysis])
        else:
            chatbot.append(["Here is the paper analysis:", "Failed to generate analysis."])

        return chatbot

    def _get_technical_prompt(self, paper: Any) -> str:
        """Build the prompt for the technical deep-dive request."""
        return f"""Please provide a detailed technical analysis of the following paper:

{self._format_paper(paper)}

Focus on:
1. Mathematical formulations and their implications
2. Algorithm design and complexity analysis
3. Architecture details and design choices
4. Implementation challenges and solutions
5. Performance analysis and bottlenecks
6. Technical limitations and potential improvements

Format your response in markdown, focusing purely on technical aspects."""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user