Master 4.0 (#2210)
* stage academic conversation * stage document conversation * fix buggy gradio version * file dynamic load * merge more academic plugins * accelerate nltk * feat: 为predict函数添加文件和URL读取功能 - 添加URL检测和网页内容提取功能,支持自动提取网页文本 - 添加文件路径识别和文件内容读取功能,支持private_upload路径格式 - 集成WebTextExtractor处理网页内容提取 - 集成TextContentLoader处理本地文件读取 - 支持文件路径与问题组合的智能处理 * back * block unstable --------- Co-authored-by: XiaoBoAI <liuboyin2019@ia.ac.cn>
This commit is contained in:
412
crazy_functions/review_fns/handlers/base_handler.py
Normal file
412
crazy_functions/review_fns/handlers/base_handler.py
Normal file
@@ -0,0 +1,412 @@
|
||||
import asyncio
|
||||
from datetime import datetime
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List, Dict, Any
|
||||
from crazy_functions.review_fns.query_analyzer import SearchCriteria
|
||||
from crazy_functions.review_fns.data_sources.arxiv_source import ArxivSource
|
||||
from crazy_functions.review_fns.data_sources.semantic_source import SemanticScholarSource
|
||||
from crazy_functions.review_fns.data_sources.pubmed_source import PubMedSource
|
||||
from crazy_functions.review_fns.paper_processor.paper_llm_ranker import PaperLLMRanker
|
||||
from crazy_functions.pdf_fns.breakdown_pdf_txt import cut_from_end_to_satisfy_token_limit
|
||||
from request_llms.bridge_all import model_info
|
||||
from crazy_functions.review_fns.data_sources.crossref_source import CrossrefSource
|
||||
from crazy_functions.review_fns.data_sources.adsabs_source import AdsabsSource
|
||||
from toolbox import get_conf
|
||||
|
||||
|
||||
class BaseHandler(ABC):
    """Base class for literature-review handlers.

    Wires together the supported paper data sources (arXiv, Semantic
    Scholar, PubMed, Crossref, NASA ADS) and provides the shared
    searching, ranking-storage and formatting helpers used by the
    concrete handlers.
    """

    def __init__(self, arxiv: ArxivSource, semantic: SemanticScholarSource, llm_kwargs: Dict = None):
        """Store the injected sources and build the remaining ones.

        Args:
            arxiv: pre-configured arXiv data source.
            semantic: pre-configured Semantic Scholar data source.
            llm_kwargs: LLM configuration; forwarded to the paper ranker and
                used later to look up the model's token limit.
        """
        self.arxiv = arxiv
        self.semantic = semantic
        self.pubmed = PubMedSource()
        self.crossref = CrossrefSource()  # Crossref instance
        self.adsabs = AdsabsSource()  # NASA ADS instance
        self.paper_ranker = PaperLLMRanker(llm_kwargs=llm_kwargs)
        self.ranked_papers = []  # papers kept after ranking, read by get_ranked_papers()
        self.llm_kwargs = llm_kwargs or {}  # kept for token budgeting in _format_papers

    def _get_search_params(self, plugin_kwargs: Dict) -> Dict:
        """Extract search tuning parameters from ``plugin_kwargs`` with defaults."""
        return {
            'max_papers': plugin_kwargs.get('max_papers', 100),  # maximum number of papers
            'min_year': plugin_kwargs.get('min_year', 2015),  # earliest publication year
            'search_multiplier': plugin_kwargs.get('search_multiplier', 3),  # over-fetch factor
        }

    @abstractmethod
    async def handle(
        self,
        criteria: SearchCriteria,
        chatbot: List[List[str]],
        history: List[List[str]],
        system_prompt: str,
        llm_kwargs: Dict[str, Any],
        plugin_kwargs: Dict[str, Any],
    ) -> List[List[str]]:
        """Process a query; implemented by each concrete handler."""
        pass

    async def _search_arxiv(self, params: Dict, limit_multiplier: int = 1, min_year: int = 2015) -> List:
        """Search arXiv using its source-specific parameters.

        Falls back to per-category search when the keyword search returns
        nothing. Returns an empty list on any failure instead of raising.
        """
        try:
            # BUGFIX: compute the effective limit locally. The original wrote
            # it back into ``params`` (params["limit"] = original * multiplier),
            # mutating the caller's dict and compounding the multiplier on
            # repeated calls with the same params object.
            limit = params.get("limit", 20) * limit_multiplier
            papers = []

            # Try the basic keyword search first.
            query = params.get("query", "")
            if query:
                papers = await self.arxiv.search(
                    query,
                    limit=limit,
                    sort_by=params.get("sort_by", "relevance"),
                    sort_order=params.get("sort_order", "descending"),
                    start_year=min_year
                )

            # If the keyword search found nothing, try each requested category.
            if not papers:
                for category in params.get("categories", []):
                    category_papers = await self.arxiv.search_by_category(
                        category,
                        limit=limit,
                        sort_by=params.get("sort_by", "relevance"),
                        sort_order=params.get("sort_order", "descending"),
                    )
                    if category_papers:
                        papers.extend(category_papers)

            return papers or []

        except Exception as e:
            print(f"arXiv搜索出错: {str(e)}")
            return []

    async def _search_semantic(self, params: Dict, limit_multiplier: int = 1, min_year: int = 2015) -> List:
        """Search Semantic Scholar; year filtering is done client-side."""
        try:
            # Effective limit computed locally — do not mutate the caller's dict.
            limit = params.get("limit", 20) * limit_multiplier

            # Only the basic search parameters are supported here.
            papers = await self.semantic.search(
                query=params.get("query", ""),
                limit=limit
            )

            # Filter by year in memory (the API call above is unfiltered).
            if papers and min_year:
                papers = [p for p in papers if getattr(p, 'year', 0) and p.year >= min_year]

            return papers or []

        except Exception as e:
            print(f"Semantic Scholar搜索出错: {str(e)}")
            return []

    async def _search_pubmed(self, params: Dict, limit_multiplier: int = 1, min_year: int = 2015) -> List:
        """Search PubMed; dispatches on ``search_type`` (basic/author/journal)."""
        try:
            # "none" means the PubMed search was explicitly disabled.
            if params.get("search_type") == "none":
                return []

            # Effective limit computed locally — do not mutate the caller's dict.
            limit = params.get("limit", 20) * limit_multiplier
            papers = []
            query = params.get("query", "")
            search_type = params.get("search_type")

            # Pick the search method matching the requested search type.
            if search_type == "basic":
                papers = await self.pubmed.search(
                    query=query,
                    limit=limit,
                    start_year=min_year
                )
            elif search_type == "author":
                papers = await self.pubmed.search_by_author(
                    author=query,
                    limit=limit,
                    start_year=min_year
                )
            elif search_type == "journal":
                papers = await self.pubmed.search_by_journal(
                    journal=query,
                    limit=limit,
                    start_year=min_year
                )

            return papers or []

        except Exception as e:
            print(f"PubMed搜索出错: {str(e)}")
            return []

    async def _search_crossref(self, params: Dict, limit_multiplier: int = 1, min_year: int = 2015) -> List:
        """Search Crossref; dispatches on ``search_type`` (basic/author/journal)."""
        try:
            # Effective limit computed locally — do not mutate the caller's dict.
            limit = params.get("limit", 20) * limit_multiplier
            papers = []
            query = params.get("query", "")
            search_type = params.get("search_type")

            # Pick the search method matching the requested search type.
            if search_type == "basic":
                papers = await self.crossref.search(
                    query=query,
                    limit=limit,
                    start_year=min_year
                )
            elif search_type == "author":
                papers = await self.crossref.search_by_authors(
                    authors=[query],
                    limit=limit,
                    start_year=min_year
                )
            elif search_type == "journal":
                # Journal search is not implemented yet.
                pass

            return papers or []

        except Exception as e:
            print(f"Crossref搜索出错: {str(e)}")
            return []

    async def _search_adsabs(self, params: Dict, limit_multiplier: int = 1, min_year: int = 2015) -> List:
        """Search NASA ADS (only the basic search type is supported)."""
        try:
            # Effective limit computed locally — do not mutate the caller's dict.
            limit = params.get("limit", 20) * limit_multiplier
            papers = []

            if params.get("search_type") == "basic":
                papers = await self.adsabs.search(
                    query=params.get("query", ""),
                    limit=limit,
                    start_year=min_year
                )

            return papers or []

        except Exception as e:
            print(f"ADS搜索出错: {str(e)}")
            return []

    async def _search_all_sources(self, criteria: SearchCriteria, search_params: Dict) -> List:
        """Run the enabled source searches concurrently and merge the results."""
        search_tasks = []

        # PubMed search is disabled in the open-source build. The original
        # gating condition is kept here for reference:
        # is_using_pubmed = criteria.pubmed_params.get("search_type") != "none" \
        #     and criteria.pubmed_params.get("query") != "none"
        is_using_pubmed = False

        if is_using_pubmed:
            # PubMed branch: only PubMed + Semantic Scholar are queried.
            search_tasks.append(
                self._search_pubmed(
                    criteria.pubmed_params,
                    limit_multiplier=search_params['search_multiplier'],
                    min_year=criteria.start_year
                )
            )
            search_tasks.append(
                self._search_semantic(
                    criteria.semantic_params,
                    limit_multiplier=search_params['search_multiplier'],
                    min_year=criteria.start_year
                )
            )
        else:
            # Crossref, unless explicitly disabled in the criteria.
            if criteria.crossref_params.get("search_type") != "none" and criteria.crossref_params.get("query") != "none":
                search_tasks.append(
                    self._search_crossref(
                        criteria.crossref_params,
                        limit_multiplier=search_params['search_multiplier'],
                        min_year=criteria.start_year
                    )
                )

            # arXiv is always queried in this branch.
            search_tasks.append(
                self._search_arxiv(
                    criteria.arxiv_params,
                    limit_multiplier=search_params['search_multiplier'],
                    min_year=criteria.start_year
                )
            )
            # Semantic Scholar only when an API key is configured.
            if get_conf("SEMANTIC_SCHOLAR_KEY"):
                search_tasks.append(
                    self._search_semantic(
                        criteria.semantic_params,
                        limit_multiplier=search_params['search_multiplier'],
                        min_year=criteria.start_year
                    )
                )

        # Run every pending search concurrently.
        papers = await asyncio.gather(*search_tasks)

        # Merge the per-source result lists and count papers per source.
        all_papers = []
        source_counts = {
            'arxiv': 0,
            'semantic': 0,
            'pubmed': 0,
            'crossref': 0,
            'adsabs': 0
        }

        for source_papers in papers:
            if source_papers:
                for paper in source_papers:
                    source = getattr(paper, 'source', 'unknown')
                    if source in source_counts:
                        source_counts[source] += 1
                all_papers.extend(source_papers)

        # Log how many papers each source contributed.
        print("\n=== 各数据源找到的论文数量 ===")
        for source, count in source_counts.items():
            if count > 0:  # only sources that returned papers
                print(f"{source.capitalize()}: {count} 篇")
        print(f"总计: {len(all_papers)} 篇")
        print("===========================\n")

        return all_papers

    def _format_paper_time(self, paper) -> str:
        """Format a paper's publication time for display; "" when unknown."""
        year = getattr(paper, 'year', None)
        if not year:
            return ""

        # Prefer the exact publication date when available.
        if hasattr(paper, 'published') and paper.published:
            return f"(发表于 {paper.published.strftime('%Y-%m')})"
        # Otherwise fall back to the year alone.
        return f"({year})"

    def _format_papers(self, papers: List) -> str:
        """Format the paper list as markdown, trimmed to the model's token budget."""
        formatted = []

        for i, paper in enumerate(papers, 1):
            # Keep only the first three authors.
            authors = paper.authors[:3]
            if len(paper.authors) > 3:
                authors.append("et al.")

            # Collect every usable download link.
            download_links = []

            # arXiv / DOI links.
            if hasattr(paper, 'doi') and paper.doi:
                if paper.doi.startswith("10.48550/arXiv."):
                    # Extract the full arXiv ID from the DOI.
                    arxiv_id = paper.doi.split("arXiv.")[-1]
                    # BUGFIX: normalise the ID fully — the original replaced
                    # ".." only once, so runs of 3+ dots survived; loop until
                    # no duplicates remain, then strip edge dots.
                    while ".." in arxiv_id:
                        arxiv_id = arxiv_id.replace("..", ".")
                    arxiv_id = arxiv_id.strip(".")

                    download_links.append(f"[arXiv PDF](https://arxiv.org/pdf/{arxiv_id}.pdf)")
                    download_links.append(f"[arXiv Page](https://arxiv.org/abs/{arxiv_id})")
                elif "arxiv.org/abs/" in paper.doi:
                    # Extract the arXiv ID straight from the URL.
                    arxiv_id = paper.doi.split("arxiv.org/abs/")[-1]
                    if "v" in arxiv_id:  # drop the version suffix
                        arxiv_id = arxiv_id.split("v")[0]

                    download_links.append(f"[arXiv PDF](https://arxiv.org/pdf/{arxiv_id}.pdf)")
                    download_links.append(f"[arXiv Page](https://arxiv.org/abs/{arxiv_id})")
                else:
                    download_links.append(f"[DOI](https://doi.org/{paper.doi})")

            # Direct URL (only when not already covered by the links above).
            if hasattr(paper, 'url') and paper.url:
                if not any(paper.url in link for link in download_links):
                    download_links.append(f"[Source]({paper.url})")

            # Render the download links line.
            download_section = " | ".join(download_links) if download_links else "No direct download link available"

            # Venue / source metadata.
            source_info = []
            if hasattr(paper, 'venue_type') and paper.venue_type and paper.venue_type != 'preprint':
                source_info.append(f"Type: {paper.venue_type}")
            if hasattr(paper, 'venue_name') and paper.venue_name:
                source_info.append(f"Venue: {paper.venue_name}")

            # Impact factor and journal division information.
            if hasattr(paper, 'if_factor') and paper.if_factor:
                source_info.append(f"IF: {paper.if_factor}")
            if hasattr(paper, 'cas_division') and paper.cas_division:
                source_info.append(f"中科院分区: {paper.cas_division}")
            if hasattr(paper, 'jcr_division') and paper.jcr_division:
                source_info.append(f"JCR分区: {paper.jcr_division}")

            if hasattr(paper, 'venue_info') and paper.venue_info:
                if paper.venue_info.get('journal_ref'):
                    source_info.append(f"Journal Reference: {paper.venue_info['journal_ref']}")
                if paper.venue_info.get('publisher'):
                    source_info.append(f"Publisher: {paper.venue_info['publisher']}")

            # Assemble the formatted entry for this paper.
            paper_text = (
                f"{i}. **{paper.title}**\n" +
                f" Authors: {', '.join(authors)}\n" +
                f" Year: {paper.year}\n" +
                f" Citations: {paper.citations if paper.citations else 'N/A'}\n" +
                (f" Source: {'; '.join(source_info)}\n" if source_info else "") +
                # PubMed-specific metadata.
                (f" MeSH Terms: {'; '.join(paper.mesh_terms)}\n" if hasattr(paper,
                    'mesh_terms') and paper.mesh_terms else "") +
                f" 📥 PDF Downloads: {download_section}\n" +
                f" Abstract: {paper.abstract}\n"
            )

            formatted.append(paper_text)

        full_text = "\n".join(formatted)

        # Token budget depends on the configured model (default gpt-3.5-turbo);
        # keep three quarters of the model's window for the paper list.
        model_name = getattr(self, 'llm_kwargs', {}).get('llm_model', 'gpt-3.5-turbo')
        token_limit = model_info[model_name]['max_token'] * 3 // 4
        # Trim from the end until the text fits the budget.
        return cut_from_end_to_satisfy_token_limit(full_text, limit=token_limit, reserve_token=0, llm_model=model_name)

    def _get_current_time(self) -> str:
        """Return today's date formatted as a Chinese date string."""
        now = datetime.now()
        return now.strftime("%Y年%m月%d日")

    def _generate_apology_prompt(self, criteria: SearchCriteria) -> str:
        """Build the apology message shown when no usable papers were found."""
        return f"""很抱歉,我们未能找到与"{criteria.main_topic}"相关的有效文献。

可能的原因:
1. 搜索词过于具体或专业
2. 时间范围限制过严

建议解决方案:
1. 尝试使用更通用的关键词
2. 扩大搜索时间范围
3. 使用同义词或相关术语
请根据以上建议调整后重试。"""

    def get_ranked_papers(self) -> str:
        """Return the formatted ranked-paper list, or "" when none are stored."""
        return self._format_papers(self.ranked_papers) if self.ranked_papers else ""

    def _is_pubmed_paper(self, paper) -> bool:
        """True when the paper's URL points at PubMed."""
        return (paper.url and 'pubmed.ncbi.nlm.nih.gov' in paper.url)
|
||||
106
crazy_functions/review_fns/handlers/latest_handler.py
Normal file
106
crazy_functions/review_fns/handlers/latest_handler.py
Normal file
@@ -0,0 +1,106 @@
|
||||
from typing import List, Dict, Any
|
||||
from .base_handler import BaseHandler
|
||||
from crazy_functions.review_fns.query_analyzer import SearchCriteria
|
||||
import asyncio
|
||||
|
||||
class Arxiv最新论文推荐功能(BaseHandler):
    """Handler that recommends the latest arXiv papers for the user's topic."""

    def __init__(self, arxiv, semantic, llm_kwargs=None):
        """Forward the shared data sources and LLM config to the base class."""
        super().__init__(arxiv, semantic, llm_kwargs)

    async def handle(
        self,
        criteria: SearchCriteria,
        chatbot: List[List[str]],
        history: List[List[str]],
        system_prompt: str,
        llm_kwargs: Dict[str, Any],
        plugin_kwargs: Dict[str, Any],
    ) -> str:
        """Handle a latest-papers request and return the final LLM prompt.

        Fetches the newest papers for every requested arXiv category, ranks
        them against the original query, and embeds the formatted list into
        a structured recommendation prompt.
        """

        # Search tuning parameters (currently unused below but kept for parity
        # with the other handlers).
        search_params = self._get_search_params(plugin_kwargs)

        # Fetch the newest papers for every requested category.
        papers = []
        for category in criteria.arxiv_params["categories"]:
            latest_papers = await self.arxiv.get_latest_papers(
                category=category,
                debug=False,
                batch_size=50
            )
            papers.extend(latest_papers)

        if not papers:
            return self._generate_apology_prompt(criteria)

        # Rank the candidates by relevance to the original query.
        self.ranked_papers = self.paper_ranker.rank_papers(
            query=criteria.original_query,
            papers=papers,
            search_criteria=criteria
        )

        # Build the final prompt.
        current_time = self._get_current_time()
        # BUGFIX: "latext papers" typo corrected to "latest papers" in the
        # instruction text below.
        final_prompt = f"""Current time: {current_time}

Based on your interest in {criteria.main_topic}, here are the latest papers from arXiv in relevant categories:
{', '.join(criteria.arxiv_params["categories"])}

Latest papers available:
{self._format_papers(self.ranked_papers)}

Please provide:
1. A clear list of latest papers, organized by themes or approaches

2. Group papers by sub-topics or themes if applicable

3. For each paper:
   - Publication time
   - The key contributions and main findings
   - Why it's relevant to the user's interests
   - How it relates to other latest papers
   - The paper's citation count and citation impact
   - The paper's download link

4. A suggested reading order based on:
   - Paper relationships and dependencies
   - Difficulty level
   - Significance

5. Future Directions
   - Emerging venues and research streams
   - Novel methodological approaches
   - Cross-disciplinary opportunities
   - Research gaps by publication type

IMPORTANT:
- Focus on explaining why each paper is interesting
- Highlight the novelty and potential impact
- Consider the credibility and stage of each publication
- Use the provided paper titles with their links when referring to specific papers
- Base recommendations ONLY on the explicitly provided paper information
- Do not make ANY assumptions about papers beyond the given data
- When information is missing or unclear, acknowledge the limitation
- Never speculate about:
  * Paper quality or rigor not evidenced in the data
  * Research impact beyond citation counts
  * Implementation details not mentioned
  * Author expertise or background
  * Future research directions not stated
- For each paper, cite only verifiable information
- Clearly distinguish between facts and potential implications
- Each paper includes download links in its 📥 PDF Downloads section

Format your response in markdown with clear sections.

Language requirement:
- If the query explicitly specifies a language, use that language
- Otherwise, match the language of the original user query
"""

        return final_prompt
|
||||
344
crazy_functions/review_fns/handlers/paper_handler.py
Normal file
344
crazy_functions/review_fns/handlers/paper_handler.py
Normal file
@@ -0,0 +1,344 @@
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
from .base_handler import BaseHandler
|
||||
from crazy_functions.review_fns.query_analyzer import SearchCriteria
|
||||
import asyncio
|
||||
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency as request_gpt
|
||||
|
||||
class 单篇论文分析功能(BaseHandler):
    """Handler that analyses a single paper identified by ID, DOI or title."""

    def __init__(self, arxiv, semantic, llm_kwargs=None):
        """Forward the shared data sources and LLM config to the base class."""
        super().__init__(arxiv, semantic, llm_kwargs)

    async def handle(
        self,
        criteria: SearchCriteria,
        chatbot: List[List[str]],
        history: List[List[str]],
        system_prompt: str,
        llm_kwargs: Dict[str, Any],
        plugin_kwargs: Dict[str, Any],
    ) -> str:
        """Handle a paper-analysis request and return the final LLM prompt."""

        # 1. Locate the target paper.
        paper = await self._get_paper_details(criteria)
        if not paper:
            return self._generate_apology_prompt(criteria)

        # Store it as ranked_papers so the common accessors keep working.
        self.ranked_papers = [paper]

        # 2. Build the final prompt.
        current_time = self._get_current_time()

        # Paper metadata, defaulting safely when a field is missing.
        title = getattr(paper, "title", "Unknown Title")
        authors = getattr(paper, "authors", [])
        year = getattr(paper, "year", "Unknown Year")
        abstract = getattr(paper, "abstract", "No abstract available")
        citations = getattr(paper, "citations", "N/A")
        # BUGFIX: the venue fields were read directly (paper.venue_name,
        # paper.venue_info.get(...)) while every other field used getattr;
        # a paper object without venue attributes raised AttributeError here.
        venue_name = getattr(paper, "venue_name", "")
        venue_type = getattr(paper, "venue_type", "")
        venue_info = getattr(paper, "venue_info", None) or {}

        # Identifier line, when we know how the paper was referenced.
        paper_id = ""
        if criteria.paper_source == "arxiv":
            paper_id = f"arXiv ID: {criteria.paper_id}\n"
        elif criteria.paper_source == "doi":
            paper_id = f"DOI: {criteria.paper_id}\n"

        # Author list rendered as a display string.
        authors_str = ', '.join(authors) if isinstance(authors, list) else authors

        final_prompt = f"""Current time: {current_time}

Please provide a comprehensive analysis of the following paper:

{paper_id}Title: {title}
Authors: {authors_str}
Year: {year}
Citations: {citations}
Publication Venue: {venue_name} ({venue_type})
{f"Publisher: {venue_info.get('publisher')}" if venue_info.get('publisher') else ""}
{f"Journal Reference: {venue_info.get('journal_ref')}" if venue_info.get('journal_ref') else ""}
Abstract: {abstract}

Please provide:
1. Publication Context
   - Publication venue analysis and impact factor (if available)
   - Paper type (journal article, conference paper, preprint)
   - Publication timeline and peer review status
   - Publisher reputation and venue prestige

2. Research Context
   - Field positioning and significance
   - Historical context and prior work
   - Related research streams
   - Cross-venue impact analysis

3. Technical Analysis
   - Detailed methodology review
   - Implementation details
   - Experimental setup and results
   - Technical innovations

4. Impact Analysis
   - Citation patterns and influence
   - Cross-venue recognition
   - Industry vs. academic impact
   - Practical applications

5. Critical Review
   - Methodological rigor assessment
   - Result reliability and reproducibility
   - Venue-appropriate evaluation standards
   - Limitations and potential improvements

IMPORTANT:
- Strictly use ONLY the information provided above about the paper
- Do not make ANY assumptions or inferences beyond the given data
- If certain information is not provided, explicitly state that it is unknown
- For any unclear or missing details, acknowledge the limitation rather than speculating
- When discussing methodology or results, only describe what is explicitly stated in the abstract
- Never fabricate or assume any details about:
  * Publication venues or status
  * Implementation details not mentioned
  * Results or findings not stated
  * Impact or influence not supported by the citation count
  * Authors' affiliations or backgrounds
  * Future work or implications not mentioned
- You can find the paper's download options in the 📥 PDF Downloads section
- Available download formats include arXiv PDF, DOI links, and source URLs

Format your response in markdown with clear sections.

Language requirement:
- If the query explicitly specifies a language, use that language
- Otherwise, match the language of the original user query
"""

        return final_prompt

    async def _get_paper_details(self, criteria: SearchCriteria):
        """Resolve the target paper from its ID, DOI or title; None on failure."""
        try:
            if criteria.paper_source == "arxiv":
                # Look up directly by arXiv ID.
                papers = await self.arxiv.search_by_id(criteria.paper_id)
                return papers[0] if papers else None

            elif criteria.paper_source == "doi":
                # Try Semantic Scholar first, then fall back to PubMed.
                paper = await self.semantic.get_paper_by_doi(criteria.paper_id)
                if not paper:
                    papers = await self.pubmed.search(
                        f"{criteria.paper_id}[doi]",
                        limit=1
                    )
                    if papers:
                        return papers[0]
                return paper

            elif criteria.paper_source == "title":
                # Title search across all sources via _search_all_sources.
                search_params = {
                    'max_papers': 1,
                    'min_year': 1900,  # effectively no year restriction
                    'search_multiplier': 1
                }

                # Configure per-source title queries.
                criteria.arxiv_params = {
                    "search_type": "basic",
                    "query": f'ti:"{criteria.paper_title}"',
                    "limit": 1
                }
                criteria.semantic_params = {
                    "query": criteria.paper_title,
                    "limit": 1
                }
                criteria.pubmed_params = {
                    "search_type": "basic",
                    "query": f'"{criteria.paper_title}"[Title]',
                    "limit": 1
                }

                papers = await self._search_all_sources(criteria, search_params)
                return papers[0] if papers else None

            # Last resort: when neither a title nor an ID was extracted,
            # treat main_topic as the title and search all sources.
            if not criteria.paper_title and not criteria.paper_id:
                search_params = {
                    'max_papers': 1,
                    'min_year': 1900,
                    'search_multiplier': 1
                }

                # Configure per-source title queries from main_topic.
                criteria.arxiv_params = {
                    "search_type": "basic",
                    "query": f'ti:"{criteria.main_topic}"',
                    "limit": 1
                }
                criteria.semantic_params = {
                    "query": criteria.main_topic,
                    "limit": 1
                }
                criteria.pubmed_params = {
                    "search_type": "basic",
                    "query": f'"{criteria.main_topic}"[Title]',
                    "limit": 1
                }

                papers = await self._search_all_sources(criteria, search_params)
                return papers[0] if papers else None

            return None

        except Exception as e:
            print(f"获取论文详情时出错: {str(e)}")
            return None

    async def _get_citation_context(self, paper: Dict, plugin_kwargs: Dict) -> Tuple[List, List]:
        """Collect related papers for citation context.

        NOTE(review): despite the names, both results come from title-keyword
        searches (Semantic Scholar and arXiv), not from an actual citation
        graph — confirm this matches the intended semantics.
        """
        search_params = self._get_search_params(plugin_kwargs)

        # Build per-source queries from the paper title.
        title_query = f'ti:"{getattr(paper, "title", "")}"'
        arxiv_params = {
            "query": title_query,
            "limit": search_params['max_papers'],
            "search_type": "basic",
            "sort_by": "relevance",
            "sort_order": "descending"
        }
        semantic_params = {
            "query": getattr(paper, "title", ""),
            "limit": search_params['max_papers']
        }

        # Run both searches concurrently.
        citations, references = await asyncio.gather(
            self._search_semantic(
                semantic_params,
                limit_multiplier=search_params['search_multiplier'],
                min_year=search_params['min_year']
            ),
            self._search_arxiv(
                arxiv_params,
                limit_multiplier=search_params['search_multiplier'],
                min_year=search_params['min_year']
            )
        )

        return citations, references

    async def _generate_analysis(
        self,
        paper: Dict,
        citations: List,
        references: List,
        chatbot: List[List[str]],
        history: List[List[str]],
        system_prompt: str,
        llm_kwargs: Dict[str, Any]
    ) -> List[List[str]]:
        """Generate a two-part paper analysis and append it to the chatbot log."""

        # Build the general-analysis prompt.
        # NOTE(review): self._format_paper (singular) is not defined in this
        # file or in BaseHandler as visible here — verify it exists elsewhere,
        # otherwise this raises AttributeError at runtime.
        analysis_prompt = f"""Please provide a comprehensive analysis of the following paper:

Paper details:
{self._format_paper(paper)}

Key references (papers cited by this paper):
{self._format_papers(references)}

Important citations (papers that cite this paper):
{self._format_papers(citations)}

Please provide:
1. Paper Overview
   - Main research question/objective
   - Key methodology/approach
   - Main findings/contributions

2. Technical Analysis
   - Detailed methodology review
   - Technical innovations
   - Implementation details
   - Experimental setup and results

3. Impact Analysis
   - Significance in the field
   - Influence on subsequent research (based on citing papers)
   - Relationship to prior work (based on cited papers)
   - Practical applications

4. Critical Review
   - Strengths and limitations
   - Potential improvements
   - Open questions and future directions
   - Alternative approaches

5. Related Research Context
   - How it builds on previous work
   - How it has influenced subsequent research
   - Comparison with alternative approaches

Format your response in markdown with clear sections."""

        # Run the general and technical analyses in parallel; the generator
        # must be fully consumed for the requests to complete.
        for response_chunk in request_gpt(
            inputs_array=[
                analysis_prompt,
                self._get_technical_prompt(paper)
            ],
            inputs_show_user_array=[
                "Generating paper analysis...",
                "Analyzing technical details..."
            ],
            llm_kwargs=llm_kwargs,
            chatbot=chatbot,
            history_array=[history, []],
            sys_prompt_array=[
                system_prompt,
                "You are an expert at analyzing technical details in research papers."
            ]
        ):
            pass  # drain the generator; we only need the accumulated chatbot

        # Combine the last two answers into one markdown report.
        if chatbot and len(chatbot[-2:]) == 2:
            analysis = chatbot[-2][1]
            technical = chatbot[-1][1]
            full_analysis = f"""# Paper Analysis: {paper.title}

## General Analysis
{analysis}

## Technical Deep Dive
{technical}
"""
            chatbot.append(["Here is the paper analysis:", full_analysis])
        else:
            chatbot.append(["Here is the paper analysis:", "Failed to generate analysis."])

        return chatbot

    def _get_technical_prompt(self, paper: Dict) -> str:
        """Build the prompt for the technical deep-dive analysis."""
        return f"""Please provide a detailed technical analysis of the following paper:

{self._format_paper(paper)}

Focus on:
1. Mathematical formulations and their implications
2. Algorithm design and complexity analysis
3. Architecture details and design choices
4. Implementation challenges and solutions
5. Performance analysis and bottlenecks
6. Technical limitations and potential improvements

Format your response in markdown, focusing purely on technical aspects."""
|
||||
|
||||
|
||||
147
crazy_functions/review_fns/handlers/qa_handler.py
Normal file
147
crazy_functions/review_fns/handlers/qa_handler.py
Normal file
@@ -0,0 +1,147 @@
|
||||
from typing import List, Dict, Any
|
||||
from .base_handler import BaseHandler
|
||||
from crazy_functions.review_fns.query_analyzer import SearchCriteria
|
||||
from textwrap import dedent
|
||||
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency as request_gpt
|
||||
|
||||
class 学术问答功能(BaseHandler):
    """Academic Q&A handler.

    Searches the configured literature sources for papers relevant to the
    user's question, re-ranks them with the LLM-based ranker, and returns a
    prompt instructing the model to answer the question using ONLY the
    retrieved abstracts (with numbered citations).
    """

    def __init__(self, arxiv, semantic, llm_kwargs=None):
        # All source/ranker wiring is done by BaseHandler.
        super().__init__(arxiv, semantic, llm_kwargs)

    async def handle(
        self,
        criteria: SearchCriteria,
        chatbot: List[List[str]],
        history: List[List[str]],
        system_prompt: str,
        llm_kwargs: Dict[str, Any],
        plugin_kwargs: Dict[str, Any],
    ) -> str:
        """Process an academic Q&A request and return the final prompt string.

        Returns an apology prompt when no relevant papers could be found.
        """

        # 1. Extract search parameters from the plugin kwargs.
        search_params = self._get_search_params(plugin_kwargs)

        # 2. Search and rank relevant papers. Note that
        #    _search_relevant_papers also stores the ranked list on
        #    self.ranked_papers, which is what the prompt below formats.
        papers = await self._search_relevant_papers(criteria, search_params)
        if not papers:
            return self._generate_apology_prompt(criteria)

        # Build the final prompt (continuation lines are flush-left so
        # textwrap.dedent leaves the interpolated paper list untouched).
        current_time = self._get_current_time()
        final_prompt = dedent(f"""Current time: {current_time}

Based on the following paper abstracts, please answer this academic question: {criteria.original_query}

Available papers for reference:
{self._format_papers(self.ranked_papers)}

Please structure your response in the following format:

1. Core Answer (2-3 paragraphs)
- Provide a clear, direct answer synthesizing key findings
- Support main points with citations [1,2,etc.]
- Focus on consensus and differences across papers

2. Key Evidence (2-3 paragraphs)
- Present supporting evidence from abstracts
- Compare methodologies and results
- Highlight significant findings with citations

3. Research Context (1-2 paragraphs)
- Discuss current trends and developments
- Identify research gaps or limitations
- Suggest potential future directions

Guidelines:
- Base your answer ONLY on the provided abstracts
- Use numbered citations [1], [2,3], etc. for every claim
- Maintain academic tone and objectivity
- Synthesize findings across multiple papers
- Focus on the most relevant information to the question

Constraints:
- Do not include information beyond the provided abstracts
- Avoid speculation or personal opinions
- Do not elaborate on technical details unless directly relevant
- Keep citations concise and focused
- Use [N] citations for every major claim or finding
- Cite multiple papers [1,2,3] when showing consensus
- Place citations immediately after the relevant statements

Note: Provide citations for every major claim to ensure traceability to source papers.
Language requirement:
- If the query explicitly specifies a language, use that language. Use Chinese to answer if no language is specified.
- Otherwise, match the language of the original user query
"""
        )

        return final_prompt

    async def _search_relevant_papers(self, criteria: SearchCriteria, search_params: Dict) -> List:
        """Search all sources for relevant papers and rank them.

        Side effect: stores the ranked list on ``self.ranked_papers`` for
        later use by ``handle``. Returns the ranked list (or [] when the
        search/ranking produced nothing).
        """
        # _search_all_sources replaces the earlier per-source parallel search.
        all_papers = await self._search_all_sources(criteria, search_params)

        if not all_papers:
            return []

        # Re-rank with the LLM-based paper ranker.
        self.ranked_papers = self.paper_ranker.rank_papers(
            query=criteria.main_topic,
            papers=all_papers,
            search_criteria=criteria
        )

        return self.ranked_papers or []

    async def _generate_answer(
        self,
        criteria: SearchCriteria,
        papers: List,
        chatbot: List[List[str]],
        history: List[List[str]],
        system_prompt: str,
        llm_kwargs: Dict[str, Any]
    ) -> List[List[str]]:
        """Generate an answer through the LLM and append it to *chatbot*.

        NOTE(review): this drives request_gpt to completion and then copies
        the last chatbot reply into a new "Here is the answer:" entry, so
        the answer appears twice in the chat log — confirm this duplication
        is intended.
        """

        # Build the Q&A prompt around the formatted paper list.
        qa_prompt = dedent(f"""Please answer the following academic question based on recent research papers.

Question: {criteria.main_topic}

Relevant papers:
{self._format_papers(papers)}

Please provide:
1. A direct answer to the question
2. Supporting evidence from the papers
3. Different perspectives or approaches if applicable
4. Current limitations and open questions
5. References to specific papers

Format your response in markdown with clear sections."""
        )
        # Drive the LLM; request_gpt streams updates into chatbot itself.
        for response_chunk in request_gpt(
            inputs_array=[qa_prompt],
            inputs_show_user_array=["Generating answer..."],
            llm_kwargs=llm_kwargs,
            chatbot=chatbot,
            history_array=[history],
            sys_prompt_array=[system_prompt]
        ):
            pass  # wait for generation to finish

        # Pull the last reply out of the chat log and re-post it.
        if chatbot and len(chatbot[-1]) >= 2:
            answer = chatbot[-1][1]
            chatbot.append(["Here is the answer:", answer])
        else:
            chatbot.append(["Here is the answer:", "Failed to generate answer."])

        return chatbot
|
||||
|
||||
185
crazy_functions/review_fns/handlers/recommend_handler.py
Normal file
185
crazy_functions/review_fns/handlers/recommend_handler.py
Normal file
@@ -0,0 +1,185 @@
|
||||
from typing import List, Dict, Any
|
||||
from .base_handler import BaseHandler
|
||||
from textwrap import dedent
|
||||
from crazy_functions.review_fns.query_analyzer import SearchCriteria
|
||||
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency as request_gpt
|
||||
|
||||
class 论文推荐功能(BaseHandler):
    """Paper recommendation handler.

    Searches all configured literature sources for "seed" papers matching
    the user's interest, re-ranks them with the LLM-based ranker, and
    returns a prompt asking the model to present the ranked papers as
    structured, citation-grounded recommendations.
    """

    def __init__(self, arxiv, semantic, llm_kwargs=None):
        # All source/ranker wiring is done by BaseHandler.
        super().__init__(arxiv, semantic, llm_kwargs)

    async def handle(
        self,
        criteria: SearchCriteria,
        chatbot: List[List[str]],
        history: List[List[str]],
        system_prompt: str,
        llm_kwargs: Dict[str, Any],
        plugin_kwargs: Dict[str, Any],
    ) -> str:
        """Process a paper-recommendation request and return the final prompt.

        Returns an apology prompt when no seed papers are found or ranking
        produces an empty list.
        """

        search_params = self._get_search_params(plugin_kwargs)

        # 1. Search for seed papers across all sources.
        seed_papers = await self._search_seed_papers(criteria, search_params)
        if not seed_papers:
            return self._generate_apology_prompt(criteria)

        # Re-rank with the LLM-based ranker; the ranked list is stored on
        # self.ranked_papers and formatted into the prompt below.
        # (A redundant emptiness re-check of seed_papers was removed here:
        # the early return above already guarantees a non-empty list.)
        self.ranked_papers = self.paper_ranker.rank_papers(
            query=criteria.original_query,
            papers=seed_papers,
            search_criteria=criteria
        )

        if not self.ranked_papers:
            return self._generate_apology_prompt(criteria)

        # Build the final prompt (continuation lines are flush-left so
        # textwrap.dedent leaves the interpolated paper list untouched).
        current_time = self._get_current_time()
        final_prompt = dedent(f"""Current time: {current_time}

Based on the user's interest in {criteria.main_topic}, here are relevant papers.

Available papers for recommendation:
{self._format_papers(self.ranked_papers)}

Please provide:
1. Group papers by sub-topics or themes if applicable

2. For each paper:
- Publication time and venue (when available)
- Journal metrics (when available):
* Impact Factor (IF)
* JCR Quartile
* Chinese Academy of Sciences (CAS) Division
- The key contributions and main findings
- Why it's relevant to the user's interests
- How it relates to other recommended papers
- The paper's citation count and citation impact
- The paper's download link

3. A suggested reading order based on:
- Journal impact and quality metrics
- Chronological development of ideas
- Paper relationships and dependencies
- Difficulty level
- Impact and significance

4. Future Directions
- Emerging venues and research streams
- Novel methodological approaches
- Cross-disciplinary opportunities
- Research gaps by publication type


IMPORTANT:
- Focus on explaining why each paper is valuable
- Highlight connections between papers
- Consider both citation counts AND journal metrics when discussing impact
- When available, use IF, JCR quartile, and CAS division to assess paper quality
- Mention publication timing when discussing paper relationships
- When referring to papers, use HTML links in this format:
* For DOIs: <a href='https://doi.org/DOI_HERE' target='_blank'>DOI: DOI_HERE</a>
* For titles: <a href='PAPER_URL' target='_blank'>PAPER_TITLE</a>
- Present papers in a way that shows the evolution of ideas over time
- Base recommendations ONLY on the explicitly provided paper information
- Do not make ANY assumptions about papers beyond the given data
- When information is missing or unclear, acknowledge the limitation
- Never speculate about:
* Paper quality or rigor not evidenced in the data
* Research impact beyond citation counts and journal metrics
* Implementation details not mentioned
* Author expertise or background
* Future research directions not stated
- For each recommendation, cite only verifiable information
- Clearly distinguish between facts and potential implications

Format your response in markdown with clear sections.
Language requirement:
- If the query explicitly specifies a language, use that language
- Otherwise, match the language of the original user query
"""
        )
        return final_prompt

    async def _search_seed_papers(self, criteria: SearchCriteria, search_params: Dict) -> List:
        """Search all sources for seed papers; returns [] on failure."""
        try:
            # _search_all_sources replaces the earlier per-source parallel search.
            all_papers = await self._search_all_sources(criteria, search_params)

            if not all_papers:
                return []

            return all_papers

        except Exception as e:
            print(f"搜索种子论文时出错: {str(e)}")
            continue_marker = None  # noqa: F841 (kept for symmetry; nothing to clean up)
            return []

    async def _get_recommendations(self, seed_papers: List, multiplier: int = 1) -> List:
        """Collect recommended papers derived from *seed_papers*.

        The seed papers themselves are included in the result; up to five of
        them are used to fetch similar papers (per-seed limit is
        ``3 * multiplier``). The combined list is de-duplicated by DOI
        before being returned.
        """
        recommendations = []
        base_limit = 3 * multiplier

        # Seed papers are part of the recommendation pool.
        recommendations.extend(seed_papers)

        # Only the top 5 papers serve as recommendation seeds.
        seed_papers = seed_papers[:5]

        for paper in seed_papers:
            try:
                if paper.doi and paper.doi.startswith("10.48550/arXiv."):
                    # arXiv paper: recover the full arXiv id from the DOI.
                    # BUGFIX: an arXiv DOI like "10.48550/arXiv.2301.12345"
                    # contains a dot INSIDE the arXiv id, so the previous
                    # split(".")[-1] returned only "12345" and queried the
                    # API with a truncated id. Splitting on the "arXiv."
                    # marker keeps the complete id "2301.12345".
                    arxiv_id = paper.doi.split("arXiv.", 1)[-1]
                    paper_details = await self.arxiv.get_paper_details(arxiv_id)
                    if paper_details and hasattr(paper_details, 'venue'):
                        category = paper_details.venue.split(":")[-1]
                        similar_papers = await self.arxiv.search_by_category(
                            category,
                            limit=base_limit,
                            sort_by='relevance'
                        )
                        recommendations.extend(similar_papers)
                elif paper.doi:  # only DOI-bearing papers can use the recommendation API
                    # Semantic Scholar recommendation lookup.
                    similar_papers = await self.semantic.get_recommended_papers(
                        paper.doi,
                        limit=base_limit
                    )
                    if similar_papers:  # only extend on a successful fetch
                        recommendations.extend(similar_papers)
                else:
                    # No DOI: fall back to a title search on Semantic Scholar.
                    if paper.title:
                        similar_papers = await self.semantic.search(
                            query=paper.title,
                            limit=base_limit
                        )
                        recommendations.extend(similar_papers)

            except Exception as e:
                print(f"获取论文 '{paper.title}' 的推荐时发生错误: {str(e)}")
                continue

        # De-duplicate: by DOI when present, by equality otherwise.
        seen_dois = set()
        unique_recommendations = []
        for paper in recommendations:
            if paper.doi and paper.doi not in seen_dois:
                seen_dois.add(paper.doi)
                unique_recommendations.append(paper)
            elif not paper.doi and paper not in unique_recommendations:
                unique_recommendations.append(paper)

        return unique_recommendations
|
||||
193
crazy_functions/review_fns/handlers/review_handler.py
Normal file
193
crazy_functions/review_fns/handlers/review_handler.py
Normal file
@@ -0,0 +1,193 @@
|
||||
from typing import List, Dict, Any, Tuple
|
||||
from .base_handler import BaseHandler
|
||||
from crazy_functions.review_fns.query_analyzer import SearchCriteria
|
||||
import asyncio
|
||||
|
||||
class 文献综述功能(BaseHandler):
    """Literature review handler.

    Searches all configured sources, ranks the results, and returns a
    review-writing prompt. A medical-style review prompt is used when any
    ranked paper comes from PubMed; otherwise a general review prompt is
    used.
    """

    def __init__(self, arxiv, semantic, llm_kwargs=None):
        # All source/ranker wiring is done by BaseHandler.
        super().__init__(arxiv, semantic, llm_kwargs)

    async def handle(
        self,
        criteria: SearchCriteria,
        chatbot: List[List[str]],
        history: List[List[str]],
        system_prompt: str,
        llm_kwargs: Dict[str, Any],
        plugin_kwargs: Dict[str, Any],
    ) -> str:
        """Process a literature-review request and return the final prompt.

        Returns an apology prompt when no papers are found or ranking
        yields an empty list.
        """

        # Extract search parameters from the plugin kwargs.
        search_params = self._get_search_params(plugin_kwargs)

        # _search_all_sources replaces the earlier per-source parallel search.
        all_papers = await self._search_all_sources(criteria, search_params)

        if not all_papers:
            return self._generate_apology_prompt(criteria)

        # Re-rank with the LLM-based ranker; stored on self.ranked_papers
        # because the prompt builders below read it.
        self.ranked_papers = self.paper_ranker.rank_papers(
            query=criteria.original_query,
            papers=all_papers,
            search_criteria=criteria
        )

        # Bail out if ranking filtered everything away.
        if not self.ranked_papers:
            return self._generate_apology_prompt(criteria)

        # Detect PubMed papers by URL to choose the medical review template.
        has_pubmed_papers = any(paper.url and 'pubmed.ncbi.nlm.nih.gov' in paper.url
                                for paper in self.ranked_papers)

        if has_pubmed_papers:
            return self._generate_medical_review_prompt(criteria)
        else:
            return self._generate_general_review_prompt(criteria)

    def _generate_medical_review_prompt(self, criteria: SearchCriteria) -> str:
        """Build the systematic medical literature review prompt."""
        return f"""Current time: {self._get_current_time()}

Conduct a systematic medical literature review on {criteria.main_topic} based STRICTLY on the provided articles.

Available literature for review:
{self._format_papers(self.ranked_papers)}

IMPORTANT: If the user query contains specific requirements for the review structure or format, those requirements take precedence over the following guidelines.

Please structure your medical review following these guidelines:

1. Research Overview
- Main research questions and objectives from the studies
- Types of studies included (clinical trials, observational studies, etc.)
- Study populations and settings
- Time period of the research

2. Key Findings
- Main outcomes and results reported in abstracts
- Primary endpoints and their measurements
- Statistical significance when reported
- Observed trends across studies

3. Methods Summary
- Study designs used
- Major interventions or treatments studied
- Key outcome measures
- Patient populations studied

4. Clinical Relevance
- Reported clinical implications
- Main conclusions from authors
- Reported benefits and risks
- Treatment responses when available

5. Research Status
- Current research focus areas
- Reported limitations
- Gaps identified in abstracts
- Authors' suggested future directions

CRITICAL REQUIREMENTS:

Citation Rules (MANDATORY):
- EVERY finding or statement MUST be supported by citations [N], where N is the number matching the paper in the provided literature list
- When reporting outcomes, ALWAYS cite the source studies using the exact paper numbers from the literature list
- For findings supported by multiple studies, use consecutive numbers as shown in the literature list [1,2,3]
- Use ONLY the papers provided in the available literature list above
- Citations must appear immediately after each statement
- Citation numbers MUST match the numbers assigned to papers in the literature list above (e.g., if a finding comes from the first paper in the list, cite it as [1])
- DO NOT change or reorder the citation numbers - they must exactly match the paper numbers in the literature list

Content Guidelines:
- Present only information available in the provided papers
- If certain information is not available, simply omit that aspect rather than explicitly stating its absence
- Focus on synthesizing and presenting available findings
- Maintain professional medical writing style
- Present limitations and gaps as research opportunities rather than missing information

Writing Style:
- Use precise medical terminology
- Maintain objective reporting
- Use consistent terminology throughout
- Present a cohesive narrative without referencing data limitations

Language requirement:
- If the query explicitly specifies a language, use that language
- Otherwise, match the language of the original user query
"""

    def _generate_general_review_prompt(self, criteria: SearchCriteria) -> str:
        """Build the general (non-medical) literature review prompt."""
        current_time = self._get_current_time()
        final_prompt = f"""Current time: {current_time}

Conduct a comprehensive literature review on {criteria.main_topic} focusing on the following aspects:
{', '.join(criteria.sub_topics)}

Available literature for review:
{self._format_papers(self.ranked_papers)}

IMPORTANT: If the user query contains specific requirements for the review structure or format, those requirements take precedence over the following guidelines.

Please structure your review following these guidelines:

1. Introduction and Research Background
- Current state and significance of the research field
- Key research problems and challenges
- Research development timeline and evolution

2. Research Directions and Classifications
- Major research directions and their relationships
- Different technical approaches and their characteristics
- Comparative analysis of various solutions

3. Core Technologies and Methods
- Key technological breakthroughs
- Advantages and limitations of different methods
- Technical challenges and solutions

4. Applications and Impact
- Real-world applications and use cases
- Industry influence and practical value
- Implementation challenges and solutions

5. Future Trends and Prospects
- Emerging research directions
- Unsolved problems and challenges
- Potential breakthrough points

CRITICAL REQUIREMENTS:

Citation Rules (MANDATORY):
- EVERY finding or statement MUST be supported by citations [N], where N is the number matching the paper in the provided literature list
- When reporting outcomes, ALWAYS cite the source studies using the exact paper numbers from the literature list
- For findings supported by multiple studies, use consecutive numbers as shown in the literature list [1,2,3]
- Use ONLY the papers provided in the available literature list above
- Citations must appear immediately after each statement
- Citation numbers MUST match the numbers assigned to papers in the literature list above (e.g., if a finding comes from the first paper in the list, cite it as [1])
- DO NOT change or reorder the citation numbers - they must exactly match the paper numbers in the literature list

Writing Style:
- Maintain academic and professional tone
- Focus on objective analysis with proper citations
- Ensure logical flow and clear structure

Content Requirements:
- Base ALL analysis STRICTLY on the provided papers with explicit citations
- When introducing any concept, method, or finding, immediately follow with [N]
- For each research direction or approach, cite the specific papers [N] that proposed or developed it
- When discussing limitations or challenges, cite the papers [N] that identified them
- DO NOT include information from sources outside the provided paper list
- DO NOT make unsupported claims or statements

Language requirement:
- If the query explicitly specifies a language, use that language
- Otherwise, match the language of the original user query
"""

        return final_prompt
|
||||
|
||||
Reference in New Issue
Block a user