* stage academic conversation * stage document conversation * fix buggy gradio version * file dynamic load * merge more academic plugins * accelerate nltk * feat: 为predict函数添加文件和URL读取功能 - 添加URL检测和网页内容提取功能,支持自动提取网页文本 - 添加文件路径识别和文件内容读取功能,支持private_upload路径格式 - 集成WebTextExtractor处理网页内容提取 - 集成TextContentLoader处理本地文件读取 - 支持文件路径与问题组合的智能处理 * back * block unstable --------- Co-authored-by: XiaoBoAI <liuboyin2019@ia.ac.cn>
142 lines
5.2 KiB
Python
142 lines
5.2 KiB
Python
import json
|
||
import os
|
||
from typing import Dict, Optional
|
||
|
||
class JournalMetrics:
    """Look up journal metrics by ISSN or by journal name.

    On construction the class reads ``cas_if.json`` from the directory that
    contains this module and builds three lookup tables:

    * ``journal_data`` and ``name_map``: normalized journal name or
      abbreviation -> metrics (both tables receive the same entries).
    * ``issn_map``: ISSN / eISSN exactly as stored in the data file -> metrics.

    Every metrics dict has the keys ``if_factor`` (from the ``IF`` field,
    converted to ``float`` or ``None``), ``jcr_division`` (``Q`` field) and
    ``cas_division`` (``B`` field).
    """

    # Words stripped from the start / end of a lower-cased name, checked in
    # this order by _normalize_journal_name.
    _NAME_PREFIXES = ('the ', 'proceedings of ', 'journal of ')
    _NAME_SUFFIXES = (' journal', ' proceedings', ' magazine', ' review', ' letters')

    def __init__(self):
        self.journal_data: Dict = {}  # normalized journal name -> metrics
        self.issn_map: Dict = {}  # ISSN / eISSN -> metrics
        self.name_map: Dict = {}  # normalized journal name -> metrics
        self._load_journal_data()

    def _normalize_journal_name(self, name: str) -> str:
        """Return a canonical form of a journal name for dictionary lookups.

        The name is lower-cased, known prefixes and suffixes are removed,
        every character that is neither alphanumeric nor whitespace is
        dropped, and runs of whitespace are collapsed to single spaces.

        Args:
            name: Raw journal name.

        Returns:
            The normalized name, or ``""`` when ``name`` is empty or is not
            a string.
        """
        # A non-string cannot be normalized; treat it like a missing name
        # instead of raising AttributeError on .lower().
        if not name or not isinstance(name, str):
            return ""

        name = name.lower()

        # Each prefix is tested once, in order, so "the journal of x" loses
        # both "the " and "journal of ".
        for prefix in self._NAME_PREFIXES:
            if name.startswith(prefix):
                name = name[len(prefix):]

        for suffix in self._NAME_SUFFIXES:
            if name.endswith(suffix):
                name = name[:-len(suffix)]

        # Keep only letters, digits and whitespace.
        name = ''.join(c for c in name if c.isalnum() or c.isspace())

        # Collapse repeated whitespace and trim both ends.
        return ' '.join(name.split())

    def _convert_if_value(self, if_str: object) -> Optional[float]:
        """Convert an impact-factor value to ``float``.

        Args:
            if_str: Value of the ``IF`` field. Usually a string such as
                ``"3.5"`` or ``"<0.1"``; plain JSON numbers are accepted too.

        Returns:
            The numeric value. For a ``"<x"`` string, ``x`` itself is
            returned. ``None`` when the value is missing or unparsable.
        """
        # bool is a subclass of int; it is never a meaningful impact factor.
        if isinstance(if_str, bool):
            return None
        # JSON numbers arrive as int/float and have no string methods.
        if isinstance(if_str, (int, float)):
            return float(if_str)
        try:
            text = if_str.strip()
            if text.startswith('<'):
                # For values such as "<0.1", return the bound 0.1.
                return float(text.strip('<'))
            return float(text)
        except (ValueError, AttributeError):
            return None

    def _load_journal_data(self):
        """Populate the lookup tables from ``cas_if.json``.

        The file is expected next to this module. Entries that are not JSON
        objects, and names that normalize to an empty string, are skipped.
        Any error while reading or indexing the file is reported on stdout
        and leaves all three tables empty.
        """
        try:
            file_path = os.path.join(os.path.dirname(__file__), 'cas_if.json')
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            for journal in data:
                # One malformed entry must not discard the whole dataset.
                if not isinstance(journal, dict):
                    continue

                metrics = {
                    'if_factor': self._convert_if_value(journal.get('IF')),
                    'jcr_division': journal.get('Q'),
                    'cas_division': journal.get('B')
                }

                # Index by full name and by abbreviation, both normalized.
                for field in ('journal', 'jabb'):
                    raw_name = journal.get(field)
                    if not raw_name:
                        continue
                    normalized = self._normalize_journal_name(raw_name)
                    # An empty key would match every name that normalizes
                    # to nothing, so it is never stored.
                    if normalized:
                        self.journal_data[normalized] = metrics
                        self.name_map[normalized] = metrics

                # Index by print ISSN and electronic ISSN.
                for field in ('issn', 'eissn'):
                    issn = journal.get(field)
                    if issn:
                        self.issn_map[issn] = metrics

        except Exception as e:
            print(f"加载期刊数据时出错: {str(e)}")
            self.journal_data = {}
            self.issn_map = {}
            self.name_map = {}

    def _lookup_issn(self, issn_value) -> dict:
        """Return the metrics of the first ISSN found in ``issn_map``.

        ``issn_value`` may be a single ISSN or a list/tuple of ISSNs.
        Candidates that cannot be used as dictionary keys are ignored.
        Returns ``{}`` when nothing matches.
        """
        if isinstance(issn_value, (list, tuple)):
            candidates = issn_value
        else:
            candidates = [issn_value]

        for issn in candidates:
            try:
                found = self.issn_map.get(issn, {})
            except TypeError:
                # Unhashable candidate (e.g. a nested list): skip it.
                continue
            if found:
                return found
        return {}

    def get_journal_metrics(self, venue_name: str, venue_info: dict) -> dict:
        """Return the metrics for a venue.

        The ISSN(s) in ``venue_info['issn']`` are tried first; if none
        matches, the normalized ``venue_name`` is looked up in ``name_map``.
        Only exact matches on the normalized name are used.

        Args:
            venue_name: Journal name.
            venue_info: Venue details; may be empty or ``None``. An ``issn``
                entry may hold one ISSN or a list of ISSNs.

        Returns:
            The stored metrics dict (not a copy), or ``{}`` when no match is
            found or an error occurs.
        """
        try:
            metrics = {}

            # 1. Try to match by ISSN first.
            if venue_info and 'issn' in venue_info:
                metrics = self._lookup_issn(venue_info['issn'])

            # 2. Fall back to the normalized journal name.
            if not metrics and venue_name:
                normalized_name = self._normalize_journal_name(venue_name)
                if normalized_name:
                    metrics = self.name_map.get(normalized_name, {})

            return metrics

        except Exception as e:
            print(f"获取期刊指标时出错: {str(e)}")
            return {}