gpt_academic/crazy_functions/review_fns/data_sources/unpaywall_source.py

import aiohttp
from typing import List, Dict, Optional
from datetime import datetime
from .base_source import DataSource, PaperMetadata

class UnpaywallSource(DataSource):
    """Unpaywall API实现"""

    def _initialize(self) -> None:
        self.base_url = "https://api.unpaywall.org/v2"
        self.email = self.api_key  # Unpaywall使用email作为API key

    async def search(self, query: str, limit: int = 100) -> List[PaperMetadata]:
        async with aiohttp.ClientSession() as session:
            async with session.get(
                f"{self.base_url}/search",
                params={
                    "query": query,
                    "email": self.email,
                    "limit": limit
                }
            ) as response:
                data = await response.json()
                return [self._parse_response(item.response)
                        for item in data.get("results", [])]

    def _parse_response(self, data: Dict) -> PaperMetadata:
        """解析Unpaywall返回的数据"""
        return PaperMetadata(
            title=data.get("title", ""),
            authors=[
                f"{author.get('given', '')} {author.get('family', '')}"
                for author in data.get("z_authors", [])
            ],
            institutions=[
                aff.get("name", "")
                for author in data.get("z_authors", [])
                for aff in author.get("affiliation", [])
            ],
            abstract="",  # Unpaywall不提供摘要
            year=data.get("year"),
            doi=data.get("doi"),
            url=data.get("doi_url"),
            citations=None,  # Unpaywall不提供引用计数
            venue=data.get("journal_name")
        )