Files
gpt_academic/crazy_functions/review_fns/conversation_doc/html_doc.py
binary-husky 8042750d41 Master 4.0 (#2210)
* stage academic conversation

* stage document conversation

* fix buggy gradio version

* file dynamic load

* merge more academic plugins

* accelerate nltk

* feat: 为predict函数添加文件和URL读取功能
- 添加URL检测和网页内容提取功能,支持自动提取网页文本
- 添加文件路径识别和文件内容读取功能,支持private_upload路径格式
- 集成WebTextExtractor处理网页内容提取
- 集成TextContentLoader处理本地文件读取
- 支持文件路径与问题组合的智能处理

* back

* block unstable

---------

Co-authored-by: XiaoBoAI <liuboyin2019@ia.ac.cn>
2025-08-23 15:59:22 +08:00

472 lines
17 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

class HtmlFormatter:
"""聊天记录HTML格式生成器"""
def __init__(self):
self.css_styles = """
:root {
--primary-color: #2563eb;
--primary-light: #eff6ff;
--secondary-color: #1e293b;
--background-color: #f8fafc;
--text-color: #334155;
--border-color: #e2e8f0;
--card-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1);
}
body {
font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
line-height: 1.8;
margin: 0;
padding: 2rem;
color: var(--text-color);
background-color: var(--background-color);
}
.container {
max-width: 1200px;
margin: 0 auto;
background: white;
padding: 2rem;
border-radius: 16px;
box-shadow: var(--card-shadow);
}
::selection {
background: var(--primary-light);
color: var(--primary-color);
}
@keyframes fadeIn {
from { opacity: 0; transform: translateY(20px); }
to { opacity: 1; transform: translateY(0); }
}
@keyframes slideIn {
from { transform: translateX(-20px); opacity: 0; }
to { transform: translateX(0); opacity: 1; }
}
.container {
animation: fadeIn 0.6s ease-out;
}
.QaBox {
animation: slideIn 0.5s ease-out;
transition: all 0.3s ease;
}
.QaBox:hover {
transform: translateX(5px);
}
.Question, .Answer, .historyBox {
transition: all 0.3s ease;
}
.chat-title {
color: var(--primary-color);
font-size: 2em;
text-align: center;
margin: 1rem 0 2rem;
padding-bottom: 1rem;
border-bottom: 2px solid var(--primary-color);
}
.chat-body {
display: flex;
flex-direction: column;
gap: 1.5rem;
margin: 2rem 0;
}
.QaBox {
background: white;
padding: 1.5rem;
border-radius: 8px;
border-left: 4px solid var(--primary-color);
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
margin-bottom: 1.5rem;
}
.Question {
color: var(--secondary-color);
font-weight: 500;
margin-bottom: 1rem;
}
.Answer {
color: var(--text-color);
background: var(--primary-light);
padding: 1rem;
border-radius: 6px;
}
.history-section {
margin-top: 3rem;
padding-top: 2rem;
border-top: 2px solid var(--border-color);
}
.history-title {
color: var(--secondary-color);
font-size: 1.5em;
margin-bottom: 1.5rem;
text-align: center;
}
.historyBox {
background: white;
padding: 1rem;
margin: 0.5rem 0;
border-radius: 6px;
border: 1px solid var(--border-color);
}
@media (prefers-color-scheme: dark) {
:root {
--background-color: #0f172a;
--text-color: #e2e8f0;
--border-color: #1e293b;
}
.container, .QaBox {
background: #1e293b;
}
}
"""
def create_document(self, question: str, answer: str, ranked_papers: list = None) -> str:
"""生成完整的HTML文档
Args:
question: str, 用户问题
answer: str, AI回答
ranked_papers: list, 排序后的论文列表
Returns:
str: 完整的HTML文档字符串
"""
chat_content = f'''
<div class="QaBox">
<div class="Question">{question}</div>
<div class="Answer markdown-body" id="answer-content">{answer}</div>
</div>
'''
references_content = ""
if ranked_papers:
references_content = '<div class="history-section"><h2 class="history-title">参考文献</h2>'
for idx, paper in enumerate(ranked_papers, 1):
authors = ', '.join(paper.authors)
# 构建引用信息
citations_info = f"被引用次数:{paper.citations}" if paper.citations is not None else "引用信息未知"
# 构建下载链接
download_links = []
if paper.doi:
# 检查是否是arXiv链接
if 'arxiv.org' in paper.doi:
# 如果DOI中包含完整的arXiv URL直接使用
arxiv_url = paper.doi if paper.doi.startswith('http') else f'http://{paper.doi}'
download_links.append(f'<a href="{arxiv_url}">arXiv链接</a>')
# 提取arXiv ID并添加PDF链接
arxiv_id = arxiv_url.split('abs/')[-1].split('v')[0]
download_links.append(f'<a href="https://arxiv.org/pdf/{arxiv_id}.pdf">PDF下载</a>')
else:
# 非arXiv的DOI使用标准格式
download_links.append(f'<a href="https://doi.org/{paper.doi}">DOI: {paper.doi}</a>')
if hasattr(paper, 'url') and paper.url and 'arxiv.org' not in str(paper.url):
# 只有当URL不是arXiv链接时才添加
download_links.append(f'<a href="{paper.url}">原文链接</a>')
download_section = ' | '.join(download_links) if download_links else "无直接下载链接"
# 构建来源信息
source_info = []
if paper.venue_type:
source_info.append(f"类型:{paper.venue_type}")
if paper.venue_name:
source_info.append(f"来源:{paper.venue_name}")
# 添加期刊指标信息
if hasattr(paper, 'if_factor') and paper.if_factor:
source_info.append(f"<span class='journal-metric'>IF: {paper.if_factor}</span>")
if hasattr(paper, 'jcr_division') and paper.jcr_division:
source_info.append(f"<span class='journal-metric'>JCR分区: {paper.jcr_division}</span>")
if hasattr(paper, 'cas_division') and paper.cas_division:
source_info.append(f"<span class='journal-metric'>中科院分区: {paper.cas_division}</span>")
if hasattr(paper, 'venue_info') and paper.venue_info:
if paper.venue_info.get('journal_ref'):
source_info.append(f"期刊参考:{paper.venue_info['journal_ref']}")
if paper.venue_info.get('publisher'):
source_info.append(f"出版商:{paper.venue_info['publisher']}")
source_section = ' | '.join(source_info) if source_info else ""
# 构建标准引用格式
standard_citation = f"[{idx}] "
# 添加作者最多3个超过则添加et al.
author_list = paper.authors[:3]
if len(paper.authors) > 3:
author_list.append("et al.")
standard_citation += ", ".join(author_list) + ". "
# 添加标题
standard_citation += f"<i>{paper.title}</i>"
# 添加期刊/会议名称
if paper.venue_name:
standard_citation += f". {paper.venue_name}"
# 添加年份
if paper.year:
standard_citation += f", {paper.year}"
# 添加DOI
if paper.doi:
if 'arxiv.org' in paper.doi:
# 如果是arXiv链接直接使用arXiv URL
arxiv_url = paper.doi if paper.doi.startswith('http') else f'http://{paper.doi}'
standard_citation += f". {arxiv_url}"
else:
# 非arXiv的DOI使用标准格式
standard_citation += f". DOI: {paper.doi}"
standard_citation += "."
references_content += f'''
<div class="historyBox">
<div class="entry">
<p class="paper-title"><b>[{idx}]</b> <i>{paper.title}</i></p>
<p class="paper-authors">作者:{authors}</p>
<p class="paper-year">发表年份:{paper.year if paper.year else "未知"}</p>
<p class="paper-citations">{citations_info}</p>
{f'<p class="paper-source">{source_section}</p>' if source_section else ""}
<p class="paper-abstract">摘要:{paper.abstract if paper.abstract else "无摘要"}</p>
<p class="paper-links">链接:{download_section}</p>
<div class="standard-citation">
<p class="citation-title">标准引用格式:</p>
<p class="citation-text">{standard_citation}</p>
<button class="copy-btn" onclick="copyToClipboard(this.previousElementSibling)">复制引用格式</button>
</div>
</div>
</div>
'''
references_content += '</div>'
# 添加新的CSS样式
css_additions = """
.paper-title {
font-size: 1.1em;
margin-bottom: 0.5em;
}
.paper-authors {
color: var(--secondary-color);
margin: 0.3em 0;
}
.paper-year, .paper-citations {
color: var(--text-color);
margin: 0.3em 0;
}
.paper-source {
color: var(--text-color);
font-style: italic;
margin: 0.3em 0;
}
.paper-abstract {
margin: 0.8em 0;
padding: 0.8em;
background: var(--primary-light);
border-radius: 4px;
}
.paper-links {
margin-top: 0.5em;
}
.paper-links a {
color: var(--primary-color);
text-decoration: none;
margin-right: 1em;
}
.paper-links a:hover {
text-decoration: underline;
}
.standard-citation {
margin-top: 1em;
padding: 1em;
background: #f8fafc;
border-radius: 4px;
border: 1px solid var(--border-color);
}
.citation-title {
font-weight: bold;
margin-bottom: 0.5em;
color: var(--secondary-color);
}
.citation-text {
font-family: 'Times New Roman', Times, serif;
line-height: 1.6;
margin-bottom: 0.5em;
padding: 0.5em;
background: white;
border-radius: 4px;
border: 1px solid var(--border-color);
}
.copy-btn {
background: var(--primary-color);
color: white;
border: none;
padding: 0.5em 1em;
border-radius: 4px;
cursor: pointer;
font-size: 0.9em;
transition: background-color 0.2s;
}
.copy-btn:hover {
background: #1e40af;
}
@media (prefers-color-scheme: dark) {
.standard-citation {
background: #1e293b;
}
.citation-text {
background: #0f172a;
}
}
/* 添加期刊指标样式 */
.journal-metric {
display: inline-block;
padding: 0.2em 0.6em;
margin: 0 0.3em;
background: var(--primary-light);
border-radius: 4px;
font-weight: 500;
color: var(--primary-color);
}
@media (prefers-color-scheme: dark) {
.journal-metric {
background: #1e293b;
color: #60a5fa;
}
}
"""
# 修改 js_code 部分,添加 markdown 解析功能
js_code = """
<script>
// 复制功能
function copyToClipboard(element) {
const text = element.innerText;
navigator.clipboard.writeText(text).then(function() {
const btn = element.nextElementSibling;
const originalText = btn.innerText;
btn.innerText = '已复制!';
setTimeout(() => {
btn.innerText = originalText;
}, 2000);
}).catch(function(err) {
console.error('复制失败:', err);
});
}
// Markdown解析
document.addEventListener('DOMContentLoaded', function() {
const answerContent = document.getElementById('answer-content');
if (answerContent) {
const markdown = answerContent.textContent;
answerContent.innerHTML = marked.parse(markdown);
}
});
</script>
"""
# 将新的CSS样式添加到现有样式中
self.css_styles += css_additions
return f"""
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>学术对话存档</title>
<!-- 添加 marked.js -->
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<!-- 添加 GitHub Markdown CSS -->
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/sindresorhus/github-markdown-css@4.0.0/github-markdown.min.css">
<style>
{self.css_styles}
/* 添加 Markdown 相关样式 */
.markdown-body {{
font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
padding: 1rem;
background: var(--primary-light);
border-radius: 6px;
}}
.markdown-body pre {{
background-color: #f6f8fa;
border-radius: 6px;
padding: 16px;
overflow: auto;
}}
.markdown-body code {{
background-color: rgba(175,184,193,0.2);
border-radius: 6px;
padding: 0.2em 0.4em;
font-size: 85%;
}}
.markdown-body pre code {{
background-color: transparent;
padding: 0;
}}
.markdown-body blockquote {{
border-left: 0.25em solid #d0d7de;
padding: 0 1em;
color: #656d76;
}}
.markdown-body table {{
border-collapse: collapse;
width: 100%;
margin: 1em 0;
}}
.markdown-body table th,
.markdown-body table td {{
border: 1px solid #d0d7de;
padding: 6px 13px;
}}
.markdown-body table tr:nth-child(2n) {{
background-color: #f6f8fa;
}}
@media (prefers-color-scheme: dark) {{
.markdown-body {{
background: #1e293b;
color: #e2e8f0;
}}
.markdown-body pre {{
background-color: #0f172a;
}}
.markdown-body code {{
background-color: rgba(99,110,123,0.4);
}}
.markdown-body blockquote {{
border-left-color: #30363d;
color: #8b949e;
}}
.markdown-body table th,
.markdown-body table td {{
border-color: #30363d;
}}
.markdown-body table tr:nth-child(2n) {{
background-color: #0f172a;
}}
}}
</style>
</head>
<body>
<div class="container">
<h1 class="chat-title">学术对话存档</h1>
<div class="chat-body">
{chat_content}
{references_content}
</div>
</div>
{js_code}
</body>
</html>
"""