Master 4.0 (#2210)
* stage academic conversation * stage document conversation * fix buggy gradio version * file dynamic load * merge more academic plugins * accelerate nltk * feat: 为predict函数添加文件和URL读取功能 - 添加URL检测和网页内容提取功能,支持自动提取网页文本 - 添加文件路径识别和文件内容读取功能,支持private_upload路径格式 - 集成WebTextExtractor处理网页内容提取 - 集成TextContentLoader处理本地文件读取 - 支持文件路径与问题组合的智能处理 * back * block unstable --------- Co-authored-by: XiaoBoAI <liuboyin2019@ia.ac.cn>
This commit is contained in:
68
crazy_functions/review_fns/conversation_doc/endnote_doc.py
Normal file
68
crazy_functions/review_fns/conversation_doc/endnote_doc.py
Normal file
@@ -0,0 +1,68 @@
|
||||
from typing import List
|
||||
from crazy_functions.review_fns.data_sources.base_source import PaperMetadata
|
||||
|
||||
class EndNoteFormatter:
    """Generate reference lists in the EndNote tagged text format."""

    # Maps a lower-cased ``venue_type`` to the value written after "%0".
    # Anything not listed here falls back to _DEFAULT_ENTRY_TYPE.
    _ENTRY_TYPES = {
        'conference': 'Conference Paper',
        'preprint': 'Electronic Article',
    }
    _DEFAULT_ENTRY_TYPE = 'Journal Article'

    def __init__(self):
        pass

    def create_document(self, papers: List[PaperMetadata]) -> str:
        """Build the EndNote text for a list of papers.

        Args:
            papers: Paper records. Each one is read for ``title``,
                ``authors``, ``year``, ``venue_type``, ``venue_name``,
                ``venue``, ``doi``, ``url``, ``abstract`` and
                ``institutions``; optional attributes may be missing or None.

        Returns:
            str: One tagged record per paper, each followed by a blank line.
            An empty input yields an empty string.
        """
        lines = []

        for paper in papers:
            # The record type is decided per entry *before* it is written, so
            # one paper's type can never rewrite text that was already emitted.
            venue_type = getattr(paper, 'venue_type', None)
            entry_type = self._DEFAULT_ENTRY_TYPE
            if venue_type:
                entry_type = self._ENTRY_TYPES.get(venue_type.lower(), entry_type)
            lines.append(f"%0 {entry_type}")

            # Title
            lines.append(f"%T {paper.title}")

            # Authors, one tag per author
            lines.extend(f"%A {author}" for author in (paper.authors or []))

            # Year
            if paper.year:
                lines.append(f"%D {paper.year}")

            # Journal / conference name, preferring venue_name over venue
            venue = getattr(paper, 'venue_name', None) or getattr(paper, 'venue', None)
            if venue:
                lines.append(f"%J {venue}")

            # DOI (with its resolver URL), otherwise the plain URL
            if paper.doi:
                lines.append(f"%R {paper.doi}")
                lines.append(f"%U https://doi.org/{paper.doi}")
            elif paper.url:
                lines.append(f"%U {paper.url}")

            # Abstract
            if paper.abstract:
                lines.append(f"%X {paper.abstract}")

            # Institutions; the attribute may be absent or None
            for institution in getattr(paper, 'institutions', None) or []:
                lines.append(f"%I {institution}")

            # Blank line between records
            lines.append("")

        return "".join(f"{line}\n" for line in lines)
|
||||
211
crazy_functions/review_fns/conversation_doc/excel_doc.py
Normal file
211
crazy_functions/review_fns/conversation_doc/excel_doc.py
Normal file
@@ -0,0 +1,211 @@
|
||||
import re
|
||||
import os
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class ExcelTableFormatter:
    """Convert Markdown tables found in chat answers into an Excel workbook."""

    def __init__(self):
        """Create the backing openpyxl workbook and reset the table counter."""
        from openpyxl import Workbook
        self.workbook = Workbook()
        self._table_count = 0
        self._current_sheet = None

    def _normalize_table_row(self, row):
        """Split one Markdown table row into stripped cell strings.

        A single leading and a single trailing ``|`` are dropped first, so
        ``"| a | b |"`` and ``"a | b"`` both yield ``["a", "b"]``.
        """
        row = row.strip()
        if row.startswith('|'):
            row = row[1:]
        if row.endswith('|'):
            row = row[:-1]
        return [cell.strip() for cell in row.split('|')]

    def _is_separator_row(self, row):
        """Return True if the row consists only of ``-`` / ``:`` (ignoring ``|`` and whitespace)."""
        clean_row = re.sub(r'[\s|]', '', row)
        return bool(re.match(r'^[-:]+$', clean_row))

    def _extract_tables_from_text(self, text):
        """Collect runs of consecutive lines that contain ``|``.

        Args:
            text: Answer text; anything that is not a ``str`` yields ``[]``.

        Returns:
            list[list[str]]: One list of stripped lines per candidate table.
            Runs shorter than two lines are discarded.
        """
        if not isinstance(text, str):
            return []

        tables = []
        current_table = []

        for line in text.split('\n'):
            line = line.strip()
            if '|' in line:
                current_table.append(line)
                continue

            # A blank line or a line without "|" terminates the current run.
            if len(current_table) >= 2:
                tables.append(current_table)
            current_table = []

        # Flush a table that runs to the end of the text.
        if len(current_table) >= 2:
            tables.append(current_table)

        return tables

    def _parse_table(self, table_lines):
        """Turn table lines into ``{'headers': [...], 'data': [[...], ...]}``.

        The first line is the header. Data rows are the lines after the first
        separator row; when no separator row exists, the line at index 1 is
        treated as the separator (and is therefore not part of the data).
        Every data row is padded or truncated to the header width.

        Returns:
            dict | None: The parsed table, or None when there is no header,
            no data row, or parsing raised an exception.
        """
        try:
            headers = self._normalize_table_row(table_lines[0])

            separator_index = next(
                (i for i, line in enumerate(table_lines) if self._is_separator_row(line)),
                1
            )

            width = len(headers)
            data_rows = []
            for line in table_lines[separator_index + 1:]:
                cells = self._normalize_table_row(line)
                # Pad short rows, then cut long ones, to match the header width.
                cells.extend([''] * (width - len(cells)))
                data_rows.append(cells[:width])

            if headers and data_rows:
                return {
                    'headers': headers,
                    'data': data_rows
                }
        except Exception as e:
            print(f"解析表格时发生错误: {str(e)}")

        return None

    def _create_sheet(self, question_num, table_num):
        """Create a worksheet named ``Q<question>_T<table>``.

        Names longer than 31 characters fall back to ``Table<count>``; a name
        that already exists gets an ``_HHMMSS`` suffix.
        """
        sheet_name = f'Q{question_num}_T{table_num}'
        if len(sheet_name) > 31:
            sheet_name = f'Table{self._table_count}'

        if sheet_name in self.workbook.sheetnames:
            sheet_name = f'{sheet_name}_{datetime.now().strftime("%H%M%S")}'

        return self.workbook.create_sheet(title=sheet_name)

    def create_document(self, history):
        """Write every table found in the answers of ``history`` to the workbook.

        Args:
            history: Chat history list; odd indexes (1, 3, ...) are read as answers.

        Returns:
            Workbook | None: ``self.workbook`` when this call wrote at least
            one table, otherwise None.
        """
        has_tables = False

        # Drop the default sheet. It is only present before the first call, so
        # guard the lookup to keep repeated calls from raising KeyError.
        if 'Sheet' in self.workbook.sheetnames:
            self.workbook.remove(self.workbook['Sheet'])

        # Walk over the answers only.
        for i in range(1, len(history), 2):
            for table_lines in self._extract_tables_from_text(history[i]):
                parsed_table = self._parse_table(table_lines)
                if not parsed_table:
                    continue

                self._table_count += 1
                sheet = self._create_sheet(i // 2 + 1, self._table_count)

                # Header row
                for col, header in enumerate(parsed_table['headers'], 1):
                    sheet.cell(row=1, column=col, value=header)

                # Data rows start on row 2
                for row_idx, row_data in enumerate(parsed_table['data'], 2):
                    for col_idx, value in enumerate(row_data, 1):
                        sheet.cell(row=row_idx, column=col_idx, value=value)

                has_tables = True

        return self.workbook if has_tables else None
|
||||
|
||||
|
||||
def save_chat_tables(history, save_dir, base_name):
    """Export the Markdown tables of a chat history to ``<base_name>.xlsx``.

    Args:
        history: Chat history list.
        save_dir: Target directory; created when it does not exist.
        base_name: File name without extension.

    Returns:
        list: Paths of the files written. Empty when the history contains no
        table or when saving failed (the error is printed, not raised).
    """
    saved_paths = []

    try:
        workbook = ExcelTableFormatter().create_document(history)
        if workbook is None:
            # Nothing to export.
            return saved_paths

        # Make sure the output directory exists before saving.
        os.makedirs(save_dir, exist_ok=True)
        excel_file = os.path.join(save_dir, base_name + '.xlsx')

        workbook.save(excel_file)
        saved_paths.append(excel_file)
        print(f"已保存表格到Excel文件: {excel_file}")
    except Exception as e:
        print(f"保存Excel格式失败: {str(e)}")

    return saved_paths
|
||||
|
||||
|
||||
# Usage example
if __name__ == "__main__":
    # Sample chat history: questions and answers alternate, answers carry the tables.
    demo_history = [
        "问题1",
        """这是第一个表格:
| A | B | C |
|---|---|---|
| 1 | 2 | 3 |""",

        "问题2",
        "这是没有表格的回答",

        "问题3",
        """回答包含多个表格:
| Name | Age |
|------|-----|
| Tom | 20 |

第二个表格:
| X | Y |
|---|---|
| 1 | 2 |"""
    ]

    # Write the tables to output/chat_tables.xlsx
    saved_files = save_chat_tables(demo_history, "output", "chat_tables")
|
||||
472
crazy_functions/review_fns/conversation_doc/html_doc.py
Normal file
472
crazy_functions/review_fns/conversation_doc/html_doc.py
Normal file
@@ -0,0 +1,472 @@
|
||||
class HtmlFormatter:
    """Render one question/answer exchange plus its references as a standalone HTML page."""

    # Styles for the reference cards. Kept separate from ``self.css_styles`` and
    # combined per call, so repeated calls never grow the instance stylesheet.
    _REFERENCE_CSS = """
    .paper-title {
        font-size: 1.1em;
        margin-bottom: 0.5em;
    }
    .paper-authors {
        color: var(--secondary-color);
        margin: 0.3em 0;
    }
    .paper-year, .paper-citations {
        color: var(--text-color);
        margin: 0.3em 0;
    }
    .paper-source {
        color: var(--text-color);
        font-style: italic;
        margin: 0.3em 0;
    }
    .paper-abstract {
        margin: 0.8em 0;
        padding: 0.8em;
        background: var(--primary-light);
        border-radius: 4px;
    }
    .paper-links {
        margin-top: 0.5em;
    }
    .paper-links a {
        color: var(--primary-color);
        text-decoration: none;
        margin-right: 1em;
    }
    .paper-links a:hover {
        text-decoration: underline;
    }
    .standard-citation {
        margin-top: 1em;
        padding: 1em;
        background: #f8fafc;
        border-radius: 4px;
        border: 1px solid var(--border-color);
    }

    .citation-title {
        font-weight: bold;
        margin-bottom: 0.5em;
        color: var(--secondary-color);
    }

    .citation-text {
        font-family: 'Times New Roman', Times, serif;
        line-height: 1.6;
        margin-bottom: 0.5em;
        padding: 0.5em;
        background: white;
        border-radius: 4px;
        border: 1px solid var(--border-color);
    }

    .copy-btn {
        background: var(--primary-color);
        color: white;
        border: none;
        padding: 0.5em 1em;
        border-radius: 4px;
        cursor: pointer;
        font-size: 0.9em;
        transition: background-color 0.2s;
    }

    .copy-btn:hover {
        background: #1e40af;
    }

    @media (prefers-color-scheme: dark) {
        .standard-citation {
            background: #1e293b;
        }
        .citation-text {
            background: #0f172a;
        }
    }

    /* 添加期刊指标样式 */
    .journal-metric {
        display: inline-block;
        padding: 0.2em 0.6em;
        margin: 0 0.3em;
        background: var(--primary-light);
        border-radius: 4px;
        font-weight: 500;
        color: var(--primary-color);
    }

    @media (prefers-color-scheme: dark) {
        .journal-metric {
            background: #1e293b;
            color: #60a5fa;
        }
    }
    """

    # Client-side helpers: copy-to-clipboard for citations and Markdown
    # rendering of the answer through marked.js.
    _PAGE_SCRIPT = """
    <script>
    // 复制功能
    function copyToClipboard(element) {
        const text = element.innerText;
        navigator.clipboard.writeText(text).then(function() {
            const btn = element.nextElementSibling;
            const originalText = btn.innerText;
            btn.innerText = '已复制!';
            setTimeout(() => {
                btn.innerText = originalText;
            }, 2000);
        }).catch(function(err) {
            console.error('复制失败:', err);
        });
    }

    // Markdown解析
    document.addEventListener('DOMContentLoaded', function() {
        const answerContent = document.getElementById('answer-content');
        if (answerContent) {
            const markdown = answerContent.textContent;
            answerContent.innerHTML = marked.parse(markdown);
        }
    });
    </script>
    """

    def __init__(self):
        """Set up the base stylesheet shared by every generated page."""
        self.css_styles = """
        :root {
            --primary-color: #2563eb;
            --primary-light: #eff6ff;
            --secondary-color: #1e293b;
            --background-color: #f8fafc;
            --text-color: #334155;
            --border-color: #e2e8f0;
            --card-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1);
        }

        body {
            font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            line-height: 1.8;
            margin: 0;
            padding: 2rem;
            color: var(--text-color);
            background-color: var(--background-color);
        }

        .container {
            max-width: 1200px;
            margin: 0 auto;
            background: white;
            padding: 2rem;
            border-radius: 16px;
            box-shadow: var(--card-shadow);
        }
        ::selection {
            background: var(--primary-light);
            color: var(--primary-color);
        }
        @keyframes fadeIn {
            from { opacity: 0; transform: translateY(20px); }
            to { opacity: 1; transform: translateY(0); }
        }

        @keyframes slideIn {
            from { transform: translateX(-20px); opacity: 0; }
            to { transform: translateX(0); opacity: 1; }
        }

        .container {
            animation: fadeIn 0.6s ease-out;
        }

        .QaBox {
            animation: slideIn 0.5s ease-out;
            transition: all 0.3s ease;
        }

        .QaBox:hover {
            transform: translateX(5px);
        }
        .Question, .Answer, .historyBox {
            transition: all 0.3s ease;
        }
        .chat-title {
            color: var(--primary-color);
            font-size: 2em;
            text-align: center;
            margin: 1rem 0 2rem;
            padding-bottom: 1rem;
            border-bottom: 2px solid var(--primary-color);
        }

        .chat-body {
            display: flex;
            flex-direction: column;
            gap: 1.5rem;
            margin: 2rem 0;
        }

        .QaBox {
            background: white;
            padding: 1.5rem;
            border-radius: 8px;
            border-left: 4px solid var(--primary-color);
            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
            margin-bottom: 1.5rem;
        }

        .Question {
            color: var(--secondary-color);
            font-weight: 500;
            margin-bottom: 1rem;
        }

        .Answer {
            color: var(--text-color);
            background: var(--primary-light);
            padding: 1rem;
            border-radius: 6px;
        }

        .history-section {
            margin-top: 3rem;
            padding-top: 2rem;
            border-top: 2px solid var(--border-color);
        }

        .history-title {
            color: var(--secondary-color);
            font-size: 1.5em;
            margin-bottom: 1.5rem;
            text-align: center;
        }

        .historyBox {
            background: white;
            padding: 1rem;
            margin: 0.5rem 0;
            border-radius: 6px;
            border: 1px solid var(--border-color);
        }

        @media (prefers-color-scheme: dark) {
            :root {
                --background-color: #0f172a;
                --text-color: #e2e8f0;
                --border-color: #1e293b;
            }

            .container, .QaBox {
                background: #1e293b;
            }
        }
        """

    @staticmethod
    def _esc(value) -> str:
        """HTML-escape ``value`` (converted to str) for text and attribute contexts."""
        from html import escape
        return escape(str(value), quote=True)

    @staticmethod
    def _arxiv_url(doi: str) -> str:
        """Return ``doi`` as a URL, prefixing ``http://`` when no scheme is present."""
        return doi if doi.startswith('http') else f'http://{doi}'

    @staticmethod
    def _arxiv_id(arxiv_url: str):
        """Extract the arXiv id from an ``.../abs/<id>[vN]`` URL.

        Only a trailing ``v<digits>`` version marker is removed, so ids that
        contain the letter "v" elsewhere stay intact. Returns None when the
        URL has no ``abs/`` part.
        """
        if 'abs/' not in arxiv_url:
            return None
        tail = arxiv_url.split('abs/')[-1]
        base, sep, version = tail.rpartition('v')
        if sep and base and version.isdigit():
            return base
        return tail

    def _render_reference(self, idx: int, paper) -> str:
        """Render one paper as a reference card (metadata, links, citation)."""
        esc = self._esc
        author_names = [str(name) for name in (paper.authors or [])]
        authors = esc(', '.join(author_names))
        title = esc(paper.title)

        # Citation count
        citations_info = (
            f"被引用次数:{esc(paper.citations)}" if paper.citations is not None else "引用信息未知"
        )

        # Download links
        download_links = []
        if paper.doi:
            if 'arxiv.org' in paper.doi:
                # The "DOI" is really an arXiv URL: link to it directly.
                arxiv_url = self._arxiv_url(paper.doi)
                download_links.append(f'<a href="{esc(arxiv_url)}">arXiv链接</a>')
                arxiv_id = self._arxiv_id(arxiv_url)
                if arxiv_id:
                    download_links.append(
                        f'<a href="https://arxiv.org/pdf/{esc(arxiv_id)}.pdf">PDF下载</a>'
                    )
            else:
                download_links.append(
                    f'<a href="https://doi.org/{esc(paper.doi)}">DOI: {esc(paper.doi)}</a>'
                )

        # The plain URL is only added when it is not an arXiv link.
        url = getattr(paper, 'url', None)
        if url and 'arxiv.org' not in str(url):
            download_links.append(f'<a href="{esc(url)}">原文链接</a>')
        download_section = ' | '.join(download_links) if download_links else "无直接下载链接"

        # Source information
        source_info = []
        if paper.venue_type:
            source_info.append(f"类型:{esc(paper.venue_type)}")
        if paper.venue_name:
            source_info.append(f"来源:{esc(paper.venue_name)}")

        # Journal metrics (optional attributes)
        for attr, label in (('if_factor', 'IF'), ('jcr_division', 'JCR分区'), ('cas_division', '中科院分区')):
            metric = getattr(paper, attr, None)
            if metric:
                source_info.append(f"<span class='journal-metric'>{label}: {esc(metric)}</span>")

        venue_info = getattr(paper, 'venue_info', None)
        if venue_info:
            if venue_info.get('journal_ref'):
                source_info.append(f"期刊参考:{esc(venue_info['journal_ref'])}")
            if venue_info.get('publisher'):
                source_info.append(f"出版商:{esc(venue_info['publisher'])}")
        source_section = ' | '.join(source_info)
        source_html = f'<p class="paper-source">{source_section}</p>' if source_section else ""

        # Standard citation: at most three authors, then "et al."
        standard_citation = f"[{idx}] "
        shown_authors = author_names[:3]
        if len(author_names) > 3:
            shown_authors.append("et al.")
        author_text = ", ".join(shown_authors)
        if author_text:
            # Do not add a second period after "et al." or an initial.
            standard_citation += esc(author_text) + (" " if author_text.endswith(".") else ". ")
        standard_citation += f"<i>{title}</i>"
        if paper.venue_name:
            standard_citation += f". {esc(paper.venue_name)}"
        if paper.year:
            standard_citation += f", {esc(paper.year)}"
        if paper.doi:
            if 'arxiv.org' in paper.doi:
                standard_citation += f". {esc(self._arxiv_url(paper.doi))}"
            else:
                standard_citation += f". DOI: {esc(paper.doi)}"
        standard_citation += "."

        year_text = esc(paper.year) if paper.year else "未知"
        abstract_text = esc(paper.abstract) if paper.abstract else "无摘要"

        return f'''
        <div class="historyBox">
            <div class="entry">
                <p class="paper-title"><b>[{idx}]</b> <i>{title}</i></p>
                <p class="paper-authors">作者:{authors}</p>
                <p class="paper-year">发表年份:{year_text}</p>
                <p class="paper-citations">{citations_info}</p>
                {source_html}
                <p class="paper-abstract">摘要:{abstract_text}</p>
                <p class="paper-links">链接:{download_section}</p>
                <div class="standard-citation">
                    <p class="citation-title">标准引用格式:</p>
                    <p class="citation-text">{standard_citation}</p>
                    <button class="copy-btn" onclick="copyToClipboard(this.previousElementSibling)">复制引用格式</button>
                </div>
            </div>
        </div>
        '''

    def create_document(self, question: str, answer: str, ranked_papers: list = None) -> str:
        """Generate the complete HTML document.

        Args:
            question: The user's question.
            answer: The answer as Markdown text. It is embedded HTML-escaped
                and rendered in the browser by marked.js, which reads the
                element's ``textContent``.
            ranked_papers: Optional ranked list of paper records to list as
                references.

        Returns:
            str: The full HTML page. Calling this repeatedly on one instance
            yields the same page for the same input; ``self.css_styles`` is
            not modified.
        """
        chat_content = f'''
        <div class="QaBox">
            <div class="Question">{self._esc(question)}</div>
            <div class="Answer markdown-body" id="answer-content">{self._esc(answer)}</div>
        </div>
        '''

        references_content = ""
        if ranked_papers:
            cards = "".join(
                self._render_reference(idx, paper)
                for idx, paper in enumerate(ranked_papers, 1)
            )
            references_content = (
                '<div class="history-section"><h2 class="history-title">参考文献</h2>'
                + cards
                + '</div>'
            )

        # Combine into a local value instead of appending to self.css_styles.
        page_css = self.css_styles + self._REFERENCE_CSS
        js_code = self._PAGE_SCRIPT

        return f"""
        <!DOCTYPE html>
        <html lang="zh-CN">
        <head>
            <meta charset="utf-8">
            <meta name="viewport" content="width=device-width, initial-scale=1">
            <title>学术对话存档</title>
            <!-- 添加 marked.js -->
            <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
            <!-- 添加 GitHub Markdown CSS -->
            <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/sindresorhus/github-markdown-css@4.0.0/github-markdown.min.css">
            <style>
                {page_css}
                /* 添加 Markdown 相关样式 */
                .markdown-body {{
                    font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
                    padding: 1rem;
                    background: var(--primary-light);
                    border-radius: 6px;
                }}
                .markdown-body pre {{
                    background-color: #f6f8fa;
                    border-radius: 6px;
                    padding: 16px;
                    overflow: auto;
                }}
                .markdown-body code {{
                    background-color: rgba(175,184,193,0.2);
                    border-radius: 6px;
                    padding: 0.2em 0.4em;
                    font-size: 85%;
                }}
                .markdown-body pre code {{
                    background-color: transparent;
                    padding: 0;
                }}
                .markdown-body blockquote {{
                    border-left: 0.25em solid #d0d7de;
                    padding: 0 1em;
                    color: #656d76;
                }}
                .markdown-body table {{
                    border-collapse: collapse;
                    width: 100%;
                    margin: 1em 0;
                }}
                .markdown-body table th,
                .markdown-body table td {{
                    border: 1px solid #d0d7de;
                    padding: 6px 13px;
                }}
                .markdown-body table tr:nth-child(2n) {{
                    background-color: #f6f8fa;
                }}
                @media (prefers-color-scheme: dark) {{
                    .markdown-body {{
                        background: #1e293b;
                        color: #e2e8f0;
                    }}
                    .markdown-body pre {{
                        background-color: #0f172a;
                    }}
                    .markdown-body code {{
                        background-color: rgba(99,110,123,0.4);
                    }}
                    .markdown-body blockquote {{
                        border-left-color: #30363d;
                        color: #8b949e;
                    }}
                    .markdown-body table th,
                    .markdown-body table td {{
                        border-color: #30363d;
                    }}
                    .markdown-body table tr:nth-child(2n) {{
                        background-color: #0f172a;
                    }}
                }}
            </style>
        </head>
        <body>
            <div class="container">
                <h1 class="chat-title">学术对话存档</h1>
                <div class="chat-body">
                    {chat_content}
                    {references_content}
                </div>
            </div>
            {js_code}
        </body>
        </html>
        """
|
||||
47
crazy_functions/review_fns/conversation_doc/markdown_doc.py
Normal file
47
crazy_functions/review_fns/conversation_doc/markdown_doc.py
Normal file
@@ -0,0 +1,47 @@
|
||||
class MarkdownFormatter:
    """Markdown document generator for a question/answer exchange and its references."""

    def __init__(self):
        self.content = []

    def _add_content(self, text: str):
        """Append a body paragraph to ``self.content``; empty text is ignored."""
        if text:
            self.content.append(f"\n{text}\n")

    @staticmethod
    def _format_reference(idx: int, paper) -> str:
        """Format one paper as ``[idx] Authors. *Title*. Venue, Year. link``."""
        names = list(paper.authors or [])
        authors = ', '.join(names[:3])
        if len(names) > 3:
            authors += ' et al.'

        ref = f"[{idx}] "
        if authors:
            # Do not add a second period after "et al." or an initial.
            ref += authors + (" " if authors.endswith('.') else ". ")
        ref += f"*{paper.title}*"

        if paper.venue_name:
            ref += f". {paper.venue_name}"
        if paper.year:
            ref += f", {paper.year}"
        if paper.doi:
            if 'arxiv.org' in paper.doi:
                # The "DOI" is really an arXiv URL; link to it directly rather
                # than prefixing it with the doi.org resolver.
                arxiv_url = paper.doi if paper.doi.startswith('http') else f'http://{paper.doi}'
                ref += f". [{arxiv_url}]({arxiv_url})"
            else:
                ref += f". [DOI: {paper.doi}](https://doi.org/{paper.doi})"
        return ref

    def create_document(self, question: str, answer: str, ranked_papers: list = None) -> str:
        """Create the complete Markdown document.

        Args:
            question: The user's question.
            answer: The answer text.
            ranked_papers: Optional ranked list of paper records; each is read
                for ``authors``, ``title``, ``venue_name``, ``year``, ``doi``.

        Returns:
            str: The sections joined by blank lines.
        """
        # Question and answer sections
        sections = ["## 问题", question, "\n## 回答", answer]

        # References
        if ranked_papers:
            sections.append("\n## 参考文献")
            sections.extend(
                self._format_reference(idx, paper)
                for idx, paper in enumerate(ranked_papers, 1)
            )

        return "\n\n".join(sections)
|
||||
@@ -0,0 +1,174 @@
|
||||
from typing import List
|
||||
from crazy_functions.review_fns.data_sources.base_source import PaperMetadata
|
||||
import re
|
||||
|
||||
class ReferenceFormatter:
    """General reference generator producing BibTeX output."""

    # Applied in a single pass with str.translate, so text produced by one rule
    # (for example the backslash in "\&") is never re-escaped by another rule.
    # ASCII hyphens are deliberately left alone: doubling them would turn every
    # hyphenated word into an en dash in LaTeX.
    _BIBTEX_ESCAPES = str.maketrans({
        '\\': '\\textbackslash{}',
        '&': '\\&',
        '%': '\\%',
        '$': '\\$',
        '#': '\\#',
        '_': '\\_',
        '{': '\\{',
        '}': '\\}',
        '~': '\\textasciitilde{}',
        '^': '\\textasciicircum{}',
        '<': '\\textless{}',
        '>': '\\textgreater{}',
        '"': '``',
        '—': '---',
    })

    # Title words skipped when picking the word used in a cite key.
    _STOP_WORDS = frozenset({'a', 'an', 'the', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'})

    # venue_type (lower-cased) -> BibTeX entry type.
    _ENTRY_TYPES = {
        'conference': 'inproceedings',
        'preprint': 'unpublished',
        'journal': 'article',
        'book': 'book',
        'thesis': 'phdthesis',
    }

    def __init__(self):
        pass

    def _sanitize_bibtex(self, text: str) -> str:
        """Escape characters that are special in BibTeX/LaTeX.

        Returns an empty string for None or empty input.
        """
        if not text:
            return ""
        return str(text).translate(self._BIBTEX_ESCAPES)

    def _generate_cite_key(self, paper: PaperMetadata) -> str:
        """Build a cite key: first author's last word + year + first content word of the title.

        The year falls back to "0000". The result is lower-cased and reduced
        to ``[a-z0-9]``, so non-ASCII names or titles contribute nothing.
        """
        # Last whitespace-separated word of the first author's name
        first_author = ""
        if paper.authors:
            name_parts = str(paper.authors[0]).split()
            if name_parts:  # guards against blank author strings
                first_author = name_parts[-1].lower()

        year = str(paper.year) if paper.year else "0000"

        # First title word that is not a stop word and is longer than two characters
        title_word = ""
        if paper.title:
            title_word = next(
                (
                    word
                    for word in re.findall(r'\w+', paper.title.lower())
                    if word not in self._STOP_WORDS and len(word) > 2
                ),
                "",
            )

        # Keep only characters that are safe in a cite key.
        return re.sub(r'[^a-z0-9]', '', f"{first_author}{year}{title_word}".lower())

    def _get_entry_type(self, paper: PaperMetadata) -> str:
        """Map ``paper.venue_type`` to a BibTeX entry type (default: ``article``)."""
        venue_type = getattr(paper, 'venue_type', None)
        if venue_type:
            return self._ENTRY_TYPES.get(venue_type.lower(), 'article')
        return 'article'

    @staticmethod
    def _key_suffix(n: int) -> str:
        """Return the n-th alphabetic suffix: 1 -> 'a', 26 -> 'z', 27 -> 'aa'."""
        letters = ""
        while n > 0:
            n, rem = divmod(n - 1, 26)
            letters = chr(ord('a') + rem) + letters
        return letters

    def create_document(self, papers: List[PaperMetadata]) -> str:
        """Generate the BibTeX text for a list of papers.

        Args:
            papers: Paper records. Optional attributes (``venue_name``,
                ``venue``, ``venue_info``, ``institutions``, ``month``,
                ``note``) may be missing or None and are then skipped.

        Returns:
            str: A two-line comment header followed by one entry per paper.
            Papers whose generated keys collide get an alphabetic suffix so
            every key in the output is unique.
        """
        bibtex_text = "% This file was automatically generated by GPT-Academic\n"
        bibtex_text += "% Compatible with: EndNote, Zotero, JabRef, and LaTeX\n\n"

        used_keys = set()
        entries = []

        for paper in papers:
            entry_type = self._get_entry_type(paper)

            # Make the cite key unique within this document.
            base_key = self._generate_cite_key(paper)
            cite_key = base_key
            attempt = 0
            while cite_key in used_keys:
                attempt += 1
                cite_key = f"{base_key}{self._key_suffix(attempt)}"
            used_keys.add(cite_key)

            fields = []

            # Title
            if paper.title:
                fields.append(f" title = {{{self._sanitize_bibtex(paper.title)}}}")

            # Authors, rewritten as "Surname, Given names"
            if paper.authors:
                processed_authors = []
                for author in paper.authors:
                    names = str(author).split()
                    if len(names) > 1:
                        # Assumes the last word is the surname and the rest are given names.
                        processed_authors.append(f"{names[-1]}, {' '.join(names[:-1])}")
                    else:
                        processed_authors.append(str(author))
                authors = " and ".join(self._sanitize_bibtex(a) for a in processed_authors)
                fields.append(f" author = {{{authors}}}")

            # Year
            if paper.year:
                fields.append(f" year = {{{paper.year}}}")

            # Journal / conference name
            venue_name = getattr(paper, 'venue_name', None)
            if venue_name:
                if entry_type == 'inproceedings':
                    fields.append(f" booktitle = {{{self._sanitize_bibtex(venue_name)}}}")
                elif entry_type == 'article':
                    fields.append(f" journal = {{{self._sanitize_bibtex(venue_name)}}}")
                    # Journal details; venue_info may be absent or None.
                    venue_info = getattr(paper, 'venue_info', None) or {}
                    for key in ('volume', 'number', 'pages'):
                        if venue_info.get(key):
                            fields.append(f" {key} = {{{venue_info[key]}}}")
            elif getattr(paper, 'venue', None):
                venue_field = "booktitle" if entry_type == "inproceedings" else "journal"
                fields.append(f" {venue_field} = {{{self._sanitize_bibtex(paper.venue)}}}")

            # DOI
            if paper.doi:
                fields.append(f" doi = {{{paper.doi}}}")

            # URL, falling back to the DOI resolver
            if paper.url:
                fields.append(f" url = {{{paper.url}}}")
            elif paper.doi:
                fields.append(f" url = {{https://doi.org/{paper.doi}}}")

            # Abstract
            if paper.abstract:
                fields.append(f" abstract = {{{self._sanitize_bibtex(paper.abstract)}}}")

            # Institutions
            institutions = getattr(paper, 'institutions', None)
            if institutions:
                joined = " and ".join(self._sanitize_bibtex(inst) for inst in institutions)
                fields.append(f" institution = {{{joined}}}")

            # Month and note are only written when they carry a value.
            month = getattr(paper, 'month', None)
            if month:
                fields.append(f" month = {{{month}}}")
            note = getattr(paper, 'note', None)
            if note:
                fields.append(f" note = {{{self._sanitize_bibtex(note)}}}")

            # Fields are comma-separated with no comma after the last one.
            if fields:
                body = ",\n".join(fields)
                entries.append(f"@{entry_type}{{{cite_key},\n{body}\n}}\n\n")
            else:
                entries.append(f"@{entry_type}{{{cite_key},\n}}\n\n")

        return bibtex_text + "".join(entries)
|
||||
138
crazy_functions/review_fns/conversation_doc/word2pdf.py
Normal file
138
crazy_functions/review_fns/conversation_doc/word2pdf.py
Normal file
@@ -0,0 +1,138 @@
|
||||
from docx2pdf import convert
|
||||
import os
|
||||
import platform
|
||||
from typing import Union
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
class WordToPdfConverter:
    """Convert Word (.docx) documents to PDF."""

    @staticmethod
    def _replace_docx_in_filename(filename: Union[str, Path]) -> Path:
        """Replace every 'docx' in the file name's stem with 'pdf'.

        The directory and the suffix are kept unchanged.
        Example: 'docx_test.pdf' -> 'pdf_test.pdf'
        """
        path = Path(filename)
        new_name = path.stem.replace('docx', 'pdf')
        return path.parent / f"{new_name}{path.suffix}"

    @staticmethod
    def convert_to_pdf(word_path: Union[str, Path], pdf_path: Union[str, Path] = None) -> str:
        """Convert a Word document to PDF.

        Args:
            word_path: Path of the Word document.
            pdf_path: Optional output path. When omitted, the PDF is written
                next to the Word document. In both cases any 'docx' in the
                output file name's stem is replaced by 'pdf'.

        Returns:
            str: Path of the generated PDF.

        Raises:
            Exception: When the conversion fails; the original error is
            chained as ``__cause__``.
        """
        try:
            word_path = Path(word_path)

            if pdf_path is None:
                pdf_path = WordToPdfConverter._replace_docx_in_filename(word_path).with_suffix('.pdf')
            else:
                pdf_path = WordToPdfConverter._replace_docx_in_filename(Path(pdf_path))

            if platform.system() == 'Linux':
                # Linux has no Word to drive, so LibreOffice does the conversion.
                import shutil
                import subprocess

                office = shutil.which('libreoffice')
                if office is None:
                    raise RuntimeError("请先安装LibreOffice: sudo apt-get install libreoffice")

                # An argument list (no shell) keeps paths with quotes or spaces
                # intact; check=True turns a failed conversion into an error.
                subprocess.run(
                    [office, '--headless', '--convert-to', 'pdf', str(word_path),
                     '--outdir', str(pdf_path.parent)],
                    check=True,
                )

                # LibreOffice names its output after the input's stem, inside
                # --outdir; move it when a different name was requested.
                generated_pdf = pdf_path.parent / f"{word_path.stem}.pdf"
                if generated_pdf != pdf_path:
                    os.replace(generated_pdf, pdf_path)
            else:
                # Windows and macOS use docx2pdf.
                convert(word_path, pdf_path)

            return str(pdf_path)

        except Exception as e:
            raise Exception(f"转换PDF失败: {str(e)}") from e

    @staticmethod
    def batch_convert(word_dir: Union[str, Path], pdf_dir: Union[str, Path] = None) -> list:
        """Convert every ``*.docx`` file directly inside a directory.

        Args:
            word_dir: Directory containing the Word documents.
            pdf_dir: Optional output directory (created if missing). When
                omitted, each PDF is written next to its Word document.

        Returns:
            list: Paths of the PDFs that were generated. Files that fail to
            convert are reported with ``print`` and skipped.
        """
        word_dir = Path(word_dir)
        if pdf_dir:
            pdf_dir = Path(pdf_dir)
            pdf_dir.mkdir(parents=True, exist_ok=True)

        converted_files = []

        for word_file in word_dir.glob("*.docx"):
            try:
                target = WordToPdfConverter._replace_docx_in_filename(
                    word_file.with_suffix('.pdf')
                )
                if pdf_dir:
                    target = pdf_dir / target.name

                converted_files.append(WordToPdfConverter.convert_to_pdf(word_file, target))
            except Exception as e:
                print(f"转换 {word_file} 失败: {str(e)}")

        return converted_files

    @staticmethod
    def convert_doc_to_pdf(doc, output_dir: Union[str, Path] = None) -> str:
        """Convert an in-memory document object to PDF.

        Args:
            doc: Object with a ``save(path)`` method (a python-docx Document
                according to the original documentation).
            output_dir: Optional output directory (created if missing);
                defaults to the current working directory.

        Returns:
            str: Path of the generated PDF, as reported by ``convert_to_pdf``.

        Raises:
            Exception: When saving or converting fails; the temporary .docx
            file is removed in either case.
        """
        temp_docx = None
        try:
            output_dir = Path(output_dir) if output_dir else Path.cwd()
            output_dir.mkdir(parents=True, exist_ok=True)

            # Save to a timestamped temporary Word file first.
            temp_docx = output_dir / f"temp_{datetime.now().strftime('%Y%m%d_%H%M%S')}.docx"
            doc.save(temp_docx)

            # Return the path convert_to_pdf actually wrote to.
            return WordToPdfConverter.convert_to_pdf(temp_docx, temp_docx.with_suffix('.pdf'))

        except Exception as e:
            raise Exception(f"转换PDF失败: {str(e)}") from e

        finally:
            # temp_docx is still None when the failure happened before it was assigned.
            if temp_docx is not None and temp_docx.exists():
                temp_docx.unlink()
|
||||
246
crazy_functions/review_fns/conversation_doc/word_doc.py
Normal file
246
crazy_functions/review_fns/conversation_doc/word_doc.py
Normal file
@@ -0,0 +1,246 @@
|
||||
import re
|
||||
from docx import Document
|
||||
from docx.shared import Cm, Pt
|
||||
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_LINE_SPACING
|
||||
from docx.enum.style import WD_STYLE_TYPE
|
||||
from docx.oxml.ns import qn
|
||||
from datetime import datetime
|
||||
import docx
|
||||
from docx.oxml import shared
|
||||
from crazy_functions.doc_fns.conversation_doc.word_doc import convert_markdown_to_word
|
||||
|
||||
|
||||
class WordFormatter:
    """Build a Word document recording one question/answer exchange.

    Page geometry and fonts are chosen by the original author to follow the
    Chinese official-document layout standard GB/T 9704-2012.
    """

    def __init__(self):
        self.doc = Document()
        self._setup_document()
        self._create_styles()

    def _setup_document(self):
        """Apply page size, margins and the running header to every section."""
        for section in self.doc.sections:
            # A4 page.
            section.page_width = Cm(21)
            section.page_height = Cm(29.7)
            # Margins: top 37 mm, bottom 35 mm, left 28 mm, right 26 mm.
            section.top_margin = Cm(3.7)
            section.bottom_margin = Cm(3.5)
            section.left_margin = Cm(2.8)
            section.right_margin = Cm(2.6)
            # Header / footer distance from the page edge.
            section.header_distance = Cm(2.0)
            section.footer_distance = Cm(2.0)

            # Centered running header.
            header_para = section.header.paragraphs[0]
            header_para.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
            header_run = header_para.add_run("GPT-Academic学术对话 (体验地址:https://auth.gpt-academic.top/)")
            header_run.font.name = '仿宋'
            # The East-Asian font slot must be set separately for CJK text.
            header_run._element.rPr.rFonts.set(qn('w:eastAsia'), '仿宋')
            header_run.font.size = Pt(9)

    def _add_paragraph_style(self, name, font_name, font_size, bold=False):
        """Register a paragraph style with the given font and return it.

        Args:
            name: Style name to register in the document.
            font_name: Font used for both the Latin and East-Asian slots.
            font_size: Font size in points.
            bold: Whether the style is bold.
        """
        style = self.doc.styles.add_style(name, WD_STYLE_TYPE.PARAGRAPH)
        style.font.name = font_name
        style._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
        style.font.size = Pt(font_size)
        if bold:
            style.font.bold = True
        return style

    def _create_styles(self):
        """Register the paragraph styles used by this document."""
        # Body text.
        body = self._add_paragraph_style('Normal_Custom', '仿宋', 12)
        body.paragraph_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE
        body.paragraph_format.space_after = Pt(0)

        # Question.
        question = self._add_paragraph_style('Question_Style', '黑体', 14, bold=True)
        question.paragraph_format.space_before = Pt(12)
        question.paragraph_format.space_after = Pt(6)
        question.paragraph_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE
        question.paragraph_format.left_indent = Pt(0)

        # Answer.
        answer = self._add_paragraph_style('Answer_Style', '仿宋', 12)
        answer.paragraph_format.space_before = Pt(6)
        answer.paragraph_format.space_after = Pt(12)
        answer.paragraph_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE
        answer.paragraph_format.left_indent = Pt(0)

        # Document title.
        title = self._add_paragraph_style('Title_Custom', '黑体', 22, bold=True)
        title.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        title.paragraph_format.space_before = Pt(0)
        title.paragraph_format.space_after = Pt(24)
        title.paragraph_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE

        # Reference entry: smaller font, hanging indent of 21 pt.
        ref = self._add_paragraph_style('Reference_Style', '宋体', 10.5)
        ref.paragraph_format.space_before = Pt(3)
        ref.paragraph_format.space_after = Pt(3)
        ref.paragraph_format.line_spacing_rule = WD_LINE_SPACING.SINGLE
        ref.paragraph_format.left_indent = Pt(21)
        ref.paragraph_format.first_line_indent = Pt(-21)

        # Heading of the reference section (16 pt).
        ref_title = self._add_paragraph_style('Reference_Title_Style', '黑体', 16, bold=True)
        ref_title.paragraph_format.space_before = Pt(24)
        ref_title.paragraph_format.space_after = Pt(12)
        ref_title.paragraph_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE

    def create_document(self, question: str, answer: str, ranked_papers: list = None):
        """Write the title, date, Q/A pair and optional references.

        Args:
            question: The user's question.
            answer: The AI answer; passed through ``convert_markdown_to_word``
                before being written.
            ranked_papers: Optional ranked list of paper objects. Each entry
                is read for ``authors``, ``title``, ``year`` and ``doi``, and
                optionally ``url``, ``venue_name`` / ``venue``.

        Returns:
            The populated document (``self.doc``).

        Raises:
            Exception: Any failure outside an individual reference entry is
                logged to stdout and re-raised unchanged.
        """
        try:
            self._add_title()
            self._add_date()
            self.doc.add_paragraph()  # blank spacer line
            self._add_dialogue(question, answer)
            if ranked_papers:
                self._add_references(ranked_papers)
            return self.doc

        except Exception as e:
            print(f"Word文档创建失败: {str(e)}")
            import traceback
            print(f"详细错误信息: {traceback.format_exc()}")
            raise

    def _add_title(self):
        """Append the document title paragraph."""
        self.doc.add_paragraph(style='Title_Custom').add_run('GPT-Academic 对话记录')

    def _add_date(self):
        """Append today's date, centered, below the title."""
        try:
            date_para = self.doc.add_paragraph()
            date_para.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
            date_run = date_para.add_run(datetime.now().strftime('%Y年%m月%d日'))
            date_run.font.name = '仿宋'
            date_run._element.rPr.rFonts.set(qn('w:eastAsia'), '仿宋')
            date_run.font.size = Pt(16)
        except Exception as e:
            print(f"添加日期失败: {str(e)}")
            raise

    def _add_dialogue(self, question, answer):
        """Append the question paragraph followed by the answer paragraph."""
        try:
            q_para = self.doc.add_paragraph(style='Question_Style')
            q_para.add_run('问题:').bold = True
            q_para.add_run(str(question))

            a_para = self.doc.add_paragraph(style='Answer_Style')
            a_para.add_run('回答:').bold = True
            a_para.add_run(convert_markdown_to_word(str(answer)))
        except Exception as e:
            print(f"添加问答对话失败: {str(e)}")
            raise

    def _add_references(self, ranked_papers):
        """Append the reference heading and one numbered entry per paper.

        A paper that cannot be rendered is reported on stdout and skipped so
        that the remaining references are still written.
        """
        try:
            heading = self.doc.add_paragraph(style='Reference_Title_Style')
            heading.add_run("参考文献")

            for idx, paper in enumerate(ranked_papers, 1):
                try:
                    self._add_reference_entry(idx, paper)
                except Exception as e:
                    print(f"添加第 {idx} 篇参考文献失败: {str(e)}")
        except Exception as e:
            print(f"添加参考文献部分失败: {str(e)}")
            raise

    @staticmethod
    def _format_authors(authors, limit=3):
        """Join up to *limit* author names, adding ' et al.' when truncated."""
        names = [str(author) for author in (authors or [])]
        text = ', '.join(names[:limit])
        if len(names) > limit:
            text += ' et al.'
        return text

    @staticmethod
    def _doi_url(doi, url):
        """Return the link target for a DOI.

        When the paper URL mentions arXiv the DOI value is used as the link
        unchanged; otherwise it is resolved through https://doi.org/.
        """
        if url and "arxiv" in url:
            return doi
        return f'https://doi.org/{doi}'

    def _add_reference_entry(self, idx, paper):
        """Append one reference paragraph: ``[n] authors. title. venue, year. DOI.``"""
        ref_para = self.doc.add_paragraph(style='Reference_Style')
        ref_para.add_run(f'[{idx}] ').bold = True

        authors = self._format_authors(paper.authors)
        if authors:
            ref_para.add_run(f'{authors}. ')

        # Title: written exactly once, as a hyperlink when a URL is available
        # and as a plain italic run otherwise (or if the link cannot be built).
        url = getattr(paper, 'url', None)
        linked = False
        if url:
            try:
                self._add_hyperlink(ref_para, paper.title, url, italic=True)
                linked = True
            except Exception as e:
                print(f"添加超链接失败: {str(e)}")
        if not linked:
            ref_para.add_run(paper.title).italic = True

        # Venue: prefer venue_name and fall back to venue, as the EndNote
        # formatter in this package does.
        venue = getattr(paper, 'venue_name', None) or getattr(paper, 'venue', None)
        if venue:
            ref_para.add_run(f'. {venue}')

        if paper.year:
            ref_para.add_run(f', {paper.year}')

        if paper.doi:
            ref_para.add_run('. ')
            self._add_hyperlink(ref_para, f'DOI: {paper.doi}', self._doi_url(paper.doi, url))

        ref_para.add_run('.')

    def _create_hyperlink_style(self):
        """Return the 'Hyperlink' character style, creating it if missing."""
        styles = self.doc.styles
        if 'Hyperlink' not in styles:
            hyperlink_style = styles.add_style('Hyperlink', WD_STYLE_TYPE.CHARACTER)
            # Blue #0066CC; python-docx needs an RGBColor here, not an int.
            hyperlink_style.font.color.rgb = docx.shared.RGBColor(0x00, 0x66, 0xCC)
            hyperlink_style.font.underline = True
        return styles['Hyperlink']

    def _add_hyperlink(self, paragraph, text, url, italic=False):
        """Append an external hyperlink run to *paragraph*.

        python-docx has no high-level hyperlink API in the form used here, so
        the ``w:hyperlink`` element is assembled at the XML level.

        Args:
            paragraph: Paragraph that receives the link.
            text: Visible link text.
            url: Link target.
            italic: Whether the link text is italic.
        """
        # The run below refers to the style by id, so it has to exist.
        self._create_hyperlink_style()

        part = paragraph.part
        r_id = part.relate_to(url, docx.opc.constants.RELATIONSHIP_TYPE.HYPERLINK, is_external=True)

        hyperlink = docx.oxml.shared.OxmlElement('w:hyperlink')
        hyperlink.set(docx.oxml.shared.qn('r:id'), r_id)

        new_run = docx.oxml.shared.OxmlElement('w:r')
        rPr = docx.oxml.shared.OxmlElement('w:rPr')

        # w:rStyle must precede the other run properties.
        rStyle = docx.oxml.shared.OxmlElement('w:rStyle')
        rStyle.set(docx.oxml.shared.qn('w:val'), 'Hyperlink')
        rPr.append(rStyle)
        if italic:
            rPr.append(docx.oxml.shared.OxmlElement('w:i'))

        t = docx.oxml.shared.OxmlElement('w:t')
        t.text = text
        new_run.append(rPr)
        new_run.append(t)
        hyperlink.append(new_run)

        # Appended last, so a failure above leaves the paragraph untouched.
        paragraph._p.append(hyperlink)
|
||||
|
||||
Reference in New Issue
Block a user