diff --git a/crazy_functions/latex_fns/latex_toolbox.py b/crazy_functions/latex_fns/latex_toolbox.py index 81e191ab..58818a23 100644 --- a/crazy_functions/latex_fns/latex_toolbox.py +++ b/crazy_functions/latex_fns/latex_toolbox.py @@ -645,8 +645,9 @@ def run_in_subprocess(func): def _merge_pdfs(pdf1_path, pdf2_path, output_path): import PyPDF2 # PyPDF2这个库有严重的内存泄露问题,把它放到子进程中运行,从而方便内存的释放 + from PyPDF2.generic import NameObject, TextStringObject,ArrayObject,FloatObject,NumberObject - Percent = 0.95 + Percent = 1 # raise RuntimeError('PyPDF2 has a serious memory leak problem, please use other tools to merge PDF files.') # Open the first PDF file with open(pdf1_path, "rb") as pdf1_file: @@ -687,6 +688,65 @@ def _merge_pdfs(pdf1_path, pdf2_path, output_path): ), 0, ) + if '/Annots' in page1: + page1_annot_id = [annot.idnum for annot in page1['/Annots']] + else: + page1_annot_id = [] + + if '/Annots' in page2: + page2_annot_id = [annot.idnum for annot in page2['/Annots']] + else: + page2_annot_id = [] + if '/Annots' in new_page: + annotations = new_page['/Annots'] + for i, annot in enumerate(annotations): + annot_obj = annot.get_object() + + # 检查注释类型是否是链接(/Link) + if annot_obj.get('/Subtype') == '/Link': + # 检查是否为内部链接跳转(/GoTo)或外部URI链接(/URI) + action = annot_obj.get('/A') + if action: + + if '/S' in action and action['/S'] == '/GoTo': + # 内部链接:跳转到文档中的某个页面 + dest = action.get('/D') # 目标页或目标位置 + if dest and annot.idnum in page2_annot_id: + # 获取原始文件中跳转信息,包括跳转页面 + destination = pdf2_reader.named_destinations[dest] + page_number = pdf2_reader.get_destination_page_number(destination) + #更新跳转信息,跳转到对应的页面和,指定坐标 (100, 150),缩放比例为 100% + #“/D”:[10,'/XYZ',100,100,0] + annot_obj['/A'].update({ + NameObject("/D"): ArrayObject([NumberObject(page_number),destination.dest_array[1], FloatObject(destination.dest_array[2] + int(page1.mediaBox.getWidth())) ,destination.dest_array[3],destination.dest_array[4]]) # 确保键和值是 PdfObject + }) + rect = annot_obj.get('/Rect') + # 更新点击坐标 + rect = ArrayObject([FloatObject(rect[0]+ int(page1.mediaBox.getWidth())),rect[1], + FloatObject(rect[2]+int(page1.mediaBox.getWidth())),rect[3] ]) + annot_obj.update({ + NameObject("/Rect"): rect # 确保键和值是 PdfObject + }) + if dest and annot.idnum in page1_annot_id: + # 获取原始文件中跳转信息,包括跳转页面 + destination = pdf1_reader.named_destinations[dest] + page_number = pdf1_reader.get_destination_page_number(destination) + #更新跳转信息,跳转到对应的页面和,指定坐标 (100, 150),缩放比例为 100% + #“/D”:[10,'/XYZ',100,100,0] + annot_obj['/A'].update({ + NameObject("/D"): ArrayObject([NumberObject(page_number),destination.dest_array[1], FloatObject(destination.dest_array[2]) ,destination.dest_array[3],destination.dest_array[4]]) # 确保键和值是 PdfObject + }) + rect = annot_obj.get('/Rect') + rect = ArrayObject([FloatObject(rect[0]),rect[1], + FloatObject(rect[2]),rect[3] ]) + annot_obj.update({ + NameObject("/Rect"): rect # 确保键和值是 PdfObject + }) + + elif '/S' in action and action['/S'] == '/URI': + # 外部链接:跳转到某个URI + uri = action.get('/URI') + output_writer.addPage(new_page) output_writer.addPage(new_page) # Save the merged PDF file with open(output_path, "wb") as output_file: