diff --git a/.github/workflows/build-with-latex-arm.yml b/.github/workflows/build-with-latex-arm.yml new file mode 100644 index 00000000..ac20afad --- /dev/null +++ b/.github/workflows/build-with-latex-arm.yml @@ -0,0 +1,51 @@ +# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages +name: build-with-latex-arm + +on: + push: + branches: + - "master" + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }}_with_latex_arm + +jobs: + build-and-push-image: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v4 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + + - name: Build and push Docker image + uses: docker/build-push-action@v6 + with: + context: . 
+ push: true + platforms: linux/arm64 + file: docs/GithubAction+NoLocal+Latex+Arm + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} \ No newline at end of file diff --git a/crazy_functions/Latex_Function.py b/crazy_functions/Latex_Function.py index c82635af..53bbdd21 100644 --- a/crazy_functions/Latex_Function.py +++ b/crazy_functions/Latex_Function.py @@ -145,8 +145,8 @@ def arxiv_download(chatbot, history, txt, allow_cache=True): # <-------------- download arxiv source file -------------> def fix_url_and_download(): - for url_tar in [url_.replace('/abs/', '/e-print/'), url_.replace('/abs/', '/src/')]: - # for url_tar in [url_.replace('/abs/', '/src/'), url_.replace('/abs/', '/e-print/')]: + # for url_tar in [url_.replace('/abs/', '/e-print/'), url_.replace('/abs/', '/src/')]: + for url_tar in [url_.replace('/abs/', '/src/'), url_.replace('/abs/', '/e-print/')]: proxies = get_conf('proxies') r = requests.get(url_tar, proxies=proxies) if r.status_code == 200: diff --git a/crazy_functions/latex_fns/latex_toolbox.py b/crazy_functions/latex_fns/latex_toolbox.py index a49ffc4e..4a39ac83 100644 --- a/crazy_functions/latex_fns/latex_toolbox.py +++ b/crazy_functions/latex_fns/latex_toolbox.py @@ -697,15 +697,6 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path): ), 0, ) - if "/Annots" in page1: - page1_annot_id = [annot.idnum for annot in page1["/Annots"]] - else: - page1_annot_id = [] - - if "/Annots" in page2: - page2_annot_id = [annot.idnum for annot in page2["/Annots"]] - else: - page2_annot_id = [] if "/Annots" in new_page: annotations = new_page["/Annots"] for i, annot in enumerate(annotations): @@ -720,7 +711,8 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path): if "/S" in action and action["/S"] == "/GoTo": # 内部链接:跳转到文档中的某个页面 dest = action.get("/D") # 目标页或目标位置 - if dest and annot.idnum in page2_annot_id: + # if dest and annot.idnum in page2_annot_id: + if dest in pdf2_reader.named_destinations: # 获取原始文件中跳转信息,包括跳转页面 
destination = pdf2_reader.named_destinations[ dest @@ -732,24 +724,39 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path): ) # 更新跳转信息,跳转到对应的页面和,指定坐标 (100, 150),缩放比例为 100% # “/D”:[10,'/XYZ',100,100,0] - annot_obj["/A"].update( - { - NameObject("/D"): ArrayObject( - [ - NumberObject(page_number), - destination.dest_array[1], - FloatObject( - destination.dest_array[2] - + int( - page1.mediaBox.getWidth() - ) - ), - destination.dest_array[3], - destination.dest_array[4], - ] - ) # 确保键和值是 PdfObject - } - ) + if destination.dest_array[1] == "/XYZ": + annot_obj["/A"].update( + { + NameObject("/D"): ArrayObject( + [ + NumberObject(page_number), + destination.dest_array[1], + FloatObject( + destination.dest_array[ + 2 + ] + + int( + page1.mediaBox.getWidth() + ) + ), + destination.dest_array[3], + destination.dest_array[4], + ] + ) # 确保键和值是 PdfObject + } + ) + else: + annot_obj["/A"].update( + { + NameObject("/D"): ArrayObject( + [ + NumberObject(page_number), + destination.dest_array[1], + ] + ) # 确保键和值是 PdfObject + } + ) + rect = annot_obj.get("/Rect") # 更新点击坐标 rect = ArrayObject( @@ -773,7 +780,9 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path): ): rect # 确保键和值是 PdfObject } ) - if dest and annot.idnum in page1_annot_id: + # if dest and annot.idnum in page1_annot_id: + if dest in pdf1_reader.named_destinations: + # 获取原始文件中跳转信息,包括跳转页面 destination = pdf1_reader.named_destinations[ dest @@ -785,21 +794,36 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path): ) # 更新跳转信息,跳转到对应的页面和,指定坐标 (100, 150),缩放比例为 100% # “/D”:[10,'/XYZ',100,100,0] - annot_obj["/A"].update( - { - NameObject("/D"): ArrayObject( - [ - NumberObject(page_number), - destination.dest_array[1], - FloatObject( - destination.dest_array[2] - ), - destination.dest_array[3], - destination.dest_array[4], - ] - ) # 确保键和值是 PdfObject - } - ) + if destination.dest_array[1] == "/XYZ": + annot_obj["/A"].update( + { + NameObject("/D"): ArrayObject( + [ + NumberObject(page_number), + destination.dest_array[1], 
+ FloatObject( + destination.dest_array[ + 2 + ] + ), + destination.dest_array[3], + destination.dest_array[4], + ] + ) # 确保键和值是 PdfObject + } + ) + else: + annot_obj["/A"].update( + { + NameObject("/D"): ArrayObject( + [ + NumberObject(page_number), + destination.dest_array[1], + ] + ) # 确保键和值是 PdfObject + } + ) + rect = annot_obj.get("/Rect") rect = ArrayObject( [ @@ -820,14 +844,12 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path): elif "/S" in action and action["/S"] == "/URI": # 外部链接:跳转到某个URI uri = action.get("/URI") - output_writer.addPage(new_page) # Save the merged PDF file with open(output_path, "wb") as output_file: output_writer.write(output_file) - def _merge_pdfs_legacy(pdf1_path, pdf2_path, output_path): import PyPDF2 # PyPDF2这个库有严重的内存泄露问题,把它放到子进程中运行,从而方便内存的释放 diff --git a/docs/GithubAction+NoLocal+Latex+Arm b/docs/GithubAction+NoLocal+Latex+Arm new file mode 100644 index 00000000..94ad8941 --- /dev/null +++ b/docs/GithubAction+NoLocal+Latex+Arm @@ -0,0 +1,25 @@ +# 此Dockerfile适用于“无本地模型”的环境构建,如果需要使用chatglm等本地模型,请参考 docs/Dockerfile+ChatGLM +# - 1 修改 `config.py` +# - 2 构建 docker build -t gpt-academic-nolocal-latex -f docs/GithubAction+NoLocal+Latex+Arm . +# - 3 运行 docker run -v /home/fuqingxu/arxiv_cache:/root/arxiv_cache --rm -it --net=host gpt-academic-nolocal-latex + +FROM menghuan1918/ubuntu_uv_ctex:latest +ENV DEBIAN_FRONTEND=noninteractive +SHELL ["/bin/bash", "-c"] +WORKDIR /gpt +COPY . . 
+RUN /root/.cargo/bin/uv venv --seed \ + && source .venv/bin/activate \ + && /root/.cargo/bin/uv pip install openai numpy arxiv rich colorama Markdown pygments pymupdf python-docx pdfminer \ + && /root/.cargo/bin/uv pip install -r requirements.txt \ + && /root/.cargo/bin/uv clean + +# 对齐python3 +RUN rm -f /usr/bin/python3 && ln -s /gpt/.venv/bin/python /usr/bin/python3 +RUN rm -f /usr/bin/python && ln -s /gpt/.venv/bin/python /usr/bin/python + +# 可选步骤,用于预热模块 +RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()' + +# 启动 +CMD ["python3", "-u", "main.py"] diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py index 97300ec9..f953f8f9 100644 --- a/request_llms/bridge_all.py +++ b/request_llms/bridge_all.py @@ -256,6 +256,8 @@ model_info = { "max_token": 128000, "tokenizer": tokenizer_gpt4, "token_cnt": get_token_num_gpt4, + "openai_disable_system_prompt": True, + "openai_disable_stream": True, }, "o1-mini": { "fn_with_ui": chatgpt_ui, @@ -264,6 +266,8 @@ model_info = { "max_token": 128000, "tokenizer": tokenizer_gpt4, "token_cnt": get_token_num_gpt4, + "openai_disable_system_prompt": True, + "openai_disable_stream": True, }, "gpt-4-turbo": { @@ -1281,4 +1285,3 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot, # 更新一下llm_kwargs的参数,否则会出现参数不匹配的问题 yield from method(inputs, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, stream, additional_fn) - diff --git a/request_llms/bridge_chatgpt.py b/request_llms/bridge_chatgpt.py index d4cf1ef5..9e719a4e 100644 --- a/request_llms/bridge_chatgpt.py +++ b/request_llms/bridge_chatgpt.py @@ -202,10 +202,13 @@ def predict_no_ui_long_connection(inputs:str, llm_kwargs:dict, history:list=[], if (time.time()-observe_window[1]) > watch_dog_patience: raise RuntimeError("用户取消了程序。") else: raise RuntimeError("意外Json结构:"+delta) - if json_data and json_data['finish_reason'] == 'content_filter': - raise RuntimeError("由于提问含不合规内容被Azure过滤。") - if json_data and 
json_data['finish_reason'] == 'length': + + finish_reason = json_data.get('finish_reason', None) if json_data else None + if finish_reason == 'content_filter': + raise RuntimeError("由于提问含不合规内容被过滤。") + if finish_reason == 'length': raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。") + return result @@ -536,4 +539,3 @@ def generate_payload(inputs:str, llm_kwargs:dict, history:list, system_prompt:st return headers,payload -