This commit is contained in:
binary-husky
2023-09-09 18:56:10 +08:00
parent f5357f67ca
commit 5c0a0882c8
52 changed files with 2710 additions and 591 deletions

View File

@@ -20,6 +20,11 @@ def get_avail_grobid_url():
def parse_pdf(pdf_path, grobid_url):
    """Parse a PDF into a dictionary through scipdf, using a GROBID server.

    Args:
        pdf_path: Path of the PDF, passed unchanged to
            ``scipdf.parse_pdf_to_dict``.
        grobid_url: Base URL of the GROBID service. Trailing slashes are
            stripped before the URL is handed to scipdf.

    Returns:
        Whatever ``scipdf.parse_pdf_to_dict`` returns for the document
        (presumably a dict describing the article; the schema is defined by
        scipdf, not here).

    Raises:
        GROBID_OFFLINE_EXCEPTION: The parse raised GROBID_OFFLINE_EXCEPTION;
            it is re-raised with a message telling the user to fix
            GROBID_URL in the config.
        RuntimeError: Any other ``Exception`` raised while parsing.
    """
    import scipdf  # pip install scipdf_parser

    # rstrip is a no-op when there is no trailing slash, so no guard is needed.
    grobid_url = grobid_url.rstrip('/')

    # The parse is attempted exactly once, inside the try, so that every
    # failure is translated into one of the two documented exceptions.
    try:
        article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url)
    except GROBID_OFFLINE_EXCEPTION as err:
        raise GROBID_OFFLINE_EXCEPTION("GROBID服务不可用请修改config中的GROBID_URL可修改成本地GROBID服务。") from err
    except Exception as err:
        # `Exception` rather than a bare except: KeyboardInterrupt and
        # SystemExit must propagate instead of being reported as a bad PDF.
        raise RuntimeError("解析PDF失败请检查PDF是否损坏。") from err
    return article_dict