From: Vincent Vanwaelscappel
Date: Mon, 4 Aug 2025 14:40:22 +0000 (+0200)
Subject: #7678 @0.5
X-Git-Url: http://git.cubedesigners.com/?a=commitdiff_plain;h=5753c35b5743ad32b55b144d36a48a01cfa2f69d;p=cubist_pdf.git

#7678 @0.5
---

diff --git a/resources/tools/docling/convert_page.py b/resources/tools/docling/convert_page.py
index ff80a37..b269f10 100644
--- a/resources/tools/docling/convert_page.py
+++ b/resources/tools/docling/convert_page.py
@@ -21,13 +21,13 @@ converter = DocumentConverter(
     }
 )
 result = converter.convert(sys.argv[1])
-allpages=result.document.export_to_markdown(page_break_placeholder="", image_mode=ImageRefMode.EMBEDDED);
-Path(sys.argv[2]+"document.md").write_text(allpages)
-i=0
+allpages = result.document.export_to_markdown(page_break_placeholder="",
+                                              image_mode=ImageRefMode.EMBEDDED);
+Path(sys.argv[2] + "document.md").write_text(allpages)
+i = 0
 for md in allpages.split(""):
-    i+=1
-    while not converter.convert(sys.argv[1]).pages[1].parsed_page.has_chars:
-        Path(sys.argv[2]+"p"+str(i)+".md").write_text("")
-        i+=1
-    Path(sys.argv[2]+"p"+str(i)+".md").write_text(md)
-
+    i += 1
+    while not converter.convert(sys.argv[1]).pages[i - 1].parsed_page.has_chars:
+        Path(sys.argv[2] + "p" + str(i) + ".md").write_text("")
+        i += 1
+    Path(sys.argv[2] + "p" + str(i) + ".md").write_text(md)