From: Vincent Vanwaelscappel
Date: Mon, 4 Aug 2025 14:25:00 +0000 (+0200)
Subject: #7678 @0.5
X-Git-Url: http://git.cubedesigners.com/?a=commitdiff_plain;h=6fbf5199dcbf174f9e0f08030e1a9c4f53f2b02f;p=cubist_pdf.git

#7678 @0.5
---

diff --git a/resources/tools/docling/convert_page.py b/resources/tools/docling/convert_page.py
index 86d4f7a..ff80a37 100644
--- a/resources/tools/docling/convert_page.py
+++ b/resources/tools/docling/convert_page.py
@@ -21,5 +21,13 @@ converter = DocumentConverter(
     }
 )
 result = converter.convert(sys.argv[1])
+allpages=result.document.export_to_markdown(page_break_placeholder="", image_mode=ImageRefMode.EMBEDDED);
+Path(sys.argv[2]+"document.md").write_text(allpages)
+i=0
+for md in allpages.split(""):
+    i+=1
+    while not converter.convert(sys.argv[1]).pages[1].parsed_page.has_chars:
+        Path(sys.argv[2]+"p"+str(i)+".md").write_text("")
+        i+=1
+    Path(sys.argv[2]+"p"+str(i)+".md").write_text(md)
 
-Path(sys.argv[2]).write_text(result.document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED))