From 6fbf5199dcbf174f9e0f08030e1a9c4f53f2b02f Mon Sep 17 00:00:00 2001
From: Vincent Vanwaelscappel
Date: Mon, 4 Aug 2025 16:25:00 +0200
Subject: [PATCH] #7678 @0.5

---
Review note: the added lines below were revised (non-empty page-break
marker, per-page emptiness check on the existing conversion result), so the
post-image blob id on the "index" line predates this revision.

 resources/tools/docling/convert_page.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/resources/tools/docling/convert_page.py b/resources/tools/docling/convert_page.py
index 86d4f7a..ff80a37 100644
--- a/resources/tools/docling/convert_page.py
+++ b/resources/tools/docling/convert_page.py
@@ -21,5 +21,24 @@ converter = DocumentConverter(
     }
 )
 result = converter.convert(sys.argv[1])
+# Non-empty marker placed between pages in the export; str.split("")
+# raises ValueError, so the separator must never be empty.
+PAGE_BREAK = "<!-- page-break -->"
+all_pages = result.document.export_to_markdown(
+    page_break_placeholder=PAGE_BREAK, image_mode=ImageRefMode.EMBEDDED
+)
+# sys.argv[2] is a plain string prefix shared by every output file.
+Path(sys.argv[2] + "document.md").write_text(all_pages)
+pages = result.pages
+page_no = 0
+for md in all_pages.split(PAGE_BREAK):
+    page_no += 1
+    # Pages without characters get an empty file and the counter moves on,
+    # so the current chunk is stored under the next page that has text.
+    # NOTE(review): assumes such pages add no chunk to the export and that
+    # the pipeline keeps parsed_page populated -- confirm.
+    while page_no <= len(pages) and not pages[page_no - 1].parsed_page.has_chars:
+        Path(sys.argv[2] + "p" + str(page_no) + ".md").write_text("")
+        page_no += 1
+    Path(sys.argv[2] + "p" + str(page_no) + ".md").write_text(md)
 
-Path(sys.argv[2]).write_text(result.document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED))
-- 
2.39.5