Git - cubist_pdf.git/commitdiff
#7678 @0.5
author Vincent Vanwaelscappel <vincent@cubedesigners.com>
Mon, 4 Aug 2025 14:40:22 +0000 (16:40 +0200)
committer Vincent Vanwaelscappel <vincent@cubedesigners.com>
Mon, 4 Aug 2025 14:40:22 +0000 (16:40 +0200)
resources/tools/docling/convert_page.py

index ff80a37ddddcaab2050b2c5a553df5e5157ab492..b269f10d959087d051c53fbed8e9a8148665f427 100644 (file)
@@ -21,13 +21,13 @@ converter = DocumentConverter(
     }
 )
 result = converter.convert(sys.argv[1])
-allpages=result.document.export_to_markdown(page_break_placeholder="<!-- page break -->", image_mode=ImageRefMode.EMBEDDED);
-Path(sys.argv[2]+"document.md").write_text(allpages)
-i=0
+allpages = result.document.export_to_markdown(page_break_placeholder="<!-- page break -->",
+                                              image_mode=ImageRefMode.EMBEDDED);
+Path(sys.argv[2] + "document.md").write_text(allpages)
+i = 0
 for md in allpages.split("<!-- page break -->"):
-    i+=1
-    while not converter.convert(sys.argv[1]).pages[1].parsed_page.has_chars:
-        Path(sys.argv[2]+"p"+str(i)+".md").write_text("")
-        i+=1
-    Path(sys.argv[2]+"p"+str(i)+".md").write_text(md)
-
+    i += 1
+    while not converter.convert(sys.argv[1]).pages[i - 1].parsed_page.has_chars:
+        Path(sys.argv[2] + "p" + str(i) + ".md").write_text("")
+        i += 1
+    Path(sys.argv[2] + "p" + str(i) + ".md").write_text(md)