Git - cubist_pdf.git/commitdiff
#7678
author Vincent Vanwaelscappel <vincent@cubedesigners.com>
Mon, 4 Aug 2025 13:45:05 +0000 (15:45 +0200)
committer Vincent Vanwaelscappel <vincent@cubedesigners.com>
Mon, 4 Aug 2025 13:45:05 +0000 (15:45 +0200)
resources/tools/docling/convert_page.py

index d91230a578bdd592af0f5df63174cd138ab4dc5d..86d4f7af35de7aeee47aed5c08ce8e199ef2dc86 100644 (file)
@@ -21,13 +21,5 @@ converter = DocumentConverter(
     }
 )
 result = converter.convert(sys.argv[1])
-allpages=result.document.export_to_markdown(page_break_placeholder="<!-- page break -->", image_mode=ImageRefMode.EMBEDDED);
-Path(sys.argv[2]+"document.md").write_text(allpages)
-i=0
-for md in allpages.split("<!-- page break -->"):
-    i+=1
-    while os.stat(Path(sys.argv[2] + "/../texts/fh"+str(i)+".html")).st_size < 21:
-        Path(sys.argv[2]+"p"+str(i)+".md").write_text("")
-        i+=1
-    Path(sys.argv[2]+"p"+str(i)+".md").write_text(md)
 
+Path(sys.argv[2]).write_text(result.document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED))