From fd95cf9c9d6b326ea85074ec77065ad3918de5e9 Mon Sep 17 00:00:00 2001
From: Vincent Vanwaelscappel
Date: Wed, 14 Jan 2026 19:00:46 +0100
Subject: [PATCH] wip #7894 @0.5

---
Review note: this patch was edited by hand after export (Url::mtime guard
fixed and documented, TODO added in Docling::OCR). The post-image blob ids
on the "index" lines still describe the original commit.

 src/CommandLine/Docling.php | 13 +++++++++++--
 src/Url.php                 | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/src/CommandLine/Docling.php b/src/CommandLine/Docling.php
index ddeea82..b47b000 100644
--- a/src/CommandLine/Docling.php
+++ b/src/CommandLine/Docling.php
@@ -3,6 +3,7 @@
 namespace Cubist\Util\CommandLine;
 
 use Cubist\Util\CommandLine;
+use Cubist\Util\Files\Files;
 
 class Docling extends CommandLine
 {
@@ -15,16 +16,24 @@ class Docling extends CommandLine
 
     public static function OCR($image, $locale = null)
     {
-        $out = $image . '.' . md5($locale) . '.md';
+        $f = new \SplFileInfo($image);
+
+        $out = $image . '.' . md5($locale) . '.html';
         if (!file_exists($out) || filemtime($out) < filemtime($image) || filemtime($out) < filemtime(__FILE__)) {
+            $tmp = Files::tmpdir();
             $cli = new self();
             $cli->setArg('to', 'md');
             if ($locale) {
                 $cli->setArg('ocr-lang', $locale);
             }
-            $cli->setArg('output', $out);
+            $cli->setArg('output', $tmp);
             $cli->setArg(null, $image);
             $cli->execute();
+
+            // TODO(review): WIP - $md is never read and nothing visible here writes
+            // $out, so file_get_contents($out) below fails on a cache miss. Also confirm
+            // the generated file name: getBasename('.md') only strips a ".md" suffix.
+            $md = $tmp . '/' . $f->getBasename('.md');
         }
         return file_get_contents($out);
     }
diff --git a/src/Url.php b/src/Url.php
index d273c6e..d95d25d 100644
--- a/src/Url.php
+++ b/src/Url.php
@@ -82,4 +82,37 @@ class Url
         return $str;
     }
 
+    /**
+     * Get the modification time advertised by a remote resource.
+     *
+     * Fetches the response headers with get_headers() and parses the
+     * Last-Modified header.
+     *
+     * @param string $url URL to query
+     * @return int|null Unix timestamp, or null when the request fails, the
+     *                  first status line does not contain "200", or no
+     *                  parsable Last-Modified header is present
+     */
+    public static function mtime($url)
+    {
+        $h = get_headers($url, 1);
+        // get_headers() returns false on failure; index 0 is the status line
+        if (!$h || strstr($h[0], '200') === false) {
+            return null;
+        }
+
+        // HTTP header names are case-insensitive
+        $h = array_change_key_case($h, CASE_LOWER);
+        $lastModified = isset($h['last-modified']) ? $h['last-modified'] : null;
+        // A header that occurs more than once is returned as an array
+        if (is_array($lastModified)) {
+            $lastModified = end($lastModified);
+        }
+        if (!$lastModified) {
+            return null;
+        }
+
+        $timestamp = strtotime($lastModified);
+        return $timestamp === false ? null : $timestamp;
+    }
 }
-- 
2.39.5