From: Vincent Vanwaelscappel Date: Thu, 20 Nov 2025 12:48:19 +0000 (+0100) Subject: wait #7819 @6 X-Git-Url: http://git.cubedesigners.com/?a=commitdiff_plain;h=a7be2ac8e4f520dd548b88e31fbf14708a6e483d;p=fluidbook-toolbox.git wait #7819 @6 --- diff --git a/.docker/docker-compose.yml b/.docker/docker-compose.yml index 077336997..e20e473b2 100644 --- a/.docker/docker-compose.yml +++ b/.docker/docker-compose.yml @@ -174,18 +174,7 @@ services: networks: - fluidbook-toolbox - flaresolverr: - # DockerHub mirror flaresolverr/flaresolverr:latest - image: ghcr.io/flaresolverr/flaresolverr:latest - container_name: flaresolverr - environment: - - LOG_LEVEL=${LOG_LEVEL:-info} - - LOG_HTML=${LOG_HTML:-false} - - CAPTCHA_SOLVER=${CAPTCHA_SOLVER:-none} - - TZ=Europe/London - ports: - - "${PORT:-8191}:8191" - restart: unless-stopped + networks: fluidbook-toolbox: diff --git a/app/Console/Kernel.php b/app/Console/Kernel.php index d7171ae09..81c91c6d5 100644 --- a/app/Console/Kernel.php +++ b/app/Console/Kernel.php @@ -38,16 +38,15 @@ class Kernel extends \Cubist\Backpack\Console\Kernel $schedule->command('job:dispatchNow Maintenance\\\\ListWorkingSymlinks')->dailyAt('0:30'); // Email config $schedule->command('job:dispatchNow Maintenance\\\\EmailServerRefresh')->twiceDailyAt(); - // WS to Toolbox migration - //$schedule->command('ws:migrate --publications=v2 --documents=missing')->dailyAt('1:00'); - //$schedule->command('syncfluidbooksv3toworkshop')->dailyAt('3:15'); - //$schedule->command('ws:migrate --publications=missing --documents=missing')->everyTwoHours(); // Quotes $schedule->command('fluidbook:quote --reminder')->weekdays()->at('8:00'); // DSN $schedule->command('dsn:check')->at('7:00'); // Mailjet $schedule->command('job:dispatchNow ListmonkSyncList')->dailyAt('5:00'); + // Link audit + $schedule->command('job:dispatchNow AuditLinkRegister')->dailyAt('0:30'); + $schedule->command('job:dispatchNow AuditLink')->dailyAt('1:00'); } $schedule->command('job:dispatchNow ProcessTotals')->everyTwoHours(); diff --git a/app/Http/Controllers/Admin/Operations/FluidbookCollection/AuditLinksOperation.php b/app/Http/Controllers/Admin/Operations/FluidbookCollection/AuditLinksOperation.php index 48a9bdd25..6b75226ef 100644 --- a/app/Http/Controllers/Admin/Operations/FluidbookCollection/AuditLinksOperation.php +++ b/app/Http/Controllers/Admin/Operations/FluidbookCollection/AuditLinksOperation.php @@ -118,106 +118,4 @@ trait AuditLinksOperation return response()->download($tmpfile, $filename)->deleteFileAfterSend(); } - - public static function getHttpCode($url) { - $ch = curl_init($url); - self::setCurlOpt([CURLOPT_FOLLOWLOCATION => false]); - curl_setopt_array($ch, self::$curlOpt); - curl_exec($ch); - - $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE); - curl_close($ch); - - $ch = curl_init($url); - self::$curlOpt[CURLOPT_FOLLOWLOCATION] = true; - curl_setopt_array($ch, self::$curlOpt); - curl_exec($ch); - - $finalHttpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); - $finalUrl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); - - curl_close($ch); - - return ['httpcode' => $httpcode, 'finalurl' => $finalUrl, 'finalHttpCode' => $finalHttpCode]; - } - - public static function getHttpCodeCloudflare($url) { - $apiUrl = 'http://flaresolverr:8191/v1'; - - $payload = json_encode([ - 'cmd' => 'request.get', - 'url' => $url, - 'maxTimeout' => 60000 - ]); - - /*$flaresolverrOptions = [ - CURLOPT_POST => true, - CURLOPT_HTTPHEADER => ['Content-Type: application/json'], - CURLOPT_POSTFIELDS => $payload, - ];*/ - - $ch = curl_init($apiUrl); - self::$curlOpt = [ - CURLOPT_RETURNTRANSFER => true, - CURLOPT_HEADER => true, - CURLOPT_NOBODY => true, - CURLOPT_TIMEOUT => self::$timeout, - CURLOPT_POST => true, - CURLOPT_HTTPHEADER => ['Content-Type: application/json'], - CURLOPT_POSTFIELDS => $payload, - ]; - curl_setopt_array($ch, self::$curlOpt); - $response = curl_exec($ch); - - if(preg_match('/(error|code) \b(301|302|308|404|401|403|405|500|502|503)\b/', $response, $matches)) { - $httpcode = $matches[2] ?? ''; - $finalUrl = ''; - $finalHttpCode = ''; - } - - curl_close($ch); - - return ['httpcode' => $httpcode, 'finalurl' => $finalUrl, 'finalHttpCode' => $finalHttpCode]; - } - - protected static function setCurlOpt($moreOptions = []) { - self::$curlOpt = [ - CURLOPT_RETURNTRANSFER => true, - CURLOPT_HEADER => true, - CURLOPT_NOBODY => true, - CURLOPT_TIMEOUT => self::$timeout, - ...$moreOptions - ]; - } - - public static function getUrlInfo($url) { - $headers = get_headers($url, 1); - if($headers['Server'] === "cloudflare") { - $response = self::getHttpCodeCloudflare($url); - }else { - $response = self::getHttpCode($url); - } - - return $response; - } - - public static function getHttpCodeComment($httpcode) - { - switch ($httpcode) { - case 301: - return " - Moved Permanently: the resource has a new permanent home — update your bookmarks or links."; - case 302: - return " - Found: tells the client to look at (browse to) another URL."; - //case 307: - //case 308: - case 400: - return " - Bad request: this and all future requests should be directed to the given URI."; - } - } - - public static function youtubeVideoExist($videoID) - { - $headers = get_headers('https://www.youtube.com/oembed?format=json&url=http://www.youtube.com/watch?v=' . $videoID); - return (is_array($headers) && preg_match('/^HTTP\\/\\d+\\.\\d+\\s+2\\d\\d\\s+.*$/', $headers[0])); - } } diff --git a/app/Jobs/AuditLink.php b/app/Jobs/AuditLink.php index e82699d88..12824fca0 100644 --- a/app/Jobs/AuditLink.php +++ b/app/Jobs/AuditLink.php @@ -2,107 +2,84 @@ namespace App\Jobs; -use App\Fluidbook\Link\LinksData; + use App\Models\FluidbookAuditLink; -use App\Models\FluidbookPublication; -use App\Models\FluidbookCollection; -use Cubist\Net\Util; -use Cubist\Util\CommandLine; +use Cubist\Util\ArrayUtil; use Cubist\Util\Files\Files; use Cubist\Util\WebVideo; -use Illuminate\Support\Arr; -use Illuminate\Support\Facades\DB; use Illuminate\Support\Facades\Log; -use Illuminate\Support\Facades\Http; +use Cubist\Net\HTTP; class AuditLink extends Base { + protected $_batchSize = 500; + protected static $minTimeBetweenRequests = 10; + /** * Create a new job instance. */ - public function __construct() { - // + public function __construct($batchSize = 500) + { + $this->_batchSize = $batchSize; } + public function handle() { - $allLinks = FluidbookAuditLink::orderBy('last_date_test', 'asc')->limit(10); - $allLinks = $allLinks->inRandomOrder()->get()->toArray(); - $externalLinks = []; - - foreach ($allLinks as $fb => $link) { - // Error code start with 4 or 5 - // Redirection code start with 3 - try { - $curlResponse = FluidbookCollection::getUrlInfo($link['url']); - }catch (\Error $e) { - echo "Error when trying to get http code: " . $e->getMessage().PHP_EOL; - continue; - }catch (\Exception $e) { - echo "Exception when trying to get http code: " . $e->getMessage() . PHP_EOL; - continue; + $urls = []; + foreach (FluidbookAuditLink::orderBy('updated_at', 'asc')->get() as $link) { + if (!isset($urls[$link->url])) { + $urls[$link->url] = $link->http_code ?? 0; } + } + $urls = array_slice($urls, 0, $this->_batchSize, true); + //$urls = ['https://www.mckinsey.com/business-functions/people-and-organizational-performance/our-insights/dynamic-management-better-decisions-in-uncertain-times' => 403]; + //$urls = ['https://www.eurekalert.org/news-releases/569770' => 403]; + //$urls = ['https://www.practicallyperfectpa.com/tips-on-good-minute-taking#technologyto' => 403]; + $urls = ['https://dev.toolbox.fluidbook.com/httpcode/200' => 200, + 'https://dev.toolbox.fluidbook.com/httpcode/308' => 200,]; + ArrayUtil::shuffle($urls); + + HTTP::setCookieFile(Files::mkdir(protected_path('fluidbookpublication/linkaudit')) . 'cookies.txt'); - $httpCode = $curlResponse['httpcode']; - $finalurl = ''; - $finalcodeurl = ''; + $this->extracted($urls); + Log::info('Job exécuté avec succès'); + } - print_r($curlResponse['httpcode'].' '.$link['id'].PHP_EOL); + public function compareLinks() + { - $webvideo = WebVideo::parse($link['url'], true); - if($webvideo !== false) { - if($webvideo['service'] === 'youtube') { - if (!FluidbookCollection::youtubeVideoExist($webvideo['id'])) { - $httpCode = "404"; - } - } - } + } - if (str_starts_with($httpCode, 3)) { - try { - $finalurl = $curlResponse['finalurl']; - $finalcodeurl = $curlResponse['finalHttpCode']; - }catch (\Error $e) { - echo "Error when trying to get final url: " . $e->getMessage().PHP_EOL; - continue; - }catch (\Exception $e) { - echo "Exception when trying to get final url: " . $e->getMessage() . PHP_EOL; - continue; + /** + * @param array $urls + * @return void + */ + public function extracted(array $urls): void + { + foreach ($urls as $url => $previousCode) { + $start = time(); + $res = []; + + $webvideo = WebVideo::parse($url, true); + if ($webvideo && $webvideo['service'] === 'youtube') { + if (!HTTP::youtubeVideoExist($webvideo['id'])) { + $res['http_code'] = "404"; + } else { + $res['http_code'] = "200"; } + } else { + $res = HTTP::getResponseCode($url, 10, 'http://paris.cubedesigners.com:8191/v1'); } - $comment = FluidbookCollection::getHttpCodeComment($httpCode); - - $firstTimeError = ''; - if(str_starts_with($httpCode, 3) || str_starts_with($httpCode, 4) || str_starts_with($httpCode, 5)) { - $firstTimeError = null === $link['first_time_error'] || $httpCode !== $link['error_code'] ? date('Y-m-d H:i:s') : $link['first_time_error']; + if ($res['http_code'] != $previousCode) { + $res['code_date'] = new \DateTime(); + } + FluidbookAuditLink::where('url', $url)->update($res); + Log::info('Tested ' . $url . ' : ' . $res['http_code']); + $diff = time() - $start; + if ($diff < self::$minTimeBetweenRequests) { + sleep(self::$minTimeBetweenRequests - $diff); } - - $externalLinks[] = [ - 'id' => $link['id'], - 'fluidbook_id' => $link['fluidbook_id'], - 'page' => $link['page'], - 'link_id' => $link['link_id'], - 'url' => $link['url'], - 'error_code' => str_starts_with($httpCode, 4) || str_starts_with($httpCode, 5) ? $httpCode.$comment : "", - 'first_time_error' => $firstTimeError, // Datetime of the first time we saw this error - 'last_date_test' => date('Y-m-d H:i:s'), - 'redirection_code' => str_starts_with($httpCode, 3) ? $httpCode : "", - 'final_code_url' => $finalcodeurl, - 'final_target' => $finalurl, - 'updated_at'=> date('Y-m-d H:i:s') - ]; - } - - if($externalLinks) { - $keys = array_slice(array_keys($externalLinks[0]), 1); - - FluidbookAuditLink::upsert($externalLinks, ['id'], $keys); } - - Log::info('Job exécuté avec succès'); - } - - public function compareLinks() { - } } diff --git a/app/Models/FluidbookAuditLink.php b/app/Models/FluidbookAuditLink.php index 2b2747e71..1dd0f4df1 100644 --- a/app/Models/FluidbookAuditLink.php +++ b/app/Models/FluidbookAuditLink.php @@ -33,4 +33,8 @@ class FluidbookAuditLink extends CubistMagicAbstractModel $this->addUniqueKey(['fluidbook_id', 'link_id']); } + + public static function getURLToTest($limit){ + + } } diff --git a/routes/web.php b/routes/web.php index 73668f8aa..a51487a9c 100644 --- a/routes/web.php +++ b/routes/web.php @@ -36,27 +36,14 @@ Route::group([ Route::any('slack/endpoint', [\App\Http\Controllers\SlackController::class, 'endpoint']); }); -Route::get('/httpcode/{error}/{redirection?}', function($error, $redirection = null){ - if($error == 200) { - return "ok"; - }elseif($error == 401){ - abort(401); - }elseif($error == 403){ - abort(403); - }elseif($error == 404){ - abort(404); - }elseif($error == 500){ - abort(500); - }elseif($error == 502){ - abort(502); - }elseif($error == 503){ - abort(503); - }elseif($error == 308 && !$redirection){ - return redirect('/httpcode/308/urlderedirection', 308); - }elseif($error == 302 && !$redirection){ - return redirect('/httpcode/302/urlderedirection'); - }elseif($error == 301 && !$redirection){ - return redirect('/httpcode/301/urlderedirection', 301); +Route::get('/httpcode/{code}/{redirection?}', function ($code, $redirection = null) { + $redirections = [301, 302, 307, 308]; + $error = [401, 403, 404, 500, 502, 503]; + if (in_array($code, $error)) { + abort($code); + } else if (in_array($code, $redirections) && !$redirection) { + return redirect('/httpcode/' . $code . '/urlderedirection', $code); } + return 'ok (' . $code . ')'; });