]> _ Git - fluidbook-toolbox.git/commitdiff
wait #7819 @6
author Vincent Vanwaelscappel <vincent@cubedesigners.com>
Thu, 20 Nov 2025 12:48:19 +0000 (13:48 +0100)
committer Vincent Vanwaelscappel <vincent@cubedesigners.com>
Thu, 20 Nov 2025 12:48:19 +0000 (13:48 +0100)
.docker/docker-compose.yml
app/Console/Kernel.php
app/Http/Controllers/Admin/Operations/FluidbookCollection/AuditLinksOperation.php
app/Jobs/AuditLink.php
app/Models/FluidbookAuditLink.php
routes/web.php

index 077336997c25d82daf837e45d61098215d96d39d..e20e473b2582d233e4bc82bfb82905b217d3372a 100644 (file)
@@ -174,18 +174,7 @@ services:
     networks:
       - fluidbook-toolbox
 
-  flaresolverr:
-    # DockerHub mirror flaresolverr/flaresolverr:latest
-    image: ghcr.io/flaresolverr/flaresolverr:latest
-    container_name: flaresolverr
-    environment:
-      - LOG_LEVEL=${LOG_LEVEL:-info}
-      - LOG_HTML=${LOG_HTML:-false}
-      - CAPTCHA_SOLVER=${CAPTCHA_SOLVER:-none}
-      - TZ=Europe/London
-    ports:
-      - "${PORT:-8191}:8191"
-    restart: unless-stopped
+
 
 networks:
   fluidbook-toolbox:
index d7171ae0912d094625825aa86f6d789f38377383..81c91c6d5a1c9421ec11f0e565bcc31adb9c7cb8 100644 (file)
@@ -38,16 +38,15 @@ class Kernel extends \Cubist\Backpack\Console\Kernel
             $schedule->command('job:dispatchNow Maintenance\\\\ListWorkingSymlinks')->dailyAt('0:30');
             // Email config
             $schedule->command('job:dispatchNow Maintenance\\\\EmailServerRefresh')->twiceDailyAt();
-            // WS to Toolbox migration
-            //$schedule->command('ws:migrate --publications=v2 --documents=missing')->dailyAt('1:00');
-            //$schedule->command('syncfluidbooksv3toworkshop')->dailyAt('3:15');
-            //$schedule->command('ws:migrate --publications=missing --documents=missing')->everyTwoHours();
             // Quotes
             $schedule->command('fluidbook:quote --reminder')->weekdays()->at('8:00');
             // DSN
             $schedule->command('dsn:check')->at('7:00');
             // Mailjet
             $schedule->command('job:dispatchNow ListmonkSyncList')->dailyAt('5:00');
+            // Link audit
+            $schedule->command('job:dispatchNow AuditLinkRegister')->dailyAt('0:30');
+            $schedule->command('job:dispatchNow AuditLink')->dailyAt('1:00');
         }
 
         $schedule->command('job:dispatchNow ProcessTotals')->everyTwoHours();
index 48a9bdd25aa46ec5c6376993bd12ef8390b158b5..6b75226efb1ac0c93bc9ba96b24b21f785f3ee17 100644 (file)
@@ -118,106 +118,4 @@ trait AuditLinksOperation
 
         return  response()->download($tmpfile, $filename)->deleteFileAfterSend();
     }
-
-    public static function getHttpCode($url) {
-        $ch = curl_init($url);
-        self::setCurlOpt([CURLOPT_FOLLOWLOCATION => false]);
-        curl_setopt_array($ch, self::$curlOpt);
-        curl_exec($ch);
-
-        $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
-        curl_close($ch);
-
-        $ch = curl_init($url);
-        self::$curlOpt[CURLOPT_FOLLOWLOCATION] = true;
-        curl_setopt_array($ch, self::$curlOpt);
-        curl_exec($ch);
-
-        $finalHttpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
-        $finalUrl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
-
-        curl_close($ch);
-
-        return ['httpcode' => $httpcode, 'finalurl' => $finalUrl, 'finalHttpCode' => $finalHttpCode];
-    }
-
-    public static function getHttpCodeCloudflare($url) {
-        $apiUrl = 'http://flaresolverr:8191/v1';
-
-        $payload = json_encode([
-            'cmd' => 'request.get',
-            'url' => $url,
-            'maxTimeout' => 60000
-        ]);
-
-        /*$flaresolverrOptions = [
-            CURLOPT_POST => true,
-            CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
-            CURLOPT_POSTFIELDS => $payload,
-        ];*/
-
-        $ch = curl_init($apiUrl);
-        self::$curlOpt = [
-            CURLOPT_RETURNTRANSFER => true,
-            CURLOPT_HEADER         => true,
-            CURLOPT_NOBODY         => true,
-            CURLOPT_TIMEOUT        => self::$timeout,
-            CURLOPT_POST => true,
-            CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
-            CURLOPT_POSTFIELDS => $payload,
-        ];
-        curl_setopt_array($ch, self::$curlOpt);
-        $response = curl_exec($ch);
-
-        if(preg_match('/(error|code) \b(301|302|308|404|401|403|405|500|502|503)\b/', $response, $matches)) {
-            $httpcode = $matches[2] ?? '';
-            $finalUrl = '';
-            $finalHttpCode = '';
-        }
-
-        curl_close($ch);
-
-        return ['httpcode' => $httpcode, 'finalurl' => $finalUrl, 'finalHttpCode' => $finalHttpCode];
-    }
-
-    protected static function setCurlOpt($moreOptions = []) {
-        self::$curlOpt = [
-            CURLOPT_RETURNTRANSFER => true,
-            CURLOPT_HEADER         => true,
-            CURLOPT_NOBODY         => true,
-            CURLOPT_TIMEOUT        => self::$timeout,
-            ...$moreOptions
-        ];
-    }
-
-    public static function getUrlInfo($url) {
-        $headers = get_headers($url, 1);
-        if($headers['Server'] === "cloudflare") {
-            $response = self::getHttpCodeCloudflare($url);
-        }else {
-            $response = self::getHttpCode($url);
-        }
-
-        return $response;
-    }
-
-    public static function getHttpCodeComment($httpcode)
-    {
-        switch ($httpcode) {
-            case 301:
-                return " - Moved Permanently: the resource has a new permanent home — update your bookmarks or links.";
-            case 302:
-                return " - Found: tells the client to look at (browse to) another URL.";
-            //case 307:
-            //case 308:
-            case 400:
-                return " - Bad request: this and all future requests should be directed to the given URI.";
-        }
-    }
-
-    public static function youtubeVideoExist($videoID)
-    {
-        $headers = get_headers('https://www.youtube.com/oembed?format=json&url=http://www.youtube.com/watch?v=' . $videoID);
-        return (is_array($headers) && preg_match('/^HTTP\\/\\d+\\.\\d+\\s+2\\d\\d\\s+.*$/', $headers[0]));
-    }
 }
index e82699d885e9a3e241628d3643aff71445eafb27..12824fca0a15131d19e4748289ba87a6164343dd 100644 (file)
 
 namespace App\Jobs;
 
-use App\Fluidbook\Link\LinksData;
+
 use App\Models\FluidbookAuditLink;
-use App\Models\FluidbookPublication;
-use App\Models\FluidbookCollection;
-use Cubist\Net\Util;
-use Cubist\Util\CommandLine;
+use Cubist\Util\ArrayUtil;
 use Cubist\Util\Files\Files;
 use Cubist\Util\WebVideo;
-use Illuminate\Support\Arr;
-use Illuminate\Support\Facades\DB;
 use Illuminate\Support\Facades\Log;
-use Illuminate\Support\Facades\Http;
+use Cubist\Net\HTTP;
 
 class AuditLink extends Base
 {
+    protected $_batchSize = 500; // Maximum number of distinct URLs kept per run (used by array_slice() in handle()).
+    protected static $minTimeBetweenRequests = 10; // Minimum seconds between the start of two URL tests (see the sleep() in extracted()).
+
     /**
      * Create a new job instance.
      */
-    public function __construct() {
-        //
+    public function __construct($batchSize = 500)
+    {
+        $this->_batchSize = $batchSize; // Replaces the default declared on the property.
     }
+
+    /**
+     * Tests one batch of audited URLs.
+     *
+     * Rows are read oldest `updated_at` first and collapsed to one entry per URL
+     * (the first row seen supplies the stored HTTP code, 0 when it is null). The
+     * first `$_batchSize` URLs are then shuffled and handed to extracted().
+     */
     public function handle()
     {
-        $allLinks = FluidbookAuditLink::orderBy('last_date_test', 'asc')->limit(10);
-        $allLinks = $allLinks->inRandomOrder()->get()->toArray();
-        $externalLinks = [];
-
-        foreach ($allLinks as $fb => $link) {
-            // Error code start with 4 or 5
-            // Redirection code start with 3
-            try {
-                $curlResponse = FluidbookCollection::getUrlInfo($link['url']);
-            }catch (\Error $e) {
-                echo "Error when trying to get http code: " . $e->getMessage().PHP_EOL;
-                continue;
-            }catch (\Exception $e) {
-                echo "Exception when trying to get http code: " . $e->getMessage() . PHP_EOL;
-                continue;
+        $urls = [];
+        foreach (FluidbookAuditLink::orderBy('updated_at', 'asc')->get() as $link) {
+            if (!isset($urls[$link->url])) {
+                $urls[$link->url] = $link->http_code ?? 0;
             }
+        }
+        // Keep only the first $_batchSize URLs; the URL keys are preserved.
+        $urls = array_slice($urls, 0, $this->_batchSize, true);
+        // NOTE(review): presumably shuffles in place and keeps the URL keys — confirm in Cubist\Util\ArrayUtil.
+        ArrayUtil::shuffle($urls);
+
+        HTTP::setCookieFile(Files::mkdir(protected_path('fluidbookpublication/linkaudit')) . 'cookies.txt');
 
-            $httpCode = $curlResponse['httpcode'];
-            $finalurl = '';
-            $finalcodeurl = '';
+        $this->extracted($urls);
+        Log::info('Job exécuté avec succès');
+    }
 
-            print_r($curlResponse['httpcode'].' '.$link['id'].PHP_EOL);
+    public function compareLinks()
+    { // Placeholder: the body is empty, nothing is compared yet.
 
-            $webvideo = WebVideo::parse($link['url'], true);
-            if($webvideo !== false) {
-                if($webvideo['service'] === 'youtube') {
-                    if (!FluidbookCollection::youtubeVideoExist($webvideo['id'])) {
-                        $httpCode = "404";
-                    }
-                }
-            }
+    }
 
-            if (str_starts_with($httpCode, 3)) {
-                try {
-                    $finalurl = $curlResponse['finalurl'];
-                    $finalcodeurl = $curlResponse['finalHttpCode'];
-                }catch (\Error $e) {
-                    echo "Error when trying to get final url: "  . $e->getMessage().PHP_EOL;
-                    continue;
-                }catch (\Exception $e) {
-                    echo "Exception when trying to get final url: " . $e->getMessage() . PHP_EOL;
-                    continue;
+    /**
+     * Tests each URL once and stores the result on every audit row sharing that URL.
+     *
+     * A failure on one URL is logged and skipped so the rest of the batch still runs.
+     * Consecutive tests start at least self::$minTimeBetweenRequests seconds apart.
+     *
+     * @param array $urls map of URL => previously stored HTTP code
+     * @return void
+     */
+    public function extracted(array $urls): void
+    {
+        foreach ($urls as $url => $previousCode) {
+            $start = time();
+
+            try {
+                $webvideo = WebVideo::parse($url, true);
+                if ($webvideo && $webvideo['service'] === 'youtube') {
+                    // YouTube links are checked through the dedicated existence helper.
+                    $res = ['http_code' => HTTP::youtubeVideoExist($webvideo['id']) ? "200" : "404"];
+                } else {
+                    $res = HTTP::getResponseCode($url, 10, 'http://paris.cubedesigners.com:8191/v1');
                 }
+
+                // Loose comparison on purpose: the new code may be a string while the stored one may be an int.
+                if ($res['http_code'] != $previousCode) {
+                    $res['code_date'] = new \DateTime();
+                }
+                FluidbookAuditLink::where('url', $url)->update($res);
+                Log::info('Tested ' . $url . ' : ' . $res['http_code']);
+            } catch (\Throwable $e) {
+                // One failing URL must not abort the rest of the batch.
+                Log::error('Failed testing ' . $url . ' : ' . $e->getMessage());
             }
 
-            $comment = FluidbookCollection::getHttpCodeComment($httpCode);
-
-            $firstTimeError = '';
-            if(str_starts_with($httpCode, 3) || str_starts_with($httpCode, 4) || str_starts_with($httpCode, 5)) {
-                $firstTimeError = null === $link['first_time_error'] || $httpCode !== $link['error_code'] ? date('Y-m-d H:i:s') : $link['first_time_error'];
+            // Throttle: wait out the remainder of the minimum interval, failures included.
+            $diff = time() - $start;
+            if ($diff < self::$minTimeBetweenRequests) {
+                sleep(self::$minTimeBetweenRequests - $diff);
             }
-
-            $externalLinks[] = [
-                'id' => $link['id'],
-                'fluidbook_id' => $link['fluidbook_id'],
-                'page' => $link['page'],
-                'link_id' => $link['link_id'],
-                'url' => $link['url'],
-                'error_code' => str_starts_with($httpCode, 4) || str_starts_with($httpCode, 5) ? $httpCode.$comment : "",
-                'first_time_error' => $firstTimeError, // Datetime of the first time we saw this error
-                'last_date_test' => date('Y-m-d H:i:s'),
-                'redirection_code' => str_starts_with($httpCode, 3) ? $httpCode : "",
-                'final_code_url' => $finalcodeurl,
-                'final_target' => $finalurl,
-                'updated_at'=> date('Y-m-d H:i:s')
-            ];
-        }
-
-        if($externalLinks) {
-            $keys = array_slice(array_keys($externalLinks[0]), 1);
-
-            FluidbookAuditLink::upsert($externalLinks, ['id'], $keys);
         }
-
-        Log::info('Job exécuté avec succès');
-    }
-
-    public function compareLinks() {
-
     }
 }
index 2b2747e71646eba1d16694699db08c1680adf5f4..1dd0f4df125908c55a47c75b92044638ec469c64 100644 (file)
@@ -33,4 +33,8 @@ class FluidbookAuditLink extends CubistMagicAbstractModel
 
         $this->addUniqueKey(['fluidbook_id', 'link_id']);
     }
+
+    public static function getURLToTest($limit){ // Stub: empty body, $limit is not used yet and nothing is returned.
+
+    }
 }
index 73668f8aadcb0d3fb04151769af359bde37f9a45..a51487a9c9c38465ed3460d12c5bf240b832c0fc 100644 (file)
@@ -36,27 +36,14 @@ Route::group([
     Route::any('slack/endpoint', [\App\Http\Controllers\SlackController::class, 'endpoint']);
 });
 
-Route::get('/httpcode/{error}/{redirection?}', function($error, $redirection = null){
-    if($error == 200) {
-        return "ok";
-    }elseif($error == 401){
-        abort(401);
-    }elseif($error == 403){
-        abort(403);
-    }elseif($error == 404){
-        abort(404);
-    }elseif($error == 500){
-        abort(500);
-    }elseif($error == 502){
-        abort(502);
-    }elseif($error == 503){
-        abort(503);
-    }elseif($error == 308 && !$redirection){
-        return redirect('/httpcode/308/urlderedirection', 308);
-    }elseif($error == 302 && !$redirection){
-        return redirect('/httpcode/302/urlderedirection');
-    }elseif($error == 301 && !$redirection){
-        return redirect('/httpcode/301/urlderedirection', 301);
+// Aborts for the listed error codes, redirects once for the listed redirect codes, otherwise answers "ok".
+Route::get('/httpcode/{code}/{redirection?}', function ($code, $redirection = null) {
+    if (in_array($code, [401, 403, 404, 500, 502, 503])) {
+        abort($code);
+    }
+    if (!$redirection && in_array($code, [301, 302, 307, 308])) {
+        return redirect('/httpcode/' . $code . '/urlderedirection', $code);
     }
+    return 'ok (' . $code . ')';
 });