networks:
- fluidbook-toolbox
- flaresolverr:
- # DockerHub mirror flaresolverr/flaresolverr:latest
- image: ghcr.io/flaresolverr/flaresolverr:latest
- container_name: flaresolverr
- environment:
- - LOG_LEVEL=${LOG_LEVEL:-info}
- - LOG_HTML=${LOG_HTML:-false}
- - CAPTCHA_SOLVER=${CAPTCHA_SOLVER:-none}
- - TZ=Europe/London
- ports:
- - "${PORT:-8191}:8191"
- restart: unless-stopped
+
networks:
fluidbook-toolbox:
$schedule->command('job:dispatchNow Maintenance\\\\ListWorkingSymlinks')->dailyAt('0:30');
// Email config
$schedule->command('job:dispatchNow Maintenance\\\\EmailServerRefresh')->twiceDailyAt();
- // WS to Toolbox migration
- //$schedule->command('ws:migrate --publications=v2 --documents=missing')->dailyAt('1:00');
- //$schedule->command('syncfluidbooksv3toworkshop')->dailyAt('3:15');
- //$schedule->command('ws:migrate --publications=missing --documents=missing')->everyTwoHours();
// Quotes
$schedule->command('fluidbook:quote --reminder')->weekdays()->at('8:00');
// DSN
$schedule->command('dsn:check')->at('7:00');
// Mailjet
$schedule->command('job:dispatchNow ListmonkSyncList')->dailyAt('5:00');
+ // Link audit
+ $schedule->command('job:dispatchNow AuditLinkRegister')->dailyAt('0:30');
+ $schedule->command('job:dispatchNow AuditLink')->dailyAt('1:00');
}
$schedule->command('job:dispatchNow ProcessTotals')->everyTwoHours();
return response()->download($tmpfile, $filename)->deleteFileAfterSend();
}
-
- public static function getHttpCode($url) {
- $ch = curl_init($url);
- self::setCurlOpt([CURLOPT_FOLLOWLOCATION => false]);
- curl_setopt_array($ch, self::$curlOpt);
- curl_exec($ch);
-
- $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
- curl_close($ch);
-
- $ch = curl_init($url);
- self::$curlOpt[CURLOPT_FOLLOWLOCATION] = true;
- curl_setopt_array($ch, self::$curlOpt);
- curl_exec($ch);
-
- $finalHttpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
- $finalUrl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
-
- curl_close($ch);
-
- return ['httpcode' => $httpcode, 'finalurl' => $finalUrl, 'finalHttpCode' => $finalHttpCode];
- }
-
- public static function getHttpCodeCloudflare($url) {
- $apiUrl = 'http://flaresolverr:8191/v1';
-
- $payload = json_encode([
- 'cmd' => 'request.get',
- 'url' => $url,
- 'maxTimeout' => 60000
- ]);
-
- /*$flaresolverrOptions = [
- CURLOPT_POST => true,
- CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
- CURLOPT_POSTFIELDS => $payload,
- ];*/
-
- $ch = curl_init($apiUrl);
- self::$curlOpt = [
- CURLOPT_RETURNTRANSFER => true,
- CURLOPT_HEADER => true,
- CURLOPT_NOBODY => true,
- CURLOPT_TIMEOUT => self::$timeout,
- CURLOPT_POST => true,
- CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
- CURLOPT_POSTFIELDS => $payload,
- ];
- curl_setopt_array($ch, self::$curlOpt);
- $response = curl_exec($ch);
-
- if(preg_match('/(error|code) \b(301|302|308|404|401|403|405|500|502|503)\b/', $response, $matches)) {
- $httpcode = $matches[2] ?? '';
- $finalUrl = '';
- $finalHttpCode = '';
- }
-
- curl_close($ch);
-
- return ['httpcode' => $httpcode, 'finalurl' => $finalUrl, 'finalHttpCode' => $finalHttpCode];
- }
-
- protected static function setCurlOpt($moreOptions = []) {
- self::$curlOpt = [
- CURLOPT_RETURNTRANSFER => true,
- CURLOPT_HEADER => true,
- CURLOPT_NOBODY => true,
- CURLOPT_TIMEOUT => self::$timeout,
- ...$moreOptions
- ];
- }
-
- public static function getUrlInfo($url) {
- $headers = get_headers($url, 1);
- if($headers['Server'] === "cloudflare") {
- $response = self::getHttpCodeCloudflare($url);
- }else {
- $response = self::getHttpCode($url);
- }
-
- return $response;
- }
-
- public static function getHttpCodeComment($httpcode)
- {
- switch ($httpcode) {
- case 301:
- return " - Moved Permanently: the resource has a new permanent home — update your bookmarks or links.";
- case 302:
- return " - Found: tells the client to look at (browse to) another URL.";
- //case 307:
- //case 308:
- case 400:
- return " - Bad request: this and all future requests should be directed to the given URI.";
- }
- }
-
- public static function youtubeVideoExist($videoID)
- {
- $headers = get_headers('https://www.youtube.com/oembed?format=json&url=http://www.youtube.com/watch?v=' . $videoID);
- return (is_array($headers) && preg_match('/^HTTP\\/\\d+\\.\\d+\\s+2\\d\\d\\s+.*$/', $headers[0]));
- }
}
namespace App\Jobs;
-use App\Fluidbook\Link\LinksData;
+
use App\Models\FluidbookAuditLink;
-use App\Models\FluidbookPublication;
-use App\Models\FluidbookCollection;
-use Cubist\Net\Util;
-use Cubist\Util\CommandLine;
+use Cubist\Util\ArrayUtil;
use Cubist\Util\Files\Files;
use Cubist\Util\WebVideo;
-use Illuminate\Support\Arr;
-use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Log;
-use Illuminate\Support\Facades\Http;
+use Cubist\Net\HTTP;
class AuditLink extends Base
{
+ protected $_batchSize = 500;
+ protected static $minTimeBetweenRequests = 10;
+
/**
* Create a new job instance.
+ *
+ * @param int $batchSize Number of distinct URLs handle() keeps when it slices its
+ *                       candidate list; stored in $_batchSize (defaults to 500).
*/
- public function __construct() {
- //
+ public function __construct($batchSize = 500)
+ {
+ $this->_batchSize = $batchSize;
}
+
+ /**
+ * Run one audit pass over the stored links.
+ *
+ * Reads every FluidbookAuditLink row ordered by `updated_at` ascending, reduces
+ * them to one entry per distinct URL (keeping the `http_code` of the first row
+ * seen for that URL, or 0 when it is null), keeps the first `$_batchSize` URLs,
+ * shuffles them and hands them to extracted() for testing.
+ *
+ * @return void
+ */
public function handle()
{
- $allLinks = FluidbookAuditLink::orderBy('last_date_test', 'asc')->limit(10);
- $allLinks = $allLinks->inRandomOrder()->get()->toArray();
- $externalLinks = [];
-
- foreach ($allLinks as $fb => $link) {
- // Error code start with 4 or 5
- // Redirection code start with 3
- try {
- $curlResponse = FluidbookCollection::getUrlInfo($link['url']);
- }catch (\Error $e) {
- echo "Error when trying to get http code: " . $e->getMessage().PHP_EOL;
- continue;
- }catch (\Exception $e) {
- echo "Exception when trying to get http code: " . $e->getMessage() . PHP_EOL;
- continue;
+ // Map of URL => previously stored HTTP code, one entry per distinct URL.
+ $urls = [];
+ foreach (FluidbookAuditLink::orderBy('updated_at', 'asc')->get() as $link) {
+ if (!isset($urls[$link->url])) {
+ $urls[$link->url] = $link->http_code ?? 0;
}
+ }
+ // Rows were read oldest-updated first, so the slice keeps the URLs that have
+ // waited longest; string keys (the URLs) are kept by array_slice().
+ $urls = array_slice($urls, 0, $this->_batchSize, true);
+ // NOTE(review): presumably shuffles while keeping the URL keys — confirm against Cubist\Util\ArrayUtil.
+ ArrayUtil::shuffle($urls);
+
+ // NOTE(review): assumes Files::mkdir() returns the directory path with a trailing separator — confirm.
+ HTTP::setCookieFile(Files::mkdir(protected_path('fluidbookpublication/linkaudit')) . 'cookies.txt');
- $httpCode = $curlResponse['httpcode'];
- $finalurl = '';
- $finalcodeurl = '';
+ $this->extracted($urls);
+ Log::info('Job exécuté avec succès');
+ }
- print_r($curlResponse['httpcode'].' '.$link['id'].PHP_EOL);
+ public function compareLinks()
+ { // NOTE(review): empty stub — no behaviour is implemented here yet.
- $webvideo = WebVideo::parse($link['url'], true);
- if($webvideo !== false) {
- if($webvideo['service'] === 'youtube') {
- if (!FluidbookCollection::youtubeVideoExist($webvideo['id'])) {
- $httpCode = "404";
- }
- }
- }
+ }
- if (str_starts_with($httpCode, 3)) {
- try {
- $finalurl = $curlResponse['finalurl'];
- $finalcodeurl = $curlResponse['finalHttpCode'];
- }catch (\Error $e) {
- echo "Error when trying to get final url: " . $e->getMessage().PHP_EOL;
- continue;
- }catch (\Exception $e) {
- echo "Exception when trying to get final url: " . $e->getMessage() . PHP_EOL;
- continue;
+ /**
+ * Test each URL in turn and store the outcome on the matching audit rows.
+ *
+ * For every entry: a URL that WebVideo::parse() identifies as a YouTube video
+ * gets "200" or "404" depending on HTTP::youtubeVideoExist(); any other URL is
+ * checked with HTTP::getResponseCode(). When the resulting code differs (loose
+ * comparison) from the previously stored one, `code_date` is set to the current
+ * time. The result array is then written to every FluidbookAuditLink row that
+ * has this URL, and the loop sleeps so that each iteration lasts at least
+ * self::$minTimeBetweenRequests seconds.
+ *
+ * @param array $urls Map of URL => previously stored HTTP code.
+ * @return void
+ */
+ public function extracted(array $urls): void
+ {
+ foreach ($urls as $url => $previousCode) {
+ $start = time(); // start of this iteration, used for the throttling below
+ $res = [];
+ // YouTube URLs are checked via HTTP::youtubeVideoExist(); everything else via HTTP::getResponseCode().
+ $webvideo = WebVideo::parse($url, true);
+ if ($webvideo && $webvideo['service'] === 'youtube') {
+ if (!HTTP::youtubeVideoExist($webvideo['id'])) {
+ $res['http_code'] = "404";
+ } else {
+ $res['http_code'] = "200";
}
+ } else {
+ $res = HTTP::getResponseCode($url, 10, 'http://paris.cubedesigners.com:8191/v1'); // NOTE(review): assumes the returned array contains 'http_code'; 10 is presumably a timeout and the URL a FlareSolverr endpoint — confirm against Cubist\Net\HTTP
}
- $comment = FluidbookCollection::getHttpCodeComment($httpCode);
-
- $firstTimeError = '';
- if(str_starts_with($httpCode, 3) || str_starts_with($httpCode, 4) || str_starts_with($httpCode, 5)) {
- $firstTimeError = null === $link['first_time_error'] || $httpCode !== $link['error_code'] ? date('Y-m-d H:i:s') : $link['first_time_error'];
+ if ($res['http_code'] != $previousCode) { // loose !=, so the string codes set above still equal a numerically identical stored code
+ $res['code_date'] = new \DateTime(); // moment at which a different code was first observed
+ }
+ FluidbookAuditLink::where('url', $url)->update($res); // updates every row sharing this URL
+ Log::info('Tested ' . $url . ' : ' . $res['http_code']);
+ $diff = time() - $start;
+ if ($diff < self::$minTimeBetweenRequests) { // throttle: pad the iteration up to the minimum duration
+ sleep(self::$minTimeBetweenRequests - $diff);
}
-
- $externalLinks[] = [
- 'id' => $link['id'],
- 'fluidbook_id' => $link['fluidbook_id'],
- 'page' => $link['page'],
- 'link_id' => $link['link_id'],
- 'url' => $link['url'],
- 'error_code' => str_starts_with($httpCode, 4) || str_starts_with($httpCode, 5) ? $httpCode.$comment : "",
- 'first_time_error' => $firstTimeError, // Datetime of the first time we saw this error
- 'last_date_test' => date('Y-m-d H:i:s'),
- 'redirection_code' => str_starts_with($httpCode, 3) ? $httpCode : "",
- 'final_code_url' => $finalcodeurl,
- 'final_target' => $finalurl,
- 'updated_at'=> date('Y-m-d H:i:s')
- ];
- }
-
- if($externalLinks) {
- $keys = array_slice(array_keys($externalLinks[0]), 1);
-
- FluidbookAuditLink::upsert($externalLinks, ['id'], $keys);
}
-
- Log::info('Job exécuté avec succès');
- }
-
- public function compareLinks() {
-
}
}
$this->addUniqueKey(['fluidbook_id', 'link_id']);
}
+
+ public static function getURLToTest($limit){
+ // NOTE(review): empty stub — $limit is not used and nothing is returned yet.
+ }
}
Route::any('slack/endpoint', [\App\Http\Controllers\SlackController::class, 'endpoint']);
});
-Route::get('/httpcode/{error}/{redirection?}', function($error, $redirection = null){
- if($error == 200) {
- return "ok";
- }elseif($error == 401){
- abort(401);
- }elseif($error == 403){
- abort(403);
- }elseif($error == 404){
- abort(404);
- }elseif($error == 500){
- abort(500);
- }elseif($error == 502){
- abort(502);
- }elseif($error == 503){
- abort(503);
- }elseif($error == 308 && !$redirection){
- return redirect('/httpcode/308/urlderedirection', 308);
- }elseif($error == 302 && !$redirection){
- return redirect('/httpcode/302/urlderedirection');
- }elseif($error == 301 && !$redirection){
- return redirect('/httpcode/301/urlderedirection', 301);
+Route::get('/httpcode/{code}/{redirection?}', function ($code, $redirection = null) {
+ // Test endpoint: answers with the HTTP status named in the first URL segment.
+ // Listed error statuses are raised through abort().
+ $isError = in_array($code, [401, 403, 404, 500, 502, 503]);
+ // Listed redirect statuses redirect once, to the same route with a second segment.
+ $isRedirection = in_array($code, [301, 302, 307, 308]);
+ if ($isError) {
+ abort($code);
+ }
+ if ($isRedirection && !$redirection) {
+ return redirect('/httpcode/' . $code . '/urlderedirection', $code);
}
+ // Anything else (including the redirect target itself) is a plain 200 response.
+ return 'ok (' . $code . ')';
});