From 0576cb613f86065aabaeb847c17f3d761a1558c2 Mon Sep 17 00:00:00 2001 From: Yassine Guedidi Date: Thu, 8 Feb 2024 21:13:52 +0100 Subject: [PATCH] Make DownloadImages use HttpClient --- app/config/config.yml | 7 +- app/config/services.yml | 3 - .../CoreBundle/Helper/DownloadImages.php | 34 +++--- .../CoreBundle/Helper/DownloadImagesTest.php | 111 +++++++----------- 4 files changed, 65 insertions(+), 90 deletions(-) diff --git a/app/config/config.yml b/app/config/config.yml index 13be589da..5bf12a6bc 100644 --- a/app/config/config.yml +++ b/app/config/config.yml @@ -36,6 +36,10 @@ framework: assets: ~ mailer: dsn: "%mailer_dsn%" + http_client: + scoped_clients: + download_images.client: + scope: '.*' # Twig Configuration twig: @@ -421,9 +425,6 @@ httplug: defaults: timeout: 10 plugins: ['httplug.plugin.logger'] - wallabag_core.entry.download_images: - factory: 'httplug.factory.auto' - plugins: ['httplug.plugin.logger'] wallabag_core.pocket.client: factory: 'httplug.factory.auto' plugins: diff --git a/app/config/services.yml b/app/config/services.yml index 1e1cf6a23..7e17f0110 100644 --- a/app/config/services.yml +++ b/app/config/services.yml @@ -258,9 +258,6 @@ services: $defaultSettings: '%wallabag_core.default_internal_settings%' $defaultIgnoreOriginInstanceRules: '%wallabag_core.default_ignore_origin_instance_rules%' - wallabag_core.entry.download_images.client: - alias: 'httplug.client.wallabag_core.entry.download_images' - Wallabag\CoreBundle\Mailer\UserMailer: arguments: $parameters: diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 9c0f59522..42c0fc878 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -5,19 +5,13 @@ namespace Wallabag\CoreBundle\Helper; use enshrined\svgSanitize\Sanitizer; use GuzzleHttp\Psr7\Uri; use GuzzleHttp\Psr7\UriResolver; -use Http\Client\Common\HttpMethodsClient; -use Http\Client\Common\Plugin\ErrorPlugin; -use Http\Client\Common\Plugin\RedirectPlugin; -use Http\Client\Common\PluginClient; -use Http\Discovery\Psr17FactoryDiscovery; -use Psr\Http\Client\ClientInterface; -use Psr\Http\Message\RequestFactoryInterface; -use Psr\Http\Message\ResponseInterface; -use Psr\Http\Message\StreamFactoryInterface; use Psr\Log\LoggerInterface; use Symfony\Component\DomCrawler\Crawler; use Symfony\Component\Finder\Finder; +use Symfony\Component\HttpFoundation\Request; use Symfony\Component\Mime\MimeTypes; +use Symfony\Contracts\HttpClient\HttpClientInterface; +use Symfony\Contracts\HttpClient\ResponseInterface; class DownloadImages { @@ -29,9 +23,9 @@ class DownloadImages private $mimeTypes; private $wallabagUrl; - public function __construct(ClientInterface $client, $baseFolder, $wallabagUrl, LoggerInterface $logger, RequestFactoryInterface $requestFactory = null, StreamFactoryInterface $streamFactory = null) + public function __construct(HttpClientInterface $downloadImagesClient, $baseFolder, $wallabagUrl, LoggerInterface $logger) { - $this->client = new HttpMethodsClient(new PluginClient($client, [new ErrorPlugin(), new RedirectPlugin()]), $requestFactory ?: Psr17FactoryDiscovery::findRequestFactory(), $streamFactory ?: Psr17FactoryDiscovery::findStreamFactory()); + $this->client = $downloadImagesClient; $this->baseFolder = $baseFolder; $this->wallabagUrl = rtrim($wallabagUrl, '/'); $this->logger = $logger; @@ -137,7 +131,7 @@ class DownloadImages } try { - $res = $this->client->get($absolutePath); + $res = $this->client->request(Request::METHOD_GET, $absolutePath); } catch (\Exception $e) { $this->logger->error('DownloadImages: Can not retrieve image, skipping.', ['exception' => $e]); @@ -159,7 +153,7 @@ class DownloadImages $sanitizer = new Sanitizer(); $sanitizer->minify(true); $sanitizer->removeRemoteReferences(true); - $cleanSVG = $sanitizer->sanitize((string) $res->getBody()); + $cleanSVG = $sanitizer->sanitize($res->getContent()); // add an extra validation by checking about `getBody()); + $im = imagecreatefromstring($res->getContent()); } catch (\Exception $e) { $im = false; } @@ -196,7 +190,7 @@ class DownloadImages if (class_exists(\Imagick::class)) { try { $imagick = new \Imagick(); - $imagick->readImageBlob($res->getBody()); + $imagick->readImageBlob($res->getContent()); $imagick->setImageFormat('gif'); $imagick->writeImages($localPath, true); } catch (\Exception $e) { @@ -362,8 +356,12 @@ class DownloadImages */ private function getExtensionFromResponse(ResponseInterface $res, $imagePath) { - $ext = current($this->mimeTypes->getExtensions(current($res->getHeader('content-type')))); - $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); + if (200 !== $res->getStatusCode()) { + return false; + } + + $ext = current($this->mimeTypes->getExtensions(current($res->getHeaders()['content-type'] ?? []))); + $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeaders()['content-type'] ?? []]); // ok header doesn't have the extension, try a different way if (empty($ext)) { @@ -373,7 +371,7 @@ class DownloadImages 'png' => "\x89\x50\x4e\x47\x0d\x0a", 'webp' => "\x52\x49\x46\x46", ]; - $bytes = substr((string) $res->getBody(), 0, 8); + $bytes = substr($res->getContent(), 0, 8); foreach ($types as $type => $header) { if (str_starts_with($bytes, $header)) { diff --git a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php index 2dc446394..2dac72659 100644 --- a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php +++ b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php @@ -2,11 +2,11 @@ namespace Tests\Wallabag\CoreBundle\Helper; -use GuzzleHttp\Psr7\Response; -use Http\Mock\Client as HttpMockClient; use Monolog\Handler\TestHandler; use Monolog\Logger; use PHPUnit\Framework\TestCase; +use Symfony\Component\HttpClient\MockHttpClient; +use Symfony\Component\HttpClient\Response\MockResponse; use Wallabag\CoreBundle\Helper\DownloadImages; class DownloadImagesTest extends TestCase @@ -30,13 +30,12 @@ class DownloadImagesTest extends TestCase */ public function testProcessHtml($html, $url) { - $httpMockClient = new HttpMockClient(); - $httpMockClient->addResponse(new Response(200, ['content-type' => 'image/png'], file_get_contents(__DIR__ . '/../fixtures/unnamed.png'))); + $mockHttpClient = new MockHttpClient([new MockResponse(file_get_contents(__DIR__ . '/../fixtures/unnamed.png'), ['response_headers' => ['Content-Type: image/png']])]); $logHandler = new TestHandler(); $logger = new Logger('test', [$logHandler]); - $download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); + $download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); $res = $download->processHtml(123, $html, $url); @@ -46,13 +45,12 @@ class DownloadImagesTest extends TestCase public function testProcessHtmlWithBadImage() { - $httpMockClient = new HttpMockClient(); - $httpMockClient->addResponse(new Response(200, ['content-type' => 'application/json'], '')); + $mockHttpClient = new MockHttpClient([new MockResponse('', ['response_headers' => ['Content-Type: application/json']])]); $logHandler = new TestHandler(); $logger = new Logger('test', [$logHandler]); - $download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); + $download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); $res = $download->processHtml(123, '
', 'http://imgur.com/gallery/WxtWY'); $this->assertStringContainsString('http://i.imgur.com/T9qgcHc.jpg', $res, 'Image were not replace because of content-type'); @@ -74,13 +72,12 @@ class DownloadImagesTest extends TestCase */ public function testProcessSingleImage($header, $extension) { - $httpMockClient = new HttpMockClient(); - $httpMockClient->addResponse(new Response(200, ['content-type' => $header], file_get_contents(__DIR__ . '/../fixtures/unnamed.png'))); + $mockHttpClient = new MockHttpClient([new MockResponse(file_get_contents(__DIR__ . '/../fixtures/unnamed.png'), ['response_headers' => ['Content-Type: ' . $header]])]); $logHandler = new TestHandler(); $logger = new Logger('test', [$logHandler]); - $download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); + $download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); $res = $download->processSingleImage(123, 'T9qgcHc.jpg', 'http://imgur.com/gallery/WxtWY'); $this->assertStringContainsString('/assets/images/9/b/9b0ead26/ebe60399.' . $extension, $res); @@ -88,13 +85,12 @@ class DownloadImagesTest extends TestCase public function testProcessSingleImageWithBadUrl() { - $httpMockClient = new HttpMockClient(); - $httpMockClient->addResponse(new Response(404, [])); + $mockHttpClient = new MockHttpClient([new MockResponse('', ['http_code' => 404])]); $logHandler = new TestHandler(); $logger = new Logger('test', [$logHandler]); - $download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); + $download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); $res = $download->processSingleImage(123, 'T9qgcHc.jpg', 'http://imgur.com/gallery/WxtWY'); $this->assertFalse($res, 'Image can not be found, so it will not be replaced'); @@ -102,13 +98,12 @@ class DownloadImagesTest extends TestCase public function testProcessSingleImageWithBadImage() { - $httpMockClient = new HttpMockClient(); - $httpMockClient->addResponse(new Response(200, ['content-type' => 'image/png'], '')); + $mockHttpClient = new MockHttpClient([new MockResponse('', ['response_headers' => ['Content-Type: image/png']])]); $logHandler = new TestHandler(); $logger = new Logger('test', [$logHandler]); - $download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); + $download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); $res = $download->processSingleImage(123, 'http://i.imgur.com/T9qgcHc.jpg', 'http://imgur.com/gallery/WxtWY'); $this->assertFalse($res, 'Image can not be loaded, so it will not be replaced'); @@ -116,13 +111,12 @@ class DownloadImagesTest extends TestCase public function testProcessSingleImageFailAbsolute() { - $httpMockClient = new HttpMockClient(); - $httpMockClient->addResponse(new Response(200, ['content-type' => 'image/png'], file_get_contents(__DIR__ . '/../fixtures/unnamed.png'))); + $mockHttpClient = new MockHttpClient([new MockResponse(file_get_contents(__DIR__ . '/../fixtures/unnamed.png'), ['response_headers' => ['Content-Type: image/png']])]); $logHandler = new TestHandler(); $logger = new Logger('test', [$logHandler]); - $download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); + $download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); $res = $download->processSingleImage(123, '/i.imgur.com/T9qgcHc.jpg', 'imgur.com/gallery/WxtWY'); $this->assertFalse($res, 'Absolute image can not be determined, so it will not be replaced'); @@ -130,13 +124,12 @@ class DownloadImagesTest extends TestCase public function testProcessRealImage() { - $httpMockClient = new HttpMockClient(); - $httpMockClient->addResponse(new Response(200, ['content-type' => null], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))); + $mockHttpClient = new MockHttpClient([new MockResponse(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))]); $logHandler = new TestHandler(); $logger = new Logger('test', [$logHandler]); - $download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); + $download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); $res = $download->processSingleImage( 123, @@ -150,15 +143,16 @@ class DownloadImagesTest extends TestCase public function testProcessImageWithSrcset() { - $httpMockClient = new HttpMockClient(); - $httpMockClient->addResponse(new Response(200, ['content-type' => null], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))); - $httpMockClient->addResponse(new Response(200, ['content-type' => null], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))); - $httpMockClient->addResponse(new Response(200, ['content-type' => null], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))); + $mockHttpClient = new MockHttpClient([ + new MockResponse(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')), + new MockResponse(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')), + new MockResponse(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')), + ]); $logHandler = new TestHandler(); $logger = new Logger('test', [$logHandler]); - $download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); + $download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); $res = $download->processHtml(123, '

', 'http://piketty.blog.lemonde.fr/2017/10/12/budget-2018-la-jeunesse-sacrifiee/'); $this->assertStringNotContainsString('http://piketty.blog.lemonde.fr/', $res, 'Image srcset attribute were not replaced'); @@ -166,15 +160,16 @@ class DownloadImagesTest extends TestCase public function testProcessImageWithTrickySrcset() { - $httpMockClient = new HttpMockClient(); - $httpMockClient->addResponse(new Response(200, ['content-type' => null], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))); - $httpMockClient->addResponse(new Response(200, ['content-type' => null], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))); - $httpMockClient->addResponse(new Response(200, ['content-type' => null], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))); + $mockHttpClient = new MockHttpClient([ + new MockResponse(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')), + new MockResponse(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')), + new MockResponse(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')), + ]); $logHandler = new TestHandler(); $logger = new Logger('test', [$logHandler]); - $download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); + $download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); $res = $download->processHtml(123, '
', 'https://example.com/about/'); @@ -203,13 +199,12 @@ class DownloadImagesTest extends TestCase public function testProcessImageWithNullPath() { - $httpMockClient = new HttpMockClient(); - $httpMockClient->addResponse(new Response(200, ['content-type' => null], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))); + $mockHttpClient = new MockHttpClient([new MockResponse(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))]); $logHandler = new TestHandler(); $logger = new Logger('test', [$logHandler]); - $download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); + $download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); $res = $download->processSingleImage( 123, @@ -221,14 +216,15 @@ class DownloadImagesTest extends TestCase public function testEnsureOnlyFirstOccurrenceIsReplaced() { - $httpMockClient = new HttpMockClient(); - $httpMockClient->addResponse(new Response(200, ['content-type' => 'image/png'], file_get_contents(__DIR__ . '/../fixtures/unnamed.png'))); - $httpMockClient->addResponse(new Response(200, ['content-type' => 'image/png'], file_get_contents(__DIR__ . '/../fixtures/unnamed.png'))); + $mockHttpClient = new MockHttpClient([ + new MockResponse(file_get_contents(__DIR__ . '/../fixtures/unnamed.png'), ['response_headers' => ['Content-Type: image/png']]), + new MockResponse(file_get_contents(__DIR__ . '/../fixtures/unnamed.png'), ['response_headers' => ['Content-Type: image/png']]), + ]); $logHandler = new TestHandler(); $logger = new Logger('test', [$logHandler]); - $download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); + $download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); $html = ''; $url = 'https://www.wsj.com/articles/5-interior-design-tips-to-max-out-your-basement-space-11633435201'; @@ -240,13 +236,12 @@ class DownloadImagesTest extends TestCase public function testProcessSingleImageWithSvg() { - $httpMockClient = new HttpMockClient(); - $httpMockClient->addResponse(new Response(200, ['content-type' => 'image/svg+xml'], file_get_contents(__DIR__ . '/../fixtures/modal-content.svg'))); + $mockHttpClient = new MockHttpClient([new MockResponse(file_get_contents(__DIR__ . '/../fixtures/modal-content.svg'), ['response_headers' => ['Content-Type: image/svg+xml']])]); $logHandler = new TestHandler(); $logger = new Logger('test', [$logHandler]); - $download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); + $download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); $res = $download->processSingleImage(123, 'modal-content.svg', 'http://imgur.com/gallery/WxtWY'); $this->assertStringContainsString('/assets/images/9/b/9b0ead26/400e29f9.svg', $res); @@ -254,30 +249,14 @@ class DownloadImagesTest extends TestCase public function testProcessSingleImageWithBadSvg() { - $httpMockClient = new HttpMockClient(); - $httpMockClient->addResponse(new Response(200, ['content-type' => 'image/svg+xml'], file_get_contents(__DIR__ . '/../fixtures/unnamed.png'))); + $mockHttpClient = new MockHttpClient([new MockResponse(file_get_contents(__DIR__ . '/../fixtures/unnamed.png'), ['response_headers' => ['Content-Type: image/svg+xml']])]); $logHandler = new TestHandler(); $logger = new Logger('test', [$logHandler]); - $download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); + $download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); $res = $download->processSingleImage(123, 'modal-content.svg', 'http://imgur.com/gallery/WxtWY'); $this->assertFalse($res); } - - public function testFollowRedirection() - { - $httpMockClient = new HttpMockClient(); - $httpMockClient->addResponse(new Response(301, ['content-type' => 'image/png', 'location' => '/final-path.png'])); - $httpMockClient->addResponse(new Response(200, ['content-type' => 'image/png'], file_get_contents(__DIR__ . '/../fixtures/unnamed.png'))); - - $logHandler = new TestHandler(); - $logger = new Logger('test', [$logHandler]); - - $download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); - $res = $download->processSingleImage(123, '', 'https://example.com/unnamed.png'); - - $this->assertStringContainsString('/assets/images/9/b/9b0ead26/66953334.png', $res, "Fetch client didn't follow the HTTP redirection"); - } }