Make DownloadImages use HttpClient

This commit is contained in:
Yassine Guedidi 2024-02-08 21:13:52 +01:00
parent a8634d7116
commit 0576cb613f
4 changed files with 65 additions and 90 deletions

View file

@ -36,6 +36,10 @@ framework:
assets: ~
mailer:
dsn: "%mailer_dsn%"
http_client:
scoped_clients:
download_images.client:
scope: '.*'
# Twig Configuration
twig:
@ -421,9 +425,6 @@ httplug:
defaults:
timeout: 10
plugins: ['httplug.plugin.logger']
wallabag_core.entry.download_images:
factory: 'httplug.factory.auto'
plugins: ['httplug.plugin.logger']
wallabag_core.pocket.client:
factory: 'httplug.factory.auto'
plugins:

View file

@ -258,9 +258,6 @@ services:
$defaultSettings: '%wallabag_core.default_internal_settings%'
$defaultIgnoreOriginInstanceRules: '%wallabag_core.default_ignore_origin_instance_rules%'
wallabag_core.entry.download_images.client:
alias: 'httplug.client.wallabag_core.entry.download_images'
Wallabag\CoreBundle\Mailer\UserMailer:
arguments:
$parameters:

View file

@ -5,19 +5,13 @@ namespace Wallabag\CoreBundle\Helper;
use enshrined\svgSanitize\Sanitizer;
use GuzzleHttp\Psr7\Uri;
use GuzzleHttp\Psr7\UriResolver;
use Http\Client\Common\HttpMethodsClient;
use Http\Client\Common\Plugin\ErrorPlugin;
use Http\Client\Common\Plugin\RedirectPlugin;
use Http\Client\Common\PluginClient;
use Http\Discovery\Psr17FactoryDiscovery;
use Psr\Http\Client\ClientInterface;
use Psr\Http\Message\RequestFactoryInterface;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\StreamFactoryInterface;
use Psr\Log\LoggerInterface;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\Finder\Finder;
use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\Mime\MimeTypes;
use Symfony\Contracts\HttpClient\HttpClientInterface;
use Symfony\Contracts\HttpClient\ResponseInterface;
class DownloadImages
{
@ -29,9 +23,9 @@ class DownloadImages
private $mimeTypes;
private $wallabagUrl;
public function __construct(ClientInterface $client, $baseFolder, $wallabagUrl, LoggerInterface $logger, RequestFactoryInterface $requestFactory = null, StreamFactoryInterface $streamFactory = null)
public function __construct(HttpClientInterface $downloadImagesClient, $baseFolder, $wallabagUrl, LoggerInterface $logger)
{
$this->client = new HttpMethodsClient(new PluginClient($client, [new ErrorPlugin(), new RedirectPlugin()]), $requestFactory ?: Psr17FactoryDiscovery::findRequestFactory(), $streamFactory ?: Psr17FactoryDiscovery::findStreamFactory());
$this->client = $downloadImagesClient;
$this->baseFolder = $baseFolder;
$this->wallabagUrl = rtrim($wallabagUrl, '/');
$this->logger = $logger;
@ -137,7 +131,7 @@ class DownloadImages
}
try {
$res = $this->client->get($absolutePath);
$res = $this->client->request(Request::METHOD_GET, $absolutePath);
} catch (\Exception $e) {
$this->logger->error('DownloadImages: Can not retrieve image, skipping.', ['exception' => $e]);
@ -159,7 +153,7 @@ class DownloadImages
$sanitizer = new Sanitizer();
$sanitizer->minify(true);
$sanitizer->removeRemoteReferences(true);
$cleanSVG = $sanitizer->sanitize((string) $res->getBody());
$cleanSVG = $sanitizer->sanitize($res->getContent());
// add an extra validation by checking about `<svg `
if (false === $cleanSVG || !str_contains($cleanSVG, '<svg ')) {
@ -179,7 +173,7 @@ class DownloadImages
}
try {
$im = imagecreatefromstring((string) $res->getBody());
$im = imagecreatefromstring($res->getContent());
} catch (\Exception $e) {
$im = false;
}
@ -196,7 +190,7 @@ class DownloadImages
if (class_exists(\Imagick::class)) {
try {
$imagick = new \Imagick();
$imagick->readImageBlob($res->getBody());
$imagick->readImageBlob($res->getContent());
$imagick->setImageFormat('gif');
$imagick->writeImages($localPath, true);
} catch (\Exception $e) {
@ -362,8 +356,12 @@ class DownloadImages
*/
private function getExtensionFromResponse(ResponseInterface $res, $imagePath)
{
$ext = current($this->mimeTypes->getExtensions(current($res->getHeader('content-type'))));
$this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]);
if (200 !== $res->getStatusCode()) {
return false;
}
$ext = current($this->mimeTypes->getExtensions(current($res->getHeaders()['content-type'] ?? [])));
$this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeaders()['content-type'] ?? []]);
// ok header doesn't have the extension, try a different way
if (empty($ext)) {
@ -373,7 +371,7 @@ class DownloadImages
'png' => "\x89\x50\x4e\x47\x0d\x0a",
'webp' => "\x52\x49\x46\x46",
];
$bytes = substr((string) $res->getBody(), 0, 8);
$bytes = substr($res->getContent(), 0, 8);
foreach ($types as $type => $header) {
if (str_starts_with($bytes, $header)) {

View file

@ -2,11 +2,11 @@
namespace Tests\Wallabag\CoreBundle\Helper;
use GuzzleHttp\Psr7\Response;
use Http\Mock\Client as HttpMockClient;
use Monolog\Handler\TestHandler;
use Monolog\Logger;
use PHPUnit\Framework\TestCase;
use Symfony\Component\HttpClient\MockHttpClient;
use Symfony\Component\HttpClient\Response\MockResponse;
use Wallabag\CoreBundle\Helper\DownloadImages;
class DownloadImagesTest extends TestCase
@ -30,13 +30,12 @@ class DownloadImagesTest extends TestCase
*/
public function testProcessHtml($html, $url)
{
$httpMockClient = new HttpMockClient();
$httpMockClient->addResponse(new Response(200, ['content-type' => 'image/png'], file_get_contents(__DIR__ . '/../fixtures/unnamed.png')));
$mockHttpClient = new MockHttpClient([new MockResponse(file_get_contents(__DIR__ . '/../fixtures/unnamed.png'), ['response_headers' => ['Content-Type: image/png']])]);
$logHandler = new TestHandler();
$logger = new Logger('test', [$logHandler]);
$download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$res = $download->processHtml(123, $html, $url);
@ -46,13 +45,12 @@ class DownloadImagesTest extends TestCase
public function testProcessHtmlWithBadImage()
{
$httpMockClient = new HttpMockClient();
$httpMockClient->addResponse(new Response(200, ['content-type' => 'application/json'], ''));
$mockHttpClient = new MockHttpClient([new MockResponse('', ['response_headers' => ['Content-Type: application/json']])]);
$logHandler = new TestHandler();
$logger = new Logger('test', [$logHandler]);
$download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$res = $download->processHtml(123, '<div><img src="http://i.imgur.com/T9qgcHc.jpg" /></div>', 'http://imgur.com/gallery/WxtWY');
$this->assertStringContainsString('http://i.imgur.com/T9qgcHc.jpg', $res, 'Image were not replace because of content-type');
@ -74,13 +72,12 @@ class DownloadImagesTest extends TestCase
*/
public function testProcessSingleImage($header, $extension)
{
$httpMockClient = new HttpMockClient();
$httpMockClient->addResponse(new Response(200, ['content-type' => $header], file_get_contents(__DIR__ . '/../fixtures/unnamed.png')));
$mockHttpClient = new MockHttpClient([new MockResponse(file_get_contents(__DIR__ . '/../fixtures/unnamed.png'), ['response_headers' => ['Content-Type: ' . $header]])]);
$logHandler = new TestHandler();
$logger = new Logger('test', [$logHandler]);
$download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$res = $download->processSingleImage(123, 'T9qgcHc.jpg', 'http://imgur.com/gallery/WxtWY');
$this->assertStringContainsString('/assets/images/9/b/9b0ead26/ebe60399.' . $extension, $res);
@ -88,13 +85,12 @@ class DownloadImagesTest extends TestCase
public function testProcessSingleImageWithBadUrl()
{
$httpMockClient = new HttpMockClient();
$httpMockClient->addResponse(new Response(404, []));
$mockHttpClient = new MockHttpClient([new MockResponse('', ['http_code' => 404])]);
$logHandler = new TestHandler();
$logger = new Logger('test', [$logHandler]);
$download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$res = $download->processSingleImage(123, 'T9qgcHc.jpg', 'http://imgur.com/gallery/WxtWY');
$this->assertFalse($res, 'Image can not be found, so it will not be replaced');
@ -102,13 +98,12 @@ class DownloadImagesTest extends TestCase
public function testProcessSingleImageWithBadImage()
{
$httpMockClient = new HttpMockClient();
$httpMockClient->addResponse(new Response(200, ['content-type' => 'image/png'], ''));
$mockHttpClient = new MockHttpClient([new MockResponse('', ['response_headers' => ['Content-Type: image/png']])]);
$logHandler = new TestHandler();
$logger = new Logger('test', [$logHandler]);
$download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$res = $download->processSingleImage(123, 'http://i.imgur.com/T9qgcHc.jpg', 'http://imgur.com/gallery/WxtWY');
$this->assertFalse($res, 'Image can not be loaded, so it will not be replaced');
@ -116,13 +111,12 @@ class DownloadImagesTest extends TestCase
public function testProcessSingleImageFailAbsolute()
{
$httpMockClient = new HttpMockClient();
$httpMockClient->addResponse(new Response(200, ['content-type' => 'image/png'], file_get_contents(__DIR__ . '/../fixtures/unnamed.png')));
$mockHttpClient = new MockHttpClient([new MockResponse(file_get_contents(__DIR__ . '/../fixtures/unnamed.png'), ['response_headers' => ['Content-Type: image/png']])]);
$logHandler = new TestHandler();
$logger = new Logger('test', [$logHandler]);
$download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$res = $download->processSingleImage(123, '/i.imgur.com/T9qgcHc.jpg', 'imgur.com/gallery/WxtWY');
$this->assertFalse($res, 'Absolute image can not be determined, so it will not be replaced');
@ -130,13 +124,12 @@ class DownloadImagesTest extends TestCase
public function testProcessRealImage()
{
$httpMockClient = new HttpMockClient();
$httpMockClient->addResponse(new Response(200, ['content-type' => null], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')));
$mockHttpClient = new MockHttpClient([new MockResponse(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))]);
$logHandler = new TestHandler();
$logger = new Logger('test', [$logHandler]);
$download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$res = $download->processSingleImage(
123,
@ -150,15 +143,16 @@ class DownloadImagesTest extends TestCase
public function testProcessImageWithSrcset()
{
$httpMockClient = new HttpMockClient();
$httpMockClient->addResponse(new Response(200, ['content-type' => null], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')));
$httpMockClient->addResponse(new Response(200, ['content-type' => null], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')));
$httpMockClient->addResponse(new Response(200, ['content-type' => null], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')));
$mockHttpClient = new MockHttpClient([
new MockResponse(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')),
new MockResponse(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')),
new MockResponse(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')),
]);
$logHandler = new TestHandler();
$logger = new Logger('test', [$logHandler]);
$download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$res = $download->processHtml(123, '<p><img class="alignnone wp-image-1153" src="http://piketty.blog.lemonde.fr/files/2017/10/F1FR-530x375.jpg" alt="" width="628" height="444" srcset="http://piketty.blog.lemonde.fr/files/2017/10/F1FR-530x375.jpg 530w, http://piketty.blog.lemonde.fr/files/2017/10/F1FR-768x543.jpg 768w, http://piketty.blog.lemonde.fr/files/2017/10/F1FR-900x636.jpg 900w" sizes="(max-width: 628px) 100vw, 628px" /></p>', 'http://piketty.blog.lemonde.fr/2017/10/12/budget-2018-la-jeunesse-sacrifiee/');
$this->assertStringNotContainsString('http://piketty.blog.lemonde.fr/', $res, 'Image srcset attribute were not replaced');
@ -166,15 +160,16 @@ class DownloadImagesTest extends TestCase
public function testProcessImageWithTrickySrcset()
{
$httpMockClient = new HttpMockClient();
$httpMockClient->addResponse(new Response(200, ['content-type' => null], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')));
$httpMockClient->addResponse(new Response(200, ['content-type' => null], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')));
$httpMockClient->addResponse(new Response(200, ['content-type' => null], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')));
$mockHttpClient = new MockHttpClient([
new MockResponse(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')),
new MockResponse(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')),
new MockResponse(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')),
]);
$logHandler = new TestHandler();
$logger = new Logger('test', [$logHandler]);
$download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$res = $download->processHtml(123, '<figure id="post-257260" class="align-none media-257260"><img src="https://cdn.css-tricks.com/wp-content/uploads/2017/08/the-critical-request.png" srcset="https://res.cloudinary.com/css-tricks/image/upload/c_scale,w_1000,f_auto,q_auto/v1501594717/the-critical-request_bqdfaa.png 1000w, https://res.cloudinary.com/css-tricks/image/upload/c_scale,w_200,f_auto,q_auto/v1501594717/the-critical-request_bqdfaa.png 200w" sizes="(min-width: 1850px) calc( (100vw - 555px) / 3 )
(min-width: 1251px) calc( (100vw - 530px) / 2 )
(min-width: 1086px) calc(100vw - 480px)
@ -186,15 +181,16 @@ class DownloadImagesTest extends TestCase
public function testProcessImageWithNumericHtmlEntitySeparator()
{
$httpMockClient = new HttpMockClient();
$httpMockClient->addResponse(new Response(200, ['content-type' => 'image/jpeg'], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')));
$httpMockClient->addResponse(new Response(200, ['content-type' => 'image/jpeg'], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')));
$httpMockClient->addResponse(new Response(200, ['content-type' => 'image/jpeg'], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')));
$mockHttpClient = new MockHttpClient([
new MockResponse(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'), ['response_headers' => ['Content-Type: image/jpeg']]),
new MockResponse(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'), ['response_headers' => ['Content-Type: image/jpeg']]),
new MockResponse(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'), ['response_headers' => ['Content-Type: image/jpeg']]),
]);
$logHandler = new TestHandler();
$logger = new Logger('test', [$logHandler]);
$download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
// wordpress.com sites using &#038; as an &amp; alternative
$res = $download->processHtml(123, '<img srcset="https://example.com/20191204_133626-scaled.jpg?strip=info&#038;w=600&#038;ssl=1 600w,https://example.com/20191204_133626-scaled.jpg?strip=info&#038;w=900&#038;ssl=1 900w" src="https://example.com/20191204_133626-scaled.jpg?ssl=1"/>', 'https://example.com/about/');
@ -203,13 +199,12 @@ class DownloadImagesTest extends TestCase
public function testProcessImageWithNullPath()
{
$httpMockClient = new HttpMockClient();
$httpMockClient->addResponse(new Response(200, ['content-type' => null], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')));
$mockHttpClient = new MockHttpClient([new MockResponse(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))]);
$logHandler = new TestHandler();
$logger = new Logger('test', [$logHandler]);
$download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$res = $download->processSingleImage(
123,
@ -221,14 +216,15 @@ class DownloadImagesTest extends TestCase
public function testEnsureOnlyFirstOccurrenceIsReplaced()
{
$httpMockClient = new HttpMockClient();
$httpMockClient->addResponse(new Response(200, ['content-type' => 'image/png'], file_get_contents(__DIR__ . '/../fixtures/unnamed.png')));
$httpMockClient->addResponse(new Response(200, ['content-type' => 'image/png'], file_get_contents(__DIR__ . '/../fixtures/unnamed.png')));
$mockHttpClient = new MockHttpClient([
new MockResponse(file_get_contents(__DIR__ . '/../fixtures/unnamed.png'), ['response_headers' => ['Content-Type: image/png']]),
new MockResponse(file_get_contents(__DIR__ . '/../fixtures/unnamed.png'), ['response_headers' => ['Content-Type: image/png']]),
]);
$logHandler = new TestHandler();
$logger = new Logger('test', [$logHandler]);
$download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$html = '<img src="https://images.wsj.net/im-410981?width=860&height=573" srcset="https://images.wsj.net/im-410981?width=860&height=573&pixel_ratio=1.5 1290w" height="573" width="860" alt="" referrerpolicy="no-referrer">';
$url = 'https://www.wsj.com/articles/5-interior-design-tips-to-max-out-your-basement-space-11633435201';
@ -240,13 +236,12 @@ class DownloadImagesTest extends TestCase
public function testProcessSingleImageWithSvg()
{
$httpMockClient = new HttpMockClient();
$httpMockClient->addResponse(new Response(200, ['content-type' => 'image/svg+xml'], file_get_contents(__DIR__ . '/../fixtures/modal-content.svg')));
$mockHttpClient = new MockHttpClient([new MockResponse(file_get_contents(__DIR__ . '/../fixtures/modal-content.svg'), ['response_headers' => ['Content-Type: image/svg+xml']])]);
$logHandler = new TestHandler();
$logger = new Logger('test', [$logHandler]);
$download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$res = $download->processSingleImage(123, 'modal-content.svg', 'http://imgur.com/gallery/WxtWY');
$this->assertStringContainsString('/assets/images/9/b/9b0ead26/400e29f9.svg', $res);
@ -254,30 +249,14 @@ class DownloadImagesTest extends TestCase
public function testProcessSingleImageWithBadSvg()
{
$httpMockClient = new HttpMockClient();
$httpMockClient->addResponse(new Response(200, ['content-type' => 'image/svg+xml'], file_get_contents(__DIR__ . '/../fixtures/unnamed.png')));
$mockHttpClient = new MockHttpClient([new MockResponse(file_get_contents(__DIR__ . '/../fixtures/unnamed.png'), ['response_headers' => ['Content-Type: image/svg+xml']])]);
$logHandler = new TestHandler();
$logger = new Logger('test', [$logHandler]);
$download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$download = new DownloadImages($mockHttpClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$res = $download->processSingleImage(123, 'modal-content.svg', 'http://imgur.com/gallery/WxtWY');
$this->assertFalse($res);
}
public function testFollowRedirection()
{
$httpMockClient = new HttpMockClient();
$httpMockClient->addResponse(new Response(301, ['content-type' => 'image/png', 'location' => '/final-path.png']));
$httpMockClient->addResponse(new Response(200, ['content-type' => 'image/png'], file_get_contents(__DIR__ . '/../fixtures/unnamed.png')));
$logHandler = new TestHandler();
$logger = new Logger('test', [$logHandler]);
$download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
$res = $download->processSingleImage(123, '', 'https://example.com/unnamed.png');
$this->assertStringContainsString('/assets/images/9/b/9b0ead26/66953334.png', $res, "Fetch client didn't follow the HTTP redirection");
}
}