From 548b610a17ae9648903ecdaa68200eabd247a7c3 Mon Sep 17 00:00:00 2001
From: Simounet
Date: Mon, 29 May 2023 15:12:04 +0200
Subject: [PATCH] Fix images downloading with numeric HTML entity

---
NOTE(review): this copy of the patch appears to have lost markup at some point.
The HTML fixture passed to processHtml() in the new test is an empty string
here, and the From: header carries no e-mail address; restore both from the
upstream commit before applying. The "&amp;" and "&#038;" literals below were
reconstructed from the surrounding comments and variable names - confirm.

 .../CoreBundle/Helper/DownloadImages.php      | 12 +++++++-----
 .../CoreBundle/Helper/DownloadImagesTest.php  | 17 +++++++++++++++++
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
index 477b509ac..8a4ec9031 100644
--- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php
+++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
@@ -86,12 +86,14 @@ class DownloadImages
                 continue;
             }
 
-            // if image contains "&" and we can't find it in the html it might be because it's encoded as &amp;
-            if (false !== stripos($image, '&') && false === stripos($html, $image)) {
-                $image = str_replace('&', '&amp;', $image);
-            }
-
             $html = str_replace($image, $newImage, $html);
+            // if image contains "&" and we can't find it in the html it might be because it's encoded as &amp; or unicode
+            if (false !== stripos($image, '&') && false === stripos($html, $image)) {
+                $imageAmp = str_replace('&', '&amp;', $image);
+                $html = str_replace($imageAmp, $newImage, $html);
+                $imageUnicode = str_replace('&', '&#038;', $image);
+                $html = str_replace($imageUnicode, $newImage, $html);
+            }
         }
 
         return $html;
diff --git a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php
index f0735719d..4b78fb251 100644
--- a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php
+++ b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php
@@ -184,6 +184,23 @@ class DownloadImagesTest extends TestCase
         $this->assertStringNotContainsString('f_auto,q_auto', $res, 'Image srcset attribute were not replaced');
     }
 
+    public function testProcessImageWithNumericHtmlEntitySeparator()
+    {
+        $httpMockClient = new HttpMockClient();
+        $httpMockClient->addResponse(new Response(200, ['content-type' => 'image/jpeg'], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')));
+        $httpMockClient->addResponse(new Response(200, ['content-type' => 'image/jpeg'], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')));
+        $httpMockClient->addResponse(new Response(200, ['content-type' => 'image/jpeg'], file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg')));
+
+        $logHandler = new TestHandler();
+        $logger = new Logger('test', [$logHandler]);
+
+        $download = new DownloadImages($httpMockClient, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
+        // wordpress.com sites using &#038; as an &amp; alternative
+        $res = $download->processHtml(123, '', 'https://example.com/about/');
+
+        $this->assertStringNotContainsString('https://example.com', $res, 'Image srcset attribute were not replaced');
+    }
+
     public function testProcessImageWithNullPath()
     {
         $httpMockClient = new HttpMockClient();