Merge pull request #6563 from Simounet/fix/images-download-with-numeric-html-entity

Fix images downloading with numeric HTML entity
This commit is contained in:
Jérémy Benoist 2023-05-31 15:03:20 +02:00 committed by GitHub
commit 4032dd493f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 24 additions and 5 deletions

View file

@ -86,12 +86,14 @@ class DownloadImages
continue; continue;
} }
// if image contains "&" and we can't find it in the html it might be because it's encoded as &amp;
if (false !== stripos($image, '&') && false === stripos($html, $image)) {
$image = str_replace('&', '&amp;', $image);
}
$html = str_replace($image, $newImage, $html); $html = str_replace($image, $newImage, $html);
// if image contains "&" and we can't find it in the html it might be because it's encoded as &amp; or unicode
if (false !== stripos($image, '&') && false === stripos($html, $image)) {
$imageAmp = str_replace('&', '&amp;', $image);
$html = str_replace($imageAmp, $newImage, $html);
$imageUnicode = str_replace('&', '&#038;', $image);
$html = str_replace($imageUnicode, $newImage, $html);
}
} }
return $html; return $html;

View file

@ -184,6 +184,23 @@ class DownloadImagesTest extends TestCase
$this->assertStringNotContainsString('f_auto,q_auto', $res, 'Image srcset attribute were not replaced'); $this->assertStringNotContainsString('f_auto,q_auto', $res, 'Image srcset attribute were not replaced');
} }
/**
 * Checks that image URLs whose query-string separators are written as the
 * numeric HTML entity "&#038;" are rewritten by processHtml(): after
 * processing, no "https://example.com" reference may remain in the output.
 */
public function testProcessImageWithNumericHtmlEntitySeparator()
{
    $fixturePath = __DIR__ . '/../fixtures/image-no-content-type.jpg';

    // Queue three identical JPEG responses; the markup below references three
    // image URLs (two in srcset, one in src) -- presumably one request each.
    $client = new HttpMockClient();
    for ($i = 0; $i < 3; ++$i) {
        $client->addResponse(new Response(200, ['content-type' => 'image/jpeg'], file_get_contents($fixturePath)));
    }

    $handler = new TestHandler();
    $logger = new Logger('test', [$handler]);

    $downloader = new DownloadImages($client, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);

    // wordpress.com sites using &#038; as an &amp; alternative
    $html = '<img srcset="https://example.com/20191204_133626-scaled.jpg?strip=info&#038;w=600&#038;ssl=1 600w,https://example.com/20191204_133626-scaled.jpg?strip=info&#038;w=900&#038;ssl=1 900w" src="https://example.com/20191204_133626-scaled.jpg?ssl=1"/>';
    $processed = $downloader->processHtml(123, $html, 'https://example.com/about/');

    $this->assertStringNotContainsString('https://example.com', $processed, 'Image srcset attribute were not replaced');
}
public function testProcessImageWithNullPath() public function testProcessImageWithNullPath()
{ {
$httpMockClient = new HttpMockClient(); $httpMockClient = new HttpMockClient();