Avoid overlapping images when downloading them

This commit is contained in:
Jeremy Benoist 2022-02-03 13:29:40 +01:00
parent 485405190d
commit 2b3ff84829
No known key found for this signature in database
GPG key ID: BCA73962457ACC3C
3 changed files with 29 additions and 5 deletions

View file

@ -164,7 +164,10 @@
"platform": { "platform": {
"php": "7.2.5" "php": "7.2.5"
}, },
"sort-packages": true "sort-packages": true,
"allow-plugins": {
"phpstan/extension-installer": true
}
}, },
"minimum-stability": "dev", "minimum-stability": "dev",
"prefer-stable": true "prefer-stable": true

View file

@ -72,13 +72,16 @@ class DownloadImages
{ {
$imagesUrls = self::extractImagesUrlsFromHtml($html); $imagesUrls = self::extractImagesUrlsFromHtml($html);
// ensure images aren't overlapping
arsort($imagesUrls);
$relativePath = $this->getRelativePath($entryId); $relativePath = $this->getRelativePath($entryId);
// download and save the image to the folder // download and save the image to the folder
foreach ($imagesUrls as $image) { foreach ($imagesUrls as $image) {
$imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath); $newImage = $this->processSingleImage($entryId, $image, $url, $relativePath);
if (false === $imagePath) { if (false === $newImage) {
continue; continue;
} }
@ -87,7 +90,7 @@ class DownloadImages
$image = str_replace('&', '&amp;', $image); $image = str_replace('&', '&amp;', $image);
} }
$html = str_replace($image, $imagePath, $html); $html = str_replace($image, $newImage, $html);
} }
return $html; return $html;

View file

@ -31,7 +31,6 @@ class DownloadImagesTest extends TestCase
public function testProcessHtml($html, $url) public function testProcessHtml($html, $url)
{ {
$httpMockClient = new HttpMockClient(); $httpMockClient = new HttpMockClient();
$httpMockClient->addResponse(new Response(200, ['content-type' => 'image/png'], file_get_contents(__DIR__ . '/../fixtures/unnamed.png'))); $httpMockClient->addResponse(new Response(200, ['content-type' => 'image/png'], file_get_contents(__DIR__ . '/../fixtures/unnamed.png')));
$logHandler = new TestHandler(); $logHandler = new TestHandler();
@ -201,4 +200,23 @@ class DownloadImagesTest extends TestCase
); );
$this->assertFalse($res); $this->assertFalse($res);
} }
/**
 * Regression test for overlapping image URLs.
 *
 * The markup below references two images where the `src` URL is a strict
 * prefix of the `srcset` URL. The assertion pins that each attribute ends up
 * pointing at its own locally stored file, i.e. rewriting the shorter URL
 * must not corrupt the longer one.
 */
public function testEnsureOnlyFirstOccurenceIsReplaced()
{
    $pngFixture = file_get_contents(__DIR__ . '/../fixtures/unnamed.png');

    // Two image URLs are present in the markup, so two responses are queued.
    $client = new HttpMockClient();
    $client->addResponse(new Response(200, ['content-type' => 'image/png'], $pngFixture));
    $client->addResponse(new Response(200, ['content-type' => 'image/png'], $pngFixture));

    $logger = new Logger('test', [new TestHandler()]);
    $downloader = new DownloadImages($client, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);

    $entryUrl = 'https://www.wsj.com/articles/5-interior-design-tips-to-max-out-your-basement-space-11633435201';
    $sourceHtml = '<img src="https://images.wsj.net/im-410981?width=860&height=573" srcset="https://images.wsj.net/im-410981?width=860&height=573&pixel_ratio=1.5 1290w" height="573" width="860" alt="" referrerpolicy="no-referrer">';

    // `src` and `srcset` must each resolve to a distinct stored file name.
    $expectedHtml = '<img src="http://wallabag.io/assets/images/9/b/9b0ead26/6bef06fe.png" srcset="http://wallabag.io/assets/images/9/b/9b0ead26/43cc0123.png 1290w" height="573" width="860" alt="" referrerpolicy="no-referrer">';

    $this->assertSame($expectedHtml, $downloader->processHtml(123, $sourceHtml, $entryUrl));
}
} }