Avoid overlapping images when downloading them

This commit is contained in:
Jeremy Benoist 2022-02-03 13:29:40 +01:00
parent 485405190d
commit 2b3ff84829
No known key found for this signature in database
GPG key ID: BCA73962457ACC3C
3 changed files with 29 additions and 5 deletions

View file

@ -164,7 +164,10 @@
"platform": {
"php": "7.2.5"
},
"sort-packages": true
"sort-packages": true,
"allow-plugins": {
"phpstan/extension-installer": true
}
},
"minimum-stability": "dev",
"prefer-stable": true

View file

@ -72,13 +72,16 @@ class DownloadImages
{
$imagesUrls = self::extractImagesUrlsFromHtml($html);
// ensure images aren't overlapping
arsort($imagesUrls);
$relativePath = $this->getRelativePath($entryId);
// download and save the image to the folder
foreach ($imagesUrls as $image) {
$imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath);
$newImage = $this->processSingleImage($entryId, $image, $url, $relativePath);
if (false === $imagePath) {
if (false === $newImage) {
continue;
}
@ -87,7 +90,7 @@ class DownloadImages
$image = str_replace('&', '&', $image);
}
$html = str_replace($image, $imagePath, $html);
$html = str_replace($image, $newImage, $html);
}
return $html;

View file

@ -31,7 +31,6 @@ class DownloadImagesTest extends TestCase
public function testProcessHtml($html, $url)
{
$httpMockClient = new HttpMockClient();
$httpMockClient->addResponse(new Response(200, ['content-type' => 'image/png'], file_get_contents(__DIR__ . '/../fixtures/unnamed.png')));
$logHandler = new TestHandler();
@ -201,4 +200,23 @@ class DownloadImagesTest extends TestCase
);
$this->assertFalse($res);
}
/**
 * Checks image rewriting when one image URL is a leading substring of another.
 *
 * In the fixture markup the `src` URL is a prefix of the `srcset` URL (the
 * latter only appends a `pixel_ratio` parameter). The processed HTML must
 * carry a distinct local asset path for each of the two URLs.
 */
public function testEnsureOnlyFirstOccurenceIsReplaced()
{
    // Two canned PNG responses are queued; the markup below references two image URLs.
    $pngHeaders = ['content-type' => 'image/png'];
    $client = new HttpMockClient();
    $client->addResponse(new Response(200, $pngHeaders, file_get_contents(__DIR__ . '/../fixtures/unnamed.png')));
    $client->addResponse(new Response(200, $pngHeaders, file_get_contents(__DIR__ . '/../fixtures/unnamed.png')));

    $logger = new Logger('test', [new TestHandler()]);
    $downloader = new DownloadImages(
        $client,
        sys_get_temp_dir() . '/wallabag_test',
        'http://wallabag.io/',
        $logger
    );

    $pageUrl = 'https://www.wsj.com/articles/5-interior-design-tips-to-max-out-your-basement-space-11633435201';
    $markup = '<img src="https://images.wsj.net/im-410981?width=860&height=573" srcset="https://images.wsj.net/im-410981?width=860&height=573&pixel_ratio=1.5 1290w" height="573" width="860" alt="" referrerpolicy="no-referrer">';

    // Both attributes are expected to point at different downloaded files.
    $expected = '<img src="http://wallabag.io/assets/images/9/b/9b0ead26/6bef06fe.png" srcset="http://wallabag.io/assets/images/9/b/9b0ead26/43cc0123.png 1290w" height="573" width="860" alt="" referrerpolicy="no-referrer">';

    $processed = $downloader->processHtml(123, $markup, $pageUrl);

    $this->assertSame($expected, $processed);
}
}