mirror of
https://github.com/wallabag/wallabag.git
synced 2024-12-17 13:16:28 +00:00
Merge pull request #3661 from Simounet/fix/2981-srcset-image
Fix srcset attribute on downloaded images
This commit is contained in:
commit
8fe73e076c
2 changed files with 54 additions and 3 deletions
|
@ -42,14 +42,17 @@ class DownloadImages
|
||||||
public function processHtml($entryId, $html, $url)
|
public function processHtml($entryId, $html, $url)
|
||||||
{
|
{
|
||||||
$crawler = new Crawler($html);
|
$crawler = new Crawler($html);
|
||||||
$result = $crawler
|
$imagesCrawler = $crawler
|
||||||
->filterXpath('//img')
|
->filterXpath('//img');
|
||||||
|
$imagesUrls = $imagesCrawler
|
||||||
->extract(['src']);
|
->extract(['src']);
|
||||||
|
$imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler);
|
||||||
|
$imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
|
||||||
|
|
||||||
$relativePath = $this->getRelativePath($entryId);
|
$relativePath = $this->getRelativePath($entryId);
|
||||||
|
|
||||||
// download and save the image to the folder
|
// download and save the image to the folder
|
||||||
foreach ($result as $image) {
|
foreach ($imagesUrls as $image) {
|
||||||
$imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath);
|
$imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath);
|
||||||
|
|
||||||
if (false === $imagePath) {
|
if (false === $imagePath) {
|
||||||
|
@ -171,6 +174,33 @@ class DownloadImages
|
||||||
@rmdir($folderPath);
|
@rmdir($folderPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Get images urls from the srcset image attribute.
 *
 * A srcset value is a comma-separated list of candidates, each made of a
 * URL optionally followed by whitespace and a descriptor (e.g. "530w" or
 * "2x"). Only the URL part of every candidate is returned.
 *
 * Candidates are split on any run of whitespace (spaces, tabs, newlines),
 * and empty candidates (trailing or doubled commas) are ignored so that no
 * empty string is ever returned as a URL. URLs that themselves contain a
 * comma are not supported by this simple comma split.
 *
 * @param Crawler $imagesCrawler Crawler positioned on the <img> nodes to inspect
 *
 * @return array An array of urls; urls of later images come before those of earlier images
 */
protected function getSrcsetUrls(Crawler $imagesCrawler)
{
    $urls = [];

    foreach ($imagesCrawler as $image) {
        // getAttribute() returns an empty string when the attribute is missing,
        // in which case the loop below finds no candidate at all.
        $srcsetAttribute = $image->getAttribute('srcset');

        $srcsetUrls = [];
        foreach (explode(',', $srcsetAttribute) as $candidate) {
            // The first whitespace-delimited token is the URL, the rest is the descriptor.
            $parts = preg_split('/\s+/', $candidate, -1, PREG_SPLIT_NO_EMPTY);

            // $parts is false on a regex error and empty for a blank candidate: skip both.
            if (!empty($parts)) {
                $srcsetUrls[] = $parts[0];
            }
        }

        // Keep the historical ordering: the current image's urls are prepended.
        $urls = array_merge($srcsetUrls, $urls);
    }

    return $urls;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Setup base folder where all images are going to be saved.
|
* Setup base folder where all images are going to be saved.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -183,4 +183,25 @@ class DownloadImagesTest extends TestCase
|
||||||
$this->assertContains('http://wallabag.io/assets/images/9/b/9b0ead26/', $res, 'Content-Type was empty but data is ok for an image');
|
$this->assertContains('http://wallabag.io/assets/images/9/b/9b0ead26/', $res, 'Content-Type was empty but data is ok for an image');
|
||||||
$this->assertContains('DownloadImages: Checking extension (alternative)', $logHandler->getRecords()[3]['message']);
|
$this->assertContains('DownloadImages: Checking extension (alternative)', $logHandler->getRecords()[3]['message']);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Runs processHtml() on an <img> carrying both src and srcset and checks that
 * no URL of the original host is left in the returned HTML.
 *
 * The markup references three distinct image URLs (the src value duplicates
 * the first srcset candidate), and three mocked image responses are queued.
 */
public function testProcessImageWithSrcset()
{
    $fixture = file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg');

    // Queue three identical JPEG responses, each with its own stream.
    $responses = [];
    for ($i = 0; $i < 3; ++$i) {
        $responses[] = new Response(200, ['content-type' => 'image/jpeg'], Stream::factory($fixture));
    }

    $client = new Client();
    $client->getEmitter()->attach(new Mock($responses));

    $logger = new Logger('test', [new TestHandler()]);
    $download = new DownloadImages($client, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);

    $html = '<p><img class="alignnone wp-image-1153" src="http://piketty.blog.lemonde.fr/files/2017/10/F1FR-530x375.jpg" alt="" width="628" height="444" srcset="http://piketty.blog.lemonde.fr/files/2017/10/F1FR-530x375.jpg 530w, http://piketty.blog.lemonde.fr/files/2017/10/F1FR-768x543.jpg 768w, http://piketty.blog.lemonde.fr/files/2017/10/F1FR-900x636.jpg 900w" sizes="(max-width: 628px) 100vw, 628px" /></p>';
    $articleUrl = 'http://piketty.blog.lemonde.fr/2017/10/12/budget-2018-la-jeunesse-sacrifiee/';

    $res = $download->processHtml(123, $html, $articleUrl);

    $this->assertNotContains('http://piketty.blog.lemonde.fr/', $res, 'Image srcset attribute were not replaced');
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue