mirror of
https://github.com/wallabag/wallabag.git
synced 2025-01-08 16:05:25 +00:00
Fix srcset attribute on images downloaded
This commit is contained in:
parent
9707ac4661
commit
c15bb5ad72
2 changed files with 54 additions and 3 deletions
|
@ -42,14 +42,17 @@ class DownloadImages
|
|||
public function processHtml($entryId, $html, $url)
|
||||
{
|
||||
$crawler = new Crawler($html);
|
||||
$result = $crawler
|
||||
->filterXpath('//img')
|
||||
$imagesCrawler = $crawler
|
||||
->filterXpath('//img');
|
||||
$imagesUrls = $imagesCrawler
|
||||
->extract(['src']);
|
||||
$imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler);
|
||||
$imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
|
||||
|
||||
$relativePath = $this->getRelativePath($entryId);
|
||||
|
||||
// download and save the image to the folder
|
||||
foreach ($result as $image) {
|
||||
foreach ($imagesUrls as $image) {
|
||||
$imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath);
|
||||
|
||||
if (false === $imagePath) {
|
||||
|
@ -171,6 +174,33 @@ class DownloadImages
|
|||
@rmdir($folderPath);
|
||||
}
|
||||
|
||||
/**
 * Get images urls from the srcset image attribute.
 *
 * Each srcset value is a comma-separated list of candidates, every candidate
 * being an url optionally followed by a width/density descriptor
 * (e.g. "small.jpg 530w, large.jpg 900w"). Only the url part is kept.
 *
 * NOTE: candidates are split on every comma, so an url that itself contains
 * a comma is not supported.
 *
 * @param Crawler $imagesCrawler Crawler holding the image nodes to inspect
 *
 * @return array An array of urls (may contain duplicates)
 */
protected function getSrcsetUrls(Crawler $imagesCrawler)
{
    $urls = [];

    foreach ($imagesCrawler->getIterator() as $image) {
        $srcsetAttribute = $image->getAttribute('srcset');

        // nothing to extract when the attribute is missing or empty
        if ('' === $srcsetAttribute) {
            continue;
        }

        $srcsetUrls = [];

        foreach (explode(',', $srcsetAttribute) as $candidate) {
            // the url and its descriptor can be separated by any run of
            // whitespace (spaces, tabs, line breaks), not only a single space
            $parts = preg_split('/\s+/', $candidate, -1, PREG_SPLIT_NO_EMPTY);

            // a trailing or doubled comma produces an empty candidate: skip it
            // instead of returning an empty url
            if (isset($parts[0])) {
                $srcsetUrls[] = $parts[0];
            }
        }

        // urls of the current image are put in front of the ones already collected
        $urls = array_merge($srcsetUrls, $urls);
    }

    return $urls;
}
|
||||
|
||||
/**
|
||||
* Setup base folder where all images are going to be saved.
|
||||
*/
|
||||
|
|
|
@ -183,4 +183,25 @@ class DownloadImagesTest extends TestCase
|
|||
$this->assertContains('http://wallabag.io/assets/images/9/b/9b0ead26/', $res, 'Content-Type was empty but data is ok for an image');
|
||||
$this->assertContains('DownloadImages: Checking extension (alternative)', $logHandler->getRecords()[3]['message']);
|
||||
}
|
||||
|
||||
/**
 * An image carrying both a src and a srcset attribute must not leave any
 * url of the original host in the processed html.
 */
public function testProcessImageWithSrcset()
{
    // raw bytes of the fixture picture, used as the body of every mocked reply
    $imageContent = file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg');

    // queue three independent replies, each one with its own stream
    $responses = [];
    for ($i = 0; $i < 3; ++$i) {
        $responses[] = new Response(200, ['content-type' => 'image/jpeg'], Stream::factory($imageContent));
    }

    $client = new Client();
    $client->getEmitter()->attach(new Mock($responses));

    $logger = new Logger('test', [new TestHandler()]);

    $download = new DownloadImages($client, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);

    // the src url is repeated as the first srcset candidate
    $html = '<p><img class="alignnone wp-image-1153" src="http://piketty.blog.lemonde.fr/files/2017/10/F1FR-530x375.jpg" alt="" width="628" height="444" srcset="http://piketty.blog.lemonde.fr/files/2017/10/F1FR-530x375.jpg 530w, http://piketty.blog.lemonde.fr/files/2017/10/F1FR-768x543.jpg 768w, http://piketty.blog.lemonde.fr/files/2017/10/F1FR-900x636.jpg 900w" sizes="(max-width: 628px) 100vw, 628px" /></p>';
    $pageUrl = 'http://piketty.blog.lemonde.fr/2017/10/12/budget-2018-la-jeunesse-sacrifiee/';

    $res = $download->processHtml(123, $html, $pageUrl);

    $this->assertNotContains('http://piketty.blog.lemonde.fr/', $res, 'Image srcset attribute were not replaced');
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue