Replace images with &

Images with `&` in the path weren’t properly replaced because the `&` might be encoded as `&amp;` in the HTML instead.

Replacing `&` with `&amp;` fixes the problem.
This commit is contained in:
Jeremy Benoist 2017-06-01 22:50:33 +02:00
parent 5901516805
commit fcad69a427
No known key found for this signature in database
GPG key ID: BCA73962457ACC3C
2 changed files with 28 additions and 4 deletions

View file

@ -66,6 +66,12 @@ class DownloadImages
continue;
}
// If the image URL contains "&" and cannot be found verbatim in the HTML,
// the markup most likely carries it in its entity-encoded form ("&amp;").
// Switch to that form so the str_replace() that follows actually matches.
if (false !== stripos($image, '&') && false === stripos($html, $image)) {
    $image = str_replace('&', '&amp;', $image);
}
$html = str_replace($image, $imagePath, $html);
}
@ -114,7 +120,7 @@ class DownloadImages
$ext = $this->mimeGuesser->guess($res->getHeader('content-type'));
$this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]);
if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) {
$this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping '.$imagePath);
$this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath);
return false;
}

View file

@ -12,7 +12,24 @@ use GuzzleHttp\Stream\Stream;
class DownloadImagesTest extends \PHPUnit_Framework_TestCase
{
public function testProcessHtml()
/**
 * Data provider for testProcessHtml().
 *
 * Each named dataset is a pair: the HTML snippet handed to processHtml()
 * and the URL of the page that snippet comes from.
 *
 * The 'image with &' case carries an entity-encoded ampersand ("&amp;")
 * in the query string of the image's src attribute.
 *
 * @return array
 */
public function dataForSuccessImage()
{
return [
'imgur' => [
'<div><img src="http://i.imgur.com/T9qgcHc.jpg" /></div>',
'http://imgur.com/gallery/WxtWY',
],
'image with &' => [
'<div><img src="https://i2.wp.com/www.tvaddons.ag/wp-content/uploads/2017/01/Screen-Shot-2017-01-07-at-10.17.40-PM.jpg?w=640&amp;ssl=1" /></div>',
'https://www.tvaddons.ag/realdebrid-kodi-jarvis/',
],
];
}
/**
* @dataProvider dataForSuccessImage
*/
public function testProcessHtml($html, $url)
{
$client = new Client();
@ -27,9 +44,10 @@ class DownloadImagesTest extends \PHPUnit_Framework_TestCase
$download = new DownloadImages($client, sys_get_temp_dir().'/wallabag_test', 'http://wallabag.io/', $logger);
$res = $download->processHtml(123, '<div><img src="http://i.imgur.com/T9qgcHc.jpg" /></div>', 'http://imgur.com/gallery/WxtWY');
$res = $download->processHtml(123, $html, $url);
$this->assertContains('http://wallabag.io/assets/images/9/b/9b0ead26/c638b4c2.png', $res);
// this is the base path of all images (since it's calculated using the entry id: 123)
$this->assertContains('http://wallabag.io/assets/images/9/b/9b0ead26/', $res);
}
public function testProcessHtmlWithBadImage()