diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 54e23a052..ed888cdb0 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -5,6 +5,7 @@ namespace Wallabag\CoreBundle\Helper; use Psr\Log\LoggerInterface; use Symfony\Component\DomCrawler\Crawler; use GuzzleHttp\Client; +use GuzzleHttp\Message\Response; use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; use Symfony\Component\Finder\Finder; @@ -116,13 +117,11 @@ class DownloadImages return false; } - $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); - $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); - if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { - $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath); - + $ext = $this->getExtensionFromResponse($res, $imagePath); + if (false === $res) { return false; } + $hashImage = hash('crc32', $absolutePath); $localPath = $folderPath.'/'.$hashImage.'.'.$ext; @@ -237,4 +236,45 @@ class DownloadImages return false; } + + /** + * Retrieve and validate the extension from the response of the url of the image. + * + * @param Response $res Guzzle Response + * @param string $imagePath Path from the src image from the content (used for log only) + * + * @return string|false Extension name or false if validation failed + */ + private function getExtensionFromResponse(Response $res, $imagePath) + { + $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); + $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); + + // ok header doesn't have the extension, try a different way + if (empty($ext)) { + $types = [ + 'jpeg' => "\xFF\xD8\xFF", + 'gif' => 'GIF', + 'png' => "\x89\x50\x4e\x47\x0d\x0a", + ]; + $bytes = substr((string) $res->getBody(), 0, 8); + + foreach ($types as $type => $header) { + if (0 === strpos($bytes, $header)) { + $ext = $type; + break; + } + } + + $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]); + } + + if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { + $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath); + + return false; + } + + return $ext; + } } diff --git a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php index 9125f8dcb..c02f96587 100644 --- a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php +++ b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php @@ -157,4 +157,29 @@ class DownloadImagesTest extends \PHPUnit_Framework_TestCase $this->assertFalse($res, 'Absolute image can not be determined, so it will not be replaced'); } + + public function testProcessRealImage() + { + $client = new Client(); + + $mock = new Mock([ + new Response(200, ['content-type' => null], Stream::factory(file_get_contents(__DIR__.'/../fixtures/image-no-content-type.jpg'))), + ]); + + $client->getEmitter()->attach($mock); + + $logHandler = new TestHandler(); + $logger = new Logger('test', array($logHandler)); + + $download = new DownloadImages($client, sys_get_temp_dir().'/wallabag_test', 'http://wallabag.io/', $logger); + + $res = $download->processSingleImage( + 123, + 'https://cdn.theconversation.com/files/157200/article/width926/gsj2rjp2-1487348607.jpg', + 'https://theconversation.com/conversation-avec-gerald-bronner-ce-nest-pas-la-post-verite-qui-nous-menace-mais-lextension-de-notre-credulite-73089' + ); + + $this->assertContains('http://wallabag.io/assets/images/9/b/9b0ead26/', $res, 'Content-Type was empty but data is ok for an image'); + $this->assertContains('DownloadImages: Checking extension (alternative)', $logHandler->getRecords()[3]['message']); + } } diff --git a/tests/Wallabag/CoreBundle/fixtures/image-no-content-type.jpg b/tests/Wallabag/CoreBundle/fixtures/image-no-content-type.jpg new file mode 100644 index 000000000..0c60e952c Binary files /dev/null and b/tests/Wallabag/CoreBundle/fixtures/image-no-content-type.jpg differ