mirror of
https://github.com/wallabag/wallabag.git
synced 2024-11-26 19:11:07 +00:00
Use an alternative way to detect image
When parsing content to retrieve images to save locally, we only check for the content-type of the image response. In some case, that value is empty. Now we’re also checking for the first few bytes of the content as an alternative to detect if it’s an image wallabag can handle. We might get higher image supports using that alternative method.
This commit is contained in:
parent
a687c8d915
commit
577c0b6dd8
3 changed files with 70 additions and 5 deletions
|
@ -5,6 +5,7 @@ namespace Wallabag\CoreBundle\Helper;
|
|||
use Psr\Log\LoggerInterface;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
use GuzzleHttp\Client;
|
||||
use GuzzleHttp\Message\Response;
|
||||
use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser;
|
||||
use Symfony\Component\Finder\Finder;
|
||||
|
||||
|
@ -116,13 +117,11 @@ class DownloadImages
|
|||
return false;
|
||||
}
|
||||
|
||||
$ext = $this->mimeGuesser->guess($res->getHeader('content-type'));
|
||||
$this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]);
|
||||
if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) {
|
||||
$this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath);
|
||||
|
||||
$ext = $this->getExtensionFromResponse($res, $imagePath);
|
||||
if (false === $res) {
|
||||
return false;
|
||||
}
|
||||
|
||||
$hashImage = hash('crc32', $absolutePath);
|
||||
$localPath = $folderPath.'/'.$hashImage.'.'.$ext;
|
||||
|
||||
|
@ -237,4 +236,45 @@ class DownloadImages
|
|||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve and validate the extension from the response of the url of the image.
|
||||
*
|
||||
* @param Response $res Guzzle Response
|
||||
* @param string $imagePath Path from the src image from the content (used for log only)
|
||||
*
|
||||
* @return string|false Extension name or false if validation failed
|
||||
*/
|
||||
private function getExtensionFromResponse(Response $res, $imagePath)
|
||||
{
|
||||
$ext = $this->mimeGuesser->guess($res->getHeader('content-type'));
|
||||
$this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]);
|
||||
|
||||
// ok header doesn't have the extension, try a different way
|
||||
if (empty($ext)) {
|
||||
$types = [
|
||||
'jpeg' => "\xFF\xD8\xFF",
|
||||
'gif' => 'GIF',
|
||||
'png' => "\x89\x50\x4e\x47\x0d\x0a",
|
||||
];
|
||||
$bytes = substr((string) $res->getBody(), 0, 8);
|
||||
|
||||
foreach ($types as $type => $header) {
|
||||
if (0 === strpos($bytes, $header)) {
|
||||
$ext = $type;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
$this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]);
|
||||
}
|
||||
|
||||
if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) {
|
||||
$this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
return $ext;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -157,4 +157,29 @@ class DownloadImagesTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->assertFalse($res, 'Absolute image can not be determined, so it will not be replaced');
|
||||
}
|
||||
|
||||
public function testProcessRealImage()
|
||||
{
|
||||
$client = new Client();
|
||||
|
||||
$mock = new Mock([
|
||||
new Response(200, ['content-type' => null], Stream::factory(file_get_contents(__DIR__.'/../fixtures/image-no-content-type.jpg'))),
|
||||
]);
|
||||
|
||||
$client->getEmitter()->attach($mock);
|
||||
|
||||
$logHandler = new TestHandler();
|
||||
$logger = new Logger('test', array($logHandler));
|
||||
|
||||
$download = new DownloadImages($client, sys_get_temp_dir().'/wallabag_test', 'http://wallabag.io/', $logger);
|
||||
|
||||
$res = $download->processSingleImage(
|
||||
123,
|
||||
'https://cdn.theconversation.com/files/157200/article/width926/gsj2rjp2-1487348607.jpg',
|
||||
'https://theconversation.com/conversation-avec-gerald-bronner-ce-nest-pas-la-post-verite-qui-nous-menace-mais-lextension-de-notre-credulite-73089'
|
||||
);
|
||||
|
||||
$this->assertContains('http://wallabag.io/assets/images/9/b/9b0ead26/', $res, 'Content-Type was empty but data is ok for an image');
|
||||
$this->assertContains('DownloadImages: Checking extension (alternative)', $logHandler->getRecords()[3]['message']);
|
||||
}
|
||||
}
|
||||
|
|
BIN
tests/Wallabag/CoreBundle/fixtures/image-no-content-type.jpg
Normal file
BIN
tests/Wallabag/CoreBundle/fixtures/image-no-content-type.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 346 KiB |
Loading…
Reference in a new issue