diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index bc257ffbc..ca01dec8d 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -12,8 +12,8 @@ use Wallabag\CoreBundle\Entity\Entry; use Wallabag\CoreBundle\Tools\Utils; /** - * This kind of proxy class take care of getting the content from an url - * and update the entry with what it found. + * This kind of proxy class takes care of getting the content from an url + * and updates the entry with what it found. */ class ContentProxy { @@ -289,13 +289,25 @@ class ContentProxy $this->updateLanguage($entry, $content['language']); } + $previewPictureUrl = ''; if (!empty($content['open_graph']['og_image'])) { - $this->updatePreviewPicture($entry, $content['open_graph']['og_image']); + $previewPictureUrl = $content['open_graph']['og_image']; } // if content is an image, define it as a preview too if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { - $this->updatePreviewPicture($entry, $content['url']); + $previewPictureUrl = $content['url']; + } elseif (empty($previewPictureUrl)) { + $this->logger->debug('Extracting images from content to provide a default preview picture'); + $imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']); + $this->logger->debug(\count($imagesUrls) . ' pictures found'); + if (!empty($imagesUrls)) { + $previewPictureUrl = $imagesUrls[0]; + } + } + + if (!empty($previewPictureUrl)) { + $this->updatePreviewPicture($entry, $previewPictureUrl); } if (!empty($content['content_type'])) { diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 9a7e98285..c1645e45a 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -30,6 +30,25 @@ class DownloadImages $this->setFolder(); } + /** + * Process the html and extract images URLs from it. + * + * @param string $html + * + * @return string[] + */ + public static function extractImagesUrlsFromHtml($html) + { + $crawler = new Crawler($html); + $imagesCrawler = $crawler + ->filterXpath('//img'); + $imagesUrls = $imagesCrawler + ->extract(['src']); + $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler); + + return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); + } + /** * Process the html and extract image from it, save them to local and return the updated html. * @@ -41,13 +60,7 @@ class DownloadImages */ public function processHtml($entryId, $html, $url) { - $crawler = new Crawler($html); - $imagesCrawler = $crawler - ->filterXpath('//img'); - $imagesUrls = $imagesCrawler - ->extract(['src']); - $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler); - $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); + $imagesUrls = self::extractImagesUrlsFromHtml($html); $relativePath = $this->getRelativePath($entryId); @@ -199,7 +212,7 @@ class DownloadImages * * @return array An array of urls */ - private function getSrcsetUrls(Crawler $imagesCrawler) + private static function getSrcsetUrls(Crawler $imagesCrawler) { $urls = []; $iterator = $imagesCrawler