Merge pull request #3965 from nicofrand/previewPic

Preview picture: use the 1st pic retrieved if no og:image set
This commit is contained in:
Kevin Decherf 2019-05-26 17:47:44 +02:00 committed by GitHub
commit 5c0701ba41
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 123 additions and 14 deletions

View file

@ -12,8 +12,8 @@ use Wallabag\CoreBundle\Entity\Entry;
use Wallabag\CoreBundle\Tools\Utils;
/**
* This kind of proxy class take care of getting the content from an url
* and update the entry with what it found.
* This kind of proxy class takes care of getting the content from an url
* and updates the entry with what it found.
*/
class ContentProxy
{
@ -289,13 +289,25 @@ class ContentProxy
$this->updateLanguage($entry, $content['language']);
}
$previewPictureUrl = '';
if (!empty($content['open_graph']['og_image'])) {
$this->updatePreviewPicture($entry, $content['open_graph']['og_image']);
$previewPictureUrl = $content['open_graph']['og_image'];
}
// if content is an image, define it as a preview too
if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
$this->updatePreviewPicture($entry, $content['url']);
$previewPictureUrl = $content['url'];
} elseif (empty($previewPictureUrl)) {
$this->logger->debug('Extracting images from content to provide a default preview picture');
$imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']);
$this->logger->debug(\count($imagesUrls) . ' pictures found');
if (!empty($imagesUrls)) {
$previewPictureUrl = $imagesUrls[0];
}
}
if (!empty($previewPictureUrl)) {
$this->updatePreviewPicture($entry, $previewPictureUrl);
}
if (!empty($content['content_type'])) {

View file

@ -30,6 +30,25 @@ class DownloadImages
$this->setFolder();
}
/**
 * Collect the URLs of every image referenced by an HTML fragment.
 *
 * The `src` attribute of each <img> element is gathered first, followed by
 * the URLs returned by getSrcsetUrls() for the same elements. Duplicates are
 * dropped with array_unique(), which keeps the first occurrence and
 * preserves the original keys (the result is not re-indexed).
 *
 * @param string $html HTML markup to inspect
 *
 * @return string[] Distinct image URLs, `src` values first
 */
public static function extractImagesUrlsFromHtml($html)
{
    // A single crawler over the <img> nodes feeds both the src and srcset lookups.
    $imgNodes = (new Crawler($html))->filterXpath('//img');

    $srcUrls = $imgNodes->extract(['src']);
    $srcsetUrls = self::getSrcsetUrls($imgNodes);

    $allUrls = array_merge($srcUrls, $srcsetUrls);

    return array_unique($allUrls);
}
/**
* Process the html and extract image from it, save them to local and return the updated html.
*
@ -41,13 +60,7 @@ class DownloadImages
*/
public function processHtml($entryId, $html, $url)
{
$crawler = new Crawler($html);
$imagesCrawler = $crawler
->filterXpath('//img');
$imagesUrls = $imagesCrawler
->extract(['src']);
$imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler);
$imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
$imagesUrls = self::extractImagesUrlsFromHtml($html);
$relativePath = $this->getRelativePath($entryId);
@ -199,7 +212,7 @@ class DownloadImages
*
* @return array An array of urls
*/
private function getSrcsetUrls(Crawler $imagesCrawler)
private static function getSrcsetUrls(Crawler $imagesCrawler)
{
$urls = [];
$iterator = $imagesCrawler

View file

@ -214,6 +214,90 @@ class ContentProxyTest extends TestCase
$this->assertSame('1.1.1.1', $entry->getDomainName());
}
/**
 * With no og_image in the fetched data, the picture embedded in the HTML
 * content is expected to become the entry's preview picture.
 */
public function testWithContentAndContentImage()
{
    $html = "<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>";

    // Payload handed back by the mocked Graby::fetchContent().
    $fetchedContent = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => null,
        ],
    ];

    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())->method('tag');

    $graby = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $graby->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $contentProxy = new ContentProxy(
        $graby,
        $tagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertSame($html, $entry->getContent());
    // The only <img> of the content is used since og_image is null.
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
/**
 * When the fetched data carries an og_image, that URL is expected to be the
 * preview picture even though the HTML content embeds a different image.
 */
public function testWithContentImageAndOgImage()
{
    $html = "<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>";

    // Payload handed back by the mocked Graby::fetchContent().
    $fetchedContent = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => 'http://3.3.3.3/cover.jpg',
        ],
    ];

    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())->method('tag');

    $graby = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $graby->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $contentProxy = new ContentProxy(
        $graby,
        $tagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertSame($html, $entry->getContent());
    // The og_image wins over the picture found inside the content.
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
public function testWithContentAndBadLanguage()
{
$tagger = $this->getTaggerMock();
@ -415,7 +499,7 @@ class ContentProxyTest extends TestCase
$records = $handler->getRecords();
$this->assertCount(1, $records);
$this->assertCount(3, $records);
$this->assertContains('Error while defining date', $records[0]['message']);
}

View file

@ -121,7 +121,7 @@ class WallabagV1ControllerTest extends WallabagCoreTestCase
$this->assertInstanceOf('Wallabag\CoreBundle\Entity\Entry', $content);
$this->assertEmpty($content->getMimetype(), 'Mimetype for http://www.framablog.org is empty');
$this->assertEmpty($content->getPreviewPicture(), 'Preview picture for http://www.framablog.org is empty');
$this->assertSame($content->getPreviewPicture(), 'http://www.framablog.org/public/_img/framablog/wallaby_baby.jpg');
$this->assertEmpty($content->getLanguage(), 'Language for http://www.framablog.org is empty');
$tags = $content->getTags();