mirror of
https://github.com/wallabag/wallabag.git
synced 2024-11-15 21:41:06 +00:00
Merge pull request #3965 from nicofrand/previewPic
Preview picture: use the 1st pic retrieved if no og:image set
This commit is contained in:
commit
5c0701ba41
4 changed files with 123 additions and 14 deletions
|
@ -12,8 +12,8 @@ use Wallabag\CoreBundle\Entity\Entry;
|
|||
use Wallabag\CoreBundle\Tools\Utils;
|
||||
|
||||
/**
|
||||
* This kind of proxy class take care of getting the content from an url
|
||||
* and update the entry with what it found.
|
||||
* This kind of proxy class takes care of getting the content from an url
|
||||
* and updates the entry with what it found.
|
||||
*/
|
||||
class ContentProxy
|
||||
{
|
||||
|
@ -289,13 +289,25 @@ class ContentProxy
|
|||
$this->updateLanguage($entry, $content['language']);
|
||||
}
|
||||
|
||||
$previewPictureUrl = '';
|
||||
if (!empty($content['open_graph']['og_image'])) {
|
||||
$this->updatePreviewPicture($entry, $content['open_graph']['og_image']);
|
||||
$previewPictureUrl = $content['open_graph']['og_image'];
|
||||
}
|
||||
|
||||
// if content is an image, define it as a preview too
|
||||
if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
|
||||
$this->updatePreviewPicture($entry, $content['url']);
|
||||
$previewPictureUrl = $content['url'];
|
||||
} elseif (empty($previewPictureUrl)) {
|
||||
$this->logger->debug('Extracting images from content to provide a default preview picture');
|
||||
$imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']);
|
||||
$this->logger->debug(\count($imagesUrls) . ' pictures found');
|
||||
if (!empty($imagesUrls)) {
|
||||
$previewPictureUrl = $imagesUrls[0];
|
||||
}
|
||||
}
|
||||
|
||||
if (!empty($previewPictureUrl)) {
|
||||
$this->updatePreviewPicture($entry, $previewPictureUrl);
|
||||
}
|
||||
|
||||
if (!empty($content['content_type'])) {
|
||||
|
|
|
@ -30,6 +30,25 @@ class DownloadImages
|
|||
$this->setFolder();
|
||||
}
|
||||
|
||||
/**
 * Process the html and extract images URLs from it.
 *
 * Collects the "src" attribute of every <img> element, plus the URLs
 * returned by getSrcsetUrls() for those same elements. Empty values and
 * duplicates are removed and the result is reindexed, so the first usable
 * image URL (if any) is always available at index 0.
 *
 * @param string $html
 *
 * @return string[] A 0-indexed list of unique, non-empty image URLs
 */
public static function extractImagesUrlsFromHtml($html)
{
    $crawler = new Crawler($html);
    $imagesCrawler = $crawler->filterXpath('//img');

    $imagesUrls = $imagesCrawler->extract(['src']);
    $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler);

    // An <img> without a "src" attribute (e.g. a lazy-loaded image) may be
    // extracted as an empty value; keeping it would let callers that read
    // the first entry pick up an unusable URL.
    $urls = array_filter(
        array_merge($imagesUrls, $imagesSrcsetUrls),
        static function ($url) {
            return \is_string($url) && '' !== trim($url);
        }
    );

    // array_filter() and array_unique() both preserve keys: reindex so the
    // returned array is a plain list without gaps.
    return array_values(array_unique($urls));
}
|
||||
|
||||
/**
|
||||
* Process the html and extract images from it, save them locally and return the updated html.
|
||||
*
|
||||
|
@ -41,13 +60,7 @@ class DownloadImages
|
|||
*/
|
||||
public function processHtml($entryId, $html, $url)
|
||||
{
|
||||
$crawler = new Crawler($html);
|
||||
$imagesCrawler = $crawler
|
||||
->filterXpath('//img');
|
||||
$imagesUrls = $imagesCrawler
|
||||
->extract(['src']);
|
||||
$imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler);
|
||||
$imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
|
||||
$imagesUrls = self::extractImagesUrlsFromHtml($html);
|
||||
|
||||
$relativePath = $this->getRelativePath($entryId);
|
||||
|
||||
|
@ -199,7 +212,7 @@ class DownloadImages
|
|||
*
|
||||
* @return array An array of urls
|
||||
*/
|
||||
private function getSrcsetUrls(Crawler $imagesCrawler)
|
||||
private static function getSrcsetUrls(Crawler $imagesCrawler)
|
||||
{
|
||||
$urls = [];
|
||||
$iterator = $imagesCrawler
|
||||
|
|
|
@ -214,6 +214,90 @@ class ContentProxyTest extends TestCase
|
|||
$this->assertSame('1.1.1.1', $entry->getDomainName());
|
||||
}
|
||||
|
||||
/**
 * When no og:image is provided, the first image found in the fetched
 * html must be used as the entry preview picture.
 */
public function testWithContentAndContentImage()
{
    $html = "<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>";

    // Content as returned by the (mocked) fetcher: og_image is deliberately null.
    $fetchedContent = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => null,
        ],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $grabyBuilder = $this->getMockBuilder('Graby\Graby');
    $grabyBuilder->setMethods(['fetchContent']);
    $grabyBuilder->disableOriginalConstructor();
    $grabyMock = $grabyBuilder->getMock();

    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertSame($html, $entry->getContent());
    // The picture embedded in the content is used as the preview.
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
|
||||
|
||||
/**
 * When an og:image is provided, it must be used as the preview picture
 * even if the fetched html contains another image.
 */
public function testWithContentImageAndOgImage()
{
    $html = "<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>";

    // Content as returned by the (mocked) fetcher: both an inline image
    // and an og_image are present.
    $fetchedContent = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => 'http://3.3.3.3/cover.jpg',
        ],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $grabyBuilder = $this->getMockBuilder('Graby\Graby');
    $grabyBuilder->setMethods(['fetchContent']);
    $grabyBuilder->disableOriginalConstructor();
    $grabyMock = $grabyBuilder->getMock();

    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertSame($html, $entry->getContent());
    // The og:image wins over the picture embedded in the content.
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
|
||||
|
||||
public function testWithContentAndBadLanguage()
|
||||
{
|
||||
$tagger = $this->getTaggerMock();
|
||||
|
@ -415,7 +499,7 @@ class ContentProxyTest extends TestCase
|
|||
|
||||
$records = $handler->getRecords();
|
||||
|
||||
$this->assertCount(1, $records);
|
||||
$this->assertCount(3, $records);
|
||||
$this->assertContains('Error while defining date', $records[0]['message']);
|
||||
}
|
||||
|
||||
|
|
|
@ -121,7 +121,7 @@ class WallabagV1ControllerTest extends WallabagCoreTestCase
|
|||
|
||||
$this->assertInstanceOf('Wallabag\CoreBundle\Entity\Entry', $content);
|
||||
$this->assertEmpty($content->getMimetype(), 'Mimetype for http://www.framablog.org is empty');
|
||||
$this->assertEmpty($content->getPreviewPicture(), 'Preview picture for http://www.framablog.org is empty');
|
||||
$this->assertSame($content->getPreviewPicture(), 'http://www.framablog.org/public/_img/framablog/wallaby_baby.jpg');
|
||||
$this->assertEmpty($content->getLanguage(), 'Language for http://www.framablog.org is empty');
|
||||
|
||||
$tags = $content->getTags();
|
||||
|
|
Loading…
Reference in a new issue