mirror of
https://github.com/wallabag/wallabag.git
synced 2025-04-25 19:34:07 +00:00
Merge pull request #3965 from nicofrand/previewPic
Preview picture: use the 1st pic retrieved if no og:image set
This commit is contained in:
commit
5c0701ba41
4 changed files with 123 additions and 14 deletions
|
@ -12,8 +12,8 @@ use Wallabag\CoreBundle\Entity\Entry;
|
||||||
use Wallabag\CoreBundle\Tools\Utils;
|
use Wallabag\CoreBundle\Tools\Utils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This kind of proxy class take care of getting the content from an url
|
* This kind of proxy class takes care of getting the content from an url
|
||||||
* and update the entry with what it found.
|
* and updates the entry with what it found.
|
||||||
*/
|
*/
|
||||||
class ContentProxy
|
class ContentProxy
|
||||||
{
|
{
|
||||||
|
@ -289,13 +289,25 @@ class ContentProxy
|
||||||
$this->updateLanguage($entry, $content['language']);
|
$this->updateLanguage($entry, $content['language']);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$previewPictureUrl = '';
|
||||||
if (!empty($content['open_graph']['og_image'])) {
|
if (!empty($content['open_graph']['og_image'])) {
|
||||||
$this->updatePreviewPicture($entry, $content['open_graph']['og_image']);
|
$previewPictureUrl = $content['open_graph']['og_image'];
|
||||||
}
|
}
|
||||||
|
|
||||||
// if content is an image, define it as a preview too
|
// if content is an image, define it as a preview too
|
||||||
if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
|
if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
|
||||||
$this->updatePreviewPicture($entry, $content['url']);
|
$previewPictureUrl = $content['url'];
|
||||||
|
} elseif (empty($previewPictureUrl)) {
|
||||||
|
$this->logger->debug('Extracting images from content to provide a default preview picture');
|
||||||
|
$imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']);
|
||||||
|
$this->logger->debug(\count($imagesUrls) . ' pictures found');
|
||||||
|
if (!empty($imagesUrls)) {
|
||||||
|
$previewPictureUrl = $imagesUrls[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!empty($previewPictureUrl)) {
|
||||||
|
$this->updatePreviewPicture($entry, $previewPictureUrl);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!empty($content['content_type'])) {
|
if (!empty($content['content_type'])) {
|
||||||
|
|
|
@ -30,6 +30,25 @@ class DownloadImages
|
||||||
$this->setFolder();
|
$this->setFolder();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process the html and extract images URLs from it.
|
||||||
|
*
|
||||||
|
* @param string $html
|
||||||
|
*
|
||||||
|
* @return string[]
|
||||||
|
*/
|
||||||
|
public static function extractImagesUrlsFromHtml($html)
|
||||||
|
{
|
||||||
|
$crawler = new Crawler($html);
|
||||||
|
$imagesCrawler = $crawler
|
||||||
|
->filterXpath('//img');
|
||||||
|
$imagesUrls = $imagesCrawler
|
||||||
|
->extract(['src']);
|
||||||
|
$imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler);
|
||||||
|
|
||||||
|
return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Process the html and extract image from it, save them to local and return the updated html.
|
* Process the html and extract image from it, save them to local and return the updated html.
|
||||||
*
|
*
|
||||||
|
@ -41,13 +60,7 @@ class DownloadImages
|
||||||
*/
|
*/
|
||||||
public function processHtml($entryId, $html, $url)
|
public function processHtml($entryId, $html, $url)
|
||||||
{
|
{
|
||||||
$crawler = new Crawler($html);
|
$imagesUrls = self::extractImagesUrlsFromHtml($html);
|
||||||
$imagesCrawler = $crawler
|
|
||||||
->filterXpath('//img');
|
|
||||||
$imagesUrls = $imagesCrawler
|
|
||||||
->extract(['src']);
|
|
||||||
$imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler);
|
|
||||||
$imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
|
|
||||||
|
|
||||||
$relativePath = $this->getRelativePath($entryId);
|
$relativePath = $this->getRelativePath($entryId);
|
||||||
|
|
||||||
|
@ -199,7 +212,7 @@ class DownloadImages
|
||||||
*
|
*
|
||||||
* @return array An array of urls
|
* @return array An array of urls
|
||||||
*/
|
*/
|
||||||
private function getSrcsetUrls(Crawler $imagesCrawler)
|
private static function getSrcsetUrls(Crawler $imagesCrawler)
|
||||||
{
|
{
|
||||||
$urls = [];
|
$urls = [];
|
||||||
$iterator = $imagesCrawler
|
$iterator = $imagesCrawler
|
||||||
|
|
|
@ -214,6 +214,90 @@ class ContentProxyTest extends TestCase
|
||||||
$this->assertSame('1.1.1.1', $entry->getDomainName());
|
$this->assertSame('1.1.1.1', $entry->getDomainName());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function testWithContentAndContentImage()
|
||||||
|
{
|
||||||
|
$tagger = $this->getTaggerMock();
|
||||||
|
$tagger->expects($this->once())
|
||||||
|
->method('tag');
|
||||||
|
|
||||||
|
$graby = $this->getMockBuilder('Graby\Graby')
|
||||||
|
->setMethods(['fetchContent'])
|
||||||
|
->disableOriginalConstructor()
|
||||||
|
->getMock();
|
||||||
|
|
||||||
|
$graby->expects($this->any())
|
||||||
|
->method('fetchContent')
|
||||||
|
->willReturn([
|
||||||
|
'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>",
|
||||||
|
'title' => 'this is my title',
|
||||||
|
'url' => 'http://1.1.1.1',
|
||||||
|
'content_type' => 'text/html',
|
||||||
|
'language' => 'fr',
|
||||||
|
'status' => '200',
|
||||||
|
'open_graph' => [
|
||||||
|
'og_title' => 'my OG title',
|
||||||
|
'og_description' => 'OG desc',
|
||||||
|
'og_image' => null,
|
||||||
|
],
|
||||||
|
]);
|
||||||
|
|
||||||
|
$proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
|
||||||
|
$entry = new Entry(new User());
|
||||||
|
$proxy->updateEntry($entry, 'http://0.0.0.0');
|
||||||
|
|
||||||
|
$this->assertSame('http://1.1.1.1', $entry->getUrl());
|
||||||
|
$this->assertSame('this is my title', $entry->getTitle());
|
||||||
|
$this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>", $entry->getContent());
|
||||||
|
$this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
|
||||||
|
$this->assertSame('text/html', $entry->getMimetype());
|
||||||
|
$this->assertSame('fr', $entry->getLanguage());
|
||||||
|
$this->assertSame('200', $entry->getHttpStatus());
|
||||||
|
$this->assertSame(0.0, $entry->getReadingTime());
|
||||||
|
$this->assertSame('1.1.1.1', $entry->getDomainName());
|
||||||
|
}
|
||||||
|
|
||||||
|
public function testWithContentImageAndOgImage()
|
||||||
|
{
|
||||||
|
$tagger = $this->getTaggerMock();
|
||||||
|
$tagger->expects($this->once())
|
||||||
|
->method('tag');
|
||||||
|
|
||||||
|
$graby = $this->getMockBuilder('Graby\Graby')
|
||||||
|
->setMethods(['fetchContent'])
|
||||||
|
->disableOriginalConstructor()
|
||||||
|
->getMock();
|
||||||
|
|
||||||
|
$graby->expects($this->any())
|
||||||
|
->method('fetchContent')
|
||||||
|
->willReturn([
|
||||||
|
'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>",
|
||||||
|
'title' => 'this is my title',
|
||||||
|
'url' => 'http://1.1.1.1',
|
||||||
|
'content_type' => 'text/html',
|
||||||
|
'language' => 'fr',
|
||||||
|
'status' => '200',
|
||||||
|
'open_graph' => [
|
||||||
|
'og_title' => 'my OG title',
|
||||||
|
'og_description' => 'OG desc',
|
||||||
|
'og_image' => 'http://3.3.3.3/cover.jpg',
|
||||||
|
],
|
||||||
|
]);
|
||||||
|
|
||||||
|
$proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
|
||||||
|
$entry = new Entry(new User());
|
||||||
|
$proxy->updateEntry($entry, 'http://0.0.0.0');
|
||||||
|
|
||||||
|
$this->assertSame('http://1.1.1.1', $entry->getUrl());
|
||||||
|
$this->assertSame('this is my title', $entry->getTitle());
|
||||||
|
$this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>", $entry->getContent());
|
||||||
|
$this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
|
||||||
|
$this->assertSame('text/html', $entry->getMimetype());
|
||||||
|
$this->assertSame('fr', $entry->getLanguage());
|
||||||
|
$this->assertSame('200', $entry->getHttpStatus());
|
||||||
|
$this->assertSame(0.0, $entry->getReadingTime());
|
||||||
|
$this->assertSame('1.1.1.1', $entry->getDomainName());
|
||||||
|
}
|
||||||
|
|
||||||
public function testWithContentAndBadLanguage()
|
public function testWithContentAndBadLanguage()
|
||||||
{
|
{
|
||||||
$tagger = $this->getTaggerMock();
|
$tagger = $this->getTaggerMock();
|
||||||
|
@ -415,7 +499,7 @@ class ContentProxyTest extends TestCase
|
||||||
|
|
||||||
$records = $handler->getRecords();
|
$records = $handler->getRecords();
|
||||||
|
|
||||||
$this->assertCount(1, $records);
|
$this->assertCount(3, $records);
|
||||||
$this->assertContains('Error while defining date', $records[0]['message']);
|
$this->assertContains('Error while defining date', $records[0]['message']);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -121,7 +121,7 @@ class WallabagV1ControllerTest extends WallabagCoreTestCase
|
||||||
|
|
||||||
$this->assertInstanceOf('Wallabag\CoreBundle\Entity\Entry', $content);
|
$this->assertInstanceOf('Wallabag\CoreBundle\Entity\Entry', $content);
|
||||||
$this->assertEmpty($content->getMimetype(), 'Mimetype for http://www.framablog.org is empty');
|
$this->assertEmpty($content->getMimetype(), 'Mimetype for http://www.framablog.org is empty');
|
||||||
$this->assertEmpty($content->getPreviewPicture(), 'Preview picture for http://www.framablog.org is empty');
|
$this->assertSame($content->getPreviewPicture(), 'http://www.framablog.org/public/_img/framablog/wallaby_baby.jpg');
|
||||||
$this->assertEmpty($content->getLanguage(), 'Language for http://www.framablog.org is empty');
|
$this->assertEmpty($content->getLanguage(), 'Language for http://www.framablog.org is empty');
|
||||||
|
|
||||||
$tags = $content->getTags();
|
$tags = $content->getTags();
|
||||||
|
|
Loading…
Reference in a new issue