Merge pull request #1397 from wallabag/v2-graby

Integrate graby
This commit is contained in:
Nicolas Lœuillet 2015-09-11 20:17:42 +02:00
commit 9c08a891f9
12 changed files with 730 additions and 358 deletions

View file

@ -27,20 +27,10 @@
"email": "hello@wallabag.org", "email": "hello@wallabag.org",
"issues": "https://github.com/wallabag/wallabag/issues" "issues": "https://github.com/wallabag/wallabag/issues"
}, },
"repositories": [
{
"type": "vcs",
"url": "https://github.com/wallabag/php-readability"
},
{
"type": "vcs",
"url": "https://github.com/wallabag/Fivefilters_Libraries"
}
],
"require": { "require": {
"php": ">=5.3.3", "php": ">=5.3.3",
"symfony/symfony": "~2.7.0", "symfony/symfony": "~2.7.0",
"doctrine/orm": "~2.2,>=2.2.3", "doctrine/orm": "~2.3",
"doctrine/doctrine-bundle": "~1.2", "doctrine/doctrine-bundle": "~1.2",
"twig/extensions": "~1.0", "twig/extensions": "~1.0",
"symfony/assetic-bundle": "~2.3", "symfony/assetic-bundle": "~2.3",
@ -60,10 +50,9 @@
"willdurand/hateoas-bundle": "~0.5.0", "willdurand/hateoas-bundle": "~0.5.0",
"htmlawed/htmlawed": "~1.1.19", "htmlawed/htmlawed": "~1.1.19",
"liip/theme-bundle": "~1.1.3", "liip/theme-bundle": "~1.1.3",
"wallabag/php-readability": "~1.0.0",
"wallabag/Fivefilters_Libraries": "~1.0",
"pagerfanta/pagerfanta": "~1.0.3", "pagerfanta/pagerfanta": "~1.0.3",
"lexik/form-filter-bundle": "~4.0" "lexik/form-filter-bundle": "~4.0",
"j0k3r/graby": "~1.0"
}, },
"require-dev": { "require-dev": {
"doctrine/doctrine-fixtures-bundle": "~2.2.0", "doctrine/doctrine-fixtures-bundle": "~2.2.0",

708
composer.lock generated

File diff suppressed because it is too large Load diff

View file

@ -8,7 +8,6 @@ use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\HttpFoundation\Response; use Symfony\Component\HttpFoundation\Response;
use Wallabag\CoreBundle\Entity\Entry; use Wallabag\CoreBundle\Entity\Entry;
use Wallabag\CoreBundle\Entity\Tag; use Wallabag\CoreBundle\Entity\Tag;
use Wallabag\CoreBundle\Service\Extractor;
use Hateoas\Configuration\Route; use Hateoas\Configuration\Route;
use Hateoas\Representation\Factory\PagerfantaFactory; use Hateoas\Representation\Factory\PagerfantaFactory;
@ -147,11 +146,10 @@ class WallabagRestController extends Controller
{ {
$url = $request->request->get('url'); $url = $request->request->get('url');
$content = Extractor::extract($url); $entry = $this->get('wallabag_core.content_proxy')->updateEntry(
$entry = new Entry($this->getUser()); new Entry($this->getUser()),
$entry->setUrl($url); $url
$entry->setTitle($request->request->get('title') ?: $content->getTitle()); );
$entry->setContent($content->getBody());
$tags = $request->request->get('tags', ''); $tags = $request->request->get('tags', '');
if (!empty($tags)) { if (!empty($tags)) {

View file

@ -6,7 +6,6 @@ use Sensio\Bundle\FrameworkExtraBundle\Configuration\Route;
use Symfony\Bundle\FrameworkBundle\Controller\Controller; use Symfony\Bundle\FrameworkBundle\Controller\Controller;
use Symfony\Component\HttpFoundation\Request; use Symfony\Component\HttpFoundation\Request;
use Wallabag\CoreBundle\Entity\Entry; use Wallabag\CoreBundle\Entity\Entry;
use Wallabag\CoreBundle\Service\Extractor;
use Wallabag\CoreBundle\Form\Type\NewEntryType; use Wallabag\CoreBundle\Form\Type\NewEntryType;
use Wallabag\CoreBundle\Form\Type\EditEntryType; use Wallabag\CoreBundle\Form\Type\EditEntryType;
use Wallabag\CoreBundle\Filter\EntryFilterType; use Wallabag\CoreBundle\Filter\EntryFilterType;
@ -31,10 +30,7 @@ class EntryController extends Controller
$form->handleRequest($request); $form->handleRequest($request);
if ($form->isValid()) { if ($form->isValid()) {
$content = Extractor::extract($entry->getUrl()); $entry = $this->get('wallabag_core.content_proxy')->updateEntry($entry, $entry->getUrl());
$entry->setTitle($content->getTitle());
$entry->setContent($content->getBody());
$em = $this->getDoctrine()->getManager(); $em = $this->getDoctrine()->getManager();
$em->persist($entry); $em->persist($entry);

View file

@ -108,6 +108,13 @@ class Entry
*/ */
private $domainName; private $domainName;
/**
* @var string
*
* @ORM\Column(name="preview_picture", type="text", nullable=true)
*/
private $previewPicture;
/** /**
* @var bool * @var bool
* *
@ -419,4 +426,28 @@ class Entry
{ {
$this->tags->removeElement($tag); $this->tags->removeElement($tag);
} }
/**
* Set the preview picture of this entry.
*
* The value is stored as given, without validation or normalisation.
*
* @param string $previewPicture Value to store in the previewPicture property
*                               (presumably an image URL — confirm against callers)
*
* @return Entry The current instance, so calls can be chained
*/
public function setPreviewPicture($previewPicture)
{
$this->previewPicture = $previewPicture;
return $this;
}
/**
* Get the preview picture of this entry.
*
* @return string|null The stored value; null when it was never set
*                     (the property has no default value)
*/
public function getPreviewPicture()
{
return $this->previewPicture;
}
} }

View file

@ -1,34 +0,0 @@
<?php
namespace Wallabag\CoreBundle\Helper;
/**
 * Plain holder for the title and body extracted from a page.
 *
 * Both values start out as null and are stored exactly as given to the setters.
 */
class Content
{
    /**
     * @var string|null
     */
    private $title;

    /**
     * @var string|null
     */
    private $body;

    /**
     * Nothing to initialise.
     *
     * This was previously declared as "__constructor", which PHP treats as an
     * ordinary method and never invokes on instantiation.
     */
    public function __construct()
    {
    }

    /**
     * @return string|null The title, or null when none was set
     */
    public function getTitle()
    {
        return $this->title;
    }

    /**
     * @param string $title Title to store
     */
    public function setTitle($title)
    {
        $this->title = $title;
    }

    /**
     * @return string|null The body, or null when none was set
     */
    public function getBody()
    {
        return $this->body;
    }

    /**
     * @param string $body Body to store
     */
    public function setBody($body)
    {
        $this->body = $body;
    }
}

View file

@ -0,0 +1,60 @@
<?php
namespace Wallabag\CoreBundle\Helper;
use Graby\Graby;
use Wallabag\CoreBundle\Entity\Entry;
/**
 * Proxy that takes care of fetching the content of a URL
 * and updating an entry with what was found.
 */
class ContentProxy
{
    /**
     * @var Graby
     */
    protected $graby;

    /**
     * @param Graby $graby Fetcher used to retrieve the content
     */
    public function __construct(Graby $graby)
    {
        $this->graby = $graby;
    }

    /**
     * Fetch content using graby and hydrate the given entry with the result.
     * When no readable content was found, fall back to Open Graph data.
     *
     * @param Entry  $entry Entry to update
     * @param string $url   Url to grab content for
     *
     * @return Entry The same entry instance, updated
     */
    public function updateEntry(Entry $entry, $url)
    {
        $content = $this->graby->fetchContent($url);

        // Open Graph data is optional in the fetched result.
        $openGraph = array();
        if (isset($content['open_graph']) && is_array($content['open_graph'])) {
            $openGraph = $content['open_graph'];
        }

        $title = isset($content['title']) ? $content['title'] : null;
        if (!$title && isset($openGraph['og_title'])) {
            $title = $openGraph['og_title'];
        }

        // A missing "html" key is handled like an explicit false: nothing readable was found.
        $html = isset($content['html']) ? $content['html'] : false;
        if (false === $html) {
            $html = '<p>Unable to retrieve readable content.</p>';

            if (isset($openGraph['og_description'])) {
                $html .= '<p><i>But we found a short description: </i></p>';
                // The description comes from the remote page and is appended to HTML,
                // so it is escaped to prevent markup injection.
                // NOTE(review): assumes graby hands over the description as plain text — confirm.
                $html .= htmlspecialchars($openGraph['og_description'], ENT_QUOTES, 'UTF-8');
            }
        }

        // Prefer the url reported by graby, falling back to the requested one.
        $entry->setUrl(!empty($content['url']) ? $content['url'] : $url);
        $entry->setTitle($title);
        $entry->setContent($html);
        $entry->setMimetype(isset($content['content_type']) ? $content['content_type'] : null);

        if (isset($openGraph['og_image'])) {
            $entry->setPreviewPicture($openGraph['og_image']);
        }

        return $entry;
    }
}

View file

@ -1,28 +0,0 @@
<?php
namespace Wallabag\CoreBundle\Helper;
/**
 * Wrapper around a url that is handed over base64 encoded.
 */
class Url
{
    /**
     * Decoded url (or whatever was last passed to setUrl()).
     */
    public $url;

    /**
     * @param string $url Base64 encoded url; it is decoded before being stored
     */
    public function __construct($url)
    {
        $decoded = base64_decode($url);
        $this->url = $decoded;
    }

    /**
     * @return string The stored url
     */
    public function getUrl()
    {
        return $this->url;
    }

    /**
     * Replace the stored url. Unlike the constructor, no decoding is applied.
     *
     * @param string $url
     */
    public function setUrl($url)
    {
        $this->url = $url;
    }

    /**
     * Tell whether the stored url passes FILTER_VALIDATE_URL.
     *
     * @return bool
     */
    public function isCorrect()
    {
        $validated = filter_var($this->url, FILTER_VALIDATE_URL);

        return false !== $validated;
    }
}

View file

@ -30,3 +30,13 @@ services:
wallabag_core.doctrine.prefixed_naming_strategy: wallabag_core.doctrine.prefixed_naming_strategy:
class: Wallabag\CoreBundle\Doctrine\Mapping\PrefixedNamingStrategy class: Wallabag\CoreBundle\Doctrine\Mapping\PrefixedNamingStrategy
arguments: [%database_table_prefix%] arguments: [%database_table_prefix%]
wallabag_core.graby:
class: Graby\Graby
arguments:
- { error_message: false }
wallabag_core.content_proxy:
class: Wallabag\CoreBundle\Helper\ContentProxy
arguments:
- @wallabag_core.graby

View file

@ -10,7 +10,7 @@
<div class="nav-wrapper cyan darken-1"> <div class="nav-wrapper cyan darken-1">
<ul> <ul>
<li> <li>
<a class="waves-effect" href="/"> <a class="waves-effect" href="{{ path('homepage') }}">
<i class="mdi-action-exit-to-app"></i> <i class="mdi-action-exit-to-app"></i>
</a> </a>
</li> </li>
@ -36,7 +36,7 @@
</nav> </nav>
<ul id="slide-out" class="collapsible side-nav fixed reader-mode" data-collapsible="accordion"> <ul id="slide-out" class="collapsible side-nav fixed reader-mode" data-collapsible="accordion">
<li class="bold border-bottom hide-on-med-and-down"> <li class="bold border-bottom hide-on-med-and-down">
<a class="waves-effect collapsible-header" href="/"> <a class="waves-effect collapsible-header" href="{{ path('homepage') }}">
<i class="mdi-action-exit-to-app small"></i> <i class="mdi-action-exit-to-app small"></i>
<span>{% trans %}back{% endtrans %}</span> <span>{% trans %}back{% endtrans %}</span>
</a> </a>

View file

@ -1,96 +0,0 @@
<?php
namespace Wallabag\CoreBundle\Service;
use Wallabag\CoreBundle\Helper\Content;
use Wallabag\CoreBundle\Helper\Url;
/**
* Extracts the title and body of a page by running the bundled
* makefulltextfeed.php script and reading its JSON output.
*/
final class Extractor
{
/**
* Extract title and body for the given url.
*
* @param string $url Plain (not encoded) url
*
* @return Content Holder filled with the extracted title and body
*/
public static function extract($url)
{
// The url is base64 encoded here because it is wrapped in a Url object
// NOTE(review): presumably Url decodes it again in its constructor — confirm
$pageContent = self::getPageContent(new Url(base64_encode($url)));
// Fall back to the host name of the url when the item title is empty
$title = $pageContent['rss']['channel']['item']['title'] ?: parse_url($url, PHP_URL_HOST);
$body = $pageContent['rss']['channel']['item']['description'];
$content = new Content();
$content->setTitle($title);
$content->setBody($body);
return $content;
}
/**
* Get the content for a given URL (by a call to FullTextFeed).
*
* The script is required inside a closure while the global variables and the
* session are temporarily emptied; both are restored before returning.
*
* @param Url $url
*
* @return mixed Result of json_decode() (associative arrays) on the script output
*/
public static function getPageContent(Url $url)
{
// Saving and clearing context
$REAL = array();
foreach ($GLOBALS as $key => $value) {
if ($key != 'GLOBALS' && $key != '_SESSION' && $key != 'HTTP_SESSION_VARS') {
$GLOBALS[$key] = array();
$REAL[$key] = $value;
}
}
// Saving and clearing session
if (isset($_SESSION)) {
$REAL_SESSION = array();
foreach ($_SESSION as $key => $value) {
$REAL_SESSION[$key] = $value;
unset($_SESSION[$key]);
}
}
// Running code in different context
// Arguments are read via func_get_arg(): 0 is the script path, 1 is an array
// of variables to extract (it provides $url)
$scope = function () {
extract(func_get_arg(1));
// Request parameters handed to the required script
$_GET = $_REQUEST = array(
'url' => $url->getUrl(),
'max' => 5,
'links' => 'preserve',
'exc' => '',
'format' => 'json',
'submit' => 'Create Feed',
);
// Capture whatever the script prints instead of sending it to the client
ob_start();
require func_get_arg(0);
$json = ob_get_contents();
ob_end_clean();
return $json;
};
// Silence $scope function to avoid
// issues with FTRSS when error_reporting is too high
// FTRSS generates PHP warnings which break output
$json = @$scope(__DIR__.'/../../../../vendor/wallabag/Fivefilters_Libraries/makefulltextfeed.php', array('url' => $url));
// Clearing and restoring context
// NOTE(review): unlike the saving loop above, this loop does not skip
// 'HTTP_SESSION_VARS', so that key would be unset here without being
// restored from $REAL — confirm whether this is intended
foreach ($GLOBALS as $key => $value) {
if ($key != 'GLOBALS' && $key != '_SESSION') {
unset($GLOBALS[$key]);
}
}
foreach ($REAL as $key => $value) {
$GLOBALS[$key] = $value;
}
// Clearing and restoring session
if (isset($REAL_SESSION)) {
foreach ($_SESSION as $key => $value) {
unset($_SESSION[$key]);
}
foreach ($REAL_SESSION as $key => $value) {
$_SESSION[$key] = $value;
}
}
return json_decode($json, true);
}
}

View file

@ -0,0 +1,84 @@
<?php
namespace Wallabag\CoreBundle\Tests\Helper;
use Symfony\Bundle\FrameworkBundle\Test\KernelTestCase;
use Wallabag\CoreBundle\Entity\Entry;
use Wallabag\CoreBundle\Entity\User;
use Wallabag\CoreBundle\Helper\ContentProxy;
/**
 * Checks how ContentProxy hydrates an entry from the data returned by graby.
 */
class ContentProxyTest extends KernelTestCase
{
    /**
     * Nothing readable and no Open Graph data: only the fallback message is stored.
     */
    public function testWithEmptyContent()
    {
        $entry = $this->updateEntryWith(array('html' => false, 'title' => '', 'url' => '', 'content_type' => ''));

        $this->assertEquals('http://0.0.0.0', $entry->getUrl());
        $this->assertEmpty($entry->getTitle());
        $this->assertEquals('<p>Unable to retrieve readable content.</p>', $entry->getContent());
        $this->assertEmpty($entry->getPreviewPicture());
        $this->assertEmpty($entry->getMimetype());
    }

    /**
     * Nothing readable but Open Graph data is present: title and description come from it.
     */
    public function testWithEmptyContentButOG()
    {
        $entry = $this->updateEntryWith(array('html' => false, 'title' => '', 'url' => '', 'content_type' => '', 'open_graph' => array('og_title' => 'my title', 'og_description' => 'desc')));

        $this->assertEquals('http://0.0.0.0', $entry->getUrl());
        $this->assertEquals('my title', $entry->getTitle());
        $this->assertEquals('<p>Unable to retrieve readable content.</p><p><i>But we found a short description: </i></p>desc', $entry->getContent());
        $this->assertEmpty($entry->getPreviewPicture());
        $this->assertEmpty($entry->getMimetype());
    }

    /**
     * Readable content wins over Open Graph data, except for the preview picture.
     */
    public function testWithContent()
    {
        $entry = $this->updateEntryWith(array(
            'html' => 'this is my content',
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'content_type' => 'text/html',
            'open_graph' => array(
                'og_title' => 'my OG title',
                'og_description' => 'OG desc',
                'og_image' => 'http://3.3.3.3/cover.jpg',
            ),
        ));

        $this->assertEquals('http://1.1.1.1', $entry->getUrl());
        $this->assertEquals('this is my title', $entry->getTitle());
        $this->assertEquals('this is my content', $entry->getContent());
        $this->assertEquals('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
        $this->assertEquals('text/html', $entry->getMimetype());
    }

    /**
     * Run ContentProxy::updateEntry() on a fresh entry for "http://0.0.0.0",
     * with a graby mock whose fetchContent() returns the given data.
     *
     * @param array $fetched Data the mocked fetchContent() returns
     *
     * @return Entry
     */
    private function updateEntryWith(array $fetched)
    {
        $graby = $this->getMockBuilder('Graby\Graby')
            ->setMethods(array('fetchContent'))
            ->disableOriginalConstructor()
            ->getMock();

        $graby->expects($this->any())
            ->method('fetchContent')
            ->willReturn($fetched);

        $proxy = new ContentProxy($graby);

        return $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0');
    }
}