Merge pull request #2708 from jcharaoui/import-disablecontentupdate

Import disableContentUpdate
This commit is contained in:
Jérémy Benoist 2017-06-02 11:26:37 +02:00 committed by GitHub
commit a687c8d915
20 changed files with 160 additions and 103 deletions

View file

@ -58,6 +58,7 @@ wallabag_core:
cache_lifetime: 10
action_mark_as_read: 1
list_mode: 0
fetching_error_message_title: 'No title found'
fetching_error_message: |
wallabag can't retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.
api_limit_mass_actions: 10

View file

@ -231,7 +231,6 @@ class EntryRestController extends WallabagRestController
$this->validateAuthentication();
$urls = json_decode($request->query->get('urls', []));
$results = [];
$limit = $this->container->getParameter('wallabag_core.api_limit_mass_actions');
@ -239,32 +238,34 @@ class EntryRestController extends WallabagRestController
throw new HttpException(400, 'API limit reached');
}
$results = [];
if (empty($urls)) {
return $this->sendResponse($results);
}
// handle multiple urls
if (!empty($urls)) {
foreach ($urls as $key => $url) {
$entry = $this->get('wallabag_core.entry_repository')->findByUrlAndUserId(
$url,
$this->getUser()->getId()
);
foreach ($urls as $key => $url) {
$entry = $this->get('wallabag_core.entry_repository')->findByUrlAndUserId(
$url,
$this->getUser()->getId()
);
$results[$key]['url'] = $url;
$results[$key]['url'] = $url;
if (false === $entry) {
$entry = $this->get('wallabag_core.content_proxy')->updateEntry(
new Entry($this->getUser()),
$url
);
}
if (false === $entry) {
$entry = new Entry($this->getUser());
$em = $this->getDoctrine()->getManager();
$em->persist($entry);
$em->flush();
$results[$key]['entry'] = $entry instanceof Entry ? $entry->getId() : false;
// entry saved, dispatch event about it!
$this->get('event_dispatcher')->dispatch(EntrySavedEvent::NAME, new EntrySavedEvent($entry));
$this->get('wallabag_core.content_proxy')->updateEntry($entry, $url);
}
$em = $this->getDoctrine()->getManager();
$em->persist($entry);
$em->flush();
$results[$key]['entry'] = $entry instanceof Entry ? $entry->getId() : false;
// entry saved, dispatch event about it!
$this->get('event_dispatcher')->dispatch(EntrySavedEvent::NAME, new EntrySavedEvent($entry));
}
return $this->sendResponse($results);
@ -315,7 +316,7 @@ class EntryRestController extends WallabagRestController
}
try {
$entry = $this->get('wallabag_core.content_proxy')->updateEntry(
$this->get('wallabag_core.content_proxy')->updateEntry(
$entry,
$url,
[
@ -428,7 +429,7 @@ class EntryRestController extends WallabagRestController
$this->validateUserAccess($entry->getUser()->getId());
try {
$entry = $this->get('wallabag_core.content_proxy')->updateEntry($entry, $entry->getUrl());
$this->get('wallabag_core.content_proxy')->updateEntry($entry, $entry->getUrl());
} catch (\Exception $e) {
$this->get('logger')->error('Error while saving an entry', [
'exception' => $e,

View file

@ -53,22 +53,17 @@ class EntryController extends Controller
/**
* Fetch content and update entry.
* In case it fails, entry will return to avod loosing the data.
* In case it fails, $entry->getContent will return an error message.
*
* @param Entry $entry
* @param string $prefixMessage Should be the translation key: entry_saved or entry_reloaded
*
* @return Entry
*/
private function updateEntry(Entry $entry, $prefixMessage = 'entry_saved')
{
// put default title in case of fetching content failed
$entry->setTitle('No title found');
$message = 'flashes.entry.notice.'.$prefixMessage;
try {
$entry = $this->get('wallabag_core.content_proxy')->updateEntry($entry, $entry->getUrl());
$this->get('wallabag_core.content_proxy')->updateEntry($entry, $entry->getUrl());
} catch (\Exception $e) {
$this->get('logger')->error('Error while saving an entry', [
'exception' => $e,
@ -79,8 +74,6 @@ class EntryController extends Controller
}
$this->get('session')->getFlashBag()->add('notice', $message);
return $entry;
}
/**

View file

@ -41,6 +41,8 @@ class Configuration implements ConfigurationInterface
->end()
->scalarNode('fetching_error_message')
->end()
->scalarNode('fetching_error_message_title')
->end()
->scalarNode('action_mark_as_read')
->defaultValue(1)
->end()

View file

@ -26,6 +26,7 @@ class WallabagCoreExtension extends Extension
$container->setParameter('wallabag_core.action_mark_as_read', $config['action_mark_as_read']);
$container->setParameter('wallabag_core.list_mode', $config['list_mode']);
$container->setParameter('wallabag_core.fetching_error_message', $config['fetching_error_message']);
$container->setParameter('wallabag_core.fetching_error_message_title', $config['fetching_error_message_title']);
$container->setParameter('wallabag_core.api_limit_mass_actions', $config['api_limit_mass_actions']);
$loader = new Loader\YamlFileLoader($container, new FileLocator(__DIR__.'/../Resources/config'));

View file

@ -31,27 +31,20 @@ class ContentProxy
}
/**
* Fetch content using graby and hydrate given $entry with results information.
* In case we couldn't find content, we'll try to use Open Graph data.
* Update entry using either fetched or provided content.
*
* We can also force the content, in case of an import from the v1 for example, so the function won't
* fetch the content from the website but rather use information given with the $content parameter.
*
* @param Entry $entry Entry to update
* @param string $url Url to grab content for
* @param array $content An array with AT LEAST keys title, html, url to skip the fetchContent from the url
*
* @return Entry
* @param Entry $entry Entry to update
* @param string $url Url of the content
* @param array $content Array with content provided for import with AT LEAST keys title, html, url to skip the fetchContent from the url
* @param bool $disableContentUpdate Whether to skip trying to fetch content using Graby
*/
public function updateEntry(Entry $entry, $url, array $content = [])
public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false)
{
// ensure content is a bit cleaned up
if (!empty($content['html'])) {
$content['html'] = $this->graby->cleanupHtml($content['html'], $url);
}
// do we have to fetch the content or the provided one is ok?
if (empty($content) || false === $this->validateContent($content)) {
if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) {
$fetchedContent = $this->graby->fetchContent($url);
// when content is imported, we have information in $content
@ -61,6 +54,22 @@ class ContentProxy
}
}
// be sure to keep the url in case of error
// so we'll be able to refetch it in the future
$content['url'] = !empty($content['url']) ? $content['url'] : $url;
$this->stockEntry($entry, $content);
}
/**
* Stock entry with fetched or imported content.
* Will fall back to OpenGraph data if available.
*
* @param Entry $entry Entry to stock
* @param array $content Array with at least title, url & html
*/
private function stockEntry(Entry $entry, array $content)
{
$title = $content['title'];
if (!$title && !empty($content['open_graph']['og_title'])) {
$title = $content['open_graph']['og_title'];
@ -76,7 +85,7 @@ class ContentProxy
}
}
$entry->setUrl($content['url'] ?: $url);
$entry->setUrl($content['url']);
$entry->setTitle($title);
$entry->setContent($html);
$entry->setHttpStatus(isset($content['status']) ? $content['status'] : '');
@ -92,7 +101,7 @@ class ContentProxy
try {
$entry->setPublishedAt(new \DateTime($date));
} catch (\Exception $e) {
$this->logger->warning('Error while defining date', ['e' => $e, 'url' => $url, 'date' => $content['date']]);
$this->logger->warning('Error while defining date', ['e' => $e, 'url' => $content['url'], 'date' => $content['date']]);
}
}
@ -126,17 +135,14 @@ class ContentProxy
$this->tagger->tag($entry);
} catch (\Exception $e) {
$this->logger->error('Error while trying to automatically tag an entry.', [
'entry_url' => $url,
'entry_url' => $content['url'],
'error_msg' => $e->getMessage(),
]);
}
return $entry;
}
/**
* Validate that the given content as enough value to be used
* instead of fetch the content from the url.
* Validate that the given content has at least a title, an html and a url.
*
* @param array $content
*

View file

@ -41,6 +41,7 @@ services:
arguments:
-
error_message: '%wallabag_core.fetching_error_message%'
error_message_title: '%wallabag_core.fetching_error_message_title%'
- "@wallabag_core.guzzle.http_client"
- "@wallabag_core.graby.config_builder"
calls:

View file

@ -5,6 +5,7 @@ namespace Wallabag\ImportBundle\Command;
use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
use Symfony\Component\Config\Definition\Exception\Exception;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
@ -17,9 +18,10 @@ class ImportCommand extends ContainerAwareCommand
->setDescription('Import entries from a JSON export')
->addArgument('username', InputArgument::REQUIRED, 'User to populate')
->addArgument('filepath', InputArgument::REQUIRED, 'Path to the JSON file')
->addOption('importer', null, InputArgument::OPTIONAL, 'The importer to use: v1, v2, instapaper, pinboard, readability, firefox or chrome', 'v1')
->addOption('markAsRead', null, InputArgument::OPTIONAL, 'Mark all entries as read', false)
->addOption('useUserId', null, InputArgument::OPTIONAL, 'Use user id instead of username to find account', false)
->addOption('importer', null, InputOption::VALUE_OPTIONAL, 'The importer to use: v1, v2, instapaper, pinboard, readability, firefox or chrome', 'v1')
->addOption('markAsRead', null, InputOption::VALUE_OPTIONAL, 'Mark all entries as read', false)
->addOption('useUserId', null, InputOption::VALUE_NONE, 'Use user id instead of username to find account')
->addOption('disableContentUpdate', null, InputOption::VALUE_NONE, 'Disable fetching updated content from URL')
;
}
@ -69,6 +71,7 @@ class ImportCommand extends ContainerAwareCommand
}
$import->setMarkAsRead($input->getOption('markAsRead'));
$import->setDisableContentUpdate($input->getOption('disableContentUpdate'));
$import->setUser($user);
$res = $import

View file

@ -24,6 +24,7 @@ abstract class AbstractImport implements ImportInterface
protected $producer;
protected $user;
protected $markAsRead;
protected $disableContentUpdate = false;
protected $skippedEntries = 0;
protected $importedEntries = 0;
protected $queuedEntries = 0;
@ -84,6 +85,18 @@ abstract class AbstractImport implements ImportInterface
return $this->markAsRead;
}
/**
 * Set whether fetching updated content from the entry URL should be skipped during import.
 *
 * The value is normalised to a strict boolean here because the flag is later
 * handed to ContentProxy::updateEntry(), which tests it with a strict
 * `false === $disableContentUpdate` comparison: a loosely falsy value such as
 * null, 0 or '' would otherwise be treated as "content update disabled".
 *
 * @param bool $disableContentUpdate True to skip fetching content, false to fetch it as usual
 *
 * @return $this
 */
public function setDisableContentUpdate($disableContentUpdate)
{
    $this->disableContentUpdate = (bool) $disableContentUpdate;

    return $this;
}
/**
* Fetch content from the ContentProxy (using graby).
* If it fails, the given entry is kept so it can still be saved in all cases (to avoid the user losing the content).
@ -91,15 +104,16 @@ abstract class AbstractImport implements ImportInterface
* @param Entry $entry Entry to update
* @param string $url Url to grab content for
* @param array $content An array with AT LEAST keys title, html, url, language & content_type to skip the fetchContent from the url
*
* @return Entry
*/
protected function fetchContent(Entry $entry, $url, array $content = [])
{
try {
return $this->contentProxy->updateEntry($entry, $url, $content);
$this->contentProxy->updateEntry($entry, $url, $content, $this->disableContentUpdate);
} catch (\Exception $e) {
return $entry;
$this->logger->error('Error trying to import an entry.', [
'entry_url' => $url,
'error_msg' => $e->getMessage(),
]);
}
}

View file

@ -201,7 +201,7 @@ abstract class BrowserImport extends AbstractImport
$entry->setTitle($data['title']);
// update entry with content (in case fetching failed, the given entry is kept as is)
$entry = $this->fetchContent($entry, $data['url'], $data);
$this->fetchContent($entry, $data['url'], $data);
if (array_key_exists('tags', $data)) {
$this->tagsAssigner->assignTagsToEntry(

View file

@ -125,7 +125,7 @@ class InstapaperImport extends AbstractImport
$entry->setTitle($importedEntry['title']);
// update entry with content (in case fetching failed, the given entry is kept as is)
$entry = $this->fetchContent($entry, $importedEntry['url'], $importedEntry);
$this->fetchContent($entry, $importedEntry['url'], $importedEntry);
if (!empty($importedEntry['tags'])) {
$this->tagsAssigner->assignTagsToEntry(

View file

@ -109,7 +109,7 @@ class PinboardImport extends AbstractImport
$entry->setTitle($data['title']);
// update entry with content (in case fetching failed, the given entry is kept as is)
$entry = $this->fetchContent($entry, $data['url'], $data);
$this->fetchContent($entry, $data['url'], $data);
if (!empty($data['tags'])) {
$this->tagsAssigner->assignTagsToEntry(

View file

@ -192,7 +192,7 @@ class PocketImport extends AbstractImport
$entry->setUrl($url);
// update entry with content (in case fetching failed, the given entry is kept as is)
$entry = $this->fetchContent($entry, $url);
$this->fetchContent($entry, $url);
// 0, 1, 2 - 1 if the item is archived - 2 if the item should be deleted
$entry->setArchived($importedEntry['status'] == 1 || $this->markAsRead);

View file

@ -109,7 +109,7 @@ class ReadabilityImport extends AbstractImport
$entry->setTitle($data['title']);
// update entry with content (in case fetching failed, the given entry is kept as is)
$entry = $this->fetchContent($entry, $data['url'], $data);
$this->fetchContent($entry, $data['url'], $data);
$entry->setArchived($data['is_archived']);
$entry->setStarred($data['is_starred']);

View file

@ -108,7 +108,7 @@ abstract class WallabagImport extends AbstractImport
$entry->setTitle($data['title']);
// update entry with content (in case fetching failed, the given entry is kept as is)
$entry = $this->fetchContent($entry, $data['url'], $data);
$this->fetchContent($entry, $data['url'], $data);
if (array_key_exists('tags', $data)) {
$this->tagsAssigner->assignTagsToEntry(

View file

@ -4,6 +4,17 @@ namespace Wallabag\ImportBundle\Import;
class WallabagV1Import extends WallabagImport
{
protected $fetchingErrorMessage;
protected $fetchingErrorMessageTitle;
/**
 * Store the v2 fetching-error strings, then hand the remaining dependencies to the parent constructor.
 *
 * The two strings are used when a v1 entry's title is one of the "untitled" markers:
 * the imported title and html are replaced with them before content is (re)fetched.
 *
 * @param mixed  $em                        Forwarded untouched to the parent constructor (presumably the entity manager — confirm against the parent class)
 * @param mixed  $contentProxy              Forwarded untouched to the parent constructor (wired to the content proxy service)
 * @param mixed  $tagsAssigner              Forwarded untouched to the parent constructor (wired to the tags assigner service)
 * @param mixed  $eventDispatcher           Forwarded untouched to the parent constructor (wired to the event dispatcher service)
 * @param string $fetchingErrorMessageTitle Title to use for entries whose v1 fetch had failed
 * @param string $fetchingErrorMessage      Content to use for entries whose v1 fetch had failed
 */
public function __construct($em, $contentProxy, $tagsAssigner, $eventDispatcher, $fetchingErrorMessageTitle, $fetchingErrorMessage)
{
// set the error strings before delegating to the parent constructor
$this->fetchingErrorMessageTitle = $fetchingErrorMessageTitle;
$this->fetchingErrorMessage = $fetchingErrorMessage;
parent::__construct($em, $contentProxy, $tagsAssigner, $eventDispatcher);
}
/**
* {@inheritdoc}
*/
@ -43,10 +54,11 @@ class WallabagV1Import extends WallabagImport
'created_at' => '',
];
// force content to be refreshed in case on bad fetch in the v1 installation
// In case of a bad fetch in v1, replace title and content with v2 error strings
// If fetching fails again, they will get this instead of the v1 strings
if (in_array($entry['title'], $this->untitled)) {
$data['title'] = '';
$data['html'] = '';
$data['title'] = $this->fetchingErrorMessageTitle;
$data['html'] = $this->fetchingErrorMessage;
}
if (array_key_exists('tags', $entry) && $entry['tags'] != '') {

View file

@ -35,6 +35,8 @@ services:
- "@wallabag_core.content_proxy"
- "@wallabag_core.tags_assigner"
- "@event_dispatcher"
- "%wallabag_core.fetching_error_message_title%"
- "%wallabag_core.fetching_error_message%"
calls:
- [ setLogger, [ "@logger" ]]
tags:

View file

@ -123,7 +123,7 @@ class TagControllerTest extends WallabagCoreTestCase
$this->assertEquals(302, $client->getResponse()->getStatusCode());
$this->assertEquals($entryUri, $client->getResponse()->getTargetUrl());
// re-retrieve the entry to be sure to get fresh data from database (mostly for tags)
// re-retrieve the entry to be sure to get fresh data from database (mostly for tags)
$entry = $this->getEntityManager()->getRepository(Entry::class)->find($entry->getId());
$this->assertNotContains($this->tagName, $entry->getTags());

View file

@ -3,14 +3,14 @@
namespace Tests\Wallabag\CoreBundle\Helper;
use Psr\Log\NullLogger;
use Monolog\Logger;
use Monolog\Handler\TestHandler;
use Wallabag\CoreBundle\Helper\ContentProxy;
use Wallabag\CoreBundle\Entity\Entry;
use Wallabag\CoreBundle\Entity\Tag;
use Wallabag\UserBundle\Entity\User;
use Wallabag\CoreBundle\Helper\RuleBasedTagger;
use Graby\Graby;
use Monolog\Handler\TestHandler;
use Monolog\Logger;
class ContentProxyTest extends \PHPUnit_Framework_TestCase
{
@ -38,7 +38,8 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
]);
$proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
$entry = $proxy->updateEntry(new Entry(new User()), 'http://user@:80');
$entry = new Entry(new User());
$proxy->updateEntry($entry, 'http://user@:80');
$this->assertEquals('http://user@:80', $entry->getUrl());
$this->assertEmpty($entry->getTitle());
@ -72,7 +73,8 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
]);
$proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
$entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0');
$entry = new Entry(new User());
$proxy->updateEntry($entry, 'http://0.0.0.0');
$this->assertEquals('http://0.0.0.0', $entry->getUrl());
$this->assertEmpty($entry->getTitle());
@ -111,7 +113,8 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
]);
$proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
$entry = $proxy->updateEntry(new Entry(new User()), 'http://domain.io');
$entry = new Entry(new User());
$proxy->updateEntry($entry, 'http://domain.io');
$this->assertEquals('http://domain.io', $entry->getUrl());
$this->assertEquals('my title', $entry->getTitle());
@ -152,7 +155,8 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
]);
$proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
$entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0');
$entry = new Entry(new User());
$proxy->updateEntry($entry, 'http://0.0.0.0');
$this->assertEquals('http://1.1.1.1', $entry->getUrl());
$this->assertEquals('this is my title', $entry->getTitle());
@ -193,7 +197,8 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
]);
$proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
$entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0');
$entry = new Entry(new User());
$proxy->updateEntry($entry, 'http://0.0.0.0');
$this->assertEquals('http://1.1.1.1', $entry->getUrl());
$this->assertEquals('this is my title', $entry->getTitle());
@ -213,8 +218,9 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
->method('tag');
$proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage);
$entry = $proxy->updateEntry(
new Entry(new User()),
$entry = new Entry(new User());
$proxy->updateEntry(
$entry,
'http://0.0.0.0',
[
'html' => str_repeat('this is my content', 325),
@ -250,10 +256,14 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
$tagger->expects($this->once())
->method('tag');
$proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage);
$entry = $proxy->updateEntry(
new Entry(new User()),
'http://0.0.0.0',
$logHandler = new TestHandler();
$logger = new Logger('test', [$logHandler]);
$proxy = new ContentProxy((new Graby()), $tagger, $logger, $this->fetchingErrorMessage);
$entry = new Entry(new User());
$proxy->updateEntry(
$entry,
'http://1.1.1.1',
[
'html' => str_repeat('this is my content', 325),
'title' => 'this is my title',
@ -285,9 +295,10 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
$logger->pushHandler($handler);
$proxy = new ContentProxy((new Graby()), $tagger, $logger, $this->fetchingErrorMessage);
$entry = $proxy->updateEntry(
new Entry(new User()),
'http://0.0.0.0',
$entry = new Entry(new User());
$proxy->updateEntry(
$entry,
'http://1.1.1.1',
[
'html' => str_repeat('this is my content', 325),
'title' => 'this is my title',
@ -315,24 +326,24 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
public function testTaggerThrowException()
{
$graby = $this->getMockBuilder('Graby\Graby')
->disableOriginalConstructor()
->getMock();
$tagger = $this->getTaggerMock();
$tagger->expects($this->once())
->method('tag')
->will($this->throwException(new \Exception()));
$proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
$entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0', [
'html' => str_repeat('this is my content', 325),
'title' => 'this is my title',
'url' => 'http://1.1.1.1',
'content_type' => 'text/html',
'language' => 'fr',
]);
$proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage);
$entry = new Entry(new User());
$proxy->updateEntry(
$entry,
'http://1.1.1.1',
[
'html' => str_repeat('this is my content', 325),
'title' => 'this is my title',
'url' => 'http://1.1.1.1',
'content_type' => 'text/html',
'language' => 'fr',
]
);
$this->assertCount(0, $entry->getTags());
}
@ -361,8 +372,9 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
->method('tag');
$proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage);
$entry = $proxy->updateEntry(
new Entry(new User()),
$entry = new Entry(new User());
$proxy->updateEntry(
$entry,
'http://1.1.1.1',
[
'html' => $html,

View file

@ -19,6 +19,8 @@ class WallabagV1ImportTest extends \PHPUnit_Framework_TestCase
protected $contentProxy;
protected $tagsAssigner;
protected $uow;
protected $fetchingErrorMessageTitle = 'No title found';
protected $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/master/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.';
private function getWallabagV1Import($unsetUser = false, $dispatched = 0)
{
@ -58,7 +60,14 @@ class WallabagV1ImportTest extends \PHPUnit_Framework_TestCase
->expects($this->exactly($dispatched))
->method('dispatch');
$wallabag = new WallabagV1Import($this->em, $this->contentProxy, $this->tagsAssigner, $dispatcher);
$wallabag = new WallabagV1Import(
$this->em,
$this->contentProxy,
$this->tagsAssigner,
$dispatcher,
$this->fetchingErrorMessageTitle,
$this->fetchingErrorMessage
);
$this->logHandler = new TestHandler();
$logger = new Logger('test', [$this->logHandler]);