diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index c73b8eafb..88873bd53 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -7,6 +7,7 @@ use Psr\Log\LoggerInterface;
 use Wallabag\CoreBundle\Entity\Entry;
 use Wallabag\CoreBundle\Tools\Utils;
 use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser;
+use Symfony\Component\Config\Definition\Exception\Exception;
 
 /**
  * This kind of proxy class take care of getting the content from an url
@@ -31,34 +32,63 @@ class ContentProxy
     }
 
     /**
-     * Fetch content using graby and hydrate given $entry with results information.
-     * In case we couldn't find content, we'll try to use Open Graph data.
-     *
-     * We can also force the content, in case of an import from the v1 for example, so the function won't
-     * fetch the content from the website but rather use information given with the $content parameter.
+     * Update existing entry by fetching from URL using Graby.
      *
      * @param Entry  $entry   Entry to update
      * @param string $url     Url to grab content for
-     * @param array  $content An array with AT LEAST keys title, html, url to skip the fetchContent from the url
      */
-    public function updateEntry(Entry $entry, $url, array $content = [])
+    public function updateEntry(Entry $entry, $url)
     {
-        // ensure content is a bit cleaned up
-        if (!empty($content['html'])) {
-            $content['html'] = $this->graby->cleanupHtml($content['html'], $url);
-        }
+        $content = $this->graby->fetchContent($url);
 
-        // do we have to fetch the content or the provided one is ok?
-        if (empty($content) || false === $this->validateContent($content)) {
-            $fetchedContent = $this->graby->fetchContent($url);
+        $this->stockEntry($entry, $content);
+    }
+
+    /**
+     * Import entry using either fetched or provided content.
+     *
+     * @param Entry $entry                Entry to update
+     * @param array $content              Array with content provided for import with AT LEAST keys title, html, url to skip the fetchContent from the url
+     * @param bool  $disableContentUpdate Whether to skip trying to fetch content using Graby
+     *
+     * @throws Exception When the provided content lacks a title, a url or an html
+     */
+    public function importEntry(Entry $entry, array $content, $disableContentUpdate = false)
+    {
+        $this->validateContent($content);
+
+        // ensure the provided html is a bit cleaned up, as it is stored as-is when nothing gets fetched
+        $content['html'] = $this->graby->cleanupHtml($content['html'], $content['url']);
+
+        if (!$disableContentUpdate) {
+            try {
+                $fetchedContent = $this->graby->fetchContent($content['url']);
+            } catch (\Exception $e) {
+                $this->logger->error('Error while trying to fetch content from URL.', [
+                    'entry_url' => $content['url'],
+                    'error_msg' => $e->getMessage(),
+                ]);
+            }
 
             // when content is imported, we have information in $content
             // in case fetching content goes bad, we'll keep the imported information instead of overriding them
-            if (empty($content) || $fetchedContent['html'] !== $this->fetchingErrorMessage) {
+            if (isset($fetchedContent['html']) && $fetchedContent['html'] !== $this->fetchingErrorMessage) {
                 $content = $fetchedContent;
             }
         }
 
+        $this->stockEntry($entry, $content);
+    }
+
+    /**
+     * Stock entry with fetched or imported content.
+     * Will fall back to OpenGraph data if available.
+     *
+     * @param Entry $entry   Entry to stock
+     * @param array $content Array with at least title and URL
+     */
+    private function stockEntry(Entry $entry, array $content)
+    {
         $title = $content['title'];
         if (!$title && !empty($content['open_graph']['og_title'])) {
             $title = $content['open_graph']['og_title'];
@@ -74,7 +104,7 @@ class ContentProxy
             }
         }
 
-        $entry->setUrl($content['url'] ?: $url);
+        $entry->setUrl($content['url']);
         $entry->setTitle($title);
         $entry->setContent($html);
         $entry->setHttpStatus(isset($content['status']) ? $content['status'] : '');
@@ -124,22 +154,31 @@ class ContentProxy
             $this->tagger->tag($entry);
         } catch (\Exception $e) {
             $this->logger->error('Error while trying to automatically tag an entry.', [
-                'entry_url' => $url,
+                'entry_url' => $content['url'],
                 'error_msg' => $e->getMessage(),
             ]);
         }
     }
 
     /**
-     * Validate that the given content as enough value to be used
-     * instead of fetch the content from the url.
+     * Validate that the given content has at least a title, an html and a url.
      *
      * @param array $content
      *
-     * @return bool true if valid otherwise false
+     * @throws Exception When the title, the url or the html is missing or empty
      */
     private function validateContent(array $content)
     {
-        return !empty($content['title']) && !empty($content['html']) && !empty($content['url']);
+        if (empty($content['title'])) {
+            throw new Exception('Missing title from imported entry!');
+        }
+
+        if (empty($content['url'])) {
+            throw new Exception('Missing URL from imported entry!');
+        }
+
+        if (empty($content['html'])) {
+            throw new Exception('Missing html from imported entry!');
+        }
     }
 }
diff --git a/src/Wallabag/ImportBundle/Command/ImportCommand.php b/src/Wallabag/ImportBundle/Command/ImportCommand.php
index ce72837ad..bca800e6c 100644
--- a/src/Wallabag/ImportBundle/Command/ImportCommand.php
+++ b/src/Wallabag/ImportBundle/Command/ImportCommand.php
@@ -5,6 +5,7 @@ namespace Wallabag\ImportBundle\Command;
 use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
 use Symfony\Component\Config\Definition\Exception\Exception;
 use Symfony\Component\Console\Input\InputArgument;
+use Symfony\Component\Console\Input\InputOption;
 use Symfony\Component\Console\Input\InputInterface;
 use Symfony\Component\Console\Output\OutputInterface;
 
@@ -19,7 +20,7 @@ class ImportCommand extends ContainerAwareCommand
             ->addArgument('filepath', InputArgument::REQUIRED, 'Path to the JSON file')
             ->addOption('importer', null, InputArgument::OPTIONAL, 'The importer to use: v1, v2, instapaper, pinboard, readability, firefox or chrome', 'v1')
             ->addOption('markAsRead', null,
InputArgument::OPTIONAL, 'Mark all entries as read', false)
-            ->addOption('useUserId', null, InputArgument::OPTIONAL, 'Use user id instead of username to find account', false)
+            ->addOption('disableContentUpdate', null, InputOption::VALUE_NONE, 'Disable fetching updated content from URL')
         ;
     }
 
@@ -69,6 +70,7 @@ class ImportCommand extends ContainerAwareCommand
         }
 
         $import->setMarkAsRead($input->getOption('markAsRead'));
+        $import->setDisableContentUpdate($input->getOption('disableContentUpdate'));
         $import->setUser($user);
 
         $res = $import
diff --git a/src/Wallabag/ImportBundle/Import/AbstractImport.php b/src/Wallabag/ImportBundle/Import/AbstractImport.php
index fc462c4cd..167853aae 100644
--- a/src/Wallabag/ImportBundle/Import/AbstractImport.php
+++ b/src/Wallabag/ImportBundle/Import/AbstractImport.php
@@ -24,6 +24,7 @@ abstract class AbstractImport implements ImportInterface
     protected $producer;
     protected $user;
     protected $markAsRead;
+    protected $disableContentUpdate = false;
     protected $skippedEntries = 0;
     protected $importedEntries = 0;
     protected $queuedEntries = 0;
@@ -84,6 +85,30 @@ abstract class AbstractImport implements ImportInterface
         return $this->markAsRead;
     }
 
+    /**
+     * Set whether fetching updated content from the entry URL must be skipped.
+     *
+     * @param bool $disableContentUpdate
+     *
+     * @return $this
+     */
+    public function setDisableContentUpdate($disableContentUpdate)
+    {
+        $this->disableContentUpdate = $disableContentUpdate;
+
+        return $this;
+    }
+
+    /**
+     * Get whether fetching updated content from the entry URL must be skipped.
+     *
+     * @return bool
+     */
+    public function getDisableContentUpdate()
+    {
+        return $this->disableContentUpdate;
+    }
+
     /**
      * Fetch content from the ContentProxy (using graby).
      * If it fails return the given entry to be saved in all case (to avoid user to loose the content).
@@ -95,9 +120,19 @@ abstract class AbstractImport implements ImportInterface
     protected function fetchContent(Entry $entry, $url, array $content = [])
     {
+        // importEntry() only reads the URL from $content, so fall back to the explicit $url
+        if (empty($content['url'])) {
+            $content['url'] = $url;
+        }
+
         try {
-            $this->contentProxy->updateEntry($entry, $url, $content);
+            $this->contentProxy->importEntry($entry, $content, $this->disableContentUpdate);
         } catch (\Exception $e) {
+            $this->logger->error('Error trying to import an entry.', [
+                'entry_url' => $content['url'],
+                'error_msg' => $e->getMessage(),
+            ]);
+
             return $entry;
         }
     }
 
diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
index 166439387..1ad21d147 100644
--- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
+++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
@@ -257,9 +257,8 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
 
         $proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage);
         $entry = new Entry(new User());
-        $proxy->updateEntry(
+        $proxy->importEntry(
             $entry,
-            'http://0.0.0.0',
             [
                 'html' => str_repeat('this is my content', 325),
                 'title' => 'this is my title',
@@ -294,7 +293,6 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
         $entry = new Entry(new User());
-        $proxy->updateEntry(
+        $proxy->importEntry(
             $entry,
-            'http://0.0.0.0',
             [
                 'html' => str_repeat('this is my content', 325),
                 'title' => 'this is my title',
@@ -334,13 +332,14 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
 
         $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
         $entry = new Entry(new User());
-        $proxy->updateEntry($entry, 'http://0.0.0.0', [
+        $content = [
             'html' => str_repeat('this is my content', 325),
             'title' => 'this is my title',
             'url' => 'http://1.1.1.1',
             'content_type' => 'text/html',
             'language' => 'fr',
-        ]);
+        ];
+        $proxy->importEntry($entry, $content, true);
 
         $this->assertCount(0, $entry->getTags());
     }
diff --git a/tests/Wallabag/ImportBundle/Import/ChromeImportTest.php
b/tests/Wallabag/ImportBundle/Import/ChromeImportTest.php index cec195341..7a15e9187 100644 --- a/tests/Wallabag/ImportBundle/Import/ChromeImportTest.php +++ b/tests/Wallabag/ImportBundle/Import/ChromeImportTest.php @@ -89,7 +89,7 @@ class ChromeImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->exactly(1)) - ->method('updateEntry') + ->method('importEntry') ->willReturn($entry); $res = $chromeImport->import(); @@ -118,7 +118,7 @@ class ChromeImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->exactly(1)) - ->method('updateEntry') + ->method('importEntry') ->willReturn(new Entry($this->user)); // check that every entry persisted are archived @@ -158,7 +158,7 @@ class ChromeImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->never()) - ->method('updateEntry'); + ->method('importEntry'); $producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer') ->disableOriginalConstructor() @@ -198,7 +198,7 @@ class ChromeImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->never()) - ->method('updateEntry'); + ->method('importEntry'); $factory = new RedisMockFactory(); $redisMock = $factory->getAdapter('Predis\Client', true); diff --git a/tests/Wallabag/ImportBundle/Import/FirefoxImportTest.php b/tests/Wallabag/ImportBundle/Import/FirefoxImportTest.php index c186c8202..09abac57e 100644 --- a/tests/Wallabag/ImportBundle/Import/FirefoxImportTest.php +++ b/tests/Wallabag/ImportBundle/Import/FirefoxImportTest.php @@ -89,7 +89,7 @@ class FirefoxImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->exactly(2)) - ->method('updateEntry') + ->method('importEntry') ->willReturn($entry); $res = $firefoxImport->import(); @@ -118,7 +118,7 @@ class FirefoxImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->exactly(1)) - ->method('updateEntry') + ->method('importEntry') ->willReturn(new 
Entry($this->user)); // check that every entry persisted are archived @@ -158,7 +158,7 @@ class FirefoxImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->never()) - ->method('updateEntry'); + ->method('importEntry'); $producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer') ->disableOriginalConstructor() @@ -198,7 +198,7 @@ class FirefoxImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->never()) - ->method('updateEntry'); + ->method('importEntry'); $factory = new RedisMockFactory(); $redisMock = $factory->getAdapter('Predis\Client', true); diff --git a/tests/Wallabag/ImportBundle/Import/InstapaperImportTest.php b/tests/Wallabag/ImportBundle/Import/InstapaperImportTest.php index 9158c8a23..05844490d 100644 --- a/tests/Wallabag/ImportBundle/Import/InstapaperImportTest.php +++ b/tests/Wallabag/ImportBundle/Import/InstapaperImportTest.php @@ -104,7 +104,7 @@ class InstapaperImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->exactly(4)) - ->method('updateEntry') + ->method('importEntry') ->willReturn($entry); $res = $instapaperImport->import(); @@ -133,7 +133,7 @@ class InstapaperImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->once()) - ->method('updateEntry') + ->method('importEntry') ->willReturn(new Entry($this->user)); // check that every entry persisted are archived @@ -173,7 +173,7 @@ class InstapaperImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->never()) - ->method('updateEntry'); + ->method('importEntry'); $producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer') ->disableOriginalConstructor() @@ -213,7 +213,7 @@ class InstapaperImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->never()) - ->method('updateEntry'); + ->method('importEntry'); $factory = new RedisMockFactory(); $redisMock = 
$factory->getAdapter('Predis\Client', true);
diff --git a/tests/Wallabag/ImportBundle/Import/PocketImportTest.php b/tests/Wallabag/ImportBundle/Import/PocketImportTest.php
index b81ebe15f..f75e6bea0 100644
--- a/tests/Wallabag/ImportBundle/Import/PocketImportTest.php
+++ b/tests/Wallabag/ImportBundle/Import/PocketImportTest.php
@@ -282,7 +282,7 @@ class PocketImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->once())
-            ->method('updateEntry')
+            ->method('importEntry')
             ->willReturn($entry);
 
         $pocketImport->setClient($client);
@@ -377,7 +377,7 @@ class PocketImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->exactly(2))
-            ->method('updateEntry')
+            ->method('importEntry')
             ->willReturn($entry);
 
         $pocketImport->setClient($client);
@@ -450,7 +450,7 @@ JSON;
 
         $this->contentProxy
             ->expects($this->never())
-            ->method('updateEntry');
+            ->method('importEntry');
 
         $producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
             ->disableOriginalConstructor()
@@ -536,7 +536,7 @@ JSON;
 
         $this->contentProxy
             ->expects($this->never())
-            ->method('updateEntry');
+            ->method('importEntry');
 
         $factory = new RedisMockFactory();
         $redisMock = $factory->getAdapter('Predis\Client', true);
@@ -621,7 +621,7 @@ JSON;
 
         $this->contentProxy
             ->expects($this->once())
-            ->method('updateEntry')
+            ->method('importEntry')
             ->will($this->throwException(new \Exception()));
 
         $pocketImport->setClient($client);
diff --git a/tests/Wallabag/ImportBundle/Import/ReadabilityImportTest.php b/tests/Wallabag/ImportBundle/Import/ReadabilityImportTest.php
index 8f466d383..1b0daa92d 100644
--- a/tests/Wallabag/ImportBundle/Import/ReadabilityImportTest.php
+++ b/tests/Wallabag/ImportBundle/Import/ReadabilityImportTest.php
@@ -89,7 +89,7 @@ class ReadabilityImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->exactly(3))
-            ->method('updateEntry')
+            ->method('importEntry')
             ->willReturn($entry);
 
         $res =
$readabilityImport->import(); @@ -118,7 +118,7 @@ class ReadabilityImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->exactly(1)) - ->method('updateEntry') + ->method('importEntry') ->willReturn(new Entry($this->user)); // check that every entry persisted are archived @@ -158,7 +158,7 @@ class ReadabilityImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->never()) - ->method('updateEntry'); + ->method('importEntry'); $producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer') ->disableOriginalConstructor() @@ -198,7 +198,7 @@ class ReadabilityImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->never()) - ->method('updateEntry'); + ->method('importEntry'); $factory = new RedisMockFactory(); $redisMock = $factory->getAdapter('Predis\Client', true); diff --git a/tests/Wallabag/ImportBundle/Import/WallabagV1ImportTest.php b/tests/Wallabag/ImportBundle/Import/WallabagV1ImportTest.php index 7cbef6377..f23cb7489 100644 --- a/tests/Wallabag/ImportBundle/Import/WallabagV1ImportTest.php +++ b/tests/Wallabag/ImportBundle/Import/WallabagV1ImportTest.php @@ -104,7 +104,7 @@ class WallabagV1ImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->exactly(1)) - ->method('updateEntry') + ->method('importEntry') ->willReturn($entry); $res = $wallabagV1Import->import(); @@ -133,7 +133,7 @@ class WallabagV1ImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->exactly(3)) - ->method('updateEntry') + ->method('importEntry') ->willReturn(new Entry($this->user)); // check that every entry persisted are archived @@ -173,7 +173,7 @@ class WallabagV1ImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->never()) - ->method('updateEntry'); + ->method('importEntry'); $producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer') ->disableOriginalConstructor() @@ -213,7 +213,7 @@ 
class WallabagV1ImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->never()) - ->method('updateEntry'); + ->method('importEntry'); $factory = new RedisMockFactory(); $redisMock = $factory->getAdapter('Predis\Client', true); diff --git a/tests/Wallabag/ImportBundle/Import/WallabagV2ImportTest.php b/tests/Wallabag/ImportBundle/Import/WallabagV2ImportTest.php index 5cc04aa59..e1acf5699 100644 --- a/tests/Wallabag/ImportBundle/Import/WallabagV2ImportTest.php +++ b/tests/Wallabag/ImportBundle/Import/WallabagV2ImportTest.php @@ -100,7 +100,7 @@ class WallabagV2ImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->exactly(2)) - ->method('updateEntry') + ->method('importEntry') ->willReturn(new Entry($this->user)); $res = $wallabagV2Import->import(); @@ -129,7 +129,7 @@ class WallabagV2ImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->exactly(2)) - ->method('updateEntry') + ->method('importEntry') ->willReturn(new Entry($this->user)); // check that every entry persisted are archived @@ -165,7 +165,7 @@ class WallabagV2ImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->never()) - ->method('updateEntry'); + ->method('importEntry'); $producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer') ->disableOriginalConstructor() @@ -201,7 +201,7 @@ class WallabagV2ImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->never()) - ->method('updateEntry'); + ->method('importEntry'); $factory = new RedisMockFactory(); $redisMock = $factory->getAdapter('Predis\Client', true); @@ -278,7 +278,7 @@ class WallabagV2ImportTest extends \PHPUnit_Framework_TestCase $this->contentProxy ->expects($this->exactly(2)) - ->method('updateEntry') + ->method('importEntry') ->will($this->throwException(new \Exception())); $res = $wallabagV2Import->import();