mirror of
https://github.com/wallabag/wallabag.git
synced 2024-12-22 23:56:29 +00:00
Add disableContentUpdate import option
This commit also decouples the "import" and "update" functions inside ContentProxy. If a content array is available, it must be passed to the new importEntry method.
This commit is contained in:
parent
1c5da417e4
commit
d0e9b3d640
11 changed files with 118 additions and 60 deletions
|
@ -7,6 +7,7 @@ use Psr\Log\LoggerInterface;
|
|||
use Wallabag\CoreBundle\Entity\Entry;
|
||||
use Wallabag\CoreBundle\Tools\Utils;
|
||||
use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser;
|
||||
use Symfony\Component\Config\Definition\Exception\Exception;
|
||||
|
||||
/**
|
||||
* This kind of proxy class take care of getting the content from an url
|
||||
|
@ -31,34 +32,58 @@ class ContentProxy
|
|||
}
|
||||
|
||||
/**
|
||||
* Fetch content using graby and hydrate given $entry with results information.
|
||||
* In case we couldn't find content, we'll try to use Open Graph data.
|
||||
*
|
||||
* We can also force the content, in case of an import from the v1 for example, so the function won't
|
||||
* fetch the content from the website but rather use information given with the $content parameter.
|
||||
* Update existing entry by fetching from URL using Graby.
|
||||
*
|
||||
* @param Entry $entry Entry to update
|
||||
* @param string $url Url to grab content for
|
||||
* @param array $content An array with AT LEAST keys title, html, url to skip the fetchContent from the url
|
||||
*/
|
||||
public function updateEntry(Entry $entry, $url, array $content = [])
|
||||
public function updateEntry(Entry $entry, $url)
|
||||
{
|
||||
// ensure content is a bit cleaned up
|
||||
if (!empty($content['html'])) {
|
||||
$content['html'] = $this->graby->cleanupHtml($content['html'], $url);
|
||||
}
|
||||
$content = $this->graby->fetchContent($url);
|
||||
|
||||
// do we have to fetch the content or the provided one is ok?
|
||||
if (empty($content) || false === $this->validateContent($content)) {
|
||||
$fetchedContent = $this->graby->fetchContent($url);
|
||||
$this->stockEntry($entry, $content);
|
||||
}
|
||||
|
||||
/**
|
||||
* Import entry using either fetched or provided content.
|
||||
*
|
||||
* @param Entry $entry Entry to update
|
||||
* @param array $content Array with content provided for import with AT LEAST keys title, html, url to skip the fetchContent from the url
|
||||
* @param bool $disableContentUpdate Whether to skip trying to fetch content using Graby
|
||||
*/
|
||||
public function importEntry(Entry $entry, array $content, $disableContentUpdate = false)
|
||||
{
|
||||
$this->validateContent($content);
|
||||
|
||||
if (false === $disableContentUpdate) {
|
||||
try {
|
||||
$fetchedContent = $this->graby->fetchContent($content['url']);
|
||||
} catch (\Exception $e) {
|
||||
$this->logger->error('Error while trying to fetch content from URL.', [
|
||||
'entry_url' => $content['url'],
|
||||
'error_msg' => $e->getMessage(),
|
||||
]);
|
||||
}
|
||||
|
||||
// when content is imported, we have information in $content
|
||||
// in case fetching content goes bad, we'll keep the imported information instead of overriding them
|
||||
if (empty($content) || $fetchedContent['html'] !== $this->fetchingErrorMessage) {
|
||||
if ($fetchedContent['html'] !== $this->fetchingErrorMessage) {
|
||||
$content = $fetchedContent;
|
||||
}
|
||||
}
|
||||
|
||||
$this->stockEntry($entry, $content);
|
||||
}
|
||||
|
||||
/**
|
||||
* Stock entry with fetched or imported content.
|
||||
* Will fall back to OpenGraph data if available.
|
||||
*
|
||||
* @param Entry $entry Entry to stock
|
||||
* @param array $content Array with at least title and URL
|
||||
*/
|
||||
private function stockEntry(Entry $entry, array $content)
|
||||
{
|
||||
$title = $content['title'];
|
||||
if (!$title && !empty($content['open_graph']['og_title'])) {
|
||||
$title = $content['open_graph']['og_title'];
|
||||
|
@ -74,7 +99,7 @@ class ContentProxy
|
|||
}
|
||||
}
|
||||
|
||||
$entry->setUrl($content['url'] ?: $url);
|
||||
$entry->setUrl($content['url']);
|
||||
$entry->setTitle($title);
|
||||
$entry->setContent($html);
|
||||
$entry->setHttpStatus(isset($content['status']) ? $content['status'] : '');
|
||||
|
@ -124,22 +149,29 @@ class ContentProxy
|
|||
$this->tagger->tag($entry);
|
||||
} catch (\Exception $e) {
|
||||
$this->logger->error('Error while trying to automatically tag an entry.', [
|
||||
'entry_url' => $url,
|
||||
'entry_url' => $content['url'],
|
||||
'error_msg' => $e->getMessage(),
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate that the given content as enough value to be used
|
||||
* instead of fetch the content from the url.
|
||||
* Validate that the given content has at least a title, an html and a url.
|
||||
*
|
||||
* @param array $content
|
||||
*
|
||||
* @return bool true if valid otherwise false
|
||||
*/
|
||||
private function validateContent(array $content)
|
||||
{
|
||||
return !empty($content['title']) && !empty($content['html']) && !empty($content['url']);
|
||||
if (!empty($content['title']))) {
|
||||
throw new Exception('Missing title from imported entry!');
|
||||
}
|
||||
|
||||
if (!empty($content['url']))) {
|
||||
throw new Exception('Missing URL from imported entry!');
|
||||
}
|
||||
|
||||
if (!empty($content['html']))) {
|
||||
throw new Exception('Missing html from imported entry!');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@ namespace Wallabag\ImportBundle\Command;
|
|||
use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
|
||||
use Symfony\Component\Config\Definition\Exception\Exception;
|
||||
use Symfony\Component\Console\Input\InputArgument;
|
||||
use Symfony\Component\Console\Input\InputOption;
|
||||
use Symfony\Component\Console\Input\InputInterface;
|
||||
use Symfony\Component\Console\Output\OutputInterface;
|
||||
|
||||
|
@ -19,7 +20,7 @@ class ImportCommand extends ContainerAwareCommand
|
|||
->addArgument('filepath', InputArgument::REQUIRED, 'Path to the JSON file')
|
||||
->addOption('importer', null, InputArgument::OPTIONAL, 'The importer to use: v1, v2, instapaper, pinboard, readability, firefox or chrome', 'v1')
|
||||
->addOption('markAsRead', null, InputArgument::OPTIONAL, 'Mark all entries as read', false)
|
||||
->addOption('useUserId', null, InputArgument::OPTIONAL, 'Use user id instead of username to find account', false)
|
||||
->addOption('disableContentUpdate', null, InputOption::VALUE_NONE, 'Disable fetching updated content from URL')
|
||||
;
|
||||
}
|
||||
|
||||
|
@ -69,6 +70,7 @@ class ImportCommand extends ContainerAwareCommand
|
|||
}
|
||||
|
||||
$import->setMarkAsRead($input->getOption('markAsRead'));
|
||||
$import->setDisableContentUpdate($input->getOption('disableContentUpdate'));
|
||||
$import->setUser($user);
|
||||
|
||||
$res = $import
|
||||
|
|
|
@ -24,6 +24,7 @@ abstract class AbstractImport implements ImportInterface
|
|||
protected $producer;
|
||||
protected $user;
|
||||
protected $markAsRead;
|
||||
protected $disableContentUpdate;
|
||||
protected $skippedEntries = 0;
|
||||
protected $importedEntries = 0;
|
||||
protected $queuedEntries = 0;
|
||||
|
@ -84,6 +85,27 @@ abstract class AbstractImport implements ImportInterface
|
|||
return $this->markAsRead;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set whether articles should be fetched for updated content.
|
||||
*
|
||||
* @param bool $markAsRead
|
||||
*/
|
||||
public function setDisableContentUpdate($disableContentUpdate)
|
||||
{
|
||||
$this->disableContentUpdate = $disableContentUpdate;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get whether articles should be fetched for updated content.
|
||||
*/
|
||||
public function getDisableContentUpdate()
|
||||
{
|
||||
return $this->disableContentUpdate;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Fetch content from the ContentProxy (using graby).
|
||||
* If it fails return the given entry to be saved in all case (to avoid user to loose the content).
|
||||
|
@ -95,9 +117,12 @@ abstract class AbstractImport implements ImportInterface
|
|||
protected function fetchContent(Entry $entry, $url, array $content = [])
|
||||
{
|
||||
try {
|
||||
$this->contentProxy->updateEntry($entry, $url, $content);
|
||||
$this->contentProxy->importEntry($entry, $content, $this->disableContentUpdate);
|
||||
} catch (\Exception $e) {
|
||||
return $entry;
|
||||
$this->logger->error('Error trying to import an entry.', [
|
||||
'entry_url' => $content['url'],
|
||||
'error_msg' => $e->getMessage(),
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -257,9 +257,8 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage);
|
||||
$entry = new Entry(new User());
|
||||
$proxy->updateEntry(
|
||||
$proxy->importEntry(
|
||||
$entry,
|
||||
'http://0.0.0.0',
|
||||
[
|
||||
'html' => str_repeat('this is my content', 325),
|
||||
'title' => 'this is my title',
|
||||
|
@ -294,7 +293,6 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
|
|||
$entry = new Entry(new User());
|
||||
$proxy->updateEntry(
|
||||
$entry,
|
||||
'http://0.0.0.0',
|
||||
[
|
||||
'html' => str_repeat('this is my content', 325),
|
||||
'title' => 'this is my title',
|
||||
|
@ -334,13 +332,14 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
|
|||
$proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
|
||||
|
||||
$entry = new Entry(new User());
|
||||
$proxy->updateEntry($entry, 'http://0.0.0.0', [
|
||||
$content = array(
|
||||
'html' => str_repeat('this is my content', 325),
|
||||
'title' => 'this is my title',
|
||||
'url' => 'http://1.1.1.1',
|
||||
'content_type' => 'text/html',
|
||||
'language' => 'fr',
|
||||
]);
|
||||
);
|
||||
$proxy->importEntry($entry, $content, true);
|
||||
|
||||
$this->assertCount(0, $entry->getTags());
|
||||
}
|
||||
|
|
|
@ -89,7 +89,7 @@ class ChromeImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->exactly(1))
|
||||
->method('updateEntry')
|
||||
->method('importEntry')
|
||||
->willReturn($entry);
|
||||
|
||||
$res = $chromeImport->import();
|
||||
|
@ -118,7 +118,7 @@ class ChromeImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->exactly(1))
|
||||
->method('updateEntry')
|
||||
->method('importEntry')
|
||||
->willReturn(new Entry($this->user));
|
||||
|
||||
// check that every entry persisted are archived
|
||||
|
@ -158,7 +158,7 @@ class ChromeImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->never())
|
||||
->method('updateEntry');
|
||||
->method('importEntry');
|
||||
|
||||
$producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
|
||||
->disableOriginalConstructor()
|
||||
|
@ -198,7 +198,7 @@ class ChromeImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->never())
|
||||
->method('updateEntry');
|
||||
->method('importEntry');
|
||||
|
||||
$factory = new RedisMockFactory();
|
||||
$redisMock = $factory->getAdapter('Predis\Client', true);
|
||||
|
|
|
@ -89,7 +89,7 @@ class FirefoxImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->exactly(2))
|
||||
->method('updateEntry')
|
||||
->method('importEntry')
|
||||
->willReturn($entry);
|
||||
|
||||
$res = $firefoxImport->import();
|
||||
|
@ -118,7 +118,7 @@ class FirefoxImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->exactly(1))
|
||||
->method('updateEntry')
|
||||
->method('importEntry')
|
||||
->willReturn(new Entry($this->user));
|
||||
|
||||
// check that every entry persisted are archived
|
||||
|
@ -158,7 +158,7 @@ class FirefoxImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->never())
|
||||
->method('updateEntry');
|
||||
->method('importEntry');
|
||||
|
||||
$producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
|
||||
->disableOriginalConstructor()
|
||||
|
@ -198,7 +198,7 @@ class FirefoxImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->never())
|
||||
->method('updateEntry');
|
||||
->method('importEntry');
|
||||
|
||||
$factory = new RedisMockFactory();
|
||||
$redisMock = $factory->getAdapter('Predis\Client', true);
|
||||
|
|
|
@ -104,7 +104,7 @@ class InstapaperImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->exactly(4))
|
||||
->method('updateEntry')
|
||||
->method('importEntry')
|
||||
->willReturn($entry);
|
||||
|
||||
$res = $instapaperImport->import();
|
||||
|
@ -133,7 +133,7 @@ class InstapaperImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->once())
|
||||
->method('updateEntry')
|
||||
->method('importEntry')
|
||||
->willReturn(new Entry($this->user));
|
||||
|
||||
// check that every entry persisted are archived
|
||||
|
@ -173,7 +173,7 @@ class InstapaperImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->never())
|
||||
->method('updateEntry');
|
||||
->method('importEntry');
|
||||
|
||||
$producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
|
||||
->disableOriginalConstructor()
|
||||
|
@ -213,7 +213,7 @@ class InstapaperImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->never())
|
||||
->method('updateEntry');
|
||||
->method('importEntry');
|
||||
|
||||
$factory = new RedisMockFactory();
|
||||
$redisMock = $factory->getAdapter('Predis\Client', true);
|
||||
|
|
|
@ -282,7 +282,7 @@ class PocketImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->once())
|
||||
->method('updateEntry')
|
||||
->method('importEntry')
|
||||
->willReturn($entry);
|
||||
|
||||
$pocketImport->setClient($client);
|
||||
|
@ -377,7 +377,7 @@ class PocketImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->exactly(2))
|
||||
->method('updateEntry')
|
||||
->method('importEntry')
|
||||
->willReturn($entry);
|
||||
|
||||
$pocketImport->setClient($client);
|
||||
|
@ -450,7 +450,7 @@ JSON;
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->never())
|
||||
->method('updateEntry');
|
||||
->method('importEntry');
|
||||
|
||||
$producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
|
||||
->disableOriginalConstructor()
|
||||
|
@ -536,7 +536,7 @@ JSON;
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->never())
|
||||
->method('updateEntry');
|
||||
->method('ImportEntry');
|
||||
|
||||
$factory = new RedisMockFactory();
|
||||
$redisMock = $factory->getAdapter('Predis\Client', true);
|
||||
|
@ -621,7 +621,7 @@ JSON;
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->once())
|
||||
->method('updateEntry')
|
||||
->method('importEntry')
|
||||
->will($this->throwException(new \Exception()));
|
||||
|
||||
$pocketImport->setClient($client);
|
||||
|
|
|
@ -89,7 +89,7 @@ class ReadabilityImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->exactly(3))
|
||||
->method('updateEntry')
|
||||
->method('importEntry')
|
||||
->willReturn($entry);
|
||||
|
||||
$res = $readabilityImport->import();
|
||||
|
@ -118,7 +118,7 @@ class ReadabilityImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->exactly(1))
|
||||
->method('updateEntry')
|
||||
->method('importEntry')
|
||||
->willReturn(new Entry($this->user));
|
||||
|
||||
// check that every entry persisted are archived
|
||||
|
@ -158,7 +158,7 @@ class ReadabilityImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->never())
|
||||
->method('updateEntry');
|
||||
->method('importEntry');
|
||||
|
||||
$producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
|
||||
->disableOriginalConstructor()
|
||||
|
@ -198,7 +198,7 @@ class ReadabilityImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->never())
|
||||
->method('updateEntry');
|
||||
->method('importEntry');
|
||||
|
||||
$factory = new RedisMockFactory();
|
||||
$redisMock = $factory->getAdapter('Predis\Client', true);
|
||||
|
|
|
@ -104,7 +104,7 @@ class WallabagV1ImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->exactly(1))
|
||||
->method('updateEntry')
|
||||
->method('importEntry')
|
||||
->willReturn($entry);
|
||||
|
||||
$res = $wallabagV1Import->import();
|
||||
|
@ -133,7 +133,7 @@ class WallabagV1ImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->exactly(3))
|
||||
->method('updateEntry')
|
||||
->method('importEntry')
|
||||
->willReturn(new Entry($this->user));
|
||||
|
||||
// check that every entry persisted are archived
|
||||
|
@ -173,7 +173,7 @@ class WallabagV1ImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->never())
|
||||
->method('updateEntry');
|
||||
->method('importEntry');
|
||||
|
||||
$producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
|
||||
->disableOriginalConstructor()
|
||||
|
@ -213,7 +213,7 @@ class WallabagV1ImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->never())
|
||||
->method('updateEntry');
|
||||
->method('importEntry');
|
||||
|
||||
$factory = new RedisMockFactory();
|
||||
$redisMock = $factory->getAdapter('Predis\Client', true);
|
||||
|
|
|
@ -100,7 +100,7 @@ class WallabagV2ImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->exactly(2))
|
||||
->method('updateEntry')
|
||||
->method('importEntry')
|
||||
->willReturn(new Entry($this->user));
|
||||
|
||||
$res = $wallabagV2Import->import();
|
||||
|
@ -129,7 +129,7 @@ class WallabagV2ImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->exactly(2))
|
||||
->method('updateEntry')
|
||||
->method('importEntry')
|
||||
->willReturn(new Entry($this->user));
|
||||
|
||||
// check that every entry persisted are archived
|
||||
|
@ -165,7 +165,7 @@ class WallabagV2ImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->never())
|
||||
->method('updateEntry');
|
||||
->method('importEntry');
|
||||
|
||||
$producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
|
||||
->disableOriginalConstructor()
|
||||
|
@ -201,7 +201,7 @@ class WallabagV2ImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->never())
|
||||
->method('updateEntry');
|
||||
->method('importEntry');
|
||||
|
||||
$factory = new RedisMockFactory();
|
||||
$redisMock = $factory->getAdapter('Predis\Client', true);
|
||||
|
@ -278,7 +278,7 @@ class WallabagV2ImportTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$this->contentProxy
|
||||
->expects($this->exactly(2))
|
||||
->method('updateEntry')
|
||||
->method('importEntry')
|
||||
->will($this->throwException(new \Exception()));
|
||||
|
||||
$res = $wallabagV2Import->import();
|
||||
|
|
Loading…
Reference in a new issue