2016-08-19 21:52:19 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
namespace Wallabag\ImportBundle\Import;
|
|
|
|
|
|
|
|
use Psr\Log\LoggerInterface;
|
|
|
|
use Psr\Log\NullLogger;
|
|
|
|
use Doctrine\ORM\EntityManager;
|
|
|
|
use Wallabag\CoreBundle\Helper\ContentProxy;
|
|
|
|
use Wallabag\CoreBundle\Entity\Entry;
|
2016-09-09 08:12:25 +00:00
|
|
|
use Wallabag\CoreBundle\Entity\Tag;
|
2017-05-27 20:08:14 +00:00
|
|
|
use Wallabag\CoreBundle\Helper\TagsAssigner;
|
2016-09-05 05:50:10 +00:00
|
|
|
use Wallabag\UserBundle\Entity\User;
|
2016-09-09 19:02:03 +00:00
|
|
|
use OldSound\RabbitMqBundle\RabbitMq\ProducerInterface;
|
2016-11-02 06:10:23 +00:00
|
|
|
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
|
|
|
|
use Wallabag\CoreBundle\Event\EntrySavedEvent;
|
2016-08-19 21:52:19 +00:00
|
|
|
|
|
|
|
/**
 * Base class shared by the importers.
 *
 * It holds the collaborators needed to import entries (entity manager, content proxy,
 * tags assigner, event dispatcher, optional queue producer), the import options
 * (mark as read, disable content update) and the counters exposed by getSummary().
 *
 * Concrete importers must implement parseEntry() and setEntryAsRead().
 */
abstract class AbstractImport implements ImportInterface
{
    /**
     * @var EntityManager
     */
    protected $em;

    /**
     * Defaults to a NullLogger until setLogger() is called.
     *
     * @var LoggerInterface
     */
    protected $logger;

    /**
     * @var ContentProxy
     */
    protected $contentProxy;

    /**
     * @var TagsAssigner
     */
    protected $tagsAssigner;

    /**
     * @var EventDispatcherInterface
     */
    protected $eventDispatcher;

    /**
     * Only defined once setProducer() has been called.
     *
     * @var ProducerInterface|null
     */
    protected $producer;

    /**
     * Only defined once setUser() has been called.
     *
     * @var User|null
     */
    protected $user;

    /**
     * @var bool|null
     */
    protected $markAsRead;

    /**
     * Forwarded as the last argument of ContentProxy::updateEntry().
     *
     * @var bool
     */
    protected $disableContentUpdate = false;

    /**
     * @var int
     */
    protected $skippedEntries = 0;

    /**
     * @var int
     */
    protected $importedEntries = 0;

    /**
     * @var int
     */
    protected $queuedEntries = 0;

    /**
     * @param EntityManager            $em
     * @param ContentProxy             $contentProxy
     * @param TagsAssigner             $tagsAssigner
     * @param EventDispatcherInterface $eventDispatcher
     */
    public function __construct(EntityManager $em, ContentProxy $contentProxy, TagsAssigner $tagsAssigner, EventDispatcherInterface $eventDispatcher)
    {
        $this->em = $em;
        $this->logger = new NullLogger();
        $this->contentProxy = $contentProxy;
        $this->tagsAssigner = $tagsAssigner;
        $this->eventDispatcher = $eventDispatcher;
    }

    /**
     * Replace the default NullLogger with the given logger.
     *
     * @param LoggerInterface $logger
     */
    public function setLogger(LoggerInterface $logger)
    {
        $this->logger = $logger;
    }

    /**
     * Set RabbitMQ/Redis Producer to send each entry to a queue.
     * This method should be called when user has enabled RabbitMQ.
     *
     * @param ProducerInterface $producer
     */
    public function setProducer(ProducerInterface $producer)
    {
        $this->producer = $producer;
    }

    /**
     * Set current user.
     * Could be the current *connected* user or one retrieved by the consumer.
     *
     * @param User $user
     */
    public function setUser(User $user)
    {
        $this->user = $user;
    }

    /**
     * Set whether articles must be all marked as read.
     *
     * @param bool $markAsRead
     *
     * @return $this
     */
    public function setMarkAsRead($markAsRead)
    {
        $this->markAsRead = $markAsRead;

        return $this;
    }

    /**
     * Get whether articles must be all marked as read.
     *
     * @return bool|null The value given to setMarkAsRead(), null if it was never called
     */
    public function getMarkAsRead()
    {
        return $this->markAsRead;
    }

    /**
     * Set whether the content update of articles must be disabled.
     * The flag is forwarded to ContentProxy::updateEntry() when fetching content.
     *
     * @param bool $disableContentUpdate
     *
     * @return $this
     */
    public function setDisableContentUpdate($disableContentUpdate)
    {
        $this->disableContentUpdate = $disableContentUpdate;

        return $this;
    }

    /**
     * {@inheritdoc}
     */
    public function getSummary()
    {
        return [
            'skipped' => $this->skippedEntries,
            'imported' => $this->importedEntries,
            'queued' => $this->queuedEntries,
        ];
    }

    /**
     * Parse one entry.
     *
     * @param array $importedEntry
     *
     * @return Entry|null The parsed entry, or null when the entry must be ignored
     *                    (null results are skipped by parseEntries())
     */
    abstract public function parseEntry(array $importedEntry);

    /**
     * Fetch content from the ContentProxy (using graby).
     * If it fails, the error is logged and the given entry is left as it is, so it can
     * still be saved in all cases (to avoid the user losing the content).
     *
     * @param Entry  $entry   Entry to update
     * @param string $url     Url to grab content for
     * @param array  $content An array with AT LEAST keys title, html, url, language & content_type to skip the fetchContent from the url
     */
    protected function fetchContent(Entry $entry, $url, array $content = [])
    {
        try {
            $this->contentProxy->updateEntry($entry, $url, $content, $this->disableContentUpdate);
        } catch (\Exception $e) {
            // deliberately best-effort: a failing fetch must not abort the whole import
            $this->logger->error('Error trying to import an entry.', [
                'entry_url' => $url,
                'error_msg' => $e->getMessage(),
            ]);
        }
    }

    /**
     * Parse and insert all given entries.
     *
     * Entries are flushed by batches of 20; the entry.saved event is dispatched for
     * each entry of a batch right after the flush.
     *
     * @param array|\Traversable $entries Raw entries, each one being given to parseEntry()
     */
    protected function parseEntries($entries)
    {
        $i = 1;
        $entryToBeFlushed = [];

        foreach ($entries as $importedEntry) {
            if ($this->markAsRead) {
                $importedEntry = $this->setEntryAsRead($importedEntry);
            }

            $entry = $this->parseEntry($importedEntry);

            if (null === $entry) {
                continue;
            }

            // store each entry to be flushed so we can trigger the entry.saved event for each of them
            // entry.saved needs the entry to be persisted in db because it needs its id to generate
            // images (at least)
            $entryToBeFlushed[] = $entry;

            // flush every 20 entries
            if (($i % 20) === 0) {
                $this->em->flush();

                $this->dispatchSavedEvents($entryToBeFlushed);
                $entryToBeFlushed = [];

                // clear only affected entities
                $this->em->clear(Entry::class);
                $this->em->clear(Tag::class);
            }
            ++$i;
        }

        // flush the last (incomplete) batch and notify about its entries
        $this->em->flush();

        $this->dispatchSavedEvents($entryToBeFlushed);
    }

    /**
     * Parse entries and send them to the queue.
     * It should just be a simple loop on all items, no call to the database should be done
     * to speedup queuing.
     *
     * Faster parse entries for Producer.
     * We don't care to make check at this time. They'll be done by the consumer.
     *
     * An entry which can't be encoded to JSON is logged and counted as skipped instead of
     * being published as an unusable message.
     *
     * @param array $entries
     */
    protected function parseEntriesForProducer(array $entries)
    {
        foreach ($entries as $importedEntry) {
            // set userId for the producer (it won't know which user is connected)
            $importedEntry['userId'] = $this->user->getId();

            if ($this->markAsRead) {
                $importedEntry = $this->setEntryAsRead($importedEntry);
            }

            $message = json_encode($importedEntry);

            // json_encode returns false on failure (invalid UTF-8 for instance):
            // publishing that value would queue a message the consumer can't decode
            if (false === $message) {
                $this->logger->error('Error trying to queue an entry.', [
                    'error_msg' => json_last_error_msg(),
                ]);

                ++$this->skippedEntries;

                continue;
            }

            ++$this->queuedEntries;

            $this->producer->publish($message);
        }
    }

    /**
     * Set current imported entry to archived / read.
     * Implementation is different across all imports.
     *
     * @param array $importedEntry
     *
     * @return array
     */
    abstract protected function setEntryAsRead(array $importedEntry);

    /**
     * Dispatch the entry.saved event for each given entry.
     * Entries must have been flushed before, the event needs them to be persisted.
     *
     * @param Entry[] $entries
     */
    private function dispatchSavedEvents(array $entries)
    {
        foreach ($entries as $entry) {
            $this->eventDispatcher->dispatch(EntrySavedEvent::NAME, new EntrySavedEvent($entry));
        }
    }
}
|