wallabag/src/Import/AbstractImport.php

224 lines
6.4 KiB
PHP
Raw Normal View History

<?php
2024-02-19 00:30:12 +00:00
namespace Wallabag\Import;
2022-08-27 18:22:48 +00:00
use Doctrine\ORM\EntityManagerInterface;
2017-07-01 07:52:38 +00:00
use OldSound\RabbitMqBundle\RabbitMq\ProducerInterface;
use Psr\Log\LoggerInterface;
2017-07-01 07:52:38 +00:00
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
2024-02-19 00:30:12 +00:00
use Wallabag\Entity\Entry;
use Wallabag\Entity\Tag;
use Wallabag\Entity\User;
use Wallabag\Event\EntrySavedEvent;
use Wallabag\Helper\ContentProxy;
use Wallabag\Helper\TagsAssigner;
abstract class AbstractImport implements ImportInterface
{
/** @var EntityManagerInterface */
protected $em;

/** @var LoggerInterface */
protected $logger;

/** @var ContentProxy */
protected $contentProxy;

/** @var TagsAssigner */
protected $tagsAssigner;

/** @var EventDispatcherInterface */
protected $eventDispatcher;

/** @var ProducerInterface|null Only available once setProducer() has been called */
protected $producer;

/** @var User|null Only available once setUser() has been called */
protected $user;

/** @var bool|null Unset until setMarkAsRead() has been called */
protected $markAsRead;

/** @var bool Forwarded as is to ContentProxy::updateEntry() by fetchContent() */
protected $disableContentUpdate = false;

/** @var int Reported under the "skipped" key of getSummary() */
protected $skippedEntries = 0;

/** @var int Reported under the "imported" key of getSummary() */
protected $importedEntries = 0;

/** @var int Reported under the "queued" key of getSummary() */
protected $queuedEntries = 0;
/**
 * Store the collaborators shared by every import implementation.
 */
public function __construct(
    EntityManagerInterface $em,
    ContentProxy $contentProxy,
    TagsAssigner $tagsAssigner,
    EventDispatcherInterface $eventDispatcher,
    LoggerInterface $logger
) {
    $this->em = $em;
    $this->contentProxy = $contentProxy;
    $this->tagsAssigner = $tagsAssigner;
    $this->eventDispatcher = $eventDispatcher;
    $this->logger = $logger;
}
/**
 * Replace the logger given to the constructor.
 */
public function setLogger(LoggerInterface $logger)
{
    $this->logger = $logger;
}
2016-09-04 19:49:21 +00:00
/**
 * Set the RabbitMQ/Redis producer used to send each entry to a queue.
 *
 * This method should be called when the user has enabled asynchronous import.
 */
public function setProducer(ProducerInterface $producer)
{
    $this->producer = $producer;
}
/**
 * Set the current user.
 *
 * It could be the currently *connected* user or one retrieved by the consumer.
 */
public function setUser(User $user)
{
    $this->user = $user;
}
/**
 * Choose whether all imported articles must be marked as read.
 *
 * @param bool $markAsRead
 *
 * @return $this
 */
public function setMarkAsRead($markAsRead)
{
    $this->markAsRead = $markAsRead;

    return $this;
}
/**
 * Tell whether all imported articles must be marked as read.
 *
 * @return bool|null The value given to setMarkAsRead(), null when it was never called
 */
public function getMarkAsRead()
{
    return $this->markAsRead;
}
/**
 * Choose whether fetching updated content for the articles is disabled.
 *
 * The flag is passed as is to the ContentProxy by fetchContent().
 *
 * @param bool $disableContentUpdate
 *
 * @return $this
 */
public function setDisableContentUpdate($disableContentUpdate)
{
    $this->disableContentUpdate = $disableContentUpdate;

    return $this;
}
2017-07-01 07:52:38 +00:00
/**
 * Expose the counters of the current import.
 *
 * @return array{skipped: int, imported: int, queued: int}
 */
public function getSummary()
{
    $summary = [];
    $summary['skipped'] = $this->skippedEntries;
    $summary['imported'] = $this->importedEntries;
    $summary['queued'] = $this->queuedEntries;

    return $summary;
}
/**
 * Parse one entry.
 *
 * @param array $importedEntry Raw data of a single entry to import
 *
 * @return Entry|null When null is returned, parseEntries() ignores the item:
 *                    it is not kept for the entry.saved event
 */
abstract public function parseEntry(array $importedEntry);
/**
 * Validate that an entry is valid (e.g. it has some required keys, etc.).
 *
 * @param array $importedEntry Raw data of a single entry to import
 *
 * @return bool When false is returned, parseEntries() skips the entry without parsing it
 */
abstract public function validateEntry(array $importedEntry);
/**
 * Fetch content from the ContentProxy (using graby).
 *
 * A failure is only logged and never rethrown, so the given entry can still be
 * saved in all cases (to avoid the user losing the content).
 *
 * @param Entry  $entry   Entry to update
 * @param string $url     Url to grab content for
 * @param array  $content An array with AT LEAST keys title, html, url, language & content_type to skip the fetchContent from the url
 */
protected function fetchContent(Entry $entry, $url, array $content = [])
{
    try {
        $this->contentProxy->updateEntry($entry, $url, $content, $this->disableContentUpdate);
    } catch (\Exception $exception) {
        $logContext = [
            'entry_url' => $url,
            'error_msg' => $exception->getMessage(),
        ];

        $this->logger->error('Error trying to import an entry.', $logContext);
    }
}
2016-09-04 19:49:21 +00:00
/**
 * Parse and insert all given entries.
 *
 * Entries rejected by validateEntry() or for which parseEntry() returns null
 * are ignored. The others are flushed by batches of 20 and the entry.saved
 * event is dispatched for each of them right after their batch is flushed.
 */
protected function parseEntries(array $entries)
{
    // entry.saved needs the entry to be persisted in db because it needs its id to generate
    // images (at least), so parsed entries are buffered here until the next flush
    $pendingEntries = [];

    foreach ($entries as $importedEntry) {
        if ($this->markAsRead) {
            $importedEntry = $this->setEntryAsRead($importedEntry);
        }

        if (false === $this->validateEntry($importedEntry)) {
            continue;
        }

        $entry = $this->parseEntry($importedEntry);

        if (null === $entry) {
            continue;
        }

        $pendingEntries[] = $entry;

        // flush every 20 entries
        if (\count($pendingEntries) < 20) {
            continue;
        }

        $this->em->flush();

        foreach ($pendingEntries as $savedEntry) {
            $this->eventDispatcher->dispatch(new EntrySavedEvent($savedEntry), EntrySavedEvent::NAME);
        }

        $pendingEntries = [];

        // clear only affected entities
        $this->em->clear(Entry::class);
        $this->em->clear(Tag::class);
    }

    // flush the last (incomplete) batch, then notify for the entries it contained
    $this->em->flush();

    foreach ($pendingEntries as $savedEntry) {
        $this->eventDispatcher->dispatch(new EntrySavedEvent($savedEntry), EntrySavedEvent::NAME);
    }
}
2016-09-05 05:50:10 +00:00
/**
 * Parse entries and send them to the queue.
 *
 * It is just a simple loop over all items: no call to the database is done,
 * to speed up queuing. No check is made at this time either, they are left
 * to the consumer.
 *
 * An entry that can not be serialized to JSON is logged and counted as
 * skipped instead of being published as an unusable message.
 */
protected function parseEntriesForProducer(array $entries)
{
    foreach ($entries as $importedEntry) {
        // set userId for the producer (it won't know which user is connected)
        $importedEntry['userId'] = $this->user->getId();

        if ($this->markAsRead) {
            $importedEntry = $this->setEntryAsRead($importedEntry);
        }

        $message = json_encode($importedEntry);

        // json_encode() returns false on failure (malformed UTF-8 for instance):
        // do not publish that value nor count the entry as queued
        if (false === $message) {
            $this->logger->error('Error trying to queue an entry.', [
                'error_msg' => json_last_error_msg(),
            ]);

            ++$this->skippedEntries;

            continue;
        }

        ++$this->queuedEntries;

        $this->producer->publish($message);
    }
}
/**
 * Set current imported entry to archived / read.
 * Implementation is different across all imports.
 *
 * @param array $importedEntry Raw data of a single entry to import
 *
 * @return array The imported entry; callers in this class replace the original entry with it
 */
abstract protected function setEntryAsRead(array $importedEntry);
}