Add isNotParsed field on Entry entity

Fix #4350
This commit is contained in:
Nicolas Lœuillet 2023-07-28 14:58:43 +02:00 committed by Nicolas Lœuillet
parent 85065b509f
commit 20578f0b8e
16 changed files with 299 additions and 6 deletions

View file

@ -0,0 +1,50 @@
<?php
declare(strict_types=1);
namespace Application\Migrations;
use Doctrine\DBAL\Schema\Schema;
use Wallabag\CoreBundle\Doctrine\WallabagMigration;
/**
* Add is_not_parsed field to entry table.
*/
final class Version20230728093912 extends WallabagMigration
{
    /**
     * Adds the `is_not_parsed` boolean column (default 0, nullable) to the entry table.
     *
     * The migration is skipped when the column is already present.
     */
    public function up(Schema $schema): void
    {
        $entryTable = $schema->getTable($this->getTable('entry'));

        $this->skipIf($entryTable->hasColumn('is_not_parsed'), 'It seems that you already played this migration.');

        $entryTable->addColumn('is_not_parsed', 'boolean', [
            'default' => 0,
            'notnull' => false,
        ]);
    }

    /**
     * Query to update entries where content is equal to `fetching_error_message`.
     *
     * Every entry whose content starts with the configured error message gets
     * `is_not_parsed` set to true. Skipped when the column does not exist.
     */
    public function postUp(Schema $schema): void
    {
        $entryTable = $schema->getTable($this->getTable('entry'));

        $this->skipIf(!$entryTable->hasColumn('is_not_parsed'), 'Unable to update is_not_parsed column');

        // Need to do a `LIKE` with a final percent to handle the new line character
        $this->connection->executeQuery(
            'UPDATE ' . $this->getTable('entry') . ' SET is_not_parsed = :isNotParsed WHERE content LIKE :content',
            [
                'isNotParsed' => true,
                'content' => str_replace("\n", '', addslashes($this->container->getParameter('wallabag_core.fetching_error_message'))) . '%',
            ]
        );
    }

    /**
     * Drops the `is_not_parsed` column from the entry table.
     *
     * Guarded like up(): dropping a column that is not there would fail, so the
     * migration is skipped when the column is already gone.
     */
    public function down(Schema $schema): void
    {
        $entryTable = $schema->getTable($this->getTable('entry'));

        $this->skipIf(!$entryTable->hasColumn('is_not_parsed'), 'It seems that you already played this migration.');

        $entryTable->dropColumn('is_not_parsed');
    }
}

View file

@ -176,6 +176,17 @@ class EntryRestController extends WallabagRestController
* )
* ),
* @OA\Parameter(
* name="notParsed",
* in="query",
* description="filter by notParsed status. all entries by default",
* required=false,
* @OA\Schema(
* type="integer",
* enum={"1", "0"},
* default="0"
* )
* ),
* @OA\Parameter(
* name="sort",
* in="query",
* description="sort entries by date.",
@ -286,6 +297,7 @@ class EntryRestController extends WallabagRestController
$isArchived = (null === $request->query->get('archive')) ? null : (bool) $request->query->get('archive');
$isStarred = (null === $request->query->get('starred')) ? null : (bool) $request->query->get('starred');
$isPublic = (null === $request->query->get('public')) ? null : (bool) $request->query->get('public');
$isNotParsed = (null === $request->query->get('notParsed')) ? null : (bool) $request->query->get('notParsed');
$sort = strtolower($request->query->get('sort', 'created'));
$order = strtolower($request->query->get('order', 'desc'));
$page = (int) $request->query->get('page', 1);
@ -307,7 +319,8 @@ class EntryRestController extends WallabagRestController
$since,
$tags,
$detail,
$domainName
$domainName,
$isNotParsed
);
} catch (\Exception $e) {
throw new BadRequestHttpException($e->getMessage());
@ -325,6 +338,7 @@ class EntryRestController extends WallabagRestController
'archive' => $isArchived,
'starred' => $isStarred,
'public' => $isPublic,
'notParsed' => $isNotParsed,
'sort' => $sort,
'order' => $order,
'page' => $page,

View file

@ -7,6 +7,7 @@ use Doctrine\ORM\NoResultException;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
@ -41,13 +42,19 @@ class ReloadEntryCommand extends Command
->setDescription('Reload entries')
->setHelp('This command reload entries')
->addArgument('username', InputArgument::OPTIONAL, 'Reload entries only for the given user')
;
->addOption(
'only-not-parsed',
null,
InputOption::VALUE_NONE,
'Only reload entries which have `is_not_parsed` set to `true`'
);
}
protected function execute(InputInterface $input, OutputInterface $output)
{
$io = new SymfonyStyle($input, $output);
$onlyNotParsed = (bool) $input->getOption('only-not-parsed');
$userId = null;
if ($username = $input->getArgument('username')) {
try {
@ -61,7 +68,8 @@ class ReloadEntryCommand extends Command
}
}
$entryIds = $this->entryRepository->findAllEntriesIdByUserId($userId);
$methodName = $onlyNotParsed ? 'findAllEntriesIdByUserIdAndNotParsed' : 'findAllEntriesIdByUserId';
$entryIds = $this->entryRepository->$methodName($userId);
$nbEntries = \count($entryIds);
if (!$nbEntries) {

View file

@ -85,6 +85,7 @@ class EntryFixtures extends Fixture implements DependentFixtureInterface
'language' => 'de',
'archived' => true,
'tags' => ['bar-tag'],
'is_not_parsed' => true,
],
];
@ -120,6 +121,10 @@ class EntryFixtures extends Fixture implements DependentFixtureInterface
$entry->setPreviewPicture($item['preview']);
}
if (isset($item['is_not_parsed'])) {
$entry->setNotParsed($item['is_not_parsed']);
}
$manager->persist($entry);
$this->addReference($reference, $entry);
}

View file

@ -276,6 +276,17 @@ class Entry
*/
private $headers;
/**
* @var bool
*
* @Exclude
*
* @ORM\Column(name="is_not_parsed", type="boolean")
*
* @Groups({"entries_for_user", "export_all"})
*/
private $isNotParsed = false;
/**
* @Exclude
*
@ -1006,4 +1017,28 @@ class Entry
return $this;
}
/**
 * Set isNotParsed.
 *
 * Marks whether the content of this entry could not be parsed.
 *
 * @param bool $isNotParsed
 *
 * @return Entry
 */
public function setNotParsed($isNotParsed)
{
    // Coerce to a real boolean so a truthy non-bool (e.g. 1 from imported data)
    // never leaks back out of isNotParsed(), which is documented to return bool.
    $this->isNotParsed = (bool) $isNotParsed;

    return $this;
}
/**
 * Get isNotParsed.
 *
 * @return bool true when the entry is flagged as not parsed, false otherwise
 */
public function isNotParsed()
{
    // The is_not_parsed column is created nullable by its migration, so a
    // hydrated entity may hold null here; normalise it to the documented bool.
    return (bool) $this->isNotParsed;
}
}

View file

@ -151,6 +151,10 @@ class EntryFilterType extends AbstractType
$qb->innerJoin('e.annotations', 'a');
},
])
->add('isNotParsed', CheckboxFilterType::class, [
'label' => 'entry.filters.parsed_label',
'data' => $options['filter_parsed'],
])
->add('previewPicture', CheckboxFilterType::class, [
'apply_filter' => function (QueryInterface $filterQuery, $field, $values) {
if (false === $values['value']) {
@ -198,6 +202,7 @@ class EntryFilterType extends AbstractType
'filter_starred' => false,
'filter_unread' => false,
'filter_annotated' => false,
'filter_parsed' => false,
]);
}
}

View file

@ -260,6 +260,7 @@ class ContentProxy
if (empty($content['html'])) {
$content['html'] = $this->fetchingErrorMessage;
$entry->setNotParsed(true);
if (!empty($content['description'])) {
$content['html'] .= '<p><i>But we found a short description: </i></p>';

View file

@ -209,12 +209,13 @@ class EntryRepository extends ServiceEntityRepository
* @param string $tags
* @param string $detail 'metadata' or 'full'. Include content field if 'full'
* @param string $domainName
* @param bool $isNotParsed
*
* @todo Breaking change: replace default detail=full by detail=metadata in a future version
*
* @return Pagerfanta
*/
public function findEntries($userId, $isArchived = null, $isStarred = null, $isPublic = null, $sort = 'created', $order = 'asc', $since = 0, $tags = '', $detail = 'full', $domainName = '')
public function findEntries($userId, $isArchived = null, $isStarred = null, $isPublic = null, $sort = 'created', $order = 'asc', $since = 0, $tags = '', $detail = 'full', $domainName = '', $isNotParsed = null)
{
if (!\in_array(strtolower($detail), ['full', 'metadata'], true)) {
throw new \Exception('Detail "' . $detail . '" parameter is wrong, allowed: full or metadata');
@ -244,6 +245,10 @@ class EntryRepository extends ServiceEntityRepository
$qb->andWhere('e.uid IS ' . (true === $isPublic ? 'NOT' : '') . ' NULL');
}
if (null !== $isNotParsed) {
$qb->andWhere('e.isNotParsed = :isNotParsed')->setParameter('isNotParsed', (bool) $isNotParsed);
}
if ($since > 0) {
$qb->andWhere('e.updatedAt > :since')->setParameter('since', new \DateTime(date('Y-m-d H:i:s', $since)));
}
@ -563,6 +568,24 @@ class EntryRepository extends ServiceEntityRepository
return $qb->getQuery()->getArrayResult();
}
/**
 * Find the ids of all entries flagged as not parsed, optionally limited to one user.
 *
 * @param int|null $userId Restrict the result to this user's entries; no user restriction when null
 *
 * @return array Rows as returned by getArrayResult(), each selecting only the entry `id`
 */
public function findAllEntriesIdByUserIdAndNotParsed($userId = null)
{
    $qb = $this->createQueryBuilder('e')
        ->select('e.id')
        ->where('e.isNotParsed = true');

    if (null !== $userId) {
        // Must be andWhere(): a second where() replaces the existing restriction
        // and would return every entry of the user, parsed or not.
        $qb->andWhere('e.user = :userid')->setParameter(':userid', $userId);
    }

    return $qb->getQuery()->getArrayResult();
}
/**
* Find all entries by url and owner.
*

View file

@ -138,7 +138,7 @@
{{ form_label(form.isStarred) }}
</div>
<div class="input-field col s6 with-checkbox">
<div class="input-field col s12 with-checkbox">
{{ form_widget(form.isUnread) }}
{{ form_label(form.isUnread) }}
</div>
@ -148,6 +148,11 @@
{{ form_label(form.isAnnotated) }}
</div>
<div class="input-field col s12 with-checkbox">
{{ form_widget(form.isNotParsed) }}
{{ form_label(form.isNotParsed) }}
</div>
<div class="col s12">
<label>{{ 'entry.filters.preview_picture_help'|trans }}</label>
</div>

View file

@ -119,6 +119,10 @@ abstract class WallabagImport extends AbstractImport
$entry->setUrl($data['url']);
$entry->setTitle($data['title']);
if (\array_key_exists('is_parsed', $data)) {
$entry->setNotParsed(true);
}
// update entry with content (in case fetching failed, the given entry will be return)
$this->fetchContent($entry, $data['url'], $data);

View file

@ -65,6 +65,7 @@ class WallabagV1Import extends WallabagImport
if (\in_array($entry['title'], $this->untitled, true)) {
$data['title'] = $this->fetchingErrorMessageTitle;
$data['html'] = $this->fetchingErrorMessage;
$entry['is_not_parsed'] = 1;
}
if (\array_key_exists('tags', $entry) && '' !== $entry['tags']) {

View file

@ -190,6 +190,7 @@ class EntryRestControllerTest extends WallabagApiTestCase
'tags' => 'foo',
'since' => 1443274283,
'public' => 0,
'notParsed' => 0,
]);
$this->assertSame(200, $this->client->getResponse()->getStatusCode());
@ -348,6 +349,60 @@ class EntryRestControllerTest extends WallabagApiTestCase
$this->assertSame('application/json', $this->client->getResponse()->headers->get('Content-Type'));
}
/**
 * Listing entries with `notParsed=1` returns a non-empty paginated result
 * whose pagination links keep the filter.
 */
public function testGetNotParsedEntries()
{
    $this->client->request('GET', '/api/entries', ['notParsed' => 1]);

    $response = $this->client->getResponse();
    $this->assertSame(200, $response->getStatusCode());

    $content = json_decode($response->getContent(), true);

    // Paginated payload must contain at least one item
    $this->assertGreaterThanOrEqual(1, \count($content));
    $this->assertNotEmpty($content['_embedded']['items']);
    $this->assertGreaterThanOrEqual(1, $content['total']);
    $this->assertSame(1, $content['page']);
    $this->assertGreaterThanOrEqual(1, $content['pages']);

    $this->assertArrayHasKey('_links', $content);
    $links = $content['_links'];

    $linkNames = ['self', 'first', 'last'];
    foreach ($linkNames as $name) {
        $this->assertArrayHasKey($name, $links);
    }

    // Every pagination link has to carry the filter over
    foreach ($linkNames as $name) {
        $this->assertArrayHasKey('href', $links[$name]);
        $this->assertStringContainsString('notParsed=1', $links[$name]['href']);
    }

    $this->assertSame('application/json', $response->headers->get('Content-Type'));
}
/**
 * Listing entries with `notParsed=0` returns a non-empty paginated result
 * whose pagination links keep the filter.
 */
public function testGetParsedEntries()
{
    $this->client->request('GET', '/api/entries', ['notParsed' => 0]);

    $response = $this->client->getResponse();
    $this->assertSame(200, $response->getStatusCode());

    $content = json_decode($response->getContent(), true);

    // Paginated payload must contain at least one item
    $this->assertGreaterThanOrEqual(1, \count($content));
    $this->assertNotEmpty($content['_embedded']['items']);
    $this->assertGreaterThanOrEqual(1, $content['total']);
    $this->assertSame(1, $content['page']);
    $this->assertGreaterThanOrEqual(1, $content['pages']);

    $this->assertArrayHasKey('_links', $content);
    $links = $content['_links'];

    $linkNames = ['self', 'first', 'last'];
    foreach ($linkNames as $name) {
        $this->assertArrayHasKey($name, $links);
    }

    // Every pagination link has to carry the filter over
    foreach ($linkNames as $name) {
        $this->assertArrayHasKey('href', $links[$name]);
        $this->assertStringContainsString('notParsed=0', $links[$name]['href']);
    }

    $this->assertSame('application/json', $response->headers->get('Content-Type'));
}
public function testGetTaggedEntries()
{
$this->client->request('GET', '/api/entries', ['tags' => 'foo,bar']);

View file

@ -21,6 +21,16 @@ class ReloadEntryCommandTest extends WallabagCoreTestCase
*/
public $bobEntry;
/**
* @var Entry
*/
public $bobParsedEntry;
/**
* @var Entry
*/
public $bobNotParsedEntry;
protected function setUp(): void
{
parent::setUp();
@ -41,6 +51,19 @@ class ReloadEntryCommandTest extends WallabagCoreTestCase
$this->bobEntry->setContent('');
$this->getEntityManager()->persist($this->bobEntry);
$this->bobParsedEntry = new Entry($user);
$this->bobParsedEntry->setUrl($this->url);
$this->bobParsedEntry->setTitle('title foo');
$this->bobParsedEntry->setContent('');
$this->getEntityManager()->persist($this->bobParsedEntry);
$this->bobNotParsedEntry = new Entry($user);
$this->bobNotParsedEntry->setUrl($this->url);
$this->bobNotParsedEntry->setTitle('title foo');
$this->bobNotParsedEntry->setContent('');
$this->bobNotParsedEntry->setNotParsed(true);
$this->getEntityManager()->persist($this->bobNotParsedEntry);
$this->getEntityManager()->flush();
}
@ -95,6 +118,27 @@ class ReloadEntryCommandTest extends WallabagCoreTestCase
$this->assertStringContainsString('Done', $tester->getDisplay());
}
/**
 * With `--only-not-parsed`, the entry flagged as not parsed gets content
 * after the run while the non-flagged entry keeps its empty content.
 */
public function testRunReloadEntryWithNotParsedOption()
{
    $application = new Application($this->getTestClient()->getKernel());
    $tester = new CommandTester($application->find('wallabag:entry:reload'));

    $tester->execute(['--only-not-parsed' => true]);

    $repository = $this->getTestClient()->getContainer()->get('wallabag_core.entry_repository.test');

    // Entry without the flag: content is still empty
    $parsedEntry = $repository->find($this->bobParsedEntry->getId());
    $this->assertEmpty($parsedEntry->getContent());

    // Entry with the flag: content is no longer empty
    $notParsedEntry = $repository->find($this->bobNotParsedEntry->getId());
    $this->assertNotEmpty($notParsedEntry->getContent());

    $this->assertStringContainsString('Done', $tester->getDisplay());
}
public function testRunReloadEntryWithoutEntryCommand()
{
$application = new Application($this->getTestClient()->getKernel());

View file

@ -967,6 +967,34 @@ class EntryControllerTest extends WallabagCoreTestCase
$this->assertCount(3, $crawler->filter('ol.entries > li'));
}
/**
 * The "not parsed" filter lists one entry at first, and two once another
 * entry flagged as not parsed has been stored for the logged-in user.
 */
public function testFilterOnNotCorrectlyParsedStatus()
{
    $this->logInAs('admin');
    $client = $this->getTestClient();

    $listPage = $client->request('GET', '/all/list');
    $form = $listPage->filter('button[id=submit-filter]')->form();
    $filters = ['entry_filter[isNotParsed]' => true];

    // First submission: a single entry matches the filter
    $filtered = $client->submit($form, $filters);
    $this->assertCount(1, $filtered->filter($this->entryDataTestAttribute));

    // Store one more entry flagged as not parsed
    $notParsedEntry = new Entry($this->getLoggedInUser());
    $notParsedEntry->setUrl($this->url);
    $notParsedEntry->setNotParsed(true);
    $this->getEntityManager()->persist($notParsedEntry);
    $this->getEntityManager()->flush();

    // Same filter again: the new entry is listed as well
    $filtered = $client->submit($form, $filters);
    $this->assertCount(2, $filtered->filter($this->entryDataTestAttribute));
}
public function testPaginationWithFilter()
{
$this->logInAs('admin');

View file

@ -57,6 +57,7 @@ class ContentProxyTest extends TestCase
$this->assertEmpty($entry->getLanguage());
$this->assertSame(0.0, $entry->getReadingTime());
$this->assertNull($entry->getDomainName());
$this->assertTrue($entry->isNotParsed());
}
public function testWithEmptyContent()
@ -96,6 +97,7 @@ class ContentProxyTest extends TestCase
$this->assertEmpty($entry->getLanguage());
$this->assertSame(0.0, $entry->getReadingTime());
$this->assertSame('0.0.0.0', $entry->getDomainName());
$this->assertTrue($entry->isNotParsed());
}
public function testWithEmptyContentButOG()
@ -138,6 +140,7 @@ class ContentProxyTest extends TestCase
$this->assertEmpty($entry->getMimetype());
$this->assertSame(0.0, $entry->getReadingTime());
$this->assertSame('domain.io', $entry->getDomainName());
$this->assertTrue($entry->isNotParsed());
}
public function testWithContent()
@ -183,6 +186,7 @@ class ContentProxyTest extends TestCase
$this->assertSame('200', $entry->getHttpStatus());
$this->assertSame(4.0, $entry->getReadingTime());
$this->assertSame('1.1.1.1', $entry->getDomainName());
$this->assertFalse($entry->isNotParsed());
}
public function testWithContentAndNoOgImage()
@ -228,6 +232,7 @@ class ContentProxyTest extends TestCase
$this->assertSame('200', $entry->getHttpStatus());
$this->assertSame(4.0, $entry->getReadingTime());
$this->assertSame('1.1.1.1', $entry->getDomainName());
$this->assertFalse($entry->isNotParsed());
}
public function testWithContentAndContentImage()
@ -272,6 +277,7 @@ class ContentProxyTest extends TestCase
$this->assertSame('200', $entry->getHttpStatus());
$this->assertSame(0.0, $entry->getReadingTime());
$this->assertSame('1.1.1.1', $entry->getDomainName());
$this->assertFalse($entry->isNotParsed());
}
public function testWithContentImageAndOgImage()
@ -316,6 +322,7 @@ class ContentProxyTest extends TestCase
$this->assertSame('200', $entry->getHttpStatus());
$this->assertSame(0.0, $entry->getReadingTime());
$this->assertSame('1.1.1.1', $entry->getDomainName());
$this->assertFalse($entry->isNotParsed());
}
public function testWithContentAndBadLanguage()
@ -363,6 +370,7 @@ class ContentProxyTest extends TestCase
$this->assertSame('200', $entry->getHttpStatus());
$this->assertSame(4.0, $entry->getReadingTime());
$this->assertSame('1.1.1.1', $entry->getDomainName());
$this->assertFalse($entry->isNotParsed());
}
public function testWithContentAndBadOgImage()
@ -416,6 +424,7 @@ class ContentProxyTest extends TestCase
$this->assertSame('200', $entry->getHttpStatus());
$this->assertSame(4.0, $entry->getReadingTime());
$this->assertSame('1.1.1.1', $entry->getDomainName());
$this->assertFalse($entry->isNotParsed());
}
public function testWithForcedContent()
@ -460,6 +469,7 @@ class ContentProxyTest extends TestCase
$this->assertContains('Thomas', $entry->getPublishedBy());
$this->assertNotNull($entry->getHeaders(), 'Headers are stored, so value is not null');
$this->assertContains('no-cache', $entry->getHeaders());
$this->assertFalse($entry->isNotParsed());
}
public function testWithForcedContentAndDateTime()
@ -498,6 +508,7 @@ class ContentProxyTest extends TestCase
$this->assertSame(4.0, $entry->getReadingTime());
$this->assertSame('1.1.1.1', $entry->getDomainName());
$this->assertSame('08/09/2016', $entry->getPublishedAt()->format('d/m/Y'));
$this->assertFalse($entry->isNotParsed());
}
public function testWithForcedContentAndBadDate()
@ -537,6 +548,7 @@ class ContentProxyTest extends TestCase
$this->assertSame(4.0, $entry->getReadingTime());
$this->assertSame('1.1.1.1', $entry->getDomainName());
$this->assertNull($entry->getPublishedAt());
$this->assertFalse($entry->isNotParsed());
$records = $handler->getRecords();
@ -625,6 +637,7 @@ class ContentProxyTest extends TestCase
$this->assertSame('fr', $entry->getLanguage());
$this->assertSame('200', $entry->getHttpStatus());
$this->assertSame('1.1.1.1', $entry->getDomainName());
$this->assertFalse($entry->isNotParsed());
}
public function testWithImageAsContent()
@ -663,6 +676,7 @@ class ContentProxyTest extends TestCase
$this->assertSame('image/jpeg', $entry->getMimetype());
$this->assertSame('200', $entry->getHttpStatus());
$this->assertSame('1.1.1.1', $entry->getDomainName());
$this->assertFalse($entry->isNotParsed());
}
public function testWebsiteWithValidUTF8TitleDoNothing()

View file

@ -252,6 +252,7 @@ entry:
starred_label: Starred
unread_label: Unread
annotated_label: Annotated
parsed_label: Not correctly fetched
preview_picture_label: Has a preview picture
preview_picture_help: Preview picture
is_public_label: Has a public link