Use hashed given url to avoid duplicates

By storing a hash of the given url we can put an index on it, which keeps duplicate lookups fast.
This commit is contained in:
Jeremy Benoist 2019-05-29 14:18:04 +02:00
parent b7fa51ae7d
commit f3bfb875e9
No known key found for this signature in database
GPG key ID: BCA73962457ACC3C
6 changed files with 134 additions and 132 deletions

View file

@ -1,38 +0,0 @@
<?php
namespace Application\Migrations;
use Doctrine\DBAL\Schema\Schema;
use Wallabag\CoreBundle\Doctrine\WallabagMigration;
/**
 * Adds the nullable `given_url` text column to the entry table.
 */
class Version20170710125843 extends WallabagMigration
{
    /**
     * Create the `given_url` column; the migration is skipped when the column is already there.
     *
     * @param Schema $schema
     */
    public function up(Schema $schema)
    {
        $table = $schema->getTable($this->getTable('entry'));
        $columnExists = $table->hasColumn('given_url');

        $this->skipIf($columnExists, 'It seems that you already played this migration.');

        $table->addColumn('given_url', 'text', ['notnull' => false]);
    }

    /**
     * Remove the `given_url` column; the migration is skipped when the column is already gone.
     *
     * @param Schema $schema
     */
    public function down(Schema $schema)
    {
        $table = $schema->getTable($this->getTable('entry'));
        $columnMissing = !$table->hasColumn('given_url');

        $this->skipIf($columnMissing, 'It seems that you already played this migration.');

        $table->dropColumn('given_url');
    }
}

View file

@ -1,48 +0,0 @@
<?php
namespace Application\Migrations;
use Doctrine\DBAL\Schema\Schema;
use Wallabag\CoreBundle\Doctrine\WallabagMigration;
/**
 * Added a unique index on wallabag_entry.url, wallabag_entry.given_url and wallabag_entry.user_id.
 */
class Version20171218135243 extends WallabagMigration
{
    private $indexGivenUrl = 'IDX_entry_given_url';

    /**
     * Create the unique (url, given_url, user_id) index using platform-specific SQL.
     *
     * @param Schema $schema
     *
     * @throws \RuntimeException when the database platform is not sqlite, mysql or postgresql
     */
    public function up(Schema $schema)
    {
        $entryTable = $schema->getTable($this->getTable('entry'));

        $this->skipIf($entryTable->hasIndex($this->indexGivenUrl), 'It seems that you already played this migration.');

        $platform = $this->connection->getDatabasePlatform()->getName();

        switch ($platform) {
            case 'mysql':
                // On MySQL the url columns are indexed on their first 255 characters only.
                $columns = 'url (255), given_url (255), user_id';
                break;
            case 'sqlite':
            case 'postgresql':
                $columns = 'url, given_url, user_id';
                break;
            default:
                // Fail loudly instead of handing an undefined statement to addSql().
                throw new \RuntimeException(sprintf('Unsupported database platform "%s" for index %s.', $platform, $this->indexGivenUrl));
        }

        $this->addSql('CREATE UNIQUE INDEX ' . $this->indexGivenUrl . ' ON ' . $this->getTable('entry') . ' (' . $columns . ');');
    }

    /**
     * Drop the unique index; the migration is skipped when the index does not exist.
     *
     * @param Schema $schema
     */
    public function down(Schema $schema)
    {
        $entryTable = $schema->getTable($this->getTable('entry'));

        $this->skipIf(false === $entryTable->hasIndex($this->indexGivenUrl), 'It seems that you already played this migration.');

        $entryTable->dropIndex($this->indexGivenUrl);
    }
}

View file

@ -0,0 +1,74 @@
<?php
namespace Application\Migrations;
use Doctrine\DBAL\Schema\Schema;
use Wallabag\CoreBundle\Doctrine\WallabagMigration;
/**
 * Added `given_url` & `hashed_given_url` fields in entry table and replaced the
 * `hashed_url_user_id` index with `hashed_urls_user_id`, which also covers `hashed_given_url`.
 *
 * Every step is guarded by an existence check so the migration can be applied to a
 * schema that is already partially migrated.
 */
class Version20190601125843 extends WallabagMigration
{
    /**
     * Add the two columns and swap the index.
     *
     * @param Schema $schema
     */
    public function up(Schema $schema)
    {
        $entryTable = $schema->getTable($this->getTable('entry'));

        if (!$entryTable->hasColumn('given_url')) {
            $entryTable->addColumn('given_url', 'text', [
                'notnull' => false,
            ]);
        }

        // NOTE(review): the Entry mapping declares hashed_given_url as string(40) while
        // this migration creates a `text` column with length 40 - confirm this is intended.
        if (!$entryTable->hasColumn('hashed_given_url')) {
            $entryTable->addColumn('hashed_given_url', 'text', [
                'length' => 40,
                'notnull' => false,
            ]);
        }

        // The old two-column index may already be gone; dropping a missing index would throw.
        if ($entryTable->hasIndex('hashed_url_user_id')) {
            $entryTable->dropIndex('hashed_url_user_id');
        }

        if (!$entryTable->hasIndex('hashed_urls_user_id')) {
            $entryTable->addIndex(
                [
                    'user_id',
                    'hashed_url',
                    'hashed_given_url',
                ],
                'hashed_urls_user_id',
                [],
                [
                    // specify length for index which is required by MySQL on text field
                    'lengths' => [
                        // user_id
                        null,
                        // hashed_url
                        40,
                        // hashed_given_url
                        40,
                    ],
                ]
            );
        }
    }

    /**
     * Remove the two columns and restore the previous `hashed_url_user_id` index.
     *
     * @param Schema $schema
     */
    public function down(Schema $schema)
    {
        $entryTable = $schema->getTable($this->getTable('entry'));

        // Drop the composite index first: it references hashed_given_url, which is removed below.
        if ($entryTable->hasIndex('hashed_urls_user_id')) {
            $entryTable->dropIndex('hashed_urls_user_id');
        }

        if ($entryTable->hasColumn('given_url')) {
            $entryTable->dropColumn('given_url');
        }

        if ($entryTable->hasColumn('hashed_given_url')) {
            $entryTable->dropColumn('hashed_given_url');
        }

        if (!$entryTable->hasIndex('hashed_url_user_id')) {
            $entryTable->addIndex(['user_id', 'hashed_url'], 'hashed_url_user_id', [], ['lengths' => [null, 40]]);
        }
    }
}

View file

@ -27,9 +27,8 @@ use Wallabag\UserBundle\Entity\User;
* indexes={ * indexes={
* @ORM\Index(name="created_at", columns={"created_at"}), * @ORM\Index(name="created_at", columns={"created_at"}),
* @ORM\Index(name="uid", columns={"uid"}), * @ORM\Index(name="uid", columns={"uid"}),
* @ORM\Index(name="hashed_url_user_id", columns={"user_id", "hashed_url"}, options={"lengths"={null, 40}}) * @ORM\Index(name="hashed_urls_user_id", columns={"user_id", "hashed_url", "hashed_given_url"}, options={"lengths"={null, 40, 40}})
* }, * }
* uniqueConstraints={@ORM\UniqueConstraint(name="IDX_entry_given_url",columns={"url", "given_url", "user_id"})}
* ) * )
* @ORM\HasLifecycleCallbacks() * @ORM\HasLifecycleCallbacks()
* @Hateoas\Relation("self", href = "expr('/api/entries/' ~ object.getId())") * @Hateoas\Relation("self", href = "expr('/api/entries/' ~ object.getId())")
@ -69,15 +68,8 @@ class Entry
private $title; private $title;
/** /**
* @var string * Define the url fetched by wallabag (the final url after potential redirections).
* *
* @ORM\Column(name="given_url", type="text", nullable=true)
*
* @Groups({"entries_for_user", "export_all"})
*/
private $givenUrl;
/**
* @var string * @var string
* *
* @Assert\NotBlank() * @Assert\NotBlank()
@ -94,6 +86,35 @@ class Entry
*/ */
private $hashedUrl; private $hashedUrl;
/**
* Where the user retrieved/found the url (another article, a tweet, or the given_url if none is provided).
*
* @var string
*
* @ORM\Column(name="origin_url", type="text", nullable=true)
*
* @Groups({"entries_for_user", "export_all"})
*/
private $originUrl;
/**
* Define the url entered by the user (without redirections).
*
* @var string
*
* @ORM\Column(name="given_url", type="text", nullable=true)
*
* @Groups({"entries_for_user", "export_all"})
*/
private $givenUrl;
/**
* @var string
*
* @ORM\Column(name="hashed_given_url", type="string", length=40, nullable=true)
*/
private $hashedGivenUrl;
/** /**
* @var bool * @var bool
* *
@ -273,15 +294,6 @@ class Entry
*/ */
private $tags; private $tags;
/**
* @var string
*
* @ORM\Column(name="origin_url", type="text", nullable=true)
*
* @Groups({"entries_for_user", "export_all"})
*/
private $originUrl;
/* /*
* @param User $user * @param User $user
*/ */
@ -325,30 +337,6 @@ class Entry
return $this->title; return $this->title;
} }
/**
* Set the given url, i.e. the url stored in the `given_url` column.
*
* @param string $givenUrl
*
* @return Entry The current instance, to allow method chaining
*/
public function setGivenUrl($givenUrl)
{
$this->givenUrl = $givenUrl;
return $this;
}
/**
* Get the given url.
*
* @return string|null Null when no given url was set (the column is nullable)
*/
public function getGivenUrl()
{
return $this->givenUrl;
}
/** /**
* Set url. * Set url.
* *
@ -956,6 +944,31 @@ class Entry
return $this->originUrl; return $this->originUrl;
} }
/**
* Set given url and refresh its hash.
*
* The hashed given url is recomputed through UrlHasher::hashUrl() on every call
* so that both properties stay in sync.
*
* @param string $givenUrl
*
* @return Entry The current instance, to allow method chaining
*/
public function setGivenUrl($givenUrl)
{
$this->givenUrl = $givenUrl;
$this->hashedGivenUrl = UrlHasher::hashUrl($givenUrl);
return $this;
}
/**
* Get given url.
*
* @return string|null Null when no given url was set (the column is nullable)
*/
public function getGivenUrl()
{
return $this->givenUrl;
}
/** /**
* @return string * @return string
*/ */

View file

@ -76,9 +76,10 @@ class ContentProxy
// Not sure what are the other possible cases where this property is empty // Not sure what are the other possible cases where this property is empty
if (empty($entry->getUrl()) && !empty($url)) { if (empty($entry->getUrl()) && !empty($url)) {
$entry->setUrl($url); $entry->setUrl($url);
$entry->setGivenUrl($url);
} }
$entry->setGivenUrl($url);
$this->stockEntry($entry, $content); $this->stockEntry($entry, $content);
} }

View file

@ -368,7 +368,7 @@ class EntryRepository extends EntityRepository
{ {
$res = $this->createQueryBuilder('e') $res = $this->createQueryBuilder('e')
->where('e.hashedUrl = :hashed_url')->setParameter('hashed_url', $hashedUrl) ->where('e.hashedUrl = :hashed_url')->setParameter('hashed_url', $hashedUrl)
// ->orWhere('e.givenUrl = :url')->setParameter('url', $url) ->orWhere('e.hashedGivenUrl = :hashed_given_url')->setParameter('hashed_given_url', $hashedUrl)
->andWhere('e.user = :user_id')->setParameter('user_id', $userId) ->andWhere('e.user = :user_id')->setParameter('user_id', $userId)
->getQuery() ->getQuery()
->getResult(); ->getResult();