Use a better index for hashed_url

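The `hashed_url` column will most often be queried together with `user_id`, so the index now covers both columns (`user_id`, `hashed_url`) instead of `hashed_url` alone.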
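Also, automatically generate the hash whenever the URL is set on an entry (in `Entry::setUrl()`), so callers no longer need to call `setHashedUrl()` themselves.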
Switch the hash algorithm from `md5` to `sha1`; the `hashed_url` column length grows from 32 to 40 characters accordingly.
This commit is contained in:
Jeremy Benoist 2019-04-01 13:51:57 +02:00
parent 9c2b2aae70
commit 8a64566298
No known key found for this signature in database
GPG key ID: BCA73962457ACC3C
7 changed files with 21 additions and 23 deletions

View file

@ -20,7 +20,7 @@ class Version20190401105353 extends WallabagMigration
$this->skipIf($entryTable->hasColumn('hashed_url'), 'It seems that you already played this migration.'); $this->skipIf($entryTable->hasColumn('hashed_url'), 'It seems that you already played this migration.');
$entryTable->addColumn('hashed_url', 'text', [ $entryTable->addColumn('hashed_url', 'text', [
'length' => 32, 'length' => 40,
'notnull' => false, 'notnull' => false,
]); ]);
@ -28,6 +28,8 @@ class Version20190401105353 extends WallabagMigration
if ('sqlite' !== $this->connection->getDatabasePlatform()->getName()) { if ('sqlite' !== $this->connection->getDatabasePlatform()->getName()) {
$this->addSql('UPDATE ' . $this->getTable('entry') . ' SET hashed_url = MD5(url)'); $this->addSql('UPDATE ' . $this->getTable('entry') . ' SET hashed_url = MD5(url)');
} }
$entryTable->addIndex(['user_id', 'hashed_url'], 'hashed_url_user_id');
} }
/** /**
@ -39,6 +41,7 @@ class Version20190401105353 extends WallabagMigration
$this->skipIf(!$entryTable->hasColumn('hashed_url'), 'It seems that you already played this migration.'); $this->skipIf(!$entryTable->hasColumn('hashed_url'), 'It seems that you already played this migration.');
$entryTable->dropIndex('hashed_url_user_id');
$entryTable->dropColumn('hashed_url'); $entryTable->dropColumn('hashed_url');
} }
} }

View file

@ -69,7 +69,7 @@ class GenerateUrlHashesCommand extends ContainerAwareCommand
$i = 1; $i = 1;
foreach ($entries as $entry) { foreach ($entries as $entry) {
$entry->setHashedUrl(hash('md5', $entry->getUrl())); $entry->setHashedUrl(hash('sha1', $entry->getUrl()));
$em->persist($entry); $em->persist($entry);
if (0 === ($i % 20)) { if (0 === ($i % 20)) {

View file

@ -89,7 +89,6 @@ class EntryFixtures extends Fixture implements DependentFixtureInterface
foreach ($entries as $reference => $item) { foreach ($entries as $reference => $item) {
$entry = new Entry($this->getReference($item['user'])); $entry = new Entry($this->getReference($item['user']));
$entry->setUrl($item['url']); $entry->setUrl($item['url']);
$entry->setHashedUrl(hash('md5', $item['url']));
$entry->setReadingTime($item['reading_time']); $entry->setReadingTime($item['reading_time']);
$entry->setDomainName($item['domain']); $entry->setDomainName($item['domain']);
$entry->setMimetype($item['mime']); $entry->setMimetype($item['mime']);

View file

@ -26,7 +26,7 @@ use Wallabag\UserBundle\Entity\User;
* indexes={ * indexes={
* @ORM\Index(name="created_at", columns={"created_at"}), * @ORM\Index(name="created_at", columns={"created_at"}),
* @ORM\Index(name="uid", columns={"uid"}), * @ORM\Index(name="uid", columns={"uid"}),
* @ORM\Index(name="hashed_url", columns={"hashed_url"}) * @ORM\Index(name="hashed_url_user_id", columns={"user_id", "hashed_url"})
* } * }
* ) * )
* @ORM\HasLifecycleCallbacks() * @ORM\HasLifecycleCallbacks()
@ -79,7 +79,7 @@ class Entry
/** /**
* @var string * @var string
* *
* @ORM\Column(name="hashed_url", type="string", length=32, nullable=true) * @ORM\Column(name="hashed_url", type="string", length=40, nullable=true)
*/ */
private $hashedUrl; private $hashedUrl;
@ -324,6 +324,7 @@ class Entry
public function setUrl($url) public function setUrl($url)
{ {
$this->url = $url; $this->url = $url;
$this->hashedUrl = hash('sha1', $url);
return $this; return $this;
} }

View file

@ -248,8 +248,6 @@ class ContentProxy
{ {
$this->updateOriginUrl($entry, $content['url']); $this->updateOriginUrl($entry, $content['url']);
$entry->setHashedUrl(hash('md5', $entry->getUrl()));
$this->setEntryDomainName($entry); $this->setEntryDomainName($entry);
if (!empty($content['title'])) { if (!empty($content['title'])) {

View file

@ -973,7 +973,7 @@ class EntryRestControllerTest extends WallabagApiTestCase
public function dataForEntriesExistWithUrl() public function dataForEntriesExistWithUrl()
{ {
$url = hash('md5', 'http://0.0.0.0/entry2'); $url = hash('sha1', 'http://0.0.0.0/entry2');
return [ return [
'with_id' => [ 'with_id' => [
@ -1047,37 +1047,37 @@ class EntryRestControllerTest extends WallabagApiTestCase
{ {
$url1 = 'http://0.0.0.0/entry2'; $url1 = 'http://0.0.0.0/entry2';
$url2 = 'http://0.0.0.0/entry10'; $url2 = 'http://0.0.0.0/entry10';
$this->client->request('GET', '/api/entries/exists?hashed_urls[]=' . hash('md5', $url1) . '&hashed_urls[]=' . hash('md5', $url2) . '&return_id=1'); $this->client->request('GET', '/api/entries/exists?hashed_urls[]=' . hash('sha1', $url1) . '&hashed_urls[]=' . hash('sha1', $url2) . '&return_id=1');
$this->assertSame(200, $this->client->getResponse()->getStatusCode()); $this->assertSame(200, $this->client->getResponse()->getStatusCode());
$content = json_decode($this->client->getResponse()->getContent(), true); $content = json_decode($this->client->getResponse()->getContent(), true);
$this->assertArrayHasKey(hash('md5', $url1), $content); $this->assertArrayHasKey(hash('sha1', $url1), $content);
$this->assertArrayHasKey(hash('md5', $url2), $content); $this->assertArrayHasKey(hash('sha1', $url2), $content);
$this->assertSame(2, $content[hash('md5', $url1)]); $this->assertSame(2, $content[hash('sha1', $url1)]);
$this->assertNull($content[hash('md5', $url2)]); $this->assertNull($content[hash('sha1', $url2)]);
} }
public function testGetEntriesExistsWithManyUrlsHashedReturnBool() public function testGetEntriesExistsWithManyUrlsHashedReturnBool()
{ {
$url1 = 'http://0.0.0.0/entry2'; $url1 = 'http://0.0.0.0/entry2';
$url2 = 'http://0.0.0.0/entry10'; $url2 = 'http://0.0.0.0/entry10';
$this->client->request('GET', '/api/entries/exists?hashed_urls[]=' . hash('md5', $url1) . '&hashed_urls[]=' . hash('md5', $url2)); $this->client->request('GET', '/api/entries/exists?hashed_urls[]=' . hash('sha1', $url1) . '&hashed_urls[]=' . hash('sha1', $url2));
$this->assertSame(200, $this->client->getResponse()->getStatusCode()); $this->assertSame(200, $this->client->getResponse()->getStatusCode());
$content = json_decode($this->client->getResponse()->getContent(), true); $content = json_decode($this->client->getResponse()->getContent(), true);
$this->assertArrayHasKey(hash('md5', $url1), $content); $this->assertArrayHasKey(hash('sha1', $url1), $content);
$this->assertArrayHasKey(hash('md5', $url2), $content); $this->assertArrayHasKey(hash('sha1', $url2), $content);
$this->assertTrue($content[hash('md5', $url1)]); $this->assertTrue($content[hash('sha1', $url1)]);
$this->assertFalse($content[hash('md5', $url2)]); $this->assertFalse($content[hash('sha1', $url2)]);
} }
public function testGetEntriesExistsWhichDoesNotExists() public function testGetEntriesExistsWhichDoesNotExists()
{ {
$this->client->request('GET', '/api/entries/exists?hashed_url=' . hash('md5', 'http://google.com/entry2')); $this->client->request('GET', '/api/entries/exists?hashed_url=' . hash('sha1', 'http://google.com/entry2'));
$this->assertSame(200, $this->client->getResponse()->getStatusCode()); $this->assertSame(200, $this->client->getResponse()->getStatusCode());

View file

@ -72,11 +72,8 @@ class GenerateUrlHashesCommandTest extends WallabagCoreTestCase
$entry1->setUrl($url); $entry1->setUrl($url);
$em->persist($entry1); $em->persist($entry1);
$em->flush(); $em->flush();
$this->assertNull($entry1->getHashedUrl());
$application = new Application($this->getClient()->getKernel()); $application = new Application($this->getClient()->getKernel());
$application->add(new GenerateUrlHashesCommand()); $application->add(new GenerateUrlHashesCommand());
@ -92,7 +89,7 @@ class GenerateUrlHashesCommandTest extends WallabagCoreTestCase
$entry = $em->getRepository('WallabagCoreBundle:Entry')->findOneByUrl($url); $entry = $em->getRepository('WallabagCoreBundle:Entry')->findOneByUrl($url);
$this->assertSame($entry->getHashedUrl(), hash('md5', $url)); $this->assertSame($entry->getHashedUrl(), hash('sha1', $url));
$query = $em->createQuery('DELETE FROM Wallabag\CoreBundle\Entity\Entry e WHERE e.url = :url'); $query = $em->createQuery('DELETE FROM Wallabag\CoreBundle\Entity\Entry e WHERE e.url = :url');
$query->setParameter('url', $url); $query->setParameter('url', $url);