Use a better index for hashed_url

`hashed_url` will most often be queried together with `user_id`, so index the two columns together.
Also, automatically generate the hash whenever the URL is set on an entry.
Switch the hash algorithm from `md5` to `sha1`.
This commit is contained in:
Jeremy Benoist 2019-04-01 13:51:57 +02:00
parent 9c2b2aae70
commit 8a64566298
No known key found for this signature in database
GPG key ID: BCA73962457ACC3C
7 changed files with 21 additions and 23 deletions

View file

@ -20,7 +20,7 @@ class Version20190401105353 extends WallabagMigration
$this->skipIf($entryTable->hasColumn('hashed_url'), 'It seems that you already played this migration.');
$entryTable->addColumn('hashed_url', 'text', [
'length' => 32,
'length' => 40,
'notnull' => false,
]);
@ -28,6 +28,8 @@ class Version20190401105353 extends WallabagMigration
// Backfill the hash for existing rows; skipped on sqlite.
// NOTE(review): this still stores MD5(url), while Entry::setUrl() and GenerateUrlHashesCommand
// now write hash('sha1', ...) and the column is sized for 40 characters. Rows backfilled here
// will not match SHA-1 lookups until they are regenerated — confirm whether this is intended.
if ('sqlite' !== $this->connection->getDatabasePlatform()->getName()) {
$this->addSql('UPDATE ' . $this->getTable('entry') . ' SET hashed_url = MD5(url)');
}
// Composite index matching the (user_id, hashed_url) index declared on the Entry entity.
// NOTE(review): hashed_url is added above with type 'text'; some platforms (e.g. MySQL)
// presumably require an explicit prefix length to index a text column — verify that this
// index can be created on every supported database.
$entryTable->addIndex(['user_id', 'hashed_url'], 'hashed_url_user_id');
}
/**
@ -39,6 +41,7 @@ class Version20190401105353 extends WallabagMigration
$this->skipIf(!$entryTable->hasColumn('hashed_url'), 'It seems that you already played this migration.');
$entryTable->dropIndex('hashed_url_user_id');
$entryTable->dropColumn('hashed_url');
}
}

View file

@ -69,7 +69,7 @@ class GenerateUrlHashesCommand extends ContainerAwareCommand
$i = 1;
foreach ($entries as $entry) {
$entry->setHashedUrl(hash('md5', $entry->getUrl()));
$entry->setHashedUrl(hash('sha1', $entry->getUrl()));
$em->persist($entry);
if (0 === ($i % 20)) {

View file

@ -89,7 +89,6 @@ class EntryFixtures extends Fixture implements DependentFixtureInterface
foreach ($entries as $reference => $item) {
$entry = new Entry($this->getReference($item['user']));
$entry->setUrl($item['url']);
$entry->setHashedUrl(hash('md5', $item['url']));
$entry->setReadingTime($item['reading_time']);
$entry->setDomainName($item['domain']);
$entry->setMimetype($item['mime']);

View file

@ -26,7 +26,7 @@ use Wallabag\UserBundle\Entity\User;
* indexes={
* @ORM\Index(name="created_at", columns={"created_at"}),
* @ORM\Index(name="uid", columns={"uid"}),
* @ORM\Index(name="hashed_url", columns={"hashed_url"})
* @ORM\Index(name="hashed_url_user_id", columns={"user_id", "hashed_url"})
* }
* )
* @ORM\HasLifecycleCallbacks()
@ -79,7 +79,7 @@ class Entry
/**
* @var string
*
* @ORM\Column(name="hashed_url", type="string", length=32, nullable=true)
* @ORM\Column(name="hashed_url", type="string", length=40, nullable=true)
*/
private $hashedUrl;
@ -324,6 +324,7 @@ class Entry
/**
 * Set the entry URL and refresh the hash derived from it.
 *
 * The SHA-1 hash of the URL is stored in $hashedUrl on every call, so the
 * two properties cannot drift apart.
 *
 * @param string $url
 *
 * @return Entry
 */
public function setUrl($url)
{
    // Derive the hash from the incoming value, then store the value itself.
    $this->hashedUrl = hash('sha1', $url);
    $this->url = $url;

    return $this;
}

View file

@ -248,8 +248,6 @@ class ContentProxy
{
$this->updateOriginUrl($entry, $content['url']);
$entry->setHashedUrl(hash('md5', $entry->getUrl()));
$this->setEntryDomainName($entry);
if (!empty($content['title'])) {

View file

@ -973,7 +973,7 @@ class EntryRestControllerTest extends WallabagApiTestCase
public function dataForEntriesExistWithUrl()
{
$url = hash('md5', 'http://0.0.0.0/entry2');
$url = hash('sha1', 'http://0.0.0.0/entry2');
return [
'with_id' => [
@ -1047,37 +1047,37 @@ class EntryRestControllerTest extends WallabagApiTestCase
{
$url1 = 'http://0.0.0.0/entry2';
$url2 = 'http://0.0.0.0/entry10';
$this->client->request('GET', '/api/entries/exists?hashed_urls[]=' . hash('md5', $url1) . '&hashed_urls[]=' . hash('md5', $url2) . '&return_id=1');
$this->client->request('GET', '/api/entries/exists?hashed_urls[]=' . hash('sha1', $url1) . '&hashed_urls[]=' . hash('sha1', $url2) . '&return_id=1');
$this->assertSame(200, $this->client->getResponse()->getStatusCode());
$content = json_decode($this->client->getResponse()->getContent(), true);
$this->assertArrayHasKey(hash('md5', $url1), $content);
$this->assertArrayHasKey(hash('md5', $url2), $content);
$this->assertSame(2, $content[hash('md5', $url1)]);
$this->assertNull($content[hash('md5', $url2)]);
$this->assertArrayHasKey(hash('sha1', $url1), $content);
$this->assertArrayHasKey(hash('sha1', $url2), $content);
$this->assertSame(2, $content[hash('sha1', $url1)]);
$this->assertNull($content[hash('sha1', $url2)]);
}
public function testGetEntriesExistsWithManyUrlsHashedReturnBool()
{
$url1 = 'http://0.0.0.0/entry2';
$url2 = 'http://0.0.0.0/entry10';
$this->client->request('GET', '/api/entries/exists?hashed_urls[]=' . hash('md5', $url1) . '&hashed_urls[]=' . hash('md5', $url2));
$this->client->request('GET', '/api/entries/exists?hashed_urls[]=' . hash('sha1', $url1) . '&hashed_urls[]=' . hash('sha1', $url2));
$this->assertSame(200, $this->client->getResponse()->getStatusCode());
$content = json_decode($this->client->getResponse()->getContent(), true);
$this->assertArrayHasKey(hash('md5', $url1), $content);
$this->assertArrayHasKey(hash('md5', $url2), $content);
$this->assertTrue($content[hash('md5', $url1)]);
$this->assertFalse($content[hash('md5', $url2)]);
$this->assertArrayHasKey(hash('sha1', $url1), $content);
$this->assertArrayHasKey(hash('sha1', $url2), $content);
$this->assertTrue($content[hash('sha1', $url1)]);
$this->assertFalse($content[hash('sha1', $url2)]);
}
public function testGetEntriesExistsWhichDoesNotExists()
{
$this->client->request('GET', '/api/entries/exists?hashed_url=' . hash('md5', 'http://google.com/entry2'));
$this->client->request('GET', '/api/entries/exists?hashed_url=' . hash('sha1', 'http://google.com/entry2'));
$this->assertSame(200, $this->client->getResponse()->getStatusCode());

View file

@ -72,11 +72,8 @@ class GenerateUrlHashesCommandTest extends WallabagCoreTestCase
$entry1->setUrl($url);
$em->persist($entry1);
$em->flush();
$this->assertNull($entry1->getHashedUrl());
$application = new Application($this->getClient()->getKernel());
$application->add(new GenerateUrlHashesCommand());
@ -92,7 +89,7 @@ class GenerateUrlHashesCommandTest extends WallabagCoreTestCase
$entry = $em->getRepository('WallabagCoreBundle:Entry')->findOneByUrl($url);
$this->assertSame($entry->getHashedUrl(), hash('md5', $url));
$this->assertSame($entry->getHashedUrl(), hash('sha1', $url));
$query = $em->createQuery('DELETE FROM Wallabag\CoreBundle\Entity\Entry e WHERE e.url = :url');
$query->setParameter('url', $url);