Merge pull request #3192 from wallabag/validate-content-fields

Validate language & preview picture fields
This commit is contained in:
Jérémy Benoist 2017-06-09 13:51:26 +02:00 committed by GitHub
commit a899399989
5 changed files with 275 additions and 41 deletions

View file

@ -7,6 +7,9 @@ use Psr\Log\LoggerInterface;
use Wallabag\CoreBundle\Entity\Entry;
use Wallabag\CoreBundle\Tools\Utils;
use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser;
use Symfony\Component\Validator\Constraints\Locale as LocaleConstraint;
use Symfony\Component\Validator\Constraints\Url as UrlConstraint;
use Symfony\Component\Validator\Validator\ValidatorInterface;
/**
* This kind of proxy class takes care of getting the content from a URL
@ -16,15 +19,17 @@ class ContentProxy
{
protected $graby;
protected $tagger;
protected $validator;
protected $logger;
protected $mimeGuesser;
protected $fetchingErrorMessage;
protected $eventDispatcher;
public function __construct(Graby $graby, RuleBasedTagger $tagger, LoggerInterface $logger, $fetchingErrorMessage)
public function __construct(Graby $graby, RuleBasedTagger $tagger, ValidatorInterface $validator, LoggerInterface $logger, $fetchingErrorMessage)
{
$this->graby = $graby;
$this->tagger = $tagger;
$this->validator = $validator;
$this->logger = $logger;
$this->mimeGuesser = new MimeTypeExtensionGuesser();
$this->fetchingErrorMessage = $fetchingErrorMessage;
@ -113,7 +118,24 @@ class ContentProxy
$entry->setHeaders($content['all_headers']);
}
$entry->setLanguage(isset($content['language']) ? $content['language'] : '');
$this->validateAndSetLanguage(
$entry,
isset($content['language']) ? $content['language'] : ''
);
$this->validateAndSetPreviewPicture(
$entry,
isset($content['open_graph']['og_image']) ? $content['open_graph']['og_image'] : ''
);
// if content is an image, define it as a preview too
if (!empty($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
$this->validateAndSetPreviewPicture(
$entry,
$content['url']
);
}
$entry->setMimetype(isset($content['content_type']) ? $content['content_type'] : '');
$entry->setReadingTime(Utils::getReadingTime($html));
@ -122,15 +144,6 @@ class ContentProxy
$entry->setDomainName($domainName);
}
if (!empty($content['open_graph']['og_image'])) {
$entry->setPreviewPicture($content['open_graph']['og_image']);
}
// if content is an image define as a preview too
if (!empty($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
$entry->setPreviewPicture($content['url']);
}
try {
$this->tagger->tag($entry);
} catch (\Exception $e) {
@ -152,4 +165,52 @@ class ContentProxy
{
return !empty($content['title']) && !empty($content['html']) && !empty($content['url']);
}
/**
 * Normalise the language code, check it with the Symfony Locale constraint
 * and store it on the entry only when no violation is reported.
 *
 * @param Entry  $entry Entry receiving the language
 * @param string $value Language to validate
 */
private function validateAndSetLanguage($entry, $value)
{
    // Languages are sometimes written with a dash (fr-FR, es-ES);
    // swapping it for an underscore might increase language support.
    $locale = str_replace('-', '_', $value);

    $violations = $this->validator->validate($locale, new LocaleConstraint());

    if (count($violations) > 0) {
        // Rejected value: the entry is left untouched, the failure is only logged.
        $this->logger->warning('Language validation failed. '.(string) $violations);

        return;
    }

    $entry->setLanguage($locale);
}
/**
 * Check the preview picture against the Symfony Url constraint and store it
 * on the entry only when no violation is reported.
 *
 * @param Entry  $entry Entry receiving the preview picture
 * @param string $value URL to validate
 */
private function validateAndSetPreviewPicture($entry, $value)
{
    $violations = $this->validator->validate($value, new UrlConstraint());

    if (count($violations) > 0) {
        // Rejected value: the entry is left untouched, the failure is only logged.
        $this->logger->warning('PreviewPicture validation failed. '.(string) $violations);

        return;
    }

    $entry->setPreviewPicture($value);
}
}

View file

@ -90,6 +90,7 @@ services:
arguments:
- "@wallabag_core.graby"
- "@wallabag_core.rule_based_tagger"
- "@validator"
- "@logger"
- '%wallabag_core.fetching_error_message%'

View file

@ -345,7 +345,7 @@ class EntryRestControllerTest extends WallabagApiTestCase
'tags' => 'google',
'title' => 'New title for my article',
'content' => 'my content',
'language' => 'de_DE',
'language' => 'de',
'published_at' => '2016-09-08T11:55:58+0200',
'authors' => 'bob,helen',
]);
@ -362,7 +362,7 @@ class EntryRestControllerTest extends WallabagApiTestCase
$this->assertEquals(1, $content['user_id']);
$this->assertCount(2, $content['tags']);
$this->assertSame('my content', $content['content']);
$this->assertSame('de_DE', $content['language']);
$this->assertSame('de', $content['language']);
$this->assertSame('2016-09-08T11:55:58+0200', $content['published_at']);
$this->assertCount(2, $content['published_by']);
$this->assertContains('bob', $content['published_by']);
@ -477,7 +477,7 @@ class EntryRestControllerTest extends WallabagApiTestCase
'tags' => 'new tag '.uniqid(),
'starred' => '1',
'archive' => '0',
'language' => 'de_DE',
'language' => 'de_AT',
'preview_picture' => 'http://preview.io/picture.jpg',
'authors' => 'bob,sponge',
'content' => 'awesome',
@ -492,7 +492,7 @@ class EntryRestControllerTest extends WallabagApiTestCase
$this->assertEquals('New awesome title', $content['title']);
$this->assertGreaterThan($nbTags, count($content['tags']));
$this->assertEquals(1, $content['user_id']);
$this->assertEquals('de_DE', $content['language']);
$this->assertEquals('de_AT', $content['language']);
$this->assertEquals('http://preview.io/picture.jpg', $content['preview_picture']);
$this->assertContains('sponge', $content['published_by']);
$this->assertContains('bob', $content['published_by']);

View file

@ -158,6 +158,7 @@ class EntryControllerTest extends WallabagCoreTestCase
$this->assertInstanceOf('Wallabag\CoreBundle\Entity\Entry', $content);
$this->assertEquals($this->url, $content->getUrl());
$this->assertContains('Google', $content->getTitle());
$this->assertEquals('fr', $content->getLanguage());
$this->assertEquals('2015-03-28 15:37:39', $content->getPublishedAt()->format('Y-m-d H:i:s'));
$this->assertEquals('Morgane Tual', $author[0]);
$this->assertArrayHasKey('x-varnish1', $content->getHeaders());
@ -190,6 +191,7 @@ class EntryControllerTest extends WallabagCoreTestCase
$authors = $content->getPublishedBy();
$this->assertEquals('2017-04-05 19:26:13', $content->getPublishedAt()->format('Y-m-d H:i:s'));
$this->assertEquals('fr', $content->getLanguage());
$this->assertEquals('Raphaël Balenieri, correspondant à Pékin', $authors[0]);
$this->assertEquals('Frédéric Autran, correspondant à New York', $authors[1]);
}
@ -254,15 +256,6 @@ class EntryControllerTest extends WallabagCoreTestCase
$this->assertEquals(302, $client->getResponse()->getStatusCode());
$this->assertContains('/view/', $client->getResponse()->getTargetUrl());
$em = $client->getContainer()
->get('doctrine.orm.entity_manager');
$entry = $em
->getRepository('WallabagCoreBundle:Entry')
->findOneByUrl(urldecode($url));
$em->remove($entry);
$em->flush();
}
/**
@ -297,6 +290,7 @@ class EntryControllerTest extends WallabagCoreTestCase
$this->assertCount(2, $tags);
$this->assertContains('wallabag', $tags);
$this->assertEquals('en', $entry->getLanguage());
$em->remove($entry);
$em->flush();
@ -392,8 +386,6 @@ class EntryControllerTest extends WallabagCoreTestCase
}
/**
* @depends testPostNewOk
*
* This test will require an internet connection.
*/
public function testReload()
@ -420,9 +412,6 @@ class EntryControllerTest extends WallabagCoreTestCase
$this->assertNotEmpty($entry->getContent());
}
/**
* @depends testPostNewOk
*/
public function testReloadWithFetchingFailed()
{
$this->logInAs('admin');
@ -1254,4 +1243,82 @@ class EntryControllerTest extends WallabagCoreTestCase
$this->assertCount(1, $crawler->filter('div[class=entry]'));
}
/**
 * Data sets for testLanguageValidation.
 *
 * Each data set holds the URL to save and the language expected on the
 * stored entry, in that order.
 *
 * @return array
 */
public function dataForLanguage()
{
    $dataSets = [];

    // plain two-letter languages
    $dataSets['ru'] = ['https://www.pravda.ru/world/09-06-2017/1337283-qatar-0/', 'ru'];
    // dash-separated locale, expected back with an underscore
    $dataSets['fr-FR'] = ['http://www.zataz.com/90-des-dossiers-medicaux-des-coreens-du-sud-vendus-a-des-entreprises-privees/', 'fr_FR'];
    $dataSets['de'] = ['http://www.bild.de/politik/ausland/theresa-may/wahlbeben-grossbritannien-analyse-52108924.bild.html', 'de'];
    $dataSets['it'] = ['http://www.ansa.it/sito/notizie/mondo/europa/2017/06/08/voto-gb-seggi-aperti-misure-sicurezza-rafforzate_0cb71f7f-e23b-4d5f-95ca-bc12296419f0.html', 'it'];

    // language + region locales
    $dataSets['zh_CN'] = ['http://www.hao123.com/shequ?__noscript__-=1', 'zh_CN'];
    $dataSets['de_AT'] = ['https://buy.garmin.com/de-AT/AT/catalog/product/compareResult.ep?compareProduct=112885&compareProduct=36728', 'de_AT'];
    $dataSets['ru_RU'] = ['http://netler.ru/ikt/windows-error-reporting.htm', 'ru_RU'];
    $dataSets['pt_BR'] = ['http://precodoscombustiveis.com.br/postos/cidade/4121/pr/maringa', 'pt_BR'];

    // an empty language is expected for this page
    $dataSets['fucked_list_of_languages'] = ['http://geocatalog.webservice-energy.org/geonetwork/srv/eng/main.home', ''];

    // dash-separated locale, expected back with an underscore
    $dataSets['es-ES'] = ['http://www.muylinux.com/2015/04/17/odf-reino-unido-microsoft-google', 'es_ES'];

    return $dataSets;
}
/**
 * Save the given URL through the "new entry" form, then check the language
 * stored on the entry found for that URL and the logged-in user.
 *
 * @dataProvider dataForLanguage
 */
public function testLanguageValidation($url, $expectedLanguage)
{
    $this->logInAs('admin');
    $client = $this->getClient();

    // display the form used to add a new entry
    $newEntryPage = $client->request('GET', '/new');
    $this->assertEquals(200, $client->getResponse()->getStatusCode());

    // submit it with the URL under test; a redirect is expected
    $entryForm = $newEntryPage->filter('form[name=entry]')->form();
    $client->submit($entryForm, ['entry[url]' => $url]);
    $this->assertEquals(302, $client->getResponse()->getStatusCode());

    $entityManager = $client->getContainer()->get('doctrine.orm.entity_manager');
    $savedEntry = $entityManager
        ->getRepository('WallabagCoreBundle:Entry')
        ->findByUrlAndUserId($url, $this->getLoggedInUserId());

    $this->assertInstanceOf('Wallabag\CoreBundle\Entity\Entry', $savedEntry);
    $this->assertEquals($url, $savedEntry->getUrl());
    $this->assertEquals($expectedLanguage, $savedEntry->getLanguage());
}
}

View file

@ -11,6 +11,9 @@ use Wallabag\CoreBundle\Entity\Tag;
use Wallabag\UserBundle\Entity\User;
use Wallabag\CoreBundle\Helper\RuleBasedTagger;
use Graby\Graby;
use Symfony\Component\Validator\Validator\RecursiveValidator;
use Symfony\Component\Validator\ConstraintViolationList;
use Symfony\Component\Validator\ConstraintViolation;
class ContentProxyTest extends \PHPUnit_Framework_TestCase
{
@ -37,7 +40,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
'language' => '',
]);
$proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
$proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
$entry = new Entry(new User());
$proxy->updateEntry($entry, 'http://user@:80');
@ -72,7 +75,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
'language' => '',
]);
$proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
$proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
$entry = new Entry(new User());
$proxy->updateEntry($entry, 'http://0.0.0.0');
@ -112,7 +115,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
],
]);
$proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
$proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
$entry = new Entry(new User());
$proxy->updateEntry($entry, 'http://domain.io');
@ -154,7 +157,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
],
]);
$proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
$proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
$entry = new Entry(new User());
$proxy->updateEntry($entry, 'http://0.0.0.0');
@ -192,18 +195,112 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
'open_graph' => [
'og_title' => 'my OG title',
'og_description' => 'OG desc',
'og_image' => false,
'og_image' => null,
],
]);
$proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
$proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
$entry = new Entry(new User());
$proxy->updateEntry($entry, 'http://0.0.0.0');
$this->assertEquals('http://1.1.1.1', $entry->getUrl());
$this->assertEquals('this is my title', $entry->getTitle());
$this->assertContains('this is my content', $entry->getContent());
$this->assertNull($entry->getPreviewPicture());
$this->assertEmpty($entry->getPreviewPicture());
$this->assertEquals('text/html', $entry->getMimetype());
$this->assertEquals('fr', $entry->getLanguage());
$this->assertEquals('200', $entry->getHttpStatus());
$this->assertEquals(4.0, $entry->getReadingTime());
$this->assertEquals('1.1.1.1', $entry->getDomainName());
}
/**
 * The validator mock reports a violation on its first call and none on its
 * second one; the entry must then end up with an empty language while the
 * other fetched fields are still applied.
 */
public function testWithContentAndBadLanguage()
{
    // content handed back by the Graby mock
    $fetchedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'dontexist',
        'status' => '200',
    ];

    $graby = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $graby->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    // first validate() call fails, second one passes
    $failingResult = new ConstraintViolationList([
        new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist'),
    ]);
    $passingResult = new ConstraintViolationList();

    $validator = $this->getValidator();
    $validator->expects($this->exactly(2))
        ->method('validate')
        ->will($this->onConsecutiveCalls($failingResult, $passingResult));

    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())
        ->method('tag');

    $entry = new Entry(new User());
    $proxy = new ContentProxy($graby, $tagger, $validator, $this->getLogger(), $this->fetchingErrorMessage);
    $proxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertEquals('http://1.1.1.1', $entry->getUrl());
    $this->assertEquals('this is my title', $entry->getTitle());
    $this->assertContains('this is my content', $entry->getContent());
    $this->assertEquals('text/html', $entry->getMimetype());
    // the rejected language must not have been stored
    $this->assertEmpty($entry->getLanguage());
    $this->assertEquals('200', $entry->getHttpStatus());
    $this->assertEquals(4.0, $entry->getReadingTime());
    $this->assertEquals('1.1.1.1', $entry->getDomainName());
}
public function testWithContentAndBadOgImage()
{
$tagger = $this->getTaggerMock();
$tagger->expects($this->once())
->method('tag');
$validator = $this->getValidator();
$validator->expects($this->exactly(2))
->method('validate')
->will($this->onConsecutiveCalls(
new ConstraintViolationList(),
new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://')])
));
$graby = $this->getMockBuilder('Graby\Graby')
->setMethods(['fetchContent'])
->disableOriginalConstructor()
->getMock();
$graby->expects($this->any())
->method('fetchContent')
->willReturn([
'html' => str_repeat('this is my content', 325),
'title' => 'this is my title',
'url' => 'http://1.1.1.1',
'content_type' => 'text/html',
'language' => 'fr',
'status' => '200',
'open_graph' => [
'og_title' => 'my OG title',
'og_description' => 'OG desc',
'og_image' => 'https://',
],
]);
$proxy = new ContentProxy($graby, $tagger, $validator, $this->getLogger(), $this->fetchingErrorMessage);
$entry = new Entry(new User());
$proxy->updateEntry($entry, 'http://0.0.0.0');
$this->assertEquals('http://1.1.1.1', $entry->getUrl());
$this->assertEquals('this is my title', $entry->getTitle());
$this->assertContains('this is my content', $entry->getContent());
$this->assertEmpty($entry->getPreviewPicture());
$this->assertEquals('text/html', $entry->getMimetype());
$this->assertEquals('fr', $entry->getLanguage());
$this->assertEquals('200', $entry->getHttpStatus());
@ -217,7 +314,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
$tagger->expects($this->once())
->method('tag');
$proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage);
$proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
$entry = new Entry(new User());
$proxy->updateEntry(
$entry,
@ -259,7 +356,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
$logHandler = new TestHandler();
$logger = new Logger('test', [$logHandler]);
$proxy = new ContentProxy((new Graby()), $tagger, $logger, $this->fetchingErrorMessage);
$proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $logger, $this->fetchingErrorMessage);
$entry = new Entry(new User());
$proxy->updateEntry(
$entry,
@ -294,7 +391,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
$handler = new TestHandler();
$logger->pushHandler($handler);
$proxy = new ContentProxy((new Graby()), $tagger, $logger, $this->fetchingErrorMessage);
$proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $logger, $this->fetchingErrorMessage);
$entry = new Entry(new User());
$proxy->updateEntry(
$entry,
@ -331,7 +428,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
->method('tag')
->will($this->throwException(new \Exception()));
$proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage);
$proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
$entry = new Entry(new User());
$proxy->updateEntry(
$entry,
@ -371,7 +468,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
$tagger->expects($this->once())
->method('tag');
$proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage);
$proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
$entry = new Entry(new User());
$proxy->updateEntry(
$entry,
@ -413,4 +510,12 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
{
return new NullLogger();
}
/**
 * Build a RecursiveValidator mock: the original constructor is skipped and
 * validate() is the only method handed to setMethods().
 */
private function getValidator()
{
    $builder = $this->getMockBuilder(RecursiveValidator::class);
    $builder->setMethods(['validate']);
    $builder->disableOriginalConstructor();

    return $builder->getMock();
}
}