Merge pull request #2679 from jcharaoui/fix-2658

Fix content from imported entried being discarded when URL goes bad
This commit is contained in:
Nicolas Lœuillet 2016-12-04 19:30:11 +01:00 committed by GitHub
commit 4a1f963531
10 changed files with 40 additions and 35 deletions

View file

@ -21,14 +21,16 @@ class ContentProxy
protected $logger; protected $logger;
protected $tagRepository; protected $tagRepository;
protected $mimeGuesser; protected $mimeGuesser;
protected $fetchingErrorMessage;
public function __construct(Graby $graby, RuleBasedTagger $tagger, TagRepository $tagRepository, LoggerInterface $logger) public function __construct(Graby $graby, RuleBasedTagger $tagger, TagRepository $tagRepository, LoggerInterface $logger, $fetchingErrorMessage)
{ {
$this->graby = $graby; $this->graby = $graby;
$this->tagger = $tagger; $this->tagger = $tagger;
$this->logger = $logger; $this->logger = $logger;
$this->tagRepository = $tagRepository; $this->tagRepository = $tagRepository;
$this->mimeGuesser = new MimeTypeExtensionGuesser(); $this->mimeGuesser = new MimeTypeExtensionGuesser();
$this->fetchingErrorMessage = $fetchingErrorMessage;
} }
/** /**
@ -48,7 +50,13 @@ class ContentProxy
{ {
// do we have to fetch the content or the provided one is ok? // do we have to fetch the content or the provided one is ok?
if (empty($content) || false === $this->validateContent($content)) { if (empty($content) || false === $this->validateContent($content)) {
$content = $this->graby->fetchContent($url); $fetchedContent = $this->graby->fetchContent($url);
// when content is imported, we have information in $content
// in case fetching content goes bad, we'll keep the imported information instead of overriding them
if (empty($content) || $fetchedContent['html'] !== $this->fetchingErrorMessage) {
$content = $fetchedContent;
}
} }
$title = $content['title']; $title = $content['title'];
@ -58,7 +66,7 @@ class ContentProxy
$html = $content['html']; $html = $content['html'];
if (false === $html) { if (false === $html) {
$html = '<p>Unable to retrieve readable content.</p>'; $html = $this->fetchingErrorMessage;
if (isset($content['open_graph']['og_description'])) { if (isset($content['open_graph']['og_description'])) {
$html .= '<p><i>But we found a short description: </i></p>'; $html .= '<p><i>But we found a short description: </i></p>';
@ -71,8 +79,8 @@ class ContentProxy
$entry->setContent($html); $entry->setContent($html);
$entry->setHttpStatus(isset($content['status']) ? $content['status'] : ''); $entry->setHttpStatus(isset($content['status']) ? $content['status'] : '');
$entry->setLanguage($content['language']); $entry->setLanguage(isset($content['language']) ? $content['language'] : '');
$entry->setMimetype($content['content_type']); $entry->setMimetype(isset($content['content_type']) ? $content['content_type'] : '');
$entry->setReadingTime(Utils::getReadingTime($html)); $entry->setReadingTime(Utils::getReadingTime($html));
$domainName = parse_url($entry->getUrl(), PHP_URL_HOST); $domainName = parse_url($entry->getUrl(), PHP_URL_HOST);
@ -85,7 +93,7 @@ class ContentProxy
} }
// if content is an image define as a preview too // if content is an image define as a preview too
if (in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { if (isset($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
$entry->setPreviewPicture($content['url']); $entry->setPreviewPicture($content['url']);
} }

View file

@ -86,6 +86,7 @@ services:
- "@wallabag_core.rule_based_tagger" - "@wallabag_core.rule_based_tagger"
- "@wallabag_core.tag_repository" - "@wallabag_core.tag_repository"
- "@logger" - "@logger"
- '%wallabag_core.fetching_error_message%'
wallabag_core.rule_based_tagger: wallabag_core.rule_based_tagger:
class: Wallabag\CoreBundle\Helper\RuleBasedTagger class: Wallabag\CoreBundle\Helper\RuleBasedTagger

View file

@ -37,7 +37,7 @@ class ChromeImport extends BrowserImport
{ {
$data = [ $data = [
'title' => $entry['name'], 'title' => $entry['name'],
'html' => '', 'html' => false,
'url' => $entry['url'], 'url' => $entry['url'],
'is_archived' => $this->markAsRead, 'is_archived' => $this->markAsRead,
'tags' => '', 'tags' => '',

View file

@ -37,7 +37,7 @@ class FirefoxImport extends BrowserImport
{ {
$data = [ $data = [
'title' => $entry['title'], 'title' => $entry['title'],
'html' => '', 'html' => false,
'url' => $entry['uri'], 'url' => $entry['uri'],
'is_archived' => $this->markAsRead, 'is_archived' => $this->markAsRead,
'tags' => '', 'tags' => '',

View file

@ -74,8 +74,7 @@ class InstapaperImport extends AbstractImport
'status' => $data[3], 'status' => $data[3],
'is_archived' => $data[3] === 'Archive' || $data[3] === 'Starred', 'is_archived' => $data[3] === 'Archive' || $data[3] === 'Starred',
'is_starred' => $data[3] === 'Starred', 'is_starred' => $data[3] === 'Starred',
'content_type' => '', 'html' => false,
'language' => '',
]; ];
} }
fclose($handle); fclose($handle);

View file

@ -98,8 +98,6 @@ class PinboardImport extends AbstractImport
$data = [ $data = [
'title' => $importedEntry['description'], 'title' => $importedEntry['description'],
'url' => $importedEntry['href'], 'url' => $importedEntry['href'],
'content_type' => '',
'language' => '',
'is_archived' => ('no' === $importedEntry['toread']) || $this->markAsRead, 'is_archived' => ('no' === $importedEntry['toread']) || $this->markAsRead,
'is_starred' => false, 'is_starred' => false,
'created_at' => $importedEntry['time'], 'created_at' => $importedEntry['time'],

View file

@ -98,11 +98,10 @@ class ReadabilityImport extends AbstractImport
$data = [ $data = [
'title' => $importedEntry['article__title'], 'title' => $importedEntry['article__title'],
'url' => $importedEntry['article__url'], 'url' => $importedEntry['article__url'],
'content_type' => '',
'language' => '',
'is_archived' => $importedEntry['archive'] || $this->markAsRead, 'is_archived' => $importedEntry['archive'] || $this->markAsRead,
'is_starred' => $importedEntry['favorite'], 'is_starred' => $importedEntry['favorite'],
'created_at' => $importedEntry['date_added'], 'created_at' => $importedEntry['date_added'],
'html' => false,
]; ];
$entry = new Entry($this->user); $entry = new Entry($this->user);

View file

@ -37,8 +37,6 @@ class WallabagV1Import extends WallabagImport
'title' => $entry['title'], 'title' => $entry['title'],
'html' => $entry['content'], 'html' => $entry['content'],
'url' => $entry['url'], 'url' => $entry['url'],
'content_type' => '',
'language' => '',
'is_archived' => $entry['is_read'] || $this->markAsRead, 'is_archived' => $entry['is_read'] || $this->markAsRead,
'is_starred' => $entry['is_fav'], 'is_starred' => $entry['is_fav'],
'tags' => '', 'tags' => '',

View file

@ -10,6 +10,8 @@ use Wallabag\UserBundle\Entity\User;
class ContentProxyTest extends \PHPUnit_Framework_TestCase class ContentProxyTest extends \PHPUnit_Framework_TestCase
{ {
private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/master/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.';
public function testWithBadUrl() public function testWithBadUrl()
{ {
$tagger = $this->getTaggerMock(); $tagger = $this->getTaggerMock();
@ -31,12 +33,12 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
'language' => '', 'language' => '',
]); ]);
$proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger()); $proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger(), $this->fetchingErrorMessage);
$entry = $proxy->updateEntry(new Entry(new User()), 'http://user@:80'); $entry = $proxy->updateEntry(new Entry(new User()), 'http://user@:80');
$this->assertEquals('http://user@:80', $entry->getUrl()); $this->assertEquals('http://user@:80', $entry->getUrl());
$this->assertEmpty($entry->getTitle()); $this->assertEmpty($entry->getTitle());
$this->assertEquals('<p>Unable to retrieve readable content.</p>', $entry->getContent()); $this->assertEquals($this->fetchingErrorMessage, $entry->getContent());
$this->assertEmpty($entry->getPreviewPicture()); $this->assertEmpty($entry->getPreviewPicture());
$this->assertEmpty($entry->getMimetype()); $this->assertEmpty($entry->getMimetype());
$this->assertEmpty($entry->getLanguage()); $this->assertEmpty($entry->getLanguage());
@ -65,12 +67,12 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
'language' => '', 'language' => '',
]); ]);
$proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger()); $proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger(), $this->fetchingErrorMessage);
$entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0'); $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0');
$this->assertEquals('http://0.0.0.0', $entry->getUrl()); $this->assertEquals('http://0.0.0.0', $entry->getUrl());
$this->assertEmpty($entry->getTitle()); $this->assertEmpty($entry->getTitle());
$this->assertEquals('<p>Unable to retrieve readable content.</p>', $entry->getContent()); $this->assertEquals($this->fetchingErrorMessage, $entry->getContent());
$this->assertEmpty($entry->getPreviewPicture()); $this->assertEmpty($entry->getPreviewPicture());
$this->assertEmpty($entry->getMimetype()); $this->assertEmpty($entry->getMimetype());
$this->assertEmpty($entry->getLanguage()); $this->assertEmpty($entry->getLanguage());
@ -104,12 +106,12 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
], ],
]); ]);
$proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger()); $proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger(), $this->fetchingErrorMessage);
$entry = $proxy->updateEntry(new Entry(new User()), 'http://domain.io'); $entry = $proxy->updateEntry(new Entry(new User()), 'http://domain.io');
$this->assertEquals('http://domain.io', $entry->getUrl()); $this->assertEquals('http://domain.io', $entry->getUrl());
$this->assertEquals('my title', $entry->getTitle()); $this->assertEquals('my title', $entry->getTitle());
$this->assertEquals('<p>Unable to retrieve readable content.</p><p><i>But we found a short description: </i></p>desc', $entry->getContent()); $this->assertEquals($this->fetchingErrorMessage . '<p><i>But we found a short description: </i></p>desc', $entry->getContent());
$this->assertEmpty($entry->getPreviewPicture()); $this->assertEmpty($entry->getPreviewPicture());
$this->assertEmpty($entry->getLanguage()); $this->assertEmpty($entry->getLanguage());
$this->assertEmpty($entry->getHttpStatus()); $this->assertEmpty($entry->getHttpStatus());
@ -145,7 +147,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
], ],
]); ]);
$proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger()); $proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger(), $this->fetchingErrorMessage);
$entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0'); $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0');
$this->assertEquals('http://1.1.1.1', $entry->getUrl()); $this->assertEquals('http://1.1.1.1', $entry->getUrl());
@ -167,7 +169,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
$graby = $this->getMockBuilder('Graby\Graby')->getMock(); $graby = $this->getMockBuilder('Graby\Graby')->getMock();
$proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger()); $proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger(), $this->fetchingErrorMessage);
$entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0', [ $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0', [
'html' => str_repeat('this is my content', 325), 'html' => str_repeat('this is my content', 325),
'title' => 'this is my title', 'title' => 'this is my title',
@ -197,7 +199,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
->will($this->throwException(new \Exception())); ->will($this->throwException(new \Exception()));
$tagRepo = $this->getTagRepositoryMock(); $tagRepo = $this->getTagRepositoryMock();
$proxy = new ContentProxy($graby, $tagger, $tagRepo, $this->getLogger()); $proxy = new ContentProxy($graby, $tagger, $tagRepo, $this->getLogger(), $this->fetchingErrorMessage);
$entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0', [ $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0', [
'html' => str_repeat('this is my content', 325), 'html' => str_repeat('this is my content', 325),
@ -217,7 +219,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
->getMock(); ->getMock();
$tagRepo = $this->getTagRepositoryMock(); $tagRepo = $this->getTagRepositoryMock();
$proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger()); $proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger(), $this->fetchingErrorMessage);
$entry = new Entry(new User()); $entry = new Entry(new User());
@ -235,7 +237,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
->getMock(); ->getMock();
$tagRepo = $this->getTagRepositoryMock(); $tagRepo = $this->getTagRepositoryMock();
$proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger()); $proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger(), $this->fetchingErrorMessage);
$entry = new Entry(new User()); $entry = new Entry(new User());
@ -253,7 +255,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
->getMock(); ->getMock();
$tagRepo = $this->getTagRepositoryMock(); $tagRepo = $this->getTagRepositoryMock();
$proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger()); $proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger(), $this->fetchingErrorMessage);
$entry = new Entry(new User()); $entry = new Entry(new User());
@ -269,7 +271,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
->getMock(); ->getMock();
$tagRepo = $this->getTagRepositoryMock(); $tagRepo = $this->getTagRepositoryMock();
$proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger()); $proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger(), $this->fetchingErrorMessage);
$entry = new Entry(new User()); $entry = new Entry(new User());
@ -285,7 +287,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
->getMock(); ->getMock();
$tagRepo = $this->getTagRepositoryMock(); $tagRepo = $this->getTagRepositoryMock();
$proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger()); $proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger(), $this->fetchingErrorMessage);
$tagEntity = new Tag(); $tagEntity = new Tag();
$tagEntity->setLabel('tag1'); $tagEntity->setLabel('tag1');
@ -310,7 +312,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
$tagRepo->expects($this->never()) $tagRepo->expects($this->never())
->method('__call'); ->method('__call');
$proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger()); $proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger(), $this->fetchingErrorMessage);
$tagEntity = new Tag(); $tagEntity = new Tag();
$tagEntity->setLabel('tag1'); $tagEntity->setLabel('tag1');

View file

@ -112,7 +112,7 @@ class WallabagV1ControllerTest extends WallabagCoreTestCase
->get('doctrine.orm.entity_manager') ->get('doctrine.orm.entity_manager')
->getRepository('WallabagCoreBundle:Entry') ->getRepository('WallabagCoreBundle:Entry')
->findByUrlAndUserId( ->findByUrlAndUserId(
'http://www.framablog.org/index.php/post/2014/02/05/Framabag-service-libre-gratuit-interview-developpeur', 'https://framablog.org/2014/02/05/framabag-service-libre-gratuit-interview-developpeur/',
$this->getLoggedInUserId() $this->getLoggedInUserId()
); );
@ -126,9 +126,9 @@ class WallabagV1ControllerTest extends WallabagCoreTestCase
$this->assertGreaterThan(1, $body = $crawler->filter('body')->extract(['_text'])); $this->assertGreaterThan(1, $body = $crawler->filter('body')->extract(['_text']));
$this->assertContains('flashes.import.notice.summary', $body[0]); $this->assertContains('flashes.import.notice.summary', $body[0]);
$this->assertEmpty($content->getMimetype(), 'Mimetype for http://www.framablog.org is ok'); $this->assertNotEmpty($content->getMimetype(), 'Mimetype for http://www.framablog.org is ok');
$this->assertEmpty($content->getPreviewPicture(), 'Preview picture for http://www.framablog.org is ok'); $this->assertNotEmpty($content->getPreviewPicture(), 'Preview picture for http://www.framablog.org is ok');
$this->assertEmpty($content->getLanguage(), 'Language for http://www.framablog.org is ok'); $this->assertNotEmpty($content->getLanguage(), 'Language for http://www.framablog.org is ok');
$this->assertEquals(1, count($content->getTags())); $this->assertEquals(1, count($content->getTags()));
$this->assertInstanceOf(\DateTime::class, $content->getCreatedAt()); $this->assertInstanceOf(\DateTime::class, $content->getCreatedAt());
} }