mirror of
https://github.com/wallabag/wallabag.git
synced 2024-11-15 21:41:06 +00:00
Merge pull request #2679 from jcharaoui/fix-2658
Fix content from imported entried being discarded when URL goes bad
This commit is contained in:
commit
4a1f963531
10 changed files with 40 additions and 35 deletions
|
@ -21,14 +21,16 @@ class ContentProxy
|
|||
protected $logger;
|
||||
protected $tagRepository;
|
||||
protected $mimeGuesser;
|
||||
protected $fetchingErrorMessage;
|
||||
|
||||
public function __construct(Graby $graby, RuleBasedTagger $tagger, TagRepository $tagRepository, LoggerInterface $logger)
|
||||
public function __construct(Graby $graby, RuleBasedTagger $tagger, TagRepository $tagRepository, LoggerInterface $logger, $fetchingErrorMessage)
|
||||
{
|
||||
$this->graby = $graby;
|
||||
$this->tagger = $tagger;
|
||||
$this->logger = $logger;
|
||||
$this->tagRepository = $tagRepository;
|
||||
$this->mimeGuesser = new MimeTypeExtensionGuesser();
|
||||
$this->fetchingErrorMessage = $fetchingErrorMessage;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -48,7 +50,13 @@ class ContentProxy
|
|||
{
|
||||
// do we have to fetch the content or the provided one is ok?
|
||||
if (empty($content) || false === $this->validateContent($content)) {
|
||||
$content = $this->graby->fetchContent($url);
|
||||
$fetchedContent = $this->graby->fetchContent($url);
|
||||
|
||||
// when content is imported, we have information in $content
|
||||
// in case fetching content goes bad, we'll keep the imported information instead of overriding them
|
||||
if (empty($content) || $fetchedContent['html'] !== $this->fetchingErrorMessage) {
|
||||
$content = $fetchedContent;
|
||||
}
|
||||
}
|
||||
|
||||
$title = $content['title'];
|
||||
|
@ -58,7 +66,7 @@ class ContentProxy
|
|||
|
||||
$html = $content['html'];
|
||||
if (false === $html) {
|
||||
$html = '<p>Unable to retrieve readable content.</p>';
|
||||
$html = $this->fetchingErrorMessage;
|
||||
|
||||
if (isset($content['open_graph']['og_description'])) {
|
||||
$html .= '<p><i>But we found a short description: </i></p>';
|
||||
|
@ -71,8 +79,8 @@ class ContentProxy
|
|||
$entry->setContent($html);
|
||||
$entry->setHttpStatus(isset($content['status']) ? $content['status'] : '');
|
||||
|
||||
$entry->setLanguage($content['language']);
|
||||
$entry->setMimetype($content['content_type']);
|
||||
$entry->setLanguage(isset($content['language']) ? $content['language'] : '');
|
||||
$entry->setMimetype(isset($content['content_type']) ? $content['content_type'] : '');
|
||||
$entry->setReadingTime(Utils::getReadingTime($html));
|
||||
|
||||
$domainName = parse_url($entry->getUrl(), PHP_URL_HOST);
|
||||
|
@ -85,7 +93,7 @@ class ContentProxy
|
|||
}
|
||||
|
||||
// if content is an image define as a preview too
|
||||
if (in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
|
||||
if (isset($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
|
||||
$entry->setPreviewPicture($content['url']);
|
||||
}
|
||||
|
||||
|
|
|
@ -86,6 +86,7 @@ services:
|
|||
- "@wallabag_core.rule_based_tagger"
|
||||
- "@wallabag_core.tag_repository"
|
||||
- "@logger"
|
||||
- '%wallabag_core.fetching_error_message%'
|
||||
|
||||
wallabag_core.rule_based_tagger:
|
||||
class: Wallabag\CoreBundle\Helper\RuleBasedTagger
|
||||
|
|
|
@ -37,7 +37,7 @@ class ChromeImport extends BrowserImport
|
|||
{
|
||||
$data = [
|
||||
'title' => $entry['name'],
|
||||
'html' => '',
|
||||
'html' => false,
|
||||
'url' => $entry['url'],
|
||||
'is_archived' => $this->markAsRead,
|
||||
'tags' => '',
|
||||
|
|
|
@ -37,7 +37,7 @@ class FirefoxImport extends BrowserImport
|
|||
{
|
||||
$data = [
|
||||
'title' => $entry['title'],
|
||||
'html' => '',
|
||||
'html' => false,
|
||||
'url' => $entry['uri'],
|
||||
'is_archived' => $this->markAsRead,
|
||||
'tags' => '',
|
||||
|
|
|
@ -74,8 +74,7 @@ class InstapaperImport extends AbstractImport
|
|||
'status' => $data[3],
|
||||
'is_archived' => $data[3] === 'Archive' || $data[3] === 'Starred',
|
||||
'is_starred' => $data[3] === 'Starred',
|
||||
'content_type' => '',
|
||||
'language' => '',
|
||||
'html' => false,
|
||||
];
|
||||
}
|
||||
fclose($handle);
|
||||
|
|
|
@ -98,8 +98,6 @@ class PinboardImport extends AbstractImport
|
|||
$data = [
|
||||
'title' => $importedEntry['description'],
|
||||
'url' => $importedEntry['href'],
|
||||
'content_type' => '',
|
||||
'language' => '',
|
||||
'is_archived' => ('no' === $importedEntry['toread']) || $this->markAsRead,
|
||||
'is_starred' => false,
|
||||
'created_at' => $importedEntry['time'],
|
||||
|
|
|
@ -98,11 +98,10 @@ class ReadabilityImport extends AbstractImport
|
|||
$data = [
|
||||
'title' => $importedEntry['article__title'],
|
||||
'url' => $importedEntry['article__url'],
|
||||
'content_type' => '',
|
||||
'language' => '',
|
||||
'is_archived' => $importedEntry['archive'] || $this->markAsRead,
|
||||
'is_starred' => $importedEntry['favorite'],
|
||||
'created_at' => $importedEntry['date_added'],
|
||||
'html' => false,
|
||||
];
|
||||
|
||||
$entry = new Entry($this->user);
|
||||
|
|
|
@ -37,8 +37,6 @@ class WallabagV1Import extends WallabagImport
|
|||
'title' => $entry['title'],
|
||||
'html' => $entry['content'],
|
||||
'url' => $entry['url'],
|
||||
'content_type' => '',
|
||||
'language' => '',
|
||||
'is_archived' => $entry['is_read'] || $this->markAsRead,
|
||||
'is_starred' => $entry['is_fav'],
|
||||
'tags' => '',
|
||||
|
|
|
@ -10,6 +10,8 @@ use Wallabag\UserBundle\Entity\User;
|
|||
|
||||
class ContentProxyTest extends \PHPUnit_Framework_TestCase
|
||||
{
|
||||
private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/master/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.';
|
||||
|
||||
public function testWithBadUrl()
|
||||
{
|
||||
$tagger = $this->getTaggerMock();
|
||||
|
@ -31,12 +33,12 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
|
|||
'language' => '',
|
||||
]);
|
||||
|
||||
$proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger());
|
||||
$proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger(), $this->fetchingErrorMessage);
|
||||
$entry = $proxy->updateEntry(new Entry(new User()), 'http://user@:80');
|
||||
|
||||
$this->assertEquals('http://user@:80', $entry->getUrl());
|
||||
$this->assertEmpty($entry->getTitle());
|
||||
$this->assertEquals('<p>Unable to retrieve readable content.</p>', $entry->getContent());
|
||||
$this->assertEquals($this->fetchingErrorMessage, $entry->getContent());
|
||||
$this->assertEmpty($entry->getPreviewPicture());
|
||||
$this->assertEmpty($entry->getMimetype());
|
||||
$this->assertEmpty($entry->getLanguage());
|
||||
|
@ -65,12 +67,12 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
|
|||
'language' => '',
|
||||
]);
|
||||
|
||||
$proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger());
|
||||
$proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger(), $this->fetchingErrorMessage);
|
||||
$entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0');
|
||||
|
||||
$this->assertEquals('http://0.0.0.0', $entry->getUrl());
|
||||
$this->assertEmpty($entry->getTitle());
|
||||
$this->assertEquals('<p>Unable to retrieve readable content.</p>', $entry->getContent());
|
||||
$this->assertEquals($this->fetchingErrorMessage, $entry->getContent());
|
||||
$this->assertEmpty($entry->getPreviewPicture());
|
||||
$this->assertEmpty($entry->getMimetype());
|
||||
$this->assertEmpty($entry->getLanguage());
|
||||
|
@ -104,12 +106,12 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
|
|||
],
|
||||
]);
|
||||
|
||||
$proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger());
|
||||
$proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger(), $this->fetchingErrorMessage);
|
||||
$entry = $proxy->updateEntry(new Entry(new User()), 'http://domain.io');
|
||||
|
||||
$this->assertEquals('http://domain.io', $entry->getUrl());
|
||||
$this->assertEquals('my title', $entry->getTitle());
|
||||
$this->assertEquals('<p>Unable to retrieve readable content.</p><p><i>But we found a short description: </i></p>desc', $entry->getContent());
|
||||
$this->assertEquals($this->fetchingErrorMessage . '<p><i>But we found a short description: </i></p>desc', $entry->getContent());
|
||||
$this->assertEmpty($entry->getPreviewPicture());
|
||||
$this->assertEmpty($entry->getLanguage());
|
||||
$this->assertEmpty($entry->getHttpStatus());
|
||||
|
@ -145,7 +147,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
|
|||
],
|
||||
]);
|
||||
|
||||
$proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger());
|
||||
$proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger(), $this->fetchingErrorMessage);
|
||||
$entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0');
|
||||
|
||||
$this->assertEquals('http://1.1.1.1', $entry->getUrl());
|
||||
|
@ -167,7 +169,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
|
|||
|
||||
$graby = $this->getMockBuilder('Graby\Graby')->getMock();
|
||||
|
||||
$proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger());
|
||||
$proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger(), $this->fetchingErrorMessage);
|
||||
$entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0', [
|
||||
'html' => str_repeat('this is my content', 325),
|
||||
'title' => 'this is my title',
|
||||
|
@ -197,7 +199,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
|
|||
->will($this->throwException(new \Exception()));
|
||||
|
||||
$tagRepo = $this->getTagRepositoryMock();
|
||||
$proxy = new ContentProxy($graby, $tagger, $tagRepo, $this->getLogger());
|
||||
$proxy = new ContentProxy($graby, $tagger, $tagRepo, $this->getLogger(), $this->fetchingErrorMessage);
|
||||
|
||||
$entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0', [
|
||||
'html' => str_repeat('this is my content', 325),
|
||||
|
@ -217,7 +219,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
|
|||
->getMock();
|
||||
|
||||
$tagRepo = $this->getTagRepositoryMock();
|
||||
$proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger());
|
||||
$proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger(), $this->fetchingErrorMessage);
|
||||
|
||||
$entry = new Entry(new User());
|
||||
|
||||
|
@ -235,7 +237,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
|
|||
->getMock();
|
||||
|
||||
$tagRepo = $this->getTagRepositoryMock();
|
||||
$proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger());
|
||||
$proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger(), $this->fetchingErrorMessage);
|
||||
|
||||
$entry = new Entry(new User());
|
||||
|
||||
|
@ -253,7 +255,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
|
|||
->getMock();
|
||||
|
||||
$tagRepo = $this->getTagRepositoryMock();
|
||||
$proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger());
|
||||
$proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger(), $this->fetchingErrorMessage);
|
||||
|
||||
$entry = new Entry(new User());
|
||||
|
||||
|
@ -269,7 +271,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
|
|||
->getMock();
|
||||
|
||||
$tagRepo = $this->getTagRepositoryMock();
|
||||
$proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger());
|
||||
$proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger(), $this->fetchingErrorMessage);
|
||||
|
||||
$entry = new Entry(new User());
|
||||
|
||||
|
@ -285,7 +287,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
|
|||
->getMock();
|
||||
|
||||
$tagRepo = $this->getTagRepositoryMock();
|
||||
$proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger());
|
||||
$proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger(), $this->fetchingErrorMessage);
|
||||
|
||||
$tagEntity = new Tag();
|
||||
$tagEntity->setLabel('tag1');
|
||||
|
@ -310,7 +312,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
|
|||
$tagRepo->expects($this->never())
|
||||
->method('__call');
|
||||
|
||||
$proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger());
|
||||
$proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger(), $this->fetchingErrorMessage);
|
||||
|
||||
$tagEntity = new Tag();
|
||||
$tagEntity->setLabel('tag1');
|
||||
|
|
|
@ -112,7 +112,7 @@ class WallabagV1ControllerTest extends WallabagCoreTestCase
|
|||
->get('doctrine.orm.entity_manager')
|
||||
->getRepository('WallabagCoreBundle:Entry')
|
||||
->findByUrlAndUserId(
|
||||
'http://www.framablog.org/index.php/post/2014/02/05/Framabag-service-libre-gratuit-interview-developpeur',
|
||||
'https://framablog.org/2014/02/05/framabag-service-libre-gratuit-interview-developpeur/',
|
||||
$this->getLoggedInUserId()
|
||||
);
|
||||
|
||||
|
@ -126,9 +126,9 @@ class WallabagV1ControllerTest extends WallabagCoreTestCase
|
|||
$this->assertGreaterThan(1, $body = $crawler->filter('body')->extract(['_text']));
|
||||
$this->assertContains('flashes.import.notice.summary', $body[0]);
|
||||
|
||||
$this->assertEmpty($content->getMimetype(), 'Mimetype for http://www.framablog.org is ok');
|
||||
$this->assertEmpty($content->getPreviewPicture(), 'Preview picture for http://www.framablog.org is ok');
|
||||
$this->assertEmpty($content->getLanguage(), 'Language for http://www.framablog.org is ok');
|
||||
$this->assertNotEmpty($content->getMimetype(), 'Mimetype for http://www.framablog.org is ok');
|
||||
$this->assertNotEmpty($content->getPreviewPicture(), 'Preview picture for http://www.framablog.org is ok');
|
||||
$this->assertNotEmpty($content->getLanguage(), 'Language for http://www.framablog.org is ok');
|
||||
$this->assertEquals(1, count($content->getTags()));
|
||||
$this->assertInstanceOf(\DateTime::class, $content->getCreatedAt());
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue