mirror of
https://github.com/wallabag/wallabag.git
synced 2025-01-10 17:05:26 +00:00
Refactor updateOriginUrl to include new behaviors behaviors
- Leave origin_url unchanged if difference is an ending slash - Leave origin_url unchanged if difference is scheme - Ignore (noop) if difference is query string or fragment Signed-off-by: Kevin Decherf <kevin@kdecherf.com>
This commit is contained in:
parent
781864b954
commit
e07fadea76
2 changed files with 140 additions and 35 deletions
|
@ -246,15 +246,7 @@ class ContentProxy
|
|||
*/
|
||||
private function stockEntry(Entry $entry, array $content)
|
||||
{
|
||||
// When a redirection occurs while fetching an entry
|
||||
// we move the original url in origin_url property if empty
|
||||
// and set the entry url with the final value
|
||||
if (!empty($content['url']) && $entry->getUrl() !== $content['url']) {
|
||||
if (empty($entry->getOriginUrl())) {
|
||||
$entry->setOriginUrl($entry->getUrl());
|
||||
}
|
||||
$entry->setUrl($content['url']);
|
||||
}
|
||||
$this->updateOriginUrl($entry, $content['url']);
|
||||
|
||||
$this->setEntryDomainName($entry);
|
||||
|
||||
|
@ -320,6 +312,50 @@ class ContentProxy
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the origin_url field when a redirection occurs
|
||||
* This field is set if it is empty and new url does not match ignore list.
|
||||
*
|
||||
* @param Entry $entry
|
||||
* @param string $url
|
||||
*/
|
||||
private function updateOriginUrl(Entry $entry, $url)
|
||||
{
|
||||
if (!empty($url) && $entry->getUrl() !== $url) {
|
||||
$parsed_entry_url = parse_url($entry->getUrl());
|
||||
$parsed_content_url = parse_url($url);
|
||||
|
||||
$diff_ec = array_diff_assoc($parsed_entry_url, $parsed_content_url);
|
||||
$diff_ce = array_diff_assoc($parsed_content_url, $parsed_entry_url);
|
||||
|
||||
$diff = array_merge($diff_ec, $diff_ce);
|
||||
$diff_keys = array_keys($diff);
|
||||
sort($diff_keys);
|
||||
|
||||
switch ($diff_keys) {
|
||||
case ['path']:
|
||||
if (($parsed_entry_url['path'] . '/' === $parsed_content_url['path']) // diff is trailing slash, we only replace the url of the entry
|
||||
|| ($url === urldecode($entry->getUrl()))) { // we update entry url if new url is a decoded version of it, see EntryRepository#findByUrlAndUserId
|
||||
$entry->setUrl($url);
|
||||
}
|
||||
break;
|
||||
case ['scheme']:
|
||||
$entry->setUrl($url);
|
||||
break;
|
||||
case ['fragment']:
|
||||
case ['query']:
|
||||
// noop
|
||||
break;
|
||||
default:
|
||||
if (empty($entry->getOriginUrl())) {
|
||||
$entry->setOriginUrl($entry->getUrl());
|
||||
}
|
||||
$entry->setUrl($url);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate that the given content has at least a title, an html and a url.
|
||||
*
|
||||
|
|
|
@ -739,6 +739,101 @@ class ContentProxyTest extends TestCase
|
|||
$this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Data provider for testWithChangedUrl.
|
||||
*
|
||||
* Arrays contain the following values:
|
||||
* $entry_url
|
||||
* $origin_url
|
||||
* $content_url
|
||||
* $expected_entry_url
|
||||
* $expected_origin_url
|
||||
* $expected_domain
|
||||
*/
|
||||
public function dataForChangedUrl()
|
||||
{
|
||||
return [
|
||||
'normal' => [
|
||||
'http://0.0.0.0',
|
||||
null,
|
||||
'http://1.1.1.1',
|
||||
'http://1.1.1.1',
|
||||
'http://0.0.0.0',
|
||||
'1.1.1.1',
|
||||
],
|
||||
'origin already set' => [
|
||||
'http://0.0.0.0',
|
||||
'http://hello',
|
||||
'http://1.1.1.1',
|
||||
'http://1.1.1.1',
|
||||
'http://hello',
|
||||
'1.1.1.1',
|
||||
],
|
||||
'trailing slash' => [
|
||||
'https://example.com/hello-world',
|
||||
null,
|
||||
'https://example.com/hello-world/',
|
||||
'https://example.com/hello-world/',
|
||||
null,
|
||||
'example.com',
|
||||
],
|
||||
'no query string in fetched content' => [
|
||||
'https://example.org/hello?world=1',
|
||||
null,
|
||||
'https://example.org/hello',
|
||||
'https://example.org/hello?world=1',
|
||||
null,
|
||||
'example.org',
|
||||
],
|
||||
'query string in fetched content' => [
|
||||
'https://example.org/hello',
|
||||
null,
|
||||
'https://example.org/hello?world=1',
|
||||
'https://example.org/hello',
|
||||
null,
|
||||
'example.org',
|
||||
],
|
||||
'fragment in fetched content' => [
|
||||
'https://example.org/hello',
|
||||
null,
|
||||
'https://example.org/hello#world',
|
||||
'https://example.org/hello',
|
||||
null,
|
||||
'example.org',
|
||||
],
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @dataProvider dataForChangedUrl
|
||||
*/
|
||||
public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain)
|
||||
{
|
||||
$tagger = $this->getTaggerMock();
|
||||
$tagger->expects($this->once())
|
||||
->method('tag');
|
||||
|
||||
$proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);
|
||||
$entry = new Entry(new User());
|
||||
$entry->setOriginUrl($origin_url);
|
||||
$proxy->updateEntry(
|
||||
$entry,
|
||||
$entry_url,
|
||||
[
|
||||
'html' => false,
|
||||
'title' => '',
|
||||
'url' => $content_url,
|
||||
'content_type' => '',
|
||||
'language' => '',
|
||||
],
|
||||
true
|
||||
);
|
||||
|
||||
$this->assertSame($expected_entry_url, $entry->getUrl());
|
||||
$this->assertSame($expected_domain, $entry->getDomainName());
|
||||
$this->assertSame($expected_origin_url, $entry->getOriginUrl());
|
||||
}
|
||||
|
||||
/**
|
||||
* https://stackoverflow.com/a/18506801.
|
||||
*
|
||||
|
@ -775,32 +870,6 @@ class ContentProxyTest extends TestCase
|
|||
return $string;
|
||||
}
|
||||
|
||||
public function testWithChangedUrl()
|
||||
{
|
||||
$tagger = $this->getTaggerMock();
|
||||
$tagger->expects($this->once())
|
||||
->method('tag');
|
||||
|
||||
$proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);
|
||||
$entry = new Entry(new User());
|
||||
$proxy->updateEntry(
|
||||
$entry,
|
||||
'http://0.0.0.0',
|
||||
[
|
||||
'html' => false,
|
||||
'title' => '',
|
||||
'url' => 'http://1.1.1.1',
|
||||
'content_type' => '',
|
||||
'language' => '',
|
||||
],
|
||||
true
|
||||
);
|
||||
|
||||
$this->assertSame('http://1.1.1.1', $entry->getUrl());
|
||||
$this->assertSame('1.1.1.1', $entry->getDomainName());
|
||||
$this->assertSame('http://0.0.0.0', $entry->getOriginUrl());
|
||||
}
|
||||
|
||||
private function getTaggerMock()
|
||||
{
|
||||
return $this->getMockBuilder(RuleBasedTagger::class)
|
||||
|
|
Loading…
Reference in a new issue