mirror of
https://github.com/wallabag/wallabag.git
synced 2024-12-16 20:56:28 +00:00
Merge pull request #3987 from wallabag/fix/drop-simplepie
Drop SimplePie
This commit is contained in:
commit
8671da5ead
3 changed files with 17 additions and 19 deletions
|
@ -63,7 +63,6 @@
|
||||||
"nelmio/api-doc-bundle": "^2.13.2",
|
"nelmio/api-doc-bundle": "^2.13.2",
|
||||||
"mgargano/simplehtmldom": "~1.5",
|
"mgargano/simplehtmldom": "~1.5",
|
||||||
"wallabag/tcpdf": "^6.2.26",
|
"wallabag/tcpdf": "^6.2.26",
|
||||||
"simplepie/simplepie": "~1.5",
|
|
||||||
"willdurand/hateoas-bundle": "~1.3",
|
"willdurand/hateoas-bundle": "~1.3",
|
||||||
"liip/theme-bundle": "^1.4.6",
|
"liip/theme-bundle": "^1.4.6",
|
||||||
"lexik/form-filter-bundle": "^5.0.4",
|
"lexik/form-filter-bundle": "^5.0.4",
|
||||||
|
|
2
composer.lock
generated
2
composer.lock
generated
|
@ -4,7 +4,7 @@
|
||||||
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
||||||
"This file is @generated automatically"
|
"This file is @generated automatically"
|
||||||
],
|
],
|
||||||
"content-hash": "883f44eda34a48c8ddabc3294498d996",
|
"content-hash": "c42e1b50f4a2b8a59ca06c5ccb24e6a3",
|
||||||
"packages": [
|
"packages": [
|
||||||
{
|
{
|
||||||
"name": "bdunogier/guzzle-site-authenticator",
|
"name": "bdunogier/guzzle-site-authenticator",
|
||||||
|
|
|
@ -2,6 +2,8 @@
|
||||||
|
|
||||||
namespace Wallabag\CoreBundle\Helper;
|
namespace Wallabag\CoreBundle\Helper;
|
||||||
|
|
||||||
|
use GuzzleHttp\Psr7\Uri;
|
||||||
|
use GuzzleHttp\Psr7\UriResolver;
|
||||||
use Http\Client\Common\HttpMethodsClient;
|
use Http\Client\Common\HttpMethodsClient;
|
||||||
use Http\Client\Common\Plugin\ErrorPlugin;
|
use Http\Client\Common\Plugin\ErrorPlugin;
|
||||||
use Http\Client\Common\PluginClient;
|
use Http\Client\Common\PluginClient;
|
||||||
|
@ -45,10 +47,8 @@ class DownloadImages
|
||||||
public static function extractImagesUrlsFromHtml($html)
|
public static function extractImagesUrlsFromHtml($html)
|
||||||
{
|
{
|
||||||
$crawler = new Crawler($html);
|
$crawler = new Crawler($html);
|
||||||
$imagesCrawler = $crawler
|
$imagesCrawler = $crawler->filterXpath('//img');
|
||||||
->filterXpath('//img');
|
$imagesUrls = $imagesCrawler->extract(['src']);
|
||||||
$imagesUrls = $imagesCrawler
|
|
||||||
->extract(['src']);
|
|
||||||
$imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler);
|
$imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler);
|
||||||
|
|
||||||
return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
|
return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
|
||||||
|
@ -220,22 +220,25 @@ class DownloadImages
|
||||||
private static function getSrcsetUrls(Crawler $imagesCrawler)
|
private static function getSrcsetUrls(Crawler $imagesCrawler)
|
||||||
{
|
{
|
||||||
$urls = [];
|
$urls = [];
|
||||||
$iterator = $imagesCrawler
|
$iterator = $imagesCrawler->getIterator();
|
||||||
->getIterator();
|
|
||||||
while ($iterator->valid()) {
|
while ($iterator->valid()) {
|
||||||
$srcsetAttribute = $iterator->current()->getAttribute('srcset');
|
$srcsetAttribute = $iterator->current()->getAttribute('srcset');
|
||||||
|
|
||||||
if ('' !== $srcsetAttribute) {
|
if ('' !== $srcsetAttribute) {
|
||||||
// Couldn't start with " OR ' OR a white space
|
// Couldn't start with " OR ' OR a white space
|
||||||
// Could be one or more white space
|
// Could be one or more white space
|
||||||
// Must be one or more digits followed by w OR x
|
// Must be one or more digits followed by w OR x
|
||||||
$pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/";
|
$pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/";
|
||||||
preg_match_all($pattern, $srcsetAttribute, $matches);
|
preg_match_all($pattern, $srcsetAttribute, $matches);
|
||||||
|
|
||||||
$srcset = \call_user_func_array('array_merge', $matches);
|
$srcset = \call_user_func_array('array_merge', $matches);
|
||||||
$srcsetUrls = array_map(function ($src) {
|
$srcsetUrls = array_map(function ($src) {
|
||||||
return trim(explode(' ', $src, 2)[0]);
|
return trim(explode(' ', $src, 2)[0]);
|
||||||
}, $srcset);
|
}, $srcset);
|
||||||
$urls = array_merge($srcsetUrls, $urls);
|
$urls = array_merge($srcsetUrls, $urls);
|
||||||
}
|
}
|
||||||
|
|
||||||
$iterator->next();
|
$iterator->next();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -292,22 +295,18 @@ class DownloadImages
|
||||||
return $url;
|
return $url;
|
||||||
}
|
}
|
||||||
|
|
||||||
$base = new \SimplePie_IRI($base);
|
$base = new Uri($base);
|
||||||
|
|
||||||
// remove '//' in URL path (causes URLs not to resolve properly)
|
|
||||||
if (isset($base->ipath)) {
|
|
||||||
$base->ipath = preg_replace('!//+!', '/', $base->ipath);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($absolute = \SimplePie_IRI::absolutize($base, $url)) {
|
|
||||||
return $absolute->get_uri();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// in case the url has no scheme & host
|
||||||
|
if ('' === $base->getAuthority() || '' === $base->getScheme()) {
|
||||||
$this->logger->error('DownloadImages: Can not make an absolute link', ['base' => $base, 'url' => $url]);
|
$this->logger->error('DownloadImages: Can not make an absolute link', ['base' => $base, 'url' => $url]);
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return (string) UriResolver::resolve($base, new Uri($url));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieve and validate the extension from the response of the url of the image.
|
* Retrieve and validate the extension from the response of the url of the image.
|
||||||
*
|
*
|
||||||
|
|
Loading…
Reference in a new issue