diff --git a/src/ExpressionLanguage/AuthenticatorProvider.php b/src/ExpressionLanguage/AuthenticatorProvider.php index dadf6c6bd..f51bc8b6e 100644 --- a/src/ExpressionLanguage/AuthenticatorProvider.php +++ b/src/ExpressionLanguage/AuthenticatorProvider.php @@ -3,6 +3,7 @@ namespace Wallabag\ExpressionLanguage; use GuzzleHttp\ClientInterface; +use Symfony\Component\DomCrawler\Crawler; use Symfony\Component\ExpressionLanguage\ExpressionFunction; use Symfony\Component\ExpressionLanguage\ExpressionFunctionProviderInterface; @@ -69,27 +70,19 @@ class AuthenticatorProvider implements ExpressionFunctionProviderInterface throw new \Exception('Not supported'); }, function (array $arguments, $xpathQuery, $html) { - $useInternalErrors = libxml_use_internal_errors(true); + try { + $crawler = new Crawler((string) $html); - $doc = new \DOMDocument(); - $doc->loadHTML((string) $html, \LIBXML_NOCDATA | \LIBXML_NOWARNING | \LIBXML_NOERROR); - - $xpath = new \DOMXPath($doc); - $domNodeList = $xpath->query($xpathQuery); - - if (0 === $domNodeList->length) { + $crawler = $crawler->filterXPath($xpathQuery); + } catch (\Throwable $e) { return ''; } - $domNode = $domNodeList->item(0); - - libxml_use_internal_errors($useInternalErrors); - - if (null === $domNode || null === $domNode->attributes) { + if (0 === $crawler->count()) { return ''; } - return $domNode->attributes->getNamedItem('value')->nodeValue; + return (string) $crawler->first()->attr('value'); } ); } diff --git a/src/Import/HtmlImport.php b/src/Import/HtmlImport.php index e2f0600e5..909ff9bc8 100644 --- a/src/Import/HtmlImport.php +++ b/src/Import/HtmlImport.php @@ -2,6 +2,7 @@ namespace Wallabag\Import; +use Symfony\Component\DomCrawler\Crawler; use Wallabag\Entity\Entry; use Wallabag\Event\EntrySavedEvent; @@ -29,27 +30,23 @@ abstract class HtmlImport extends AbstractImport return false; } - $html = new \DOMDocument(); + $crawler = new Crawler(file_get_contents($this->filepath)); - libxml_use_internal_errors(true); - $html->loadHTMLFile($this->filepath); - $hrefs = $html->getElementsByTagName('a'); - libxml_use_internal_errors(false); + $hrefs = $crawler->filterXPath('//a'); - if (0 === $hrefs->length) { + if (0 === $hrefs->count()) { $this->logger->error('Wallabag HTML: no entries in imported file'); return false; } - $entries = []; - foreach ($hrefs as $href) { - $entry = []; - $entry['url'] = $href->getAttribute('href'); - $entry['tags'] = $href->getAttribute('tags'); - $entry['created_at'] = $href->getAttribute('add_date'); - $entries[] = $entry; - } + $entries = $hrefs->each(function (Crawler $node) { + return [ + 'url' => $node->attr('href'), + 'tags' => $node->attr('tags'), + 'created_at' => $node->attr('add_date'), + ]; + }); if ($this->producer) { $this->parseEntriesForProducer($entries); diff --git a/src/Import/PocketHtmlImport.php b/src/Import/PocketHtmlImport.php index d9ed7be21..b1c7e3edb 100644 --- a/src/Import/PocketHtmlImport.php +++ b/src/Import/PocketHtmlImport.php @@ -2,6 +2,8 @@ namespace Wallabag\Import; +use Symfony\Component\DomCrawler\Crawler; + class PocketHtmlImport extends HtmlImport { protected $filepath; @@ -44,27 +46,23 @@ class PocketHtmlImport extends HtmlImport return false; } - $html = new \DOMDocument(); + $crawler = new Crawler(file_get_contents($this->filepath)); - libxml_use_internal_errors(true); - $html->loadHTMLFile($this->filepath); - $hrefs = $html->getElementsByTagName('a'); - libxml_use_internal_errors(false); + $hrefs = $crawler->filterXPath('//a'); - if (0 === $hrefs->length) { + if (0 === $hrefs->count()) { $this->logger->error('Pocket HTML: no entries in imported file'); return false; } - $entries = []; - foreach ($hrefs as $href) { - $entry = []; - $entry['url'] = $href->getAttribute('href'); - $entry['tags'] = $href->getAttribute('tags'); - $entry['created_at'] = $href->getAttribute('time_added'); - $entries[] = $entry; - } + $entries = $hrefs->each(function (Crawler $node) { + return [ + 'url' => $node->attr('href'), + 'tags' => $node->attr('tags'), + 'created_at' => $node->attr('time_added'), + ]; + }); if ($this->producer) { $this->parseEntriesForProducer($entries); diff --git a/src/SiteConfig/Authenticator/LoginFormAuthenticator.php b/src/SiteConfig/Authenticator/LoginFormAuthenticator.php index 68dded2d3..3292e79c6 100644 --- a/src/SiteConfig/Authenticator/LoginFormAuthenticator.php +++ b/src/SiteConfig/Authenticator/LoginFormAuthenticator.php @@ -4,6 +4,7 @@ namespace Wallabag\SiteConfig\Authenticator; use GuzzleHttp\ClientInterface; use GuzzleHttp\Cookie\CookieJar; +use Symfony\Component\DomCrawler\Crawler; use Symfony\Component\ExpressionLanguage\ExpressionLanguage; use Wallabag\ExpressionLanguage\AuthenticatorProvider; use Wallabag\SiteConfig\SiteConfig; @@ -54,22 +55,16 @@ class LoginFormAuthenticator implements Authenticator public function isLoginRequired($html) { - $useInternalErrors = libxml_use_internal_errors(true); - // need to check for the login dom element ($options['not_logged_in_xpath']) in the HTML - $doc = new \DOMDocument(); - $doc->loadHTML($html); + try { + $crawler = new Crawler((string) $html); - $xpath = new \DOMXPath($doc); - $loggedIn = $xpath->evaluate((string) $this->siteConfig->getNotLoggedInXpath()); - - if (false === $loggedIn) { + $loggedIn = $crawler->evaluate((string) $this->siteConfig->getNotLoggedInXpath()); + } catch (\Throwable $e) { return false; } - libxml_use_internal_errors($useInternalErrors); - - return $loggedIn->length > 0; + return \count($loggedIn) > 0; } /**