mirror of
https://github.com/wallabag/wallabag.git
synced 2024-12-24 16:40:30 +00:00
fix bug #209: titles containing a colon were parsed incorrectly
This commit is contained in:
parent
084ec2a63d
commit
b9523a0ba0
3 changed files with 49 additions and 9 deletions
46
inc/poche/PocheReadability.php
Normal file
46
inc/poche/PocheReadability.php
Normal file
|
@ -0,0 +1,46 @@
|
|||
<?php
|
||||
|
||||
class PocheReadability extends Readability
{
    /**
     * Get the article title as an H1.
     *
     * The title is derived from the document's <title> element:
     *  - if it contains a " | " or " - " separator, the part before the last
     *    separator is kept (or, when that part has fewer than three words,
     *    the part after the first separator);
     *  - otherwise, if it is suspiciously long (> 150 characters) or short
     *    (< 15 characters) and the page has exactly one <h1>, that heading's
     *    text is used instead.
     * If the resulting candidate has four words or fewer, the untouched
     * <title> text is used after all.
     *
     * Colons are not treated as separators here; per the commit that
     * introduced this class, splitting on them broke titles (bug #209).
     *
     * @return DOMElement a detached <h1> element holding the title as text
     */
    protected function getArticleTitle() {
        $curTitle = '';
        $origTitle = '';

        try {
            $curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0));
        } catch(Exception $e) {
            // Best effort: if the <title> text cannot be read, continue with
            // an empty title and let the <h1> fallback below take over.
        }

        if (preg_match('/ [\|\-] /', $curTitle))
        {
            // Greedy match: keep everything before the LAST "| " or "- ".
            $curTitle = preg_replace('/(.*)[\|\-] .*/i', '$1', $origTitle);

            // Too short to be a real title: keep what follows the FIRST
            // separator instead.
            if (count(explode(' ', $curTitle)) < 3) {
                $curTitle = preg_replace('/[^\|\-]*[\|\-](.*)/i', '$1', $origTitle);
            }
        }
        else
        {
            // DOM strings are UTF-8, so count characters rather than bytes
            // when mbstring is available; otherwise keep the byte count.
            $titleLength = function_exists('mb_strlen')
                ? mb_strlen($curTitle, 'UTF-8')
                : strlen($curTitle);

            if ($titleLength > 150 || $titleLength < 15)
            {
                $hOnes = $this->dom->getElementsByTagName('h1');
                // Only trust the heading when it is unambiguous.
                if ($hOnes->length == 1)
                {
                    $curTitle = $this->getInnerText($hOnes->item(0));
                }
            }
        }

        $curTitle = trim($curTitle);

        // A candidate of four words or fewer is probably a site name or a
        // fragment: fall back to the full <title> text.
        if (count(explode(' ', $curTitle)) <= 4) {
            $curTitle = $origTitle;
        }

        $articleTitle = $this->dom->createElement('h1');
        // The title is plain text, not markup: append it as a text node so
        // characters such as "<", ">" and "&" are preserved instead of being
        // re-parsed as HTML.
        $articleTitle->appendChild($this->dom->createTextNode($curTitle));

        return $articleTitle;
    }
}
|
|
@ -354,7 +354,7 @@ class Url
|
|||
}
|
||||
if (isset($splink)) {
|
||||
// Build DOM tree from HTML
|
||||
$readability = new Readability($html, $url);
|
||||
$readability = new PocheReadability($html, $url);
|
||||
$xpath = new DOMXPath($readability->dom);
|
||||
// Loop through single_page_link xpath expressions
|
||||
$single_page_url = null;
|
||||
|
|
|
@ -20,6 +20,7 @@ require_once __DIR__ . '/../../inc/poche/Url.class.php';
|
|||
require_once __DIR__ . '/../../inc/3rdparty/class.messages.php';
|
||||
require_once __DIR__ . '/../../inc/poche/Poche.class.php';
|
||||
require_once __DIR__ . '/../../inc/3rdparty/Readability.php';
|
||||
require_once __DIR__ . '/../../inc/poche/PocheReadability.php';
|
||||
require_once __DIR__ . '/../../inc/3rdparty/Encoding.php';
|
||||
require_once __DIR__ . '/../../inc/poche/Database.class.php';
|
||||
require_once __DIR__ . '/../../vendor/autoload.php';
|
||||
|
@ -47,11 +48,4 @@ if (!ini_get('date.timezone') || !@date_default_timezone_set(ini_get('date.timez
|
|||
date_default_timezone_set('UTC');
|
||||
}
|
||||
|
||||
$poche = new Poche();
|
||||
#XSRF protection with token
|
||||
// if (!empty($_POST)) {
|
||||
// if (!Session::isToken($_POST['token'])) {
|
||||
// die(_('Wrong token'));
|
||||
// }
|
||||
// unset($_SESSION['tokens']);
|
||||
// }
|
||||
$poche = new Poche();
|
Loading…
Reference in a new issue