mirror of
https://github.com/wallabag/wallabag.git
synced 2025-01-24 23:58:13 +00:00
Run php-cs-fixer for fixing coding standard issues
This commit is contained in:
parent
d64139d812
commit
83f1c3274f
1 changed files with 53 additions and 41 deletions
|
@ -69,47 +69,6 @@ class ContentProxy
|
||||||
$this->stockEntry($entry, $content);
|
$this->stockEntry($entry, $content);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Try to sanitize the title of the fetched content from wrong character encodings and invalid UTF-8 characters.
|
|
||||||
* @param $title
|
|
||||||
* @param $contentType
|
|
||||||
* @return string
|
|
||||||
*/
|
|
||||||
private function sanitizeContentTitle($title, $contentType) {
|
|
||||||
if ('application/pdf' === $contentType) {
|
|
||||||
$title = $this->convertPdfEncodingToUTF8($title);
|
|
||||||
}
|
|
||||||
return $this->sanitizeUTF8Text($title);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* If the title from the fetched content comes from a PDF, then it's very possible that the character encoding is not
|
|
||||||
* UTF-8. This method tries to identify the character encoding and translate the title to UTF-8.
|
|
||||||
* @param $title
|
|
||||||
* @return string (may still contain invalid UTF-8 characters)
|
|
||||||
*/
|
|
||||||
private function convertPdfEncodingToUTF8($title) {
|
|
||||||
// first try UTF-8 because it's easier to detect its presence/absence
|
|
||||||
foreach (array('UTF-8', 'UTF-16BE', 'WINDOWS-1252') as $encoding) {
|
|
||||||
if (mb_check_encoding($title, $encoding)) {
|
|
||||||
return mb_convert_encoding($title, 'UTF-8', $encoding);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return $title;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Remove invalid UTF-8 characters from the given string.
|
|
||||||
* @param String $rawText
|
|
||||||
* @return string
|
|
||||||
*/
|
|
||||||
private function sanitizeUTF8Text($rawText) {
|
|
||||||
if (mb_check_encoding($rawText, 'UTF-8')) {
|
|
||||||
return $rawText;
|
|
||||||
}
|
|
||||||
return iconv("UTF-8", "UTF-8//IGNORE", $rawText);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Use a Symfony validator to ensure the language is well formatted.
|
* Use a Symfony validator to ensure the language is well formatted.
|
||||||
*
|
*
|
||||||
|
@ -218,6 +177,59 @@ class ContentProxy
|
||||||
$entry->setTitle($path);
|
$entry->setTitle($path);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Clean up the title of the fetched content: repair a wrong character encoding where one is suspected, then
 * strip invalid UTF-8 characters.
 *
 * @param string $title       Title as extracted from the fetched content
 * @param string $contentType Content type of the fetched content; only 'application/pdf' triggers the extra
 *                            encoding conversion step
 *
 * @return string
 */
private function sanitizeContentTitle($title, $contentType)
{
    $isPdf = 'application/pdf' === $contentType;

    // PDF titles go through an encoding conversion first; every title is then stripped of invalid UTF-8
    return $this->sanitizeUTF8Text($isPdf ? $this->convertPdfEncodingToUTF8($title) : $title);
}
|
||||||
|
|
||||||
|
/**
 * Best-effort conversion of a PDF title to UTF-8.
 *
 * When the title of the fetched content comes from a PDF, it is quite possible that its character encoding is
 * not UTF-8. The candidate encodings are probed in order and the title is converted from the first one it
 * validates against; when none matches, the title is returned untouched.
 *
 * @param string $title
 *
 * @return string (may still contain invalid UTF-8 characters)
 */
private function convertPdfEncodingToUTF8($title)
{
    // UTF-8 goes first because its presence/absence is the easiest to detect
    $candidates = ['UTF-8', 'UTF-16BE', 'WINDOWS-1252'];

    foreach ($candidates as $candidate) {
        if (!mb_check_encoding($title, $candidate)) {
            continue;
        }

        return mb_convert_encoding($title, 'UTF-8', $candidate);
    }

    return $title;
}
|
||||||
|
|
||||||
|
/**
 * Remove invalid UTF-8 characters from the given string.
 *
 * Text that already validates as UTF-8 is returned unchanged. Otherwise invalid byte sequences are dropped with
 * iconv(). Because iconv() is documented to return false on failure (and its //IGNORE handling depends on the
 * underlying implementation), mbstring is used as a fallback so that a string is always returned.
 *
 * @param string $rawText
 *
 * @return string
 */
private function sanitizeUTF8Text($rawText)
{
    if (mb_check_encoding($rawText, 'UTF-8')) {
        return $rawText;
    }

    $cleanedText = iconv('UTF-8', 'UTF-8//IGNORE', $rawText);
    if (false !== $cleanedText) {
        return $cleanedText;
    }

    // iconv() gave up: re-encode with mbstring instead, temporarily telling it to drop invalid sequences
    // rather than substitute them, and restore the previous setting afterwards.
    $previousSubstitute = mb_substitute_character();
    mb_substitute_character('none');
    $cleanedText = mb_convert_encoding($rawText, 'UTF-8', 'UTF-8');
    mb_substitute_character($previousSubstitute);

    return $cleanedText;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stock entry with fetched or imported content.
|
* Stock entry with fetched or imported content.
|
||||||
* Will fall back to OpenGraph data if available.
|
* Will fall back to OpenGraph data if available.
|
||||||
|
|
Loading…
Reference in a new issue