[change] we now use Full-Text RSS 3.1, thank you so much @fivefilters

This commit is contained in:
Nicolas Lœuillet 2013-12-06 09:45:27 +01:00
parent 59cc585271
commit 42c80841c8
83 changed files with 23898 additions and 7845 deletions

View file

@ -1,262 +0,0 @@
<?php
/**
* @author "Sebastián Grignoli" <grignoli@framework2.com.ar>
* @package Encoding
* @version 1.1
* @link http://www.framework2.com.ar/dzone/forceUTF8-es/
* @example http://www.framework2.com.ar/dzone/forceUTF8-es/
*/
class Encoding {
protected static $win1252ToUtf8 = array(
128 => "\xe2\x82\xac",
130 => "\xe2\x80\x9a",
131 => "\xc6\x92",
132 => "\xe2\x80\x9e",
133 => "\xe2\x80\xa6",
134 => "\xe2\x80\xa0",
135 => "\xe2\x80\xa1",
136 => "\xcb\x86",
137 => "\xe2\x80\xb0",
138 => "\xc5\xa0",
139 => "\xe2\x80\xb9",
140 => "\xc5\x92",
142 => "\xc5\xbd",
145 => "\xe2\x80\x98",
146 => "\xe2\x80\x99",
147 => "\xe2\x80\x9c",
148 => "\xe2\x80\x9d",
149 => "\xe2\x80\xa2",
150 => "\xe2\x80\x93",
151 => "\xe2\x80\x94",
152 => "\xcb\x9c",
153 => "\xe2\x84\xa2",
154 => "\xc5\xa1",
155 => "\xe2\x80\xba",
156 => "\xc5\x93",
158 => "\xc5\xbe",
159 => "\xc5\xb8"
);
protected static $brokenUtf8ToUtf8 = array(
"\xc2\x80" => "\xe2\x82\xac",
"\xc2\x82" => "\xe2\x80\x9a",
"\xc2\x83" => "\xc6\x92",
"\xc2\x84" => "\xe2\x80\x9e",
"\xc2\x85" => "\xe2\x80\xa6",
"\xc2\x86" => "\xe2\x80\xa0",
"\xc2\x87" => "\xe2\x80\xa1",
"\xc2\x88" => "\xcb\x86",
"\xc2\x89" => "\xe2\x80\xb0",
"\xc2\x8a" => "\xc5\xa0",
"\xc2\x8b" => "\xe2\x80\xb9",
"\xc2\x8c" => "\xc5\x92",
"\xc2\x8e" => "\xc5\xbd",
"\xc2\x91" => "\xe2\x80\x98",
"\xc2\x92" => "\xe2\x80\x99",
"\xc2\x93" => "\xe2\x80\x9c",
"\xc2\x94" => "\xe2\x80\x9d",
"\xc2\x95" => "\xe2\x80\xa2",
"\xc2\x96" => "\xe2\x80\x93",
"\xc2\x97" => "\xe2\x80\x94",
"\xc2\x98" => "\xcb\x9c",
"\xc2\x99" => "\xe2\x84\xa2",
"\xc2\x9a" => "\xc5\xa1",
"\xc2\x9b" => "\xe2\x80\xba",
"\xc2\x9c" => "\xc5\x93",
"\xc2\x9e" => "\xc5\xbe",
"\xc2\x9f" => "\xc5\xb8"
);
protected static $utf8ToWin1252 = array(
"\xe2\x82\xac" => "\x80",
"\xe2\x80\x9a" => "\x82",
"\xc6\x92" => "\x83",
"\xe2\x80\x9e" => "\x84",
"\xe2\x80\xa6" => "\x85",
"\xe2\x80\xa0" => "\x86",
"\xe2\x80\xa1" => "\x87",
"\xcb\x86" => "\x88",
"\xe2\x80\xb0" => "\x89",
"\xc5\xa0" => "\x8a",
"\xe2\x80\xb9" => "\x8b",
"\xc5\x92" => "\x8c",
"\xc5\xbd" => "\x8e",
"\xe2\x80\x98" => "\x91",
"\xe2\x80\x99" => "\x92",
"\xe2\x80\x9c" => "\x93",
"\xe2\x80\x9d" => "\x94",
"\xe2\x80\xa2" => "\x95",
"\xe2\x80\x93" => "\x96",
"\xe2\x80\x94" => "\x97",
"\xcb\x9c" => "\x98",
"\xe2\x84\xa2" => "\x99",
"\xc5\xa1" => "\x9a",
"\xe2\x80\xba" => "\x9b",
"\xc5\x93" => "\x9c",
"\xc5\xbe" => "\x9e",
"\xc5\xb8" => "\x9f"
);
static function toUTF8($text){
/**
* Function Encoding::toUTF8
*
* This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
*
* It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1.
*
* It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
*
* 1) when any of these characters: ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
* are followed by any of these: ("group B")
* ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
* For example: %ABREPRESENT%C9%BB. «REPRESENTÉ»
* The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
* is also a valid unicode character, and will be left unchanged.
*
* 2) when any of these: àáâãäåæçèéêëìíîï are followed by TWO chars from group B,
* 3) when any of these: ðñòó are followed by THREE chars from group B.
*
* @name toUTF8
* @param string $text Any string.
* @return string The same string, UTF8 encoded
*
*/
if(is_array($text))
{
foreach($text as $k => $v)
{
$text[$k] = self::toUTF8($v);
}
return $text;
} elseif(is_string($text)) {
$max = strlen($text);
$buf = "";
for($i = 0; $i < $max; $i++){
$c1 = $text{$i};
if($c1>="\xc0"){ //Should be converted to UTF8, if it's not UTF8 already
$c2 = $i+1 >= $max? "\x00" : $text{$i+1};
$c3 = $i+2 >= $max? "\x00" : $text{$i+2};
$c4 = $i+3 >= $max? "\x00" : $text{$i+3};
if($c1 >= "\xc0" & $c1 <= "\xdf"){ //looks like 2 bytes UTF8
if($c2 >= "\x80" && $c2 <= "\xbf"){ //yeah, almost sure it's UTF8 already
$buf .= $c1 . $c2;
$i++;
} else { //not valid UTF8. Convert it.
$cc1 = (chr(ord($c1) / 64) | "\xc0");
$cc2 = ($c1 & "\x3f") | "\x80";
$buf .= $cc1 . $cc2;
}
} elseif($c1 >= "\xe0" & $c1 <= "\xef"){ //looks like 3 bytes UTF8
if($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf"){ //yeah, almost sure it's UTF8 already
$buf .= $c1 . $c2 . $c3;
$i = $i + 2;
} else { //not valid UTF8. Convert it.
$cc1 = (chr(ord($c1) / 64) | "\xc0");
$cc2 = ($c1 & "\x3f") | "\x80";
$buf .= $cc1 . $cc2;
}
} elseif($c1 >= "\xf0" & $c1 <= "\xf7"){ //looks like 4 bytes UTF8
if($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf" && $c4 >= "\x80" && $c4 <= "\xbf"){ //yeah, almost sure it's UTF8 already
$buf .= $c1 . $c2 . $c3;
$i = $i + 2;
} else { //not valid UTF8. Convert it.
$cc1 = (chr(ord($c1) / 64) | "\xc0");
$cc2 = ($c1 & "\x3f") | "\x80";
$buf .= $cc1 . $cc2;
}
} else { //doesn't look like UTF8, but should be converted
$cc1 = (chr(ord($c1) / 64) | "\xc0");
$cc2 = (($c1 & "\x3f") | "\x80");
$buf .= $cc1 . $cc2;
}
} elseif(($c1 & "\xc0") == "\x80"){ // needs conversion
if(isset(self::$win1252ToUtf8[ord($c1)])) { //found in Windows-1252 special cases
$buf .= self::$win1252ToUtf8[ord($c1)];
} else {
$cc1 = (chr(ord($c1) / 64) | "\xc0");
$cc2 = (($c1 & "\x3f") | "\x80");
$buf .= $cc1 . $cc2;
}
} else { // it doesn't need convesion
$buf .= $c1;
}
}
return $buf;
} else {
return $text;
}
}
static function toWin1252($text) {
if(is_array($text)) {
foreach($text as $k => $v) {
$text[$k] = self::toWin1252($v);
}
return $text;
} elseif(is_string($text)) {
return utf8_decode(str_replace(array_keys(self::$utf8ToWin1252), array_values(self::$utf8ToWin1252), self::toUTF8($text)));
} else {
return $text;
}
}
static function toISO8859($text) {
return self::toWin1252($text);
}
static function toLatin1($text) {
return self::toWin1252($text);
}
static function fixUTF8($text){
if(is_array($text)) {
foreach($text as $k => $v) {
$text[$k] = self::fixUTF8($v);
}
return $text;
}
$last = "";
while($last <> $text){
$last = $text;
$text = self::toUTF8(utf8_decode(str_replace(array_keys(self::$utf8ToWin1252), array_values(self::$utf8ToWin1252), $text)));
}
$text = self::toUTF8(utf8_decode(str_replace(array_keys(self::$utf8ToWin1252), array_values(self::$utf8ToWin1252), $text)));
return $text;
}
static function UTF8FixWin1252Chars($text){
// If you received an UTF-8 string that was converted from Windows-1252 as it was ISO8859-1
// (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
// See: http://en.wikipedia.org/wiki/Windows-1252
return str_replace(array_keys(self::$brokenUtf8ToUtf8), array_values(self::$brokenUtf8ToUtf8), $text);
}
static function removeBOM($str=""){
if(substr($str, 0,3) == pack("CCC",0xef,0xbb,0xbf)) {
$str=substr($str, 3);
}
return $str;
}
}

File diff suppressed because it is too large Load diff

405
inc/3rdparty/config.php vendored Normal file
View file

@ -0,0 +1,405 @@
<?php
/* Full-Text RSS config */
// ......IMPORTANT......................................
// .....................................................
// Please do not change this file (config.php) directly.
// Save a copy as custom_config.php and make your
// changes to that instead. It will automatically
// override anything in config.php. Because config.php
// always gets loaded anyway, you can simply specify
// options you'd like to override in custom_config.php.
// .....................................................
// Create config object
if (!isset($options)) $options = new stdClass();
// Enable service
// ----------------------
// Set this to false if you want to disable the service.
// If set to false, no feed is produced and users will
// be told that the service is disabled.
$options->enabled = true;
// Debug mode
// ----------------------
// Enable or disable debugging. When enabled debugging works by passing
// &debug to the makefulltextfeed.php querystring.
// Valid values:
// true or 'user' (default) - let user decide
// 'admin' - debug works only for logged in admin users
// false - disabled
$options->debug = true;
// Default entries (without access key)
// ----------------------
// The number of feed items to process when no API key is supplied
// and no &max=x value is supplied in the querystring.
$options->default_entries = 5;
// Max entries (without access key)
// ----------------------
// The maximum number of feed items to process when no access key is supplied.
// This limits the user-supplied &max=x value. For example, if the user
// asks for 20 items to be processed (&max=20), if max_entries is set to
// 10, only 10 will be processed.
$options->max_entries = 10;
// Rewrite relative URLs
// ----------------------
// With this enabled relative URLs found in the extracted content
// block are automatically rewritten as absolute URLs.
$options->rewrite_relative_urls = true;
// Exclude items if extraction fails
// ---------------------------------
// Excludes items from the resulting feed
// if we cannot extract any content from the
// item URL.
// Possible values...
// Enable: true
// Disable: false (default)
// User decides: 'user' (this option will appear on the form)
$options->exclude_items_on_fail = 'user';
// Enable multi-page support
// -------------------------
// If enabled, we will try to follow next page links on multi-page articles.
// Currently this only happens for sites where next_page_link has been defined
// in a site config file.
$options->multipage = true;
// Enable caching
// ----------------------
// Enable this if you'd like to cache results
// for 10 minutes. Cache files are written to disk (in cache/ subfolders
// - which must be writable).
// Initially it's best to keep this disabled to make sure everything works
// as expected. If you have APC enabled, please also see smart_cache in the
// advanced section.
$options->caching = false;
// Cache directory
// ----------------------
// Only used if caching is true
$options->cache_dir = dirname(__FILE__).'/cache';
// Message to prepend (without access key)
// ----------------------
// HTML to insert at the beginning of each feed item when no access key is supplied.
// Substitution tags:
// {url} - Feed item URL
// {effective-url} - Feed item URL after we've followed all redirects
$options->message_to_prepend = '';
// Message to append (without access key)
// ----------------------
// HTML to insert at the end of each feed item when no access key is supplied.
// Substitution tags:
// {url} - Feed item URL
// {effective-url} - Feed item URL after we've followed all redirects
$options->message_to_append = '';
// Error message when content extraction fails (without access key)
// ----------------------
$options->error_message = '[unable to retrieve full-text content]';
// Keep enclosure in feed items
// If enabled, we will try to preserve enclosures if present.
// ----------------------
$options->keep_enclosures = true;
// Detect language
// ---------------
// Should we try and find/guess the language of the article being processed?
// Values will be placed inside the <dc:language> element inside each <item> element
// Possible values:
// * Ignore language: 0
// * Use article/feed metadata (e.g. HTML lang attribute): 1 (default)
// * As above, but guess if not present: 2
// * Always guess: 3
// * User decides: 'user' (value of 0-3 can be passed in querystring: e.g. &l=2)
$options->detect_language = 1;
// Registration key
// ---------------
// The registration key is optional. It is not required to use Full-Text RSS,
// and does not affect the normal operation of Full-Text RSS. It is currently
// only used on admin pages which help you update site patterns with the
// latest version offered by FiveFilters.org. For these admin-related
// tasks to complete, we will require a valid registration key.
// If you would like one, you can purchase the latest version of Full-Text RSS
// at http://fivefilters.org/content-only/
// Your registration key will automatically be sent in the confirmation email.
// Once you have it, simply copy and paste it here.
$options->registration_key = '';
/////////////////////////////////////////////////
/// RESTRICT ACCESS /////////////////////////////
/////////////////////////////////////////////////
// Admin credentials
// ----------------------
// Certain pages/actions, e.g. updating site patterns with our online tool, will require admin credentials.
// To use these pages, enter a password here and you'll be prompted for it when you try to access those pages.
// If no password or username is set, pages requiring admin privelages will be inaccessible.
// The default username is 'admin'.
// If overriding with an environment variable, separate username and password with a colon, e.g.:
// ftr_admin_credentials: admin:my-secret-password
// Example: $options->admin_credentials = array('username'=>'admin', 'password'=>'my-secret-password');
$options->admin_credentials = array('username'=>'admin', 'password'=>'admin');
// URLs to allow
// ----------------------
// List of URLs (or parts of a URL) which the service will accept.
// If the list is empty, all URLs (except those specified in the blocked list below)
// will be permitted.
// Empty: array();
// Non-empty example: array('example.com', 'anothersite.org');
$options->allowed_urls = array();
// URLs to block
// ----------------------
// List of URLs (or parts of a URL) which the service will not accept.
// Note: this list is ignored if allowed_urls is not empty
$options->blocked_urls = array();
// Key holder(s) only?
// ----------------------
// Set this to true if you want to restrict access only to
// those with a key (see below to specify key(s)).
// If set to true, no feed is produced unless a valid
// key is provided.
$options->key_required = false;
// Favour item titles in feed
// ----------------------
// By default, when processing feeds, we assume item titles in the feed
// have not been truncated. So after processing web pages, the extracted titles
// are not used in the generated feed. If you prefer to have extracted titles in
// the feed you can either set this to false, in which case we will always favour
// extracted titles. Alternatively, if set to 'user' (default) we'll use the
// extracted title if you pass '&use_extracted_title' in the querystring.
// Possible values:
// * Favour feed titles: true
// * Favour extracted titles: false
// * Favour feed titles with user override: 'user' (default)
// Note: this has no effect when the input URL is to a web page - in these cases
// we always use the extracted title in the generated feed.
$options->favour_feed_titles = 'user';
// Access keys (password protected access)
// ------------------------------------
// NOTE: You do not need an API key from fivefilters.org to run your own
// copy of the code. This is here if you'd like to restrict access to
// _your_ copy.
// Keys let you group users - those with a key and those without - and
// restrict access to the service to those without a key.
// If you want everyone to access the service in the same way, you can
// leave the array below empty and ignore the access key options further down.
// The options further down let you control how the service should behave
// in each mode.
// Note: Explicitly including the index number (1 and 2 in the examples below)
// is highly recommended (when generating feeds, we encode the key and
// refer to it by index number and hash).
$options->api_keys = array();
// Example:
// $options->api_keys[1] = 'secret-key-1';
// $options->api_keys[2] = 'secret-key-2';
// Default entries (with access key)
// ----------------------
// The number of feed items to process when a valid access key is supplied.
$options->default_entries_with_key = 5;
// Max entries (with access key)
// ----------------------
// The maximum number of feed items to process when a valid access key is supplied.
$options->max_entries_with_key = 10;
/////////////////////////////////////////////////
/// ADVANCED OPTIONS ////////////////////////////
/////////////////////////////////////////////////
// Enable XSS filter?
// ----------------------
// We have not enabled this by default because we assume the majority of
// our users do not display the HTML retrieved by Full-Text RSS
// in a web page without further processing. If you subscribe to our generated
// feeds in your news reader application, it should, if it's good software, already
// filter the resulting HTML for XSS attacks, making it redundant for
// Full-Text RSS do the same. Similarly with frameworks/CMS which display
// feed content - the content should be treated like any other user-submitted content.
//
// If you are writing an application yourself which is processing feeds generated by
// Full-Text RSS, you can either filter the HTML yourself to remove potential XSS attacks
// or enable this option. This might be useful if you are processing our generated
// feeds with JavaScript on the client side - although there's client side xss
// filtering available too, e.g. https://code.google.com/p/google-caja/wiki/JsHtmlSanitizer
//
// If enabled, we'll pass retrieved HTML content through htmLawed with
// safe flag on and style attributes denied, see
// http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/htmLawed_README.htm#s3.6
// Note: if enabled this will also remove certain elements you may want to preserve, such as iframes.
//
// Valid values:
// true - enabled, all content will be filtered
// 'user' (default) - user must pass &xss in makefulltextfeed.php querystring to enable
// false - disabled
$options->xss_filter = 'user';
// Allowed parsers
// ----------------------
// Full-Text RSS attempts to use PHP's libxml extension to process HTML.
// While fast, on some sites it may not always produce good results.
// For these sites, you can specify an alternative HTML parser:
// parser: html5lib
// The html5lib parser is bundled with Full-Text RSS.
// see http://code.google.com/p/html5lib/
//
// To disable HTML parsing with html5lib, you can remove it from this list.
// By default we allow both: libxml and html5lib.
$options->allowed_parsers = array('libxml', 'html5lib');
//$options->allowed_parsers = array('libxml'); //disable html5lib - forcing libxml in all cases
// Enable Cross-Origin Resource Sharing (CORS)
// ----------------------
// If enabled we'll send the following HTTP header
// Access-Control-Allow-Origin: *
// see http://en.wikipedia.org/wiki/Cross-origin_resource_sharing
$options->cors = false;
// Use APC user cache?
// ----------------------
// If enabled we will store site config files (when requested
// for the first time) in APC's user cache. Keys prefixed with 'sc.'
// This improves performance by reducing disk access.
// Note: this has no effect if APC is unavailable on your server.
$options->apc = true;
// Smart cache (experimental)
// ----------------------
// With this option enabled we will not cache to disk immediately.
// We will store the cache key in APC and if it's requested again
// we will cache results to disk. Keys prefixed with 'cache.'
// This improves performance by reducing disk access.
// Note: this has no effect if APC is disabled or unavailable on your server,
// or if you have caching disabled.
$options->smart_cache = true;
// Fingerprints
// ----------------------
// key is fingerprint (fragment to find in HTML)
// value is host name to use for site config lookup if fingerprint matches
$options->fingerprints = array(
// Posterous
'<meta name="generator" content="Posterous"' => array('hostname'=>'fingerprint.posterous.com', 'head'=>true),
// Blogger
'<meta content=\'blogger\' name=\'generator\'' => array('hostname'=>'fingerprint.blogspot.com', 'head'=>true),
'<meta name="generator" content="Blogger"' => array('hostname'=>'fingerprint.blogspot.com', 'head'=>true),
// WordPress (hosted)
// '<meta name="generator" content="WordPress.com"' => array('hostname'=>'fingerprint.wordpress.com', 'head'=>true),
// WordPress (self-hosted and hosted)
'<meta name="generator" content="WordPress' => array('hostname'=>'fingerprint.wordpress.com', 'head'=>true)
);
// User Agent strings - mapping domain names
// ----------------------
// e.g. $options->user_agents = array('example.org' => 'PHP/5.2');
$options->user_agents = array( 'lifehacker.com' => 'PHP/5.2',
'gawker.com' => 'PHP/5.2',
'deadspin.com' => 'PHP/5.2',
'kotaku.com' => 'PHP/5.2',
'jezebel.com' => 'PHP/5.2',
'io9.com' => 'PHP/5.2',
'jalopnik.com' => 'PHP/5.2',
'gizmodo.com' => 'PHP/5.2',
'.wikipedia.org' => 'Mozilla/5.2',
'.fok.nl' => 'Googlebot/2.1',
'getpocket.com' => 'PHP/5.2'
);
// URL Rewriting
// ----------------------
// Currently allows simple string replace of URLs.
// Useful for rewriting certain URLs to point to a single page
// or HTML view. Although using the single_page_link site config
// instruction is the preferred way to do this, sometimes, as
// with Google Docs URLs, it's not possible.
// Note: this might move to the site config file at some point.
$options->rewrite_url = array(
// Rewrite public Google Docs URLs to point to HTML view:
// if a URL contains docs.google.com, replace /Doc? with /View?
'docs.google.com' => array('/Doc?' => '/View?'),
'tnr.com' => array('tnr.com/article/' => 'tnr.com/print/article/'),
'.m.wikipedia.org' => array('.m.wikipedia.org' => '.wikipedia.org'),
'm.vanityfair.com' => array('m.vanityfair.com' => 'www.vanityfair.com')
);
// Content-Type exceptions
// -----------------------
// Here you can define different actions based
// on the Content-Type header returned by server.
// MIME type as key, action as value.
// Valid actions:
// * 'exclude' - exclude this item from the result
// * 'link' - create HTML link to the item
$options->content_type_exc = array(
'application/pdf' => array('action'=>'link', 'name'=>'PDF'),
'image' => array('action'=>'link', 'name'=>'Image'),
'audio' => array('action'=>'link', 'name'=>'Audio'),
'video' => array('action'=>'link', 'name'=>'Video')
);
// Cache directory level
// ----------------------
// Spread cache files over different directories (only used if caching is enabled).
// Used to prevent large number of files in one directory.
// This corresponds to Zend_Cache's hashed_directory_level
// see http://framework.zend.com/manual/en/zend.cache.backends.html
// It's best not to change this if you're unsure.
$options->cache_directory_level = 0;
// Cache cleanup
// -------------
// 0 = script will not clean cache (rename cachecleanup.php and use it for scheduled (e.g. cron) cache cleanup)
// 1 = clean cache everytime the script runs (not recommended)
// 100 = clean cache roughly once every 100 script runs
// x = clean cache roughly once every x script runs
// ...you get the idea :)
$options->cache_cleanup = 100;
/////////////////////////////////////////////////
/// DO NOT CHANGE ANYTHING BELOW THIS ///////////
/////////////////////////////////////////////////
if (!defined('_FF_FTR_VERSION')) define('_FF_FTR_VERSION', '3.1');
if (basename(__FILE__) == 'config.php') {
if (file_exists(dirname(__FILE__).'/custom_config.php')) {
require_once dirname(__FILE__).'/custom_config.php';
}
// check for environment variables - often used on cloud platforms
// environment variables should be prefixed with 'ftr_', e.g.
// ftr_max_entries: 1
// will set the max_entries value to 1.
foreach ($options as $_key=>&$_val) {
$_key = "ftr_$_key";
if (($_env = getenv($_key)) !== false) {
if (is_array($_val)) {
if ($_key === 'ftr_admin_credentials') {
$_val = array_combine(array('username', 'password'), array_map('trim', explode(':', $_env, 2)));
if ($_val === false) $_val = array('username'=>'admin', 'password'=>'');
}
} elseif ($_env === 'true' || $_env === 'false') {
$_val = ($_env === 'true');
} elseif (is_numeric($_env)) {
$_val = (int)$_env;
} else { // string
$_val = $_env;
}
}
}
unset($_key, $_val, $_env);
}

View file

@ -1,184 +0,0 @@
<?php
/**
* Site Config
*
* Each instance of this class should hold extraction patterns and other directives
* for a website. See ContentExtractor class to see how it's used.
*
* @version 0.6
* @date 2011-10-30
* @author Keyvan Minoukadeh
* @copyright 2011 Keyvan Minoukadeh
* @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
*/
class SiteConfig
{
// Use first matching element as title (0 or more xpath expressions)
public $title = array();
// Use first matching element as body (0 or more xpath expressions)
public $body = array();
// Use first matching element as author (0 or more xpath expressions)
public $author = array();
// Use first matching element as date (0 or more xpath expressions)
public $date = array();
// Strip elements matching these xpath expressions (0 or more)
public $strip = array();
// Strip elements which contain these strings (0 or more) in the id or class attribute
public $strip_id_or_class = array();
// Strip images which contain these strings (0 or more) in the src attribute
public $strip_image_src = array();
// Additional HTTP headers to send
// NOT YET USED
public $http_header = array();
// Process HTML with tidy before creating DOM
public $tidy = true;
// Autodetect title/body if xpath expressions fail to produce results.
// Note that this applies to title and body separately, ie.
// * if we get a body match but no title match, this option will determine whether we autodetect title
// * if neither match, this determines whether we autodetect title and body.
// Also note that this only applies when there is at least one xpath expression in title or body, ie.
// * if title and body are both empty (no xpath expressions), this option has no effect (both title and body will be auto-detected)
// * if there's an xpath expression for title and none for body, body will be auto-detected and this option will determine whether we auto-detect title if the xpath expression for it fails to produce results.
// Usage scenario: you want to extract something specific from a set of URLs, e.g. a table, and if the table is not found, you want to ignore the entry completely. Auto-detection is unlikely to succeed here, so you construct your patterns and set this option to false. Another scenario may be a site where auto-detection has proven to fail (or worse, picked up the wrong content).
public $autodetect_on_failure = true;
// Clean up content block - attempt to remove elements that appear to be superfluous
public $prune = true;
// Test URL - if present, can be used to test the config above
public $test_url = null;
// Single-page link - should identify a link element or URL pointing to the page holding the entire article
// This is useful for sites which split their articles across multiple pages. Links to such pages tend to
// display the first page with links to the other pages at the bottom. Often there is also a link to a page
// which displays the entire article on one page (e.g. 'print view').
// This should be an XPath expression identifying the link to that page. If present and we find a match,
// we will retrieve that page and the rest of the options in this config will be applied to the new page.
public $single_page_link = array();
// Single-page link in feed? - same as above, but patterns applied to item description HTML taken from feed
public $single_page_link_in_feed = array();
// TODO: which parser to use for turning raw HTML into a DOMDocument
public $parser = 'libxml';
// String replacement to be made on HTML before processing begins
public $replace_string = array();
// the options below cannot be set in the config files which this class represents
public static $debug = false;
protected static $config_path;
protected static $config_path_fallback;
protected static $config_cache = array();
const HOSTNAME_REGEX = '/^(([a-zA-Z0-9-]*[a-zA-Z0-9])\.)*([A-Za-z0-9-]*[A-Za-z0-9])$/';
protected static function debug($msg) {
if (self::$debug) {
$mem = round(memory_get_usage()/1024, 2);
$memPeak = round(memory_get_peak_usage()/1024, 2);
echo '* ',$msg;
echo ' - mem used: ',$mem," (peak: $memPeak)\n";
ob_flush();
flush();
}
}
public static function set_config_path($path, $fallback=null) {
self::$config_path = $path;
self::$config_path_fallback = $fallback;
}
public static function add_to_cache($host, SiteConfig $config) {
$host = strtolower($host);
self::$config_cache[$host] = $config;
}
// returns SiteConfig instance if an appropriate one is found, false otherwise
public static function build($host) {
$host = strtolower($host);
if (substr($host, 0, 4) == 'www.') $host = substr($host, 4);
if (!$host || (strlen($host) > 200) || !preg_match(self::HOSTNAME_REGEX, $host)) return false;
// check for site configuration
$try = array($host);
$split = explode('.', $host);
if (count($split) > 1) {
array_shift($split);
$try[] = implode('.', $split);
}
foreach ($try as $h) {
if (array_key_exists($h, self::$config_cache)) {
self::debug("... cached ($h)");
return self::$config_cache[$h];
} elseif (file_exists(self::$config_path."/$h.txt")) {
self::debug("... from file ($h)");
$file = self::$config_path."/$h.txt";
break;
}
}
if (!isset($file)) {
if (isset(self::$config_path_fallback)) {
self::debug("... trying fallback ($host)");
foreach ($try as $h) {
if (file_exists(self::$config_path_fallback."/$h.txt")) {
self::debug("... from fallback file ($h)");
$file = self::$config_path_fallback."/$h.txt";
break;
}
}
if (!isset($file)) {
self::debug("... no match in fallback directory");
return false;
}
} else {
self::debug("... no match ($host)");
return false;
}
}
$config_file = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
if (!$config_file || !is_array($config_file)) return false;
$config = new SiteConfig();
foreach ($config_file as $line) {
$line = trim($line);
// skip comments, empty lines
if ($line == '' || $line[0] == '#') continue;
// get command
$command = explode(':', $line, 2);
// if there's no colon ':', skip this line
if (count($command) != 2) continue;
$val = trim($command[1]);
$command = trim($command[0]);
if ($command == '' || $val == '') continue;
// check for commands where we accept multiple statements
if (in_array($command, array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'http_header'))) {
array_push($config->$command, $val);
// check for single statement commands that evaluate to true or false
} elseif (in_array($command, array('tidy', 'prune', 'autodetect_on_failure'))) {
$config->$command = ($val == 'yes');
// check for single statement commands stored as strings
} elseif (in_array($command, array('test_url', 'parser'))) {
$config->$command = $val;
} elseif ((substr($command, -1) == ')') && preg_match('!^([a-z0-9_]+)\((.*?)\)$!i', $command, $match)) {
if (in_array($match[1], array('replace_string'))) {
$command = $match[1];
array_push($config->$command, array($match[2], $val));
}
}
}
return $config;
}
}
?>

View file

@ -1,24 +0,0 @@
<?php
// create single item dummy feed object
class DummySingleItemFeed {
public $item;
function __construct($url) { $this->item = new DummySingleItem($url); }
public function get_title() { return ''; }
public function get_description() { return 'Content extracted from '.$this->item->url; }
public function get_link() { return $this->item->url; }
public function get_language() { return false; }
public function get_image_url() { return false; }
public function get_items($start=0, $max=1) { return array(0=>$this->item); }
}
class DummySingleItem {
public $url;
function __construct($url) { $this->url = $url; }
public function get_permalink() { return $this->url; }
public function get_title() { return ''; }
public function get_date($format='') { return false; }
public function get_author($key=0) { return null; }
public function get_authors() { return null; }
public function get_description() { return ''; }
public function get_enclosure($key=0, $prefer=null) { return null; }
public function get_enclosures() { return null; }
}

View file

@ -1,435 +0,0 @@
<?php
// RSS 0.90 Officially obsoleted by 1.0
// RSS 0.91, 0.92, 0.93 and 0.94 Officially obsoleted by 2.0
// So, define constants for RSS 1.0, RSS 2.0 and ATOM
define('RSS1', 'RSS 1.0', true);
define('RSS2', 'RSS 2.0', true);
define('ATOM', 'ATOM', true);
/**
* Univarsel Feed Writer class
*
* Genarate RSS 1.0, RSS2.0 and ATOM Feed
*
* @package UnivarselFeedWriter
* @author Anis uddin Ahmad <anisniit@gmail.com>
* @link http://www.ajaxray.com/projects/rss
*/
class FeedWriter
{
private $channels = array(); // Collection of channel elements
private $items = array(); // Collection of items as object of FeedItem class.
private $data = array(); // Store some other version wise data
private $CDATAEncoding = array(); // The tag names which have to encoded as CDATA
private $version = null;
/**
* Constructor
*
* @param constant the version constant (RSS1/RSS2/ATOM).
*/
function __construct($version = RSS2)
{
$this->version = $version;
// Setting default value for assential channel elements
$this->channels['title'] = $version . ' Feed';
$this->channels['link'] = 'http://www.ajaxray.com/blog';
//Tag names to encode in CDATA
$this->CDATAEncoding = array('description', 'content:encoded', 'summary');
}
// Start # public functions ---------------------------------------------
/**
* Set a channel element
* @access public
* @param srting name of the channel tag
* @param string content of the channel tag
* @return void
*/
public function setChannelElement($elementName, $content)
{
$this->channels[$elementName] = $content ;
}
/**
* Set multiple channel elements from an array. Array elements
* should be 'channelName' => 'channelContent' format.
*
* @access public
* @param array array of channels
* @return void
*/
public function setChannelElementsFromArray($elementArray)
{
if(! is_array($elementArray)) return;
foreach ($elementArray as $elementName => $content)
{
$this->setChannelElement($elementName, $content);
}
}
/**
* Genarate the actual RSS/ATOM file
*
* @access public
* @return void
*/
public function genarateFeed()
{
header("Content-type: text/xml");
$this->printHead();
$this->printChannels();
$this->printItems();
$this->printTale();
}
/**
* Create a new FeedItem.
*
* @access public
* @return object instance of FeedItem class
*/
public function createNewItem()
{
$Item = new FeedItem($this->version);
return $Item;
}
/**
* Add a FeedItem to the main class
*
* @access public
* @param object instance of FeedItem class
* @return void
*/
public function addItem($feedItem)
{
$this->items[] = $feedItem;
}
// Wrapper functions -------------------------------------------------------------------
/**
* Set the 'title' channel element
*
* @access public
* @param srting value of 'title' channel tag
* @return void
*/
public function setTitle($title)
{
$this->setChannelElement('title', $title);
}
/**
* Set the 'description' channel element
*
* @access public
* @param srting value of 'description' channel tag
* @return void
*/
public function setDescription($desciption)
{
$this->setChannelElement('description', $desciption);
}
/**
* Set the 'link' channel element
*
* @access public
* @param srting value of 'link' channel tag
* @return void
*/
public function setLink($link)
{
$this->setChannelElement('link', $link);
}
/**
* Set the 'image' channel element
*
* @access public
* @param srting title of image
* @param srting link url of the imahe
* @param srting path url of the image
* @return void
*/
public function setImage($title, $link, $url)
{
$this->setChannelElement('image', array('title'=>$title, 'link'=>$link, 'url'=>$url));
}
/**
* Set the 'about' channel element. Only for RSS 1.0
*
* @access public
* @param srting value of 'about' channel tag
* @return void
*/
public function setChannelAbout($url)
{
$this->data['ChannelAbout'] = $url;
}
/**
* Genarates an UUID
* @author Anis uddin Ahmad <admin@ajaxray.com>
* @param string an optional prefix
* @return string the formated uuid
*/
public static function uuid($key = null, $prefix = '')
{
$key = ($key == null)? uniqid(rand()) : $key;
$chars = md5($key);
$uuid = substr($chars,0,8) . '-';
$uuid .= substr($chars,8,4) . '-';
$uuid .= substr($chars,12,4) . '-';
$uuid .= substr($chars,16,4) . '-';
$uuid .= substr($chars,20,12);
return $prefix . $uuid;
}
// End # public functions ----------------------------------------------
// Start # private functions ----------------------------------------------
/**
* Prints the xml and rss namespace
*
* @access private
* @return void
*/
private function printHead()
{
$out = '<?xml version="1.0" encoding="utf-8"?>' . "\n";
if($this->version == RSS2)
{
$out .= '<rss version="2.0"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
>' . PHP_EOL;
}
elseif($this->version == RSS1)
{
$out .= '<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns="http://purl.org/rss/1.0/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
>' . PHP_EOL;;
}
else if($this->version == ATOM)
{
$out .= '<feed xmlns="http://www.w3.org/2005/Atom">' . PHP_EOL;;
}
echo $out;
}
/**
* Closes the open tags at the end of file
*
* @access private
* @return void
*/
private function printTale()
{
if($this->version == RSS2)
{
echo '</channel>' . PHP_EOL . '</rss>';
}
elseif($this->version == RSS1)
{
echo '</rdf:RDF>';
}
else if($this->version == ATOM)
{
echo '</feed>';
}
}
/**
* Creates a single node as xml format
*
* @access private
* @param srting name of the tag
* @param mixed tag value as string or array of nested tags in 'tagName' => 'tagValue' format
* @param array Attributes(if any) in 'attrName' => 'attrValue' format
* @return string formatted xml tag
*/
private function makeNode($tagName, $tagContent, $attributes = null)
{
$nodeText = '';
$attrText = '';
if(is_array($attributes))
{
foreach ($attributes as $key => $value)
{
$attrText .= " $key=\"$value\" ";
}
}
if(is_array($tagContent) && $this->version == RSS1)
{
$attrText = ' rdf:parseType="Resource"';
}
$attrText .= (in_array($tagName, $this->CDATAEncoding) && $this->version == ATOM)? ' type="html" ' : '';
$nodeText .= (in_array($tagName, $this->CDATAEncoding))? "<{$tagName}{$attrText}><![CDATA[" : "<{$tagName}{$attrText}>";
if(is_array($tagContent))
{
foreach ($tagContent as $key => $value)
{
$nodeText .= $this->makeNode($key, $value);
}
}
else
{
$nodeText .= (in_array($tagName, $this->CDATAEncoding))? $tagContent : htmlentities($tagContent);
}
$nodeText .= (in_array($tagName, $this->CDATAEncoding))? "]]></$tagName>" : "</$tagName>";
return $nodeText . PHP_EOL;
}
/**
* @desc Print channels
* @access private
* @return void
*/
private function printChannels()
{
//Start channel tag
switch ($this->version)
{
case RSS2:
echo '<channel>' . PHP_EOL;
break;
case RSS1:
echo (isset($this->data['ChannelAbout']))? "<channel rdf:about=\"{$this->data['ChannelAbout']}\">" : "<channel rdf:about=\"{$this->channels['link']}\">";
break;
}
//Print Items of channel
foreach ($this->channels as $key => $value)
{
if($this->version == ATOM && $key == 'link')
{
// ATOM prints link element as href attribute
echo $this->makeNode($key,'',array('href'=>$value));
//Add the id for ATOM
echo $this->makeNode('id',self::uuid($value,'urn:uuid:'));
}
else
{
echo $this->makeNode($key, $value);
}
}
//RSS 1.0 have special tag <rdf:Seq> with channel
if($this->version == RSS1)
{
echo "<items>" . PHP_EOL . "<rdf:Seq>" . PHP_EOL;
foreach ($this->items as $item)
{
$thisItems = $item->getElements();
echo "<rdf:li resource=\"{$thisItems['link']['content']}\"/>" . PHP_EOL;
}
echo "</rdf:Seq>" . PHP_EOL . "</items>" . PHP_EOL . "</channel>" . PHP_EOL;
}
}
/**
* Prints formatted feed items
*
* @access private
* @return void
*/
private function printItems()
{
foreach ($this->items as $item)
{
$thisItems = $item->getElements();
//the argument is printed as rdf:about attribute of item in rss 1.0
echo $this->startItem($thisItems['link']['content']);
foreach ($thisItems as $feedItem )
{
echo $this->makeNode($feedItem['name'], $feedItem['content'], $feedItem['attributes']);
}
echo $this->endItem();
}
}
/**
* Make the starting tag of channels
*
* @access private
* @param srting The vale of about tag which is used for only RSS 1.0
* @return void
*/
private function startItem($about = false)
{
if($this->version == RSS2)
{
echo '<item>' . PHP_EOL;
}
elseif($this->version == RSS1)
{
if($about)
{
echo "<item rdf:about=\"$about\">" . PHP_EOL;
}
else
{
die('link element is not set .\n It\'s required for RSS 1.0 to be used as about attribute of item');
}
}
else if($this->version == ATOM)
{
echo "<entry>" . PHP_EOL;
}
}
/**
* Closes feed item tag
*
* @access private
* @return void
*/
private function endItem()
{
if($this->version == RSS2 || $this->version == RSS1)
{
echo '</item>' . PHP_EOL;
}
else if($this->version == ATOM)
{
echo "</entry>" . PHP_EOL;
}
}
// End # private functions ----------------------------------------------
} // end of class FeedWriter
// autoload classes
function __autoload($class_name)
{
require_once $class_name . '.php';
}

250
inc/3rdparty/libraries/Zend/Cache.php vendored Normal file
View file

@ -0,0 +1,250 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Cache
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: Cache.php 24656 2012-02-26 06:02:53Z adamlundrigan $
*/
/**
* @package Zend_Cache
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
abstract class Zend_Cache
{
/**
* Standard frontends
*
* @var array
*/
public static $standardFrontends = array('Core', 'Output', 'Class', 'File', 'Function', 'Page');
/**
* Standard backends
*
* @var array
*/
public static $standardBackends = array('File', 'Sqlite', 'Memcached', 'Libmemcached', 'Apc', 'ZendPlatform',
'Xcache', 'TwoLevels', 'WinCache', 'ZendServer_Disk', 'ZendServer_ShMem');
/**
* Standard backends which implement the ExtendedInterface
*
* @var array
*/
public static $standardExtendedBackends = array('File', 'Apc', 'TwoLevels', 'Memcached', 'Libmemcached', 'Sqlite', 'WinCache');
/**
* Only for backward compatibility (may be removed in next major release)
*
* @var array
* @deprecated
*/
public static $availableFrontends = array('Core', 'Output', 'Class', 'File', 'Function', 'Page');
/**
* Only for backward compatibility (may be removed in next major release)
*
* @var array
* @deprecated
*/
public static $availableBackends = array('File', 'Sqlite', 'Memcached', 'Libmemcached', 'Apc', 'ZendPlatform', 'Xcache', 'WinCache', 'TwoLevels');
/**
* Consts for clean() method
*/
const CLEANING_MODE_ALL = 'all';
const CLEANING_MODE_OLD = 'old';
const CLEANING_MODE_MATCHING_TAG = 'matchingTag';
const CLEANING_MODE_NOT_MATCHING_TAG = 'notMatchingTag';
const CLEANING_MODE_MATCHING_ANY_TAG = 'matchingAnyTag';
/**
* Factory
*
* @param mixed $frontend frontend name (string) or Zend_Cache_Frontend_ object
* @param mixed $backend backend name (string) or Zend_Cache_Backend_ object
* @param array $frontendOptions associative array of options for the corresponding frontend constructor
* @param array $backendOptions associative array of options for the corresponding backend constructor
* @param boolean $customFrontendNaming if true, the frontend argument is used as a complete class name ; if false, the frontend argument is used as the end of "Zend_Cache_Frontend_[...]" class name
* @param boolean $customBackendNaming if true, the backend argument is used as a complete class name ; if false, the backend argument is used as the end of "Zend_Cache_Backend_[...]" class name
* @param boolean $autoload if true, there will no require_once for backend and frontend (useful only for custom backends/frontends)
* @throws Zend_Cache_Exception
* @return Zend_Cache_Core|Zend_Cache_Frontend
*/
public static function factory($frontend, $backend, $frontendOptions = array(), $backendOptions = array(), $customFrontendNaming = false, $customBackendNaming = false, $autoload = false)
{
if (is_string($backend)) {
$backendObject = self::_makeBackend($backend, $backendOptions, $customBackendNaming, $autoload);
} else {
if ((is_object($backend)) && (in_array('Zend_Cache_Backend_Interface', class_implements($backend)))) {
$backendObject = $backend;
} else {
self::throwException('backend must be a backend name (string) or an object which implements Zend_Cache_Backend_Interface');
}
}
if (is_string($frontend)) {
$frontendObject = self::_makeFrontend($frontend, $frontendOptions, $customFrontendNaming, $autoload);
} else {
if (is_object($frontend)) {
$frontendObject = $frontend;
} else {
self::throwException('frontend must be a frontend name (string) or an object');
}
}
$frontendObject->setBackend($backendObject);
return $frontendObject;
}
/**
* Backend Constructor
*
* @param string $backend
* @param array $backendOptions
* @param boolean $customBackendNaming
* @param boolean $autoload
* @return Zend_Cache_Backend
*/
public static function _makeBackend($backend, $backendOptions, $customBackendNaming = false, $autoload = false)
{
if (!$customBackendNaming) {
$backend = self::_normalizeName($backend);
}
if (in_array($backend, Zend_Cache::$standardBackends)) {
// we use a standard backend
$backendClass = 'Zend_Cache_Backend_' . $backend;
// security controls are explicit
require_once realpath(dirname(__FILE__).'/..').DIRECTORY_SEPARATOR.str_replace('_', DIRECTORY_SEPARATOR, $backendClass) . '.php';
} else {
// we use a custom backend
if (!preg_match('~^[\w\\\\]+$~D', $backend)) {
Zend_Cache::throwException("Invalid backend name [$backend]");
}
if (!$customBackendNaming) {
// we use this boolean to avoid an API break
$backendClass = 'Zend_Cache_Backend_' . $backend;
} else {
$backendClass = $backend;
}
if (!$autoload) {
$file = str_replace('_', DIRECTORY_SEPARATOR, $backendClass) . '.php';
if (!(self::_isReadable($file))) {
self::throwException("file $file not found in include_path");
}
require_once $file;
}
}
return new $backendClass($backendOptions);
}
/**
* Frontend Constructor
*
* @param string $frontend
* @param array $frontendOptions
* @param boolean $customFrontendNaming
* @param boolean $autoload
* @return Zend_Cache_Core|Zend_Cache_Frontend
*/
public static function _makeFrontend($frontend, $frontendOptions = array(), $customFrontendNaming = false, $autoload = false)
{
if (!$customFrontendNaming) {
$frontend = self::_normalizeName($frontend);
}
if (in_array($frontend, self::$standardFrontends)) {
// we use a standard frontend
// For perfs reasons, with frontend == 'Core', we can interact with the Core itself
$frontendClass = 'Zend_Cache_' . ($frontend != 'Core' ? 'Frontend_' : '') . $frontend;
// security controls are explicit
require_once realpath(dirname(__FILE__).'/..').DIRECTORY_SEPARATOR.str_replace('_', DIRECTORY_SEPARATOR, $frontendClass) . '.php';
} else {
// we use a custom frontend
if (!preg_match('~^[\w\\\\]+$~D', $frontend)) {
Zend_Cache::throwException("Invalid frontend name [$frontend]");
}
if (!$customFrontendNaming) {
// we use this boolean to avoid an API break
$frontendClass = 'Zend_Cache_Frontend_' . $frontend;
} else {
$frontendClass = $frontend;
}
if (!$autoload) {
$file = str_replace('_', DIRECTORY_SEPARATOR, $frontendClass) . '.php';
if (!(self::_isReadable($file))) {
self::throwException("file $file not found in include_path");
}
require_once $file;
}
}
return new $frontendClass($frontendOptions);
}
/**
* Throw an exception
*
* Note : for perf reasons, the "load" of Zend/Cache/Exception is dynamic
* @param string $msg Message for the exception
* @throws Zend_Cache_Exception
*/
public static function throwException($msg, Exception $e = null)
{
// For perfs reasons, we use this dynamic inclusion
require_once 'Zend/Cache/Exception.php';
throw new Zend_Cache_Exception($msg, 0, $e);
}
/**
* Normalize frontend and backend names to allow multiple words TitleCased
*
* @param string $name Name to normalize
* @return string
*/
protected static function _normalizeName($name)
{
$name = ucfirst(strtolower($name));
$name = str_replace(array('-', '_', '.'), ' ', $name);
$name = ucwords($name);
$name = str_replace(' ', '', $name);
if (stripos($name, 'ZendServer') === 0) {
$name = 'ZendServer_' . substr($name, strlen('ZendServer'));
}
return $name;
}
/**
* Returns TRUE if the $filename is readable, or FALSE otherwise.
* This function uses the PHP include_path, where PHP's is_readable()
* does not.
*
* Note : this method comes from Zend_Loader (see #ZF-2891 for details)
*
* @param string $filename
* @return boolean
*/
private static function _isReadable($filename)
{
if (!$fh = @fopen($filename, 'r', true)) {
return false;
}
@fclose($fh);
return true;
}
}

View file

@ -0,0 +1,290 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Cache
* @subpackage Zend_Cache_Backend
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: Backend.php 24989 2012-06-21 07:24:13Z mabe $
*/
/**
* @package Zend_Cache
* @subpackage Zend_Cache_Backend
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Cache_Backend
{
/**
* Frontend or Core directives
*
* =====> (int) lifetime :
* - Cache lifetime (in seconds)
* - If null, the cache is valid forever
*
* =====> (int) logging :
* - if set to true, a logging is activated throw Zend_Log
*
* @var array directives
*/
protected $_directives = array(
'lifetime' => 3600,
'logging' => false,
'logger' => null
);
/**
* Available options
*
* @var array available options
*/
protected $_options = array();
/**
* Constructor
*
* @param array $options Associative array of options
* @throws Zend_Cache_Exception
* @return void
*/
public function __construct(array $options = array())
{
while (list($name, $value) = each($options)) {
$this->setOption($name, $value);
}
}
/**
* Set the frontend directives
*
* @param array $directives Assoc of directives
* @throws Zend_Cache_Exception
* @return void
*/
public function setDirectives($directives)
{
if (!is_array($directives)) Zend_Cache::throwException('Directives parameter must be an array');
while (list($name, $value) = each($directives)) {
if (!is_string($name)) {
Zend_Cache::throwException("Incorrect option name : $name");
}
$name = strtolower($name);
if (array_key_exists($name, $this->_directives)) {
$this->_directives[$name] = $value;
}
}
$this->_loggerSanity();
}
/**
* Set an option
*
* @param string $name
* @param mixed $value
* @throws Zend_Cache_Exception
* @return void
*/
public function setOption($name, $value)
{
if (!is_string($name)) {
Zend_Cache::throwException("Incorrect option name : $name");
}
$name = strtolower($name);
if (array_key_exists($name, $this->_options)) {
$this->_options[$name] = $value;
}
}
/**
* Returns an option
*
* @param string $name Optional, the options name to return
* @throws Zend_Cache_Exceptions
* @return mixed
*/
public function getOption($name)
{
$name = strtolower($name);
if (array_key_exists($name, $this->_options)) {
return $this->_options[$name];
}
if (array_key_exists($name, $this->_directives)) {
return $this->_directives[$name];
}
Zend_Cache::throwException("Incorrect option name : {$name}");
}
/**
* Get the life time
*
* if $specificLifetime is not false, the given specific life time is used
* else, the global lifetime is used
*
* @param int $specificLifetime
* @return int Cache life time
*/
public function getLifetime($specificLifetime)
{
if ($specificLifetime === false) {
return $this->_directives['lifetime'];
}
return $specificLifetime;
}
/**
* Return true if the automatic cleaning is available for the backend
*
* DEPRECATED : use getCapabilities() instead
*
* @deprecated
* @return boolean
*/
public function isAutomaticCleaningAvailable()
{
return true;
}
/**
* Determine system TMP directory and detect if we have read access
*
* inspired from Zend_File_Transfer_Adapter_Abstract
*
* @return string
* @throws Zend_Cache_Exception if unable to determine directory
*/
public function getTmpDir()
{
$tmpdir = array();
foreach (array($_ENV, $_SERVER) as $tab) {
foreach (array('TMPDIR', 'TEMP', 'TMP', 'windir', 'SystemRoot') as $key) {
if (isset($tab[$key]) && is_string($tab[$key])) {
if (($key == 'windir') or ($key == 'SystemRoot')) {
$dir = realpath($tab[$key] . '\\temp');
} else {
$dir = realpath($tab[$key]);
}
if ($this->_isGoodTmpDir($dir)) {
return $dir;
}
}
}
}
$upload = ini_get('upload_tmp_dir');
if ($upload) {
$dir = realpath($upload);
if ($this->_isGoodTmpDir($dir)) {
return $dir;
}
}
if (function_exists('sys_get_temp_dir')) {
$dir = sys_get_temp_dir();
if ($this->_isGoodTmpDir($dir)) {
return $dir;
}
}
// Attemp to detect by creating a temporary file
$tempFile = tempnam(md5(uniqid(rand(), TRUE)), '');
if ($tempFile) {
$dir = realpath(dirname($tempFile));
unlink($tempFile);
if ($this->_isGoodTmpDir($dir)) {
return $dir;
}
}
if ($this->_isGoodTmpDir('/tmp')) {
return '/tmp';
}
if ($this->_isGoodTmpDir('\\temp')) {
return '\\temp';
}
Zend_Cache::throwException('Could not determine temp directory, please specify a cache_dir manually');
}
/**
* Verify if the given temporary directory is readable and writable
*
* @param string $dir temporary directory
* @return boolean true if the directory is ok
*/
protected function _isGoodTmpDir($dir)
{
if (is_readable($dir)) {
if (is_writable($dir)) {
return true;
}
}
return false;
}
/**
* Make sure if we enable logging that the Zend_Log class
* is available.
* Create a default log object if none is set.
*
* @throws Zend_Cache_Exception
* @return void
*/
protected function _loggerSanity()
{
if (!isset($this->_directives['logging']) || !$this->_directives['logging']) {
return;
}
if (isset($this->_directives['logger'])) {
if ($this->_directives['logger'] instanceof Zend_Log) {
return;
}
Zend_Cache::throwException('Logger object is not an instance of Zend_Log class.');
}
// Create a default logger to the standard output stream
require_once 'Zend/Log.php';
require_once 'Zend/Log/Writer/Stream.php';
require_once 'Zend/Log/Filter/Priority.php';
$logger = new Zend_Log(new Zend_Log_Writer_Stream('php://output'));
$logger->addFilter(new Zend_Log_Filter_Priority(Zend_Log::WARN, '<='));
$this->_directives['logger'] = $logger;
}
/**
* Log a message at the WARN (4) priority.
*
* @param string $message
* @throws Zend_Cache_Exception
* @return void
*/
protected function _log($message, $priority = 4)
{
if (!$this->_directives['logging']) {
return;
}
if (!isset($this->_directives['logger'])) {
Zend_Cache::throwException('Logging is enabled but logger is not set.');
}
$logger = $this->_directives['logger'];
if (!$logger instanceof Zend_Log) {
Zend_Cache::throwException('Logger object is not an instance of Zend_Log class.');
}
$logger->log($message, $priority);
}
}

View file

@ -0,0 +1,127 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Cache
* @subpackage Zend_Cache_Backend
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: ExtendedInterface.php 24593 2012-01-05 20:35:02Z matthew $
*/
/**
* @see Zend_Cache_Backend_Interface
*/
//require_once 'Zend/Cache/Backend/Interface.php';
require_once dirname(__FILE__).'/Interface.php';
/**
* @package Zend_Cache
* @subpackage Zend_Cache_Backend
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
interface Zend_Cache_Backend_ExtendedInterface extends Zend_Cache_Backend_Interface
{
/**
* Return an array of stored cache ids
*
* @return array array of stored cache ids (string)
*/
public function getIds();
/**
* Return an array of stored tags
*
* @return array array of stored tags (string)
*/
public function getTags();
/**
* Return an array of stored cache ids which match given tags
*
* In case of multiple tags, a logical AND is made between tags
*
* @param array $tags array of tags
* @return array array of matching cache ids (string)
*/
public function getIdsMatchingTags($tags = array());
/**
* Return an array of stored cache ids which don't match given tags
*
* In case of multiple tags, a logical OR is made between tags
*
* @param array $tags array of tags
* @return array array of not matching cache ids (string)
*/
public function getIdsNotMatchingTags($tags = array());
/**
* Return an array of stored cache ids which match any given tags
*
* In case of multiple tags, a logical AND is made between tags
*
* @param array $tags array of tags
* @return array array of any matching cache ids (string)
*/
public function getIdsMatchingAnyTags($tags = array());
/**
* Return the filling percentage of the backend storage
*
* @return int integer between 0 and 100
*/
public function getFillingPercentage();
/**
* Return an array of metadatas for the given cache id
*
* The array must include these keys :
* - expire : the expire timestamp
* - tags : a string array of tags
* - mtime : timestamp of last modification time
*
* @param string $id cache id
* @return array array of metadatas (false if the cache id is not found)
*/
public function getMetadatas($id);
/**
* Give (if possible) an extra lifetime to the given cache id
*
* @param string $id cache id
* @param int $extraLifetime
* @return boolean true if ok
*/
public function touch($id, $extraLifetime);
/**
* Return an associative array of capabilities (booleans) of the backend
*
* The array must include these keys :
* - automatic_cleaning (is automating cleaning necessary)
* - tags (are tags supported)
* - expired_read (is it possible to read expired cache records
* (for doNotTestCacheValidity option for example))
* - priority does the backend deal with priority when saving
* - infinite_lifetime (is infinite lifetime can work with this backend)
* - get_list (is it possible to get the list of cache ids and the complete list of tags)
*
* @return array associative of with capabilities
*/
public function getCapabilities();
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,99 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Cache
* @subpackage Zend_Cache_Backend
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: Interface.php 24593 2012-01-05 20:35:02Z matthew $
*/
/**
* @package Zend_Cache
* @subpackage Zend_Cache_Backend
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
interface Zend_Cache_Backend_Interface
{
/**
* Set the frontend directives
*
* @param array $directives assoc of directives
*/
public function setDirectives($directives);
/**
* Test if a cache is available for the given id and (if yes) return it (false else)
*
* Note : return value is always "string" (unserialization is done by the core not by the backend)
*
* @param string $id Cache id
* @param boolean $doNotTestCacheValidity If set to true, the cache validity won't be tested
* @return string|false cached datas
*/
public function load($id, $doNotTestCacheValidity = false);
/**
* Test if a cache is available or not (for the given id)
*
* @param string $id cache id
* @return mixed|false (a cache is not available) or "last modified" timestamp (int) of the available cache record
*/
public function test($id);
/**
* Save some string datas into a cache record
*
* Note : $data is always "string" (serialization is done by the
* core not by the backend)
*
* @param string $data Datas to cache
* @param string $id Cache id
* @param array $tags Array of strings, the cache record will be tagged by each string entry
* @param int $specificLifetime If != false, set a specific lifetime for this cache record (null => infinite lifetime)
* @return boolean true if no problem
*/
public function save($data, $id, $tags = array(), $specificLifetime = false);
/**
* Remove a cache record
*
* @param string $id Cache id
* @return boolean True if no problem
*/
public function remove($id);
/**
* Clean some cache records
*
* Available modes are :
* Zend_Cache::CLEANING_MODE_ALL (default) => remove all cache entries ($tags is not used)
* Zend_Cache::CLEANING_MODE_OLD => remove too old cache entries ($tags is not used)
* Zend_Cache::CLEANING_MODE_MATCHING_TAG => remove cache entries matching all given tags
* ($tags can be an array of strings or a single string)
* Zend_Cache::CLEANING_MODE_NOT_MATCHING_TAG => remove cache entries not {matching one of the given tags}
* ($tags can be an array of strings or a single string)
* Zend_Cache::CLEANING_MODE_MATCHING_ANY_TAG => remove cache entries matching any given tags
* ($tags can be an array of strings or a single string)
*
* @param string $mode Clean mode
* @param array $tags Array of tags
* @return boolean true if no problem
*/
public function clean($mode = Zend_Cache::CLEANING_MODE_ALL, $tags = array());
}

View file

@ -0,0 +1,765 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Cache
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: Core.php 24989 2012-06-21 07:24:13Z mabe $
*/
/**
* @package Zend_Cache
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Cache_Core
{
/**
* Messages
*/
const BACKEND_NOT_SUPPORTS_TAG = 'tags are not supported by the current backend';
const BACKEND_NOT_IMPLEMENTS_EXTENDED_IF = 'Current backend doesn\'t implement the Zend_Cache_Backend_ExtendedInterface, so this method is not available';
/**
* Backend Object
*
* @var Zend_Cache_Backend_Interface $_backend
*/
protected $_backend = null;
/**
* Available options
*
* ====> (boolean) write_control :
* - Enable / disable write control (the cache is read just after writing to detect corrupt entries)
* - Enable write control will lightly slow the cache writing but not the cache reading
* Write control can detect some corrupt cache files but maybe it's not a perfect control
*
* ====> (boolean) caching :
* - Enable / disable caching
* (can be very useful for the debug of cached scripts)
*
* =====> (string) cache_id_prefix :
* - prefix for cache ids (namespace)
*
* ====> (boolean) automatic_serialization :
* - Enable / disable automatic serialization
* - It can be used to save directly datas which aren't strings (but it's slower)
*
* ====> (int) automatic_cleaning_factor :
* - Disable / Tune the automatic cleaning process
* - The automatic cleaning process destroy too old (for the given life time)
* cache files when a new cache file is written :
* 0 => no automatic cache cleaning
* 1 => systematic cache cleaning
* x (integer) > 1 => automatic cleaning randomly 1 times on x cache write
*
* ====> (int) lifetime :
* - Cache lifetime (in seconds)
* - If null, the cache is valid forever.
*
* ====> (boolean) logging :
* - If set to true, logging is activated (but the system is slower)
*
* ====> (boolean) ignore_user_abort
* - If set to true, the core will set the ignore_user_abort PHP flag inside the
* save() method to avoid cache corruptions in some cases (default false)
*
* @var array $_options available options
*/
protected $_options = array(
'write_control' => true,
'caching' => true,
'cache_id_prefix' => null,
'automatic_serialization' => false,
'automatic_cleaning_factor' => 10,
'lifetime' => 3600,
'logging' => false,
'logger' => null,
'ignore_user_abort' => false
);
/**
* Array of options which have to be transfered to backend
*
* @var array $_directivesList
*/
protected static $_directivesList = array('lifetime', 'logging', 'logger');
/**
* Not used for the core, just a sort a hint to get a common setOption() method (for the core and for frontends)
*
* @var array $_specificOptions
*/
protected $_specificOptions = array();
/**
* Last used cache id
*
* @var string $_lastId
*/
private $_lastId = null;
/**
* True if the backend implements Zend_Cache_Backend_ExtendedInterface
*
* @var boolean $_extendedBackend
*/
protected $_extendedBackend = false;
/**
* Array of capabilities of the backend (only if it implements Zend_Cache_Backend_ExtendedInterface)
*
* @var array
*/
protected $_backendCapabilities = array();
/**
* Constructor
*
* @param array|Zend_Config $options Associative array of options or Zend_Config instance
* @throws Zend_Cache_Exception
* @return void
*/
public function __construct($options = array())
{
if ($options instanceof Zend_Config) {
$options = $options->toArray();
}
if (!is_array($options)) {
Zend_Cache::throwException("Options passed were not an array"
. " or Zend_Config instance.");
}
while (list($name, $value) = each($options)) {
$this->setOption($name, $value);
}
$this->_loggerSanity();
}
/**
* Set options using an instance of type Zend_Config
*
* @param Zend_Config $config
* @return Zend_Cache_Core
*/
public function setConfig(Zend_Config $config)
{
$options = $config->toArray();
while (list($name, $value) = each($options)) {
$this->setOption($name, $value);
}
return $this;
}
/**
* Set the backend
*
* @param Zend_Cache_Backend $backendObject
* @throws Zend_Cache_Exception
* @return void
*/
public function setBackend(Zend_Cache_Backend $backendObject)
{
$this->_backend= $backendObject;
// some options (listed in $_directivesList) have to be given
// to the backend too (even if they are not "backend specific")
$directives = array();
foreach (Zend_Cache_Core::$_directivesList as $directive) {
$directives[$directive] = $this->_options[$directive];
}
$this->_backend->setDirectives($directives);
if (in_array('Zend_Cache_Backend_ExtendedInterface', class_implements($this->_backend))) {
$this->_extendedBackend = true;
$this->_backendCapabilities = $this->_backend->getCapabilities();
}
}
/**
* Returns the backend
*
* @return Zend_Cache_Backend backend object
*/
public function getBackend()
{
return $this->_backend;
}
/**
* Public frontend to set an option
*
* There is an additional validation (relatively to the protected _setOption method)
*
* @param string $name Name of the option
* @param mixed $value Value of the option
* @throws Zend_Cache_Exception
* @return void
*/
public function setOption($name, $value)
{
if (!is_string($name)) {
Zend_Cache::throwException("Incorrect option name!");
}
$name = strtolower($name);
if (array_key_exists($name, $this->_options)) {
// This is a Core option
$this->_setOption($name, $value);
return;
}
if (array_key_exists($name, $this->_specificOptions)) {
// This a specic option of this frontend
$this->_specificOptions[$name] = $value;
return;
}
}
/**
* Public frontend to get an option value
*
* @param string $name Name of the option
* @throws Zend_Cache_Exception
* @return mixed option value
*/
public function getOption($name)
{
$name = strtolower($name);
if (array_key_exists($name, $this->_options)) {
// This is a Core option
return $this->_options[$name];
}
if (array_key_exists($name, $this->_specificOptions)) {
// This a specic option of this frontend
return $this->_specificOptions[$name];
}
Zend_Cache::throwException("Incorrect option name : $name");
}
/**
* Set an option
*
* @param string $name Name of the option
* @param mixed $value Value of the option
* @throws Zend_Cache_Exception
* @return void
*/
private function _setOption($name, $value)
{
if (!is_string($name) || !array_key_exists($name, $this->_options)) {
Zend_Cache::throwException("Incorrect option name : $name");
}
if ($name == 'lifetime' && empty($value)) {
$value = null;
}
$this->_options[$name] = $value;
}
/**
* Force a new lifetime
*
* The new value is set for the core/frontend but for the backend too (directive)
*
* @param int $newLifetime New lifetime (in seconds)
* @return void
*/
public function setLifetime($newLifetime)
{
$this->_options['lifetime'] = $newLifetime;
$this->_backend->setDirectives(array(
'lifetime' => $newLifetime
));
}
/**
* Test if a cache is available for the given id and (if yes) return it (false else)
*
* @param string $id Cache id
* @param boolean $doNotTestCacheValidity If set to true, the cache validity won't be tested
* @param boolean $doNotUnserialize Do not serialize (even if automatic_serialization is true) => for internal use
* @return mixed|false Cached datas
*/
public function load($id, $doNotTestCacheValidity = false, $doNotUnserialize = false)
{
if (!$this->_options['caching']) {
return false;
}
$id = $this->_id($id); // cache id may need prefix
$this->_lastId = $id;
self::_validateIdOrTag($id);
$this->_log("Zend_Cache_Core: load item '{$id}'", 7);
$data = $this->_backend->load($id, $doNotTestCacheValidity);
if ($data===false) {
// no cache available
return false;
}
if ((!$doNotUnserialize) && $this->_options['automatic_serialization']) {
// we need to unserialize before sending the result
return unserialize($data);
}
return $data;
}
/**
* Test if a cache is available for the given id
*
* @param string $id Cache id
* @return int|false Last modified time of cache entry if it is available, false otherwise
*/
public function test($id)
{
if (!$this->_options['caching']) {
return false;
}
$id = $this->_id($id); // cache id may need prefix
self::_validateIdOrTag($id);
$this->_lastId = $id;
$this->_log("Zend_Cache_Core: test item '{$id}'", 7);
return $this->_backend->test($id);
}
/**
* Save some data in a cache
*
* @param mixed $data Data to put in cache (can be another type than string if automatic_serialization is on)
* @param string $id Cache id (if not set, the last cache id will be used)
* @param array $tags Cache tags
* @param int $specificLifetime If != false, set a specific lifetime for this cache record (null => infinite lifetime)
* @param int $priority integer between 0 (very low priority) and 10 (maximum priority) used by some particular backends
* @throws Zend_Cache_Exception
* @return boolean True if no problem
*/
public function save($data, $id = null, $tags = array(), $specificLifetime = false, $priority = 8)
{
if (!$this->_options['caching']) {
return true;
}
if ($id === null) {
$id = $this->_lastId;
} else {
$id = $this->_id($id);
}
self::_validateIdOrTag($id);
self::_validateTagsArray($tags);
if ($this->_options['automatic_serialization']) {
// we need to serialize datas before storing them
$data = serialize($data);
} else {
if (!is_string($data)) {
Zend_Cache::throwException("Datas must be string or set automatic_serialization = true");
}
}
// automatic cleaning
if ($this->_options['automatic_cleaning_factor'] > 0) {
$rand = rand(1, $this->_options['automatic_cleaning_factor']);
if ($rand==1) {
// new way || deprecated way
if ($this->_extendedBackend || method_exists($this->_backend, 'isAutomaticCleaningAvailable')) {
$this->_log("Zend_Cache_Core::save(): automatic cleaning running", 7);
$this->clean(Zend_Cache::CLEANING_MODE_OLD);
} else {
$this->_log("Zend_Cache_Core::save(): automatic cleaning is not available/necessary with current backend", 4);
}
}
}
$this->_log("Zend_Cache_Core: save item '{$id}'", 7);
if ($this->_options['ignore_user_abort']) {
$abort = ignore_user_abort(true);
}
if (($this->_extendedBackend) && ($this->_backendCapabilities['priority'])) {
$result = $this->_backend->save($data, $id, $tags, $specificLifetime, $priority);
} else {
$result = $this->_backend->save($data, $id, $tags, $specificLifetime);
}
if ($this->_options['ignore_user_abort']) {
ignore_user_abort($abort);
}
if (!$result) {
// maybe the cache is corrupted, so we remove it !
$this->_log("Zend_Cache_Core::save(): failed to save item '{$id}' -> removing it", 4);
$this->_backend->remove($id);
return false;
}
if ($this->_options['write_control']) {
$data2 = $this->_backend->load($id, true);
if ($data!=$data2) {
$this->_log("Zend_Cache_Core::save(): write control of item '{$id}' failed -> removing it", 4);
$this->_backend->remove($id);
return false;
}
}
return true;
}
/**
* Remove a cache
*
* @param string $id Cache id to remove
* @return boolean True if ok
*/
public function remove($id)
{
if (!$this->_options['caching']) {
return true;
}
$id = $this->_id($id); // cache id may need prefix
self::_validateIdOrTag($id);
$this->_log("Zend_Cache_Core: remove item '{$id}'", 7);
return $this->_backend->remove($id);
}
/**
* Clean cache entries
*
* Available modes are :
* 'all' (default) => remove all cache entries ($tags is not used)
* 'old' => remove too old cache entries ($tags is not used)
* 'matchingTag' => remove cache entries matching all given tags
* ($tags can be an array of strings or a single string)
* 'notMatchingTag' => remove cache entries not matching one of the given tags
* ($tags can be an array of strings or a single string)
* 'matchingAnyTag' => remove cache entries matching any given tags
* ($tags can be an array of strings or a single string)
*
* @param string $mode
* @param array|string $tags
* @throws Zend_Cache_Exception
* @return boolean True if ok
*/
public function clean($mode = 'all', $tags = array())
{
if (!$this->_options['caching']) {
return true;
}
if (!in_array($mode, array(Zend_Cache::CLEANING_MODE_ALL,
Zend_Cache::CLEANING_MODE_OLD,
Zend_Cache::CLEANING_MODE_MATCHING_TAG,
Zend_Cache::CLEANING_MODE_NOT_MATCHING_TAG,
Zend_Cache::CLEANING_MODE_MATCHING_ANY_TAG))) {
Zend_Cache::throwException('Invalid cleaning mode');
}
self::_validateTagsArray($tags);
return $this->_backend->clean($mode, $tags);
}
/**
* Return an array of stored cache ids which match given tags
*
* In case of multiple tags, a logical AND is made between tags
*
* @param array $tags array of tags
* @return array array of matching cache ids (string)
*/
public function getIdsMatchingTags($tags = array())
{
if (!$this->_extendedBackend) {
Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
}
if (!($this->_backendCapabilities['tags'])) {
Zend_Cache::throwException(self::BACKEND_NOT_SUPPORTS_TAG);
}
$ids = $this->_backend->getIdsMatchingTags($tags);
// we need to remove cache_id_prefix from ids (see #ZF-6178, #ZF-7600)
if (isset($this->_options['cache_id_prefix']) && $this->_options['cache_id_prefix'] !== '') {
$prefix = & $this->_options['cache_id_prefix'];
$prefixLen = strlen($prefix);
foreach ($ids as &$id) {
if (strpos($id, $prefix) === 0) {
$id = substr($id, $prefixLen);
}
}
}
return $ids;
}
/**
* Return an array of stored cache ids which don't match given tags
*
* In case of multiple tags, a logical OR is made between tags
*
* @param array $tags array of tags
* @return array array of not matching cache ids (string)
*/
public function getIdsNotMatchingTags($tags = array())
{
if (!$this->_extendedBackend) {
Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
}
if (!($this->_backendCapabilities['tags'])) {
Zend_Cache::throwException(self::BACKEND_NOT_SUPPORTS_TAG);
}
$ids = $this->_backend->getIdsNotMatchingTags($tags);
// we need to remove cache_id_prefix from ids (see #ZF-6178, #ZF-7600)
if (isset($this->_options['cache_id_prefix']) && $this->_options['cache_id_prefix'] !== '') {
$prefix = & $this->_options['cache_id_prefix'];
$prefixLen = strlen($prefix);
foreach ($ids as &$id) {
if (strpos($id, $prefix) === 0) {
$id = substr($id, $prefixLen);
}
}
}
return $ids;
}
/**
* Return an array of stored cache ids which match any given tags
*
* In case of multiple tags, a logical OR is made between tags
*
* @param array $tags array of tags
* @return array array of matching any cache ids (string)
*/
public function getIdsMatchingAnyTags($tags = array())
{
if (!$this->_extendedBackend) {
Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
}
if (!($this->_backendCapabilities['tags'])) {
Zend_Cache::throwException(self::BACKEND_NOT_SUPPORTS_TAG);
}
$ids = $this->_backend->getIdsMatchingAnyTags($tags);
// we need to remove cache_id_prefix from ids (see #ZF-6178, #ZF-7600)
if (isset($this->_options['cache_id_prefix']) && $this->_options['cache_id_prefix'] !== '') {
$prefix = & $this->_options['cache_id_prefix'];
$prefixLen = strlen($prefix);
foreach ($ids as &$id) {
if (strpos($id, $prefix) === 0) {
$id = substr($id, $prefixLen);
}
}
}
return $ids;
}
/**
* Return an array of stored cache ids
*
* @return array array of stored cache ids (string)
*/
public function getIds()
{
if (!$this->_extendedBackend) {
Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
}
$ids = $this->_backend->getIds();
// we need to remove cache_id_prefix from ids (see #ZF-6178, #ZF-7600)
if (isset($this->_options['cache_id_prefix']) && $this->_options['cache_id_prefix'] !== '') {
$prefix = & $this->_options['cache_id_prefix'];
$prefixLen = strlen($prefix);
foreach ($ids as &$id) {
if (strpos($id, $prefix) === 0) {
$id = substr($id, $prefixLen);
}
}
}
return $ids;
}
/**
* Return an array of stored tags
*
* @return array array of stored tags (string)
*/
public function getTags()
{
if (!$this->_extendedBackend) {
Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
}
if (!($this->_backendCapabilities['tags'])) {
Zend_Cache::throwException(self::BACKEND_NOT_SUPPORTS_TAG);
}
return $this->_backend->getTags();
}
/**
* Return the filling percentage of the backend storage
*
* @return int integer between 0 and 100
*/
public function getFillingPercentage()
{
if (!$this->_extendedBackend) {
Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
}
return $this->_backend->getFillingPercentage();
}
/**
* Return an array of metadatas for the given cache id
*
* The array will include these keys :
* - expire : the expire timestamp
* - tags : a string array of tags
* - mtime : timestamp of last modification time
*
* @param string $id cache id
* @return array array of metadatas (false if the cache id is not found)
*/
public function getMetadatas($id)
{
if (!$this->_extendedBackend) {
Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
}
$id = $this->_id($id); // cache id may need prefix
return $this->_backend->getMetadatas($id);
}
/**
* Give (if possible) an extra lifetime to the given cache id
*
* @param string $id cache id
* @param int $extraLifetime
* @return boolean true if ok
*/
public function touch($id, $extraLifetime)
{
if (!$this->_extendedBackend) {
Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
}
$id = $this->_id($id); // cache id may need prefix
$this->_log("Zend_Cache_Core: touch item '{$id}'", 7);
return $this->_backend->touch($id, $extraLifetime);
}
/**
* Validate a cache id or a tag (security, reliable filenames, reserved prefixes...)
*
* Throw an exception if a problem is found
*
* @param string $string Cache id or tag
* @throws Zend_Cache_Exception
* @return void
*/
protected static function _validateIdOrTag($string)
{
if (!is_string($string)) {
Zend_Cache::throwException('Invalid id or tag : must be a string');
}
if (substr($string, 0, 9) == 'internal-') {
Zend_Cache::throwException('"internal-*" ids or tags are reserved');
}
if (!preg_match('~^[a-zA-Z0-9_]+$~D', $string)) {
Zend_Cache::throwException("Invalid id or tag '$string' : must use only [a-zA-Z0-9_]");
}
}
/**
* Validate a tags array (security, reliable filenames, reserved prefixes...)
*
* Throw an exception if a problem is found
*
* @param array $tags Array of tags
* @throws Zend_Cache_Exception
* @return void
*/
protected static function _validateTagsArray($tags)
{
if (!is_array($tags)) {
Zend_Cache::throwException('Invalid tags array : must be an array');
}
foreach($tags as $tag) {
self::_validateIdOrTag($tag);
}
reset($tags);
}
/**
* Make sure if we enable logging that the Zend_Log class
* is available.
* Create a default log object if none is set.
*
* @throws Zend_Cache_Exception
* @return void
*/
protected function _loggerSanity()
{
if (!isset($this->_options['logging']) || !$this->_options['logging']) {
return;
}
if (isset($this->_options['logger']) && $this->_options['logger'] instanceof Zend_Log) {
return;
}
// Create a default logger to the standard output stream
require_once 'Zend/Log.php';
require_once 'Zend/Log/Writer/Stream.php';
require_once 'Zend/Log/Filter/Priority.php';
$logger = new Zend_Log(new Zend_Log_Writer_Stream('php://output'));
$logger->addFilter(new Zend_Log_Filter_Priority(Zend_Log::WARN, '<='));
$this->_options['logger'] = $logger;
}
/**
* Log a message at the WARN (4) priority.
*
* @param string $message
* @throws Zend_Cache_Exception
* @return void
*/
protected function _log($message, $priority = 4)
{
if (!$this->_options['logging']) {
return;
}
if (!(isset($this->_options['logger']) || $this->_options['logger'] instanceof Zend_Log)) {
Zend_Cache::throwException('Logging is enabled but logger is not set');
}
$logger = $this->_options['logger'];
$logger->log($message, $priority);
}
/**
* Make and return a cache id
*
* Checks 'cache_id_prefix' and returns new id with prefix or simply the id if null
*
* @param string $id Cache id
* @return string Cache id (with or without prefix)
*/
protected function _id($id)
{
if (($id !== null) && isset($this->_options['cache_id_prefix'])) {
return $this->_options['cache_id_prefix'] . $id; // return with prefix
}
return $id; // no prefix, just return the $id passed
}
}

View file

@ -0,0 +1,32 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Cache
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: Exception.php 24593 2012-01-05 20:35:02Z matthew $
*/
/**
* @see Zend_Exception
*/
require_once 'Zend/Exception.php';
/**
* @package Zend_Cache
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Cache_Exception extends Zend_Exception {}

View file

@ -0,0 +1,96 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: Exception.php 24593 2012-01-05 20:35:02Z matthew $
*/
/**
* @category Zend
* @package Zend
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Exception extends Exception
{
/**
* @var null|Exception
*/
private $_previous = null;
/**
* Construct the exception
*
* @param string $msg
* @param int $code
* @param Exception $previous
* @return void
*/
public function __construct($msg = '', $code = 0, Exception $previous = null)
{
if (version_compare(PHP_VERSION, '5.3.0', '<')) {
parent::__construct($msg, (int) $code);
$this->_previous = $previous;
} else {
parent::__construct($msg, (int) $code, $previous);
}
}
/**
* Overloading
*
* For PHP < 5.3.0, provides access to the getPrevious() method.
*
* @param string $method
* @param array $args
* @return mixed
*/
public function __call($method, array $args)
{
if ('getprevious' == strtolower($method)) {
return $this->_getPrevious();
}
return null;
}
/**
* String representation of the exception
*
* @return string
*/
public function __toString()
{
if (version_compare(PHP_VERSION, '5.3.0', '<')) {
if (null !== ($e = $this->getPrevious())) {
return $e->__toString()
. "\n\nNext "
. parent::__toString();
}
}
return parent::__toString();
}
/**
* Returns previous Exception
*
* @return Exception|null
*/
protected function _getPrevious()
{
return $this->_previous;
}
}

View file

@ -5,10 +5,10 @@
* Uses patterns specified in site config files and auto detection (hNews/PHP Readability)
* to extract content from HTML files.
*
* @version 0.8
* @date 2012-02-21
* @version 1.0
* @date 2013-02-05
* @author Keyvan Minoukadeh
* @copyright 2011 Keyvan Minoukadeh
* @copyright 2013 Keyvan Minoukadeh
* @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
*/
@ -39,9 +39,12 @@ class ContentExtractor
protected $date;
protected $body;
protected $success = false;
protected $nextPageUrl;
public $allowedParsers = array('libxml', 'html5lib');
public $fingerprints = array();
public $readability;
public $debug = false;
public $debugVerbose = false;
function __construct($path, $fallback=null) {
SiteConfig::set_config_path($path, $fallback);
@ -52,7 +55,8 @@ class ContentExtractor
$mem = round(memory_get_usage()/1024, 2);
$memPeak = round(memory_get_peak_usage()/1024, 2);
echo '* ',$msg;
echo ' - mem used: ',$mem," (peak: $memPeak)\n";
if ($this->debugVerbose) echo ' - mem used: ',$mem," (peak: $memPeak)";
echo "\n";
ob_flush();
flush();
}
@ -67,6 +71,7 @@ class ContentExtractor
$this->author = array();
$this->language = null;
$this->date = null;
$this->nextPageUrl = null;
$this->success = false;
}
@ -86,74 +91,143 @@ class ContentExtractor
return $_fphost;
}
}
$this->debug('No fingerprint matches');
return false;
}
// returns SiteConfig instance (joined in order: exact match, wildcard, fingerprint, global, default)
public function buildSiteConfig($url, $html='', $add_to_cache=true) {
// extract host name
$host = @parse_url($url, PHP_URL_HOST);
$host = strtolower($host);
if (substr($host, 0, 4) == 'www.') $host = substr($host, 4);
// is merged version already cached?
if (SiteConfig::is_cached("$host.merged")) {
$this->debug("Returning cached and merged site config for $host");
return SiteConfig::build("$host.merged");
}
// let's build from site_config/custom/ and standard/
$config = SiteConfig::build($host);
if ($add_to_cache && $config && !SiteConfig::is_cached("$host")) {
SiteConfig::add_to_cache($host, $config);
}
// if no match, use defaults
if (!$config) $config = new SiteConfig();
// load fingerprint config?
if ($config->autodetect_on_failure()) {
// check HTML for fingerprints
if (!empty($this->fingerprints) && ($_fphost = $this->findHostUsingFingerprints($html))) {
if ($config_fingerprint = SiteConfig::build($_fphost)) {
$this->debug("Appending site config settings from $_fphost (fingerprint match)");
$config->append($config_fingerprint);
if ($add_to_cache && !SiteConfig::is_cached($_fphost)) {
//$config_fingerprint->cache_in_apc = true;
SiteConfig::add_to_cache($_fphost, $config_fingerprint);
}
}
}
}
// load global config?
if ($config->autodetect_on_failure()) {
if ($config_global = SiteConfig::build('global', true)) {
$this->debug('Appending site config settings from global.txt');
$config->append($config_global);
if ($add_to_cache && !SiteConfig::is_cached('global')) {
//$config_global->cache_in_apc = true;
SiteConfig::add_to_cache('global', $config_global);
}
}
}
// store copy of merged config
if ($add_to_cache) {
// do not store in APC if wildcard match
$use_apc = ($host == $config->cache_key);
$config->cache_key = null;
SiteConfig::add_to_cache("$host.merged", $config, $use_apc);
}
return $config;
}
// returns true on success, false on failure
// $smart_tidy indicates that if tidy is used and no results are produced, we will
// try again without it. Tidy helps us deal with PHP's patchy HTML parsing most of the time
// but it has problems of its own which we try to avoid with this option.
public function process($html, $url, $smart_tidy=true) {
$this->reset();
// extract host name
$host = @parse_url($url, PHP_URL_HOST);
if (!($this->config = SiteConfig::build($host))) {
// no match, check HTML for fingerprints
if (!empty($this->fingerprints) && ($_fphost = $this->findHostUsingFingerprints($html))) {
$this->config = SiteConfig::build($_fphost);
}
unset($_fphost);
if (!$this->config) {
// no match, so use defaults
$this->config = new SiteConfig();
}
}
// store copy of config in our static cache array in case we need to process another URL
SiteConfig::add_to_cache($host, $this->config);
$this->config = $this->buildSiteConfig($url, $html);
// do string replacements
foreach ($this->config->replace_string as $_repl) {
$html = str_replace($_repl[0], $_repl[1], $html);
if (!empty($this->config->find_string)) {
if (count($this->config->find_string) == count($this->config->replace_string)) {
$html = str_replace($this->config->find_string, $this->config->replace_string, $html, $_count);
$this->debug("Strings replaced: $_count (find_string and/or replace_string)");
} else {
$this->debug('Skipped string replacement - incorrect number of find-replace strings in site config');
}
unset($_count);
}
unset($_repl);
// use tidy (if it exists)?
// This fixes problems with some sites which would otherwise
// trouble DOMDocument's HTML parsing. (Although sometimes it
// makes matters worse, which is why you can override it in site config files.)
$tidied = false;
if ($this->config->tidy && function_exists('tidy_parse_string') && $smart_tidy) {
if ($this->config->tidy() && function_exists('tidy_parse_string') && $smart_tidy) {
$this->debug('Using Tidy');
$tidy = tidy_parse_string($html, self::$tidy_config, 'UTF8');
if (tidy_clean_repair($tidy)) {
$original_html = $html;
$tidied = true;
// $html = $tidy->value;
}
$body = $tidy->body();
if (preg_replace('/\s+/', '', $body->value) !== "<body></body>") {
$html = $tidy->value;
}
unset($tidy);
}
// load and parse html
$this->readability = new PocheReadability($html, $url);
$_parser = $this->config->parser();
if (!in_array($_parser, $this->allowedParsers)) {
$this->debug("HTML parser $_parser not listed, using libxml instead");
$_parser = 'libxml';
}
$this->debug("Attempting to parse HTML with $_parser");
$this->readability = new Readability($html, $url, $_parser);
// we use xpath to find elements in the given HTML document
// see http://en.wikipedia.org/wiki/XPath_1.0
$xpath = new DOMXPath($this->readability->dom);
// try to get title
foreach ($this->config->title as $pattern) {
// try to get next page link
foreach ($this->config->next_page_link as $pattern) {
$elems = @$xpath->evaluate($pattern, $this->readability->dom);
if (is_string($elems)) {
$this->debug('Title expression evaluated as string');
$this->title = trim($elems);
$this->nextPageUrl = trim($elems);
break;
} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
foreach ($elems as $item) {
if ($item instanceof DOMElement && $item->hasAttribute('href')) {
$this->nextPageUrl = $item->getAttribute('href');
break 2;
} elseif ($item instanceof DOMAttr && $item->value) {
$this->nextPageUrl = $item->value;
break 2;
}
}
}
}
// try to get title
foreach ($this->config->title as $pattern) {
// $this->debug("Trying $pattern");
$elems = @$xpath->evaluate($pattern, $this->readability->dom);
if (is_string($elems)) {
$this->title = trim($elems);
$this->debug('Title expression evaluated as string: '.$this->title);
$this->debug("...XPath match: $pattern");
break;
} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
$this->debug('Title matched');
$this->title = $elems->item(0)->textContent;
$this->debug('Title matched: '.$this->title);
$this->debug("...XPath match: $pattern");
// remove title from document
try {
$elems->item(0)->parentNode->removeChild($elems->item(0));
@ -169,17 +243,22 @@ class ContentExtractor
foreach ($this->config->author as $pattern) {
$elems = @$xpath->evaluate($pattern, $this->readability->dom);
if (is_string($elems)) {
$this->debug('Author expression evaluated as string');
if (trim($elems) != '') {
$this->author[] = trim($elems);
$this->debug('Author expression evaluated as string: '.trim($elems));
$this->debug("...XPath match: $pattern");
break;
}
} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
foreach ($elems as $elem) {
if (!isset($elem->parentNode)) continue;
$this->author[] = trim($elem->textContent);
$this->debug('Author matched: '.trim($elem->textContent));
}
if (!empty($this->author)) {
$this->debug("...XPath match: $pattern");
break;
}
if (!empty($this->author)) break;
}
}
}
@ -191,12 +270,14 @@ class ContentExtractor
if (is_string($elems)) {
if (trim($elems) != '') {
$this->language = trim($elems);
$this->debug('Language matched: '.$this->language);
break;
}
} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
foreach ($elems as $elem) {
if (!isset($elem->parentNode)) continue;
$this->language = trim($elem->textContent);
$this->debug('Language matched: '.$this->language);
}
if ($this->language) break;
}
@ -206,10 +287,8 @@ class ContentExtractor
foreach ($this->config->date as $pattern) {
$elems = @$xpath->evaluate($pattern, $this->readability->dom);
if (is_string($elems)) {
$this->debug('Date expression evaluated as string');
$this->date = strtotime(trim($elems, "; \t\n\r\0\x0B"));
} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
$this->debug('Date matched');
$this->date = $elems->item(0)->textContent;
$this->date = strtotime(trim($this->date, "; \t\n\r\0\x0B"));
// remove date from document
@ -218,6 +297,8 @@ class ContentExtractor
if (!$this->date) {
$this->date = null;
} else {
$this->debug('Date matched: '.date('Y-m-d H:i:s', $this->date));
$this->debug("...XPath match: $pattern");
break;
}
}
@ -288,11 +369,12 @@ class ContentExtractor
// check for matches
if ($elems && $elems->length > 0) {
$this->debug('Body matched');
$this->debug("...XPath match: $pattern");
if ($elems->length == 1) {
$this->body = $elems->item(0);
// prune (clean up elements that may not be content)
if ($this->config->prune) {
$this->debug('Pruning content');
if ($this->config->prune()) {
$this->debug('...pruning content');
$this->readability->prepArticle($this->body);
}
break;
@ -309,17 +391,18 @@ class ContentExtractor
}
}
if ($isDescendant) {
$this->debug('Element is child of another body element, skipping.');
$this->debug('...element is child of another body element, skipping.');
} else {
// prune (clean up elements that may not be content)
if ($this->config->prune) {
if ($this->config->prune()) {
$this->debug('Pruning content');
$this->readability->prepArticle($elem);
}
$this->debug('Element added to body');
$this->debug('...element added to body');
$this->body->appendChild($elem);
}
}
if ($this->body->hasChildNodes()) break;
}
}
}
@ -328,25 +411,25 @@ class ContentExtractor
$detect_title = $detect_body = $detect_author = $detect_date = false;
// detect title?
if (!isset($this->title)) {
if (empty($this->config->title) || $this->config->autodetect_on_failure) {
if (empty($this->config->title) || $this->config->autodetect_on_failure()) {
$detect_title = true;
}
}
// detect body?
if (!isset($this->body)) {
if (empty($this->config->body) || $this->config->autodetect_on_failure) {
if (empty($this->config->body) || $this->config->autodetect_on_failure()) {
$detect_body = true;
}
}
// detect author?
if (empty($this->author)) {
if (empty($this->config->author) || $this->config->autodetect_on_failure) {
if (empty($this->config->author) || $this->config->autodetect_on_failure()) {
$detect_author = true;
}
}
// detect date?
if (!isset($this->date)) {
if (empty($this->config->date) || $this->config->autodetect_on_failure) {
if (empty($this->config->date) || $this->config->autodetect_on_failure()) {
$detect_date = true;
}
}
@ -363,8 +446,8 @@ class ContentExtractor
// check for entry-title
$elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' entry-title ')]", $hentry);
if ($elems && $elems->length > 0) {
$this->debug('hNews: found entry-title');
$this->title = $elems->item(0)->textContent;
$this->debug('hNews: found entry-title: '.$this->title);
// remove title from document
$elems->item(0)->parentNode->removeChild($elems->item(0));
$detect_title = false;
@ -375,11 +458,11 @@ class ContentExtractor
// check for time element with pubdate attribute
$elems = @$xpath->query(".//time[@pubdate] | .//abbr[contains(concat(' ',normalize-space(@class),' '),' published ')]", $hentry);
if ($elems && $elems->length > 0) {
$this->debug('hNews: found publication date');
$this->date = strtotime(trim($elems->item(0)->textContent));
// remove date from document
//$elems->item(0)->parentNode->removeChild($elems->item(0));
if ($this->date) {
$this->debug('hNews: found publication date: '.date('Y-m-d H:i:s', $this->date));
$detect_date = false;
} else {
$this->date = null;
@ -391,18 +474,19 @@ class ContentExtractor
// check for time element with pubdate attribute
$elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' vcard ') and (contains(concat(' ',normalize-space(@class),' '),' author ') or contains(concat(' ',normalize-space(@class),' '),' byline '))]", $hentry);
if ($elems && $elems->length > 0) {
$this->debug('hNews: found author');
$author = $elems->item(0);
$fn = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' fn ')]", $author);
if ($fn && $fn->length > 0) {
foreach ($fn as $_fn) {
if (trim($_fn->textContent) != '') {
$this->author[] = trim($_fn->textContent);
$this->debug('hNews: found author: '.trim($_fn->textContent));
}
}
} else {
if (trim($author->textContent) != '') {
$this->author[] = trim($author->textContent);
$this->debug('hNews: found author: '.trim($author->textContent));
}
}
$detect_author = empty($this->author);
@ -422,7 +506,7 @@ class ContentExtractor
if (($e->tagName == 'img') || (trim($e->textContent) != '')) {
$this->body = $elems->item(0);
// prune (clean up elements that may not be content)
if ($this->config->prune) {
if ($this->config->prune()) {
$this->debug('Pruning content');
$this->readability->prepArticle($this->body);
}
@ -447,7 +531,7 @@ class ContentExtractor
$this->debug('Element is child of another body element, skipping.');
} else {
// prune (clean up elements that may not be content)
if ($this->config->prune) {
if ($this->config->prune()) {
$this->debug('Pruning content');
$this->readability->prepArticle($elem);
}
@ -467,8 +551,8 @@ class ContentExtractor
// check for instapaper_title
$elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' instapaper_title ')]", $this->readability->dom);
if ($elems && $elems->length > 0) {
$this->debug('title found (.instapaper_title)');
$this->title = $elems->item(0)->textContent;
$this->debug('Title found (.instapaper_title): '.$this->title);
// remove title from document
$elems->item(0)->parentNode->removeChild($elems->item(0));
$detect_title = false;
@ -481,7 +565,7 @@ class ContentExtractor
$this->debug('body found (.instapaper_body)');
$this->body = $elems->item(0);
// prune (clean up elements that may not be content)
if ($this->config->prune) {
if ($this->config->prune()) {
$this->debug('Pruning content');
$this->readability->prepArticle($this->body);
}
@ -497,9 +581,9 @@ class ContentExtractor
if ($detect_author) {
$elems = @$xpath->query("//a[contains(concat(' ',normalize-space(@rel),' '),' author ')]", $this->readability->dom);
if ($elems && $elems->length == 1) {
$this->debug('Author found (rel="author")');
$author = trim($elems->item(0)->textContent);
if ($author != '') {
$this->debug("Author found (rel=\"author\"): $author");
$this->author[] = $author;
$detect_author = false;
}
@ -512,11 +596,11 @@ class ContentExtractor
if ($detect_date) {
$elems = @$xpath->query("//time[@pubdate]", $this->readability->dom);
if ($elems && $elems->length == 1) {
$this->debug('Date found (pubdate marked time element)');
$this->date = strtotime(trim($elems->item(0)->textContent));
// remove date from document
//$elems->item(0)->parentNode->removeChild($elems->item(0));
if ($this->date) {
$this->debug('Date found (pubdate marked time element): '.date('Y-m-d H:i:s', $this->date));
$detect_date = false;
} else {
$this->date = null;
@ -542,7 +626,7 @@ class ContentExtractor
$this->body = $this->body->firstChild;
}
// prune (clean up elements that may not be content)
if ($this->config->prune) {
if ($this->config->prune()) {
$this->debug('Pruning content');
$this->readability->prepArticle($this->body);
}
@ -563,6 +647,34 @@ class ContentExtractor
$this->body->removeChild($firstChild);
}
}
// prevent self-closing iframes
$elems = $this->body->getElementsByTagName('iframe');
for ($i = $elems->length-1; $i >= 0; $i--) {
$e = $elems->item($i);
if (!$e->hasChildNodes()) {
$e->appendChild($this->body->ownerDocument->createTextNode('[embedded content]'));
}
}
// remove image lazy loading - WordPress plugin http://wordpress.org/extend/plugins/lazy-load/
// the plugin replaces the src attribute to point to a 1x1 gif and puts the original src
// inside the data-lazy-src attribute. It also places the original image inside a noscript element
// next to the amended one.
$elems = @$xpath->query("//img[@data-lazy-src]", $this->body);
for ($i = $elems->length-1; $i >= 0; $i--) {
$e = $elems->item($i);
// let's see if we can grab image from noscript
if ($e->nextSibling !== null && $e->nextSibling->nodeName === 'noscript') {
$_new_elem = $e->ownerDocument->createDocumentFragment();
@$_new_elem->appendXML($e->nextSibling->innerHTML);
$e->nextSibling->parentNode->replaceChild($_new_elem, $e->nextSibling);
$e->parentNode->removeChild($e);
} else {
// Use data-lazy-src as src value
$e->setAttribute('src', $e->getAttribute('data-lazy-src'));
$e->removeAttribute('data-lazy-src');
}
}
$this->success = true;
}
@ -608,5 +720,9 @@ class ContentExtractor
public function getSiteConfig() {
return $this->config;
}
public function getNextPageUrl() {
return $this->nextPageUrl;
}
}
?>

View file

@ -0,0 +1,338 @@
<?php
/**
* Site Config
*
* Each instance of this class should hold extraction patterns and other directives
* for a website. See ContentExtractor class to see how it's used.
*
* @version 0.7
* @date 2012-08-27
* @author Keyvan Minoukadeh
* @copyright 2012 Keyvan Minoukadeh
* @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
*/
class SiteConfig
{
// Use first matching element as title (0 or more xpath expressions)
public $title = array();
// Use first matching element as body (0 or more xpath expressions)
public $body = array();
// Use first matching element as author (0 or more xpath expressions)
public $author = array();
// Use first matching element as date (0 or more xpath expressions)
public $date = array();
// Strip elements matching these xpath expressions (0 or more)
public $strip = array();
// Strip elements which contain these strings (0 or more) in the id or class attribute
public $strip_id_or_class = array();
// Strip images which contain these strings (0 or more) in the src attribute
public $strip_image_src = array();
// Additional HTTP headers to send
// NOT YET USED
public $http_header = array();
// Process HTML with tidy before creating DOM (bool or null if undeclared)
public $tidy = null;
protected $default_tidy = true; // used if undeclared
// Autodetect title/body if xpath expressions fail to produce results.
// Note that this applies to title and body separately, ie.
// * if we get a body match but no title match, this option will determine whether we autodetect title
// * if neither match, this determines whether we autodetect title and body.
// Also note that this only applies when there is at least one xpath expression in title or body, ie.
// * if title and body are both empty (no xpath expressions), this option has no effect (both title and body will be auto-detected)
// * if there's an xpath expression for title and none for body, body will be auto-detected and this option will determine whether we auto-detect title if the xpath expression for it fails to produce results.
// Usage scenario: you want to extract something specific from a set of URLs, e.g. a table, and if the table is not found, you want to ignore the entry completely. Auto-detection is unlikely to succeed here, so you construct your patterns and set this option to false. Another scenario may be a site where auto-detection has proven to fail (or worse, picked up the wrong content).
// bool or null if undeclared
public $autodetect_on_failure = null;
protected $default_autodetect_on_failure = true; // used if undeclared
// Clean up content block - attempt to remove elements that appear to be superfluous
// bool or null if undeclared
public $prune = null;
protected $default_prune = true; // used if undeclared
// Test URL - if present, can be used to test the config above
public $test_url = array();
// Single-page link - should identify a link element or URL pointing to the page holding the entire article
// This is useful for sites which split their articles across multiple pages. Links to such pages tend to
// display the first page with links to the other pages at the bottom. Often there is also a link to a page
// which displays the entire article on one page (e.g. 'print view').
// This should be an XPath expression identifying the link to that page. If present and we find a match,
// we will retrieve that page and the rest of the options in this config will be applied to the new page.
public $single_page_link = array();
public $next_page_link = array();
// Single-page link in feed? - same as above, but patterns applied to item description HTML taken from feed
public $single_page_link_in_feed = array();
// Which parser to use for turning raw HTML into a DOMDocument (either 'libxml' or 'html5lib')
// string or null if undeclared
public $parser = null;
protected $default_parser = 'libxml'; // used if undeclared
// Strings to search for in HTML before processing begins (used with $replace_string)
public $find_string = array();
// Strings to replace those found in $find_string before HTML processing begins
public $replace_string = array();
// the options below cannot be set in the config files which this class represents
//public $cache_in_apc = false; // used to decide if we should cache in apc or not
public $cache_key = null;
public static $debug = false;
protected static $apc = false;
protected static $config_path;
protected static $config_path_fallback;
protected static $config_cache = array();
const HOSTNAME_REGEX = '/^(([a-zA-Z0-9-]*[a-zA-Z0-9])\.)*([A-Za-z0-9-]*[A-Za-z0-9])$/';
protected static function debug($msg) {
if (self::$debug) {
//$mem = round(memory_get_usage()/1024, 2);
//$memPeak = round(memory_get_peak_usage()/1024, 2);
echo '* ',$msg;
//echo ' - mem used: ',$mem," (peak: $memPeak)\n";
echo "\n";
ob_flush();
flush();
}
}
// enable APC caching of certain site config files?
// If enabled the following site config files will be
// cached in APC cache (when requested for first time):
// * anything in site_config/custom/ and its corresponding file in site_config/standard/
// * the site config files associated with HTML fingerprints
// * the global site config file
// returns true if enabled, false otherwise
public static function use_apc($apc=true) {
if (!function_exists('apc_add')) {
if ($apc) self::debug('APC will not be used (function apc_add does not exist)');
return false;
}
self::$apc = $apc;
return $apc;
}
// return bool or null
public function tidy($use_default=true) {
if ($use_default) return (isset($this->tidy)) ? $this->tidy : $this->default_tidy;
return $this->tidy;
}
// return bool or null
public function prune($use_default=true) {
if ($use_default) return (isset($this->prune)) ? $this->prune : $this->default_prune;
return $this->prune;
}
// return string or null
public function parser($use_default=true) {
if ($use_default) return (isset($this->parser)) ? $this->parser : $this->default_parser;
return $this->parser;
}
// return bool or null
public function autodetect_on_failure($use_default=true) {
if ($use_default) return (isset($this->autodetect_on_failure)) ? $this->autodetect_on_failure : $this->default_autodetect_on_failure;
return $this->autodetect_on_failure;
}
public static function set_config_path($path, $fallback=null) {
self::$config_path = $path;
self::$config_path_fallback = $fallback;
}
public static function add_to_cache($key, SiteConfig $config, $use_apc=true) {
$key = strtolower($key);
if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
if ($config->cache_key) $key = $config->cache_key;
self::$config_cache[$key] = $config;
if (self::$apc && $use_apc) {
self::debug("Adding site config to APC cache with key sc.$key");
apc_add("sc.$key", $config);
}
self::debug("Cached site config with key $key");
}
public static function is_cached($key) {
$key = strtolower($key);
if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
if (array_key_exists($key, self::$config_cache)) {
return true;
} elseif (self::$apc && (bool)apc_fetch("sc.$key")) {
return true;
}
return false;
}
public function append(SiteConfig $newconfig) {
// check for commands where we accept multiple statements (no test_url)
foreach (array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header', 'find_string', 'replace_string') as $var) {
// append array elements for this config variable from $newconfig to this config
//$this->$var = $this->$var + $newconfig->$var;
$this->$var = array_unique(array_merge($this->$var, $newconfig->$var));
}
// check for single statement commands
// we do not overwrite existing non null values
foreach (array('tidy', 'prune', 'parser', 'autodetect_on_failure') as $var) {
if ($this->$var === null) $this->$var = $newconfig->$var;
}
}
// returns SiteConfig instance if an appropriate one is found, false otherwise
// if $exact_host_match is true, we will not look for wildcard config matches
// by default if host is 'test.example.org' we will look for and load '.example.org.txt' if it exists
public static function build($host, $exact_host_match=false) {
$host = strtolower($host);
if (substr($host, 0, 4) == 'www.') $host = substr($host, 4);
if (!$host || (strlen($host) > 200) || !preg_match(self::HOSTNAME_REGEX, ltrim($host, '.'))) return false;
// check for site configuration
$try = array($host);
// should we look for wildcard matches
if (!$exact_host_match) {
$split = explode('.', $host);
if (count($split) > 1) {
array_shift($split);
$try[] = '.'.implode('.', $split);
}
}
// look for site config file in primary folder
self::debug(". looking for site config for $host in primary folder");
foreach ($try as $h) {
if (array_key_exists($h, self::$config_cache)) {
self::debug("... site config for $h already loaded in this request");
return self::$config_cache[$h];
} elseif (self::$apc && ($sconfig = apc_fetch("sc.$h"))) {
self::debug("... site config for $h in APC cache");
return $sconfig;
} elseif (file_exists(self::$config_path."/$h.txt")) {
self::debug("... found site config ($h.txt)");
$file_primary = self::$config_path."/$h.txt";
$matched_name = $h;
break;
}
}
// if we found site config, process it
if (isset($file_primary)) {
$config_lines = file($file_primary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
if (!$config_lines || !is_array($config_lines)) return false;
$config = self::build_from_array($config_lines);
// if APC caching is available and enabled, mark this for cache
//$config->cache_in_apc = true;
$config->cache_key = $matched_name;
// if autodetec on failure is off (on by default) we do not need to look
// in secondary folder
if (!$config->autodetect_on_failure()) {
self::debug('... autodetect on failure is disabled (no other site config files will be loaded)');
return $config;
}
}
// look for site config file in secondary folder
if (isset(self::$config_path_fallback)) {
self::debug(". looking for site config for $host in secondary folder");
foreach ($try as $h) {
if (file_exists(self::$config_path_fallback."/$h.txt")) {
self::debug("... found site config in secondary folder ($h.txt)");
$file_secondary = self::$config_path_fallback."/$h.txt";
$matched_name = $h;
break;
}
}
if (!isset($file_secondary)) {
self::debug("... no site config match in secondary folder");
}
}
// return false if no config file found
if (!isset($file_primary) && !isset($file_secondary)) {
self::debug("... no site config match for $host");
return false;
}
// return primary config if secondary not found
if (!isset($file_secondary) && isset($config)) {
return $config;
}
// process secondary config file
$config_lines = file($file_secondary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
if (!$config_lines || !is_array($config_lines)) {
// failed to process secondary
if (isset($config)) {
// return primary config
return $config;
} else {
return false;
}
}
// merge with primary and return
if (isset($config)) {
self::debug('. merging config files');
$config->append(self::build_from_array($config_lines));
return $config;
} else {
// return just secondary
$config = self::build_from_array($config_lines);
// if APC caching is available and enabled, mark this for cache
//$config->cache_in_apc = true;
$config->cache_key = $matched_name;
return $config;
}
}
public static function build_from_array(array $lines) {
$config = new SiteConfig();
foreach ($lines as $line) {
$line = trim($line);
// skip comments, empty lines
if ($line == '' || $line[0] == '#') continue;
// get command
$command = explode(':', $line, 2);
// if there's no colon ':', skip this line
if (count($command) != 2) continue;
$val = trim($command[1]);
$command = trim($command[0]);
if ($command == '' || $val == '') continue;
// check for commands where we accept multiple statements
if (in_array($command, array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header', 'test_url', 'find_string', 'replace_string'))) {
array_push($config->$command, $val);
// check for single statement commands that evaluate to true or false
} elseif (in_array($command, array('tidy', 'prune', 'autodetect_on_failure'))) {
$config->$command = ($val == 'yes');
// check for single statement commands stored as strings
} elseif (in_array($command, array('parser'))) {
$config->$command = $val;
// check for replace_string(find): replace
} elseif ((substr($command, -1) == ')') && preg_match('!^([a-z0-9_]+)\((.*?)\)$!i', $command, $match)) {
if (in_array($match[1], array('replace_string'))) {
$command = $match[1];
array_push($config->find_string, $match[2]);
array_push($config->$command, $val);
}
}
}
return $config;
}
}
?>

View file

@ -23,6 +23,23 @@
$this->version = $version;
}
/**
* Set element (overwrites existing elements with $elementName)
*
* @access public
* @param srting The tag name of an element
* @param srting The content of tag
* @param array Attributes(if any) in 'attrName' => 'attrValue' format
* @return void
*/
public function setElement($elementName, $content, $attributes = null)
{
if (isset($this->elements[$elementName])) {
unset($this->elements[$elementName]);
}
$this->addElement($elementName, $content, $attributes);
}
/**
* Add an element to elements array
*
@ -34,9 +51,15 @@
*/
public function addElement($elementName, $content, $attributes = null)
{
$this->elements[$elementName]['name'] = $elementName;
$this->elements[$elementName]['content'] = $content;
$this->elements[$elementName]['attributes'] = $attributes;
$i = 0;
if (isset($this->elements[$elementName])) {
$i = count($this->elements[$elementName]);
} else {
$this->elements[$elementName] = array();
}
$this->elements[$elementName][$i]['name'] = $elementName;
$this->elements[$elementName][$i]['content'] = $content;
$this->elements[$elementName][$i]['attributes'] = $attributes;
}
/**
@ -79,7 +102,7 @@
public function setDescription($description)
{
$tag = ($this->version == ATOM)? 'summary' : 'description';
$this->addElement($tag, $description);
$this->setElement($tag, $description);
}
/**
@ -90,7 +113,7 @@
*/
public function setTitle($title)
{
$this->addElement('title', $title);
$this->setElement('title', $title);
}
/**
@ -123,7 +146,7 @@
$value = date("Y-m-d", $date);
}
$this->addElement($tag, $value);
$this->setElement($tag, $value);
}
/**
@ -137,12 +160,12 @@
{
if($this->version == RSS2 || $this->version == RSS1)
{
$this->addElement('link', $link);
$this->setElement('link', $link);
}
else
{
$this->addElement('link','',array('href'=>$link));
$this->addElement('id', FeedWriter::uuid($link,'urn:uuid:'));
$this->setElement('link','',array('href'=>$link));
$this->setElement('id', FeedWriter::uuid($link,'urn:uuid:'));
}
}
@ -160,7 +183,7 @@
public function setEncloser($url, $length, $type)
{
$attributes = array('url'=>$url, 'length'=>$length, 'type'=>$type);
$this->addElement('enclosure','',$attributes);
$this->setElement('enclosure','',$attributes);
}
} // end of class FeedItem

View file

@ -0,0 +1,441 @@
<?php
define('RSS2', 1, true);
define('JSON', 2, true);
define('JSONP', 3, true);
/**
* Univarsel Feed Writer class
*
* Genarate RSS2 or JSON (original: RSS 1.0, RSS2.0 and ATOM Feed)
*
* Modified for FiveFilters.org's Full-Text RSS project
* to allow for inclusion of hubs, JSON output.
* Stripped RSS1 and ATOM support.
*
* @package UnivarselFeedWriter
* @author Anis uddin Ahmad <anisniit@gmail.com>
* @link http://www.ajaxray.com/projects/rss
*/
class FeedWriter
{
private $self = null; // self URL - http://feed2.w3.org/docs/warning/MissingAtomSelfLink.html
private $hubs = array(); // PubSubHubbub hubs
private $channels = array(); // Collection of channel elements
private $items = array(); // Collection of items as object of FeedItem class.
private $data = array(); // Store some other version wise data
private $CDATAEncoding = array(); // The tag names which have to encoded as CDATA
private $xsl = null; // stylesheet to render RSS (used by Chrome)
private $json = null; // JSON object
private $version = null;
/**
* Constructor
*
* @param constant the version constant (RSS2 or JSON).
*/
function __construct($version = RSS2)
{
$this->version = $version;
// Setting default value for assential channel elements
$this->channels['title'] = $version . ' Feed';
$this->channels['link'] = 'http://www.ajaxray.com/blog';
//Tag names to encode in CDATA
$this->CDATAEncoding = array('description', 'content:encoded', 'content', 'subtitle', 'summary');
}
public function setFormat($format) {
$this->version = $format;
}
// Start # public functions ---------------------------------------------
/**
* Set a channel element
* @access public
* @param srting name of the channel tag
* @param string content of the channel tag
* @return void
*/
public function setChannelElement($elementName, $content)
{
$this->channels[$elementName] = $content ;
}
/**
* Set multiple channel elements from an array. Array elements
* should be 'channelName' => 'channelContent' format.
*
* @access public
* @param array array of channels
* @return void
*/
public function setChannelElementsFromArray($elementArray)
{
if(! is_array($elementArray)) return;
foreach ($elementArray as $elementName => $content)
{
$this->setChannelElement($elementName, $content);
}
}
/**
* Genarate the actual RSS/JSON file
*
* @access public
* @return void
*/
public function genarateFeed()
{
if ($this->version == RSS2) {
header('Content-type: text/xml; charset=UTF-8');
// this line prevents Chrome 20 from prompting download
// used by Google: https://news.google.com/news/feeds?ned=us&topic=b&output=rss
header('X-content-type-options: nosniff');
} elseif ($this->version == JSON) {
header('Content-type: application/json; charset=UTF-8');
$this->json = new stdClass();
} elseif ($this->version == JSONP) {
header('Content-type: application/javascript; charset=UTF-8');
$this->json = new stdClass();
}
$this->printHead();
$this->printChannels();
$this->printItems();
$this->printTale();
if ($this->version == JSON || $this->version == JSONP) {
echo json_encode($this->json);
}
}
/**
* Create a new FeedItem.
*
* @access public
* @return object instance of FeedItem class
*/
public function createNewItem()
{
$Item = new FeedItem($this->version);
return $Item;
}
/**
* Add a FeedItem to the main class
*
* @access public
* @param object instance of FeedItem class
* @return void
*/
public function addItem($feedItem)
{
$this->items[] = $feedItem;
}
// Wrapper functions -------------------------------------------------------------------
/**
* Set the 'title' channel element
*
* @access public
* @param srting value of 'title' channel tag
* @return void
*/
public function setTitle($title)
{
$this->setChannelElement('title', $title);
}
/**
* Add a hub to the channel element
*
* @access public
* @param string URL
* @return void
*/
public function addHub($hub)
{
$this->hubs[] = $hub;
}
/**
* Set XSL URL
*
* @access public
* @param string URL
* @return void
*/
public function setXsl($xsl)
{
$this->xsl = $xsl;
}
/**
* Set self URL
*
* @access public
* @param string URL
* @return void
*/
public function setSelf($self)
{
$this->self = $self;
}
/**
* Set the 'description' channel element
*
* @access public
* @param srting value of 'description' channel tag
* @return void
*/
public function setDescription($desciption)
{
$tag = ($this->version == ATOM)? 'subtitle' : 'description';
$this->setChannelElement($tag, $desciption);
}
/**
* Set the 'link' channel element
*
* @access public
* @param srting value of 'link' channel tag
* @return void
*/
public function setLink($link)
{
$this->setChannelElement('link', $link);
}
/**
* Set the 'image' channel element
*
* @access public
* @param srting title of image
* @param srting link url of the imahe
* @param srting path url of the image
* @return void
*/
public function setImage($title, $link, $url)
{
$this->setChannelElement('image', array('title'=>$title, 'link'=>$link, 'url'=>$url));
}
// End # public functions ----------------------------------------------
// Start # private functions ----------------------------------------------
/**
* Prints the xml and rss namespace
*
* @access private
* @return void
*/
private function printHead()
{
if ($this->version == RSS2)
{
$out = '<?xml version="1.0" encoding="utf-8"?>'."\n";
if ($this->xsl) $out .= '<?xml-stylesheet type="text/xsl" href="'.htmlspecialchars($this->xsl).'"?>' . PHP_EOL;
$out .= '<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/">' . PHP_EOL;
echo $out;
}
elseif ($this->version == JSON || $this->version == JSONP)
{
$this->json->rss = array('@attributes' => array('version' => '2.0'));
}
}
/**
* Closes the open tags at the end of file
*
* @access private
* @return void
*/
private function printTale()
{
if ($this->version == RSS2)
{
echo '</channel>',PHP_EOL,'</rss>';
}
// do nothing for JSON
}
/**
* Creates a single node as xml format
*
* @access private
* @param string name of the tag
* @param mixed tag value as string or array of nested tags in 'tagName' => 'tagValue' format
* @param array Attributes(if any) in 'attrName' => 'attrValue' format
* @return string formatted xml tag
*/
private function makeNode($tagName, $tagContent, $attributes = null)
{
if ($this->version == RSS2)
{
$nodeText = '';
$attrText = '';
if (is_array($attributes))
{
foreach ($attributes as $key => $value)
{
$attrText .= " $key=\"$value\" ";
}
}
$nodeText .= "<{$tagName}{$attrText}>";
if (is_array($tagContent))
{
foreach ($tagContent as $key => $value)
{
$nodeText .= $this->makeNode($key, $value);
}
}
else
{
//$nodeText .= (in_array($tagName, $this->CDATAEncoding))? $tagContent : htmlentities($tagContent);
$nodeText .= htmlspecialchars($tagContent);
}
//$nodeText .= (in_array($tagName, $this->CDATAEncoding))? "]]></$tagName>" : "</$tagName>";
$nodeText .= "</$tagName>";
return $nodeText . PHP_EOL;
}
elseif ($this->version == JSON || $this->version == JSONP)
{
$tagName = (string)$tagName;
$tagName = strtr($tagName, ':', '_');
$node = null;
if (!$tagContent && is_array($attributes) && count($attributes))
{
$node = array('@attributes' => $this->json_keys($attributes));
} else {
if (is_array($tagContent)) {
$node = $this->json_keys($tagContent);
} else {
$node = $tagContent;
}
}
return $node;
}
return ''; // should not get here
}
private function json_keys(array $array) {
$new = array();
foreach ($array as $key => $val) {
if (is_string($key)) $key = strtr($key, ':', '_');
if (is_array($val)) {
$new[$key] = $this->json_keys($val);
} else {
$new[$key] = $val;
}
}
return $new;
}
/**
* @desc Print channels
* @access private
* @return void
*/
private function printChannels()
{
//Start channel tag
if ($this->version == RSS2) {
echo '<channel>' . PHP_EOL;
// add hubs
foreach ($this->hubs as $hub) {
//echo $this->makeNode('link', '', array('rel'=>'hub', 'href'=>$hub, 'xmlns'=>'http://www.w3.org/2005/Atom'));
echo '<link rel="hub" href="'.htmlspecialchars($hub).'" xmlns="http://www.w3.org/2005/Atom" />' . PHP_EOL;
}
// add self
if (isset($this->self)) {
//echo $this->makeNode('link', '', array('rel'=>'self', 'href'=>$this->self, 'xmlns'=>'http://www.w3.org/2005/Atom'));
echo '<link rel="self" href="'.htmlspecialchars($this->self).'" xmlns="http://www.w3.org/2005/Atom" />' . PHP_EOL;
}
//Print Items of channel
foreach ($this->channels as $key => $value)
{
echo $this->makeNode($key, $value);
}
} elseif ($this->version == JSON || $this->version == JSONP) {
$this->json->rss['channel'] = (object)$this->json_keys($this->channels);
}
}
/**
* Prints formatted feed items
*
* @access private
* @return void
*/
private function printItems()
{
foreach ($this->items as $item) {
$itemElements = $item->getElements();
echo $this->startItem();
if ($this->version == JSON || $this->version == JSONP) {
$json_item = array();
}
foreach ($itemElements as $thisElement) {
foreach ($thisElement as $instance) {
if ($this->version == RSS2) {
echo $this->makeNode($instance['name'], $instance['content'], $instance['attributes']);
} elseif ($this->version == JSON || $this->version == JSONP) {
$_json_node = $this->makeNode($instance['name'], $instance['content'], $instance['attributes']);
if (count($thisElement) > 1) {
$json_item[strtr($instance['name'], ':', '_')][] = $_json_node;
} else {
$json_item[strtr($instance['name'], ':', '_')] = $_json_node;
}
}
}
}
echo $this->endItem();
if ($this->version == JSON || $this->version == JSONP) {
if (count($this->items) > 1) {
$this->json->rss['channel']->item[] = $json_item;
} else {
$this->json->rss['channel']->item = $json_item;
}
}
}
}
/**
* Make the starting tag of channels
*
* @access private
* @return void
*/
private function startItem()
{
if ($this->version == RSS2)
{
echo '<item>' . PHP_EOL;
}
// nothing for JSON
}
/**
* Closes feed item tag
*
* @access private
* @return void
*/
private function endItem()
{
if ($this->version == RSS2)
{
echo '</item>' . PHP_EOL;
}
// nothing for JSON
}
// End # private functions ----------------------------------------------
}

View file

@ -0,0 +1,728 @@
<?php
/*
htmLawed 1.1.14, 8 August 2012
OOP code, 8 August 2012
Copyright Santosh Patnaik
Dual LGPL v3 and GPL v2+ license
A PHP Labware internal utility; www.bioinformatics.org/phplabware/internal_utilities/htmLawed
See htmLawed_README.txt/htm
*/
class htmLawed{
// begin class
public static function hl($t, $C=1, $S=array()){
$C = is_array($C) ? $C : array();
if(!empty($C['valid_xhtml'])){
$C['elements'] = empty($C['elements']) ? '*-center-dir-font-isindex-menu-s-strike-u' : $C['elements'];
$C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 2;
$C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 2;
}
// config eles
$e = array('a'=>1, 'abbr'=>1, 'acronym'=>1, 'address'=>1, 'applet'=>1, 'area'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'blockquote'=>1, 'br'=>1, 'button'=>1, 'caption'=>1, 'center'=>1, 'cite'=>1, 'code'=>1, 'col'=>1, 'colgroup'=>1, 'dd'=>1, 'del'=>1, 'dfn'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'dt'=>1, 'em'=>1, 'embed'=>1, 'fieldset'=>1, 'font'=>1, 'form'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'isindex'=>1, 'kbd'=>1, 'label'=>1, 'legend'=>1, 'li'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'object'=>1, 'ol'=>1, 'optgroup'=>1, 'option'=>1, 'p'=>1, 'param'=>1, 'pre'=>1, 'q'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'script'=>1, 'select'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'table'=>1, 'tbody'=>1, 'td'=>1, 'textarea'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1, 'tt'=>1, 'u'=>1, 'ul'=>1, 'var'=>1); // 86/deprecated+embed+ruby
if(!empty($C['safe'])){
unset($e['applet'], $e['embed'], $e['iframe'], $e['object'], $e['script']);
}
$x = !empty($C['elements']) ? str_replace(array("\n", "\r", "\t", ' '), '', $C['elements']) : '*';
if($x == '-*'){$e = array();}
elseif(strpos($x, '*') === false){$e = array_flip(explode(',', $x));}
else{
if(isset($x[1])){
preg_match_all('`(?:^|-|\+)[^\-+]+?(?=-|\+|$)`', $x, $m, PREG_SET_ORDER);
for($i=count($m); --$i>=0;){$m[$i] = $m[$i][0];}
foreach($m as $v){
if($v[0] == '+'){$e[substr($v, 1)] = 1;}
if($v[0] == '-' && isset($e[($v = substr($v, 1))]) && !in_array('+'. $v, $m)){unset($e[$v]);}
}
}
}
$C['elements'] =& $e;
// config attrs
$x = !empty($C['deny_attribute']) ? str_replace(array("\n", "\r", "\t", ' '), '', $C['deny_attribute']) : '';
$x = array_flip((isset($x[0]) && $x[0] == '*') ? explode('-', $x) : explode(',', $x. (!empty($C['safe']) ? ',on*' : '')));
if(isset($x['on*'])){
unset($x['on*']);
$x += array('onblur'=>1, 'onchange'=>1, 'onclick'=>1, 'ondblclick'=>1, 'onfocus'=>1, 'onkeydown'=>1, 'onkeypress'=>1, 'onkeyup'=>1, 'onmousedown'=>1, 'onmousemove'=>1, 'onmouseout'=>1, 'onmouseover'=>1, 'onmouseup'=>1, 'onreset'=>1, 'onselect'=>1, 'onsubmit'=>1);
}
$C['deny_attribute'] = $x;
// config URL
$x = (isset($C['schemes'][2]) && strpos($C['schemes'], ':')) ? strtolower($C['schemes']) : 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https';
$C['schemes'] = array();
foreach(explode(';', str_replace(array(' ', "\t", "\r", "\n"), '', $x)) as $v){
$x = $x2 = null; list($x, $x2) = explode(':', $v, 2);
if($x2){$C['schemes'][$x] = array_flip(explode(',', $x2));}
}
if(!isset($C['schemes']['*'])){$C['schemes']['*'] = array('file'=>1, 'http'=>1, 'https'=>1,);}
if(!empty($C['safe']) && empty($C['schemes']['style'])){$C['schemes']['style'] = array('!'=>1);}
$C['abs_url'] = isset($C['abs_url']) ? $C['abs_url'] : 0;
if(!isset($C['base_url']) or !preg_match('`^[a-zA-Z\d.+\-]+://[^/]+/(.+?/)?$`', $C['base_url'])){
$C['base_url'] = $C['abs_url'] = 0;
}
// config rest
$C['and_mark'] = empty($C['and_mark']) ? 0 : 1;
$C['anti_link_spam'] = (isset($C['anti_link_spam']) && is_array($C['anti_link_spam']) && count($C['anti_link_spam']) == 2 && (empty($C['anti_link_spam'][0]) or htmLawed::hl_regex($C['anti_link_spam'][0])) && (empty($C['anti_link_spam'][1]) or htmLawed::hl_regex($C['anti_link_spam'][1]))) ? $C['anti_link_spam'] : 0;
$C['anti_mail_spam'] = isset($C['anti_mail_spam']) ? $C['anti_mail_spam'] : 0;
$C['balance'] = isset($C['balance']) ? (bool)$C['balance'] : 1;
$C['cdata'] = isset($C['cdata']) ? $C['cdata'] : (empty($C['safe']) ? 3 : 0);
$C['clean_ms_char'] = empty($C['clean_ms_char']) ? 0 : $C['clean_ms_char'];
$C['comment'] = isset($C['comment']) ? $C['comment'] : (empty($C['safe']) ? 3 : 0);
$C['css_expression'] = empty($C['css_expression']) ? 0 : 1;
$C['direct_list_nest'] = empty($C['direct_list_nest']) ? 0 : 1;
$C['hexdec_entity'] = isset($C['hexdec_entity']) ? $C['hexdec_entity'] : 1;
$C['hook'] = (!empty($C['hook']) && function_exists($C['hook'])) ? $C['hook'] : 0;
$C['hook_tag'] = (!empty($C['hook_tag']) && function_exists($C['hook_tag'])) ? $C['hook_tag'] : 0;
$C['keep_bad'] = isset($C['keep_bad']) ? $C['keep_bad'] : 6;
$C['lc_std_val'] = isset($C['lc_std_val']) ? (bool)$C['lc_std_val'] : 1;
$C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 1;
$C['named_entity'] = isset($C['named_entity']) ? (bool)$C['named_entity'] : 1;
$C['no_deprecated_attr'] = isset($C['no_deprecated_attr']) ? $C['no_deprecated_attr'] : 1;
$C['parent'] = isset($C['parent'][0]) ? strtolower($C['parent']) : 'body';
$C['show_setting'] = !empty($C['show_setting']) ? $C['show_setting'] : 0;
$C['style_pass'] = empty($C['style_pass']) ? 0 : 1;
$C['tidy'] = empty($C['tidy']) ? 0 : $C['tidy'];
$C['unique_ids'] = isset($C['unique_ids']) ? $C['unique_ids'] : 1;
$C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 0;
if(isset($GLOBALS['C'])){$reC = $GLOBALS['C'];}
$GLOBALS['C'] = $C;
$S = is_array($S) ? $S : htmLawed::hl_spec($S);
if(isset($GLOBALS['S'])){$reS = $GLOBALS['S'];}
$GLOBALS['S'] = $S;
$t = preg_replace('`[\x00-\x08\x0b-\x0c\x0e-\x1f]`', '', $t);
if($C['clean_ms_char']){
$x = array("\x7f"=>'', "\x80"=>'&#8364;', "\x81"=>'', "\x83"=>'&#402;', "\x85"=>'&#8230;', "\x86"=>'&#8224;', "\x87"=>'&#8225;', "\x88"=>'&#710;', "\x89"=>'&#8240;', "\x8a"=>'&#352;', "\x8b"=>'&#8249;', "\x8c"=>'&#338;', "\x8d"=>'', "\x8e"=>'&#381;', "\x8f"=>'', "\x90"=>'', "\x95"=>'&#8226;', "\x96"=>'&#8211;', "\x97"=>'&#8212;', "\x98"=>'&#732;', "\x99"=>'&#8482;', "\x9a"=>'&#353;', "\x9b"=>'&#8250;', "\x9c"=>'&#339;', "\x9d"=>'', "\x9e"=>'&#382;', "\x9f"=>'&#376;');
$x = $x + ($C['clean_ms_char'] == 1 ? array("\x82"=>'&#8218;', "\x84"=>'&#8222;', "\x91"=>'&#8216;', "\x92"=>'&#8217;', "\x93"=>'&#8220;', "\x94"=>'&#8221;') : array("\x82"=>'\'', "\x84"=>'"', "\x91"=>'\'', "\x92"=>'\'', "\x93"=>'"', "\x94"=>'"'));
$t = strtr($t, $x);
}
if($C['cdata'] or $C['comment']){$t = preg_replace_callback('`<!(?:(?:--.*?--)|(?:\[CDATA\[.*?\]\]))>`sm', 'htmLawed::hl_cmtcd', $t);}
$t = preg_replace_callback('`&amp;([A-Za-z][A-Za-z0-9]{1,30}|#(?:[0-9]{1,8}|[Xx][0-9A-Fa-f]{1,7}));`', 'htmLawed::hl_ent', str_replace('&', '&amp;', $t));
if($C['unique_ids'] && !isset($GLOBALS['hl_Ids'])){$GLOBALS['hl_Ids'] = array();}
if($C['hook']){$t = $C['hook']($t, $C, $S);}
if($C['show_setting'] && preg_match('`^[a-z][a-z0-9_]*$`i', $C['show_setting'])){
$GLOBALS[$C['show_setting']] = array('config'=>$C, 'spec'=>$S, 'time'=>microtime());
}
// main
$t = preg_replace_callback('`<(?:(?:\s|$)|(?:[^>]*(?:>|$)))|>`m', 'htmLawed::hl_tag', $t);
$t = $C['balance'] ? htmLawed::hl_bal($t, $C['keep_bad'], $C['parent']) : $t;
$t = (($C['cdata'] or $C['comment']) && strpos($t, "\x01") !== false) ? str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05"), array('', '', '&', '<', '>'), $t) : $t;
$t = $C['tidy'] ? htmLawed::hl_tidy($t, $C['tidy'], $C['parent']) : $t;
unset($C, $e);
if(isset($reC)){$GLOBALS['C'] = $reC;}
if(isset($reS)){$GLOBALS['S'] = $reS;}
return $t;
// eof
}
public static function hl_attrval($t, $p){
// check attr val against $S
$o = 1; $l = strlen($t);
foreach($p as $k=>$v){
switch($k){
case 'maxlen':if($l > $v){$o = 0;}
break; case 'minlen': if($l < $v){$o = 0;}
break; case 'maxval': if((float)($t) > $v){$o = 0;}
break; case 'minval': if((float)($t) < $v){$o = 0;}
break; case 'match': if(!preg_match($v, $t)){$o = 0;}
break; case 'nomatch': if(preg_match($v, $t)){$o = 0;}
break; case 'oneof':
$m = 0;
foreach(explode('|', $v) as $n){if($t == $n){$m = 1; break;}}
$o = $m;
break; case 'noneof':
$m = 1;
foreach(explode('|', $v) as $n){if($t == $n){$m = 0; break;}}
$o = $m;
break; default:
break;
}
if(!$o){break;}
}
return ($o ? $t : (isset($p['default']) ? $p['default'] : 0));
// eof
}
public static function hl_bal($t, $do=1, $in='div'){
// balance tags
// by content
$cB = array('blockquote'=>1, 'form'=>1, 'map'=>1, 'noscript'=>1); // Block
$cE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); // Empty
$cF = array('button'=>1, 'del'=>1, 'div'=>1, 'dd'=>1, 'fieldset'=>1, 'iframe'=>1, 'ins'=>1, 'li'=>1, 'noscript'=>1, 'object'=>1, 'td'=>1, 'th'=>1); // Flow; later context-wise dynamic move of ins & del to $cI
$cI = array('a'=>1, 'abbr'=>1, 'acronym'=>1, 'address'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'caption'=>1, 'cite'=>1, 'code'=>1, 'dfn'=>1, 'dt'=>1, 'em'=>1, 'font'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'i'=>1, 'kbd'=>1, 'label'=>1, 'legend'=>1, 'p'=>1, 'pre'=>1, 'q'=>1, 'rb'=>1, 'rt'=>1, 's'=>1, 'samp'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'tt'=>1, 'u'=>1, 'var'=>1); // Inline
$cN = array('a'=>array('a'=>1), 'button'=>array('a'=>1, 'button'=>1, 'fieldset'=>1, 'form'=>1, 'iframe'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'fieldset'=>array('fieldset'=>1), 'form'=>array('form'=>1), 'label'=>array('label'=>1), 'noscript'=>array('script'=>1), 'pre'=>array('big'=>1, 'font'=>1, 'img'=>1, 'object'=>1, 'script'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1), 'rb'=>array('ruby'=>1), 'rt'=>array('ruby'=>1)); // Illegal
$cN2 = array_keys($cN);
$cR = array('blockquote'=>1, 'dir'=>1, 'dl'=>1, 'form'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'select'=>1, 'table'=>1, 'tbody'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1);
$cS = array('colgroup'=>array('col'=>1), 'dir'=>array('li'=>1), 'dl'=>array('dd'=>1, 'dt'=>1), 'menu'=>array('li'=>1), 'ol'=>array('li'=>1), 'optgroup'=>array('option'=>1), 'option'=>array('#pcdata'=>1), 'rbc'=>array('rb'=>1), 'rp'=>array('#pcdata'=>1), 'rtc'=>array('rt'=>1), 'ruby'=>array('rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1), 'select'=>array('optgroup'=>1, 'option'=>1), 'script'=>array('#pcdata'=>1), 'table'=>array('caption'=>1, 'col'=>1, 'colgroup'=>1, 'tfoot'=>1, 'tbody'=>1, 'tr'=>1, 'thead'=>1), 'tbody'=>array('tr'=>1), 'tfoot'=>array('tr'=>1), 'textarea'=>array('#pcdata'=>1), 'thead'=>array('tr'=>1), 'tr'=>array('td'=>1, 'th'=>1), 'ul'=>array('li'=>1)); // Specific - immediate parent-child
if($GLOBALS['C']['direct_list_nest']){$cS['ol'] = $cS['ul'] += array('ol'=>1, 'ul'=>1);}
$cO = array('address'=>array('p'=>1), 'applet'=>array('param'=>1), 'blockquote'=>array('script'=>1), 'fieldset'=>array('legend'=>1, '#pcdata'=>1), 'form'=>array('script'=>1), 'map'=>array('area'=>1), 'object'=>array('param'=>1, 'embed'=>1)); // Other
$cT = array('colgroup'=>1, 'dd'=>1, 'dt'=>1, 'li'=>1, 'option'=>1, 'p'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1); // Omitable closing
// block/inline type; ins & del both type; #pcdata: text
$eB = array('address'=>1, 'blockquote'=>1, 'center'=>1, 'del'=>1, 'dir'=>1, 'dl'=>1, 'div'=>1, 'fieldset'=>1, 'form'=>1, 'ins'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'isindex'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'p'=>1, 'pre'=>1, 'table'=>1, 'ul'=>1);
$eI = array('#pcdata'=>1, 'a'=>1, 'abbr'=>1, 'acronym'=>1, 'applet'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'br'=>1, 'button'=>1, 'cite'=>1, 'code'=>1, 'del'=>1, 'dfn'=>1, 'em'=>1, 'embed'=>1, 'font'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'kbd'=>1, 'label'=>1, 'map'=>1, 'object'=>1, 'q'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'select'=>1, 'script'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1, 'tt'=>1, 'u'=>1, 'var'=>1);
$eN = array('a'=>1, 'big'=>1, 'button'=>1, 'fieldset'=>1, 'font'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'label'=>1, 'object'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1); // Exclude from specific ele; $cN values
$eO = array('area'=>1, 'caption'=>1, 'col'=>1, 'colgroup'=>1, 'dd'=>1, 'dt'=>1, 'legend'=>1, 'li'=>1, 'optgroup'=>1, 'option'=>1, 'param'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'script'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'thead'=>1, 'th'=>1, 'tr'=>1); // Missing in $eB & $eI
$eF = $eB + $eI;
// $in sets allowed child
$in = ((isset($eF[$in]) && $in != '#pcdata') or isset($eO[$in])) ? $in : 'div';
if(isset($cE[$in])){
return (!$do ? '' : str_replace(array('<', '>'), array('&lt;', '&gt;'), $t));
}
if(isset($cS[$in])){$inOk = $cS[$in];}
elseif(isset($cI[$in])){$inOk = $eI; $cI['del'] = 1; $cI['ins'] = 1;}
elseif(isset($cF[$in])){$inOk = $eF; unset($cI['del'], $cI['ins']);}
elseif(isset($cB[$in])){$inOk = $eB; unset($cI['del'], $cI['ins']);}
if(isset($cO[$in])){$inOk = $inOk + $cO[$in];}
if(isset($cN[$in])){$inOk = array_diff_assoc($inOk, $cN[$in]);}
$t = explode('<', $t);
$ok = $q = array(); // $q seq list of open non-empty ele
ob_start();
for($i=-1, $ci=count($t); ++$i<$ci;){
// allowed $ok in parent $p
if($ql = count($q)){
$p = array_pop($q);
$q[] = $p;
if(isset($cS[$p])){$ok = $cS[$p];}
elseif(isset($cI[$p])){$ok = $eI; $cI['del'] = 1; $cI['ins'] = 1;}
elseif(isset($cF[$p])){$ok = $eF; unset($cI['del'], $cI['ins']);}
elseif(isset($cB[$p])){$ok = $eB; unset($cI['del'], $cI['ins']);}
if(isset($cO[$p])){$ok = $ok + $cO[$p];}
if(isset($cN[$p])){$ok = array_diff_assoc($ok, $cN[$p]);}
}else{$ok = $inOk; unset($cI['del'], $cI['ins']);}
// bad tags, & ele content
if(isset($e) && ($do == 1 or (isset($ok['#pcdata']) && ($do == 3 or $do == 5)))){
echo '&lt;', $s, $e, $a, '&gt;';
}
if(isset($x[0])){
if(strlen(trim($x)) && (($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql))){
echo '<div>', $x, '</div>';
}
elseif($do < 3 or isset($ok['#pcdata'])){echo $x;}
elseif(strpos($x, "\x02\x04")){
foreach(preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v){
echo (substr($v, 0, 2) == "\x01\x02" ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : ''));
}
}elseif($do > 4){echo preg_replace('`\S`', '', $x);}
}
// get markup
if(!preg_match('`^(/?)([a-z1-6]+)([^>]*)>(.*)`sm', $t[$i], $r)){$x = $t[$i]; continue;}
$s = null; $e = null; $a = null; $x = null; list($all, $s, $e, $a, $x) = $r;
// close tag
if($s){
if(isset($cE[$e]) or !in_array($e, $q)){continue;} // Empty/unopen
if($p == $e){array_pop($q); echo '</', $e, '>'; unset($e); continue;} // Last open
$add = ''; // Nesting - close open tags that need to be
for($j=-1, $cj=count($q); ++$j<$cj;){
if(($d = array_pop($q)) == $e){break;}
else{$add .= "</{$d}>";}
}
echo $add, '</', $e, '>'; unset($e); continue;
}
// open tag
// $cB ele needs $eB ele as child
if(isset($cB[$e]) && strlen(trim($x))){
$t[$i] = "{$e}{$a}>";
array_splice($t, $i+1, 0, 'div>'. $x); unset($e, $x); ++$ci; --$i; continue;
}
if((($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql)) && !isset($eB[$e]) && !isset($ok[$e])){
array_splice($t, $i, 0, 'div>'); unset($e, $x); ++$ci; --$i; continue;
}
// if no open ele, $in = parent; mostly immediate parent-child relation should hold
if(!$ql or !isset($eN[$e]) or !array_intersect($q, $cN2)){
if(!isset($ok[$e])){
if($ql && isset($cT[$p])){echo '</', array_pop($q), '>'; unset($e, $x); --$i;}
continue;
}
if(!isset($cE[$e])){$q[] = $e;}
echo '<', $e, $a, '>'; unset($e); continue;
}
// specific parent-child
if(isset($cS[$p][$e])){
if(!isset($cE[$e])){$q[] = $e;}
echo '<', $e, $a, '>'; unset($e); continue;
}
// nesting
$add = '';
$q2 = array();
for($k=-1, $kc=count($q); ++$k<$kc;){
$d = $q[$k];
$ok2 = array();
if(isset($cS[$d])){$q2[] = $d; continue;}
$ok2 = isset($cI[$d]) ? $eI : $eF;
if(isset($cO[$d])){$ok2 = $ok2 + $cO[$d];}
if(isset($cN[$d])){$ok2 = array_diff_assoc($ok2, $cN[$d]);}
if(!isset($ok2[$e])){
if(!$k && !isset($inOk[$e])){continue 2;}
$add = "</{$d}>";
for(;++$k<$kc;){$add = "</{$q[$k]}>{$add}";}
break;
}
else{$q2[] = $d;}
}
$q = $q2;
if(!isset($cE[$e])){$q[] = $e;}
echo $add, '<', $e, $a, '>'; unset($e); continue;
}
// end
if($ql = count($q)){
$p = array_pop($q);
$q[] = $p;
if(isset($cS[$p])){$ok = $cS[$p];}
elseif(isset($cI[$p])){$ok = $eI; $cI['del'] = 1; $cI['ins'] = 1;}
elseif(isset($cF[$p])){$ok = $eF; unset($cI['del'], $cI['ins']);}
elseif(isset($cB[$p])){$ok = $eB; unset($cI['del'], $cI['ins']);}
if(isset($cO[$p])){$ok = $ok + $cO[$p];}
if(isset($cN[$p])){$ok = array_diff_assoc($ok, $cN[$p]);}
}else{$ok = $inOk; unset($cI['del'], $cI['ins']);}
if(isset($e) && ($do == 1 or (isset($ok['#pcdata']) && ($do == 3 or $do == 5)))){
echo '&lt;', $s, $e, $a, '&gt;';
}
if(isset($x[0])){
if(strlen(trim($x)) && (($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql))){
echo '<div>', $x, '</div>';
}
elseif($do < 3 or isset($ok['#pcdata'])){echo $x;}
elseif(strpos($x, "\x02\x04")){
foreach(preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v){
echo (substr($v, 0, 2) == "\x01\x02" ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : ''));
}
}elseif($do > 4){echo preg_replace('`\S`', '', $x);}
}
while(!empty($q) && ($e = array_pop($q))){echo '</', $e, '>';}
$o = ob_get_contents();
ob_end_clean();
return $o;
// eof
}
public static function hl_cmtcd($t){
// comment/CDATA sec handler
$t = $t[0];
global $C;
if(!($v = $C[$n = $t[3] == '-' ? 'comment' : 'cdata'])){return $t;}
if($v == 1){return '';}
if($n == 'comment'){
if(substr(($t = preg_replace('`--+`', '-', substr($t, 4, -3))), -1) != ' '){$t .= ' ';}
}
else{$t = substr($t, 1, -1);}
$t = $v == 2 ? str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $t) : $t;
return str_replace(array('&', '<', '>'), array("\x03", "\x04", "\x05"), ($n == 'comment' ? "\x01\x02\x04!--$t--\x05\x02\x01" : "\x01\x01\x04$t\x05\x01\x01"));
// eof
}
public static function hl_ent($t){
// entitity handler
global $C;
$t = $t[1];
static $U = array('quot'=>1,'amp'=>1,'lt'=>1,'gt'=>1);
static $N = array('fnof'=>'402', 'Alpha'=>'913', 'Beta'=>'914', 'Gamma'=>'915', 'Delta'=>'916', 'Epsilon'=>'917', 'Zeta'=>'918', 'Eta'=>'919', 'Theta'=>'920', 'Iota'=>'921', 'Kappa'=>'922', 'Lambda'=>'923', 'Mu'=>'924', 'Nu'=>'925', 'Xi'=>'926', 'Omicron'=>'927', 'Pi'=>'928', 'Rho'=>'929', 'Sigma'=>'931', 'Tau'=>'932', 'Upsilon'=>'933', 'Phi'=>'934', 'Chi'=>'935', 'Psi'=>'936', 'Omega'=>'937', 'alpha'=>'945', 'beta'=>'946', 'gamma'=>'947', 'delta'=>'948', 'epsilon'=>'949', 'zeta'=>'950', 'eta'=>'951', 'theta'=>'952', 'iota'=>'953', 'kappa'=>'954', 'lambda'=>'955', 'mu'=>'956', 'nu'=>'957', 'xi'=>'958', 'omicron'=>'959', 'pi'=>'960', 'rho'=>'961', 'sigmaf'=>'962', 'sigma'=>'963', 'tau'=>'964', 'upsilon'=>'965', 'phi'=>'966', 'chi'=>'967', 'psi'=>'968', 'omega'=>'969', 'thetasym'=>'977', 'upsih'=>'978', 'piv'=>'982', 'bull'=>'8226', 'hellip'=>'8230', 'prime'=>'8242', 'Prime'=>'8243', 'oline'=>'8254', 'frasl'=>'8260', 'weierp'=>'8472', 'image'=>'8465', 'real'=>'8476', 'trade'=>'8482', 'alefsym'=>'8501', 'larr'=>'8592', 'uarr'=>'8593', 'rarr'=>'8594', 'darr'=>'8595', 'harr'=>'8596', 'crarr'=>'8629', 'lArr'=>'8656', 'uArr'=>'8657', 'rArr'=>'8658', 'dArr'=>'8659', 'hArr'=>'8660', 'forall'=>'8704', 'part'=>'8706', 'exist'=>'8707', 'empty'=>'8709', 'nabla'=>'8711', 'isin'=>'8712', 'notin'=>'8713', 'ni'=>'8715', 'prod'=>'8719', 'sum'=>'8721', 'minus'=>'8722', 'lowast'=>'8727', 'radic'=>'8730', 'prop'=>'8733', 'infin'=>'8734', 'ang'=>'8736', 'and'=>'8743', 'or'=>'8744', 'cap'=>'8745', 'cup'=>'8746', 'int'=>'8747', 'there4'=>'8756', 'sim'=>'8764', 'cong'=>'8773', 'asymp'=>'8776', 'ne'=>'8800', 'equiv'=>'8801', 'le'=>'8804', 'ge'=>'8805', 'sub'=>'8834', 'sup'=>'8835', 'nsub'=>'8836', 'sube'=>'8838', 'supe'=>'8839', 'oplus'=>'8853', 'otimes'=>'8855', 'perp'=>'8869', 'sdot'=>'8901', 'lceil'=>'8968', 'rceil'=>'8969', 'lfloor'=>'8970', 'rfloor'=>'8971', 'lang'=>'9001', 'rang'=>'9002', 'loz'=>'9674', 'spades'=>'9824', 'clubs'=>'9827', 'hearts'=>'9829', 'diams'=>'9830', 'apos'=>'39', 'OElig'=>'338', 'oelig'=>'339', 'Scaron'=>'352', 'scaron'=>'353', 'Yuml'=>'376', 'circ'=>'710', 'tilde'=>'732', 'ensp'=>'8194', 'emsp'=>'8195', 'thinsp'=>'8201', 'zwnj'=>'8204', 'zwj'=>'8205', 'lrm'=>'8206', 'rlm'=>'8207', 'ndash'=>'8211', 'mdash'=>'8212', 'lsquo'=>'8216', 'rsquo'=>'8217', 'sbquo'=>'8218', 'ldquo'=>'8220', 'rdquo'=>'8221', 'bdquo'=>'8222', 'dagger'=>'8224', 'Dagger'=>'8225', 'permil'=>'8240', 'lsaquo'=>'8249', 'rsaquo'=>'8250', 'euro'=>'8364', 'nbsp'=>'160', 'iexcl'=>'161', 'cent'=>'162', 'pound'=>'163', 'curren'=>'164', 'yen'=>'165', 'brvbar'=>'166', 'sect'=>'167', 'uml'=>'168', 'copy'=>'169', 'ordf'=>'170', 'laquo'=>'171', 'not'=>'172', 'shy'=>'173', 'reg'=>'174', 'macr'=>'175', 'deg'=>'176', 'plusmn'=>'177', 'sup2'=>'178', 'sup3'=>'179', 'acute'=>'180', 'micro'=>'181', 'para'=>'182', 'middot'=>'183', 'cedil'=>'184', 'sup1'=>'185', 'ordm'=>'186', 'raquo'=>'187', 'frac14'=>'188', 'frac12'=>'189', 'frac34'=>'190', 'iquest'=>'191', 'Agrave'=>'192', 'Aacute'=>'193', 'Acirc'=>'194', 'Atilde'=>'195', 'Auml'=>'196', 'Aring'=>'197', 'AElig'=>'198', 'Ccedil'=>'199', 'Egrave'=>'200', 'Eacute'=>'201', 'Ecirc'=>'202', 'Euml'=>'203', 'Igrave'=>'204', 'Iacute'=>'205', 'Icirc'=>'206', 'Iuml'=>'207', 'ETH'=>'208', 'Ntilde'=>'209', 'Ograve'=>'210', 'Oacute'=>'211', 'Ocirc'=>'212', 'Otilde'=>'213', 'Ouml'=>'214', 'times'=>'215', 'Oslash'=>'216', 'Ugrave'=>'217', 'Uacute'=>'218', 'Ucirc'=>'219', 'Uuml'=>'220', 'Yacute'=>'221', 'THORN'=>'222', 'szlig'=>'223', 'agrave'=>'224', 'aacute'=>'225', 'acirc'=>'226', 'atilde'=>'227', 'auml'=>'228', 'aring'=>'229', 'aelig'=>'230', 'ccedil'=>'231', 'egrave'=>'232', 'eacute'=>'233', 'ecirc'=>'234', 'euml'=>'235', 'igrave'=>'236', 'iacute'=>'237', 'icirc'=>'238', 'iuml'=>'239', 'eth'=>'240', 'ntilde'=>'241', 'ograve'=>'242', 'oacute'=>'243', 'ocirc'=>'244', 'otilde'=>'245', 'ouml'=>'246', 'divide'=>'247', 'oslash'=>'248', 'ugrave'=>'249', 'uacute'=>'250', 'ucirc'=>'251', 'uuml'=>'252', 'yacute'=>'253', 'thorn'=>'254', 'yuml'=>'255');
if($t[0] != '#'){
return ($C['and_mark'] ? "\x06" : '&'). (isset($U[$t]) ? $t : (isset($N[$t]) ? (!$C['named_entity'] ? '#'. ($C['hexdec_entity'] > 1 ? 'x'. dechex($N[$t]) : $N[$t]) : $t) : 'amp;'. $t)). ';';
}
if(($n = ctype_digit($t = substr($t, 1)) ? intval($t) : hexdec(substr($t, 1))) < 9 or ($n > 13 && $n < 32) or $n == 11 or $n == 12 or ($n > 126 && $n < 160 && $n != 133) or ($n > 55295 && ($n < 57344 or ($n > 64975 && $n < 64992) or $n == 65534 or $n == 65535 or $n > 1114111))){
return ($C['and_mark'] ? "\x06" : '&'). "amp;#{$t};";
}
return ($C['and_mark'] ? "\x06" : '&'). '#'. (((ctype_digit($t) && $C['hexdec_entity'] < 2) or !$C['hexdec_entity']) ? $n : 'x'. dechex($n)). ';';
// eof
}
public static function hl_prot($p, $c=null){
// check URL scheme
global $C;
$b = $a = '';
if($c == null){$c = 'style'; $b = $p[1]; $a = $p[3]; $p = trim($p[2]);}
$c = isset($C['schemes'][$c]) ? $C['schemes'][$c] : $C['schemes']['*'];
static $d = 'denied:';
if(isset($c['!']) && substr($p, 0, 7) != $d){$p = "$d$p";}
if(isset($c['*']) or !strcspn($p, '#?;') or (substr($p, 0, 7) == $d)){return "{$b}{$p}{$a}";} // All ok, frag, query, param
if(preg_match('`^([a-z\d\-+.&#; ]+?)(:|&#(58|x3a);|%3a|\\\\0{0,4}3a).`i', $p, $m) && !isset($c[strtolower($m[1])])){ // Denied prot
return "{$b}{$d}{$p}{$a}";
}
if($C['abs_url']){
if($C['abs_url'] == -1 && strpos($p, $C['base_url']) === 0){ // Make url rel
$p = substr($p, strlen($C['base_url']));
}elseif(empty($m[1])){ // Make URL abs
if(substr($p, 0, 2) == '//'){$p = substr($C['base_url'], 0, strpos($C['base_url'], ':')+1). $p;}
elseif($p[0] == '/'){$p = preg_replace('`(^.+?://[^/]+)(.*)`', '$1', $C['base_url']). $p;}
elseif(strcspn($p, './')){$p = $C['base_url']. $p;}
else{
preg_match('`^([a-zA-Z\d\-+.]+://[^/]+)(.*)`', $C['base_url'], $m);
$p = preg_replace('`(?<=/)\./`', '', $m[2]. $p);
while(preg_match('`(?<=/)([^/]{3,}|[^/.]+?|\.[^/.]|[^/.]\.)/\.\./`', $p)){
$p = preg_replace('`(?<=/)([^/]{3,}|[^/.]+?|\.[^/.]|[^/.]\.)/\.\./`', '', $p);
}
$p = $m[1]. $p;
}
}
}
return "{$b}{$p}{$a}";
// eof
}
public static function hl_regex($p){
// ?regex
if(empty($p)){return 0;}
if($t = ini_get('track_errors')){$o = isset($php_errormsg) ? $php_errormsg : null;}
else{ini_set('track_errors', 1);}
unset($php_errormsg);
if(($d = ini_get('display_errors'))){ini_set('display_errors', 0);}
preg_match($p, '');
if($d){ini_set('display_errors', 1);}
$r = isset($php_errormsg) ? 0 : 1;
if($t){$php_errormsg = isset($o) ? $o : null;}
else{ini_set('track_errors', 0);}
return $r;
// eof
}
public static function hl_spec($t){
// final $spec
$s = array();
$t = str_replace(array("\t", "\r", "\n", ' '), '', preg_replace('/"(?>(`.|[^"])*)"/sme', 'substr(str_replace(array(";", "|", "~", " ", ",", "/", "(", ")", \'`"\'), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08", "\""), "$0"), 1, -1)', trim($t)));
for($i = count(($t = explode(';', $t))); --$i>=0;){
$w = $t[$i];
if(empty($w) or ($e = strpos($w, '=')) === false or !strlen(($a = substr($w, $e+1)))){continue;}
$y = $n = array();
foreach(explode(',', $a) as $v){
if(!preg_match('`^([a-z:\-\*]+)(?:\((.*?)\))?`i', $v, $m)){continue;}
if(($x = strtolower($m[1])) == '-*'){$n['*'] = 1; continue;}
if($x[0] == '-'){$n[substr($x, 1)] = 1; continue;}
if(!isset($m[2])){$y[$x] = 1; continue;}
foreach(explode('/', $m[2]) as $m){
if(empty($m) or ($p = strpos($m, '=')) == 0 or $p < 5){$y[$x] = 1; continue;}
$y[$x][strtolower(substr($m, 0, $p))] = str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08"), array(";", "|", "~", " ", ",", "/", "(", ")"), substr($m, $p+1));
}
if(isset($y[$x]['match']) && !htmLawed::hl_regex($y[$x]['match'])){unset($y[$x]['match']);}
if(isset($y[$x]['nomatch']) && !htmLawed::hl_regex($y[$x]['nomatch'])){unset($y[$x]['nomatch']);}
}
if(!count($y) && !count($n)){continue;}
foreach(explode(',', substr($w, 0, $e)) as $v){
if(!strlen(($v = strtolower($v)))){continue;}
if(count($y)){$s[$v] = $y;}
if(count($n)){$s[$v]['n'] = $n;}
}
}
return $s;
// eof
}
public static function hl_tag($t){
// tag/attribute handler
global $C;
$t = $t[0];
// invalid < >
if($t == '< '){return '&lt; ';}
if($t == '>'){return '&gt;';}
if(!preg_match('`^<(/?)([a-zA-Z][a-zA-Z1-6]*)([^>]*?)\s?>$`m', $t, $m)){
return str_replace(array('<', '>'), array('&lt;', '&gt;'), $t);
}elseif(!isset($C['elements'][($e = strtolower($m[2]))])){
return (($C['keep_bad']%2) ? str_replace(array('<', '>'), array('&lt;', '&gt;'), $t) : '');
}
// attr string
$a = str_replace(array("\n", "\r", "\t"), ' ', trim($m[3]));
// tag transform
static $eD = array('applet'=>1, 'center'=>1, 'dir'=>1, 'embed'=>1, 'font'=>1, 'isindex'=>1, 'menu'=>1, 's'=>1, 'strike'=>1, 'u'=>1); // Deprecated
if($C['make_tag_strict'] && isset($eD[$e])){
$trt = htmLawed::hl_tag2($e, $a, $C['make_tag_strict']);
if(!$e){return (($C['keep_bad']%2) ? str_replace(array('<', '>'), array('&lt;', '&gt;'), $t) : '');}
}
// close tag
static $eE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); // Empty ele
if(!empty($m[1])){
return (!isset($eE[$e]) ? (empty($C['hook_tag']) ? "</$e>" : $C['hook_tag']($e)) : (($C['keep_bad'])%2 ? str_replace(array('<', '>'), array('&lt;', '&gt;'), $t) : ''));
}
// open tag & attr
static $aN = array('abbr'=>array('td'=>1, 'th'=>1), 'accept-charset'=>array('form'=>1), 'accept'=>array('form'=>1, 'input'=>1), 'accesskey'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'legend'=>1, 'textarea'=>1), 'action'=>array('form'=>1), 'align'=>array('caption'=>1, 'embed'=>1, 'applet'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'object'=>1, 'legend'=>1, 'table'=>1, 'hr'=>1, 'div'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'p'=>1, 'col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'alt'=>array('applet'=>1, 'area'=>1, 'img'=>1, 'input'=>1), 'archive'=>array('applet'=>1, 'object'=>1), 'axis'=>array('td'=>1, 'th'=>1), 'bgcolor'=>array('embed'=>1, 'table'=>1, 'tr'=>1, 'td'=>1, 'th'=>1), 'border'=>array('table'=>1, 'img'=>1, 'object'=>1), 'bordercolor'=>array('table'=>1, 'td'=>1, 'tr'=>1), 'cellpadding'=>array('table'=>1), 'cellspacing'=>array('table'=>1), 'char'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charoff'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charset'=>array('a'=>1, 'script'=>1), 'checked'=>array('input'=>1), 'cite'=>array('blockquote'=>1, 'q'=>1, 'del'=>1, 'ins'=>1), 'classid'=>array('object'=>1), 'clear'=>array('br'=>1), 'code'=>array('applet'=>1), 'codebase'=>array('object'=>1, 'applet'=>1), 'codetype'=>array('object'=>1), 'color'=>array('font'=>1), 'cols'=>array('textarea'=>1), 'colspan'=>array('td'=>1, 'th'=>1), 'compact'=>array('dir'=>1, 'dl'=>1, 'menu'=>1, 'ol'=>1, 'ul'=>1), 'coords'=>array('area'=>1, 'a'=>1), 'data'=>array('object'=>1), 'datetime'=>array('del'=>1, 'ins'=>1), 'declare'=>array('object'=>1), 'defer'=>array('script'=>1), 'dir'=>array('bdo'=>1), 'disabled'=>array('button'=>1, 'input'=>1, 'optgroup'=>1, 'option'=>1, 'select'=>1, 'textarea'=>1), 'enctype'=>array('form'=>1), 'face'=>array('font'=>1), 'for'=>array('label'=>1), 'frame'=>array('table'=>1), 'frameborder'=>array('iframe'=>1), 'headers'=>array('td'=>1, 'th'=>1), 'height'=>array('embed'=>1, 'iframe'=>1, 'td'=>1, 'th'=>1, 'img'=>1, 'object'=>1, 'applet'=>1), 'href'=>array('a'=>1, 'area'=>1), 'hreflang'=>array('a'=>1), 'hspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'ismap'=>array('img'=>1, 'input'=>1), 'label'=>array('option'=>1, 'optgroup'=>1), 'language'=>array('script'=>1), 'longdesc'=>array('img'=>1, 'iframe'=>1), 'marginheight'=>array('iframe'=>1), 'marginwidth'=>array('iframe'=>1), 'maxlength'=>array('input'=>1), 'method'=>array('form'=>1), 'model'=>array('embed'=>1), 'multiple'=>array('select'=>1), 'name'=>array('button'=>1, 'embed'=>1, 'textarea'=>1, 'applet'=>1, 'select'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'a'=>1, 'input'=>1, 'object'=>1, 'map'=>1, 'param'=>1), 'nohref'=>array('area'=>1), 'noshade'=>array('hr'=>1), 'nowrap'=>array('td'=>1, 'th'=>1), 'object'=>array('applet'=>1), 'onblur'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onchange'=>array('input'=>1, 'select'=>1, 'textarea'=>1), 'onfocus'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onreset'=>array('form'=>1), 'onselect'=>array('input'=>1, 'textarea'=>1), 'onsubmit'=>array('form'=>1), 'pluginspage'=>array('embed'=>1), 'pluginurl'=>array('embed'=>1), 'prompt'=>array('isindex'=>1), 'readonly'=>array('textarea'=>1, 'input'=>1), 'rel'=>array('a'=>1), 'rev'=>array('a'=>1), 'rows'=>array('textarea'=>1), 'rowspan'=>array('td'=>1, 'th'=>1), 'rules'=>array('table'=>1), 'scope'=>array('td'=>1, 'th'=>1), 'scrolling'=>array('iframe'=>1), 'selected'=>array('option'=>1), 'shape'=>array('area'=>1, 'a'=>1), 'size'=>array('hr'=>1, 'font'=>1, 'input'=>1, 'select'=>1), 'span'=>array('col'=>1, 'colgroup'=>1), 'src'=>array('embed'=>1, 'script'=>1, 'input'=>1, 'iframe'=>1, 'img'=>1), 'standby'=>array('object'=>1), 'start'=>array('ol'=>1), 'summary'=>array('table'=>1), 'tabindex'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'object'=>1, 'select'=>1, 'textarea'=>1), 'target'=>array('a'=>1, 'area'=>1, 'form'=>1), 'type'=>array('a'=>1, 'embed'=>1, 'object'=>1, 'param'=>1, 'script'=>1, 'input'=>1, 'li'=>1, 'ol'=>1, 'ul'=>1, 'button'=>1), 'usemap'=>array('img'=>1, 'input'=>1, 'object'=>1), 'valign'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'value'=>array('input'=>1, 'option'=>1, 'param'=>1, 'button'=>1, 'li'=>1), 'valuetype'=>array('param'=>1), 'vspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'width'=>array('embed'=>1, 'hr'=>1, 'iframe'=>1, 'img'=>1, 'object'=>1, 'table'=>1, 'td'=>1, 'th'=>1, 'applet'=>1, 'col'=>1, 'colgroup'=>1, 'pre'=>1), 'wmode'=>array('embed'=>1), 'xml:space'=>array('pre'=>1, 'script'=>1, 'style'=>1)); // Ele-specific
static $aNE = array('checked'=>1, 'compact'=>1, 'declare'=>1, 'defer'=>1, 'disabled'=>1, 'ismap'=>1, 'multiple'=>1, 'nohref'=>1, 'noresize'=>1, 'noshade'=>1, 'nowrap'=>1, 'readonly'=>1, 'selected'=>1); // Empty
static $aNP = array('action'=>1, 'cite'=>1, 'classid'=>1, 'codebase'=>1, 'data'=>1, 'href'=>1, 'longdesc'=>1, 'model'=>1, 'pluginspage'=>1, 'pluginurl'=>1, 'usemap'=>1); // Need scheme check; excludes style, on* & src
static $aNU = array('class'=>array('param'=>1, 'script'=>1), 'dir'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'id'=>array('script'=>1), 'lang'=>array('applet'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'xml:lang'=>array('applet'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'onclick'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'ondblclick'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeydown'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeypress'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeyup'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmousedown'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmousemove'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseout'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseover'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseup'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'style'=>array('param'=>1, 'script'=>1), 'title'=>array('param'=>1, 'script'=>1)); // Univ & exceptions
if($C['lc_std_val']){
// predef attr vals for $eAL & $aNE ele
static $aNL = array('all'=>1, 'baseline'=>1, 'bottom'=>1, 'button'=>1, 'center'=>1, 'char'=>1, 'checkbox'=>1, 'circle'=>1, 'col'=>1, 'colgroup'=>1, 'cols'=>1, 'data'=>1, 'default'=>1, 'file'=>1, 'get'=>1, 'groups'=>1, 'hidden'=>1, 'image'=>1, 'justify'=>1, 'left'=>1, 'ltr'=>1, 'middle'=>1, 'none'=>1, 'object'=>1, 'password'=>1, 'poly'=>1, 'post'=>1, 'preserve'=>1, 'radio'=>1, 'rect'=>1, 'ref'=>1, 'reset'=>1, 'right'=>1, 'row'=>1, 'rowgroup'=>1, 'rows'=>1, 'rtl'=>1, 'submit'=>1, 'text'=>1, 'top'=>1);
static $eAL = array('a'=>1, 'area'=>1, 'bdo'=>1, 'button'=>1, 'col'=>1, 'form'=>1, 'img'=>1, 'input'=>1, 'object'=>1, 'optgroup'=>1, 'option'=>1, 'param'=>1, 'script'=>1, 'select'=>1, 'table'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1, 'xml:space'=>1);
$lcase = isset($eAL[$e]) ? 1 : 0;
}
$depTr = 0;
if($C['no_deprecated_attr']){
// dep attr:applicable ele
static $aND = array('align'=>array('caption'=>1, 'div'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'legend'=>1, 'object'=>1, 'p'=>1, 'table'=>1), 'bgcolor'=>array('table'=>1, 'td'=>1, 'th'=>1, 'tr'=>1), 'border'=>array('img'=>1, 'object'=>1), 'bordercolor'=>array('table'=>1, 'td'=>1, 'tr'=>1), 'clear'=>array('br'=>1), 'compact'=>array('dl'=>1, 'ol'=>1, 'ul'=>1), 'height'=>array('td'=>1, 'th'=>1), 'hspace'=>array('img'=>1, 'object'=>1), 'language'=>array('script'=>1), 'name'=>array('a'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'map'=>1), 'noshade'=>array('hr'=>1), 'nowrap'=>array('td'=>1, 'th'=>1), 'size'=>array('hr'=>1), 'start'=>array('ol'=>1), 'type'=>array('li'=>1, 'ol'=>1, 'ul'=>1), 'value'=>array('li'=>1), 'vspace'=>array('img'=>1, 'object'=>1), 'width'=>array('hr'=>1, 'pre'=>1, 'td'=>1, 'th'=>1));
static $eAD = array('a'=>1, 'br'=>1, 'caption'=>1, 'div'=>1, 'dl'=>1, 'form'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'legend'=>1, 'li'=>1, 'map'=>1, 'object'=>1, 'ol'=>1, 'p'=>1, 'pre'=>1, 'script'=>1, 'table'=>1, 'td'=>1, 'th'=>1, 'tr'=>1, 'ul'=>1);
$depTr = isset($eAD[$e]) ? 1 : 0;
}
// attr name-vals
if(strpos($a, "\x01") !== false){$a = preg_replace('`\x01[^\x01]*\x01`', '', $a);} // No comment/CDATA sec
$mode = 0; $a = trim($a, ' /'); $aA = array();
while(strlen($a)){
$w = 0;
switch($mode){
case 0: // Name
if(preg_match('`^[a-zA-Z][\-a-zA-Z:]+`', $a, $m)){
$nm = strtolower($m[0]);
$w = $mode = 1; $a = ltrim(substr_replace($a, '', 0, strlen($m[0])));
}
break; case 1:
if($a[0] == '='){ // =
$w = 1; $mode = 2; $a = ltrim($a, '= ');
}else{ // No val
$w = 1; $mode = 0; $a = ltrim($a);
$aA[$nm] = '';
}
break; case 2: // Val
if(preg_match('`^((?:"[^"]*")|(?:\'[^\']*\')|(?:\s*[^\s"\']+))(.*)`', $a, $m)){
$a = ltrim($m[2]); $m = $m[1]; $w = 1; $mode = 0;
$aA[$nm] = trim(($m[0] == '"' or $m[0] == '\'') ? substr($m, 1, -1) : $m);
}
break;
}
if($w == 0){ // Parse errs, deal with space, " & '
$a = preg_replace('`^(?:"[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*`', '', $a);
$mode = 0;
}
}
if($mode == 1){$aA[$nm] = '';}
// clean attrs
global $S;
$rl = isset($S[$e]) ? $S[$e] : array();
$a = array(); $nfr = 0;
foreach($aA as $k=>$v){
if(((isset($C['deny_attribute']['*']) ? isset($C['deny_attribute'][$k]) : !isset($C['deny_attribute'][$k])) && (isset($aN[$k][$e]) or (isset($aNU[$k]) && !isset($aNU[$k][$e]))) && !isset($rl['n'][$k]) && !isset($rl['n']['*'])) or isset($rl[$k])){
if(isset($aNE[$k])){$v = $k;}
elseif(!empty($lcase) && (($e != 'button' or $e != 'input') or $k == 'type')){ // Rather loose but ?not cause issues
$v = (isset($aNL[($v2 = strtolower($v))])) ? $v2 : $v;
}
if($k == 'style' && !$C['style_pass']){
if(false !== strpos($v, '&#')){
static $sC = array('&#x20;'=>' ', '&#32;'=>' ', '&#x45;'=>'e', '&#69;'=>'e', '&#x65;'=>'e', '&#101;'=>'e', '&#x58;'=>'x', '&#88;'=>'x', '&#x78;'=>'x', '&#120;'=>'x', '&#x50;'=>'p', '&#80;'=>'p', '&#x70;'=>'p', '&#112;'=>'p', '&#x53;'=>'s', '&#83;'=>'s', '&#x73;'=>'s', '&#115;'=>'s', '&#x49;'=>'i', '&#73;'=>'i', '&#x69;'=>'i', '&#105;'=>'i', '&#x4f;'=>'o', '&#79;'=>'o', '&#x6f;'=>'o', '&#111;'=>'o', '&#x4e;'=>'n', '&#78;'=>'n', '&#x6e;'=>'n', '&#110;'=>'n', '&#x55;'=>'u', '&#85;'=>'u', '&#x75;'=>'u', '&#117;'=>'u', '&#x52;'=>'r', '&#82;'=>'r', '&#x72;'=>'r', '&#114;'=>'r', '&#x4c;'=>'l', '&#76;'=>'l', '&#x6c;'=>'l', '&#108;'=>'l', '&#x28;'=>'(', '&#40;'=>'(', '&#x29;'=>')', '&#41;'=>')', '&#x20;'=>':', '&#32;'=>':', '&#x22;'=>'"', '&#34;'=>'"', '&#x27;'=>"'", '&#39;'=>"'", '&#x2f;'=>'/', '&#47;'=>'/', '&#x2a;'=>'*', '&#42;'=>'*', '&#x5c;'=>'\\', '&#92;'=>'\\');
$v = strtr($v, $sC);
}
$v = preg_replace_callback('`(url(?:\()(?: )*(?:\'|"|&(?:quot|apos);)?)(.+?)((?:\'|"|&(?:quot|apos);)?(?: )*(?:\)))`iS', 'htmLawed::hl_prot', $v);
$v = !$C['css_expression'] ? preg_replace('`expression`i', ' ', preg_replace('`\\\\\S|(/|(%2f))(\*|(%2a))`i', ' ', $v)) : $v;
}elseif(isset($aNP[$k]) or strpos($k, 'src') !== false or $k[0] == 'o'){
$v = str_replace("\xad", ' ', (strpos($v, '&') !== false ? str_replace(array('&#xad;', '&#173;', '&shy;'), ' ', $v) : $v));
$v = htmLawed::hl_prot($v, $k);
if($k == 'href'){ // X-spam
if($C['anti_mail_spam'] && strpos($v, 'mailto:') === 0){
$v = str_replace('@', htmlspecialchars($C['anti_mail_spam']), $v);
}elseif($C['anti_link_spam']){
$r1 = $C['anti_link_spam'][1];
if(!empty($r1) && preg_match($r1, $v)){continue;}
$r0 = $C['anti_link_spam'][0];
if(!empty($r0) && preg_match($r0, $v)){
if(isset($a['rel'])){
if(!preg_match('`\bnofollow\b`i', $a['rel'])){$a['rel'] .= ' nofollow';}
}elseif(isset($aA['rel'])){
if(!preg_match('`\bnofollow\b`i', $aA['rel'])){$nfr = 1;}
}else{$a['rel'] = 'nofollow';}
}
}
}
}
if(isset($rl[$k]) && is_array($rl[$k]) && ($v = htmLawed::hl_attrval($v, $rl[$k])) === 0){continue;}
$a[$k] = str_replace('"', '&quot;', $v);
}
}
if($nfr){$a['rel'] = isset($a['rel']) ? $a['rel']. ' nofollow' : 'nofollow';}
// rqd attr
static $eAR = array('area'=>array('alt'=>'area'), 'bdo'=>array('dir'=>'ltr'), 'form'=>array('action'=>''), 'img'=>array('src'=>'', 'alt'=>'image'), 'map'=>array('name'=>''), 'optgroup'=>array('label'=>''), 'param'=>array('name'=>''), 'script'=>array('type'=>'text/javascript'), 'textarea'=>array('rows'=>'10', 'cols'=>'50'));
if(isset($eAR[$e])){
foreach($eAR[$e] as $k=>$v){
if(!isset($a[$k])){$a[$k] = isset($v[0]) ? $v : $k;}
}
}
// depr attrs
if($depTr){
$c = array();
foreach($a as $k=>$v){
if($k == 'style' or !isset($aND[$k][$e])){continue;}
if($k == 'align'){
unset($a['align']);
if($e == 'img' && ($v == 'left' or $v == 'right')){$c[] = 'float: '. $v;}
elseif(($e == 'div' or $e == 'table') && $v == 'center'){$c[] = 'margin: auto';}
else{$c[] = 'text-align: '. $v;}
}elseif($k == 'bgcolor'){
unset($a['bgcolor']);
$c[] = 'background-color: '. $v;
}elseif($k == 'border'){
unset($a['border']); $c[] = "border: {$v}px";
}elseif($k == 'bordercolor'){
unset($a['bordercolor']); $c[] = 'border-color: '. $v;
}elseif($k == 'clear'){
unset($a['clear']); $c[] = 'clear: '. ($v != 'all' ? $v : 'both');
}elseif($k == 'compact'){
unset($a['compact']); $c[] = 'font-size: 85%';
}elseif($k == 'height' or $k == 'width'){
unset($a[$k]); $c[] = $k. ': '. ($v[0] != '*' ? $v. (ctype_digit($v) ? 'px' : '') : 'auto');
}elseif($k == 'hspace'){
unset($a['hspace']); $c[] = "margin-left: {$v}px; margin-right: {$v}px";
}elseif($k == 'language' && !isset($a['type'])){
unset($a['language']);
$a['type'] = 'text/'. strtolower($v);
}elseif($k == 'name'){
if($C['no_deprecated_attr'] == 2 or ($e != 'a' && $e != 'map')){unset($a['name']);}
if(!isset($a['id']) && preg_match('`[a-zA-Z][a-zA-Z\d.:_\-]*`', $v)){$a['id'] = $v;}
}elseif($k == 'noshade'){
unset($a['noshade']); $c[] = 'border-style: none; border: 0; background-color: gray; color: gray';
}elseif($k == 'nowrap'){
unset($a['nowrap']); $c[] = 'white-space: nowrap';
}elseif($k == 'size'){
unset($a['size']); $c[] = 'size: '. $v. 'px';
}elseif($k == 'start' or $k == 'value'){
unset($a[$k]);
}elseif($k == 'type'){
unset($a['type']);
static $ol_type = array('i'=>'lower-roman', 'I'=>'upper-roman', 'a'=>'lower-latin', 'A'=>'upper-latin', '1'=>'decimal');
$c[] = 'list-style-type: '. (isset($ol_type[$v]) ? $ol_type[$v] : 'decimal');
}elseif($k == 'vspace'){
unset($a['vspace']); $c[] = "margin-top: {$v}px; margin-bottom: {$v}px";
}
}
if(count($c)){
$c = implode('; ', $c);
$a['style'] = isset($a['style']) ? rtrim($a['style'], ' ;'). '; '. $c. ';': $c. ';';
}
}
// unique ID
if($C['unique_ids'] && isset($a['id'])){
if(!preg_match('`^[A-Za-z][A-Za-z0-9_\-.:]*$`', ($id = $a['id'])) or (isset($GLOBALS['hl_Ids'][$id]) && $C['unique_ids'] == 1)){unset($a['id']);
}else{
while(isset($GLOBALS['hl_Ids'][$id])){$id = $C['unique_ids']. $id;}
$GLOBALS['hl_Ids'][($a['id'] = $id)] = 1;
}
}
// xml:lang
if($C['xml:lang'] && isset($a['lang'])){
$a['xml:lang'] = isset($a['xml:lang']) ? $a['xml:lang'] : $a['lang'];
if($C['xml:lang'] == 2){unset($a['lang']);}
}
// for transformed tag
if(!empty($trt)){
$a['style'] = isset($a['style']) ? rtrim($a['style'], ' ;'). '; '. $trt : $trt;
}
// return with empty ele /
if(empty($C['hook_tag'])){
$aA = '';
foreach($a as $k=>$v){$aA .= " {$k}=\"{$v}\"";}
return "<{$e}{$aA}". (isset($eE[$e]) ? ' /' : ''). '>';
}
else{return $C['hook_tag']($e, $a);}
// eof
}
public static function hl_tag2(&$e, &$a, $t=1){
// transform tag
if($e == 'center'){$e = 'div'; return 'text-align: center;';}
if($e == 'dir' or $e == 'menu'){$e = 'ul'; return '';}
if($e == 's' or $e == 'strike'){$e = 'span'; return 'text-decoration: line-through;';}
if($e == 'u'){$e = 'span'; return 'text-decoration: underline;';}
static $fs = array('0'=>'xx-small', '1'=>'xx-small', '2'=>'small', '3'=>'medium', '4'=>'large', '5'=>'x-large', '6'=>'xx-large', '7'=>'300%', '-1'=>'smaller', '-2'=>'60%', '+1'=>'larger', '+2'=>'150%', '+3'=>'200%', '+4'=>'300%');
if($e == 'font'){
$a2 = '';
if(preg_match('`face\s*=\s*(\'|")([^=]+?)\\1`i', $a, $m) or preg_match('`face\s*=(\s*)(\S+)`i', $a, $m)){
$a2 .= ' font-family: '. str_replace('"', '\'', trim($m[2])). ';';
}
if(preg_match('`color\s*=\s*(\'|")?(.+?)(\\1|\s|$)`i', $a, $m)){
$a2 .= ' color: '. trim($m[2]). ';';
}
if(preg_match('`size\s*=\s*(\'|")?(.+?)(\\1|\s|$)`i', $a, $m) && isset($fs[($m = trim($m[2]))])){
$a2 .= ' font-size: '. $fs[$m]. ';';
}
$e = 'span'; return ltrim($a2);
}
if($t == 2){$e = 0; return 0;}
return '';
// eof
}
public static function hl_tidy($t, $w, $p){
// Tidy/compact HTM
if(strpos(' pre,script,textarea', "$p,")){return $t;}
$t = str_replace(' </', '</', preg_replace(array('`(<\w[^>]*(?<!/)>)\s+`', '`\s+`', '`(<\w[^>]*(?<!/)>) `'), array(' $1', ' ', '$1'), preg_replace_callback(array('`(<(!\[CDATA\[))(.+?)(\]\]>)`sm', '`(<(!--))(.+?)(-->)`sm', '`(<(pre|script|textarea)[^>]*?>)(.+?)(</\2>)`sm'), create_function('$m', 'return $m[1]. str_replace(array("<", ">", "\n", "\r", "\t", " "), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), $m[3]). $m[4];'), $t)));
if(($w = strtolower($w)) == -1){
return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array('<', '>', "\n", "\r", "\t", ' '), $t);
}
$s = strpos(" $w", 't') ? "\t" : ' ';
$s = preg_match('`\d`', $w, $m) ? str_repeat($s, $m[0]) : str_repeat($s, ($s == "\t" ? 1 : 2));
$N = preg_match('`[ts]([1-9])`', $w, $m) ? $m[1] : 0;
$a = array('br'=>1);
$b = array('button'=>1, 'input'=>1, 'option'=>1);
$c = array('caption'=>1, 'dd'=>1, 'dt'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'isindex'=>1, 'label'=>1, 'legend'=>1, 'li'=>1, 'object'=>1, 'p'=>1, 'pre'=>1, 'td'=>1, 'textarea'=>1, 'th'=>1);
$d = array('address'=>1, 'blockquote'=>1, 'center'=>1, 'colgroup'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'fieldset'=>1, 'form'=>1, 'hr'=>1, 'iframe'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'table'=>1, 'tbody'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1);
$T = explode('<', $t);
$X = 1;
while($X){
$n = $N;
$t = $T;
ob_start();
if(isset($d[$p])){echo str_repeat($s, ++$n);}
echo ltrim(array_shift($t));
for($i=-1, $j=count($t); ++$i<$j;){
$r = ''; list($e, $r) = explode('>', $t[$i]);
$x = $e[0] == '/' ? 0 : (substr($e, -1) == '/' ? 1 : ($e[0] != '!' ? 2 : -1));
$y = !$x ? ltrim($e, '/') : ($x > 0 ? substr($e, 0, strcspn($e, ' ')) : 0);
$e = "<$e>";
if(isset($d[$y])){
if(!$x){
if($n){echo "\n", str_repeat($s, --$n), "$e\n", str_repeat($s, $n);}
else{++$N; ob_end_clean(); continue 2;}
}
else{echo "\n", str_repeat($s, $n), "$e\n", str_repeat($s, ($x != 1 ? ++$n : $n));}
echo ltrim($r); continue;
}
$f = "\n". str_repeat($s, $n);
if(isset($c[$y])){
if(!$x){echo $e, $f, ltrim($r);}
else{echo $f, $e, $r;}
}elseif(isset($b[$y])){echo $f, $e, $r;
}elseif(isset($a[$y])){echo $e, $f, ltrim($r);
}elseif(!$y){echo $f, $e, $f, ltrim($r);
}else{echo $e, $r;}
}
$X = 0;
}
$t = preg_replace('`[\n]\s*?[\n]+`', "\n", ob_get_contents());
ob_end_clean();
if(($l = strpos(" $w", 'r') ? (strpos(" $w", 'n') ? "\r\n" : "\r") : 0)){
$t = str_replace("\n", $l, $t);
}
return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array('<', '>', "\n", "\r", "\t", ' '), $t);
// eof
}
public static function hl_version(){
// rel
return '1.1.14';
// eof
}
public static function kses($t, $h, $p=array('http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'gopher', 'mailto')){
// kses compat
foreach($h as $k=>$v){
$h[$k]['n']['*'] = 1;
}
$C['cdata'] = $C['comment'] = $C['make_tag_strict'] = $C['no_deprecated_attr'] = $C['unique_ids'] = 0;
$C['keep_bad'] = 1;
$C['elements'] = count($h) ? strtolower(implode(',', array_keys($h))) : '-*';
$C['hook'] = 'htmLawed::kses_hook';
$C['schemes'] = '*:'. implode(',', $p);
return htmLawed::hl($t, $C, $h);
// eof
}
public static function kses_hook($t, &$C, &$S){
// kses compat
return $t;
// eof
}
// end class
}

114
inc/3rdparty/libraries/html5/Data.php vendored Normal file
View file

@ -0,0 +1,114 @@
<?php
// warning: this file is encoded in UTF-8!
class HTML5_Data
{
// at some point this should be moved to a .ser file. Another
// possible optimization is to give UTF-8 bytes, not Unicode
// codepoints
// XXX: Not quite sure why it's named this; this is
// actually the numeric entity dereference table.
protected static $realCodepointTable = array(
0x00 => 0xFFFD, // REPLACEMENT CHARACTER
0x0D => 0x000A, // LINE FEED (LF)
0x80 => 0x20AC, // EURO SIGN ('€')
0x81 => 0x0081, // <control>
0x82 => 0x201A, // SINGLE LOW-9 QUOTATION MARK ('')
0x83 => 0x0192, // LATIN SMALL LETTER F WITH HOOK ('ƒ')
0x84 => 0x201E, // DOUBLE LOW-9 QUOTATION MARK ('„')
0x85 => 0x2026, // HORIZONTAL ELLIPSIS ('…')
0x86 => 0x2020, // DAGGER ('†')
0x87 => 0x2021, // DOUBLE DAGGER ('‡')
0x88 => 0x02C6, // MODIFIER LETTER CIRCUMFLEX ACCENT ('ˆ')
0x89 => 0x2030, // PER MILLE SIGN ('‰')
0x8A => 0x0160, // LATIN CAPITAL LETTER S WITH CARON ('Š')
0x8B => 0x2039, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK ('')
0x8C => 0x0152, // LATIN CAPITAL LIGATURE OE ('Œ')
0x8D => 0x008D, // <control>
0x8E => 0x017D, // LATIN CAPITAL LETTER Z WITH CARON ('Ž')
0x8F => 0x008F, // <control>
0x90 => 0x0090, // <control>
0x91 => 0x2018, // LEFT SINGLE QUOTATION MARK ('')
0x92 => 0x2019, // RIGHT SINGLE QUOTATION MARK ('')
0x93 => 0x201C, // LEFT DOUBLE QUOTATION MARK ('“')
0x94 => 0x201D, // RIGHT DOUBLE QUOTATION MARK ('”')
0x95 => 0x2022, // BULLET ('•')
0x96 => 0x2013, // EN DASH ('')
0x97 => 0x2014, // EM DASH ('—')
0x98 => 0x02DC, // SMALL TILDE ('˜')
0x99 => 0x2122, // TRADE MARK SIGN ('™')
0x9A => 0x0161, // LATIN SMALL LETTER S WITH CARON ('š')
0x9B => 0x203A, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK ('')
0x9C => 0x0153, // LATIN SMALL LIGATURE OE ('œ')
0x9D => 0x009D, // <control>
0x9E => 0x017E, // LATIN SMALL LETTER Z WITH CARON ('ž')
0x9F => 0x0178, // LATIN CAPITAL LETTER Y WITH DIAERESIS ('Ÿ')
);
protected static $namedCharacterReferences;
protected static $namedCharacterReferenceMaxLength;
/**
* Returns the "real" Unicode codepoint of a malformed character
* reference.
*/
public static function getRealCodepoint($ref) {
if (!isset(self::$realCodepointTable[$ref])) return false;
else return self::$realCodepointTable[$ref];
}
public static function getNamedCharacterReferences() {
if (!self::$namedCharacterReferences) {
self::$namedCharacterReferences = unserialize(
file_get_contents(dirname(__FILE__) . '/named-character-references.ser'));
}
return self::$namedCharacterReferences;
}
/**
* Converts a Unicode codepoint to sequence of UTF-8 bytes.
* @note Shamelessly stolen from HTML Purifier, which is also
* shamelessly stolen from Feyd (which is in public domain).
*/
public static function utf8chr($code) {
/* We don't care: we live dangerously
* if($code > 0x10FFFF or $code < 0x0 or
($code >= 0xD800 and $code <= 0xDFFF) ) {
// bits are set outside the "valid" range as defined
// by UNICODE 4.1.0
return "\xEF\xBF\xBD";
}*/
$x = $y = $z = $w = 0;
if ($code < 0x80) {
// regular ASCII character
$x = $code;
} else {
// set up bits for UTF-8
$x = ($code & 0x3F) | 0x80;
if ($code < 0x800) {
$y = (($code & 0x7FF) >> 6) | 0xC0;
} else {
$y = (($code & 0xFC0) >> 6) | 0x80;
if($code < 0x10000) {
$z = (($code >> 12) & 0x0F) | 0xE0;
} else {
$z = (($code >> 12) & 0x3F) | 0x80;
$w = (($code >> 18) & 0x07) | 0xF0;
}
}
}
// set up the actual character
$ret = '';
if($w) $ret .= chr($w);
if($z) $ret .= chr($z);
if($y) $ret .= chr($y);
$ret .= chr($x);
return $ret;
}
}

View file

@ -0,0 +1,284 @@
<?php
/*
Copyright 2009 Geoffrey Sneddon <http://gsnedders.com/>
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
// Some conventions:
// /* */ indicates verbatim text from the HTML 5 specification
// // indicates regular comments
class HTML5_InputStream {
/**
* The string data we're parsing.
*/
private $data;
/**
* The current integer byte position we are in $data
*/
private $char;
/**
* Length of $data; when $char === $data, we are at the end-of-file.
*/
private $EOF;
/**
* Parse errors.
*/
public $errors = array();
/**
* @param $data Data to parse
*/
public function __construct($data) {
/* Given an encoding, the bytes in the input stream must be
converted to Unicode characters for the tokeniser, as
described by the rules for that encoding, except that the
leading U+FEFF BYTE ORDER MARK character, if any, must not
be stripped by the encoding layer (it is stripped by the rule below).
Bytes or sequences of bytes in the original byte stream that
could not be converted to Unicode characters must be converted
to U+FFFD REPLACEMENT CHARACTER code points. */
// XXX currently assuming input data is UTF-8; once we
// build encoding detection this will no longer be the case
//
// We previously had an mbstring implementation here, but that
// implementation is heavily non-conforming, so it's been
// omitted.
if (extension_loaded('iconv')) {
// non-conforming
$data = @iconv('UTF-8', 'UTF-8//IGNORE', $data);
} else {
// we can make a conforming native implementation
throw new Exception('Not implemented, please install mbstring or iconv');
}
/* One leading U+FEFF BYTE ORDER MARK character must be
ignored if any are present. */
if (substr($data, 0, 3) === "\xEF\xBB\xBF") {
$data = substr($data, 3);
}
/* All U+0000 NULL characters in the input must be replaced
by U+FFFD REPLACEMENT CHARACTERs. Any occurrences of such
characters is a parse error. */
for ($i = 0, $count = substr_count($data, "\0"); $i < $count; $i++) {
$this->errors[] = array(
'type' => HTML5_Tokenizer::PARSEERROR,
'data' => 'null-character'
);
}
/* U+000D CARRIAGE RETURN (CR) characters and U+000A LINE FEED
(LF) characters are treated specially. Any CR characters
that are followed by LF characters must be removed, and any
CR characters not followed by LF characters must be converted
to LF characters. Thus, newlines in HTML DOMs are represented
by LF characters, and there are never any CR characters in the
input to the tokenization stage. */
$data = str_replace(
array(
"\0",
"\r\n",
"\r"
),
array(
"\xEF\xBF\xBD",
"\n",
"\n"
),
$data
);
/* Any occurrences of any characters in the ranges U+0001 to
U+0008, U+000B, U+000E to U+001F, U+007F to U+009F,
U+D800 to U+DFFF , U+FDD0 to U+FDEF, and
characters U+FFFE, U+FFFF, U+1FFFE, U+1FFFF, U+2FFFE, U+2FFFF,
U+3FFFE, U+3FFFF, U+4FFFE, U+4FFFF, U+5FFFE, U+5FFFF, U+6FFFE,
U+6FFFF, U+7FFFE, U+7FFFF, U+8FFFE, U+8FFFF, U+9FFFE, U+9FFFF,
U+AFFFE, U+AFFFF, U+BFFFE, U+BFFFF, U+CFFFE, U+CFFFF, U+DFFFE,
U+DFFFF, U+EFFFE, U+EFFFF, U+FFFFE, U+FFFFF, U+10FFFE, and
U+10FFFF are parse errors. (These are all control characters
or permanently undefined Unicode characters.) */
// Check PCRE is loaded.
if (extension_loaded('pcre')) {
$count = preg_match_all(
'/(?:
[\x01-\x08\x0B\x0E-\x1F\x7F] # U+0001 to U+0008, U+000B, U+000E to U+001F and U+007F
|
\xC2[\x80-\x9F] # U+0080 to U+009F
|
\xED(?:\xA0[\x80-\xFF]|[\xA1-\xBE][\x00-\xFF]|\xBF[\x00-\xBF]) # U+D800 to U+DFFFF
|
\xEF\xB7[\x90-\xAF] # U+FDD0 to U+FDEF
|
\xEF\xBF[\xBE\xBF] # U+FFFE and U+FFFF
|
[\xF0-\xF4][\x8F-\xBF]\xBF[\xBE\xBF] # U+nFFFE and U+nFFFF (1 <= n <= 10_{16})
)/x',
$data,
$matches
);
for ($i = 0; $i < $count; $i++) {
$this->errors[] = array(
'type' => HTML5_Tokenizer::PARSEERROR,
'data' => 'invalid-codepoint'
);
}
} else {
// XXX: Need non-PCRE impl, probably using substr_count
}
$this->data = $data;
$this->char = 0;
$this->EOF = strlen($data);
}
/**
* Returns the current line that the tokenizer is at.
*/
public function getCurrentLine() {
// Check the string isn't empty
if($this->EOF) {
// Add one to $this->char because we want the number for the next
// byte to be processed.
return substr_count($this->data, "\n", 0, min($this->char, $this->EOF)) + 1;
} else {
// If the string is empty, we are on the first line (sorta).
return 1;
}
}
/**
* Returns the current column of the current line that the tokenizer is at.
*/
public function getColumnOffset() {
// strrpos is weird, and the offset needs to be negative for what we
// want (i.e., the last \n before $this->char). This needs to not have
// one (to make it point to the next character, the one we want the
// position of) added to it because strrpos's behaviour includes the
// final offset byte.
$lastLine = strrpos($this->data, "\n", $this->char - 1 - strlen($this->data));
// However, for here we want the length up until the next byte to be
// processed, so add one to the current byte ($this->char).
if($lastLine !== false) {
$findLengthOf = substr($this->data, $lastLine + 1, $this->char - 1 - $lastLine);
} else {
$findLengthOf = substr($this->data, 0, $this->char);
}
// Get the length for the string we need.
if(extension_loaded('iconv')) {
return iconv_strlen($findLengthOf, 'utf-8');
} elseif(extension_loaded('mbstring')) {
return mb_strlen($findLengthOf, 'utf-8');
} elseif(extension_loaded('xml')) {
return strlen(utf8_decode($findLengthOf));
} else {
$count = count_chars($findLengthOf);
// 0x80 = 0x7F - 0 + 1 (one added to get inclusive range)
// 0x33 = 0xF4 - 0x2C + 1 (one added to get inclusive range)
return array_sum(array_slice($count, 0, 0x80)) +
array_sum(array_slice($count, 0xC2, 0x33));
}
}
/**
* Retrieve the currently consume character.
* @note This performs bounds checking
*/
public function char() {
return ($this->char++ < $this->EOF)
? $this->data[$this->char - 1]
: false;
}
/**
* Get all characters until EOF.
* @note This performs bounds checking
*/
public function remainingChars() {
if($this->char < $this->EOF) {
$data = substr($this->data, $this->char);
$this->char = $this->EOF;
return $data;
} else {
return false;
}
}
/**
* Matches as far as possible until we reach a certain set of bytes
* and returns the matched substring.
* @param $bytes Bytes to match.
*/
public function charsUntil($bytes, $max = null) {
if ($this->char < $this->EOF) {
if ($max === 0 || $max) {
$len = strcspn($this->data, $bytes, $this->char, $max);
} else {
$len = strcspn($this->data, $bytes, $this->char);
}
$string = (string) substr($this->data, $this->char, $len);
$this->char += $len;
return $string;
} else {
return false;
}
}
/**
* Matches as far as possible with a certain set of bytes
* and returns the matched substring.
* @param $bytes Bytes to match.
*/
public function charsWhile($bytes, $max = null) {
if ($this->char < $this->EOF) {
if ($max === 0 || $max) {
$len = strspn($this->data, $bytes, $this->char, $max);
} else {
$len = strspn($this->data, $bytes, $this->char);
}
$string = (string) substr($this->data, $this->char, $len);
$this->char += $len;
return $string;
} else {
return false;
}
}
/**
* Unconsume one character.
*/
public function unget() {
if ($this->char <= $this->EOF) {
$this->char--;
}
}
}

36
inc/3rdparty/libraries/html5/Parser.php vendored Normal file
View file

@ -0,0 +1,36 @@
<?php
require_once dirname(__FILE__) . '/Data.php';
require_once dirname(__FILE__) . '/InputStream.php';
require_once dirname(__FILE__) . '/TreeBuilder.php';
require_once dirname(__FILE__) . '/Tokenizer.php';
/**
* Outwards facing interface for HTML5.
*/
class HTML5_Parser
{
/**
* Parses a full HTML document.
* @param $text HTML text to parse
* @param $builder Custom builder implementation
* @return Parsed HTML as DOMDocument
*/
static public function parse($text, $builder = null) {
$tokenizer = new HTML5_Tokenizer($text, $builder);
$tokenizer->parse();
return $tokenizer->save();
}
/**
* Parses an HTML fragment.
* @param $text HTML text to parse
* @param $context String name of context element to pretend parsing is in.
* @param $builder Custom builder implementation
* @return Parsed HTML as DOMDocument
*/
static public function parseFragment($text, $context = null, $builder = null) {
$tokenizer = new HTML5_Tokenizer($text, $builder);
$tokenizer->parseFragment($context);
return $tokenizer->save();
}
}

2422
inc/3rdparty/libraries/html5/Tokenizer.php vendored Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because one or more lines are too long

View file

@ -7,8 +7,8 @@
* For environments which do not have these options, it reverts to standard sequential
* requests (using file_get_contents())
*
* @version 1.0
* @date 2012-02-09
* @version 1.1
* @date 2012-08-20
* @see http://php.net/HttpRequestPool
* @author Keyvan Minoukadeh
* @copyright 2011-2012 Keyvan Minoukadeh
@ -32,9 +32,10 @@ class HumbleHttpAgent
protected $cache = null; //TODO
protected $httpContext;
protected $minimiseMemoryUse = false; //TODO
protected $debug = false;
protected $method;
protected $cookieJar;
public $debug = false;
public $debugVerbose = false;
public $rewriteHashbangFragment = true; // see http://code.google.com/web/ajaxcrawling/docs/specification.html
public $maxRedirects = 5;
public $userAgentMap = array();
@ -51,6 +52,9 @@ class HumbleHttpAgent
// prompt Humble HTTP Agent to send a HEAD request first
// to see if returned content type matches $headerOnlyTypes.
public $headerOnlyClues = array('pdf','mp3','zip','exe','gif','gzip','gz','jpeg','jpg','mpg','mpeg','png','ppt','mov');
// AJAX triggers to search for.
// for AJAX sites, e.g. Blogger with its dynamic views templates.
public $ajaxTriggers = array("<meta name='fragment' content='!'",'<meta name="fragment" content="!"',"<meta content='!' name='fragment'",'<meta content="!" name="fragment"');
//TODO: set max file size
//TODO: normalise headers
@ -99,7 +103,8 @@ class HumbleHttpAgent
$mem = round(memory_get_usage()/1024, 2);
$memPeak = round(memory_get_peak_usage()/1024, 2);
echo '* ',$msg;
echo ' - mem used: ',$mem," (peak: $memPeak)\n";
if ($this->debugVerbose) echo ' - mem used: ',$mem," (peak: $memPeak)";
echo "\n";
ob_flush();
flush();
}
@ -150,6 +155,27 @@ class HumbleHttpAgent
return $iri->get_iri();
}
public function getUglyURL($url, $html) {
if ($html == '') return false;
$found = false;
foreach ($this->ajaxTriggers as $string) {
if (stripos($html, $string)) {
$found = true;
break;
}
}
if (!$found) return false;
$iri = new SimplePie_IRI($url);
if (isset($iri->query)) {
parse_str($iri->query, $query);
} else {
$query = array();
}
$query['_escaped_fragment_'] = '';
$iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites
return $iri->get_iri();
}
public function removeFragment($url) {
$pos = strpos($url, '#');
if ($pos === false) {
@ -308,6 +334,17 @@ class HumbleHttpAgent
$this->debug('Wrong guess at content-type, queing GET request');
$this->requests[$orig]['wrongGuess'] = true;
$this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];
} elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
// check for <meta name='fragment' content='!'/>
// for AJAX sites, e.g. Blogger with its dynamic views templates.
// Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
if (isset($this->requests[$orig]['body'])) {
$redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
if ($redirectURL) {
$this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL);
$this->redirectQueue[$orig] = $redirectURL;
}
}
}
//die($url.' -multi- '.$request->getResponseInfo('effective_url'));
$pool->detach($request);
@ -416,12 +453,23 @@ class HumbleHttpAgent
$this->debug('Redirect detected. Invalid URL: '.$redirectURL);
}
} elseif (!$_header_only_type && $this->requests[$orig]['method'] == 'HEAD') {
// the response content-type did not match our 'header only' types,
// but we'd issues a HEAD request because we assumed it would. So
// let's queue a proper GET request for this item...
$this->debug('Wrong guess at content-type, queing GET request');
$this->requests[$orig]['wrongGuess'] = true;
$this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];
// the response content-type did not match our 'header only' types,
// but we'd issues a HEAD request because we assumed it would. So
// let's queue a proper GET request for this item...
$this->debug('Wrong guess at content-type, queing GET request');
$this->requests[$orig]['wrongGuess'] = true;
$this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];
} elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
// check for <meta name='fragment' content='!'/>
// for AJAX sites, e.g. Blogger with its dynamic views templates.
// Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
if (isset($this->requests[$orig]['body'])) {
$redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
if ($redirectURL) {
$this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL);
$this->redirectQueue[$orig] = $redirectURL;
}
}
}
// die($url.' -multi- '.$request->getResponseInfo('effective_url'));
unset($this->requests[$orig]['httpRequest'], $this->requests[$orig]['method']);
@ -481,7 +529,7 @@ class HumbleHttpAgent
$this->requests[$orig]['status_code'] = $status_code = (int)$match[1];
unset($match);
// handle redirect
if (preg_match('/^Location:(.*?)$/m', $this->requests[$orig]['headers'], $match)) {
if (preg_match('/^Location:(.*?)$/mi', $this->requests[$orig]['headers'], $match)) {
$this->requests[$orig]['location'] = trim($match[1]);
}
if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
@ -498,6 +546,17 @@ class HumbleHttpAgent
} else {
$this->debug('Redirect detected. Invalid URL: '.$redirectURL);
}
} elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
// check for <meta name='fragment' content='!'/>
// for AJAX sites, e.g. Blogger with its dynamic views templates.
// Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
if (isset($this->requests[$orig]['body'])) {
$redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
if ($redirectURL) {
$this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL);
$this->redirectQueue[$orig] = $redirectURL;
}
}
}
}
} else {
@ -520,7 +579,7 @@ class HumbleHttpAgent
$this->requests[$orig]['method'] = $request->method;
$this->requests[$orig]['effective_url'] = $info['url'];
$this->requests[$orig]['status_code'] = (int)$info['http_code'];
if (preg_match('/^Location:(.*?)$/m', $this->requests[$orig]['headers'], $match)) {
if (preg_match('/^Location:(.*?)$/mi', $this->requests[$orig]['headers'], $match)) {
$this->requests[$orig]['location'] = trim($match[1]);
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,354 @@
<?php
/**
* This class represents a text sample to be parsed.
*
* @category Text
* @package Text_LanguageDetect
* @author Nicholas Pisarro
* @copyright 2006
* @license BSD
* @version CVS: $Id: Parser.php,v 1.5 2006/03/11 05:45:05 taak Exp $
* @link http://pear.php.net/package/Text_LanguageDetect/
* @link http://langdetect.blogspot.com/
*/
/**
* This class represents a text sample to be parsed.
*
* This separates the analysis of a text sample from the primary LanguageDetect
* class. After a new profile has been built, the data can be retrieved using
* the accessor functions.
*
* This class is intended to be used by the Text_LanguageDetect class, not
* end-users.
*
* @category Text
* @package Text_LanguageDetect
* @author Nicholas Pisarro
* @copyright 2006
* @license BSD
* @version release: 0.2.3
*/
class Text_LanguageDetect_Parser extends Text_LanguageDetect
{
/**
* the piece of text being parsed
*
* @access private
* @var string
*/
var $_string;
/**
* stores the trigram frequencies of the sample
*
* @access private
* @var string
*/
var $_trigrams = array();
/**
* stores the trigram ranks of the sample
*
* @access private
* @var array
*/
var $_trigram_ranks = array();
/**
* stores the unicode blocks of the sample
*
* @access private
* @var array
*/
var $_unicode_blocks = array();
/**
* Whether the parser should compile the unicode ranges
*
* @access private
* @var bool
*/
var $_compile_unicode = false;
/**
* Whether the parser should compile trigrams
*
* @access private
* @var bool
*/
var $_compile_trigram = false;
/**
* Whether the trigram parser should pad the beginning of the string
*
* @access private
* @var bool
*/
var $_trigram_pad_start = false;
/**
* Whether the unicode parser should skip non-alphabetical ascii chars
*
* @access private
* @var bool
*/
var $_unicode_skip_symbols = true;
/**
* Constructor
*
* @access private
* @param string $string string to be parsed
*/
function Text_LanguageDetect_Parser($string, $db=null, $unicode_db=null) {
if (isset($db)) $this->_db_filename = $db;
if (isset($unicode_db)) $this->_unicode_db_filename = $unicode_db;
$this->_string = $string;
}
/**
* Returns true if a string is suitable for parsing
*
* @static
* @access public
* @param string $str input string to test
* @return bool true if acceptable, false if not
*/
function validateString($str) {
if (!empty($str) && strlen($str) > 3 && preg_match('/\S/', $str)) {
return true;
} else {
return false;
}
}
/**
* turn on/off trigram counting
*
* @access public
* @param bool $bool true for on, false for off
*/
function prepareTrigram($bool = true)
{
$this->_compile_trigram = $bool;
}
/**
* turn on/off unicode block counting
*
* @access public
* @param bool $bool true for on, false for off
*/
function prepareUnicode($bool = true)
{
$this->_compile_unicode = $bool;
}
/**
* turn on/off padding the beginning of the sample string
*
* @access public
* @param bool $bool true for on, false for off
*/
function setPadStart($bool = true)
{
$this->_trigram_pad_start = $bool;
}
/**
* Should the unicode block counter skip non-alphabetical ascii chars?
*
* @access public
* @param bool $bool true for on, false for off
*/
function setUnicodeSkipSymbols($bool = true)
{
$this->_unicode_skip_symbols = $bool;
}
/**
* Returns the trigram ranks for the text sample
*
* @access public
* @return array trigram ranks in the text sample
*/
function &getTrigramRanks()
{
return $this->_trigram_ranks;
}
/**
* Return the trigram freqency table
*
* only used in testing to make sure the parser is working
*
* @access public
* @return array trigram freqencies in the text sample
*/
function &getTrigramFreqs()
{
return $this->_trigram;
}
/**
* returns the array of unicode blocks
*
* @access public
* @return array unicode blocks in the text sample
*/
function &getUnicodeBlocks()
{
return $this->_unicode_blocks;
}
/**
* Executes the parsing operation
*
* Be sure to call the set*() functions to set options and the
* prepare*() functions first to tell it what kind of data to compute
*
* Afterwards the get*() functions can be used to access the compiled
* information.
*
* @access public
*/
function analyze()
{
$len = strlen($this->_string);
$byte_counter = 0;
// unicode startup
if ($this->_compile_unicode) {
$blocks =& $this->_read_unicode_block_db();
$block_count = count($blocks);
$skipped_count = 0;
$unicode_chars = array();
}
// trigram startup
if ($this->_compile_trigram) {
// initialize them as blank so the parser will skip the first two
// (since it skips trigrams with more than 2 contiguous spaces)
$a = ' ';
$b = ' ';
// kludge
// if it finds a valid trigram to start and the start pad option is
// off, then set a variable that will be used to reduce this
// trigram after parsing has finished
if (!$this->_trigram_pad_start) {
$a = $this->_next_char($this->_string, $byte_counter, true);
if ($a != ' ') {
$b = $this->_next_char($this->_string, $byte_counter, true);
$dropone = " $a$b";
}
$byte_counter = 0;
$a = ' ';
$b = ' ';
}
}
while ($byte_counter < $len) {
$char = $this->_next_char($this->_string, $byte_counter, true);
// language trigram detection
if ($this->_compile_trigram) {
if (!($b == ' ' && ($a == ' ' || $char == ' '))) {
if (!isset($this->_trigram[$a . $b . $char])) {
$this->_trigram[$a . $b . $char] = 1;
} else {
$this->_trigram[$a . $b . $char]++;
}
}
$a = $b;
$b = $char;
}
// unicode block detection
if ($this->_compile_unicode) {
if ($this->_unicode_skip_symbols
&& strlen($char) == 1
&& ($char < 'A' || $char > 'z'
|| ($char > 'Z' && $char < 'a'))
&& $char != "'") { // does not skip the apostrophe
// since it's included in the language
// models
$skipped_count++;
continue;
}
// build an array of all the characters
if (isset($unicode_chars[$char])) {
$unicode_chars[$char]++;
} else {
$unicode_chars[$char] = 1;
}
}
// todo: add byte detection here
}
// unicode cleanup
if ($this->_compile_unicode) {
foreach ($unicode_chars as $utf8_char => $count) {
$search_result = $this->_unicode_block_name(
$this->_utf8char2unicode($utf8_char), $blocks, $block_count);
if ($search_result != -1) {
$block_name = $search_result[2];
} else {
$block_name = '[Malformatted]';
}
if (isset($this->_unicode_blocks[$block_name])) {
$this->_unicode_blocks[$block_name] += $count;
} else {
$this->_unicode_blocks[$block_name] = $count;
}
}
}
// trigram cleanup
if ($this->_compile_trigram) {
// pad the end
if ($b != ' ') {
if (!isset($this->_trigram["$a$b "])) {
$this->_trigram["$a$b "] = 1;
} else {
$this->_trigram["$a$b "]++;
}
}
// perl compatibility; Language::Guess does not pad the beginning
// kludge
if (isset($dropone)) {
if ($this->_trigram[$dropone] == 1) {
unset($this->_trigram[$dropone]);
} else {
$this->_trigram[$dropone]--;
}
}
if (!empty($this->_trigram)) {
$this->_trigram_ranks = $this->_arr_rank($this->_trigram);
} else {
$this->_trigram_ranks = array();
}
}
}
}
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
?>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -107,3 +107,4 @@ class JSLikeHTMLElement extends DOMElement
return '['.$this->tagName.']';
}
}
?>

File diff suppressed because it is too large Load diff

View file

@ -33,44 +33,50 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
// autoloader
spl_autoload_register(array(new SimplePie_Autoloader(), 'autoload'));
if (!class_exists('SimplePie'))
{
trigger_error('Autoloader not registered properly', E_USER_ERROR);
}
/**
* SimplePie Autoloader class.
* Autoloader class
*
* @package SimplePie
* @subpackage API
*/
class SimplePie_Autoloader
{
/**
* Constructor.
* Constructor
*/
public function __construct()
{
$this->path = dirname(__FILE__);
$this->path = dirname(__FILE__) . DIRECTORY_SEPARATOR . 'library';
}
/**
* Autoloader.
* Autoloader
*
* @param string $class The name of the class to attempt to load.
*/
public function autoload($class)
{
// see if this request should be handled by this autoloader
if (strpos($class, 'SimplePie') !== 0) {
// Only load the class if it starts with "SimplePie"
if (strpos($class, 'SimplePie') !== 0)
{
return;
}

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,24 +33,58 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Manages all author-related data
*
* Used by {@see SimplePie_Item::get_author()} and {@see SimplePie::get_authors()}
*
* This class can be overloaded with {@see SimplePie::set_author_class()}
*
* @package SimplePie
* @subpackage API
*/
class SimplePie_Author
{
/**
* Author's name
*
* @var string
* @see get_name()
*/
var $name;
/**
* Author's link
*
* @var string
* @see get_link()
*/
var $link;
/**
* Author's email address
*
* @var string
* @see get_email()
*/
var $email;
// Constructor, used to input the data
/**
* Constructor, used to input the data
*
* @param string $name
* @param string $link
* @param string $email
*/
public function __construct($name = null, $link = null, $email = null)
{
$this->name = $name;
@ -58,12 +92,22 @@ class SimplePie_Author
$this->email = $email;
}
/**
* String-ified version
*
* @return string
*/
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
/**
* Author's name
*
* @return string|null
*/
public function get_name()
{
if ($this->name !== null)
@ -76,6 +120,11 @@ class SimplePie_Author
}
}
/**
* Author's link
*
* @return string|null
*/
public function get_link()
{
if ($this->link !== null)
@ -88,6 +137,11 @@ class SimplePie_Author
}
}
/**
* Author's email address
*
* @return string|null
*/
public function get_email()
{
if ($this->email !== null)

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,17 +33,25 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Used to create cache objects
*
* This class can be overloaded with {@see SimplePie::set_cache_class()},
* although the preferred way is to create your own handler
* via {@see register()}
*
* @package SimplePie
* @subpackage Caching
*/
class SimplePie_Cache
{
/**
@ -65,8 +73,13 @@ class SimplePie_Cache
/**
* Create a new SimplePie_Cache object
*
* @param string $location URL location (scheme is used to determine handler)
* @param string $filename Unique identifier for cache object
* @param string $extension 'spi' or 'spc'
* @return SimplePie_Cache_Base Type of object depends on scheme of `$location`
*/
public static function create($location, $filename, $extension)
public static function get_handler($location, $filename, $extension)
{
$type = explode(':', $location, 2);
$type = $type[0];
@ -79,6 +92,17 @@ class SimplePie_Cache
return new SimplePie_Cache_File($location, $filename, $extension);
}
/**
* Create a new SimplePie_Cache object
*
* @deprecated Use {@see get_handler} instead
*/
public function create($location, $filename, $extension)
{
trigger_error('Cache::create() has been replaced with Cache::get_handler(). Switch to the registry system to use this.', E_USER_DEPRECATED);
return self::get_handler($location, $filename, $extension);
}
/**
* Register a handler
*

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,26 +33,37 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Base for cache objects
*
* Classes to be used with {@see SimplePie_Cache::register()} are expected
* to implement this interface.
*
* @package SimplePie
* @subpackage Caching
*/
interface SimplePie_Cache_Base
{
/**
* Feed cache type
*
* @var string
*/
const TYPE_FEED = 'spc';
/**
* Image cache type
*
* @var string
*/
const TYPE_IMAGE = 'spi';
@ -69,6 +80,7 @@ interface SimplePie_Cache_Base
* Save data to the cache
*
* @param array|SimplePie $data Data to store in the cache. If passed a SimplePie object, only cache the $data property
* @return bool Successfulness
*/
public function save($data);

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,19 +33,32 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Base class for database-based caches
*
* @package SimplePie
* @subpackage Caching
*/
abstract class SimplePie_Cache_DB implements SimplePie_Cache_Base
{
protected static function prepare_simplepie_object_for_cache(&$data)
/**
* Helper for database conversion
*
* Converts a given {@see SimplePie} object into data to be stored
*
* @param SimplePie $data
* @return array First item is the serialized data for storage, second item is the unique ID for this item
*/
protected static function prepare_simplepie_object_for_cache($data)
{
$items = $data->get_items();
$items_by_id = array();

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,37 +33,78 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Caches data to the filesystem
*
* @package SimplePie
* @subpackage Caching
*/
class SimplePie_Cache_File implements SimplePie_Cache_Base
{
/**
* Location string
*
* @see SimplePie::$cache_location
* @var string
*/
protected $location;
/**
* Filename
*
* @var string
*/
protected $filename;
/**
* File extension
*
* @var string
*/
protected $extension;
/**
* File path
*
* @var string
*/
protected $name;
public function __construct($location, $filename, $extension)
/**
* Create a new cache object
*
* @param string $location Location string (from SimplePie::$cache_location)
* @param string $name Unique ID for the cache
* @param string $type Either TYPE_FEED for SimplePie data, or TYPE_IMAGE for image data
*/
public function __construct($location, $name, $type)
{
$this->location = $location;
$this->filename = $filename;
$this->extension = $extension;
$this->filename = $name;
$this->extension = $type;
$this->name = "$this->location/$this->filename.$this->extension";
}
/**
* Save data to the cache
*
* @param array|SimplePie $data Data to store in the cache. If passed a SimplePie object, only cache the $data property
* @return bool Successfulness
*/
public function save($data)
{
if (file_exists($this->name) && is_writeable($this->name) || file_exists($this->location) && is_writeable($this->location))
{
if (is_a($data, 'SimplePie'))
if ($data instanceof SimplePie)
{
$data = $data->data;
}
@ -74,6 +115,11 @@ class SimplePie_Cache_File implements SimplePie_Cache_Base
return false;
}
/**
* Retrieve the data saved to the cache
*
* @return array Data for SimplePie::$data
*/
public function load()
{
if (file_exists($this->name) && is_readable($this->name))
@ -83,6 +129,11 @@ class SimplePie_Cache_File implements SimplePie_Cache_Base
return false;
}
/**
* Retrieve the last modified time for the cache
*
* @return int Timestamp
*/
public function mtime()
{
if (file_exists($this->name))
@ -92,6 +143,11 @@ class SimplePie_Cache_File implements SimplePie_Cache_Base
return false;
}
/**
* Set the last modified time to the current time
*
* @return bool Success status
*/
public function touch()
{
if (file_exists($this->name))
@ -101,6 +157,11 @@ class SimplePie_Cache_File implements SimplePie_Cache_Base
return false;
}
/**
* Remove the cache
*
* @return bool Success status
*/
public function unlink()
{
if (file_exists($this->name))

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,23 +33,59 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Caches data to memcache
*
* Registered for URLs with the "memcache" protocol
*
* For example, `memcache://localhost:11211/?timeout=3600&prefix=sp_` will
* connect to memcache on `localhost` on port 11211. All tables will be
* prefixed with `sp_` and data will expire after 3600 seconds
*
* @package SimplePie
* @subpackage Caching
* @uses Memcache
*/
class SimplePie_Cache_Memcache implements SimplePie_Cache_Base
{
/**
* Memcache instance
*
* @var Memcache
*/
protected $cache;
/**
* Options
*
* @var array
*/
protected $options;
/**
* Cache name
*
* @var string
*/
protected $name;
public function __construct($url, $filename, $extension)
/**
* Create a new cache object
*
* @param string $location Location string (from SimplePie::$cache_location)
* @param string $name Unique ID for the cache
* @param string $type Either TYPE_FEED for SimplePie data, or TYPE_IMAGE for image data
*/
public function __construct($location, $name, $type)
{
$this->options = array(
'host' => '127.0.0.1',
@ -59,22 +95,36 @@ class SimplePie_Cache_Memcache implements SimplePie_Cache_Base
'prefix' => 'simplepie_',
),
);
$this->options = array_merge_recursive($this->options, SimplePie_Cache::parse_URL($url));
$this->name = $this->options['extras']['prefix'] . md5("$filename:$extension");
$parsed = SimplePie_Cache::parse_URL($location);
$this->options['host'] = empty($parsed['host']) ? $this->options['host'] : $parsed['host'];
$this->options['port'] = empty($parsed['port']) ? $this->options['port'] : $parsed['port'];
$this->options['extras'] = array_merge($this->options['extras'], $parsed['extras']);
$this->name = $this->options['extras']['prefix'] . md5("$name:$type");
$this->cache = new Memcache();
$this->cache->addServer($this->options['host'], (int) $this->options['port']);
}
/**
* Save data to the cache
*
* @param array|SimplePie $data Data to store in the cache. If passed a SimplePie object, only cache the $data property
* @return bool Successfulness
*/
public function save($data)
{
if (is_a($data, 'SimplePie'))
if ($data instanceof SimplePie)
{
$data = $data->data;
}
return $this->cache->set($this->name, serialize($data), MEMCACHE_COMPRESSED, (int) $this->options['extras']['timeout']);
}
/**
* Retrieve the data saved to the cache
*
* @return array Data for SimplePie::$data
*/
public function load()
{
$data = $this->cache->get($this->name);
@ -86,6 +136,11 @@ class SimplePie_Cache_Memcache implements SimplePie_Cache_Base
return false;
}
/**
* Retrieve the last modified time for the cache
*
* @return int Timestamp
*/
public function mtime()
{
$data = $this->cache->get($this->name);
@ -99,6 +154,11 @@ class SimplePie_Cache_Memcache implements SimplePie_Cache_Base
return false;
}
/**
* Set the last modified time to the current time
*
* @return bool Success status
*/
public function touch()
{
$data = $this->cache->get($this->name);
@ -111,8 +171,13 @@ class SimplePie_Cache_Memcache implements SimplePie_Cache_Base
return false;
}
/**
* Remove the cache
*
* @return bool Success status
*/
public function unlink()
{
return $this->cache->delete($this->name);
return $this->cache->delete($this->name, 0);
}
}

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,24 +33,58 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Caches data to a MySQL database
*
* Registered for URLs with the "mysql" protocol
*
* For example, `mysql://root:password@localhost:3306/mydb?prefix=sp_` will
* connect to the `mydb` database on `localhost` on port 3306, with the user
* `root` and the password `password`. All tables will be prefixed with `sp_`
*
* @package SimplePie
* @subpackage Caching
*/
class SimplePie_Cache_MySQL extends SimplePie_Cache_DB
{
/**
* PDO instance
*
* @var PDO
*/
protected $mysql;
/**
* Options
*
* @var array
*/
protected $options;
/**
* Cache ID
*
* @var string
*/
protected $id;
public function __construct($url, $name, $extension)
/**
* Create a new cache object
*
* @param string $location Location string (from SimplePie::$cache_location)
* @param string $name Unique ID for the cache
* @param string $type Either TYPE_FEED for SimplePie data, or TYPE_IMAGE for image data
*/
public function __construct($location, $name, $type)
{
$this->options = array(
'user' => null,
@ -62,7 +96,7 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB
'prefix' => '',
),
);
$this->options = array_merge_recursive($this->options, SimplePie_Cache::parse_URL($url));
$this->options = array_merge_recursive($this->options, SimplePie_Cache::parse_URL($location));
// Path is prefixed with a "/"
$this->options['dbname'] = substr($this->options['path'], 1);
@ -77,7 +111,7 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB
return;
}
$this->id = $name . $extension;
$this->id = $name . $type;
if (!$query = $this->mysql->query('SHOW TABLES'))
{
@ -110,6 +144,12 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB
}
}
/**
* Save data to the cache
*
* @param array|SimplePie $data Data to store in the cache. If passed a SimplePie object, only cache the $data property
* @return bool Successfulness
*/
public function save($data)
{
if ($this->mysql === null)
@ -117,7 +157,7 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB
return false;
}
if (is_a($data, 'SimplePie'))
if ($data instanceof SimplePie)
{
$data = clone $data;
@ -243,6 +283,11 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB
return false;
}
/**
* Retrieve the data saved to the cache
*
* @return array Data for SimplePie::$data
*/
public function load()
{
if ($this->mysql === null)
@ -316,6 +361,11 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB
return false;
}
/**
* Retrieve the last modified time for the cache
*
* @return int Timestamp
*/
public function mtime()
{
if ($this->mysql === null)
@ -335,6 +385,11 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB
}
}
/**
* Set the last modified time to the current time
*
* @return bool Success status
*/
public function touch()
{
if ($this->mysql === null)
@ -355,6 +410,11 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB
}
}
/**
* Remove the cache
*
* @return bool Success status
*/
public function unlink()
{
if ($this->mysql === null)

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,26 +33,74 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Handles `<media:text>` captions as defined in Media RSS.
*
* Used by {@see SimplePie_Enclosure::get_caption()} and {@see SimplePie_Enclosure::get_captions()}
*
* This class can be overloaded with {@see SimplePie::set_caption_class()}
*
* @package SimplePie
* @subpackage API
*/
class SimplePie_Caption
{
/**
* Content type
*
* @var string
* @see get_type()
*/
var $type;
/**
* Language
*
* @var string
* @see get_language()
*/
var $lang;
/**
* Start time
*
* @var string
* @see get_starttime()
*/
var $startTime;
/**
* End time
*
* @var string
* @see get_endtime()
*/
var $endTime;
/**
* Caption text
*
* @var string
* @see get_text()
*/
var $text;
// Constructor, used to input the data
/**
* Constructor, used to input the data
*
* For documentation on all the parameters, see the corresponding
* properties and their accessors
*/
public function __construct($type = null, $lang = null, $startTime = null, $endTime = null, $text = null)
{
$this->type = $type;
@ -62,12 +110,22 @@ class SimplePie_Caption
$this->text = $text;
}
/**
* String-ified version
*
* @return string
*/
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
/**
* Get the end time
*
* @return string|null Time in the format 'hh:mm:ss.SSS'
*/
public function get_endtime()
{
if ($this->endTime !== null)
@ -80,6 +138,12 @@ class SimplePie_Caption
}
}
/**
* Get the language
*
* @link http://tools.ietf.org/html/rfc3066
* @return string|null Language code as per RFC 3066
*/
public function get_language()
{
if ($this->lang !== null)
@ -92,6 +156,11 @@ class SimplePie_Caption
}
}
/**
* Get the start time
*
* @return string|null Time in the format 'hh:mm:ss.SSS'
*/
public function get_starttime()
{
if ($this->startTime !== null)
@ -104,6 +173,11 @@ class SimplePie_Caption
}
}
/**
* Get the text of the caption
*
* @return string|null
*/
public function get_text()
{
if ($this->text !== null)
@ -116,6 +190,11 @@ class SimplePie_Caption
}
}
/**
* Get the content type (not MIME type)
*
* @return string|null Either 'text' or 'html'
*/
public function get_type()
{
if ($this->type !== null)

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,24 +33,58 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Manages all category-related data
*
* Used by {@see SimplePie_Item::get_category()} and {@see SimplePie_Item::get_categories()}
*
* This class can be overloaded with {@see SimplePie::set_category_class()}
*
* @package SimplePie
* @subpackage API
*/
class SimplePie_Category
{
/**
* Category identifier
*
* @var string
* @see get_term
*/
var $term;
/**
* Categorization scheme identifier
*
* @var string
* @see get_scheme()
*/
var $scheme;
/**
* Human readable label
*
* @var string
* @see get_label()
*/
var $label;
// Constructor, used to input the data
/**
* Constructor, used to input the data
*
* @param string $term
* @param string $scheme
* @param string $label
*/
public function __construct($term = null, $scheme = null, $label = null)
{
$this->term = $term;
@ -58,12 +92,22 @@ class SimplePie_Category
$this->label = $label;
}
/**
* String-ified version
*
* @return string
*/
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
/**
* Get the category identifier
*
* @return string|null
*/
public function get_term()
{
if ($this->term !== null)
@ -76,6 +120,11 @@ class SimplePie_Category
}
}
/**
* Get the categorization scheme identifier
*
* @return string|null
*/
public function get_scheme()
{
if ($this->scheme !== null)
@ -88,6 +137,11 @@ class SimplePie_Category
}
}
/**
* Get the human readable label
*
* @return string|null
*/
public function get_label()
{
if ($this->label !== null)

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,14 +33,13 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
@ -48,7 +47,15 @@
* Content-type sniffing
*
* Based on the rules in http://tools.ietf.org/html/draft-abarth-mime-sniff-06
*
* This is used since we can't always trust Content-Type headers, and is based
* upon the HTML5 parsing rules.
*
*
* This class can be overloaded with {@see SimplePie::set_content_type_sniffer_class()}
*
* @package SimplePie
* @subpackage HTTP
*/
class SimplePie_Content_Type_Sniffer
{

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,35 +33,71 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Manages `<media:copyright>` copyright tags as defined in Media RSS
*
* Used by {@see SimplePie_Enclosure::get_copyright()}
*
* This class can be overloaded with {@see SimplePie::set_copyright_class()}
*
* @package SimplePie
* @subpackage API
*/
class SimplePie_Copyright
{
/**
* Copyright URL
*
* @var string
* @see get_url()
*/
var $url;
/**
* Attribution
*
* @var string
* @see get_attribution()
*/
var $label;
// Constructor, used to input the data
/**
* Constructor, used to input the data
*
* For documentation on all the parameters, see the corresponding
* properties and their accessors
*/
public function __construct($url = null, $label = null)
{
$this->url = $url;
$this->label = $label;
}
/**
* String-ified version
*
* @return string
*/
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
/**
* Get the copyright URL
*
* @return string|null URL to copyright information
*/
public function get_url()
{
if ($this->url !== null)
@ -74,6 +110,11 @@ class SimplePie_Copyright
}
}
/**
* Get the attribution text
*
* @return string|null
*/
public function get_attribution()
{
if ($this->label !== null)

View file

@ -33,14 +33,13 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
@ -48,9 +47,11 @@
*
* Class for backward compatibility.
*
* @deprecated Use {@see SimplePie} directly
* @package SimplePie
* @subpackage API
*/
class SimplePie extends SimplePie_Core
class SimplePie_Core extends SimplePie
{
}

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,24 +33,57 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Handles `<media:credit>` as defined in Media RSS
*
* Used by {@see SimplePie_Enclosure::get_credit()} and {@see SimplePie_Enclosure::get_credits()}
*
* This class can be overloaded with {@see SimplePie::set_credit_class()}
*
* @package SimplePie
* @subpackage API
*/
class SimplePie_Credit
{
/**
* Credited role
*
* @var string
* @see get_role()
*/
var $role;
/**
* Organizational scheme
*
* @var string
* @see get_scheme()
*/
var $scheme;
/**
* Credited name
*
* @var string
* @see get_name()
*/
var $name;
// Constructor, used to input the data
/**
* Constructor, used to input the data
*
* For documentation on all the parameters, see the corresponding
* properties and their accessors
*/
public function __construct($role = null, $scheme = null, $name = null)
{
$this->role = $role;
@ -58,12 +91,22 @@ class SimplePie_Credit
$this->name = $name;
}
/**
* String-ified version
*
* @return string
*/
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
/**
* Get the role of the person receiving credit
*
* @return string|null
*/
public function get_role()
{
if ($this->role !== null)
@ -76,6 +119,11 @@ class SimplePie_Credit
}
}
/**
* Get the organizational scheme
*
* @return string|null
*/
public function get_scheme()
{
if ($this->scheme !== null)
@ -88,6 +136,11 @@ class SimplePie_Credit
}
}
/**
* Get the credited person/entity's name
*
* @return string|null
*/
public function get_name()
{
if ($this->name !== null)

View file

@ -0,0 +1,617 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Decode HTML Entities
*
* This implements HTML5 as of revision 967 (2007-06-28)
*
* @deprecated Use DOMDocument instead!
* @package SimplePie
*/
class SimplePie_Decode_HTML_Entities
{
/**
* Data to be parsed
*
* @access private
* @var string
*/
var $data = '';
/**
* Currently consumed bytes
*
* @access private
* @var string
*/
var $consumed = '';
/**
* Position of the current byte being parsed
*
* @access private
* @var int
*/
var $position = 0;
/**
* Create an instance of the class with the input data
*
* @access public
* @param string $data Input data
*/
public function __construct($data)
{
$this->data = $data;
}
/**
* Parse the input data
*
* @access public
* @return string Output data
*/
public function parse()
{
while (($this->position = strpos($this->data, '&', $this->position)) !== false)
{
$this->consume();
$this->entity();
$this->consumed = '';
}
return $this->data;
}
/**
* Consume the next byte
*
* @access private
* @return mixed The next byte, or false, if there is no more data
*/
public function consume()
{
if (isset($this->data[$this->position]))
{
$this->consumed .= $this->data[$this->position];
return $this->data[$this->position++];
}
else
{
return false;
}
}
/**
* Consume a range of characters
*
* @access private
* @param string $chars Characters to consume
* @return mixed A series of characters that match the range, or false
*/
public function consume_range($chars)
{
if ($len = strspn($this->data, $chars, $this->position))
{
$data = substr($this->data, $this->position, $len);
$this->consumed .= $data;
$this->position += $len;
return $data;
}
else
{
return false;
}
}
/**
* Unconsume one byte
*
* @access private
*/
public function unconsume()
{
$this->consumed = substr($this->consumed, 0, -1);
$this->position--;
}
/**
* Decode an entity
*
* @access private
*/
public function entity()
{
switch ($this->consume())
{
case "\x09":
case "\x0A":
case "\x0B":
case "\x0B":
case "\x0C":
case "\x20":
case "\x3C":
case "\x26":
case false:
break;
case "\x23":
switch ($this->consume())
{
case "\x78":
case "\x58":
$range = '0123456789ABCDEFabcdef';
$hex = true;
break;
default:
$range = '0123456789';
$hex = false;
$this->unconsume();
break;
}
if ($codepoint = $this->consume_range($range))
{
static $windows_1252_specials = array(0x0D => "\x0A", 0x80 => "\xE2\x82\xAC", 0x81 => "\xEF\xBF\xBD", 0x82 => "\xE2\x80\x9A", 0x83 => "\xC6\x92", 0x84 => "\xE2\x80\x9E", 0x85 => "\xE2\x80\xA6", 0x86 => "\xE2\x80\xA0", 0x87 => "\xE2\x80\xA1", 0x88 => "\xCB\x86", 0x89 => "\xE2\x80\xB0", 0x8A => "\xC5\xA0", 0x8B => "\xE2\x80\xB9", 0x8C => "\xC5\x92", 0x8D => "\xEF\xBF\xBD", 0x8E => "\xC5\xBD", 0x8F => "\xEF\xBF\xBD", 0x90 => "\xEF\xBF\xBD", 0x91 => "\xE2\x80\x98", 0x92 => "\xE2\x80\x99", 0x93 => "\xE2\x80\x9C", 0x94 => "\xE2\x80\x9D", 0x95 => "\xE2\x80\xA2", 0x96 => "\xE2\x80\x93", 0x97 => "\xE2\x80\x94", 0x98 => "\xCB\x9C", 0x99 => "\xE2\x84\xA2", 0x9A => "\xC5\xA1", 0x9B => "\xE2\x80\xBA", 0x9C => "\xC5\x93", 0x9D => "\xEF\xBF\xBD", 0x9E => "\xC5\xBE", 0x9F => "\xC5\xB8");
if ($hex)
{
$codepoint = hexdec($codepoint);
}
else
{
$codepoint = intval($codepoint);
}
if (isset($windows_1252_specials[$codepoint]))
{
$replacement = $windows_1252_specials[$codepoint];
}
else
{
$replacement = SimplePie_Misc::codepoint_to_utf8($codepoint);
}
if (!in_array($this->consume(), array(';', false), true))
{
$this->unconsume();
}
$consumed_length = strlen($this->consumed);
$this->data = substr_replace($this->data, $replacement, $this->position - $consumed_length, $consumed_length);
$this->position += strlen($replacement) - $consumed_length;
}
break;
default:
static $entities = array(
'Aacute' => "\xC3\x81",
'aacute' => "\xC3\xA1",
'Aacute;' => "\xC3\x81",
'aacute;' => "\xC3\xA1",
'Acirc' => "\xC3\x82",
'acirc' => "\xC3\xA2",
'Acirc;' => "\xC3\x82",
'acirc;' => "\xC3\xA2",
'acute' => "\xC2\xB4",
'acute;' => "\xC2\xB4",
'AElig' => "\xC3\x86",
'aelig' => "\xC3\xA6",
'AElig;' => "\xC3\x86",
'aelig;' => "\xC3\xA6",
'Agrave' => "\xC3\x80",
'agrave' => "\xC3\xA0",
'Agrave;' => "\xC3\x80",
'agrave;' => "\xC3\xA0",
'alefsym;' => "\xE2\x84\xB5",
'Alpha;' => "\xCE\x91",
'alpha;' => "\xCE\xB1",
'AMP' => "\x26",
'amp' => "\x26",
'AMP;' => "\x26",
'amp;' => "\x26",
'and;' => "\xE2\x88\xA7",
'ang;' => "\xE2\x88\xA0",
'apos;' => "\x27",
'Aring' => "\xC3\x85",
'aring' => "\xC3\xA5",
'Aring;' => "\xC3\x85",
'aring;' => "\xC3\xA5",
'asymp;' => "\xE2\x89\x88",
'Atilde' => "\xC3\x83",
'atilde' => "\xC3\xA3",
'Atilde;' => "\xC3\x83",
'atilde;' => "\xC3\xA3",
'Auml' => "\xC3\x84",
'auml' => "\xC3\xA4",
'Auml;' => "\xC3\x84",
'auml;' => "\xC3\xA4",
'bdquo;' => "\xE2\x80\x9E",
'Beta;' => "\xCE\x92",
'beta;' => "\xCE\xB2",
'brvbar' => "\xC2\xA6",
'brvbar;' => "\xC2\xA6",
'bull;' => "\xE2\x80\xA2",
'cap;' => "\xE2\x88\xA9",
'Ccedil' => "\xC3\x87",
'ccedil' => "\xC3\xA7",
'Ccedil;' => "\xC3\x87",
'ccedil;' => "\xC3\xA7",
'cedil' => "\xC2\xB8",
'cedil;' => "\xC2\xB8",
'cent' => "\xC2\xA2",
'cent;' => "\xC2\xA2",
'Chi;' => "\xCE\xA7",
'chi;' => "\xCF\x87",
'circ;' => "\xCB\x86",
'clubs;' => "\xE2\x99\xA3",
'cong;' => "\xE2\x89\x85",
'COPY' => "\xC2\xA9",
'copy' => "\xC2\xA9",
'COPY;' => "\xC2\xA9",
'copy;' => "\xC2\xA9",
'crarr;' => "\xE2\x86\xB5",
'cup;' => "\xE2\x88\xAA",
'curren' => "\xC2\xA4",
'curren;' => "\xC2\xA4",
'Dagger;' => "\xE2\x80\xA1",
'dagger;' => "\xE2\x80\xA0",
'dArr;' => "\xE2\x87\x93",
'darr;' => "\xE2\x86\x93",
'deg' => "\xC2\xB0",
'deg;' => "\xC2\xB0",
'Delta;' => "\xCE\x94",
'delta;' => "\xCE\xB4",
'diams;' => "\xE2\x99\xA6",
'divide' => "\xC3\xB7",
'divide;' => "\xC3\xB7",
'Eacute' => "\xC3\x89",
'eacute' => "\xC3\xA9",
'Eacute;' => "\xC3\x89",
'eacute;' => "\xC3\xA9",
'Ecirc' => "\xC3\x8A",
'ecirc' => "\xC3\xAA",
'Ecirc;' => "\xC3\x8A",
'ecirc;' => "\xC3\xAA",
'Egrave' => "\xC3\x88",
'egrave' => "\xC3\xA8",
'Egrave;' => "\xC3\x88",
'egrave;' => "\xC3\xA8",
'empty;' => "\xE2\x88\x85",
'emsp;' => "\xE2\x80\x83",
'ensp;' => "\xE2\x80\x82",
'Epsilon;' => "\xCE\x95",
'epsilon;' => "\xCE\xB5",
'equiv;' => "\xE2\x89\xA1",
'Eta;' => "\xCE\x97",
'eta;' => "\xCE\xB7",
'ETH' => "\xC3\x90",
'eth' => "\xC3\xB0",
'ETH;' => "\xC3\x90",
'eth;' => "\xC3\xB0",
'Euml' => "\xC3\x8B",
'euml' => "\xC3\xAB",
'Euml;' => "\xC3\x8B",
'euml;' => "\xC3\xAB",
'euro;' => "\xE2\x82\xAC",
'exist;' => "\xE2\x88\x83",
'fnof;' => "\xC6\x92",
'forall;' => "\xE2\x88\x80",
'frac12' => "\xC2\xBD",
'frac12;' => "\xC2\xBD",
'frac14' => "\xC2\xBC",
'frac14;' => "\xC2\xBC",
'frac34' => "\xC2\xBE",
'frac34;' => "\xC2\xBE",
'frasl;' => "\xE2\x81\x84",
'Gamma;' => "\xCE\x93",
'gamma;' => "\xCE\xB3",
'ge;' => "\xE2\x89\xA5",
'GT' => "\x3E",
'gt' => "\x3E",
'GT;' => "\x3E",
'gt;' => "\x3E",
'hArr;' => "\xE2\x87\x94",
'harr;' => "\xE2\x86\x94",
'hearts;' => "\xE2\x99\xA5",
'hellip;' => "\xE2\x80\xA6",
'Iacute' => "\xC3\x8D",
'iacute' => "\xC3\xAD",
'Iacute;' => "\xC3\x8D",
'iacute;' => "\xC3\xAD",
'Icirc' => "\xC3\x8E",
'icirc' => "\xC3\xAE",
'Icirc;' => "\xC3\x8E",
'icirc;' => "\xC3\xAE",
'iexcl' => "\xC2\xA1",
'iexcl;' => "\xC2\xA1",
'Igrave' => "\xC3\x8C",
'igrave' => "\xC3\xAC",
'Igrave;' => "\xC3\x8C",
'igrave;' => "\xC3\xAC",
'image;' => "\xE2\x84\x91",
'infin;' => "\xE2\x88\x9E",
'int;' => "\xE2\x88\xAB",
'Iota;' => "\xCE\x99",
'iota;' => "\xCE\xB9",
'iquest' => "\xC2\xBF",
'iquest;' => "\xC2\xBF",
'isin;' => "\xE2\x88\x88",
'Iuml' => "\xC3\x8F",
'iuml' => "\xC3\xAF",
'Iuml;' => "\xC3\x8F",
'iuml;' => "\xC3\xAF",
'Kappa;' => "\xCE\x9A",
'kappa;' => "\xCE\xBA",
'Lambda;' => "\xCE\x9B",
'lambda;' => "\xCE\xBB",
'lang;' => "\xE3\x80\x88",
'laquo' => "\xC2\xAB",
'laquo;' => "\xC2\xAB",
'lArr;' => "\xE2\x87\x90",
'larr;' => "\xE2\x86\x90",
'lceil;' => "\xE2\x8C\x88",
'ldquo;' => "\xE2\x80\x9C",
'le;' => "\xE2\x89\xA4",
'lfloor;' => "\xE2\x8C\x8A",
'lowast;' => "\xE2\x88\x97",
'loz;' => "\xE2\x97\x8A",
'lrm;' => "\xE2\x80\x8E",
'lsaquo;' => "\xE2\x80\xB9",
'lsquo;' => "\xE2\x80\x98",
'LT' => "\x3C",
'lt' => "\x3C",
'LT;' => "\x3C",
'lt;' => "\x3C",
'macr' => "\xC2\xAF",
'macr;' => "\xC2\xAF",
'mdash;' => "\xE2\x80\x94",
'micro' => "\xC2\xB5",
'micro;' => "\xC2\xB5",
'middot' => "\xC2\xB7",
'middot;' => "\xC2\xB7",
'minus;' => "\xE2\x88\x92",
'Mu;' => "\xCE\x9C",
'mu;' => "\xCE\xBC",
'nabla;' => "\xE2\x88\x87",
'nbsp' => "\xC2\xA0",
'nbsp;' => "\xC2\xA0",
'ndash;' => "\xE2\x80\x93",
'ne;' => "\xE2\x89\xA0",
'ni;' => "\xE2\x88\x8B",
'not' => "\xC2\xAC",
'not;' => "\xC2\xAC",
'notin;' => "\xE2\x88\x89",
'nsub;' => "\xE2\x8A\x84",
'Ntilde' => "\xC3\x91",
'ntilde' => "\xC3\xB1",
'Ntilde;' => "\xC3\x91",
'ntilde;' => "\xC3\xB1",
'Nu;' => "\xCE\x9D",
'nu;' => "\xCE\xBD",
'Oacute' => "\xC3\x93",
'oacute' => "\xC3\xB3",
'Oacute;' => "\xC3\x93",
'oacute;' => "\xC3\xB3",
'Ocirc' => "\xC3\x94",
'ocirc' => "\xC3\xB4",
'Ocirc;' => "\xC3\x94",
'ocirc;' => "\xC3\xB4",
'OElig;' => "\xC5\x92",
'oelig;' => "\xC5\x93",
'Ograve' => "\xC3\x92",
'ograve' => "\xC3\xB2",
'Ograve;' => "\xC3\x92",
'ograve;' => "\xC3\xB2",
'oline;' => "\xE2\x80\xBE",
'Omega;' => "\xCE\xA9",
'omega;' => "\xCF\x89",
'Omicron;' => "\xCE\x9F",
'omicron;' => "\xCE\xBF",
'oplus;' => "\xE2\x8A\x95",
'or;' => "\xE2\x88\xA8",
'ordf' => "\xC2\xAA",
'ordf;' => "\xC2\xAA",
'ordm' => "\xC2\xBA",
'ordm;' => "\xC2\xBA",
'Oslash' => "\xC3\x98",
'oslash' => "\xC3\xB8",
'Oslash;' => "\xC3\x98",
'oslash;' => "\xC3\xB8",
'Otilde' => "\xC3\x95",
'otilde' => "\xC3\xB5",
'Otilde;' => "\xC3\x95",
'otilde;' => "\xC3\xB5",
'otimes;' => "\xE2\x8A\x97",
'Ouml' => "\xC3\x96",
'ouml' => "\xC3\xB6",
'Ouml;' => "\xC3\x96",
'ouml;' => "\xC3\xB6",
'para' => "\xC2\xB6",
'para;' => "\xC2\xB6",
'part;' => "\xE2\x88\x82",
'permil;' => "\xE2\x80\xB0",
'perp;' => "\xE2\x8A\xA5",
'Phi;' => "\xCE\xA6",
'phi;' => "\xCF\x86",
'Pi;' => "\xCE\xA0",
'pi;' => "\xCF\x80",
'piv;' => "\xCF\x96",
'plusmn' => "\xC2\xB1",
'plusmn;' => "\xC2\xB1",
'pound' => "\xC2\xA3",
'pound;' => "\xC2\xA3",
'Prime;' => "\xE2\x80\xB3",
'prime;' => "\xE2\x80\xB2",
'prod;' => "\xE2\x88\x8F",
'prop;' => "\xE2\x88\x9D",
'Psi;' => "\xCE\xA8",
'psi;' => "\xCF\x88",
'QUOT' => "\x22",
'quot' => "\x22",
'QUOT;' => "\x22",
'quot;' => "\x22",
'radic;' => "\xE2\x88\x9A",
'rang;' => "\xE3\x80\x89",
'raquo' => "\xC2\xBB",
'raquo;' => "\xC2\xBB",
'rArr;' => "\xE2\x87\x92",
'rarr;' => "\xE2\x86\x92",
'rceil;' => "\xE2\x8C\x89",
'rdquo;' => "\xE2\x80\x9D",
'real;' => "\xE2\x84\x9C",
'REG' => "\xC2\xAE",
'reg' => "\xC2\xAE",
'REG;' => "\xC2\xAE",
'reg;' => "\xC2\xAE",
'rfloor;' => "\xE2\x8C\x8B",
'Rho;' => "\xCE\xA1",
'rho;' => "\xCF\x81",
'rlm;' => "\xE2\x80\x8F",
'rsaquo;' => "\xE2\x80\xBA",
'rsquo;' => "\xE2\x80\x99",
'sbquo;' => "\xE2\x80\x9A",
'Scaron;' => "\xC5\xA0",
'scaron;' => "\xC5\xA1",
'sdot;' => "\xE2\x8B\x85",
'sect' => "\xC2\xA7",
'sect;' => "\xC2\xA7",
'shy' => "\xC2\xAD",
'shy;' => "\xC2\xAD",
'Sigma;' => "\xCE\xA3",
'sigma;' => "\xCF\x83",
'sigmaf;' => "\xCF\x82",
'sim;' => "\xE2\x88\xBC",
'spades;' => "\xE2\x99\xA0",
'sub;' => "\xE2\x8A\x82",
'sube;' => "\xE2\x8A\x86",
'sum;' => "\xE2\x88\x91",
'sup;' => "\xE2\x8A\x83",
'sup1' => "\xC2\xB9",
'sup1;' => "\xC2\xB9",
'sup2' => "\xC2\xB2",
'sup2;' => "\xC2\xB2",
'sup3' => "\xC2\xB3",
'sup3;' => "\xC2\xB3",
'supe;' => "\xE2\x8A\x87",
'szlig' => "\xC3\x9F",
'szlig;' => "\xC3\x9F",
'Tau;' => "\xCE\xA4",
'tau;' => "\xCF\x84",
'there4;' => "\xE2\x88\xB4",
'Theta;' => "\xCE\x98",
'theta;' => "\xCE\xB8",
'thetasym;' => "\xCF\x91",
'thinsp;' => "\xE2\x80\x89",
'THORN' => "\xC3\x9E",
'thorn' => "\xC3\xBE",
'THORN;' => "\xC3\x9E",
'thorn;' => "\xC3\xBE",
'tilde;' => "\xCB\x9C",
'times' => "\xC3\x97",
'times;' => "\xC3\x97",
'TRADE;' => "\xE2\x84\xA2",
'trade;' => "\xE2\x84\xA2",
'Uacute' => "\xC3\x9A",
'uacute' => "\xC3\xBA",
'Uacute;' => "\xC3\x9A",
'uacute;' => "\xC3\xBA",
'uArr;' => "\xE2\x87\x91",
'uarr;' => "\xE2\x86\x91",
'Ucirc' => "\xC3\x9B",
'ucirc' => "\xC3\xBB",
'Ucirc;' => "\xC3\x9B",
'ucirc;' => "\xC3\xBB",
'Ugrave' => "\xC3\x99",
'ugrave' => "\xC3\xB9",
'Ugrave;' => "\xC3\x99",
'ugrave;' => "\xC3\xB9",
'uml' => "\xC2\xA8",
'uml;' => "\xC2\xA8",
'upsih;' => "\xCF\x92",
'Upsilon;' => "\xCE\xA5",
'upsilon;' => "\xCF\x85",
'Uuml' => "\xC3\x9C",
'uuml' => "\xC3\xBC",
'Uuml;' => "\xC3\x9C",
'uuml;' => "\xC3\xBC",
'weierp;' => "\xE2\x84\x98",
'Xi;' => "\xCE\x9E",
'xi;' => "\xCE\xBE",
'Yacute' => "\xC3\x9D",
'yacute' => "\xC3\xBD",
'Yacute;' => "\xC3\x9D",
'yacute;' => "\xC3\xBD",
'yen' => "\xC2\xA5",
'yen;' => "\xC2\xA5",
'yuml' => "\xC3\xBF",
'Yuml;' => "\xC5\xB8",
'yuml;' => "\xC3\xBF",
'Zeta;' => "\xCE\x96",
'zeta;' => "\xCE\xB6",
'zwj;' => "\xE2\x80\x8D",
'zwnj;' => "\xE2\x80\x8C"
);
for ($i = 0, $match = null; $i < 9 && $this->consume() !== false; $i++)
{
$consumed = substr($this->consumed, 1);
if (isset($entities[$consumed]))
{
$match = $consumed;
}
}
if ($match !== null)
{
$this->data = substr_replace($this->data, $entities[$match], $this->position - strlen($consumed) - 1, strlen($match) + 1);
$this->position += strlen($entities[$match]) - strlen($consumed) - 1;
}
break;
}
}
}

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,48 +33,197 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Handles everything related to enclosures (including Media RSS and iTunes RSS)
*
* Used by {@see SimplePie_Item::get_enclosure()} and {@see SimplePie_Item::get_enclosures()}
*
* This class can be overloaded with {@see SimplePie::set_enclosure_class()}
*
* @package SimplePie
* @subpackage API
*/
class SimplePie_Enclosure
{
/**
* @var string
* @see get_bitrate()
*/
var $bitrate;
/**
* @var array
* @see get_captions()
*/
var $captions;
/**
* @var array
* @see get_categories()
*/
var $categories;
/**
* @var int
* @see get_channels()
*/
var $channels;
/**
* @var SimplePie_Copyright
* @see get_copyright()
*/
var $copyright;
/**
* @var array
* @see get_credits()
*/
var $credits;
/**
* @var string
* @see get_description()
*/
var $description;
/**
* @var int
* @see get_duration()
*/
var $duration;
/**
* @var string
* @see get_expression()
*/
var $expression;
/**
* @var string
* @see get_framerate()
*/
var $framerate;
/**
* @var string
* @see get_handler()
*/
var $handler;
/**
* @var array
* @see get_hashes()
*/
var $hashes;
/**
* @var string
* @see get_height()
*/
var $height;
/**
* @deprecated
* @var null
*/
var $javascript;
/**
* @var array
* @see get_keywords()
*/
var $keywords;
/**
* @var string
* @see get_language()
*/
var $lang;
/**
* @var string
* @see get_length()
*/
var $length;
/**
* @var string
* @see get_link()
*/
var $link;
/**
* @var string
* @see get_medium()
*/
var $medium;
/**
* @var string
* @see get_player()
*/
var $player;
/**
* @var array
* @see get_ratings()
*/
var $ratings;
/**
* @var array
* @see get_restrictions()
*/
var $restrictions;
/**
* @var string
* @see get_sampling_rate()
*/
var $samplingrate;
/**
* @var array
* @see get_thumbnails()
*/
var $thumbnails;
/**
* @var string
* @see get_title()
*/
var $title;
/**
* @var string
* @see get_type()
*/
var $type;
/**
* @var string
* @see get_width()
*/
var $width;
// Constructor, used to input the data
/**
* Constructor, used to input the data
*
* For documentation on all the parameters, see the corresponding
* properties and their accessors
*
* @uses idna_convert If available, this will convert an IDN
*/
public function __construct($link = null, $type = null, $length = null, $javascript = null, $bitrate = null, $captions = null, $categories = null, $channels = null, $copyright = null, $credits = null, $description = null, $duration = null, $expression = null, $framerate = null, $hashes = null, $height = null, $keywords = null, $lang = null, $medium = null, $player = null, $ratings = null, $restrictions = null, $samplingrate = null, $thumbnails = null, $title = null, $width = null)
{
$this->bitrate = $bitrate;
@ -112,12 +261,22 @@ class SimplePie_Enclosure
$this->handler = $this->get_handler(); // Needs to load last
}
/**
* String-ified version
*
* @return string
*/
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
/**
* Get the bitrate
*
* @return string|null
*/
public function get_bitrate()
{
if ($this->bitrate !== null)
@ -130,6 +289,12 @@ class SimplePie_Enclosure
}
}
/**
* Get a single caption
*
* @param int $key
* @return SimplePie_Caption|null
*/
public function get_caption($key = 0)
{
$captions = $this->get_captions();
@ -143,6 +308,11 @@ class SimplePie_Enclosure
}
}
/**
* Get all captions
*
* @return array|null Array of {@see SimplePie_Caption} objects
*/
public function get_captions()
{
if ($this->captions !== null)
@ -155,6 +325,12 @@ class SimplePie_Enclosure
}
}
/**
* Get a single category
*
* @param int $key
* @return SimplePie_Category|null
*/
public function get_category($key = 0)
{
$categories = $this->get_categories();
@ -168,6 +344,11 @@ class SimplePie_Enclosure
}
}
/**
* Get all categories
*
* @return array|null Array of {@see SimplePie_Category} objects
*/
public function get_categories()
{
if ($this->categories !== null)
@ -180,6 +361,11 @@ class SimplePie_Enclosure
}
}
/**
* Get the number of audio channels
*
* @return int|null
*/
public function get_channels()
{
if ($this->channels !== null)
@ -192,6 +378,11 @@ class SimplePie_Enclosure
}
}
/**
* Get the copyright information
*
* @return SimplePie_Copyright|null
*/
public function get_copyright()
{
if ($this->copyright !== null)
@ -204,6 +395,12 @@ class SimplePie_Enclosure
}
}
/**
* Get a single credit
*
* @param int $key
* @return SimplePie_Credit|null
*/
public function get_credit($key = 0)
{
$credits = $this->get_credits();
@ -217,6 +414,11 @@ class SimplePie_Enclosure
}
}
/**
* Get all credits
*
* @return array|null Array of {@see SimplePie_Credit} objects
*/
public function get_credits()
{
if ($this->credits !== null)
@ -229,6 +431,11 @@ class SimplePie_Enclosure
}
}
/**
* Get the description of the enclosure
*
* @return string|null
*/
public function get_description()
{
if ($this->description !== null)
@ -241,6 +448,12 @@ class SimplePie_Enclosure
}
}
/**
* Get the duration of the enclosure
*
* @param string $convert Convert seconds into hh:mm:ss
* @return string|int|null 'hh:mm:ss' string if `$convert` was specified, otherwise integer (or null if none found)
*/
public function get_duration($convert = false)
{
if ($this->duration !== null)
@ -261,6 +474,11 @@ class SimplePie_Enclosure
}
}
/**
* Get the expression
*
* @return string Probably one of 'sample', 'full', 'nonstop', 'clip'. Defaults to 'full'
*/
public function get_expression()
{
if ($this->expression !== null)
@ -273,6 +491,11 @@ class SimplePie_Enclosure
}
}
/**
* Get the file extension
*
* @return string|null
*/
public function get_extension()
{
if ($this->link !== null)
@ -286,6 +509,11 @@ class SimplePie_Enclosure
return null;
}
/**
* Get the framerate (in frames-per-second)
*
* @return string|null
*/
public function get_framerate()
{
if ($this->framerate !== null)
@ -298,11 +526,23 @@ class SimplePie_Enclosure
}
}
/**
* Get the preferred handler
*
* @return string|null One of 'flash', 'fmedia', 'quicktime', 'wmedia', 'mp3'
*/
public function get_handler()
{
return $this->get_real_type(true);
}
/**
* Get a single hash
*
* @link http://www.rssboard.org/media-rss#media-hash
* @param int $key
* @return string|null Hash as per `media:hash`, prefixed with "$algo:"
*/
public function get_hash($key = 0)
{
$hashes = $this->get_hashes();
@ -316,6 +556,11 @@ class SimplePie_Enclosure
}
}
/**
* Get all credits
*
* @return array|null Array of strings, see {@see get_hash()}
*/
public function get_hashes()
{
if ($this->hashes !== null)
@ -328,6 +573,11 @@ class SimplePie_Enclosure
}
}
/**
* Get the height
*
* @return string|null
*/
public function get_height()
{
if ($this->height !== null)
@ -340,6 +590,12 @@ class SimplePie_Enclosure
}
}
/**
* Get the language
*
* @link http://tools.ietf.org/html/rfc3066
* @return string|null Language code as per RFC 3066
*/
public function get_language()
{
if ($this->lang !== null)
@ -352,6 +608,12 @@ class SimplePie_Enclosure
}
}
/**
* Get a single keyword
*
* @param int $key
* @return string|null
*/
public function get_keyword($key = 0)
{
$keywords = $this->get_keywords();
@ -365,6 +627,11 @@ class SimplePie_Enclosure
}
}
/**
* Get all keywords
*
* @return array|null Array of strings
*/
public function get_keywords()
{
if ($this->keywords !== null)
@ -377,6 +644,11 @@ class SimplePie_Enclosure
}
}
/**
* Get length
*
* @return float Length in bytes
*/
public function get_length()
{
if ($this->length !== null)
@ -389,6 +661,11 @@ class SimplePie_Enclosure
}
}
/**
* Get the URL
*
* @return string|null
*/
public function get_link()
{
if ($this->link !== null)
@ -401,6 +678,12 @@ class SimplePie_Enclosure
}
}
/**
* Get the medium
*
* @link http://www.rssboard.org/media-rss#media-content
* @return string|null Should be one of 'image', 'audio', 'video', 'document', 'executable'
*/
public function get_medium()
{
if ($this->medium !== null)
@ -413,6 +696,12 @@ class SimplePie_Enclosure
}
}
/**
* Get the player URL
*
* Typically the same as {@see get_permalink()}
* @return string|null Player URL
*/
public function get_player()
{
if ($this->player !== null)
@ -425,6 +714,12 @@ class SimplePie_Enclosure
}
}
/**
* Get a single rating
*
* @param int $key
* @return SimplePie_Rating|null
*/
public function get_rating($key = 0)
{
$ratings = $this->get_ratings();
@ -438,6 +733,11 @@ class SimplePie_Enclosure
}
}
/**
* Get all ratings
*
* @return array|null Array of {@see SimplePie_Rating} objects
*/
public function get_ratings()
{
if ($this->ratings !== null)
@ -450,6 +750,12 @@ class SimplePie_Enclosure
}
}
/**
* Get a single restriction
*
* @param int $key
* @return SimplePie_Restriction|null
*/
public function get_restriction($key = 0)
{
$restrictions = $this->get_restrictions();
@ -463,6 +769,11 @@ class SimplePie_Enclosure
}
}
/**
* Get all restrictions
*
* @return array|null Array of {@see SimplePie_Restriction} objects
*/
public function get_restrictions()
{
if ($this->restrictions !== null)
@ -475,6 +786,11 @@ class SimplePie_Enclosure
}
}
/**
* Get the sampling rate (in kHz)
*
* @return string|null
*/
public function get_sampling_rate()
{
if ($this->samplingrate !== null)
@ -487,6 +803,11 @@ class SimplePie_Enclosure
}
}
/**
* Get the file size (in MiB)
*
* @return float|null File size in mebibytes (1048 bytes)
*/
public function get_size()
{
$length = $this->get_length();
@ -500,6 +821,12 @@ class SimplePie_Enclosure
}
}
/**
* Get a single thumbnail
*
* @param int $key
* @return string|null Thumbnail URL
*/
public function get_thumbnail($key = 0)
{
$thumbnails = $this->get_thumbnails();
@ -513,6 +840,11 @@ class SimplePie_Enclosure
}
}
/**
* Get all thumbnails
*
* @return array|null Array of thumbnail URLs
*/
public function get_thumbnails()
{
if ($this->thumbnails !== null)
@ -525,6 +857,11 @@ class SimplePie_Enclosure
}
}
/**
* Get the title
*
* @return string|null
*/
public function get_title()
{
if ($this->title !== null)
@ -537,6 +874,12 @@ class SimplePie_Enclosure
}
}
/**
* Get mimetype of the enclosure
*
* @see get_real_type()
* @return string|null MIME type
*/
public function get_type()
{
if ($this->type !== null)
@ -549,6 +892,11 @@ class SimplePie_Enclosure
}
}
/**
* Get the width
*
* @return string|null
*/
public function get_width()
{
if ($this->width !== null)
@ -561,13 +909,63 @@ class SimplePie_Enclosure
}
}
/**
* Embed the enclosure using `<embed>`
*
* @deprecated Use the second parameter to {@see embed} instead
*
* @param array|string $options See first paramter to {@see embed}
* @return string HTML string to output
*/
public function native_embed($options='')
{
return $this->embed($options, true);
}
/**
* Embed the enclosure using Javascript
*
* `$options` is an array or comma-separated key:value string, with the
* following properties:
*
* - `alt` (string): Alternate content for when an end-user does not have
* the appropriate handler installed or when a file type is
* unsupported. Can be any text or HTML. Defaults to blank.
* - `altclass` (string): If a file type is unsupported, the end-user will
* see the alt text (above) linked directly to the content. That link
* will have this value as its class name. Defaults to blank.
* - `audio` (string): This is an image that should be used as a
* placeholder for audio files before they're loaded (QuickTime-only).
* Can be any relative or absolute URL. Defaults to blank.
* - `bgcolor` (string): The background color for the media, if not
* already transparent. Defaults to `#ffffff`.
* - `height` (integer): The height of the embedded media. Accepts any
* numeric pixel value (such as `360`) or `auto`. Defaults to `auto`,
* and it is recommended that you use this default.
* - `loop` (boolean): Do you want the media to loop when its done?
* Defaults to `false`.
* - `mediaplayer` (string): The location of the included
* `mediaplayer.swf` file. This allows for the playback of Flash Video
* (`.flv`) files, and is the default handler for non-Odeo MP3's.
* Defaults to blank.
* - `video` (string): This is an image that should be used as a
* placeholder for video files before they're loaded (QuickTime-only).
* Can be any relative or absolute URL. Defaults to blank.
* - `width` (integer): The width of the embedded media. Accepts any
* numeric pixel value (such as `480`) or `auto`. Defaults to `auto`,
* and it is recommended that you use this default.
* - `widescreen` (boolean): Is the enclosure widescreen or standard?
* This applies only to video enclosures, and will automatically resize
* the content appropriately. Defaults to `false`, implying 4:3 mode.
*
* Note: Non-widescreen (4:3) mode with `width` and `height` set to `auto`
* will default to 480x360 video resolution. Widescreen (16:9) mode with
* `width` and `height` set to `auto` will default to 480x270 video resolution.
*
* @todo If the dimensions for media:content are defined, use them when width/height are set to 'auto'.
* @param array|string $options Comma-separated key:value list, or array
* @param bool $native Use `<embed>`
* @return string HTML string to output
*/
public function embed($options = '', $native = false)
{
@ -723,21 +1121,8 @@ class SimplePie_Enclosure
$embed = '';
// Odeo Feed MP3's
if ($handler === 'odeo')
{
if ($native)
{
$embed .= '<embed src="http://odeo.com/flash/audio_player_fullsize.swf" pluginspage="http://adobe.com/go/getflashplayer" type="application/x-shockwave-flash" quality="high" width="440" height="80" wmode="transparent" allowScriptAccess="any" flashvars="valid_sample_rate=true&external_url=' . $this->get_link() . '"></embed>';
}
else
{
$embed .= '<script type="text/javascript">embed_odeo("' . $this->get_link() . '");</script>';
}
}
// Flash
elseif ($handler === 'flash')
if ($handler === 'flash')
{
if ($native)
{
@ -806,14 +1191,19 @@ class SimplePie_Enclosure
return $embed;
}
/**
* Get the real media type
*
* Often, feeds lie to us, necessitating a bit of deeper inspection. This
* converts types to their canonical representations based on the file
* extension
*
* @see get_type()
* @param bool $find_handler Internal use only, use {@see get_handler()} instead
* @return string MIME type
*/
public function get_real_type($find_handler = false)
{
// If it's Odeo, let's get it out of the way.
if (substr(strtolower($this->get_link()), 0, 15) === 'http://odeo.com')
{
return 'odeo';
}
// Mime-types by handler.
$types_flash = array('application/x-shockwave-flash', 'application/futuresplash'); // Flash
$types_fmedia = array('video/flv', 'video/x-flv','flv-application/octet-stream'); // Flash Media Player

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,56 +33,20 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.4-dev
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
class SimplePie_Rating
/**
* General SimplePie exception class
*
* @package SimplePie
*/
class SimplePie_Exception extends Exception
{
var $scheme;
var $value;
// Constructor, used to input the data
public function __construct($scheme = null, $value = null)
{
$this->scheme = $scheme;
$this->value = $value;
}
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
public function get_scheme()
{
if ($this->scheme !== null)
{
return $this->scheme;
}
else
{
return null;
}
}
public function get_value()
{
if ($this->value !== null)
{
return $this->value;
}
else
{
return null;
}
}
}

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,18 +33,24 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Used for fetching remote files and reading local files
*
* Supports HTTP 1.0 via cURL or fsockopen, with spotty HTTP 1.1 support
*
* This class can be overloaded with {@see SimplePie::set_file_class()}
*
* @package SimplePie
* @subpackage HTTP
* @todo Move to properly supporting RFC2616 (HTTP/1.1)
*/
class SimplePie_File
@ -238,15 +244,23 @@ class SimplePie_File
break;
case 'deflate':
if (($body = gzuncompress($this->body)) === false)
if (($decompressed = gzinflate($this->body)) !== false)
{
if (($body = gzinflate($this->body)) === false)
{
$this->error = 'Unable to decode HTTP "deflate" stream';
$this->success = false;
}
$this->body = $decompressed;
}
else if (($decompressed = gzuncompress($this->body)) !== false)
{
$this->body = $decompressed;
}
else if (function_exists('gzdecode') && ($decompressed = gzdecode($this->body)) !== false)
{
$this->body = $decompressed;
}
else
{
$this->error = 'Unable to decode HTTP "deflate" stream';
$this->success = false;
}
$this->body = $body;
break;
default:

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,14 +33,13 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
@ -48,6 +47,7 @@
* HTTP Response Parser
*
* @package SimplePie
* @subpackage HTTP
*/
class SimplePie_HTTP_Parser
{
@ -457,7 +457,7 @@ class SimplePie_HTTP_Parser
*/
protected function chunked()
{
if (!preg_match('/^[0-9a-f]+(\s|\r|\n)+/mi', trim($this->body)))
if (!preg_match('/^([0-9a-f]+)[^\r\n]*\r\n/i', trim($this->body)))
{
$this->state = 'emit';
return;
@ -468,7 +468,7 @@ class SimplePie_HTTP_Parser
while (true)
{
$is_chunked = (bool) preg_match( '/^([0-9a-f]+)(\s|\r|\n)+/mi', $encoded, $matches );
$is_chunked = (bool) preg_match( '/^([0-9a-f]+)[^\r\n]*\r\n/i', $encoded, $matches );
if (!$is_chunked)
{
// Looks like it's not chunked after all
@ -476,12 +476,20 @@ class SimplePie_HTTP_Parser
return;
}
$length = hexdec($matches[1]);
$length = hexdec(trim($matches[1]));
if ($length === 0)
{
// Ignore trailer headers
$this->state = 'emit';
$this->body = $decoded;
return;
}
$chunk_length = strlen($matches[0]);
$decoded .= $part = substr($encoded, $chunk_length, $length);
$encoded = ltrim(substr($encoded, $chunk_length + $length), "\r\n");
$encoded = substr($encoded, $chunk_length + $length + 2);
if (trim($encoded) === '0')
if (trim($encoded) === '0' || empty($encoded))
{
$this->state = 'emit';
$this->body = $decoded;

File diff suppressed because it is too large Load diff

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,17 +33,23 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Used for feed auto-discovery
*
*
* This class can be overloaded with {@see SimplePie::set_locator_class()}
*
* @package SimplePie
*/
class SimplePie_Locator
{
var $useragent;
@ -51,23 +57,38 @@ class SimplePie_Locator
var $file;
var $local = array();
var $elsewhere = array();
var $file_class = 'SimplePie_File';
var $cached_entities = array();
var $http_base;
var $base;
var $base_location = 0;
var $checked_feeds = 0;
var $max_checked_feeds = 10;
var $content_type_sniffer_class = 'SimplePie_Content_Type_Sniffer';
protected $registry;
public function __construct(&$file, $timeout = 10, $useragent = null, $file_class = 'SimplePie_File', $max_checked_feeds = 10, $content_type_sniffer_class = 'SimplePie_Content_Type_Sniffer')
public function __construct(SimplePie_File $file, $timeout = 10, $useragent = null, $max_checked_feeds = 10)
{
$this->file =& $file;
$this->file_class = $file_class;
$this->file = $file;
$this->useragent = $useragent;
$this->timeout = $timeout;
$this->max_checked_feeds = $max_checked_feeds;
$this->content_type_sniffer_class = $content_type_sniffer_class;
if (class_exists('DOMDocument'))
{
$this->dom = new DOMDocument();
set_error_handler(array('SimplePie_Misc', 'silence_errors'));
$this->dom->loadHTML($this->file->body);
restore_error_handler();
}
else
{
$this->dom = null;
}
}
public function set_registry(SimplePie_Registry $registry)
{
$this->registry = $registry;
}
public function find($type = SIMPLEPIE_LOCATOR_ALL, &$working)
@ -79,7 +100,7 @@ class SimplePie_Locator
if ($this->file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
{
$sniffer = new $this->content_type_sniffer_class($this->file);
$sniffer = $this->registry->create('Content_Type_Sniffer', array($this->file));
if ($sniffer->get_type() !== 'text/html')
{
return null;
@ -121,11 +142,11 @@ class SimplePie_Locator
return null;
}
public function is_feed(&$file)
public function is_feed($file)
{
if ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
{
$sniffer = new $this->content_type_sniffer_class($file);
$sniffer = $this->registry->create('Content_Type_Sniffer', array($file));
$sniffed = $sniffer->get_type();
if (in_array($sniffed, array('application/rss+xml', 'application/rdf+xml', 'text/rdf', 'application/atom+xml', 'text/xml', 'application/xml')))
{
@ -148,15 +169,24 @@ class SimplePie_Locator
public function get_base()
{
if ($this->dom === null)
{
throw new SimplePie_Exception('DOMDocument not found, unable to use locator');
}
$this->http_base = $this->file->url;
$this->base = $this->http_base;
$elements = SimplePie_Misc::get_element('base', $this->file->body);
$elements = $this->dom->getElementsByTagName('base');
foreach ($elements as $element)
{
if ($element['attribs']['href']['data'] !== '')
if ($element->hasAttribute('href'))
{
$this->base = SimplePie_Misc::absolutize_url(trim($element['attribs']['href']['data']), $this->http_base);
$this->base_location = $element['offset'];
$base = $this->registry->call('Misc', 'absolutize_url', array(trim($element->getAttribute('href')), $this->http_base));
if ($base === false)
{
continue;
}
$this->base = $base;
$this->base_location = method_exists($element, 'getLineNo') ? $element->getLineNo() : 0;
break;
}
}
@ -164,43 +194,11 @@ class SimplePie_Locator
public function autodiscovery()
{
$links = array_merge(SimplePie_Misc::get_element('link', $this->file->body), SimplePie_Misc::get_element('a', $this->file->body), SimplePie_Misc::get_element('area', $this->file->body));
$done = array();
$feeds = array();
foreach ($links as $link)
{
if ($this->checked_feeds === $this->max_checked_feeds)
{
break;
}
if (isset($link['attribs']['href']['data']) && isset($link['attribs']['rel']['data']))
{
$rel = array_unique(SimplePie_Misc::space_seperated_tokens(strtolower($link['attribs']['rel']['data'])));
if ($this->base_location < $link['offset'])
{
$href = SimplePie_Misc::absolutize_url(trim($link['attribs']['href']['data']), $this->base);
}
else
{
$href = SimplePie_Misc::absolutize_url(trim($link['attribs']['href']['data']), $this->http_base);
}
if (!in_array($href, $done) && in_array('feed', $rel) || (in_array('alternate', $rel) && !in_array('stylesheet', $rel) && !empty($link['attribs']['type']['data']) && in_array(strtolower(SimplePie_Misc::parse_mime($link['attribs']['type']['data'])), array('application/rss+xml', 'application/atom+xml'))) && !isset($feeds[$href]))
{
$this->checked_feeds++;
$headers = array(
'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
);
$feed = new $this->file_class($href, $this->timeout, 5, $headers, $this->useragent);
if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed))
{
$feeds[$href] = $feed;
}
}
$done[] = $href;
}
}
$feeds = array_merge($feeds, $this->search_elements_by_tag('link', $done, $feeds));
$feeds = array_merge($feeds, $this->search_elements_by_tag('a', $done, $feeds));
$feeds = array_merge($feeds, $this->search_elements_by_tag('area', $done, $feeds));
if (!empty($feeds))
{
@ -212,27 +210,87 @@ class SimplePie_Locator
}
}
public function get_links()
protected function search_elements_by_tag($name, &$done, $feeds)
{
$links = SimplePie_Misc::get_element('a', $this->file->body);
if ($this->dom === null)
{
throw new SimplePie_Exception('DOMDocument not found, unable to use locator');
}
$links = $this->dom->getElementsByTagName($name);
foreach ($links as $link)
{
if (isset($link['attribs']['href']['data']))
if ($this->checked_feeds === $this->max_checked_feeds)
{
$href = trim($link['attribs']['href']['data']);
$parsed = SimplePie_Misc::parse_url($href);
break;
}
if ($link->hasAttribute('href') && $link->hasAttribute('rel'))
{
$rel = array_unique($this->registry->call('Misc', 'space_seperated_tokens', array(strtolower($link->getAttribute('rel')))));
$line = method_exists($link, 'getLineNo') ? $link->getLineNo() : 1;
if ($this->base_location < $line)
{
$href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->base));
}
else
{
$href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->http_base));
}
if ($href === false)
{
continue;
}
if (!in_array($href, $done) && in_array('feed', $rel) || (in_array('alternate', $rel) && !in_array('stylesheet', $rel) && $link->hasAttribute('type') && in_array(strtolower($this->registry->call('Misc', 'parse_mime', array($link->getAttribute('type')))), array('application/rss+xml', 'application/atom+xml'))) && !isset($feeds[$href]))
{
$this->checked_feeds++;
$headers = array(
'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
);
$feed = $this->registry->create('File', array($href, $this->timeout, 5, $headers, $this->useragent));
if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed))
{
$feeds[$href] = $feed;
}
}
$done[] = $href;
}
}
return $feeds;
}
public function get_links()
{
if ($this->dom === null)
{
throw new SimplePie_Exception('DOMDocument not found, unable to use locator');
}
$links = $this->dom->getElementsByTagName('a');
foreach ($links as $link)
{
if ($link->hasAttribute('href'))
{
$href = trim($link->getAttribute('href'));
$parsed = $this->registry->call('Misc', 'parse_url', array($href));
if ($parsed['scheme'] === '' || preg_match('/^(http(s)|feed)?$/i', $parsed['scheme']))
{
if ($this->base_location < $link['offset'])
if ($this->base_location < $link->getLineNo())
{
$href = SimplePie_Misc::absolutize_url(trim($link['attribs']['href']['data']), $this->base);
$href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->base));
}
else
{
$href = SimplePie_Misc::absolutize_url(trim($link['attribs']['href']['data']), $this->http_base);
$href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->http_base));
}
if ($href === false)
{
continue;
}
$current = SimplePie_Misc::parse_url($this->file->url);
$current = $this->registry->call('Misc', 'parse_url', array($this->file->url));
if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority'])
{
@ -269,7 +327,7 @@ class SimplePie_Locator
$headers = array(
'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
);
$feed = new $this->file_class($value, $this->timeout, 5, $headers, $this->useragent);
$feed = $this->registry->create('File', array($value, $this->timeout, 5, $headers, $this->useragent));
if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed))
{
return $feed;
@ -297,7 +355,7 @@ class SimplePie_Locator
$headers = array(
'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
);
$feed = new $this->file_class($value, $this->timeout, 5, null, $this->useragent);
$feed = $this->registry->create('File', array($value, $this->timeout, 5, null, $this->useragent));
if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed))
{
return $feed;

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,17 +33,20 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Miscellanous utilities
*
* @package SimplePie
*/
class SimplePie_Misc
{
public static function time_hms($seconds)
@ -77,63 +80,21 @@ class SimplePie_Misc
public static function absolutize_url($relative, $base)
{
$iri = SimplePie_IRI::absolutize(new SimplePie_IRI($base), $relative);
return $iri->get_iri();
}
public static function remove_dot_segments($input)
{
$output = '';
while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..')
if ($iri === false)
{
// A: If the input buffer begins with a prefix of "../" or "./", then remove that prefix from the input buffer; otherwise,
if (strpos($input, '../') === 0)
{
$input = substr($input, 3);
}
elseif (strpos($input, './') === 0)
{
$input = substr($input, 2);
}
// B: if the input buffer begins with a prefix of "/./" or "/.", where "." is a complete path segment, then replace that prefix with "/" in the input buffer; otherwise,
elseif (strpos($input, '/./') === 0)
{
$input = substr_replace($input, '/', 0, 3);
}
elseif ($input === '/.')
{
$input = '/';
}
// C: if the input buffer begins with a prefix of "/../" or "/..", where ".." is a complete path segment, then replace that prefix with "/" in the input buffer and remove the last segment and its preceding "/" (if any) from the output buffer; otherwise,
elseif (strpos($input, '/../') === 0)
{
$input = substr_replace($input, '/', 0, 4);
$output = substr_replace($output, '', strrpos($output, '/'));
}
elseif ($input === '/..')
{
$input = '/';
$output = substr_replace($output, '', strrpos($output, '/'));
}
// D: if the input buffer consists only of "." or "..", then remove that from the input buffer; otherwise,
elseif ($input === '.' || $input === '..')
{
$input = '';
}
// E: move the first path segment in the input buffer to the end of the output buffer, including the initial "/" character (if any) and any subsequent characters up to, but not including, the next "/" character or the end of the input buffer
elseif (($pos = strpos($input, '/', 1)) !== false)
{
$output .= substr($input, 0, $pos);
$input = substr_replace($input, '', 0, $pos);
}
else
{
$output .= $input;
$input = '';
}
return false;
}
return $output . $input;
return $iri->get_uri();
}
/**
* Get a HTML/XML element from a HTML string
*
* @deprecated Use DOMDocument instead (parsing HTML with regex is bad!)
* @param string $realname Element name (including namespace prefix if applicable)
* @param string $string HTML document
* @return array
*/
public static function get_element($realname, $string)
{
$return = array();
@ -267,29 +228,29 @@ class SimplePie_Misc
{
$iri = new SimplePie_IRI($url);
return array(
'scheme' => (string) $iri->get_scheme(),
'authority' => (string) $iri->get_authority(),
'path' => (string) $iri->get_path(),
'query' => (string) $iri->get_query(),
'fragment' => (string) $iri->get_fragment()
'scheme' => (string) $iri->scheme,
'authority' => (string) $iri->authority,
'path' => (string) $iri->path,
'query' => (string) $iri->query,
'fragment' => (string) $iri->fragment
);
}
public static function compress_parse_url($scheme = '', $authority = '', $path = '', $query = '', $fragment = '')
{
$iri = new SimplePie_IRI('');
$iri->set_scheme($scheme);
$iri->set_authority($authority);
$iri->set_path($path);
$iri->set_query($query);
$iri->set_fragment($fragment);
return $iri->get_iri();
$iri->scheme = $scheme;
$iri->authority = $authority;
$iri->path = $path;
$iri->query = $query;
$iri->fragment = $fragment;
return $iri->get_uri();
}
public static function normalize_url($url)
{
$iri = new SimplePie_IRI($url);
return $iri->get_iri();
return $iri->get_uri();
}
public static function percent_encoding_normalization($match)
@ -378,6 +339,14 @@ class SimplePie_Misc
{
$output = 'EUC-KR';
}
if ($input === 'Windows-31J')
{
$input = 'SJIS';
}
if ($output === 'Windows-31J')
{
$output = 'SJIS';
}
// Check that the encoding is supported
if (@mb_convert_encoding("\x80", 'UTF-16BE', $input) === "\x00\x80")
@ -1613,7 +1582,6 @@ class SimplePie_Misc
case 'utf7':
return 'UTF-7';
case 'utf8lias':
case 'utf8':
return 'UTF-8';
@ -1665,8 +1633,7 @@ class SimplePie_Misc
case 'mskanji':
case 'shiftjis':
case 'windows31j':
return 'SJIS';
//return 'Windows-31J';
return 'Windows-31J';
case 'iso885911':
case 'tis620':
@ -1750,40 +1717,6 @@ class SimplePie_Misc
return $curl;
}
public static function is_subclass_of($class1, $class2)
{
if (func_num_args() !== 2)
{
trigger_error('Wrong parameter count for SimplePie_Misc::is_subclass_of()', E_USER_WARNING);
}
elseif (version_compare(PHP_VERSION, '5.0.3', '>=') || is_object($class1))
{
return is_subclass_of($class1, $class2);
}
elseif (is_string($class1) && is_string($class2))
{
if (class_exists($class1))
{
if (class_exists($class2))
{
$class2 = strtolower($class2);
while ($class1 = strtolower(get_parent_class($class1)))
{
if ($class1 === $class2)
{
return true;
}
}
}
}
else
{
trigger_error('Unknown class passed as parameter', E_USER_WARNNG);
}
}
return false;
}
/**
* Strip HTML comments
*
@ -1817,7 +1750,7 @@ class SimplePie_Misc
/**
* Decode HTML entities
*
* @static
* @deprecated Use DOMDocument instead
* @param string $data Input data
* @return string Output data
*/
@ -1899,18 +1832,6 @@ class SimplePie_Misc
}
}
public static function htmlspecialchars_decode($string, $quote_style)
{
if (function_exists('htmlspecialchars_decode'))
{
return htmlspecialchars_decode($string, $quote_style);
}
else
{
return strtr($string, array_flip(get_html_translation_table(HTML_SPECIALCHARS, $quote_style)));
}
}
public static function atom_03_construct_type($attribs)
{
if (isset($attribs['']['mode']) && strtolower(trim($attribs['']['mode']) === 'base64'))
@ -2024,48 +1945,6 @@ class SimplePie_Misc
return $tokens;
}
public static function array_unique($array)
{
if (version_compare(PHP_VERSION, '5.2', '>='))
{
return array_unique($array);
}
else
{
$array = (array) $array;
$new_array = array();
$new_array_strings = array();
foreach ($array as $key => $value)
{
if (is_object($value))
{
if (method_exists($value, '__toString'))
{
$cmp = $value->__toString();
}
else
{
trigger_error('Object of class ' . get_class($value) . ' could not be converted to string', E_USER_ERROR);
}
}
elseif (is_array($value))
{
$cmp = (string) reset($value);
}
else
{
$cmp = (string) $value;
}
if (!in_array($cmp, $new_array_strings))
{
$new_array[$key] = $value;
$new_array_strings[] = $cmp;
}
}
return $new_array;
}
}
/**
* Converts a unicode codepoint to a UTF-8 character
*
@ -2139,9 +2018,10 @@ class SimplePie_Misc
*
* @todo Add support for EBCDIC
* @param string $data XML data
* @param SimplePie_Registry $registry Class registry
* @return array Possible encodings
*/
public static function xml_encoding($data)
public static function xml_encoding($data, $registry)
{
// UTF-32 Big Endian BOM
if (substr($data, 0, 4) === "\x00\x00\xFE\xFF")
@ -2173,7 +2053,7 @@ class SimplePie_Misc
{
if ($pos = strpos($data, "\x00\x00\x00\x3F\x00\x00\x00\x3E"))
{
$parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 20), 'UTF-32BE', 'UTF-8'));
$parser = $registry->create('XML_Declaration_Parser', array(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 20), 'UTF-32BE', 'UTF-8')));
if ($parser->parse())
{
$encoding[] = $parser->encoding;
@ -2186,7 +2066,7 @@ class SimplePie_Misc
{
if ($pos = strpos($data, "\x3F\x00\x00\x00\x3E\x00\x00\x00"))
{
$parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 20), 'UTF-32LE', 'UTF-8'));
$parser = $registry->create('XML_Declaration_Parser', array(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 20), 'UTF-32LE', 'UTF-8')));
if ($parser->parse())
{
$encoding[] = $parser->encoding;
@ -2199,7 +2079,7 @@ class SimplePie_Misc
{
if ($pos = strpos($data, "\x00\x3F\x00\x3E"))
{
$parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 10), 'UTF-16BE', 'UTF-8'));
$parser = $registry->create('XML_Declaration_Parser', array(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 10), 'UTF-16BE', 'UTF-8')));
if ($parser->parse())
{
$encoding[] = $parser->encoding;
@ -2212,7 +2092,7 @@ class SimplePie_Misc
{
if ($pos = strpos($data, "\x3F\x00\x3E\x00"))
{
$parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 10), 'UTF-16LE', 'UTF-8'));
$parser = $registry->create('XML_Declaration_Parser', array(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 10), 'UTF-16LE', 'UTF-8')));
if ($parser->parse())
{
$encoding[] = $parser->encoding;
@ -2225,7 +2105,7 @@ class SimplePie_Misc
{
if ($pos = strpos($data, "\x3F\x3E"))
{
$parser = new SimplePie_XML_Declaration_Parser(substr($data, 5, $pos - 5));
$parser = $registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5)));
if ($parser->parse())
{
$encoding[] = $parser->encoding;
@ -2251,10 +2131,6 @@ class SimplePie_Misc
header('Cache-Control: must-revalidate');
header('Expires: ' . gmdate('D, d M Y H:i:s', time() + 604800) . ' GMT'); // 7 days
?>
function embed_odeo(link) {
document.writeln('<embed src="http://odeo.com/flash/audio_player_fullsize.swf" pluginspage="http://www.macromedia.com/go/getflashplayer" type="application/x-shockwave-flash" quality="high" width="440" height="80" wmode="transparent" allowScriptAccess="any" flashvars="valid_sample_rate=true&external_url='+link+'"></embed>');
}
function embed_quicktime(type, bgcolor, width, height, link, placeholder, loop) {
if (placeholder != '') {
document.writeln('<embed type="'+type+'" style="cursor:hand; cursor:pointer;" href="'+link+'" src="'+placeholder+'" width="'+width+'" height="'+height+'" autoplay="false" target="myself" controller="false" loop="'+loop+'" scale="aspect" bgcolor="'+bgcolor+'" pluginspage="http://www.apple.com/quicktime/download/"></embed>');
@ -2362,5 +2238,10 @@ function embed_wmedia(width, height, link) {
}
return $info;
}
public static function silence_errors($num, $str)
{
// No-op
}
}

View file

@ -0,0 +1,276 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Class to validate and to work with IPv6 addresses.
*
* @package SimplePie
* @subpackage HTTP
* @copyright 2003-2005 The PHP Group
* @license http://www.opensource.org/licenses/bsd-license.php
* @link http://pear.php.net/package/Net_IPv6
* @author Alexander Merz <alexander.merz@web.de>
* @author elfrink at introweb dot nl
* @author Josh Peck <jmp at joshpeck dot org>
* @author Geoffrey Sneddon <geoffers@gmail.com>
*/
class SimplePie_Net_IPv6
{
/**
* Uncompresses an IPv6 address
*
* RFC 4291 allows you to compress concecutive zero pieces in an address to
* '::'. This method expects a valid IPv6 address and expands the '::' to
* the required number of zero pieces.
*
* Example: FF01::101 -> FF01:0:0:0:0:0:0:101
* ::1 -> 0:0:0:0:0:0:0:1
*
* @author Alexander Merz <alexander.merz@web.de>
* @author elfrink at introweb dot nl
* @author Josh Peck <jmp at joshpeck dot org>
* @copyright 2003-2005 The PHP Group
* @license http://www.opensource.org/licenses/bsd-license.php
* @param string $ip An IPv6 address
* @return string The uncompressed IPv6 address
*/
public static function uncompress($ip)
{
$c1 = -1;
$c2 = -1;
if (substr_count($ip, '::') === 1)
{
list($ip1, $ip2) = explode('::', $ip);
if ($ip1 === '')
{
$c1 = -1;
}
else
{
$c1 = substr_count($ip1, ':');
}
if ($ip2 === '')
{
$c2 = -1;
}
else
{
$c2 = substr_count($ip2, ':');
}
if (strpos($ip2, '.') !== false)
{
$c2++;
}
// ::
if ($c1 === -1 && $c2 === -1)
{
$ip = '0:0:0:0:0:0:0:0';
}
// ::xxx
else if ($c1 === -1)
{
$fill = str_repeat('0:', 7 - $c2);
$ip = str_replace('::', $fill, $ip);
}
// xxx::
else if ($c2 === -1)
{
$fill = str_repeat(':0', 7 - $c1);
$ip = str_replace('::', $fill, $ip);
}
// xxx::xxx
else
{
$fill = ':' . str_repeat('0:', 6 - $c2 - $c1);
$ip = str_replace('::', $fill, $ip);
}
}
return $ip;
}
/**
* Compresses an IPv6 address
*
* RFC 4291 allows you to compress concecutive zero pieces in an address to
* '::'. This method expects a valid IPv6 address and compresses consecutive
* zero pieces to '::'.
*
* Example: FF01:0:0:0:0:0:0:101 -> FF01::101
* 0:0:0:0:0:0:0:1 -> ::1
*
* @see uncompress()
* @param string $ip An IPv6 address
* @return string The compressed IPv6 address
*/
public static function compress($ip)
{
// Prepare the IP to be compressed
$ip = self::uncompress($ip);
$ip_parts = self::split_v6_v4($ip);
// Replace all leading zeros
$ip_parts[0] = preg_replace('/(^|:)0+([0-9])/', '\1\2', $ip_parts[0]);
// Find bunches of zeros
if (preg_match_all('/(?:^|:)(?:0(?::|$))+/', $ip_parts[0], $matches, PREG_OFFSET_CAPTURE))
{
$max = 0;
$pos = null;
foreach ($matches[0] as $match)
{
if (strlen($match[0]) > $max)
{
$max = strlen($match[0]);
$pos = $match[1];
}
}
$ip_parts[0] = substr_replace($ip_parts[0], '::', $pos, $max);
}
if ($ip_parts[1] !== '')
{
return implode(':', $ip_parts);
}
else
{
return $ip_parts[0];
}
}
/**
* Splits an IPv6 address into the IPv6 and IPv4 representation parts
*
* RFC 4291 allows you to represent the last two parts of an IPv6 address
* using the standard IPv4 representation
*
* Example: 0:0:0:0:0:0:13.1.68.3
* 0:0:0:0:0:FFFF:129.144.52.38
*
* @param string $ip An IPv6 address
* @return array [0] contains the IPv6 represented part, and [1] the IPv4 represented part
*/
private static function split_v6_v4($ip)
{
if (strpos($ip, '.') !== false)
{
$pos = strrpos($ip, ':');
$ipv6_part = substr($ip, 0, $pos);
$ipv4_part = substr($ip, $pos + 1);
return array($ipv6_part, $ipv4_part);
}
else
{
return array($ip, '');
}
}
/**
* Checks an IPv6 address
*
* Checks if the given IP is a valid IPv6 address
*
* @param string $ip An IPv6 address
* @return bool true if $ip is a valid IPv6 address
*/
public static function check_ipv6($ip)
{
$ip = self::uncompress($ip);
list($ipv6, $ipv4) = self::split_v6_v4($ip);
$ipv6 = explode(':', $ipv6);
$ipv4 = explode('.', $ipv4);
if (count($ipv6) === 8 && count($ipv4) === 1 || count($ipv6) === 6 && count($ipv4) === 4)
{
foreach ($ipv6 as $ipv6_part)
{
// The section can't be empty
if ($ipv6_part === '')
return false;
// Nor can it be over four characters
if (strlen($ipv6_part) > 4)
return false;
// Remove leading zeros (this is safe because of the above)
$ipv6_part = ltrim($ipv6_part, '0');
if ($ipv6_part === '')
$ipv6_part = '0';
// Check the value is valid
$value = hexdec($ipv6_part);
if (dechex($value) !== strtolower($ipv6_part) || $value < 0 || $value > 0xFFFF)
return false;
}
if (count($ipv4) === 4)
{
foreach ($ipv4 as $ipv4_part)
{
$value = (int) $ipv4_part;
if ((string) $value !== $ipv4_part || $value < 0 || $value > 0xFF)
return false;
}
}
return true;
}
else
{
return false;
}
}
/**
* Checks if the given IP is a valid IPv6 address
*
* @codeCoverageIgnore
* @deprecated Use {@see SimplePie_Net_IPv6::check_ipv6()} instead
* @see check_ipv6
* @param string $ip An IPv6 address
* @return bool true if $ip is a valid IPv6 address
*/
public static function checkIPv6($ip)
{
return self::check_ipv6($ip);
}
}

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,14 +33,13 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
@ -48,6 +47,7 @@
* Date Parser
*
* @package SimplePie
* @subpackage Parsing
*/
class SimplePie_Parse_Date
{
@ -599,7 +599,7 @@ class SimplePie_Parse_Date
foreach ($this->built_in as $method)
{
if (($returned = call_user_func(array(&$this, $method), $date)) !== false)
if (($returned = call_user_func(array($this, $method), $date)) !== false)
{
return $returned;
}

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,17 +33,24 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Parses XML into something sane
*
*
* This class can be overloaded with {@see SimplePie::set_parser_class()}
*
* @package SimplePie
* @subpackage Parsing
*/
class SimplePie_Parser
{
var $error_code;
@ -61,6 +68,12 @@ class SimplePie_Parser
var $datas = array(array());
var $current_xhtml_construct = -1;
var $encoding;
protected $registry;
public function set_registry(SimplePie_Registry $registry)
{
$this->registry = $registry;
}
public function parse(&$data, $encoding)
{
@ -103,7 +116,7 @@ class SimplePie_Parser
if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false)
{
$declaration = new SimplePie_XML_Declaration_Parser(substr($data, 5, $pos - 5));
$declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5)));
if ($declaration->parse())
{
$data = substr($data, $pos + 2);
@ -265,8 +278,12 @@ class SimplePie_Parser
if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['base']))
{
$this->xml_base[] = SimplePie_Misc::absolutize_url($attribs[SIMPLEPIE_NAMESPACE_XML]['base'], end($this->xml_base));
$this->xml_base_explicit[] = true;
$base = $this->registry->call('Misc', 'absolutize_url', array($attribs[SIMPLEPIE_NAMESPACE_XML]['base'], end($this->xml_base)));
if ($base !== false)
{
$this->xml_base[] = $base;
$this->xml_base_explicit[] = true;
}
}
else
{
@ -305,7 +322,10 @@ class SimplePie_Parser
$this->data =& $this->data['child'][end($this->namespace)][end($this->element)][];
$this->data = array('data' => '', 'attribs' => $attribs, 'xml_base' => end($this->xml_base), 'xml_base_explicit' => end($this->xml_base_explicit), 'xml_lang' => end($this->xml_lang));
if ((end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_03 && in_array(end($this->element), array('title', 'tagline', 'copyright', 'info', 'summary', 'content')) && isset($attribs['']['mode']) && $attribs['']['mode'] === 'xml')
|| (end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_10 && in_array(end($this->element), array('rights', 'subtitle', 'summary', 'info', 'title', 'content')) && isset($attribs['']['type']) && $attribs['']['type'] === 'xhtml'))
|| (end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_10 && in_array(end($this->element), array('rights', 'subtitle', 'summary', 'info', 'title', 'content')) && isset($attribs['']['type']) && $attribs['']['type'] === 'xhtml')
|| (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_20 && in_array(end($this->element), array('title')))
|| (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_090 && in_array(end($this->element), array('title')))
|| (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_10 && in_array(end($this->element), array('title'))))
{
$this->current_xhtml_construct = 0;
}

View file

@ -0,0 +1,129 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Handles `<media:rating>` or `<itunes:explicit>` tags as defined in Media RSS and iTunes RSS respectively
*
* Used by {@see SimplePie_Enclosure::get_rating()} and {@see SimplePie_Enclosure::get_ratings()}
*
* This class can be overloaded with {@see SimplePie::set_rating_class()}
*
* @package SimplePie
* @subpackage API
*/
class SimplePie_Rating
{
/**
* Rating scheme
*
* @var string
* @see get_scheme()
*/
var $scheme;
/**
* Rating value
*
* @var string
* @see get_value()
*/
var $value;
/**
* Constructor, used to input the data
*
* For documentation on all the parameters, see the corresponding
* properties and their accessors
*/
public function __construct($scheme = null, $value = null)
{
$this->scheme = $scheme;
$this->value = $value;
}
/**
* String-ified version
*
* @return string
*/
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
/**
* Get the organizational scheme for the rating
*
* @return string|null
*/
public function get_scheme()
{
if ($this->scheme !== null)
{
return $this->scheme;
}
else
{
return null;
}
}
/**
* Get the value of the rating
*
* @return string|null
*/
public function get_value()
{
if ($this->value !== null)
{
return $this->value;
}
else
{
return null;
}
}
}

View file

@ -0,0 +1,225 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Handles creating objects and calling methods
*
* Access this via {@see SimplePie::get_registry()}
*
* @package SimplePie
*/
class SimplePie_Registry
{
/**
* Default class mapping
*
* Overriding classes *must* subclass these.
*
* @var array
*/
protected $default = array(
'Cache' => 'SimplePie_Cache',
'Locator' => 'SimplePie_Locator',
'Parser' => 'SimplePie_Parser',
'File' => 'SimplePie_File',
'Sanitize' => 'SimplePie_Sanitize',
'Item' => 'SimplePie_Item',
'Author' => 'SimplePie_Author',
'Category' => 'SimplePie_Category',
'Enclosure' => 'SimplePie_Enclosure',
'Caption' => 'SimplePie_Caption',
'Copyright' => 'SimplePie_Copyright',
'Credit' => 'SimplePie_Credit',
'Rating' => 'SimplePie_Rating',
'Restriction' => 'SimplePie_Restriction',
'Content_Type_Sniffer' => 'SimplePie_Content_Type_Sniffer',
'Source' => 'SimplePie_Source',
'Misc' => 'SimplePie_Misc',
'XML_Declaration_Parser' => 'SimplePie_XML_Declaration_Parser',
'Parse_Date' => 'SimplePie_Parse_Date',
);
/**
* Class mapping
*
* @see register()
* @var array
*/
protected $classes = array();
/**
* Legacy classes
*
* @see register()
* @var array
*/
protected $legacy = array();
/**
* Constructor
*
* No-op
*/
public function __construct() { }
/**
* Register a class
*
* @param string $type See {@see $default} for names
* @param string $class Class name, must subclass the corresponding default
* @param bool $legacy Whether to enable legacy support for this class
* @return bool Successfulness
*/
public function register($type, $class, $legacy = false)
{
if (!is_subclass_of($class, $this->default[$type]))
{
return false;
}
$this->classes[$type] = $class;
if ($legacy)
{
$this->legacy[] = $class;
}
return true;
}
/**
* Get the class registered for a type
*
* Where possible, use {@see create()} or {@see call()} instead
*
* @param string $type
* @return string|null
*/
public function get_class($type)
{
if (!empty($this->classes[$type]))
{
return $this->classes[$type];
}
if (!empty($this->default[$type]))
{
return $this->default[$type];
}
return null;
}
/**
* Create a new instance of a given type
*
* @param string $type
* @param array $parameters Parameters to pass to the constructor
* @return object Instance of class
*/
public function &create($type, $parameters = array())
{
$class = $this->get_class($type);
if (in_array($class, $this->legacy))
{
switch ($type)
{
case 'locator':
// Legacy: file, timeout, useragent, file_class, max_checked_feeds, content_type_sniffer_class
// Specified: file, timeout, useragent, max_checked_feeds
$replacement = array($this->get_class('file'), $parameters[3], $this->get_class('content_type_sniffer'));
array_splice($parameters, 3, 1, $replacement);
break;
}
}
if (!method_exists($class, '__construct'))
{
$instance = new $class;
}
else
{
$reflector = new ReflectionClass($class);
$instance = $reflector->newInstanceArgs($parameters);
}
if (method_exists($instance, 'set_registry'))
{
$instance->set_registry($this);
}
return $instance;
}
/**
* Call a static method for a type
*
* @param string $type
* @param string $method
* @param array $parameters
* @return mixed
*/
public function &call($type, $method, $parameters = array())
{
$class = $this->get_class($type);
if (in_array($class, $this->legacy))
{
switch ($type)
{
case 'Cache':
// For backwards compatibility with old non-static
// Cache::create() methods
if ($method === 'get_handler')
{
$result = @call_user_func_array(array($class, 'create'), $parameters);
return $result;
}
break;
}
}
$result = call_user_func_array(array($class, $method), $parameters);
return $result;
}
}

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,24 +33,57 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Handles `<media:restriction>` as defined in Media RSS
*
* Used by {@see SimplePie_Enclosure::get_restriction()} and {@see SimplePie_Enclosure::get_restrictions()}
*
* This class can be overloaded with {@see SimplePie::set_restriction_class()}
*
* @package SimplePie
* @subpackage API
*/
class SimplePie_Restriction
{
/**
* Relationship ('allow'/'deny')
*
* @var string
* @see get_relationship()
*/
var $relationship;
/**
* Type of restriction
*
* @var string
* @see get_type()
*/
var $type;
/**
* Restricted values
*
* @var string
* @see get_value()
*/
var $value;
// Constructor, used to input the data
/**
* Constructor, used to input the data
*
* For documentation on all the parameters, see the corresponding
* properties and their accessors
*/
public function __construct($relationship = null, $type = null, $value = null)
{
$this->relationship = $relationship;
@ -58,12 +91,22 @@ class SimplePie_Restriction
$this->value = $value;
}
/**
* String-ified version
*
* @return string
*/
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
/**
* Get the relationship
*
* @return string|null Either 'allow' or 'deny'
*/
public function get_relationship()
{
if ($this->relationship !== null)
@ -76,6 +119,11 @@ class SimplePie_Restriction
}
}
/**
* Get the type
*
* @return string|null
*/
public function get_type()
{
if ($this->type !== null)
@ -88,6 +136,11 @@ class SimplePie_Restriction
}
}
/**
* Get the list of restricted things
*
* @return string|null
*/
public function get_value()
{
if ($this->value !== null)

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,18 +33,22 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Used for data cleanup and post-processing
*
*
* This class can be overloaded with {@see SimplePie::set_sanitize_class()}
*
* @package SimplePie
* @todo Move to using an actual HTML parser (this will allow tags to be properly stripped, and to switch between HTML and XHTML), this will also make it easier to shorten a string while preserving HTML tags
*/
class SimplePie_Sanitize
@ -63,23 +67,16 @@ class SimplePie_Sanitize
var $enable_cache = true;
var $cache_location = './cache';
var $cache_name_function = 'md5';
var $cache_class = 'SimplePie_Cache';
var $file_class = 'SimplePie_File';
var $timeout = 10;
var $useragent = '';
var $force_fsockopen = false;
var $replace_url_attributes = null;
var $replace_url_attributes = array(
'a' => 'href',
'area' => 'href',
'blockquote' => 'cite',
'del' => 'cite',
'form' => 'action',
'img' => array('longdesc', 'src'),
'input' => 'src',
'ins' => 'cite',
'q' => 'cite'
);
public function __construct()
{
// Set defaults
$this->set_url_replacements(null);
}
public function remove_div($enable = true)
{
@ -98,6 +95,11 @@ class SimplePie_Sanitize
}
}
public function set_registry(SimplePie_Registry $registry)
{
$this->registry = $registry;
}
public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache')
{
if (isset($enable_cache))
@ -114,20 +116,10 @@ class SimplePie_Sanitize
{
$this->cache_name_function = (string) $cache_name_function;
}
if ($cache_class)
{
$this->cache_class = (string) $cache_class;
}
}
public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false)
{
if ($file_class)
{
$this->file_class = (string) $file_class;
}
if ($timeout)
{
$this->timeout = (string) $timeout;
@ -201,12 +193,32 @@ class SimplePie_Sanitize
* Set element/attribute key/value pairs of HTML attributes
* containing URLs that need to be resolved relative to the feed
*
* @access public
* Defaults to |a|@href, |area|@href, |blockquote|@cite, |del|@cite,
* |form|@action, |img|@longdesc, |img|@src, |input|@src, |ins|@cite,
* |q|@cite
*
* @since 1.0
* @param array $element_attribute Element/attribute key/value pairs
* @param array|null $element_attribute Element/attribute key/value pairs, null for default
*/
public function set_url_replacements($element_attribute = array('a' => 'href', 'area' => 'href', 'blockquote' => 'cite', 'del' => 'cite', 'form' => 'action', 'img' => array('longdesc', 'src'), 'input' => 'src', 'ins' => 'cite', 'q' => 'cite'))
public function set_url_replacements($element_attribute = null)
{
if ($element_attribute === null)
{
$element_attribute = array(
'a' => 'href',
'area' => 'href',
'blockquote' => 'cite',
'del' => 'cite',
'form' => 'action',
'img' => array(
'longdesc',
'src'
),
'input' => 'src',
'ins' => 'cite',
'q' => 'cite'
);
}
$this->replace_url_attributes = (array) $element_attribute;
}
@ -232,25 +244,27 @@ class SimplePie_Sanitize
$data = base64_decode($data);
}
if ($type & SIMPLEPIE_CONSTRUCT_XHTML)
{
if ($this->remove_div)
{
$data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
$data = preg_replace('/<\/div>$/', '', $data);
}
else
{
$data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
}
}
if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML))
{
$document = new DOMDocument();
$document->encoding = 'UTF-8';
$data = $this->preprocess($data, $type);
set_error_handler(array('SimplePie_Misc', 'silence_errors'));
$document->loadHTML($data);
restore_error_handler();
// Strip comments
if ($this->strip_comments)
{
$data = SimplePie_Misc::strip_comments($data);
$xpath = new DOMXPath($document);
$comments = $xpath->query('//comment()');
foreach ($comments as $comment)
{
$comment->parentNode->removeChild($comment);
}
}
// Strip out HTML tags and attributes that might cause various security problems.
@ -260,11 +274,7 @@ class SimplePie_Sanitize
{
foreach ($this->strip_htmltags as $tag)
{
$pcre = "/<($tag)" . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . "(>(.*)<\/$tag" . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>|(\/)?>)/siU';
while (preg_match($pcre, $data))
{
$data = preg_replace_callback($pcre, array(&$this, 'do_strip_htmltags'), $data);
}
$this->strip_tag($tag, $document, $type);
}
}
@ -272,7 +282,7 @@ class SimplePie_Sanitize
{
foreach ($this->strip_attributes as $attrib)
{
$data = preg_replace('/(<[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*)' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . trim($attrib) . '(?:\s*=\s*(?:"(?:[^"]*)"|\'(?:[^\']*)\'|(?:[^\x09\x0A\x0B\x0C\x0D\x20\x22\x27\x3E][^\x09\x0A\x0B\x0C\x0D\x20\x3E]*)?))?' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>/', '\1\2\3>', $data);
$this->strip_attr($attrib, $document);
}
}
@ -280,36 +290,34 @@ class SimplePie_Sanitize
$this->base = $base;
foreach ($this->replace_url_attributes as $element => $attributes)
{
$data = $this->replace_urls($data, $element, $attributes);
$this->replace_urls($document, $element, $attributes);
}
// If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache)
{
$images = SimplePie_Misc::get_element('img', $data);
$images = $document->getElementsByTagName('img');
foreach ($images as $img)
{
if (isset($img['attribs']['src']['data']))
if ($img->hasAttribute('src'))
{
$image_url = call_user_func($this->cache_name_function, $img['attribs']['src']['data']);
$cache = call_user_func(array($this->cache_class, 'create'), $this->cache_location, $image_url, 'spi');
$image_url = call_user_func($this->cache_name_function, $img->getAttribute('src'));
$cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, $image_url, 'spi'));
if ($cache->load())
{
$img['attribs']['src']['data'] = $this->image_handler . $image_url;
$data = str_replace($img['full'], SimplePie_Misc::element_implode($img), $data);
$img->setAttribute('src', $this->image_handler . $image_url);
}
else
{
$file = new $this->file_class($img['attribs']['src']['data'], $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen);
$file = $this->registry->create('File', array($img['attribs']['src']['data'], $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen));
$headers = $file->headers;
if ($file->success && ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300)))
{
if ($cache->save(array('headers' => $file->headers, 'body' => $file->body)))
{
$img['attribs']['src']['data'] = $this->image_handler . $image_url;
$data = str_replace($img['full'], SimplePie_Misc::element_implode($img), $data);
$img->setAttribute('src', $this->image_handler . $image_url);
}
else
{
@ -321,13 +329,38 @@ class SimplePie_Sanitize
}
}
// Having (possibly) taken stuff out, there may now be whitespace at the beginning/end of the data
$data = trim($data);
// Remove the DOCTYPE
// Seems to cause segfaulting if we don't do this
if ($document->firstChild instanceof DOMDocumentType)
{
$document->removeChild($document->firstChild);
}
// Move everything from the body to the root
$real_body = $document->getElementsByTagName('body')->item(0)->childNodes->item(0);
$document->replaceChild($real_body, $document->firstChild);
// Finally, convert to a HTML string
$data = trim($document->saveHTML());
if ($this->remove_div)
{
$data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
$data = preg_replace('/<\/div>$/', '', $data);
}
else
{
$data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
}
}
if ($type & SIMPLEPIE_CONSTRUCT_IRI)
{
$data = SimplePie_Misc::absolutize_url($data, $base);
$absolute = $this->registry->call('Misc', 'absolutize_url', array($data, $base));
if ($absolute !== false)
{
$data = $absolute;
}
}
if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI))
@ -337,40 +370,60 @@ class SimplePie_Sanitize
if ($this->output_encoding !== 'UTF-8')
{
$data = SimplePie_Misc::change_encoding($data, 'UTF-8', $this->output_encoding);
$data = $this->registry->call('Misc', 'change_encoding', array($data, 'UTF-8', $this->output_encoding));
}
}
return $data;
}
public function replace_urls($data, $tag, $attributes)
protected function preprocess($html, $type)
{
$ret = '';
if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML)
{
// Atom XHTML constructs are wrapped with a div by default
// Note: No protection if $html contains a stray </div>!
$html = '<div>' . $html . '</div>';
$ret .= '<!DOCTYPE html>';
$content_type = 'text/html';
}
else
{
$ret .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
$content_type = 'application/xhtml+xml';
}
$ret .= '<html><head>';
$ret .= '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />';
$ret .= '</head><body>' . $html . '</body></html>';
return $ret;
}
public function replace_urls($document, $tag, $attributes)
{
if (!is_array($attributes))
{
$attributes = array($attributes);
}
if (!is_array($this->strip_htmltags) || !in_array($tag, $this->strip_htmltags))
{
$elements = SimplePie_Misc::get_element($tag, $data);
$elements = $document->getElementsByTagName($tag);
foreach ($elements as $element)
{
if (is_array($attributes))
foreach ($attributes as $attribute)
{
foreach ($attributes as $attribute)
if ($element->hasAttribute($attribute))
{
if (isset($element['attribs'][$attribute]['data']))
$value = $this->registry->call('Misc', 'absolutize_url', array($element->getAttribute($attribute), $this->base));
if ($value !== false)
{
$element['attribs'][$attribute]['data'] = SimplePie_Misc::absolutize_url($element['attribs'][$attribute]['data'], $this->base);
$new_element = SimplePie_Misc::element_implode($element);
$data = str_replace($element['full'], $new_element, $data);
$element['full'] = $new_element;
$element->setAttribute($attribute, $value);
}
}
}
elseif (isset($element['attribs'][$attributes]['data']))
{
$element['attribs'][$attributes]['data'] = SimplePie_Misc::absolutize_url($element['attribs'][$attributes]['data'], $this->base);
$data = str_replace($element['full'], SimplePie_Misc::element_implode($element), $data);
}
}
}
return $data;
}
public function do_strip_htmltags($match)
@ -397,4 +450,100 @@ class SimplePie_Sanitize
return '';
}
}
protected function strip_tag($tag, $document, $type)
{
$xpath = new DOMXPath($document);
$elements = $xpath->query('body//' . $tag);
if ($this->encode_instead_of_strip)
{
foreach ($elements as $element)
{
$fragment = $document->createDocumentFragment();
// For elements which aren't script or style, include the tag itself
if (!in_array($tag, array('script', 'style')))
{
$text = '<' . $tag;
if ($element->hasAttributes())
{
$attrs = array();
foreach ($element->attributes as $name => $attr)
{
$value = $attr->value;
// In XHTML, empty values should never exist, so we repeat the value
if (empty($value) && ($type & SIMPLEPIE_CONSTRUCT_XHTML))
{
$value = $name;
}
// For HTML, empty is fine
elseif (empty($value) && ($type & SIMPLEPIE_CONSTRUCT_HTML))
{
$attrs[] = $name;
continue;
}
// Standard attribute text
$attrs[] = $name . '="' . $attr->value . '"';
}
$text .= ' ' . implode(' ', $attrs);
}
$text .= '>';
$fragment->appendChild(new DOMText($text));
}
$number = $element->childNodes->length;
for ($i = $number; $i > 0; $i--)
{
$child = $element->childNodes->item(0);
$fragment->appendChild($child);
}
if (!in_array($tag, array('script', 'style')))
{
$fragment->appendChild(new DOMText('</' . $tag . '>'));
}
$element->parentNode->replaceChild($fragment, $element);
}
return;
}
elseif (in_array($tag, array('script', 'style')))
{
foreach ($elements as $element)
{
$element->parentNode->removeChild($element);
}
return;
}
else
{
foreach ($elements as $element)
{
$fragment = $document->createDocumentFragment();
$number = $element->childNodes->length;
for ($i = $number; $i > 0; $i--)
{
$child = $element->childNodes->item(0);
$fragment->appendChild($child);
}
$element->parentNode->replaceChild($fragment, $element);
}
}
}
protected function strip_attr($attrib, $document)
{
$xpath = new DOMXPath($document);
$elements = $xpath->query('//*[@' . $attrib . ']');
foreach ($elements as $element)
{
$element->removeAttribute($attrib);
}
}
}

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,21 +33,30 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Handles `<atom:source>`
*
* Used by {@see SimplePie_Item::get_source()}
*
* This class can be overloaded with {@see SimplePie::set_source_class()}
*
* @package SimplePie
* @subpackage API
*/
class SimplePie_Source
{
var $item;
var $data = array();
protected $registry;
public function __construct($item, $data)
{
@ -55,6 +64,11 @@ class SimplePie_Source
$this->data = $data;
}
public function set_registry(SimplePie_Registry $registry)
{
$this->registry = $registry;
}
public function __toString()
{
return md5(serialize($this->data));
@ -91,11 +105,11 @@ class SimplePie_Source
{
if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'title'))
{
return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_10_construct_type($return[0]['attribs']), $this->get_base($return[0]));
return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_10_construct_type', array($return[0]['attribs'])), $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'title'))
{
return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_03_construct_type($return[0]['attribs']), $this->get_base($return[0]));
return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($return[0]['attribs'])), $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'title'))
{
@ -157,7 +171,7 @@ class SimplePie_Source
{
$label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_TEXT);
}
$categories[] = new $this->item->feed->category_class($term, $scheme, $label);
$categories[] = $this->registry->create('Category', array($term, $scheme, $label));
}
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'category') as $category)
{
@ -172,20 +186,20 @@ class SimplePie_Source
{
$scheme = null;
}
$categories[] = new $this->item->feed->category_class($term, $scheme, null);
$categories[] = $this->registry->create('Category', array($term, $scheme, null));
}
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_11, 'subject') as $category)
{
$categories[] = new $this->item->feed->category_class($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null);
$categories[] = $this->registry->create('Category', array($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null));
}
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_10, 'subject') as $category)
{
$categories[] = new $this->item->feed->category_class($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null);
$categories[] = $this->registry->create('Category', array($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null));
}
if (!empty($categories))
{
return SimplePie_Misc::array_unique($categories);
return array_unique($categories);
}
else
{
@ -228,7 +242,7 @@ class SimplePie_Source
}
if ($name !== null || $email !== null || $uri !== null)
{
$authors[] = new $this->item->feed->author_class($name, $uri, $email);
$authors[] = $this->registry->create('Author', array($name, $uri, $email));
}
}
if ($author = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'author'))
@ -250,25 +264,25 @@ class SimplePie_Source
}
if ($name !== null || $email !== null || $url !== null)
{
$authors[] = new $this->item->feed->author_class($name, $url, $email);
$authors[] = $this->registry->create('Author', array($name, $url, $email));
}
}
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_11, 'creator') as $author)
{
$authors[] = new $this->item->feed->author_class($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null);
$authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null));
}
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_10, 'creator') as $author)
{
$authors[] = new $this->item->feed->author_class($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null);
$authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null));
}
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'author') as $author)
{
$authors[] = new $this->item->feed->author_class($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null);
$authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null));
}
if (!empty($authors))
{
return SimplePie_Misc::array_unique($authors);
return array_unique($authors);
}
else
{
@ -311,7 +325,7 @@ class SimplePie_Source
}
if ($name !== null || $email !== null || $uri !== null)
{
$contributors[] = new $this->item->feed->author_class($name, $uri, $email);
$contributors[] = $this->registry->create('Author', array($name, $uri, $email));
}
}
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'contributor') as $contributor)
@ -333,13 +347,13 @@ class SimplePie_Source
}
if ($name !== null || $email !== null || $url !== null)
{
$contributors[] = new $this->item->feed->author_class($name, $url, $email);
$contributors[] = $this->registry->create('Author', array($name, $url, $email));
}
}
if (!empty($contributors))
{
return SimplePie_Misc::array_unique($contributors);
return array_unique($contributors);
}
else
{
@ -412,7 +426,7 @@ class SimplePie_Source
$keys = array_keys($this->data['links']);
foreach ($keys as $key)
{
if (SimplePie_Misc::is_isegment_nz_nc($key))
if ($this->registry->call('Misc', 'is_isegment_nz_nc', array($key)))
{
if (isset($this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key]))
{
@ -446,11 +460,11 @@ class SimplePie_Source
{
if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'subtitle'))
{
return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_10_construct_type($return[0]['attribs']), $this->get_base($return[0]));
return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_10_construct_type', array($return[0]['attribs'])), $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'tagline'))
{
return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_03_construct_type($return[0]['attribs']), $this->get_base($return[0]));
return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($return[0]['attribs'])), $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'description'))
{
@ -490,11 +504,11 @@ class SimplePie_Source
{
if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'rights'))
{
return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_10_construct_type($return[0]['attribs']), $this->get_base($return[0]));
return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_10_construct_type', array($return[0]['attribs'])), $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'copyright'))
{
return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_03_construct_type($return[0]['attribs']), $this->get_base($return[0]));
return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($return[0]['attribs'])), $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'copyright'))
{

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,14 +33,13 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
@ -48,6 +47,7 @@
* Parses the XML Declaration
*
* @package SimplePie
* @subpackage Parsing
*/
class SimplePie_XML_Declaration_Parser
{

View file

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
@ -33,21 +33,22 @@
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* gzdecode
* Decode 'gzip' encoded HTTP data
*
* @package SimplePie
* @subpackage HTTP
* @link http://www.gzip.org/format.txt
*/
class SimplePie_gzdecode
{
@ -55,6 +56,7 @@ class SimplePie_gzdecode
* Compressed data
*
* @access private
* @var string
* @see gzdecode::$data
*/
var $compressed_data;
@ -63,6 +65,7 @@ class SimplePie_gzdecode
* Size of compressed data
*
* @access private
* @var int
*/
var $compressed_size;
@ -70,6 +73,7 @@ class SimplePie_gzdecode
* Minimum size of a valid gzip string
*
* @access private
* @var int
*/
var $min_compressed_size = 18;
@ -77,6 +81,7 @@ class SimplePie_gzdecode
* Current position of pointer
*
* @access private
* @var int
*/
var $position = 0;
@ -84,6 +89,7 @@ class SimplePie_gzdecode
* Flags (FLG)
*
* @access private
* @var int
*/
var $flags;
@ -92,6 +98,7 @@ class SimplePie_gzdecode
*
* @access public
* @see gzdecode::$compressed_data
* @var string
*/
var $data;
@ -99,6 +106,7 @@ class SimplePie_gzdecode
* Modified time
*
* @access public
* @var int
*/
var $MTIME;
@ -106,6 +114,7 @@ class SimplePie_gzdecode
* Extra Flags
*
* @access public
* @var int
*/
var $XFL;
@ -113,6 +122,7 @@ class SimplePie_gzdecode
* Operating System
*
* @access public
* @var int
*/
var $OS;
@ -122,6 +132,7 @@ class SimplePie_gzdecode
* @access public
* @see gzdecode::$extra_field
* @see gzdecode::$SI2
* @var string
*/
var $SI1;
@ -131,6 +142,7 @@ class SimplePie_gzdecode
* @access public
* @see gzdecode::$extra_field
* @see gzdecode::$SI1
* @var string
*/
var $SI2;
@ -140,6 +152,7 @@ class SimplePie_gzdecode
* @access public
* @see gzdecode::$SI1
* @see gzdecode::$SI2
* @var string
*/
var $extra_field;
@ -147,6 +160,7 @@ class SimplePie_gzdecode
* Original filename
*
* @access public
* @var string
*/
var $filename;
@ -154,13 +168,15 @@ class SimplePie_gzdecode
* Human readable comment
*
* @access public
* @var string
*/
var $comment;
/**
* Don't allow anything to be set
*
* @access public
* @param string $name
* @param mixed $value
*/
public function __set($name, $value)
{
@ -170,7 +186,7 @@ class SimplePie_gzdecode
/**
* Set the compressed string and related properties
*
* @access public
* @param string $data
*/
public function __construct($data)
{
@ -181,7 +197,7 @@ class SimplePie_gzdecode
/**
* Decode the GZIP stream
*
* @access public
* @return bool Successfulness
*/
public function parse()
{

1195
inc/3rdparty/makefulltextfeed.php vendored Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because one or more lines are too long

View file

@ -1,997 +0,0 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* IRI parser/serialiser
*
* @package SimplePie
*/
class SimplePie_IRI
{
/**
* Scheme
*
* @access private
* @var string
*/
var $scheme;
/**
* User Information
*
* @access private
* @var string
*/
var $userinfo;
/**
* Host
*
* @access private
* @var string
*/
var $host;
/**
* Port
*
* @access private
* @var string
*/
var $port;
/**
* Path
*
* @access private
* @var string
*/
var $path;
/**
* Query
*
* @access private
* @var string
*/
var $query;
/**
* Fragment
*
* @access private
* @var string
*/
var $fragment;
/**
* Whether the object represents a valid IRI
*
* @access private
* @var array
*/
var $valid = array();
/**
* Return the entire IRI when you try and read the object as a string
*
* @access public
* @return string
*/
public function __toString()
{
return $this->get_iri();
}
/**
* Create a new IRI object, from a specified string
*
* @access public
* @param string $iri
* @return SimplePie_IRI
*/
public function __construct($iri)
{
$iri = (string) $iri;
if ($iri !== '')
{
$parsed = $this->parse_iri($iri);
$this->set_scheme($parsed['scheme']);
$this->set_authority($parsed['authority']);
$this->set_path($parsed['path']);
$this->set_query($parsed['query']);
$this->set_fragment($parsed['fragment']);
}
}
/**
* Create a new IRI object by resolving a relative IRI
*
* @static
* @access public
* @param SimplePie_IRI $base Base IRI
* @param string $relative Relative IRI
* @return SimplePie_IRI
*/
public static function absolutize($base, $relative)
{
$relative = (string) $relative;
if ($relative !== '')
{
$relative = new SimplePie_IRI($relative);
if ($relative->get_scheme() !== null)
{
$target = $relative;
}
elseif ($base->get_iri() !== null)
{
if ($relative->get_authority() !== null)
{
$target = $relative;
$target->set_scheme($base->get_scheme());
}
else
{
$target = new SimplePie_IRI('');
$target->set_scheme($base->get_scheme());
$target->set_userinfo($base->get_userinfo());
$target->set_host($base->get_host());
$target->set_port($base->get_port());
if ($relative->get_path() !== null)
{
if (strpos($relative->get_path(), '/') === 0)
{
$target->set_path($relative->get_path());
}
elseif (($base->get_userinfo() !== null || $base->get_host() !== null || $base->get_port() !== null) && $base->get_path() === null)
{
$target->set_path('/' . $relative->get_path());
}
elseif (($last_segment = strrpos($base->get_path(), '/')) !== false)
{
$target->set_path(substr($base->get_path(), 0, $last_segment + 1) . $relative->get_path());
}
else
{
$target->set_path($relative->get_path());
}
$target->set_query($relative->get_query());
}
else
{
$target->set_path($base->get_path());
if ($relative->get_query() !== null)
{
$target->set_query($relative->get_query());
}
elseif ($base->get_query() !== null)
{
$target->set_query($base->get_query());
}
}
}
$target->set_fragment($relative->get_fragment());
}
else
{
// No base URL, just return the relative URL
$target = $relative;
}
}
else
{
$target = $base;
}
return $target;
}
/**
* Parse an IRI into scheme/authority/path/query/fragment segments
*
* @access private
* @param string $iri
* @return array
*/
public function parse_iri($iri)
{
preg_match('/^(([^:\/?#]+):)?(\/\/([^\/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$/', $iri, $match);
for ($i = count($match); $i <= 9; $i++)
{
$match[$i] = '';
}
return array('scheme' => $match[2], 'authority' => $match[4], 'path' => $match[5], 'query' => $match[7], 'fragment' => $match[9]);
}
/**
* Remove dot segments from a path
*
* @access private
* @param string $input
* @return string
*/
public function remove_dot_segments($input)
{
$output = '';
while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..')
{
// A: If the input buffer begins with a prefix of "../" or "./", then remove that prefix from the input buffer; otherwise,
if (strpos($input, '../') === 0)
{
$input = substr($input, 3);
}
elseif (strpos($input, './') === 0)
{
$input = substr($input, 2);
}
// B: if the input buffer begins with a prefix of "/./" or "/.", where "." is a complete path segment, then replace that prefix with "/" in the input buffer; otherwise,
elseif (strpos($input, '/./') === 0)
{
$input = substr_replace($input, '/', 0, 3);
}
elseif ($input === '/.')
{
$input = '/';
}
// C: if the input buffer begins with a prefix of "/../" or "/..", where ".." is a complete path segment, then replace that prefix with "/" in the input buffer and remove the last segment and its preceding "/" (if any) from the output buffer; otherwise,
elseif (strpos($input, '/../') === 0)
{
$input = substr_replace($input, '/', 0, 4);
$output = substr_replace($output, '', strrpos($output, '/'));
}
elseif ($input === '/..')
{
$input = '/';
$output = substr_replace($output, '', strrpos($output, '/'));
}
// D: if the input buffer consists only of "." or "..", then remove that from the input buffer; otherwise,
elseif ($input === '.' || $input === '..')
{
$input = '';
}
// E: move the first path segment in the input buffer to the end of the output buffer, including the initial "/" character (if any) and any subsequent characters up to, but not including, the next "/" character or the end of the input buffer
elseif (($pos = strpos($input, '/', 1)) !== false)
{
$output .= substr($input, 0, $pos);
$input = substr_replace($input, '', 0, $pos);
}
else
{
$output .= $input;
$input = '';
}
}
return $output . $input;
}
/**
* Replace invalid character with percent encoding
*
* @param string $string Input string
* @param string $valid_chars Valid characters not in iunreserved or iprivate (this is ASCII-only)
* @param int $case Normalise case
* @param bool $iprivate Allow iprivate
* @return string
*/
protected function replace_invalid_with_pct_encoding($string, $valid_chars, $case = SIMPLEPIE_SAME_CASE, $iprivate = false)
{
// Normalize as many pct-encoded sections as possible
$string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array(&$this, 'remove_iunreserved_percent_encoded'), $string);
// Replace invalid percent characters
$string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);
// Add unreserved and % to $valid_chars (the latter is safe because all
// pct-encoded sections are now valid).
$valid_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';
// Now replace any bytes that aren't allowed with their pct-encoded versions
$position = 0;
$strlen = strlen($string);
while (($position += strspn($string, $valid_chars, $position)) < $strlen)
{
$value = ord($string[$position]);
// Start position
$start = $position;
// By default we are valid
$valid = true;
// No one byte sequences are valid due to the while.
// Two byte sequence:
if (($value & 0xE0) === 0xC0)
{
$character = ($value & 0x1F) << 6;
$length = 2;
$remaining = 1;
}
// Three byte sequence:
elseif (($value & 0xF0) === 0xE0)
{
$character = ($value & 0x0F) << 12;
$length = 3;
$remaining = 2;
}
// Four byte sequence:
elseif (($value & 0xF8) === 0xF0)
{
$character = ($value & 0x07) << 18;
$length = 4;
$remaining = 3;
}
// Invalid byte:
else
{
$valid = false;
$length = 1;
$remaining = 0;
}
if ($remaining)
{
if ($position + $length <= $strlen)
{
for ($position++; $remaining; $position++)
{
$value = ord($string[$position]);
// Check that the byte is valid, then add it to the character:
if (($value & 0xC0) === 0x80)
{
$character |= ($value & 0x3F) << (--$remaining * 6);
}
// If it is invalid, count the sequence as invalid and reprocess the current byte:
else
{
$valid = false;
$position--;
break;
}
}
}
else
{
$position = $strlen - 1;
$valid = false;
}
}
// Percent encode anything invalid or not in ucschar
if (
// Invalid sequences
!$valid
// Non-shortest form sequences are invalid
|| $length > 1 && $character <= 0x7F
|| $length > 2 && $character <= 0x7FF
|| $length > 3 && $character <= 0xFFFF
// Outside of range of ucschar codepoints
// Noncharacters
|| ($character & 0xFFFE) === 0xFFFE
|| $character >= 0xFDD0 && $character <= 0xFDEF
|| (
// Everything else not in ucschar
$character > 0xD7FF && $character < 0xF900
|| $character < 0xA0
|| $character > 0xEFFFD
)
&& (
// Everything not in iprivate, if it applies
!$iprivate
|| $character < 0xE000
|| $character > 0x10FFFD
)
)
{
// If we were a character, pretend we weren't, but rather an error.
if ($valid)
$position--;
for ($j = $start; $j <= $position; $j++)
{
$string = substr_replace($string, sprintf('%%%02X', ord($string[$j])), $j, 1);
$j += 2;
$position += 2;
$strlen += 2;
}
}
}
// Normalise case
if ($case & SIMPLEPIE_LOWERCASE)
{
$string = strtolower($string);
}
elseif ($case & SIMPLEPIE_UPPERCASE)
{
$string = strtoupper($string);
}
return $string;
}
/**
* Callback function for preg_replace_callback.
*
* Removes sequences of percent encoded bytes that represent UTF-8
* encoded characters in iunreserved
*
* @param array $match PCRE match
* @return string Replacement
*/
protected function remove_iunreserved_percent_encoded($match)
{
// As we just have valid percent encoded sequences we can just explode
// and ignore the first member of the returned array (an empty string).
$bytes = explode('%', $match[0]);
// Initialize the new string (this is what will be returned) and that
// there are no bytes remaining in the current sequence (unsurprising
// at the first byte!).
$string = '';
$remaining = 0;
// Loop over each and every byte, and set $value to its value
for ($i = 1, $len = count($bytes); $i < $len; $i++)
{
$value = hexdec($bytes[$i]);
// If we're the first byte of sequence:
if (!$remaining)
{
// Start position
$start = $i;
// By default we are valid
$valid = true;
// One byte sequence:
if ($value <= 0x7F)
{
$character = $value;
$length = 1;
}
// Two byte sequence:
elseif (($value & 0xE0) === 0xC0)
{
$character = ($value & 0x1F) << 6;
$length = 2;
$remaining = 1;
}
// Three byte sequence:
elseif (($value & 0xF0) === 0xE0)
{
$character = ($value & 0x0F) << 12;
$length = 3;
$remaining = 2;
}
// Four byte sequence:
elseif (($value & 0xF8) === 0xF0)
{
$character = ($value & 0x07) << 18;
$length = 4;
$remaining = 3;
}
// Invalid byte:
else
{
$valid = false;
$remaining = 0;
}
}
// Continuation byte:
else
{
// Check that the byte is valid, then add it to the character:
if (($value & 0xC0) === 0x80)
{
$remaining--;
$character |= ($value & 0x3F) << ($remaining * 6);
}
// If it is invalid, count the sequence as invalid and reprocess the current byte as the start of a sequence:
else
{
$valid = false;
$remaining = 0;
$i--;
}
}
// If we've reached the end of the current byte sequence, append it to Unicode::$data
if (!$remaining)
{
// Percent encode anything invalid or not in iunreserved
if (
// Invalid sequences
!$valid
// Non-shortest form sequences are invalid
|| $length > 1 && $character <= 0x7F
|| $length > 2 && $character <= 0x7FF
|| $length > 3 && $character <= 0xFFFF
// Outside of range of iunreserved codepoints
|| $character < 0x2D
|| $character > 0xEFFFD
// Noncharacters
|| ($character & 0xFFFE) === 0xFFFE
|| $character >= 0xFDD0 && $character <= 0xFDEF
// Everything else not in iunreserved (this is all BMP)
|| $character === 0x2F
|| $character > 0x39 && $character < 0x41
|| $character > 0x5A && $character < 0x61
|| $character > 0x7A && $character < 0x7E
|| $character > 0x7E && $character < 0xA0
|| $character > 0xD7FF && $character < 0xF900
)
{
for ($j = $start; $j <= $i; $j++)
{
$string .= '%' . strtoupper($bytes[$j]);
}
}
else
{
for ($j = $start; $j <= $i; $j++)
{
$string .= chr(hexdec($bytes[$j]));
}
}
}
}
// If we have any bytes left over they are invalid (i.e., we are
// mid-way through a multi-byte sequence)
if ($remaining)
{
for ($j = $start; $j < $len; $j++)
{
$string .= '%' . strtoupper($bytes[$j]);
}
}
return $string;
}
/**
* Check if the object represents a valid IRI
*
* @access public
* @return bool
*/
public function is_valid()
{
return array_sum($this->valid) === count($this->valid);
}
/**
* Set the scheme. Returns true on success, false on failure (if there are
* any invalid characters).
*
* @access public
* @param string $scheme
* @return bool
*/
public function set_scheme($scheme)
{
if ($scheme === null || $scheme === '')
{
$this->scheme = null;
}
else
{
$len = strlen($scheme);
switch (true)
{
case $len > 1:
if (!strspn($scheme, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-.', 1))
{
$this->scheme = null;
$this->valid[__FUNCTION__] = false;
return false;
}
case $len > 0:
if (!strspn($scheme, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', 0, 1))
{
$this->scheme = null;
$this->valid[__FUNCTION__] = false;
return false;
}
}
$this->scheme = strtolower($scheme);
}
$this->valid[__FUNCTION__] = true;
return true;
}
/**
* Set the authority. Returns true on success, false on failure (if there are
* any invalid characters).
*
* @access public
* @param string $authority
* @return bool
*/
public function set_authority($authority)
{
if (($userinfo_end = strrpos($authority, '@')) !== false)
{
$userinfo = substr($authority, 0, $userinfo_end);
$authority = substr($authority, $userinfo_end + 1);
}
else
{
$userinfo = null;
}
if (($port_start = strpos($authority, ':')) !== false)
{
$port = substr($authority, $port_start + 1);
if ($port === false)
{
$port = null;
}
$authority = substr($authority, 0, $port_start);
}
else
{
$port = null;
}
return $this->set_userinfo($userinfo) && $this->set_host($authority) && $this->set_port($port);
}
/**
* Set the userinfo.
*
* @access public
* @param string $userinfo
* @return bool
*/
public function set_userinfo($userinfo)
{
if ($userinfo === null || $userinfo === '')
{
$this->userinfo = null;
}
else
{
$this->userinfo = $this->replace_invalid_with_pct_encoding($userinfo, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$&\'()*+,;=:');
}
$this->valid[__FUNCTION__] = true;
return true;
}
/**
* Set the host. Returns true on success, false on failure (if there are
* any invalid characters).
*
* @access public
* @param string $host
* @return bool
*/
public function set_host($host)
{
if ($host === null || $host === '')
{
$this->host = null;
$this->valid[__FUNCTION__] = true;
return true;
}
elseif ($host[0] === '[' && substr($host, -1) === ']')
{
if (SimplePie_Net_IPv6::checkIPv6(substr($host, 1, -1)))
{
$this->host = $host;
$this->valid[__FUNCTION__] = true;
return true;
}
else
{
$this->host = null;
$this->valid[__FUNCTION__] = false;
return false;
}
}
else
{
$this->host = $this->replace_invalid_with_pct_encoding($host, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$&\'()*+,;=', SIMPLEPIE_LOWERCASE);
$this->valid[__FUNCTION__] = true;
return true;
}
}
/**
* Set the port. Returns true on success, false on failure (if there are
* any invalid characters).
*
* @access public
* @param string $port
* @return bool
*/
public function set_port($port)
{
if ($port === null || $port === '')
{
$this->port = null;
$this->valid[__FUNCTION__] = true;
return true;
}
elseif (strspn($port, '0123456789') === strlen($port))
{
$this->port = (int) $port;
$this->valid[__FUNCTION__] = true;
return true;
}
else
{
$this->port = null;
$this->valid[__FUNCTION__] = false;
return false;
}
}
/**
* Set the path.
*
* @access public
* @param string $path
* @return bool
*/
public function set_path($path)
{
if ($path === null || $path === '')
{
$this->path = null;
$this->valid[__FUNCTION__] = true;
return true;
}
elseif (substr($path, 0, 2) === '//' && $this->userinfo === null && $this->host === null && $this->port === null)
{
$this->path = null;
$this->valid[__FUNCTION__] = false;
return false;
}
else
{
$this->path = $this->replace_invalid_with_pct_encoding($path, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$&\'()*+,;=@/');
if ($this->scheme !== null)
{
$this->path = $this->remove_dot_segments($this->path);
}
$this->valid[__FUNCTION__] = true;
return true;
}
}
/**
* Set the query.
*
* @access public
* @param string $query
* @return bool
*/
public function set_query($query)
{
if ($query === null || $query === '')
{
$this->query = null;
}
else
{
$this->query = $this->replace_invalid_with_pct_encoding($query, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$\'()*+,;:@/?&=');
}
$this->valid[__FUNCTION__] = true;
return true;
}
/**
* Set the fragment.
*
* @access public
* @param string $fragment
* @return bool
*/
public function set_fragment($fragment)
{
if ($fragment === null || $fragment === '')
{
$this->fragment = null;
}
else
{
$this->fragment = $this->replace_invalid_with_pct_encoding($fragment, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$&\'()*+,;=:@/?');
}
$this->valid[__FUNCTION__] = true;
return true;
}
/**
* Get the complete IRI
*
* @access public
* @return string
*/
public function get_iri()
{
$iri = '';
if ($this->scheme !== null)
{
$iri .= $this->scheme . ':';
}
if (($authority = $this->get_authority()) !== null)
{
$iri .= '//' . $authority;
}
if ($this->path !== null)
{
$iri .= $this->path;
}
if ($this->query !== null)
{
$iri .= '?' . $this->query;
}
if ($this->fragment !== null)
{
$iri .= '#' . $this->fragment;
}
if ($iri !== '')
{
return $iri;
}
else
{
return null;
}
}
/**
* Get the scheme
*
* @access public
* @return string
*/
public function get_scheme()
{
return $this->scheme;
}
/**
* Get the complete authority
*
* @access public
* @return string
*/
public function get_authority()
{
$authority = '';
if ($this->userinfo !== null)
{
$authority .= $this->userinfo . '@';
}
if ($this->host !== null)
{
$authority .= $this->host;
}
if ($this->port !== null)
{
$authority .= ':' . $this->port;
}
if ($authority !== '')
{
return $authority;
}
else
{
return null;
}
}
/**
* Get the user information
*
* @access public
* @return string
*/
public function get_userinfo()
{
return $this->userinfo;
}
/**
* Get the host
*
* @access public
* @return string
*/
public function get_host()
{
return $this->host;
}
/**
* Get the port
*
* @access public
* @return string
*/
public function get_port()
{
return $this->port;
}
/**
* Get the path
*
* @access public
* @return string
*/
public function get_path()
{
return $this->path;
}
/**
* Get the query
*
* @access public
* @return string
*/
public function get_query()
{
return $this->query;
}
/**
* Get the fragment
*
* @access public
* @return string
*/
public function get_fragment()
{
return $this->fragment;
}
}

View file

@ -1,258 +0,0 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Class to validate and to work with IPv6 addresses.
*
* @package SimplePie
* @copyright 2003-2005 The PHP Group
* @license http://www.opensource.org/licenses/bsd-license.php
* @link http://pear.php.net/package/Net_IPv6
* @author Alexander Merz <alexander.merz@web.de>
* @author elfrink at introweb dot nl
* @author Josh Peck <jmp at joshpeck dot org>
* @author Geoffrey Sneddon <geoffers@gmail.com>
*/
class SimplePie_Net_IPv6
{
/**
* Removes a possible existing netmask specification of an IP address.
*
* @param string $ip the (compressed) IP as Hex representation
* @return string the IP the without netmask
* @since 1.1.0
* @access public
* @static
*/
public static function removeNetmaskSpec($ip)
{
if (strpos($ip, '/') !== false)
{
list($addr, $nm) = explode('/', $ip);
}
else
{
$addr = $ip;
}
return $addr;
}
/**
* Uncompresses an IPv6 address
*
* RFC 2373 allows you to compress zeros in an address to '::'. This
* function expects an valid IPv6 address and expands the '::' to
* the required zeros.
*
* Example: FF01::101 -> FF01:0:0:0:0:0:0:101
* ::1 -> 0:0:0:0:0:0:0:1
*
* @access public
* @static
* @param string $ip a valid IPv6-address (hex format)
* @return string the uncompressed IPv6-address (hex format)
*/
public static function Uncompress($ip)
{
$uip = SimplePie_Net_IPv6::removeNetmaskSpec($ip);
$c1 = -1;
$c2 = -1;
if (strpos($ip, '::') !== false)
{
list($ip1, $ip2) = explode('::', $ip);
if ($ip1 === '')
{
$c1 = -1;
}
else
{
$pos = 0;
if (($pos = substr_count($ip1, ':')) > 0)
{
$c1 = $pos;
}
else
{
$c1 = 0;
}
}
if ($ip2 === '')
{
$c2 = -1;
}
else
{
$pos = 0;
if (($pos = substr_count($ip2, ':')) > 0)
{
$c2 = $pos;
}
else
{
$c2 = 0;
}
}
if (strstr($ip2, '.'))
{
$c2++;
}
// ::
if ($c1 === -1 && $c2 === -1)
{
$uip = '0:0:0:0:0:0:0:0';
}
// ::xxx
else if ($c1 === -1)
{
$fill = str_repeat('0:', 7 - $c2);
$uip = str_replace('::', $fill, $uip);
}
// xxx::
else if ($c2 === -1)
{
$fill = str_repeat(':0', 7 - $c1);
$uip = str_replace('::', $fill, $uip);
}
// xxx::xxx
else
{
$fill = str_repeat(':0:', 6 - $c2 - $c1);
$uip = str_replace('::', $fill, $uip);
$uip = str_replace('::', ':', $uip);
}
}
return $uip;
}
/**
* Splits an IPv6 address into the IPv6 and a possible IPv4 part
*
* RFC 2373 allows you to note the last two parts of an IPv6 address as
* an IPv4 compatible address
*
* Example: 0:0:0:0:0:0:13.1.68.3
* 0:0:0:0:0:FFFF:129.144.52.38
*
* @access public
* @static
* @param string $ip a valid IPv6-address (hex format)
* @return array [0] contains the IPv6 part, [1] the IPv4 part (hex format)
*/
public static function SplitV64($ip)
{
$ip = SimplePie_Net_IPv6::Uncompress($ip);
if (strstr($ip, '.'))
{
$pos = strrpos($ip, ':');
$ip[$pos] = '_';
$ipPart = explode('_', $ip);
return $ipPart;
}
else
{
return array($ip, '');
}
}
/**
* Checks an IPv6 address
*
* Checks if the given IP is IPv6-compatible
*
* @access public
* @static
* @param string $ip a valid IPv6-address
* @return bool true if $ip is an IPv6 address
*/
public static function checkIPv6($ip)
{
$ipPart = SimplePie_Net_IPv6::SplitV64($ip);
$count = 0;
if (!empty($ipPart[0]))
{
$ipv6 = explode(':', $ipPart[0]);
for ($i = 0; $i < count($ipv6); $i++)
{
$dec = hexdec($ipv6[$i]);
$hex = strtoupper(preg_replace('/^[0]{1,3}(.*[0-9a-fA-F])$/', '\\1', $ipv6[$i]));
if ($ipv6[$i] >= 0 && $dec <= 65535 && $hex === strtoupper(dechex($dec)))
{
$count++;
}
}
if ($count === 8)
{
return true;
}
elseif ($count === 6 && !empty($ipPart[1]))
{
$ipv4 = explode('.', $ipPart[1]);
$count = 0;
foreach ($ipv4 as $ipv4_part)
{
if ($ipv4_part >= 0 && $ipv4_part <= 255 && preg_match('/^\d{1,3}$/', $ipv4_part))
{
$count++;
}
}
if ($count === 4)
{
return true;
}
}
else
{
return false;
}
}
else
{
return false;
}
}
}

View file

@ -0,0 +1,6 @@
title: //div[@class='meta']/h2/a
author: //div[@class='meta']/h2/following-sibling::p/a/text()
date://div[@class='meta']/h2/strong
body: //div[@id='article']
strip: //div[@class='domore']
test_url: http://24ways.org/2011/composing-the-new-canon

View file

@ -1,2 +1 @@
<?php
return 1;
<?php return 4; ?>

View file

@ -332,9 +332,12 @@ class Poche
switch ($action)
{
case 'add':
$content = $url->extract();
$json = file_get_contents(Tools::getPocheUrl() . '/inc/3rdparty/makefulltextfeed.php?url='.urlencode($url->getUrl()).'&max=5&links=preserve&exc=&format=json&submit=Create+Feed');
$content = json_decode($json, true);
$title = $content['rss']['channel']['item']['title'];
$body = $content['rss']['channel']['item']['description'];
if ($this->store->add($url->getUrl(), $content['title'], $content['body'], $this->user->getId())) {
if ($this->store->add($url->getUrl(), $title, $body, $this->user->getId())) {
Tools::logm('add link ' . $url->getUrl());
$sequence = '';
if (STORAGE == 'postgres') {
@ -342,7 +345,7 @@ class Poche
}
$last_id = $this->store->getLastId($sequence);
if (DOWNLOAD_PICTURES) {
$content = filtre_picture($content['body'], $url->getUrl(), $last_id);
$content = filtre_picture($body, $url->getUrl(), $last_id);
Tools::logm('updating content article');
$this->store->updateContent($last_id, $content, $this->user->getId());
}

View file

@ -1,46 +0,0 @@
<?php
class PocheReadability extends Readability
{
/**
* Get the article title as an H1.
*
* @return DOMElement
*/
protected function getArticleTitle() {
$curTitle = '';
$origTitle = '';
try {
$curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0));
} catch(Exception $e) {}
if (preg_match('/ [\|\-] /', $curTitle))
{
$curTitle = preg_replace('/(.*)[\|\-] .*/i', '$1', $origTitle);
if (count(explode(' ', $curTitle)) < 3) {
$curTitle = preg_replace('/[^\|\-]*[\|\-](.*)/i', '$1', $origTitle);
}
}
else if(strlen($curTitle) > 150 || strlen($curTitle) < 15)
{
$hOnes = $this->dom->getElementsByTagName('h1');
if($hOnes->length == 1)
{
$curTitle = $this->getInnerText($hOnes->item(0));
}
}
$curTitle = trim($curTitle);
if (count(explode(' ', $curTitle)) <= 4) {
$curTitle = $origTitle;
}
$articleTitle = $this->dom->createElement('h1');
$articleTitle->innerHTML = $curTitle;
return $articleTitle;
}
}

View file

@ -12,45 +12,6 @@ class Url
{
public $url;
private $fingerprints = array(
// Posterous
'<meta name="generator" content="Posterous"' => array('hostname'=>'fingerprint.posterous.com', 'head'=>true),
// Blogger
'<meta content=\'blogger\' name=\'generator\'' => array('hostname'=>'fingerprint.blogspot.com', 'head'=>true),
'<meta name="generator" content="Blogger"' => array('hostname'=>'fingerprint.blogspot.com', 'head'=>true),
// WordPress (self-hosted and hosted)
'<meta name="generator" content="WordPress' => array('hostname'=>'fingerprint.wordpress.com', 'head'=>true)
);
private $user_agents = array( 'lifehacker.com' => 'PHP/5.2',
'gawker.com' => 'PHP/5.2',
'deadspin.com' => 'PHP/5.2',
'kotaku.com' => 'PHP/5.2',
'jezebel.com' => 'PHP/5.2',
'io9.com' => 'PHP/5.2',
'jalopnik.com' => 'PHP/5.2',
'gizmodo.com' => 'PHP/5.2',
'.wikipedia.org' => 'Mozilla/5.2'
);
private $content_type_exc = array(
'application/pdf' => array('action'=>'link', 'name'=>'PDF'),
'image' => array('action'=>'link', 'name'=>'Image'),
'audio' => array('action'=>'link', 'name'=>'Audio'),
'video' => array('action'=>'link', 'name'=>'Video')
);
private $rewrite_url = array(
// Rewrite public Google Docs URLs to point to HTML view:
// if a URL contains docs.google.com, replace /Doc? with /View?
'docs.google.com' => array('/Doc?' => '/View?'),
'tnr.com' => array('tnr.com/article/' => 'tnr.com/print/article/'),
'.m.wikipedia.org' => array('.m.wikipedia.org' => '.wikipedia.org')
);
private $rewrite_relative_urls = true;
private $error_message = '[unable to retrieve full-text content]';
function __construct($url)
{
$this->url = base64_decode($url);
@ -67,329 +28,4 @@ class Url
public function isCorrect() {
return filter_var($this->url, FILTER_VALIDATE_URL) !== FALSE;
}
public function extract() {
global $http, $extractor;
$extractor = new ContentExtractor(dirname(__FILE__).'/../3rdparty/site_config/custom', dirname(__FILE__).'/../3rdparty/site_config/standard');
$extractor->fingerprints = $this->fingerprints;
$http = new HumbleHttpAgent();
$http->userAgentMap = $this->user_agents;
$http->headerOnlyTypes = array_keys($this->content_type_exc);
$http->rewriteUrls = $this->rewrite_url;
$http->userAgentDefault = HumbleHttpAgent::UA_PHP;
// configure SimplePie HTTP extension class to use our HumbleHttpAgent instance
SimplePie_HumbleHttpAgent::set_agent($http);
$feed = new SimplePie();
// some feeds use the text/html content type - force_feed tells SimplePie to process anyway
$feed->force_feed(true);
$feed->set_file_class('SimplePie_HumbleHttpAgent');
$feed->feed_url = $this->url;
$feed->set_autodiscovery_level(SIMPLEPIE_LOCATOR_NONE);
$feed->set_timeout(20);
$feed->enable_cache(false);
$feed->set_stupidly_fast(true);
$feed->enable_order_by_date(false); // we don't want to do anything to the feed
$feed->set_url_replacements(array());
// initialise the feed
// the @ suppresses notices which on some servers causes a 500 internal server error
$result = @$feed->init();
if ($result && (!is_array($feed->data) || count($feed->data) == 0)) {
die('Sorry, no feed items found');
}
// from now on, we'll identify ourselves as a browser
$http->userAgentDefault = HumbleHttpAgent::UA_BROWSER;
unset($feed, $result);
$feed = new DummySingleItemFeed($this->url);
$items = $feed->get_items(0, 1);
// Request all feed items in parallel (if supported)
$urls_sanitized = array();
$urls = array();
foreach ($items as $key => $item) {
$permalink = htmlspecialchars_decode($item->get_permalink());
// Colons in URL path segments get encoded by SimplePie, yet some sites expect them unencoded
$permalink = str_replace('%3A', ':', $permalink);
if ($permalink) {
$urls_sanitized[] = $permalink;
}
$urls[$key] = $permalink;
}
$http->fetchAll($urls_sanitized);
foreach ($items as $key => $item) {
$do_content_extraction = true;
$extract_result = false;
$permalink = $urls[$key];
// TODO: Allow error codes - some sites return correct content with error status
// e.g. prospectmagazine.co.uk returns 403
if ($permalink && ($response = $http->get($permalink, true)) && ($response['status_code'] < 300 || $response['status_code'] > 400)) {
$effective_url = $response['effective_url'];
// check if action defined for returned Content-Type
$type = null;
if (preg_match('!^Content-Type:\s*(([-\w]+)/([-\w\+]+))!im', $response['headers'], $match)) {
// look for full mime type (e.g. image/jpeg) or just type (e.g. image)
$match[1] = strtolower(trim($match[1]));
$match[2] = strtolower(trim($match[2]));
foreach (array($match[1], $match[2]) as $_mime) {
if (isset($this->content_type_exc[$_mime])) {
$type = $match[1];
$_act = $this->content_type_exc[$_mime]['action'];
$_name = $this->content_type_exc[$_mime]['name'];
if ($_act == 'exclude') {
continue 2; // skip this feed item entry
} elseif ($_act == 'link') {
if ($match[2] == 'image') {
$html = "<a href=\"$effective_url\"><img src=\"$effective_url\" alt=\"$_name\" /></a>";
} else {
$html = "<a href=\"$effective_url\">Download $_name</a>";
}
$title = $_name;
$do_content_extraction = false;
break;
}
}
}
unset($_mime, $_act, $_name, $match);
}
if ($do_content_extraction) {
$html = $response['body'];
// remove strange things
$html = str_replace('</[>', '', $html);
$html = $this->convert_to_utf8($html, $response['headers']);
// check site config for single page URL - fetch it if found
if ($single_page_response = $this->getSinglePage($item, $html, $effective_url)) {
$html = $single_page_response['body'];
// remove strange things
$html = str_replace('</[>', '', $html);
$html = $this->convert_to_utf8($html, $single_page_response['headers']);
$effective_url = $single_page_response['effective_url'];
unset($single_page_response);
}
$extract_result = $extractor->process($html, $effective_url);
$readability = $extractor->readability;
$content_block = ($extract_result) ? $extractor->getContent() : null;
}
}
if ($do_content_extraction) {
// if we failed to extract content...
if (!$extract_result) {
$html = $this->error_message;
// keep the original item description
$html .= $item->get_description();
} else {
$readability->clean($content_block, 'select');
if ($this->rewrite_relative_urls) $this->makeAbsolute($effective_url, $content_block);
if ($content_block->childNodes->length == 1 && $content_block->firstChild->nodeType === XML_ELEMENT_NODE) {
$html = $content_block->firstChild->innerHTML;
} else {
$html = $content_block->innerHTML;
}
// post-processing cleanup
$html = preg_replace('!<p>[\s\h\v]*</p>!u', '', $html);
}
}
}
$title = ($extractor->getTitle() != '' ? $extractor->getTitle() : _('Untitled'));
$content = array ('title' => $title, 'body' => $html);
return $content;
}
private function convert_to_utf8($html, $header=null)
{
$encoding = null;
if ($html || $header) {
if (is_array($header)) $header = implode("\n", $header);
if (!$header || !preg_match_all('/^Content-Type:\s+([^;]+)(?:;\s*charset=["\']?([^;"\'\n]*))?/im', $header, $match, PREG_SET_ORDER)) {
// error parsing the response
} else {
$match = end($match); // get last matched element (in case of redirects)
if (isset($match[2])) $encoding = trim($match[2], "\"' \r\n\0\x0B\t");
}
// TODO: check to see if encoding is supported (can we convert it?)
// If it's not, result will be empty string.
// For now we'll check for invalid encoding types returned by some sites, e.g. 'none'
// Problem URL: http://facta.co.jp/blog/archives/20111026001026.html
if (!$encoding || $encoding == 'none') {
// search for encoding in HTML - only look at the first 35000 characters
$html_head = substr($html, 0, 40000);
if (preg_match('/^<\?xml\s+version=(?:"[^"]*"|\'[^\']*\')\s+encoding=("[^"]*"|\'[^\']*\')/s', $html_head, $match)) {
$encoding = trim($match[1], '"\'');
} elseif (preg_match('/<meta\s+http-equiv=["\']?Content-Type["\']? content=["\'][^;]+;\s*charset=["\']?([^;"\'>]+)/i', $html_head, $match)) {
$encoding = trim($match[1]);
} elseif (preg_match_all('/<meta\s+([^>]+)>/i', $html_head, $match)) {
foreach ($match[1] as $_test) {
if (preg_match('/charset=["\']?([^"\']+)/i', $_test, $_m)) {
$encoding = trim($_m[1]);
break;
}
}
}
}
if (isset($encoding)) $encoding = trim($encoding);
// trim is important here!
if (!$encoding || (strtolower($encoding) == 'iso-8859-1')) {
// replace MS Word smart qutoes
$trans = array();
$trans[chr(130)] = '&sbquo;'; // Single Low-9 Quotation Mark
$trans[chr(131)] = '&fnof;'; // Latin Small Letter F With Hook
$trans[chr(132)] = '&bdquo;'; // Double Low-9 Quotation Mark
$trans[chr(133)] = '&hellip;'; // Horizontal Ellipsis
$trans[chr(134)] = '&dagger;'; // Dagger
$trans[chr(135)] = '&Dagger;'; // Double Dagger
$trans[chr(136)] = '&circ;'; // Modifier Letter Circumflex Accent
$trans[chr(137)] = '&permil;'; // Per Mille Sign
$trans[chr(138)] = '&Scaron;'; // Latin Capital Letter S With Caron
$trans[chr(139)] = '&lsaquo;'; // Single Left-Pointing Angle Quotation Mark
$trans[chr(140)] = '&OElig;'; // Latin Capital Ligature OE
$trans[chr(145)] = '&lsquo;'; // Left Single Quotation Mark
$trans[chr(146)] = '&rsquo;'; // Right Single Quotation Mark
$trans[chr(147)] = '&ldquo;'; // Left Double Quotation Mark
$trans[chr(148)] = '&rdquo;'; // Right Double Quotation Mark
$trans[chr(149)] = '&bull;'; // Bullet
$trans[chr(150)] = '&ndash;'; // En Dash
$trans[chr(151)] = '&mdash;'; // Em Dash
$trans[chr(152)] = '&tilde;'; // Small Tilde
$trans[chr(153)] = '&trade;'; // Trade Mark Sign
$trans[chr(154)] = '&scaron;'; // Latin Small Letter S With Caron
$trans[chr(155)] = '&rsaquo;'; // Single Right-Pointing Angle Quotation Mark
$trans[chr(156)] = '&oelig;'; // Latin Small Ligature OE
$trans[chr(159)] = '&Yuml;'; // Latin Capital Letter Y With Diaeresis
$html = strtr($html, $trans);
}
if (!$encoding) {
$encoding = 'utf-8';
} else {
if (strtolower($encoding) != 'utf-8') {
$html = SimplePie_Misc::change_encoding($html, $encoding, 'utf-8');
/*
if (function_exists('iconv')) {
// iconv appears to handle certain character encodings better than mb_convert_encoding
$html = iconv($encoding, 'utf-8', $html);
} else {
$html = mb_convert_encoding($html, 'utf-8', $encoding);
}
*/
}
}
}
return $html;
}
private function makeAbsolute($base, $elem) {
$base = new SimplePie_IRI($base);
// remove '//' in URL path (used to prevent URLs from resolving properly)
// TODO: check if this is still the case
if (isset($base->path)) $base->path = preg_replace('!//+!', '/', $base->path);
foreach(array('a'=>'href', 'img'=>'src') as $tag => $attr) {
$elems = $elem->getElementsByTagName($tag);
for ($i = $elems->length-1; $i >= 0; $i--) {
$e = $elems->item($i);
//$e->parentNode->replaceChild($articleContent->ownerDocument->createTextNode($e->textContent), $e);
$this->makeAbsoluteAttr($base, $e, $attr);
}
if (strtolower($elem->tagName) == $tag) $this->makeAbsoluteAttr($base, $elem, $attr);
}
}
private function makeAbsoluteAttr($base, $e, $attr) {
if ($e->hasAttribute($attr)) {
// Trim leading and trailing white space. I don't really like this but
// unfortunately it does appear on some sites. e.g. <img src=" /path/to/image.jpg" />
$url = trim(str_replace('%20', ' ', $e->getAttribute($attr)));
$url = str_replace(' ', '%20', $url);
if (!preg_match('!https?://!i', $url)) {
if ($absolute = SimplePie_IRI::absolutize($base, $url)) {
$e->setAttribute($attr, $absolute);
}
}
}
}
private function makeAbsoluteStr($base, $url) {
$base = new SimplePie_IRI($base);
// remove '//' in URL path (causes URLs not to resolve properly)
if (isset($base->path)) $base->path = preg_replace('!//+!', '/', $base->path);
if (preg_match('!^https?://!i', $url)) {
// already absolute
return $url;
} else {
if ($absolute = SimplePie_IRI::absolutize($base, $url)) {
return $absolute;
}
return false;
}
}
// returns single page response, or false if not found
private function getSinglePage($item, $html, $url) {
global $http, $extractor;
$host = @parse_url($url, PHP_URL_HOST);
$site_config = SiteConfig::build($host);
if ($site_config === false) {
// check for fingerprints
if (!empty($extractor->fingerprints) && ($_fphost = $extractor->findHostUsingFingerprints($html))) {
$site_config = SiteConfig::build($_fphost);
}
if ($site_config === false) $site_config = new SiteConfig();
SiteConfig::add_to_cache($host, $site_config);
return false;
} else {
SiteConfig::add_to_cache($host, $site_config);
}
$splink = null;
if (!empty($site_config->single_page_link)) {
$splink = $site_config->single_page_link;
} elseif (!empty($site_config->single_page_link_in_feed)) {
// single page link xpath is targeted at feed
$splink = $site_config->single_page_link_in_feed;
// so let's replace HTML with feed item description
$html = $item->get_description();
}
if (isset($splink)) {
// Build DOM tree from HTML
$readability = new PocheReadability($html, $url);
$xpath = new DOMXPath($readability->dom);
// Loop through single_page_link xpath expressions
$single_page_url = null;
foreach ($splink as $pattern) {
$elems = @$xpath->evaluate($pattern, $readability->dom);
if (is_string($elems)) {
$single_page_url = trim($elems);
break;
} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
foreach ($elems as $item) {
if ($item instanceof DOMElement && $item->hasAttribute('href')) {
$single_page_url = $item->getAttribute('href');
break;
} elseif ($item instanceof DOMAttr && $item->value) {
$single_page_url = $item->value;
break;
}
}
}
}
// If we've got URL, resolve against $url
if (isset($single_page_url) && ($single_page_url = $this->makeAbsoluteStr($url, $single_page_url))) {
// check it's not what we have already!
if ($single_page_url != $url) {
// it's not, so let's try to fetch it...
$_prev_ref = $http->referer;
$http->referer = $single_page_url;
if (($response = $http->get($single_page_url, true)) && $response['status_code'] < 300) {
$http->referer = $_prev_ref;
return $response;
}
$http->referer = $_prev_ref;
}
}
}
return false;
}
}

View file

@ -20,25 +20,13 @@ require_once INCLUDES . '/poche/Url.class.php';
require_once INCLUDES . '/3rdparty/class.messages.php';
require_once INCLUDES . '/poche/Poche.class.php';
require_once INCLUDES . '/3rdparty/Readability.php';
require_once INCLUDES . '/poche/PocheReadability.php';
require_once INCLUDES . '/3rdparty/Encoding.php';
require_once INCLUDES . '/poche/Database.class.php';
require_once INCLUDES . '/3rdparty/simple_html_dom.php';
require_once INCLUDES . '/3rdparty/paginator.php';
require_once INCLUDES . '/3rdparty/Session.class.php';
require_once INCLUDES . '/3rdparty/simplepie/SimplePieAutoloader.php';
require_once INCLUDES . '/3rdparty/simplepie/SimplePie/Core.php';
require_once INCLUDES . '/3rdparty/content-extractor/ContentExtractor.php';
require_once INCLUDES . '/3rdparty/content-extractor/SiteConfig.php';
require_once INCLUDES . '/3rdparty/humble-http-agent/HumbleHttpAgent.php';
require_once INCLUDES . '/3rdparty/humble-http-agent/SimplePie_HumbleHttpAgent.php';
require_once INCLUDES . '/3rdparty/humble-http-agent/CookieJar.php';
require_once INCLUDES . '/3rdparty/feedwriter/FeedItem.php';
require_once INCLUDES . '/3rdparty/feedwriter/FeedWriter.php';
require_once INCLUDES . '/3rdparty/feedwriter/DummySingleItemFeed.php';
require_once INCLUDES . '/3rdparty/libraries/feedwriter/FeedItem.php';
require_once INCLUDES . '/3rdparty/libraries/feedwriter/FeedWriter.php';
require_once INCLUDES . '/3rdparty/FlattrItem.class.php';
# Composer its autoloader for automatically loading Twig