mirror of
https://github.com/wallabag/wallabag.git
synced 2024-11-27 03:21:01 +00:00
Merge branch 'refactor' into dev
This commit is contained in:
commit
887b015def
980 changed files with 8973 additions and 6815 deletions
27
COPYING.md
27
COPYING.md
|
@ -1,14 +1,19 @@
|
|||
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
||||
Version 2, December 2004
|
||||
Copyright (c) 2013-2014 Nicolas Lœuillet
|
||||
|
||||
Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is furnished
|
||||
to do so, subject to the following conditions:
|
||||
|
||||
Everyone is permitted to copy and distribute verbatim or modified
|
||||
copies of this license document, and changing it is allowed as long
|
||||
as the name is changed.
|
||||
|
||||
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. You just DO WHAT THE FUCK YOU WANT TO.
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
|
@ -1,7 +1,6 @@
|
|||
wallabag is based on :
|
||||
* PHP Readability https://bitbucket.org/fivefilters/php-readability
|
||||
* Full Text RSS http://code.fivefilters.org/full-text-rss/src
|
||||
* Encoding https://github.com/neitanod/forceutf8
|
||||
* logo by Maylis Agniel https://github.com/wallabag/logo
|
||||
* icons http://icomoon.io
|
||||
* PHP Simple HTML DOM Parser (for Pocket import) http://simplehtmldom.sourceforge.net/
|
||||
|
@ -10,6 +9,6 @@ wallabag is based on :
|
|||
* Flash messages https://github.com/plasticbrain/PHP-Flash-Messages
|
||||
* Pagination https://github.com/daveismyname/pagination
|
||||
|
||||
wallabag is developed by Nicolas Lœuillet under the Do What the Fuck You Want to Public License
|
||||
wallabag is mainly developed by Nicolas Lœuillet under the MIT License
|
||||
|
||||
Contributors : https://github.com/wallabag/wallabag/graphs/contributors
|
|
@ -4,7 +4,6 @@ wallabag is a self hostable application allowing you to not miss any content any
|
|||
More information on our website: [wallabag.org](http://wallabag.org)
|
||||
|
||||
## License
|
||||
Copyright © 2010-2014 Nicolas Lœuillet <nicolas@loeuillet.org>
|
||||
Copyright © 2013-2014 Nicolas Lœuillet <nicolas@loeuillet.org>
|
||||
This work is free. You can redistribute it and/or modify it under the
|
||||
terms of the Do What The Fuck You Want To Public License, Version 2,
|
||||
as published by Sam Hocevar. See the COPYING file for more details.
|
||||
terms of the MIT License. See the COPYING file for more details.
|
||||
|
|
28
inc/3rdparty/FlattrItem.class.php
vendored
28
inc/3rdparty/FlattrItem.class.php
vendored
|
@ -1,16 +1,23 @@
|
|||
<?php
|
||||
/*
|
||||
* Class for Flattr querying
|
||||
/**
|
||||
* wallabag, self hostable application allowing you to not miss any content anymore
|
||||
*
|
||||
* @category wallabag
|
||||
* @author Nicolas Lœuillet <nicolas@loeuillet.org>
|
||||
* @copyright 2013
|
||||
* @license http://opensource.org/licenses/MIT see COPYING file
|
||||
*/
|
||||
class FlattrItem {
|
||||
|
||||
class FlattrItem
|
||||
{
|
||||
public $status;
|
||||
public $urltoflattr;
|
||||
public $urlToFlattr;
|
||||
public $flattrItemURL;
|
||||
public $numflattrs;
|
||||
public $numFlattrs;
|
||||
|
||||
public function checkItem($urltoflattr,$id) {
|
||||
$this->cacheflattrfile($urltoflattr, $id);
|
||||
public function checkItem($urlToFlattr, $id)
|
||||
{
|
||||
$this->_cacheFlattrFile($urlToFlattr, $id);
|
||||
$flattrResponse = file_get_contents(CACHE . "/flattr/".$id.".cache");
|
||||
if($flattrResponse != FALSE) {
|
||||
$result = json_decode($flattrResponse);
|
||||
|
@ -22,7 +29,7 @@ class FlattrItem {
|
|||
elseif (is_object($result) && $result->link) {
|
||||
$this->status = FLATTRED;
|
||||
$this->flattrItemURL = $result->link;
|
||||
$this->numflattrs = $result->flattrs;
|
||||
$this->numFlattrs = $result->flattrs;
|
||||
}
|
||||
else {
|
||||
$this->status = NOT_FLATTRABLE;
|
||||
|
@ -33,14 +40,15 @@ class FlattrItem {
|
|||
}
|
||||
}
|
||||
|
||||
private function cacheflattrfile($urltoflattr, $id) {
|
||||
private function _cacheFlattrFile($urlToFlattr, $id)
|
||||
{
|
||||
if (!is_dir(CACHE . '/flattr')) {
|
||||
mkdir(CACHE . '/flattr', 0777);
|
||||
}
|
||||
|
||||
// (re)fetch the Flattr data when no cache file exists for this id under CACHE/flattr, or when the existing one was last updated more than one day (86400 seconds) ago
|
||||
if ((!file_exists(CACHE . "/flattr/".$id.".cache")) || (time() - filemtime(CACHE . "/flattr/".$id.".cache") > 86400)) {
|
||||
$askForFlattr = Tools::getFile(FLATTR_API . $urltoflattr);
|
||||
$askForFlattr = Tools::getFile(FLATTR_API . $urlToFlattr);
|
||||
$flattrCacheFile = fopen(CACHE . "/flattr/".$id.".cache", 'w+');
|
||||
fwrite($flattrCacheFile, $askForFlattr);
|
||||
fclose($flattrCacheFile);
|
||||
|
|
34
inc/3rdparty/Session.class.php
vendored
34
inc/3rdparty/Session.class.php
vendored
|
@ -309,4 +309,38 @@ class Session
|
|||
|
||||
return true; // User is not banned.
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Tells if a param exists in session
|
||||
*
|
||||
* @param $name name of the param to test
|
||||
* @return bool
|
||||
*/
|
||||
public static function isInSession($name)
|
||||
{
|
||||
return (isset($_SESSION[$name]) ? : FALSE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns param in session
|
||||
*
|
||||
* @param $name name of the param to return
|
||||
* @return mixed param or null
|
||||
*/
|
||||
public static function getParam($name)
|
||||
{
|
||||
return (self::isInSession($name) ? $_SESSION[$name] : NULL);
|
||||
}
|
||||
|
||||
/**
|
||||
* Store value in session
|
||||
*
|
||||
* @param $name name of the variable to store
|
||||
* @param $value value to store
|
||||
*/
|
||||
public static function setParam($name, $value)
|
||||
{
|
||||
$_SESSION[$name] = $value;
|
||||
}
|
||||
}
|
||||
|
|
0
inc/3rdparty/site_config/standard/24ways.org.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/24ways.org.txt
vendored
Normal file → Executable file
8
inc/3rdparty/site_config/standard/36kr.com.txt
vendored
Executable file
8
inc/3rdparty/site_config/standard/36kr.com.txt
vendored
Executable file
|
@ -0,0 +1,8 @@
|
|||
title: //h1[contains(@class, 'entry-title')]
|
||||
date: //meta[@name='weibo: article:create_at']/@content
|
||||
body: //div[contains(@class, 'mainContent')]
|
||||
strip_id_or_class: related_topics
|
||||
|
||||
prune: no
|
||||
|
||||
test_url: http://www.36kr.com/p/207879.html
|
0
inc/3rdparty/site_config/standard/37signals.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/37signals.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/3quarksdaily.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/3quarksdaily.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/43folders.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/43folders.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/500px.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/500px.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/512pixels.net.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/512pixels.net.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/5by5.tv.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/5by5.tv.txt
vendored
Normal file → Executable file
7
inc/3rdparty/site_config/standard/7newsbelize.com.txt
vendored
Executable file
7
inc/3rdparty/site_config/standard/7newsbelize.com.txt
vendored
Executable file
|
@ -0,0 +1,7 @@
|
|||
title: //*[@id='sstitle']
|
||||
body: //div[@id='sstory']
|
||||
strip_id_or_class: newsoptions
|
||||
prune: no
|
||||
|
||||
test_url: http://www.7newsbelize.com/sstory.php?nid=25654
|
||||
test_url: http://www.7newsbelize.com/7news.xml
|
0
inc/3rdparty/site_config/standard/944.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/944.com.txt
vendored
Normal file → Executable file
38
inc/3rdparty/site_config/standard/README.md
vendored
Executable file
38
inc/3rdparty/site_config/standard/README.md
vendored
Executable file
|
@ -0,0 +1,38 @@
|
|||
Full-Text RSS site config files
|
||||
================
|
||||
|
||||
[Full-Text RSS](http://fivefilters.org/content-only/), our article extraction tool, makes use of site-specific extraction rules to improve results. Each time a URL is processed, it checks to see if there are extraction rules for the site being processed. If there are no site patterns, it tries to detect the content block automatically.
|
||||
|
||||
This repository contains the site config files we use in Full-Text RSS.
|
||||
|
||||
### Contributing changes
|
||||
|
||||
We chose GitHub for this set of files because they offer one feature which we hope will make contributing changes easier: [file editing](https://github.com/blog/844-forking-with-the-edit-button) through the web interface.
|
||||
|
||||
You can now make changes to any of our site config files and request that your changes be pulled into the main set we maintain. This is what GitHub calls the Fork and Pull model:
|
||||
|
||||
> The Fork & Pull Model lets anyone fork an existing repository and push changes to their personal fork without requiring access be granted to the source repository. The changes must then be pulled into the source repository by the project maintainer. This model reduces the amount of friction for new contributors and is popular with open source projects because it allows people to work independently without upfront coordination.
|
||||
|
||||
When we receive a pull request we'll review the changes and if everything's okay we'll update our copy.
|
||||
|
||||
If a site is not in our set, you can create a file for it in the same way. See [Creating files on GitHub](https://github.com/blog/1327-creating-files-on-github).
|
||||
|
||||
### How to write a site config file
|
||||
|
||||
The quickest and simplest way is to use our [point-and-click interface](http://siteconfig.fivefilters.org). It's a simple tool only intended to create a rule to extract the correct content block.
|
||||
|
||||
For further refinements, e.g. selecting the title, stripping elements, dealing with multi-page articles, please see our [help page](http://help.fivefilters.org/customer/portal/articles/223153-site-patterns).
|
||||
|
||||
### Instapaper
|
||||
|
||||
When we introduced site patterns, we chose to adopt the [same format](http://blog.instapaper.com/post/730281947) used by Instapaper. This allows us to make use of the existing extraction rules contributed by Instapaper users.
|
||||
|
||||
Marco, Instapaper's creator, graciously opened up the database of contributions to everyone:
|
||||
|
||||
> And, recognizing that your efforts could be useful to a wide range of other tools and services, I'll make the list of all of these site-specific configurations available to the public, free, with no strings attached.
|
||||
|
||||
Most of the extraction rules in our set are borrowed from Instapaper. You can see the list maintained by Instapaper at [instapaper.com/bodytext/](http://instapaper.com/bodytext/) (login required).
|
||||
|
||||
### Testing site config files
|
||||
|
||||
Currently you will have to have a copy of Full-Text RSS to test changes to the site config files. In the future we will try to make this process easier.
|
0
inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/aachener-zeitung.de.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/aachener-zeitung.de.txt
vendored
Normal file → Executable file
2
inc/3rdparty/site_config/standard/abc.es.txt
vendored
Normal file → Executable file
2
inc/3rdparty/site_config/standard/abc.es.txt
vendored
Normal file → Executable file
|
@ -1,5 +1,5 @@
|
|||
title: //meta[@property='og:title']/@content
|
||||
body: //div[@class='datosi' or @class='date' or @class='photo-alt1' or @class='text']
|
||||
body: //div[@class='datosi' or @class='date' or @class='photo-alt1' or @class='text' or @itemprop='articleBody']
|
||||
strip_id_or_class: colB
|
||||
|
||||
prune: no
|
||||
|
|
12
inc/3rdparty/site_config/standard/abc.net.au.txt
vendored
Normal file → Executable file
12
inc/3rdparty/site_config/standard/abc.net.au.txt
vendored
Normal file → Executable file
|
@ -1,10 +1,18 @@
|
|||
title: //h1
|
||||
title: //div[@class='article section']//h1
|
||||
author: //div[@class="byline"]/a
|
||||
date: //span[@class="timestamp"]
|
||||
body: //div[@class="page section"]
|
||||
|
||||
strip: //a[@class="inline-caption"]
|
||||
strip: //p[@class="ticker section noprint"]
|
||||
strip: //p[@class="topics"]
|
||||
strip: //h1
|
||||
strip: //div[@class="byline"]
|
||||
strip: //p[@class="published"]
|
||||
strip: //div[contains(@class,"featured-scroller")]
|
||||
test_url: http://www.abc.net.au/news/2011-11-08/crabb-carbon-legislation-abbott-demolition/3652544
|
||||
strip_id_or_class: footer
|
||||
|
||||
tidy: no
|
||||
|
||||
test_url: http://www.abc.net.au/news/2013-03-27/open-speed-highways-change-clp-giles/4597892
|
||||
test_url: http://www.abc.net.au/news/2013-04-30/credit-growth-remains-subdued/4660054?section=business
|
||||
|
|
0
inc/3rdparty/site_config/standard/abcnews.go.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/abcnews.go.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/accesstoinsight.org.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/accesstoinsight.org.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/acidcow.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/acidcow.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/acquia.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/acquia.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/acroswing.fr.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/acroswing.fr.txt
vendored
Normal file → Executable file
5
inc/3rdparty/site_config/standard/aftenposten.no.txt
vendored
Executable file
5
inc/3rdparty/site_config/standard/aftenposten.no.txt
vendored
Executable file
|
@ -0,0 +1,5 @@
|
|||
title: //h1[@class='articleTitle ']
|
||||
body: //div[@class='bodyText widget storyContent']
|
||||
strip: //p/span[@class='quote']/..
|
||||
strip_id_or_class: 'pull1'
|
||||
test_url: https://www.aftenposten.no/meninger/spaltister/Portrett-av-scenekunstneren-som-ung-mann-7167959.html
|
13
inc/3rdparty/site_config/standard/aftonbladet.se.txt
vendored
Executable file
13
inc/3rdparty/site_config/standard/aftonbladet.se.txt
vendored
Executable file
|
@ -0,0 +1,13 @@
|
|||
author: //article//address[contains(@class, 'author')]
|
||||
body: //article[.//div[contains(@class, 'abBodyText')]]//*[contains(@class, 'abLeadText') or contains(@class, 'abBodyText') or contains(@class, 'abImageBlock') or contains(@class, 'abIGSatellite')]
|
||||
|
||||
strip: //address//img
|
||||
strip: //footer
|
||||
strip_id_or_class: abSticky
|
||||
|
||||
prune: no
|
||||
|
||||
test_url: http://www.aftonbladet.se/sportbladet/hockey/sverige/allsvenskan/article17498194.ab
|
||||
test_url: http://www.aftonbladet.se/debatt/article16207536.ab
|
||||
test_url: http://www.aftonbladet.se/debatt/debattamnen/politik/article17483377.ab
|
||||
test_url: http://www.aftonbladet.se/rss.xml
|
0
inc/3rdparty/site_config/standard/aht.seriouseats.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/aht.seriouseats.com.txt
vendored
Normal file → Executable file
6
inc/3rdparty/site_config/standard/albayan.ae.txt
vendored
Executable file
6
inc/3rdparty/site_config/standard/albayan.ae.txt
vendored
Executable file
|
@ -0,0 +1,6 @@
|
|||
body: //div[@id='main-column']//div[@class='content']
|
||||
|
||||
prune: no
|
||||
|
||||
test_url: http://www.albayan.ae/across-the-uae/education/2013-08-29-1.1949645
|
||||
test_url: http://www.albayan.ae/1.448?ot=ot.AjaxPageLayout
|
0
inc/3rdparty/site_config/standard/alex.mullr.net.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/alex.mullr.net.txt
vendored
Normal file → Executable file
4
inc/3rdparty/site_config/standard/alexduner.com.txt
vendored
Executable file
4
inc/3rdparty/site_config/standard/alexduner.com.txt
vendored
Executable file
|
@ -0,0 +1,4 @@
|
|||
body: //section[@class='content']
|
||||
date: //span[1]
|
||||
author: //h1[@id='sitetitle']
|
||||
test_url: https://alexduner.com/blog/2013/1/something-i-learned-today
|
4
inc/3rdparty/site_config/standard/alexduner.squarespace.com.txt
vendored
Executable file
4
inc/3rdparty/site_config/standard/alexduner.squarespace.com.txt
vendored
Executable file
|
@ -0,0 +1,4 @@
|
|||
body: //section[@class='content']
|
||||
date: //span[1]
|
||||
author: //h1[@id='sitetitle']
|
||||
test_url: https://alexduner.squarespace.com/blog/2013/1/tech-culture-from-the-outside-looking-in
|
0
inc/3rdparty/site_config/standard/alistapart.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/alistapart.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/aljazeera.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/aljazeera.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/allrecipes.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/allrecipes.com.txt
vendored
Normal file → Executable file
5
inc/3rdparty/site_config/standard/allthingsd.com.txt
vendored
Normal file → Executable file
5
inc/3rdparty/site_config/standard/allthingsd.com.txt
vendored
Normal file → Executable file
|
@ -1,10 +1,13 @@
|
|||
title://div[@class="article-title"]/h1[@class="title"]
|
||||
date: //p[@class="article-date"]
|
||||
body://*[@class="article-body article-text"]
|
||||
body://div[contains(@class, "article-body")]
|
||||
# Trim out related posts at bottom of article
|
||||
strip://blockquote[@class="memo"]
|
||||
|
||||
tidy: no
|
||||
|
||||
# Yup, no idea why author won't work...
|
||||
author://div[@class="page-header article-header clearfix"]/p[@class="title"]
|
||||
# [Marco:] Author won't work here because the page defines the "home" link under the author's name as rel="author", which always gets priority if the page has defined it.
|
||||
test_url: http://allthingsd.com/20120513/exclusive-yahoos-thompson-out-levinsohn-in-board-settlement-with-loeb-nears-completion/
|
||||
test_url: http://allthingsd.com/20131010/google-cio-ben-fried-on-how-google-works/
|
0
inc/3rdparty/site_config/standard/allyou.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/allyou.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/alriyadh.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/alriyadh.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/alseraj.net.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/alseraj.net.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/alt1040.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/alt1040.com.txt
vendored
Normal file → Executable file
4
inc/3rdparty/site_config/standard/alternet.org.txt
vendored
Executable file
4
inc/3rdparty/site_config/standard/alternet.org.txt
vendored
Executable file
|
@ -0,0 +1,4 @@
|
|||
single_page_link: //div[contains(@class, 'story_tools')]//a[contains(@href, '/print/')]
|
||||
|
||||
test_url: http://www.alternet.org/civil-liberties/noam-chomsky-surveillance-state-beyond-imagination-being-created-one-freest
|
||||
test_url: http://feeds.feedblitz.com/alternet
|
0
inc/3rdparty/site_config/standard/altfoto.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/altfoto.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/alumni.stanford.edu.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/alumni.stanford.edu.txt
vendored
Normal file → Executable file
6
inc/3rdparty/site_config/standard/amandala.com.bz.txt
vendored
Executable file
6
inc/3rdparty/site_config/standard/amandala.com.bz.txt
vendored
Executable file
|
@ -0,0 +1,6 @@
|
|||
body: //div[@id='content']//div[contains(@class, 'content')]
|
||||
strip_id_or_class: widget
|
||||
strip: //a[contains(@href, 'upm_export=')]
|
||||
|
||||
test_url: http://amandala.com.bz/news/feed/
|
||||
test_url: http://amandala.com.bz/news/poor-pse-results-30-raise/
|
0
inc/3rdparty/site_config/standard/amazon.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/amazon.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/americandrink.net.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/americandrink.net.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/americascup.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/americascup.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt
vendored
Normal file → Executable file
8
inc/3rdparty/site_config/standard/amptoons.com.txt
vendored
Executable file
8
inc/3rdparty/site_config/standard/amptoons.com.txt
vendored
Executable file
|
@ -0,0 +1,8 @@
|
|||
title: //title
|
||||
|
||||
body: //div[@class="entry-content"]
|
||||
|
||||
author: //span[@class="author vcard"]
|
||||
|
||||
date: //span[@class="entry-date"]
|
||||
test_url: http://www.amptoons.com/blog/2013/03/14/open-thread-and-link-farm-i-hate-being-sick-edition/
|
0
inc/3rdparty/site_config/standard/anandtech.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/anandtech.com.txt
vendored
Normal file → Executable file
5
inc/3rdparty/site_config/standard/androidpolice.com.txt
vendored
Executable file
5
inc/3rdparty/site_config/standard/androidpolice.com.txt
vendored
Executable file
|
@ -0,0 +1,5 @@
|
|||
body: //div[@class='post_content']
|
||||
date: //div[@class='date_day'] | div[@class='date_month']
|
||||
|
||||
test_url: http://www.androidpolice.com/2014/03/30/music-boss-for-pebble-can-now-control-playback-and-volume-on-chromecast-content-from-your-smartwatch/
|
||||
|
0
inc/3rdparty/site_config/standard/andyrutledge.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/andyrutledge.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/applature.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/applature.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/apple.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/apple.com.txt
vendored
Normal file → Executable file
4
inc/3rdparty/site_config/standard/appledaily.com.tw.txt
vendored
Executable file
4
inc/3rdparty/site_config/standard/appledaily.com.tw.txt
vendored
Executable file
|
@ -0,0 +1,4 @@
|
|||
body: //div[contains(@class, 'articulum')]
|
||||
|
||||
test_url: http://www.appledaily.com.tw/realtimenews/article/new/20140120/330479
|
||||
test_url: http://www.appledaily.com.tw/rss/create/kind/rnews/type/new/
|
24
inc/3rdparty/site_config/standard/appleinsider.com.txt
vendored
Normal file → Executable file
24
inc/3rdparty/site_config/standard/appleinsider.com.txt
vendored
Normal file → Executable file
|
@ -1,11 +1,23 @@
|
|||
title: //p[@class='title']
|
||||
title: //h1[@class="art-head"]
|
||||
|
||||
author: //p[text() = 'By ']/a/text()
|
||||
strip: //p[text() = 'By ']
|
||||
author: //p[contains(@class, 'byline')]/a
|
||||
#author: //p[text() = 'By ']/a/text()
|
||||
#strip: //p[text() = 'By ']
|
||||
|
||||
body: //td[@class='bod']
|
||||
strip_id_or_class: title
|
||||
strip_id_or_class: minor
|
||||
date: //p[contains(@class, 'date-header')]
|
||||
|
||||
body: //div[@class="article"]
|
||||
strip_id_or_class: lazy
|
||||
#strip_id_or_class: minor
|
||||
strip_id_or_class: multipagefooter
|
||||
strip_id_or_class: date-header
|
||||
strip_id_or_class: byline
|
||||
|
||||
find_string: <noscript>
|
||||
replace_string: <div>
|
||||
find_string: </noscript>
|
||||
replace_string: </div>
|
||||
|
||||
test_url: http://www.appleinsider.com/articles/12/02/29/inside_os_x_108_mountain_lion_safari_52_gets_a_simplified_user_interface_with_new_sharing_features.html
|
||||
test_url: http://appleinsider.com/articles/13/10/03/goldee-companion-app-for-philips-hue-bulbs-offers-shifting-dynamic-light-scenes
|
||||
test_url: http://appleinsider.com/appleinsider.rss
|
0
inc/3rdparty/site_config/standard/appleweblog.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/appleweblog.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/archdaily.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/archdaily.com.txt
vendored
Normal file → Executable file
4
inc/3rdparty/site_config/standard/archiveofourown.org.txt
vendored
Normal file → Executable file
4
inc/3rdparty/site_config/standard/archiveofourown.org.txt
vendored
Normal file → Executable file
|
@ -15,4 +15,8 @@ strip_id_or_class:add_comment_placeholder
|
|||
strip_id_or_class:add_comment
|
||||
strip_id_or_class:globalize
|
||||
strip_id_or_class:footer
|
||||
|
||||
single_page_link: //div[@id='main']//a[contains(@href, 'view_adult=true')]
|
||||
|
||||
test_url: http://archiveofourown.org/works/229402?view_full_work=true
|
||||
test_url: http://archiveofourown.org/works/750111/chapters/1399929
|
1
inc/3rdparty/site_config/standard/arstechnica.com.txt
vendored
Normal file → Executable file
1
inc/3rdparty/site_config/standard/arstechnica.com.txt
vendored
Normal file → Executable file
|
@ -2,6 +2,7 @@ author: //p[@class='byline']/a
|
|||
body: //div[contains(@class,'article-content')]
|
||||
strip: //h2[@class='title']
|
||||
strip_id_or_class: byline
|
||||
strip_id_or_class: story-sidebar
|
||||
prune: no
|
||||
|
||||
date: //div[@class='byline']/span[@class='posted']//abbr/@original-title
|
||||
|
|
0
inc/3rdparty/site_config/standard/articles.boston.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/articles.boston.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/articles.courant.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/articles.courant.com.txt
vendored
Normal file → Executable file
11
inc/3rdparty/site_config/standard/articles.washingtonpost.com.txt
vendored
Executable file
11
inc/3rdparty/site_config/standard/articles.washingtonpost.com.txt
vendored
Executable file
|
@ -0,0 +1,11 @@
|
|||
body: //div[contains(@class, "article_body")]
|
||||
# print view
|
||||
body: //div[@id='print_facet']//div[@id='body']
|
||||
|
||||
tidy: no
|
||||
prune: no
|
||||
|
||||
single_page_link: concat(substring-before(//div[@id="echo_container_a"]/@guid, '_story.html'), '_print.html')
|
||||
|
||||
test_url: http://articles.washingtonpost.com/2011-10-22/world/35279694_1_germany-acts-german-leaders-chancellor-angela-merkel
|
||||
test_url: http://articles.washingtonpost.com/2013-05-31/opinions/39658000_1_chemical-weapons-mass-destruction-cartels
|
0
inc/3rdparty/site_config/standard/asahi.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/asahi.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/ascarter.net.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/ascarter.net.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/astronews.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/astronews.com.txt
vendored
Normal file → Executable file
2
inc/3rdparty/site_config/standard/asymco.com.txt
vendored
Normal file → Executable file
2
inc/3rdparty/site_config/standard/asymco.com.txt
vendored
Normal file → Executable file
|
@ -1,4 +1,4 @@
|
|||
# Johannes Stühler
|
||||
# Johannes Stühler
|
||||
|
||||
title://h2
|
||||
author://span[@class='meta-content']
|
||||
|
|
0
inc/3rdparty/site_config/standard/autoblog.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/autoblog.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/avclub.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/avclub.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/baltimoresun.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/baltimoresun.com.txt
vendored
Normal file → Executable file
13
inc/3rdparty/site_config/standard/baseballprospectus.com.txt
vendored
Executable file
13
inc/3rdparty/site_config/standard/baseballprospectus.com.txt
vendored
Executable file
|
@ -0,0 +1,13 @@
|
|||
title: //h1[@class='title']
|
||||
author: //p[@class="author"]/a[1]
|
||||
body: //div[@class="article"]
|
||||
date: //p[@class="date"]
|
||||
|
||||
# remove user tools
|
||||
strip: //div[@class='tools']
|
||||
strip: //h1
|
||||
strip: //h2[@class='subtitle']
|
||||
strip: //p[@class='author']
|
||||
strip: //p[@class='date']
|
||||
|
||||
test_url: http://www.baseballprospectus.com/article.php?articleid=18463
|
0
inc/3rdparty/site_config/standard/basicthinking.de.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/basicthinking.de.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/bb.is.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/bb.is.txt
vendored
Normal file → Executable file
10
inc/3rdparty/site_config/standard/bbc.co.uk.txt
vendored
Normal file → Executable file
10
inc/3rdparty/site_config/standard/bbc.co.uk.txt
vendored
Normal file → Executable file
|
@ -1,6 +1,11 @@
|
|||
body: //div[@class="story-body"]
|
||||
# for video entries
|
||||
body: //div[contains(@class, "videoInStory") or @id="meta-information"]
|
||||
title: //h1[@class="story-header"]
|
||||
date: //span[@class="story-date"]/span[@class='date']
|
||||
# for sport site
|
||||
date: //meta[@name='DCTERMS.created']/@content
|
||||
author: //div[@id='headline']//span[@class='byline-name']
|
||||
|
||||
# recipes, e.g. http://www.bbc.co.uk/food/recipes/mymincepies_71055
|
||||
body: //div[contains(@class, 'hrecipe')]//div[@id='subcolumn-1']
|
||||
|
@ -22,6 +27,8 @@ strip: //div[contains(@class, 'hypertabs')]
|
|||
strip: //div[contains(@class, 'related')]
|
||||
strip: //form[@id='comment-form']
|
||||
strip: //div[contains(@class, 'comment-introduction')]
|
||||
strip: //div[contains(@class, 'share-tools')]
|
||||
strip: //div[@id='also-related-links']
|
||||
|
||||
replace_string(<noscript>): <div>
|
||||
replace_string(</noscript>): </div>
|
||||
|
@ -29,4 +36,7 @@ replace_string(</noscript>): </div>
|
|||
prune: no
|
||||
|
||||
dissolve: //h2
|
||||
test_url: http://www.bbc.co.uk/sport/0/football/23224017
|
||||
test_url: http://www.bbc.co.uk/news/business-15060862
|
||||
# video entry
|
||||
test_url: http://www.bbc.co.uk/news/world-asia-22056933
|
16
inc/3rdparty/site_config/standard/bbcgoodfood.com.txt
vendored
Executable file
16
inc/3rdparty/site_config/standard/bbcgoodfood.com.txt
vendored
Executable file
|
@ -0,0 +1,16 @@
|
|||
title: //header//h1
|
||||
#body: //article[contains(@class, 'node-full')]
|
||||
body: //div[contains(@class, 'recipe-details') or contains(@class, 'tips-carousel')] | //section[@id='recipe-ingredients' or @id='recipe-method']
|
||||
|
||||
strip_id_or_class: recipe-rating-wrapper
|
||||
strip_id_or_class: magazine-subcribe-header
|
||||
strip_id_or_class: hide
|
||||
strip_id_or_class: recipe-actions
|
||||
strip_id_or_class: buy-ingredients
|
||||
strip_id_or_class: related-content
|
||||
strip_id_or_class: recipe-magazine-ad
|
||||
strip_id_or_class: copy-right
|
||||
|
||||
prune: no
|
||||
|
||||
test_url: http://www.bbcgoodfood.com/recipes/1131634/minced-beef-wellington
|
0
inc/3rdparty/site_config/standard/benoitmaison.org.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/benoitmaison.org.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/berlingske.dk.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/berlingske.dk.txt
vendored
Normal file → Executable file
5
inc/3rdparty/site_config/standard/bernama.com.txt
vendored
Executable file
5
inc/3rdparty/site_config/standard/bernama.com.txt
vendored
Executable file
|
@ -0,0 +1,5 @@
|
|||
body: //div[contains(@class, "NewsText")]
|
||||
prune: no
|
||||
|
||||
test_url: http://www.bernama.com/bernama/v7/rss/english.php
|
||||
test_url: http://www.bernama.com/bernama/v7/newsindex.php?id=943513
|
0
inc/3rdparty/site_config/standard/betabeat.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/betabeat.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/betanews.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/betanews.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/biography.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/biography.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/bitelia.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/bitelia.com.txt
vendored
Normal file → Executable file
13
inc/3rdparty/site_config/standard/bizjournals.com.txt
vendored
Executable file
13
inc/3rdparty/site_config/standard/bizjournals.com.txt
vendored
Executable file
|
@ -0,0 +1,13 @@
|
|||
date: //meta[@name='publish-date']/@content
|
||||
body: //div[contains(@class, 'articleContentWrapper')]
|
||||
prune: no
|
||||
|
||||
strip: //div[contains(@class, 'staff_info')]//dd[contains(., 'Twitter')]
|
||||
|
||||
strip_id_or_class: related_content
|
||||
strip_id_or_class: enlarge
|
||||
strip_id_or_class: photoBy
|
||||
strip_id_or_class: older
|
||||
|
||||
test_url: http://www.bizjournals.com/cincinnati/news/2013/10/03/harris-teeter-shareholders-vote-on.html
|
||||
test_url: http://feeds.bizjournals.com/industry_20?format=xml
|
0
inc/3rdparty/site_config/standard/bjango.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/bjango.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/blog.arsln.org.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/blog.arsln.org.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/blog.asmartbear.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/blog.asmartbear.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/blog.cloudflare.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/blog.cloudflare.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/blog.fefe.de.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/blog.fefe.de.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/blog.instagram.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/blog.instagram.com.txt
vendored
Normal file → Executable file
9
inc/3rdparty/site_config/standard/blog.instapaper.com.txt
vendored
Executable file
9
inc/3rdparty/site_config/standard/blog.instapaper.com.txt
vendored
Executable file
|
@ -0,0 +1,9 @@
|
|||
author: //a[@href="http://www.marco.org/about"]
|
||||
date: //span[@class="date"]
|
||||
|
||||
# Remove the date from article body.
|
||||
strip: //span[@class="date"]
|
||||
|
||||
# Remove pagination links from article body.
|
||||
strip: //div[@id="pagination"]
|
||||
test_url: http://blog.instapaper.com/post/31303984531
|
0
inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/blog.kaelig.fr.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/blog.kaelig.fr.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/blog.naver.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/blog.naver.com.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/blog.pchome.net.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/blog.pchome.net.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/blog.pinboard.in.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/blog.pinboard.in.txt
vendored
Normal file → Executable file
11
inc/3rdparty/site_config/standard/blog.renren.com.txt
vendored
Executable file
11
inc/3rdparty/site_config/standard/blog.renren.com.txt
vendored
Executable file
|
@ -0,0 +1,11 @@
|
|||
# This filter is tested on:
|
||||
# http://blog.renren.com/share/224959024/14260739544
|
||||
# http://blog.renren.com/share/231323504/14261768898
|
||||
# http://blog.renren.com/share/230305019/1502806705
|
||||
|
||||
title://h1[contains(@class, 'title-article')]
|
||||
author://span[contains(@class, 'name')]
|
||||
body://div[contains(@class, 'content-body')]
|
||||
|
||||
convert_double_br_tags:yes
|
||||
test_url: http://blog.renren.com/share/230305019/1502806705
|
0
inc/3rdparty/site_config/standard/blog.sina.com.cn.txt
vendored
Normal file → Executable file
0
inc/3rdparty/site_config/standard/blog.sina.com.cn.txt
vendored
Normal file → Executable file
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue