mirror of
https://github.com/wallabag/wallabag.git
synced 2024-11-27 11:31:05 +00:00
Add support for *.about.com
Includes next_page_link for multi-page articles and strips pesky inline 'Next: ' links from the article body. Also includes an XPath for the author, although I can't see where this is used in the wallabag UI. The 'tidy' option is turned off because it messed up bulleted lists. Tested with psychology.about.com and food.about.com.
This commit is contained in:
parent
6400371ff9
commit
d59536deea
1 changed file with 14 additions and 0 deletions
14
inc/3rdparty/site_config/standard/.about.com.txt
vendored
Normal file
14
inc/3rdparty/site_config/standard/.about.com.txt
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
body: //div[@id='articlebody']
|
||||
title: //h1
|
||||
author: //p[@id='by']//a
|
||||
|
||||
next_page_link: //span[@class='next']/a
|
||||
# Not the same as the in-body 'Next: ' links stripped below!
|
||||
|
||||
prune: yes
|
||||
tidy: no
|
||||
|
||||
# Strip the annoying 'Next: ' links that sit directly inside the article body
|
||||
strip: //*[text()[contains(.,'Next: ')]]
|
||||
|
||||
test_url: http://psychology.about.com/od/theoriesofpersonality/ss/defensemech.htm
|
Loading…
Reference in a new issue