mirror of
https://github.com/wallabag/wallabag.git
synced 2024-12-05 07:16:29 +00:00
33 lines
No EOL
870 B
Text
Executable file
33 lines
No EOL
870 B
Text
Executable file
# fforst@...
|
|
|
|
# Use the article's print link as the single-page view
|
|
single_page_link: //a[@class="print"]
|
|
|
|
# set body
|
|
tidy: no
|
|
body: //div[@class='artikel-content']
|
|
|
|
# strip title and subtitle since we already have them
|
|
strip: //div[@class='issue']
|
|
strip: //div[@class='artikel-content']/h2
|
|
|
|
# some authors are known and have a link, others don't
|
|
author: //a[contains(@href, 'autor?')]
|
|
|
|
# publication date
|
|
date: //span[@class='article-date']
|
|
|
|
# strip the author element since the author has already been extracted
|
|
strip_id_or_class: author
|
|
|
|
# strip image credits and captions
|
|
strip_id_or_class: field-name-field-image-credit
|
|
strip_id_or_class: field-name-field-article-image-subtitle
|
|
|
|
# remove community functions
|
|
strip: //div[@class='meta']
|
|
strip: //div[@id='comments']
|
|
|
|
# remove "continue on the next page" text
|
|
strip: //p[text()="[SEITE]"]
|
|
test_url: http://www.cicero.de/weltbuehne/ihre-wut-ist-global-krise-jugend-revolten-aufstaende-zelte/43049 |