wallabag/inc/3rdparty/site_config/standard/faz.net.txt
2014-07-13 10:15:40 +02:00

36 lines
No EOL
1.1 KiB
Text
Executable file

# Site config for faz.net article pages: one "directive: XPath" rule per line.
# Title
title: //p[@class='Content HeadlineShort']
# Author
# Takes the text that follows 'Von' (German for "by") in the Autor span.
# Some authors are rendered as links and others as plain text; using the
# span's string value covers both cases.
author: substring-after(//span[@class='Autor'], 'Von')
# Date
date: //span[@class='Datum']
# Body
body: //div[@class='Artikel']
# Elements removed before the body text
strip: //div[@class='Breadcrumbs']
strip: //div[@class='QuickSearchBox']
strip: //div[@class='FAZArtikelEinleitung']
strip: //div[@class='FAZArtikelReiter']
strip: //div[@class='clear']
# Elements removed anywhere in the article
strip: //span[@class='Bildnachweis']
strip: //img[@class='MediaIcon']
strip: //div[@class='ArtikelMediaLink']
# NOTE(review): matches links that contain an image; presumably the link is
# unwrapped and the image kept - confirm the extractor in use supports 'dissolve'.
dissolve: //a[img]
# Elements removed after the body text
strip: //div[@class='ArtikelAbbinder']
# NOTE(review): the next rule compares the complete class attribute value,
# including the 'GETS;tk;...' tokens, so it stops matching if any part changes.
strip: //div[@class='ArtikelKommentieren Artikelfuss GETS;tk;boxen.top-lesermeinungen;tp;content']
strip: //div[@class='FAZArtikelKommentare FAZArtikelContent']
strip: //div[@class='FAZArtikelFunktionen']
strip: //div[@id='FAZContentRight']
# Fix picture captions: wrap the caption text nodes in <small>
# NOTE(review): confirm the extractor in use supports 'wrap_in'.
wrap_in(small): //span[@class='Bildunterschrift']/text()
# Sample article URL for testing this config
test_url: http://www.faz.net/aktuell/feuilleton/zum-tod-von-margaret-thatcher-die-reizfigur-12141919.html#Drucken