mirror of
https://github.com/wallabag/wallabag.git
synced 2024-12-18 13:46:28 +00:00
17 lines
581 B
Text
17 lines
581 B
Text
|
# Host configuration: this file is intended for http://www.neh.gov/news/humanities/
|
||
|
|
||
|
|
||
|
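# Metadata: title and date are parsed from the colon-separated <title>; author comes from the 'subHead' heading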
|
||
|
title:substring-after(substring-after(//title,':'),':')
|
||
|
author:substring-after(//h2[@class = 'subHead'],'By')
|
||
|
date:substring-before(substring-after(//title,':'),':')
|
||
|
|
||
|
# Image and caption handling
|
||
|
wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text()
|
||
|
wrap_in(fieldset)://div[@id = 'mainContent']/table
|
||
|
|
||
|
# Clean up: strip elements that should not appear in the extracted content
|
||
|
strip: //table[@class = 'marginpaddingTop']
|
||
|
strip: //h2[@class = 'subHead']
|
||
|
|
||
|
test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html
|