mirror of
https://github.com/wallabag/wallabag.git
synced 2024-11-01 14:49:15 +00:00
20 lines
No EOL
674 B
Text
Executable file
20 lines
No EOL
674 B
Text
Executable file
# Full-text extraction rules for smithsonianmag.com article pages.
# Each rule is written as `directive: value`; lines starting with `#` are
# comments. Unless noted otherwise, values are XPath expressions evaluated
# against the fetched page.

# meta data
# Article headline.
title://h1[@id = 'articleTitle']
# First byline item with everything up to and including "By " removed.
author:substring-after(//ul[@id = 'byLine']/li[1],'By ')
# Text between the first and second commas of the last byline item.
date:substring-before(substring-after(//ul[@id = 'byLine']/li[last()],','),',')
# Element holding the article content.
body://div[@id = 'article-body']

# full content
# Link to the single-page view, so paginated articles are fetched whole.
single_page_link://td/li[@class = 'article-singlepage']/a

# caption clean up
# Wrap wide image captions in an <i> element.
wrap_in(i)://span[@class='articleImageCaptionwide']
# NOTE(review): `move_into` is not among the documented site-config
# directives, and the space before "(" differs from the `wrap_in(i)` form
# above -- confirm the parser in use actually honours this line. Presumably
# it is meant to move the image paragraph into the caption span.
move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p

# clean up
# Remove pagination controls, the category breadcrumb and the
# "view more photos" block from the extracted body.
strip://p[@id = 'articlePaginationWrapper']
strip://ul[contains(@class, 'cat-breadcrumb')]
strip://div [@class= 'viewMorePhotos']

# Sample article URL for checking these rules (a plain URL, not XPath).
test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html