mirror of
https://github.com/wallabag/wallabag.git
synced 2024-11-18 23:21:03 +00:00
36 lines
No EOL
1.4 KiB
Text
36 lines
No EOL
1.4 KiB
Text
title://h1[@class="articleHeadline"]
|
|
body://div[@id="article"]
|
|
strip_id_or_class:articleTools
|
|
strip_id_or_class:readerscomment
|
|
#strip://div[contains(@class, "articleInline runaroundLeft")]
|
|
strip: //div[contains(@class, "doubleRule")]
|
|
# Strip the image credit: it is rendered as a bold h6 heading inside the inline article blocks
|
|
strip: //div[contains(@class, "articleInline")]//h6
|
|
strip_id_or_class:enlargeThis
|
|
strip_id_or_class:pageLinks
|
|
strip_id_or_class:memberTools
|
|
strip_id_or_class:articleExtras
|
|
strip_id_or_class:singleAd
|
|
strip_id_or_class:byline
|
|
strip_id_or_class:dateline
|
|
strip_id_or_class:articleheadline
|
|
strip_id_or_class:articleBottomExtra
|
|
strip://a[contains(@href, 'nytimes.com/adx/')]
|
|
strip: //nyt_byline
|
|
strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')]
|
|
strip: //p[@class='caption']//a[contains(., 'More Photos')]
|
|
|
|
prune: no
|
|
tidy: no
|
|
|
|
date: substring-after(//*[contains(@class, 'dateline')], 'Published:')
|
|
|
|
single_page_link: //link[contains(@href, 'pagewanted=all')]
|
|
#single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))]
|
|
|
|
strip://ul[@id = 'toolsList']
|
|
strip://h6[@class = 'kicker']
|
|
author:substring-after(//h6[@class='byline'],'By ')
|
|
|
|
test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html
|
|
test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html