wallabag/inc/3rdparty/site_config/standard/nytimes.com.txt
2014-10-27 06:46:13 +01:00

53 lines
2.4 KiB
Text
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Title: the <h1> whose class attribute is exactly "articleHeadline".
title://h1[@class="articleHeadline"]
# Body candidates: the <div> with id "article", or any element carrying
# itemprop="articleBody".
# NOTE(review): presumably the body expressions are tried in the order listed - confirm against the parser.
body://div[@id="article"]
body://*[@itemprop="articleBody"]
# Elements removed from the page: each strip_id_or_class line names an
# id/class value, each strip line gives an XPath expression.
strip_id_or_class:articleTools
strip_id_or_class:readerscomment
# Disabled rule (commented out): <div>s whose class contains "articleInline runaroundLeft".
#strip://div[contains(@class, "articleInline runaroundLeft")]
strip: //div[contains(@class, "doubleRule")]
# Strip the image credit - it appears as a bold heading (<h6>) inside an "articleInline" <div>.
strip: //div[contains(@class, "articleInline")]//h6
strip_id_or_class:enlargeThis
strip_id_or_class:pageLinks
strip_id_or_class:memberTools
strip_id_or_class:articleExtras
strip_id_or_class:singleAd
# "byline" and "dateline" are also the class names read by the author: and date:
# rules in this file.
# NOTE(review): presumably author/date are extracted before these strips run - confirm.
strip_id_or_class:byline
strip_id_or_class:dateline
# NOTE(review): spelled all-lowercase here, whereas the title rule matches "articleHeadline" - confirm which element this targets.
strip_id_or_class:articleheadline
strip_id_or_class:articleBottomExtra
strip_id_or_class:shareTools
# Links whose href contains 'nytimes.com/adx/' (presumably ad links - confirm).
strip://a[contains(@href, 'nytimes.com/adx/')]
# The non-standard <nyt_byline> element.
strip: //nyt_byline
# <span>s whose class contains 'slideshow' or 'video'.
strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')]
# "More Photos" links inside <p class='caption'>.
strip: //p[@class='caption']//a[contains(., 'More Photos')]
# Both prune and tidy are switched off ("no") for this site.
prune: no
tidy: no
# Two find/replace pairs: "<script" becomes the opening of a hidden <div>, and
# "</script>" becomes "</div>".
# NOTE(review): presumably each find_string is paired with the replace_string that follows it - confirm.
find_string: <script
replace_string: <div style="display:none"
find_string: </script>
replace_string: </div>
# Date: the text following 'Published:' in the first element (document order)
# whose class contains 'dateline'.
date: substring-after(//*[contains(@class, 'dateline')], 'Published:')
# Single-page candidates, as listed:
#   1. a <link> whose href contains 'pagewanted=all';
#   2. the href of a <link rel='alternate'> pointing at mobile.nytimes.com;
#   3. a URL built from a pagination link under <div id='pageLinks'>: everything
#      before 'pagewanted=' in its href, with 'pagewanted=all' appended.
# NOTE(review): concat() always returns a string, so when no pagination link exists
# expression 3 evaluates to the bare text 'pagewanted=all' - confirm the consumer tolerates that.
single_page_link: //link[contains(@href, 'pagewanted=all')]
single_page_link: //link[@rel='alternate' and contains(@href, 'mobile.nytimes.com')]/@href
single_page_link: concat(substring-before(//div[@id='pageLinks']//a[contains(@href, 'pagewanted=')]/@href, 'pagewanted='), 'pagewanted=all')
# Disabled alternative (commented out): an <a> whose href contains 'pagewanted=all' but not 'login'.
#single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))]
# Remove the <ul id='toolsList'> list and any <h6 class='kicker'> heading.
strip://ul[@id = 'toolsList']
strip://h6[@class = 'kicker']
# Author: the text following 'By ' in the first <h6> whose class is exactly 'byline'.
author:substring-after(//h6[@class='byline'],'By ')
# Test fixtures: article URLs for this config. The first two are each followed by
# a test_contains phrase expected to appear in the extracted article text.
test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html
test_contains: In this column I want to look at a not uncommon way of writing
test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html
# Apostrophes restored as U+2019; without them "youve" / "Sorkins" cannot occur in
# the article text. NOTE(review): confirm the page uses U+2019 rather than ASCII '.
test_contains: IF you’ve seen enough of Aaron Sorkin’s theater
# The remaining URLs have no expected phrase attached.
test_url: http://www.nytimes.com/2013/03/25/world/middleeast/israeli-military-responds-after-patrols-come-under-fire-from-syria.html
test_url: http://www.nytimes.com/2013/08/15/nyregion/when-the-new-york-city-subway-ran-without-rails.html
test_url: http://www.nytimes.com/2004/02/29/weekinreview/correspondence-class-consciousness-china-s-wealthy-live-creed-hobbes-darwin-meet.html
test_url: http://www.nytimes.com/2014/06/19/opinion/gail-collins-romney-and-the-2016-contenders-huddle.html