2014-10-27 05:46:13 +00:00
|
|
|
# Include the lead graphic (the div with id "lead_graphic") in the body, if available, by unioning it with the articleBody div
|
|
|
|
body: //div[contains(concat(' ', normalize-space(@id), ' '), ' lead_graphic ')] | //div[contains(concat(' ', normalize-space(@itemprop), ' '), ' articleBody ')]
|
|
|
|
title: //h1[contains(concat(' ', normalize-space(@itemprop), ' '), ' headline ')]
|
|
|
|
date: //time[contains(concat(' ', normalize-space(@itemprop), ' '), ' datePublished ')]
|
2014-07-13 08:15:40 +00:00
|
|
|
|
2014-10-27 05:46:13 +00:00
|
|
|
strip_id_or_class: photo_credit
|
|
|
|
strip_id_or_class: photo_caption
|
|
|
|
strip_id_or_class: inline_gallery
|
|
|
|
# Strip pull quotes (class "pq"), which are often inside a blockquote element
|
|
|
|
strip_id_or_class: pq
|
|
|
|
strip_id_or_class: credit
|
|
|
|
strip_id_or_class: figcaption
|
|
|
|
strip_id_or_class: related_item
|
2014-07-13 08:15:40 +00:00
|
|
|
|
|
|
|
test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html
|
2014-10-27 05:46:13 +00:00
|
|
|
test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall
|
|
|
|
test_url: http://www.businessweek.com/articles/2014-07-09/american-apparel-dov-charneys-sleazy-struggle-for-control
|