# ---------------------------------------------------------------------------
# Metadata
# ---------------------------------------------------------------------------

# Author: the link with class "author" directly inside the "byline" paragraph.
author: //p[contains(@class, "byline")]/a[contains(@class, "author")]

# Date: the datetime attribute of the time element (carrying @pubdate) that
# sits directly inside the "publish-date" span.
date: //span[contains(@class, "publish-date")]/time[@pubdate]/@datetime

# ---------------------------------------------------------------------------
# Body selection
# ---------------------------------------------------------------------------

# First body rule: the div whose class contains 'entry-content'.
body: //div[contains(@class, 'entry-content')]

# Second body rule, for vergecasts, e.g.
# http://www.theverge.com/2013/8/22/4648566/the-vergecast-090-august-22th-2013-video
body: //article

# Pruning is switched off; the tidy override below is left commented out.
prune: no
#tidy: no

# ---------------------------------------------------------------------------
# Elements removed by XPath
# ---------------------------------------------------------------------------

# Page chrome: article header, sticky menu, asides and navigation.
strip: //article/header
strip: //*[@id='sticky-menu']
strip: //aside
strip: //nav

# Images whose class contains 'vox-lazy-load'.
strip: //img[contains(@class, 'vox-lazy-load')]

# Deal with bad parsing: drop any div inside a 'story-image' div whose text
# contains 'function('.
strip: //div[contains(@class, 'story-image')]//div[contains(., 'function(')]

# Link sets, entry sidebar, article source lists and chorus-emc content.
strip: //div[contains(@class, 'm-linkset')]
strip: //div[contains(@class, 'm-entry__sidebar')]
strip: //ul[contains(@class, 'm-article__sources')]
strip: //div[contains(@class, 'chorus-emc__content')]

# ---------------------------------------------------------------------------
# Elements removed by id or class name
# ---------------------------------------------------------------------------

strip_id_or_class: gallery
strip_id_or_class: article-meta
strip_id_or_class: story-navigation
strip_id_or_class: slegend
strip_id_or_class: related-product-meta
strip_id_or_class: comments
strip_id_or_class: ui-jump-list
strip_id_or_class: pullquote
strip_id_or_class: m-ad
strip_id_or_class: social-sharing
strip_id_or_class: m-video-entry__excerpt
strip_id_or_class: hidden

# NOTE(review): the directive below is cut off at the edge of the visible
# source; it is kept exactly as found and must be checked against the full file.
replace_string(