mirror of
https://github.com/wallabag/wallabag.git
synced 2024-11-01 14:49:15 +00:00
23 lines
1.2 KiB
Text
Executable file
23 lines
1.2 KiB
Text
Executable file
# This setup grabs the text from a Reddit self post. It ignores all comments etc.
|
|
|
|
title: //p[@class="title"]/a/text()
|
|
|
|
author: //p[@class="tagline"]/a
|
|
|
|
# NOTE: the date expression below is reported not to work; the cause has not been identified.
|
|
date: //p[@class="tagline"]//@datetime
|
|
|
|
#body: (//div[contains(@class, 'noncollapsed')]//div[contains(@class, 'usertext-body')])[1]
|
|
|
|
# Select the div whose class list contains the token "md" inside a div whose
# class list contains all three tokens "usertext-body", "may-blank-within" and
# "md-container". The concat(' ', normalize-space(@class), ' ') wrapping makes
# each test match a whole class token rather than a substring of another name.
body: //div[contains(concat(' ',normalize-space(@class),' '),' usertext-body ') and (contains(concat(' ',normalize-space(@class),' '),' may-blank-within ')) and (contains(concat(' ',normalize-space(@class),' '),' md-container '))]//div[contains(concat(' ',normalize-space(@class),' '),' md ')]
|
|
|
|
strip_id_or_class: tagline
|
|
strip_id_or_class: unvotable-message
|
|
strip_id_or_class: buttons
|
|
|
|
# Follow the posted link when the title anchor points at an absolute URL
# (href starting with http:// or https://). Self posts use a relative URL,
# so they do not match and the Reddit page itself is extracted.
|
|
single_page_link: //p[@class="title"]/a[starts-with(@href, 'http://') or starts-with(@href, 'https://')]
|
|
|
|
test_url: http://www.reddit.com/r/truegaming/comments/wfe7r/i_wrote_about_the_problems_i_honestly_feel_that/
|
|
test_url: http://www.reddit.com/r/worldnews/comments/1as37r/twelve_north_korean_soldiers_attempting_to_defect/
|
|
test_url: http://www.reddit.com/r/WritingPrompts/comments/2786lw/wp_in_a_world_where_puns_are_illegal_one_man/chybk8e
|