[fix] skip incomplete google news results

This commit is contained in:
Adam Tauber 2017-01-10 11:03:05 +01:00
parent 94327d67fc
commit 108392f8da

View file

@@ -66,11 +66,14 @@ def response(resp):
     # parse results
     for result in dom.xpath('//div[@class="g"]|//div[@class="g _cy"]'):
-        r = {
-            'url': result.xpath('.//div[@class="_cnc"]//a/@href')[0],
-            'title': ''.join(result.xpath('.//div[@class="_cnc"]//h3//text()')),
-            'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
-        }
+        try:
+            r = {
+                'url': result.xpath('.//div[@class="_cnc"]//a/@href')[0],
+                'title': ''.join(result.xpath('.//div[@class="_cnc"]//h3//text()')),
+                'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
+            }
+        except:
+            continue
         imgs = result.xpath('.//img/@src')
         if len(imgs) and not imgs[0].startswith('data'):