From d0427d9baebc5abeb9f09bac54196026e56498b8 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Mon, 18 Nov 2013 16:47:20 +0100 Subject: [PATCH] [fix] html escape --- searx/utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/searx/utils.py b/searx/utils.py index 8e3b10d39..670499805 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -1,5 +1,5 @@ from HTMLParser import HTMLParser -import htmlentitydefs +#import htmlentitydefs import csv import codecs import cStringIO @@ -17,8 +17,9 @@ class HTMLTextExtractor(HTMLParser): self.result.append(unichr(codepoint)) def handle_entityref(self, name): - codepoint = htmlentitydefs.name2codepoint[name] - self.result.append(unichr(codepoint)) + #codepoint = htmlentitydefs.name2codepoint[name] + #self.result.append(unichr(codepoint)) + self.result.append(name) def get_text(self): return u''.join(self.result)