Handle misidentified codepoint (#281)

This commit is contained in:
Ross Scroggs
2016-10-12 15:02:26 -07:00
committed by Jay Lee
parent 8435d41d44
commit 6b0fb9e54d

View File

@@ -384,7 +384,11 @@ class _DeHTMLParser(HTMLParser):
self.__text.append(unichr(int(name[1:], 16)) if name.startswith('x') else unichr(int(name)))
def handle_entityref(self, name):
    """Append the text for a named entity reference to the collected text.

    ``name`` is the entity name without the leading ``&`` (for example
    ``amp``).  When the name is present in ``name2codepoint`` the
    corresponding character is appended.  When it is not, the literal
    text ``'&' + name`` is appended instead, so an unrecognised entity
    is kept in the output rather than raising ``KeyError``.
    """
    cp = name2codepoint.get(name)
    # Test against None (the "missing key" result of .get) instead of
    # truthiness, so only a genuinely unknown name takes the fallback.
    if cp is not None:
        self.__text.append(unichr(cp))
    else:
        # Unknown entity: keep it visible as written, minus any ';'.
        self.__text.append(u'&'+name)
def handle_starttag(self, tag, attrs):
if tag == 'p':