"""Text helpers: container normalisation, HTML-to-text, and size/time formatting."""

import collections
import re
import sys
from collections.abc import Iterable, Mapping
from html.entities import name2codepoint
from html.parser import HTMLParser

# NOTE(review): nothing below references these names any more (the only user
# was an unreachable branch in convertUTF8); the import is retained in case
# other modules rely on it being available from here.
from var import GM_Globals, GM_WINDOWS, GM_SYS_ENCODING

# Decimal (SI) size thresholds used by formatFileSize.
ONE_KILO_BYTES = 1000
ONE_MEGA_BYTES = 1000000
ONE_GIGA_BYTES = 1000000000


def convertUTF8(data):
    """Recursively rebuild *data*, returning ``str`` values unchanged.

    Args:
        data: Any object. Mappings and other iterables are walked
            recursively; everything else is returned as-is.

    Returns:
        * ``str`` -- returned unchanged.
        * mapping -- a plain ``dict`` whose keys and values were converted.
        * other iterable -- ``type(data)`` constructed from the list of
          converted items.
        * anything else -- returned unchanged.
    """
    # The str check must come first: a str is itself an Iterable of
    # one-character strs and would otherwise recurse without end.
    if isinstance(data, str):
        return data
    # collections.Mapping / collections.Iterable were removed in Python 3.10;
    # the ABCs live in collections.abc.
    if isinstance(data, Mapping):
        return {convertUTF8(key): convertUTF8(value)
                for key, value in data.items()}
    if isinstance(data, Iterable):
        return type(data)([convertUTF8(item) for item in data])
    return data


class _DeHTMLParser(HTMLParser):
    """HTMLParser subclass that accumulates a plain-text rendering of HTML."""

    def __init__(self):
        super().__init__()
        # Text fragments in document order; joined by text().
        self.__text = []

    def handle_data(self, data):
        """Keep character data verbatim."""
        self.__text.append(data)

    def handle_charref(self, name):
        """Append the character for a numeric reference (``x``-prefixed hex or decimal)."""
        if name.startswith('x'):
            self.__text.append(chr(int(name[1:], 16)))
        else:
            self.__text.append(chr(int(name)))

    def handle_entityref(self, name):
        """Append the character for a named entity, or ``&name`` if unknown."""
        cp = name2codepoint.get(name)
        if cp:
            self.__text.append(chr(cp))
        else:
            self.__text.append('&' + name)

    def handle_starttag(self, tag, attrs):
        """Emit line breaks for block tags and surface link targets."""
        if tag == 'p':
            self.__text.append('\n\n')
        elif tag == 'br':
            self.__text.append('\n')
        elif tag == 'a':
            # Only the first href is rendered.
            for attr_name, attr_value in attrs:
                if attr_name == 'href':
                    self.__text.append('({0}) '.format(attr_value))
                    break
        elif tag == 'div':
            # Only attribute-less <div> produces a line break.
            if not attrs:
                self.__text.append('\n')
        elif tag in ('http:', 'https:'):
            # html.parser tokenizes an angle-bracket autolink such as
            # <https://example.com> as a start tag named 'https:' whose first
            # attribute name is 'example.com'. The trailing colon is part of
            # the tag name, so both schemes must be matched with it.
            if attrs:
                self.__text.append(' ({0}//{1}) '.format(tag, attrs[0][0]))

    def handle_startendtag(self, tag, attrs):
        """Render a self-closing ``<br/>`` as a blank line; ignore other tags."""
        if tag == 'br':
            self.__text.append('\n\n')

    def text(self):
        """Return the accumulated text with whitespace normalised.

        Spaces following a newline are removed, runs of three or more
        newlines collapse to two, and the result is stripped.
        """
        joined = ''.join(self.__text)
        unindented = re.sub(r'\n +', '\n', joined)
        return re.sub(r'\n{2}\n+', '\n\n', unindented).strip()


def dehtml(text):
    """Convert HTML *text* to plain text.

    Args:
        text: The HTML to convert; it is passed through ``str()`` first.

    Returns:
        The plain-text rendering. If parsing raises, the traceback is
        printed to ``sys.stderr`` and the original *text* is returned
        unchanged.
    """
    try:
        parser = _DeHTMLParser()
        parser.feed(str(text))
        parser.close()
        return parser.text()
    except Exception:
        # Best effort: report and fall back to the input. Exception (rather
        # than a bare except) lets KeyboardInterrupt/SystemExit propagate.
        from traceback import print_exc
        print_exc(file=sys.stderr)
        return text


def indentMultiLineText(message, n=0):
    """Indent every line after the first by *n* spaces and strip trailing whitespace."""
    return message.replace('\n', '\n{0}'.format(' ' * n)).rstrip()


def formatFileSize(fileSize):
    """Format a byte count as a whole number of decimal kb, mb or gb.

    Args:
        fileSize: Size in bytes.

    Returns:
        ``'0kb'`` for exactly 0, ``'1kb'`` for any other value below
        ONE_KILO_BYTES, otherwise the size floor-divided by the largest
        unit it reaches, suffixed with ``kb``, ``mb`` or ``gb``.
    """
    if fileSize == 0:
        return '0kb'
    if fileSize < ONE_KILO_BYTES:
        return '1kb'
    if fileSize < ONE_MEGA_BYTES:
        return '{0}kb'.format(fileSize // ONE_KILO_BYTES)
    if fileSize < ONE_GIGA_BYTES:
        return '{0}mb'.format(fileSize // ONE_MEGA_BYTES)
    return '{0}gb'.format(fileSize // ONE_GIGA_BYTES)


def formatMilliSeconds(millis):
    """Format a millisecond duration as ``HH:MM:SS``.

    The sub-second remainder is discarded. Hours are not wrapped at 24.
    """
    seconds, _ = divmod(millis, 1000)
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return '%02d:%02d:%02d' % (hours, minutes, seconds)