mirror of
https://github.com/GAM-team/GAM.git
synced 2026-06-21 22:51:37 +00:00
* Code cleanup * Add missing _ * Add missing character One character was missing from the prefix, I assumed :, did you want a space? * Put missing variable back * More cleanup repairs
96 lines
2.6 KiB
Python
96 lines
2.6 KiB
Python
import collections
|
|
import re
|
|
import sys
|
|
from html.entities import name2codepoint
|
|
from html.parser import HTMLParser
|
|
from var import GM_Globals, GM_WINDOWS, GM_SYS_ENCODING
|
|
|
|
# Decimal (power-of-ten) byte thresholds: 10^3, 10^6 and 10^9 bytes.
ONE_KILO_BYTES = 10 ** 3
ONE_MEGA_BYTES = 10 ** 6
ONE_GIGA_BYTES = 10 ** 9
|
|
|
|
def convertUTF8(data):
    """Recursively rebuild *data*, leaving text and scalar values unchanged.

    Strings and bytes are returned as-is.  Any mapping is rebuilt as a plain
    ``dict`` whose keys and values have been converted recursively.  Any other
    iterable is rebuilt as ``type(data)(...)`` from its converted elements, so
    lists stay lists, tuples stay tuples, and so on.  Everything else (numbers,
    ``None``, ...) is returned untouched.

    Args:
        data: An arbitrarily nested structure of mappings, iterables and
            scalar values.

    Returns:
        A structure of the same shape as *data*.
    """
    # The container ABCs live in ``collections.abc``; the old aliases on
    # ``collections`` itself were removed in Python 3.10.  Imported locally so
    # this function does not depend on the module-level import list.
    from collections.abc import Iterable, Mapping

    # Strings are already text on Python 3, so no encoding step is needed.
    # Bytes are returned early as well; rebuilding them element by element
    # would produce an equal object anyway.
    if isinstance(data, (str, bytes)):
        return data
    if isinstance(data, Mapping):
        return {convertUTF8(key): convertUTF8(value) for key, value in data.items()}
    if isinstance(data, Iterable):
        return type(data)(list(map(convertUTF8, data)))
    return data
|
|
|
|
class _DeHTMLParser(HTMLParser):
    """HTMLParser subclass that collects a plain-text rendering of HTML.

    Text fragments are accumulated in a private list while the document is
    fed; call :meth:`text` afterwards to get the joined, whitespace-normalised
    result.

    NOTE(review): the parser is created with HTMLParser's defaults.  Per the
    ``html.parser`` documentation, with ``convert_charrefs=True`` references
    in text are decoded before ``handle_data`` is called, so the parser itself
    may never invoke ``handle_charref``/``handle_entityref``; they still work
    when called directly.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        # Output fragments, joined by text().
        self.__text = []

    def handle_data(self, data):
        """Append literal text content unchanged."""
        self.__text.append(data)

    def handle_charref(self, name):
        """Append the character for a numeric reference.

        *name* is the reference without ``&#`` and ``;``: decimal digits, or
        hex digits prefixed with ``x``/``X``.
        """
        # Both 'x41' and 'X41' are valid hexadecimal spellings.
        if name.startswith(('x', 'X')):
            codepoint = int(name[1:], 16)
        else:
            codepoint = int(name)
        self.__text.append(chr(codepoint))

    def handle_entityref(self, name):
        """Append the character for a named entity, or ``&name`` if unknown."""
        cp = name2codepoint.get(name)
        if cp:
            self.__text.append(chr(cp))
        else:
            self.__text.append('&'+name)

    def handle_starttag(self, tag, attrs):
        """Translate selected opening tags into text separators.

        ``<p>`` becomes a blank line, ``<br>`` and attribute-less ``<div>`` a
        newline, and ``<a href=...>`` emits the link target in parentheses.
        """
        if tag == 'p':
            self.__text.append('\n\n')
        elif tag == 'br':
            self.__text.append('\n')
        elif tag == 'a':
            for attr in attrs:
                if attr[0] == 'href':
                    self.__text.append('({0}) '.format(attr[1]))
                    break
        elif tag == 'div':
            if not attrs:
                self.__text.append('\n')
        elif tag in ('http:', 'https:'):
            # Presumably a bare URL written as <http://host>: the tag name is
            # the scheme including ':' and the first attribute name is what
            # follows the '//'.  Skip it when no attribute was parsed rather
            # than fail on attrs[0].
            if attrs:
                self.__text.append(' ({0}//{1}) '.format(tag, attrs[0][0]))

    def handle_startendtag(self, tag, attrs):
        """Turn a self-closing ``<br/>`` into a blank line; ignore the rest."""
        if tag == 'br':
            self.__text.append('\n\n')

    def text(self):
        """Return the collected text with whitespace normalised.

        Spaces directly after a newline are dropped, runs of three or more
        newlines collapse to two, and the result is stripped at both ends.
        """
        joined = ''.join(self.__text)
        without_leading_spaces = re.sub(r'\n +', '\n', joined)
        collapsed = re.sub(r'\n{2}\n+', '\n\n', without_leading_spaces)
        return collapsed.strip()
|
|
|
|
def dehtml(text):
    """Convert HTML to plain text, falling back to the input on failure.

    Args:
        text: The HTML to convert; it is passed through ``str()`` before
            parsing.

    Returns:
        The plain-text rendering produced by ``_DeHTMLParser``.  If parsing
        raises, the traceback is printed to ``sys.stderr`` and the original
        *text* argument is returned unchanged.
    """
    try:
        parser = _DeHTMLParser()
        parser.feed(str(text))
        parser.close()
        return parser.text()
    except Exception:
        # Best effort: report the problem but still hand back something
        # usable.  Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        from traceback import print_exc
        print_exc(file=sys.stderr)
        return text
|
|
|
|
def indentMultiLineText(message, n=0):
    """Indent every line of *message* after the first by *n* spaces.

    Each newline is followed by *n* spaces, then trailing whitespace is
    stripped from the result.  The first line is left as it is.
    """
    padding = ' '*n
    line_break = '\n{0}'.format(padding)
    indented = message.replace('\n', line_break)
    return indented.rstrip()
|
|
|
|
def formatFileSize(fileSize):
    """Format a byte count as a whole number of kb, mb or gb.

    Zero gives ``'0kb'`` and any other value below one kilobyte gives
    ``'1kb'``.  Larger values are floor-divided by the largest unit they
    reach (kilo, mega or giga, in powers of 1000).
    """
    if fileSize == 0:
        return '0kb'
    if fileSize < ONE_KILO_BYTES:
        return '1kb'
    # (exclusive upper bound, divisor, output template), smallest unit first.
    scales = (
        (ONE_MEGA_BYTES, ONE_KILO_BYTES, '{0}kb'),
        (ONE_GIGA_BYTES, ONE_MEGA_BYTES, '{0}mb'),
    )
    for upper_bound, divisor, template in scales:
        if fileSize < upper_bound:
            return template.format(fileSize//divisor)
    return '{0}gb'.format(fileSize//ONE_GIGA_BYTES)
|
|
|
|
def formatMilliSeconds(millis):
    """Format a duration in milliseconds as ``HH:MM:SS``.

    The sub-second remainder is discarded.  Hours are not wrapped at 24, so
    long durations produce an hours field wider than two digits.
    """
    total_seconds = millis // 1000
    total_minutes = total_seconds // 60
    hours = total_minutes // 60
    minutes = total_minutes % 60
    seconds = total_seconds % 60
    return '%02d:%02d:%02d' % (hours, minutes, seconds)
|