Files
GoogleDriveManagement/src/utils.py
Ross Scroggs aa6dca4b4c Keep the pylint wolf at bay (#430)
* Fix bug, update documentation

* Clean up error messages

* Exit on error, fix bug

* One more bug fix

* Update documentation, fix code

l_sku can never match a_sku.lower() because it has -'s stripped and
a_sku doesn't

* Keep the pylint wolf at bay

* Clean up code, avoid try/except
2017-02-11 08:31:34 -05:00

81 lines
2.2 KiB
Python

import collections
import re
import sys
from htmlentitydefs import name2codepoint
from HTMLParser import HTMLParser
from var import GM_Globals, GM_WINDOWS, GM_SYS_ENCODING
def convertUTF8(data):
    """Recursively convert unicode strings inside data to encoded byte strings.

    - str values are returned untouched.
    - unicode values are returned untouched when GM_Globals[GM_WINDOWS] is
      truthy; otherwise they are encoded with GM_Globals[GM_SYS_ENCODING].
    - Mappings are rebuilt as a plain dict with converted keys and values.
    - Any other iterable is rebuilt as the same container type from its
      converted items.
    - Everything else is returned as-is.
    """
    if isinstance(data, str):
        return data
    if isinstance(data, unicode):
        return data if GM_Globals[GM_WINDOWS] else data.encode(GM_Globals[GM_SYS_ENCODING])
    if isinstance(data, collections.Mapping):
        # Convert key first, then value, for every item of the mapping.
        return dict((convertUTF8(key), convertUTF8(value)) for key, value in data.iteritems())
    if isinstance(data, collections.Iterable):
        converted = [convertUTF8(item) for item in data]
        return type(data)(converted)
    return data
class _DeHTMLParser(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
self.__text = []
def handle_data(self, data):
self.__text.append(data)
def handle_charref(self, name):
self.__text.append(unichr(int(name[1:], 16)) if name.startswith('x') else unichr(int(name)))
def handle_entityref(self, name):
cp = name2codepoint.get(name)
if cp:
self.__text.append(unichr(cp))
else:
self.__text.append(u'&'+name)
def handle_starttag(self, tag, attrs):
if tag == 'p':
self.__text.append('\n\n')
elif tag == 'br':
self.__text.append('\n')
elif tag == 'a':
for attr in attrs:
if attr[0] == 'href':
self.__text.append('({0}) '.format(attr[1]))
break
elif tag == 'div':
if not attrs:
self.__text.append('\n')
elif tag in ['http:', 'https']:
self.__text.append(' ({0}//{1}) '.format(tag, attrs[0][0]))
def handle_startendtag(self, tag, attrs):
if tag == 'br':
self.__text.append('\n\n')
def text(self):
return re.sub(r'\n{2}\n+', '\n\n', re.sub(r'\n +', '\n', ''.join(self.__text))).strip()
def dehtml(text):
    """Return a plain-text rendering of the HTML in text.

    The text is UTF-8 encoded, fed through _DeHTMLParser, and the parser's
    collected text is returned.  Conversion is best-effort: if anything goes
    wrong the traceback is printed to stderr and the original text is
    returned unchanged.
    """
    try:
        parser = _DeHTMLParser()
        parser.feed(text.encode(u'utf-8'))
        parser.close()
        return parser.text()
    except Exception:
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit still propagate instead of being swallowed here.
        from traceback import print_exc
        print_exc(file=sys.stderr)
        return text
def indentMultiLineText(message, n=0):
    """Indent every line after the first by n spaces and strip trailing whitespace."""
    # Each newline is followed by the padding for the continuation line.
    continuation = u'\n' + u' ' * n
    indented = message.replace(u'\n', continuation)
    return indented.rstrip()
def formatMilliSeconds(millis):
    """Format a duration given in milliseconds as HH:MM:SS.

    The sub-second remainder is discarded; each field is zero-padded to at
    least two digits.
    """
    whole_seconds = millis // 1000
    whole_minutes, seconds = divmod(whole_seconds, 60)
    hours, minutes = divmod(whole_minutes, 60)
    return u'%02d:%02d:%02d' % (hours, minutes, seconds)