Chrome Policy rough draft, further customer_id standardization

This commit is contained in:
Jay Lee
2021-03-13 10:02:53 -05:00
parent a567599eae
commit bd38b7479f
7 changed files with 327 additions and 26 deletions

View File

@ -59,6 +59,16 @@ class _DeHTMLParser(HTMLParser):
re.sub(r'\n +', '\n', ''.join(self.__text))).strip()
def commonprefix(m):
    '''Given a list of strings m, return string which is prefix common to all.

    The comparison is character-by-character, so the result is not
    guaranteed to end on any particular separator.

    Args:
        m: a list of strings; may be empty.

    Returns:
        The longest leading substring shared by every item in m, or ''
        when m is empty or the items share no common prefix.
    '''
    # min()/max() raise ValueError on an empty sequence, so handle it up front.
    if not m:
        return ''
    # Only the lexicographically smallest and largest items need comparing:
    # every other item sorts between them, so any prefix those two share is
    # shared by the whole list.
    s1 = min(m)
    s2 = max(m)
    for i, (c1, c2) in enumerate(zip(s1, s2)):
        if c1 != c2:
            return s1[:i]
    # No mismatch found: s1 is itself a prefix of s2 (and of everything else).
    return s1
def dehtml(text):
try:
parser = _DeHTMLParser()