Update GAM_CSV_ROW_FILTER (#901)

* Update GAM_CSV_ROW_FILTER

* Improve GAM_CSV_ROW_FILTER

* Improve again GAM_CSV_ROW_FILTER

* Improve error messages
This commit is contained in:
Ross Scroggs
2019-04-25 10:11:11 -07:00
committed by Jay Lee
parent b617d5cab0
commit 4098a9b70f
3 changed files with 172 additions and 45 deletions

View File

@@ -118,7 +118,7 @@ If an item contains spaces, it should be surrounded by ".
<MilliSeconds> ::= <Digit><Digit><Digit>
<Date> ::=
<Year>-<Month>-<Day> |
(+|-)<Number>(d|w)
(+|-)<Number>(d|w|y)
<Time> ::=
<Year>-<Month>-<Day>(<Space>|T)<Hour>:<Minute>:<Second>[.<MilliSeconds>](Z|(+|-(<Hour>:<Minute>))) |
(+|-)<Number>(m|h|d|w)

View File

@@ -332,14 +332,16 @@ def getString(i, item, optional=False, minLen=1, maxLen=None):
return ''
systemErrorExit(2, 'expected a <{0}>'.format(item))
def getDelta(argstr, pattern, formatRequired):
def getDelta(argstr, pattern):
tg = pattern.match(argstr.lower())
if tg is None:
systemErrorExit(2, 'expected a <{0}>; got {1}'.format(formatRequired, argstr))
return None
sign = tg.group(1)
delta = int(tg.group(2))
unit = tg.group(3)
if unit == 'w':
if unit == 'y':
deltaTime = datetime.timedelta(days=delta*365)
elif unit == 'w':
deltaTime = datetime.timedelta(weeks=delta)
elif unit == 'd':
deltaTime = datetime.timedelta(days=delta)
@@ -351,17 +353,23 @@ def getDelta(argstr, pattern, formatRequired):
return -deltaTime
return deltaTime
DELTA_DATE_PATTERN = re.compile(r'^([+-])(\d+)([dw])$')
DELTA_DATE_FORMAT_REQUIRED = '(+|-)<Number>(d|w)'
DELTA_DATE_PATTERN = re.compile(r'^([+-])(\d+)([dwy])$')
DELTA_DATE_FORMAT_REQUIRED = '(+|-)<Number>(d|w|y)'
def getDeltaDate(argstr):
return getDelta(argstr, DELTA_DATE_PATTERN, DELTA_DATE_FORMAT_REQUIRED)
deltaDate = getDelta(argstr, DELTA_DATE_PATTERN)
if deltaDate is None:
systemErrorExit(2, 'expected a <{0}>; got {1}'.format(DELTA_DATE_FORMAT_REQUIRED, argstr))
return deltaDate
DELTA_TIME_PATTERN = re.compile(r'^([+-])(\d+)([mhdw])$')
DELTA_TIME_FORMAT_REQUIRED = '(+|-)<Number>(m|h|d|w)'
def getDeltaTime(argstr):
return getDelta(argstr, DELTA_TIME_PATTERN, DELTA_TIME_FORMAT_REQUIRED)
deltaTime = getDelta(argstr, DELTA_TIME_PATTERN)
if deltaTime is None:
systemErrorExit(2, 'expected a <{0}>; got {1}'.format(DELTA_TIME_FORMAT_REQUIRED, argstr))
return deltaTime
YYYYMMDD_FORMAT = '%Y-%m-%d'
YYYYMMDD_FORMAT_REQUIRED = 'yyyy-mm-dd'
@@ -388,7 +396,43 @@ def getYYYYMMDD(argstr, minLen=1, returnTimeStamp=False, returnDateTime=False):
YYYYMMDDTHHMMSS_FORMAT_REQUIRED = 'yyyy-mm-ddThh:mm:ss[.fff](Z|(+|-(hh:mm)))'
def getTimeOrDeltaFromNow(time_string):
"""Get an ISO 8601 date/time or a positive/negative delta applied to now.
"""Get an ISO 8601 time or a positive/negative delta applied to now.
Args:
time_string (string): The time or delta (e.g. '2017-09-01T12:34:56Z' or '-4h')
Returns:
string: iso8601 formatted datetime in UTC.
"""
time_string = time_string.strip().upper()
if time_string:
if time_string[0] not in ['+', '-']:
return time_string
return (datetime.datetime.utcnow() + getDeltaTime(time_string)).isoformat() + 'Z'
systemErrorExit(2, 'expected a <{0}>'.format(YYYYMMDDTHHMMSS_FORMAT_REQUIRED))
def getRowFilterDateOrDeltaFromNow(date_string):
"""Get an ISO 8601 date or a positive/negative delta applied to now.
Args:
date_string (string): The time or delta (e.g. '2017-09-01' or '-4y')
Returns:
string: iso8601 formatted datetime in UTC.
"""
date_string = date_string.strip().upper()
if date_string:
if date_string[0] in ['+', '-']:
deltaDate = getDelta(date_string, DELTA_DATE_PATTERN)
if deltaDate is None:
return (False, DELTA_DATE_FORMAT_REQUIRED)
today = datetime.date.today()
return (True, (datetime.datetime(today.year, today.month, today.day)+deltaDate).isoformat()+'Z')
try:
deltaDate = dateutil.parser.parse(date_string, ignoretz=True)
return (True, datetime.datetime(deltaDate.year, deltaDate.month, deltaDate.day).isoformat()+'Z')
except ValueError:
pass
return (False, YYYYMMDD_FORMAT_REQUIRED)
def getRowFilterTimeOrDeltaFromNow(time_string):
"""Get an ISO 8601 time or a positive/negative delta applied to now.
Args:
time_string (string): The time or delta (e.g. '2017-09-01T12:34:56Z' or '-4h')
Returns:
@@ -398,10 +442,17 @@ def getTimeOrDeltaFromNow(time_string):
"""
time_string = time_string.strip().upper()
if time_string:
if time_string[0] not in ['+', '-']:
return time_string
return (datetime.datetime.utcnow() + getDeltaTime(time_string)).isoformat() + 'Z'
systemErrorExit(2, 'expected a <{0}>'.format(YYYYMMDDTHHMMSS_FORMAT_REQUIRED))
if time_string[0] in ['+', '-']:
deltaTime = getDelta(time_string, DELTA_TIME_PATTERN)
if deltaTime is None:
return (False, DELTA_TIME_FORMAT_REQUIRED)
return (True, (datetime.datetime.utcnow()+deltaTime).isoformat()+'Z')
try:
deltaTime = dateutil.parser.parse(time_string, ignoretz=True)
return (True, deltaTime.isoformat()+'Z')
except ValueError:
pass
return (False, YYYYMMDDTHHMMSS_FORMAT_REQUIRED)
YYYYMMDD_PATTERN = re.compile(r'^[0-9]{4}-[0-9]{2}-[0-9]{2}$')
@@ -555,6 +606,56 @@ def SetGlobalVariables():
value = os.path.expanduser(os.path.join(GC_Values[GC_CONFIG_DIR], value))
return value
ROW_FILTER_COMP_PATTERN = re.compile(r'^(date|time|count)\s*([<>]=?|=|!=)\s*(.+)$', re.IGNORECASE)
ROW_FILTER_BOOL_PATTERN = re.compile(r'^(boolean):(.+)$', re.IGNORECASE)
ROW_FILTER_RE_PATTERN = re.compile(r'^(regex):(.+)$', re.IGNORECASE)
def _getCfgRowFilter(itemName):
value = GC_Defaults[itemName]
rowFilters = {}
if not value:
return rowFilters
try:
for column, filterStr in iter(json.loads(value).items()):
mg = ROW_FILTER_COMP_PATTERN.match(filterStr)
if mg:
if mg.group(1) in ['date', 'time']:
if mg.group(1) == 'date':
valid, filterValue = getRowFilterDateOrDeltaFromNow(mg.group(3))
else:
valid, filterValue = getRowFilterTimeOrDeltaFromNow(mg.group(3))
if valid:
rowFilters[column] = (mg.group(1), mg.group(2), filterValue)
continue
systemErrorExit(3, 'Item: {0}, Value: "{1}": "{2}", Expected: {3}'.format(itemName, column, filterStr, filterValue))
else: #count
if mg.group(3).isdigit():
rowFilters[column] = (mg.group(1), mg.group(2), int(mg.group(3)))
continue
systemErrorExit(3, 'Item: {0}, Value: "{1}": "{2}", Expected: {3}'.format(itemName, column, filterStr, '<Number>'))
mg = ROW_FILTER_BOOL_PATTERN.match(filterStr)
if mg:
value = mg.group(2).lower()
if value in true_values:
filterValue = True
elif value in false_values:
filterValue = False
else:
systemErrorExit(3, 'Item: {0}, Value: "{1}": "{2}", Expected true|false'.format(itemName, column, filterStr))
rowFilters[column] = (mg.group(1), filterValue)
continue
mg = ROW_FILTER_RE_PATTERN.match(filterStr)
if mg:
try:
rowFilters[column] = (mg.group(1), re.compile(mg.group(2)))
continue
except re.error as e:
systemErrorExit(3, 'Item: {0}, Value: "{1}": {2}, Invalid RE: {3}'.format(itemName, column, filterStr, e))
systemErrorExit(3, 'Item: {0}, Value: "{1}": {2}, Expected: (date|time|count<Operator><Value>) or (boolean:true|false) or (regex:<RegularExpression>)'.format(itemName, column, filterStr))
return rowFilters
except (TypeError, ValueError) as e:
systemErrorExit(3, 'Item: {0}, Value: "{1}", Failed to parse as JSON: {2}'.format(itemName, value, str(e)))
GC_Defaults[GC_CONFIG_DIR] = GM_Globals[GM_GAM_PATH]
GC_Defaults[GC_CACHE_DIR] = os.path.join(GM_Globals[GM_GAM_PATH], 'gamcache')
GC_Defaults[GC_DRIVE_DIR] = GM_Globals[GM_GAM_PATH]
@@ -599,6 +700,8 @@ def SetGlobalVariables():
varType = GC_VAR_INFO[itemName][GC_VAR_TYPE]
if varType == GC_TYPE_FILE:
GC_Values[itemName] = _getCfgFile(itemName)
elif varType == GC_TYPE_ROWFILTER:
GC_Values[itemName] = _getCfgRowFilter(itemName)
else:
GC_Values[itemName] = GC_Defaults[itemName]
GM_Globals[GM_LAST_UPDATE_CHECK_TXT] = os.path.join(GC_Values[GC_CONFIG_DIR], FN_LAST_UPDATE_CHECK_TXT)
@@ -10632,37 +10735,60 @@ def sortCSVTitles(firstTitle, titles):
titles.insert(0, title)
def writeCSVfile(csvRows, titles, list_type, todrive):
def rowDateTimeFilterMatch(dateMode, rowDate, op, filterDate):
if not rowDate:
return False
try:
rowTime = dateutil.parser.parse(rowDate, ignoretz=True)
if dateMode:
rowDate = datetime.datetime(rowTime.year, rowTime.month, rowTime.day).isoformat()+'Z'
except ValueError:
rowDate = NEVER_TIME
if op == '<':
return rowDate < filterDate
if op == '<=':
return rowDate <= filterDate
if op == '>':
return rowDate > filterDate
if op == '>=':
return rowDate >= filterDate
if op == '!=':
return rowDate != filterDate
return rowDate == filterDate
def rowCountFilterMatch(rowCount, op, filterCount):
if not isinstance(rowCount, int):
return False
if op == '<':
return rowCount < filterCount
if op == '<=':
return rowCount <= filterCount
if op == '>':
return rowCount > filterCount
if op == '>=':
return rowCount >= filterCount
if op == '!=':
return rowCount != filterCount
return rowCount == filterCount
def rowBooleanFilterMatch(rowBoolean, filterBoolean):
if not isinstance(rowBoolean, bool):
return False
return rowBoolean == filterBoolean
if GC_Values[GC_CSV_ROW_FILTER]:
row_dict = json.loads(GC_Values[GC_CSV_ROW_FILTER])
for match_column, filter_str in list(row_dict.items()):
if filter_str.lower()[:4] == 'date':
new_csvRows = []
direction = filter_str[4]
if direction not in ['<', '>']:
systemErrorExit(3, '%s is not a valid filter date direction' % direction)
try:
date_str = filter_str[5:]
filter_date = dateutil.parser.parse(date_str, ignoretz=True)
except ValueError:
systemErrorExit(3, '%s is not a date GAM understands' % filter_str)
for row in csvRows:
try:
row_date = dateutil.parser.parse(row[match_column], ignoretz=True)
except ValueError:
row_date = dateutil.parser.parse('1/1/1970')
if direction == '<' and row_date < filter_date:
new_csvRows.append(row)
elif direction == '>' and row_date > filter_date:
new_csvRows.append(row)
csvRows = new_csvRows
else:
if filter_str.lower()[:6] == 'regex:':
filter_str = filter_str[6:]
if match_column not in titles:
sys.stderr.write('WARNING: Row filter %s is not in output columns\n' % match_column)
continue
regex = re.compile(filter_str)
csvRows = [row for row in csvRows if regex.search(row.get(match_column, ''))]
for column, filterVal in iter(GC_Values[GC_CSV_ROW_FILTER].items()):
if column not in titles:
sys.stderr.write('WARNING: Row filter column "{0}" is not in output columns\n'.format(column))
continue
if filterVal[0] == 'regex':
csvRows = [row for row in csvRows if filterVal[1].search(row.get(column, ''))]
elif filterVal[0] in ['date', 'time']:
csvRows = [row for row in csvRows if rowDateTimeFilterMatch(filterVal[0] == 'date', row.get(column, ''), filterVal[1], filterVal[2])]
elif filterVal[0] == 'count':
csvRows = [row for row in csvRows if rowCountFilterMatch(row.get(column, ''), filterVal[1], filterVal[2])]
else: #boolean
csvRows = [row for row in csvRows if rowBooleanFilterMatch(row.get(column, False), filterVal[1])]
if GC_Values[GC_CSV_HEADER_FILTER]:
titles_filter = GC_Values[GC_CSV_HEADER_FILTER].lower().split(',')
titles = [t for t in titles if t.lower() in titles_filter]
@@ -13118,7 +13244,7 @@ Append an 'r' to grant read-only access or an 'a' to grant action-only access.
raise ScopeSelectionMenu.MenuChoiceError(
'Scope "%s" is required and cannot be unselected!' %
option.description)
elif selected and not option.is_selected:
if selected and not option.is_selected:
# Make sure we're not about to exceed the maximum number of scopes
# authorized on a single token.
num_scopes_to_add = len(option.get_effective_scopes())

View File

@@ -769,6 +769,7 @@ GC_TYPE_EMAIL = 'emai'
GC_TYPE_FILE = 'file'
GC_TYPE_INTEGER = 'inte'
GC_TYPE_LANGUAGE = 'lang'
GC_TYPE_ROWFILTER = 'rowf'
GC_TYPE_STRING = 'stri'
GC_VAR_TYPE = 'type'
@@ -803,7 +804,7 @@ GC_VAR_INFO = {
GC_SITE_DIR: {GC_VAR_TYPE: GC_TYPE_DIRECTORY},
GC_USER_MAX_RESULTS: {GC_VAR_TYPE: GC_TYPE_INTEGER, GC_VAR_LIMITS: (1, 500)},
GC_CSV_HEADER_FILTER: {GC_VAR_TYPE: GC_TYPE_STRING},
GC_CSV_ROW_FILTER: {GC_VAR_TYPE: GC_TYPE_STRING},
GC_CSV_ROW_FILTER: {GC_VAR_TYPE: GC_TYPE_ROWFILTER},
GC_TLS_MIN_VERSION: {GC_VAR_TYPE: GC_TYPE_STRING},
GC_TLS_MAX_VERSION: {GC_VAR_TYPE: GC_TYPE_STRING},
}