Add GAM_CSV_ROW_DROP_FILTER (#1304)

* Add GAM_CSV_ROW_DROP_FILTER

Allow regex column names in GAM_CSV_ROW_FILTER and GAM_CSV_ROW_DROP_FILTER.
```
# Get users with managers
$ export GAM_CSV_ROW_FILTER="relations.*.type:regex:manager"
$ gam print users query "orgUnitPath='/Test'" relations
Getting all Users in G Suite account that match query (orgUnitPath='/Test') (may take some time on a large account)...
Got 17 Users: dick@domain.com - tom@domain.com
primaryEmail,relations,relations.0.value,relations.0.type,relations.1.value,relations.1.type
testuser1@domain.com,,admin@rdschool.net,manager,,
testuser2@domain.com,,testuser1@domain.com,manager,,

# Get users without managers
$ export GAM_CSV_ROW_DROP_FILTER="relations.*.type:regex:manager"
$ gam print users query "orgUnitPath='/Test'" relations
Getting all Users in G Suite account that match query (orgUnitPath='/Test') (may take some time on a large account)...
Got 17 Users: dick@domain.com - tom@domain.com
primaryEmail,relations,relations.0.value,relations.0.type,relations.1.value,relations.1.type
dick@domain.com,,,,,
harry@domain.com,,,,,
testadmin@domain.com,,,,,
...
```

* Update var.py to 5.31
This commit is contained in:
Ross Scroggs
2021-01-13 08:37:17 -08:00
committed by GitHub
parent ff80150216
commit 09c0c18fce
3 changed files with 84 additions and 39 deletions

View File

```diff
@@ -400,8 +400,8 @@ def SetGlobalVariables():
     ROW_FILTER_COMP_PATTERN = re.compile(
         r'^(date|time|count)\s*([<>]=?|=|!=)\s*(.+)$', re.IGNORECASE)
     ROW_FILTER_BOOL_PATTERN = re.compile(r'^(boolean):(.+)$', re.IGNORECASE)
-    ROW_FILTER_RE_PATTERN = re.compile(r'^(regex|notregex):(.+)$',
-                                       re.IGNORECASE)
+    ROW_FILTER_RE_PATTERN = re.compile(r'^(regex|notregex):(.+)$', re.IGNORECASE)
+    REGEX_CHARS = '^$*+|$[{('
 
     def _getCfgRowFilter(itemName):
         value = GC_Defaults[itemName]
@@ -439,6 +439,17 @@ def SetGlobalVariables():
                 )
             filterDict[column] = filterStr
         for column, filterStr in iter(filterDict.items()):
+            for c in REGEX_CHARS:
+                if c in column:
+                    columnPat = column
+                    break
+            else:
+                columnPat = f'^{column}$'
+            try:
+                columnPat = re.compile(columnPat, re.IGNORECASE)
+            except re.error as e:
+                controlflow.system_error_exit(
+                    3, f'Item: {itemName}: "{column}", Invalid RE: {str(e)}')
             mg = ROW_FILTER_COMP_PATTERN.match(filterStr)
             if mg:
                 if mg.group(1) in ['date', 'time']:
@@ -449,7 +460,7 @@ def SetGlobalVariables():
                     valid, filterValue = utils.get_row_filter_time_or_delta_from_now(
                         mg.group(3))
                     if valid:
-                        rowFilters[column] = (mg.group(1), mg.group(2),
+                        rowFilters[column] = (columnPat, mg.group(1), mg.group(2),
                                               filterValue)
                         continue
                     controlflow.system_error_exit(
@@ -458,7 +469,7 @@ def SetGlobalVariables():
                     )
                 else: #count
                     if mg.group(3).isdigit():
-                        rowFilters[column] = (mg.group(1), mg.group(2),
+                        rowFilters[column] = (columnPat, mg.group(1), mg.group(2),
                                               int(mg.group(3)))
                         continue
                     controlflow.system_error_exit(
@@ -477,12 +488,12 @@ def SetGlobalVariables():
                     3,
                     f'Item: {itemName}, Value: "{column}": "{filterStr}", Expected true|false'
                 )
-                rowFilters[column] = (mg.group(1), filterValue)
+                rowFilters[column] = (columnPat, mg.group(1), filterValue)
                 continue
             mg = ROW_FILTER_RE_PATTERN.match(filterStr)
             if mg:
                 try:
-                    rowFilters[column] = (mg.group(1), re.compile(mg.group(2)))
+                    rowFilters[column] = (columnPat, mg.group(1), re.compile(mg.group(2)))
                     continue
                 except re.error as e:
                     controlflow.system_error_exit(
@@ -520,6 +531,7 @@ def SetGlobalVariables():
     _getOldEnvVar(GC_CSV_HEADER_FILTER, 'GAM_CSV_HEADER_FILTER')
     _getOldEnvVar(GC_CSV_HEADER_DROP_FILTER, 'GAM_CSV_HEADER_DROP_FILTER')
     _getOldEnvVar(GC_CSV_ROW_FILTER, 'GAM_CSV_ROW_FILTER')
+    _getOldEnvVar(GC_CSV_ROW_DROP_FILTER, 'GAM_CSV_ROW_DROP_FILTER')
     _getOldEnvVar(GC_TLS_MIN_VERSION, 'GAM_TLS_MIN_VERSION')
     _getOldEnvVar(GC_TLS_MAX_VERSION, 'GAM_TLS_MAX_VERSION')
     _getOldEnvVar(GC_CA_FILE, 'GAM_CA_FILE')
```

View File

```diff
@@ -154,39 +154,66 @@ def write_csv_file(csvRows, titles, list_type, todrive):
             return True
         return False
 
-    if GC_Values[GC_CSV_ROW_FILTER]:
-        for column, filterVal in iter(GC_Values[GC_CSV_ROW_FILTER].items()):
-            if column not in titles:
-                sys.stderr.write(
-                    f'WARNING: Row filter column "{column}" is not in output columns\n'
-                )
-                continue
-            if filterVal[0] == 'regex':
-                csvRows = [
-                    row for row in csvRows
-                    if filterVal[1].search(str(row.get(column, '')))
-                ]
-            elif filterVal[0] == 'notregex':
-                csvRows = [
-                    row for row in csvRows
-                    if not filterVal[1].search(str(row.get(column, '')))
-                ]
-            elif filterVal[0] in ['date', 'time']:
-                csvRows = [
-                    row for row in csvRows if rowDateTimeFilterMatch(
-                        filterVal[0] == 'date', row.get(column, ''),
-                        filterVal[1], filterVal[2])
-                ]
-            elif filterVal[0] == 'count':
-                csvRows = [
-                    row for row in csvRows if rowCountFilterMatch(
-                        row.get(column, 0), filterVal[1], filterVal[2])
-                ]
-            else: #boolean
-                csvRows = [
-                    row for row in csvRows if rowBooleanFilterMatch(
-                        row.get(column, False), filterVal[1])
-                ]
+    def rowFilterMatch(filters, columns, row):
+        for c, filterVal in iter(filters.items()):
+            for column in columns[c]:
+                if filterVal[1] == 'regex':
+                    if filterVal[2].search(str(row.get(column, ''))):
+                        return True
+                elif filterVal[1] == 'notregex':
+                    if not filterVal[2].search(str(row.get(column, ''))):
+                        return True
+                elif filterVal[1] in ['date', 'time']:
+                    if rowDateTimeFilterMatch(
+                            filterVal[1] == 'date', row.get(column, ''),
+                            filterVal[2], filterVal[3]):
+                        return True
+                elif filterVal[1] == 'count':
+                    if rowCountFilterMatch(
+                            row.get(column, 0), filterVal[2], filterVal[3]):
+                        return True
+                else: #boolean
+                    if rowBooleanFilterMatch(
+                            row.get(column, False), filterVal[2]):
+                        return True
+        return False
+
+    if GC_Values[GC_CSV_ROW_FILTER] or GC_Values[GC_CSV_ROW_DROP_FILTER]:
+        if GC_Values[GC_CSV_ROW_FILTER]:
+            keepColumns = {}
+            for column, filterVal in iter(GC_Values[GC_CSV_ROW_FILTER].items()):
+                columns = [t for t in titles if filterVal[0].match(t)]
+                if columns:
+                    keepColumns[column] = columns
+                else:
+                    keepColumns[column] = [None]
+                    sys.stderr.write(
+                        f'WARNING: Row filter column pattern "{column}" does not match any output columns\n'
+                    )
+        else:
+            keepColumns = None
+        if GC_Values[GC_CSV_ROW_DROP_FILTER]:
+            dropColumns = {}
+            for column, filterVal in iter(GC_Values[GC_CSV_ROW_DROP_FILTER].items()):
+                columns = [t for t in titles if filterVal[0].match(t)]
+                if columns:
+                    dropColumns[column] = columns
+                else:
+                    dropColumns[column] = [None]
+                    sys.stderr.write(
+                        f'WARNING: Row drop filter column pattern "{column}" does not match any output columns\n'
+                    )
+        else:
+            dropColumns = None
+        rows = []
+        for row in csvRows:
+            if (((keepColumns is None) or
+                 rowFilterMatch(GC_Values[GC_CSV_ROW_FILTER], keepColumns, row)) and
+                ((dropColumns is None) or
+                 not rowFilterMatch(GC_Values[GC_CSV_ROW_DROP_FILTER], dropColumns, row))):
+                rows.append(row)
+        csvRows = rows
     if GC_Values[GC_CSV_HEADER_FILTER] or GC_Values[GC_CSV_HEADER_DROP_FILTER]:
         if GC_Values[GC_CSV_HEADER_DROP_FILTER]:
             titles = [
```

View File

```diff
@@ -1221,6 +1221,8 @@ GC_CSV_HEADER_FILTER = 'csv_header_filter'
 GC_CSV_HEADER_DROP_FILTER = 'csv_header_drop_filter'
 # CSV Rows GAM should filter
 GC_CSV_ROW_FILTER = 'csv_row_filter'
+# CSV Rows GAM should filter/drop
+GC_CSV_ROW_DROP_FILTER = 'csv_row_drop_filter'
 # Minimum TLS Version required for HTTPS connections
 GC_TLS_MIN_VERSION = 'tls_min_ver'
 # Maximum TLS Version used for HTTPS connections
@@ -1259,6 +1261,7 @@ GC_Defaults = {
     GC_CSV_HEADER_FILTER: '',
     GC_CSV_HEADER_DROP_FILTER: '',
     GC_CSV_ROW_FILTER: '',
+    GC_CSV_ROW_DROP_FILTER: '',
     GC_TLS_MIN_VERSION: TLS_MIN,
     GC_TLS_MAX_VERSION: None,
     GC_CA_FILE: None,
@@ -1373,6 +1376,9 @@ GC_VAR_INFO = {
     GC_CSV_ROW_FILTER: {
         GC_VAR_TYPE: GC_TYPE_ROWFILTER
     },
+    GC_CSV_ROW_DROP_FILTER: {
+        GC_VAR_TYPE: GC_TYPE_ROWFILTER
+    },
     GC_TLS_MIN_VERSION: {
         GC_VAR_TYPE: GC_TYPE_STRING
     },
```