diff --git a/src/gam/__init__.py b/src/gam/__init__.py
index 60bc66f9..2080d253 100755
--- a/src/gam/__init__.py
+++ b/src/gam/__init__.py
@@ -400,8 +400,8 @@ def SetGlobalVariables():
     ROW_FILTER_COMP_PATTERN = re.compile(
         r'^(date|time|count)\s*([<>]=?|=|!=)\s*(.+)$', re.IGNORECASE)
     ROW_FILTER_BOOL_PATTERN = re.compile(r'^(boolean):(.+)$', re.IGNORECASE)
-    ROW_FILTER_RE_PATTERN = re.compile(r'^(regex|notregex):(.+)$',
-                                       re.IGNORECASE)
+    ROW_FILTER_RE_PATTERN = re.compile(r'^(regex|notregex):(.+)$', re.IGNORECASE)
+    REGEX_CHARS = '^$*+|$[{('
 
     def _getCfgRowFilter(itemName):
         value = GC_Defaults[itemName]
@@ -439,6 +439,17 @@ def SetGlobalVariables():
                     )
                 filterDict[column] = filterStr
         for column, filterStr in iter(filterDict.items()):
+            # A column containing any of REGEX_CHARS is taken as a regular
+            # expression; anything else is a literal title, matched whole.
+            if any(c in column for c in REGEX_CHARS):
+                columnPat = column
+            else:
+                columnPat = f'^{re.escape(column)}$'
+            try:
+                columnPat = re.compile(columnPat, re.IGNORECASE)
+            except re.error as e:
+                controlflow.system_error_exit(
+                    3, f'Item: {itemName}: "{column}", Invalid RE: {str(e)}')
             mg = ROW_FILTER_COMP_PATTERN.match(filterStr)
             if mg:
                 if mg.group(1) in ['date', 'time']:
@@ -449,7 +460,7 @@ def SetGlobalVariables():
                         valid, filterValue = utils.get_row_filter_time_or_delta_from_now(
                             mg.group(3))
                     if valid:
-                        rowFilters[column] = (mg.group(1), mg.group(2),
+                        rowFilters[column] = (columnPat, mg.group(1), mg.group(2),
                                               filterValue)
                         continue
                     controlflow.system_error_exit(
@@ -458,7 +469,7 @@ def SetGlobalVariables():
                     )
                 else:  #count
                     if mg.group(3).isdigit():
-                        rowFilters[column] = (mg.group(1), mg.group(2),
+                        rowFilters[column] = (columnPat, mg.group(1), mg.group(2),
                                               int(mg.group(3)))
                         continue
                     controlflow.system_error_exit(
@@ -477,12 +488,12 @@ def SetGlobalVariables():
                         3,
                         f'Item: {itemName}, Value: "{column}": "{filterStr}", Expected true|false'
                     )
-                rowFilters[column] = (mg.group(1), filterValue)
+                rowFilters[column] = (columnPat, mg.group(1), filterValue)
                 continue
             mg = ROW_FILTER_RE_PATTERN.match(filterStr)
             if mg:
                 try:
-                    rowFilters[column] = (mg.group(1), re.compile(mg.group(2)))
+                    rowFilters[column] = (columnPat, mg.group(1), re.compile(mg.group(2)))
                     continue
                 except re.error as e:
                     controlflow.system_error_exit(
@@ -520,6 +531,7 @@ def SetGlobalVariables():
     _getOldEnvVar(GC_CSV_HEADER_FILTER, 'GAM_CSV_HEADER_FILTER')
     _getOldEnvVar(GC_CSV_HEADER_DROP_FILTER, 'GAM_CSV_HEADER_DROP_FILTER')
     _getOldEnvVar(GC_CSV_ROW_FILTER, 'GAM_CSV_ROW_FILTER')
+    _getOldEnvVar(GC_CSV_ROW_DROP_FILTER, 'GAM_CSV_ROW_DROP_FILTER')
     _getOldEnvVar(GC_TLS_MIN_VERSION, 'GAM_TLS_MIN_VERSION')
     _getOldEnvVar(GC_TLS_MAX_VERSION, 'GAM_TLS_MAX_VERSION')
     _getOldEnvVar(GC_CA_FILE, 'GAM_CA_FILE')
diff --git a/src/gam/display.py b/src/gam/display.py
index b48edfd6..4e9eb7ed 100644
--- a/src/gam/display.py
+++ b/src/gam/display.py
@@ -154,39 +154,66 @@ def write_csv_file(csvRows, titles, list_type, todrive):
             return True
         return False
 
-    if GC_Values[GC_CSV_ROW_FILTER]:
-        for column, filterVal in iter(GC_Values[GC_CSV_ROW_FILTER].items()):
-            if column not in titles:
-                sys.stderr.write(
-                    f'WARNING: Row filter column "{column}" is not in output columns\n'
-                )
-                continue
-            if filterVal[0] == 'regex':
-                csvRows = [
-                    row for row in csvRows
-                    if filterVal[1].search(str(row.get(column, '')))
-                ]
-            elif filterVal[0] == 'notregex':
-                csvRows = [
-                    row for row in csvRows
-                    if not filterVal[1].search(str(row.get(column, '')))
-                ]
-            elif filterVal[0] in ['date', 'time']:
-                csvRows = [
-                    row for row in csvRows if rowDateTimeFilterMatch(
-                        filterVal[0] == 'date', row.get(column, ''),
-                        filterVal[1], filterVal[2])
-                ]
-            elif filterVal[0] == 'count':
-                csvRows = [
-                    row for row in csvRows if rowCountFilterMatch(
-                        row.get(column, 0), filterVal[1], filterVal[2])
-                ]
-            else:  #boolean
-                csvRows = [
-                    row for row in csvRows if rowBooleanFilterMatch(
-                        row.get(column, False), filterVal[1])
-                ]
+    def rowFilterColumns(filters, filterName):
+        """Map each filter key to the output titles its pattern matches.
+
+        A pattern that matches no title is reported on stderr and left out,
+        so that filter is ignored, as a filter on a missing column was
+        ignored when columns were matched by name.
+        """
+        matched = {}
+        for column, filterVal in filters.items():
+            columns = [t for t in titles if filterVal[0].match(t)]
+            if columns:
+                matched[column] = columns
+            else:
+                sys.stderr.write(
+                    f'WARNING: {filterName} column pattern "{column}" does not match any output columns\n'
+                )
+        return matched
+
+    def rowFilterMatch(filters, columns, row, matchAll=False):
+        """Return True if row satisfies filters.
+
+        filters maps a key to (columnPat, type, value...) and columns maps
+        the keys in use to the titles to test. A filter is satisfied when
+        any of its titles passes; with matchAll every filter must be
+        satisfied, otherwise one satisfied filter is enough.
+        """
+
+        def filterMatch(filterVal, column):
+            filterType = filterVal[1]
+            if filterType == 'regex':
+                return filterVal[2].search(str(row.get(column, '')))
+            if filterType == 'notregex':
+                return not filterVal[2].search(str(row.get(column, '')))
+            if filterType in ['date', 'time']:
+                return rowDateTimeFilterMatch(filterType == 'date',
+                                              row.get(column, ''),
+                                              filterVal[2], filterVal[3])
+            if filterType == 'count':
+                return rowCountFilterMatch(row.get(column, 0), filterVal[2],
+                                           filterVal[3])
+            #boolean
+            return rowBooleanFilterMatch(row.get(column, False), filterVal[2])
+
+        results = (any(filterMatch(filters[c], t) for t in matchedTitles)
+                   for c, matchedTitles in columns.items())
+        return all(results) if matchAll else any(results)
+
+    if GC_Values[GC_CSV_ROW_FILTER] or GC_Values[GC_CSV_ROW_DROP_FILTER]:
+        keepFilters = GC_Values[GC_CSV_ROW_FILTER] or {}
+        dropFilters = GC_Values[GC_CSV_ROW_DROP_FILTER] or {}
+        keepColumns = rowFilterColumns(keepFilters, 'Row filter')
+        dropColumns = rowFilterColumns(dropFilters, 'Row drop filter')
+        # Keep filters stay cumulative, as when each one narrowed csvRows in
+        # turn; a single satisfied drop filter is enough to remove a row.
+        csvRows = [
+            row for row in csvRows
+            if rowFilterMatch(keepFilters, keepColumns, row, matchAll=True)
+            and not rowFilterMatch(dropFilters, dropColumns, row)
+        ]
+
     if GC_Values[GC_CSV_HEADER_FILTER] or GC_Values[GC_CSV_HEADER_DROP_FILTER]:
         if GC_Values[GC_CSV_HEADER_DROP_FILTER]:
             titles = [
diff --git a/src/gam/var.py b/src/gam/var.py
index 508fdaca..8c4c111d 100644
--- a/src/gam/var.py
+++ b/src/gam/var.py
@@ -1221,6 +1221,8 @@ GC_CSV_HEADER_FILTER = 'csv_header_filter'
 GC_CSV_HEADER_DROP_FILTER = 'csv_header_drop_filter'
 # CSV Rows GAM should filter
 GC_CSV_ROW_FILTER = 'csv_row_filter'
+# CSV Rows GAM should drop
+GC_CSV_ROW_DROP_FILTER = 'csv_row_drop_filter'
 # Minimum TLS Version required for HTTPS connections
 GC_TLS_MIN_VERSION = 'tls_min_ver'
 # Maximum TLS Version used for HTTPS connections
@@ -1259,6 +1261,7 @@ GC_Defaults = {
     GC_CSV_HEADER_FILTER: '',
     GC_CSV_HEADER_DROP_FILTER: '',
     GC_CSV_ROW_FILTER: '',
+    GC_CSV_ROW_DROP_FILTER: '',
     GC_TLS_MIN_VERSION: TLS_MIN,
     GC_TLS_MAX_VERSION: None,
     GC_CA_FILE: None,
@@ -1373,6 +1376,9 @@ GC_VAR_INFO = {
     GC_CSV_ROW_FILTER: {
         GC_VAR_TYPE: GC_TYPE_ROWFILTER
     },
+    GC_CSV_ROW_DROP_FILTER: {
+        GC_VAR_TYPE: GC_TYPE_ROWFILTER
+    },
     GC_TLS_MIN_VERSION: {
         GC_VAR_TYPE: GC_TYPE_STRING
     },