From dc3976bdda0040c5e7be01ec8922152840853437 Mon Sep 17 00:00:00 2001 From: Jay Lee Date: Mon, 6 Feb 2023 13:33:26 +0000 Subject: [PATCH] gam copy vaultexport/storagebucket commands --- src/cbcm-v1.1beta1.json | 2 +- src/gam/__init__.py | 51 +++++++++++++--- src/gam/gapi/storage.py | 132 +++++++++++++++++++++++++++++++++++++++- src/gam/gapi/vault.py | 44 +++++++++++++- 4 files changed, 214 insertions(+), 15 deletions(-) diff --git a/src/cbcm-v1.1beta1.json b/src/cbcm-v1.1beta1.json index 60e46631..8c5f75d4 100644 --- a/src/cbcm-v1.1beta1.json +++ b/src/cbcm-v1.1beta1.json @@ -12,7 +12,7 @@ } }, "basePath": "", - "baseUrl": "https://www.googleapis.com/admin/directory/v1.1beta1/customer/", + "baseUrl": "https://admin.googleapis.com/admin/directory/v1.1beta1/customer/", "batchPath": "batch", "canonicalName": "cbcm", "discoveryVersion": "v1", diff --git a/src/gam/__init__.py b/src/gam/__init__.py index e9a6a7c0..f419af6c 100755 --- a/src/gam/__init__.py +++ b/src/gam/__init__.py @@ -7442,12 +7442,21 @@ def _getCurrentProjectID(): def _getProjects(crm, pfilter): try: - return gapi.get_all_pages( + projects = gapi.get_all_pages( crm.projects(), 'search', 'projects', throw_reasons=[gapi_errors.ErrorReason.BAD_REQUEST], query=pfilter) + if projects: + return projects + if pfilter.startswith('id:'): + pfilter = pfilter[3:] + return [gapi.call( + crm.projects(), + 'get', + name=f'projects/{pfilter}', + throw_reasons=[gapi_errors.ErrorReason.BAD_REQUEST])] except gapi_errors.GapiBadRequestError as e: controlflow.system_error_exit(2, f'Project: {pfilter}, {str(e)}') @@ -10742,11 +10751,17 @@ OAUTH2_SCOPES = [ 'subscopes': ['readonly'], 'scopes': 'https://www.googleapis.com/auth/ediscovery' }, + # off by default to avoid reauth issues with GCP APIs + # and since many admins never use Vault API. { - 'name': 'Cloud Storage (Vault Export - read only)', - 'subscopes': [], - 'scopes': 'https://www.googleapis.com/auth/devstorage.read_only' - }, + 'name': 'Cloud Storage - Vault/Takeout Download/Copy', + 'subscopes': ['readonly'], + 'offByDefault': True, + 'restricted_scopes': { + 'readonly': 'https://www.googleapis.com/auth/devstorage.read_only' + }, + 'scopes': 'https://www.googleapis.com/auth/devstorage.read_write' + }, { 'name': 'User Profile (Email address - read only)', 'subscopes': [], @@ -10791,6 +10806,7 @@ class ScopeMenuOption(): is_required=False, is_selected=False, supported_restrictions=None, + restricted_scopes=None, restriction=None): """A data structure for storing and toggling feature/API scope attributes. @@ -10820,6 +10836,7 @@ class ScopeMenuOption(): self._restriction = None self.scopes = oauth_scopes + self.restricted_scopes = restricted_scopes self.description = description self.is_required = is_required # Required scopes must be selected @@ -10910,7 +10927,10 @@ class ScopeMenuOption(): effective_scopes = [] for scope in self.scopes: if self.is_restricted: - scope = f'{scope}.{self._restriction}' + if self.restricted_scopes.get(self._restriction): + scope = self.restricted_scopes.get(self._restriction) + else: + scope = f'{scope}.{self._restriction}' effective_scopes.append(scope) return effective_scopes @@ -10922,7 +10942,10 @@ class ScopeMenuOption(): name: Some description of the API/feature. subscopes: A list of compatible scope restrictions such as 'action' or 'readonly'. Each scope in the scopes list must support this - restriction text appended to the end of its normal scope text. + restriction text appended to the end of its normal scope text or + be defined in the restricted_scopes attribute. + restricted_scopes: A dict of scopes to be used for restrictions. If not + defined then {scope}.{subscope} is used. scopes: A list of scopes that are required for the API/feature. offByDefault: A bool indicating whether this feature/scope should be off by default (when no prior selection has been made). Default is False @@ -10951,8 +10974,8 @@ class ScopeMenuOption(): description=scope_definition.get('name'), is_selected=not scope_definition.get('offByDefault'), supported_restrictions=scope_definition.get('subscopes', []), - is_required=scope_definition.get('required', False)) - + is_required=scope_definition.get('required', False), + restricted_scopes=scope_definition.get('restricted_scopes', {})) class ScopeSelectionMenu(): """A text menu which prompts the user to select the scopes to authorize.""" @@ -12027,6 +12050,16 @@ def ProcessGAMCommand(args): else: controlflow.invalid_argument_exit(argument, 'gam download') sys.exit(0) + elif command == 'copy': + argument = sys.argv[2].lower().replace('_', '') + if argument in ['export', 'vaultexport']: + gapi_vault.copyExport() + elif argument in ['storagebucket', 'bucket']: + gapi_storage.copy_bucket() + else: + controlflow.invalid_argument_exit(argument, 'gam copy') + sys.exit(0) + elif command == 'rotate': argument = sys.argv[2].lower() if argument in ['sakey', 'sakeys']: diff --git a/src/gam/gapi/storage.py b/src/gam/gapi/storage.py index acd42bd8..7764a8ac 100644 --- a/src/gam/gapi/storage.py +++ b/src/gam/gapi/storage.py @@ -2,20 +2,148 @@ import base64 import os import re import sys +import time import googleapiclient import gam +from gam.gapi import errors as gapi_errors from gam.var import * +from gam import controlflow from gam import fileutils from gam import gapi from gam import utils -def build_gapi(): +def build(): return gam.buildGAPIObject('storage') +def copy_bucket(): + s = build() + source_bucket = None + target_bucket = None + prefix = None + i = 3 + while i < len(sys.argv): + myarg = sys.argv[i].lower().replace('_', '') + if myarg == 'sourcebucket': + source_bucket = sys.argv[i+1] + i += 2 + elif myarg == 'targetbucket': + target_bucket = sys.argv[i+1] + i += 2 + elif myarg == 'sourceprefix': + prefix = sys.argv[i+1] + i += 2 + elif myarg == 'targetprefix': + target_prefix = sys.argv[i+1] + i += 2 + else: + controlflow.invalid_argument_exit(sys.argv[i], + 'gam copy storagebucket') + if not target_bucket: + controlflow.missing_argument_exit('target_bucket', 'gam copy storagebucket') + if not source_bucket: + controlflow.missing_argument_exit('source_bucket', 'gam copy storagebucket') + page_message = gapi.got_total_items_msg('Storage Objects', '...\n') + objects = gapi.get_all_pages(s.objects(), + 'list', + items='items', + page_message=page_message, + prefix=prefix, + bucket=source_bucket, + fields='items(name,bucket,md5Hash),nextPageToken') + copy_objects(objects, + target_bucket, + target_prefix) + + +def copy_objects(objects, + target_bucket, + target_prefix): + """Copies objects to target_bucket. + + Args: + objects: list of object dicts + [ + { + bucket: source bucket, + name: source object name, + (optional) md5Hash: source file hash value + }, + ... + ] + target_bucket: target bucket id + target_prefix: prefix name to prepend to target object + + """ + + def process_rewrite(request_id, response, exception): + file_ptr = int(request_id) + if exception: + # Poor man's backoff/retry + if exception.status_code == 429 or exception.status_code > 499: + print(f'Temporary error {exception.status_code}. Sleeping 10 seconds...') + time.sleep(10) + next_batch.add(s.objects().rewrite(**files_to_copy[file_ptr]['method']), + request_id=request_id) + return + else: + raise exception + file_count = file_ptr + 1 + source_displayname = files_to_copy[file_ptr]['source_displayname'] + target_displayname = files_to_copy[file_ptr]['target_displayname'] + if response.get('done'): + source_md5 = files_to_copy[file_ptr]['md5Hash'] + target_md5 = response['resource']['md5Hash'] + if source_md5 != target_md5: + controlflow.system_error_exit(99, f'Target file {target_displayname} checksum {target_md5} does not match source {source_md5}. This should not happen') + else: + print(f'[ {file_count} / {total_files} ] 100% VERIFIED - finished copying:\n source: {source_displayname}\n dest: {target_displayname}') + else: + total_bytes = float(response.get('objectSize')) + done_bytes = float(response.get('totalBytesRewritten')) + pct = (done_bytes / total_bytes) * 100 + print(f'[ {file_count} / {total_files} ] {pct:.2f}%\n source: {source_displayname}\n dest:{target_displayname}') + files_to_copy[file_ptr]['method']['rewriteToken'] = response.get('rewriteToken') + next_batch.add(s.objects().rewrite(**files_to_copy[file_ptr]['method']), + request_id=request_id) + + s = build() + sbatch = s.new_batch_http_request(callback=process_rewrite) + files_to_copy = [] + for object_ in objects: + files_to_copy.append( + { + 'md5Hash': object_['md5Hash'], + 'source_displayname': f'{object_["bucket"]}:{object_["name"]}', + 'target_displayname': f'{target_bucket}:{target_prefix}{object_["name"]}', + 'method': { + 'destinationBucket': target_bucket, + 'destinationObject': f'{target_prefix}{object_["name"]}', + 'sourceBucket': object_['bucket'], + 'sourceObject': object_['name'], + # 'maxBytesRewrittenPerCall': 1048576, # uncomment to easily test multiple rewrite API calls per object + }, + }) + i = 0 + total_files = len(files_to_copy) + for file in files_to_copy: + while len(sbatch._order) == 100: + next_batch = s.new_batch_http_request(callback=process_rewrite) + sbatch.execute() + sbatch = next_batch + sbatch.add(s.objects().rewrite(**file['method']), + request_id=str(i)) + i += 1 + while len(sbatch._order) > 0: + next_batch = s.new_batch_http_request(callback=process_rewrite) + sbatch.execute() + sbatch = next_batch + print('All done!') + + def get_cloud_storage_object(s, bucket, object_, @@ -60,7 +188,7 @@ def get_cloud_storage_object(s, def download_bucket(): bucket = sys.argv[3] - s = build_gapi() + s = build() page_message = gapi.got_total_items_msg('Files', '...') fields = 'nextPageToken,items(name,id,md5Hash)' objects = gapi.get_all_pages(s.objects(), diff --git a/src/gam/gapi/vault.py b/src/gam/gapi/vault.py index 60d0569c..d3502f16 100644 --- a/src/gam/gapi/vault.py +++ b/src/gam/gapi/vault.py @@ -1,3 +1,4 @@ +from base64 import b64encode import datetime import json import sys @@ -519,7 +520,6 @@ def getHoldInfo(): def convertExportNameToID(v, nameOrID, matterId): - nameOrID = nameOrID.lower() cg = UID_PATTERN.match(nameOrID) if cg: return cg.group(1) @@ -530,7 +530,7 @@ def convertExportNameToID(v, nameOrID, matterId): matterId=matterId, fields=fields) for export in exports: - if export['name'].lower() == nameOrID: + if export['name'].lower() == nameOrID.lower(): return export['id'] controlflow.system_error_exit( 4, f'could not find export name {nameOrID} ' @@ -797,11 +797,49 @@ def getMatterInfo(): display.print_json(result) +def copyExport(): + v = buildGAPIObject() + s = gapi_storage.build() + matterId = getMatterItem(v, sys.argv[3]) + exportId = convertExportNameToID(v, sys.argv[4], matterId) + target_bucket = None + target_prefix = '' + i = 5 + while i < len(sys.argv): + myarg = sys.argv[i].lower().replace('_', '') + if myarg == 'targetbucket': + target_bucket = sys.argv[i+1] + i += 2 + elif myarg == 'targetprefix': + target_prefix = sys.argv[i+1] + i += 2 + else: + controlflow.invalid_argument_exit(sys.argv[i], + 'gam copy export') + if not target_bucket: + controlflow.missing_argument_exit('target_bucket', 'gam copy export') + export = gapi.call(v.matters().exports(), + 'get', + matterId=matterId, + exportId=exportId) + objects = [] + for s_file in export['cloudStorageSink']['files']: + # Convert to md5Hash format Storage API uses + # because OF COURSE they differ + md5Hash = b64encode(bytes.fromhex(s_file['md5Hash'])).decode() + objects.append({'bucket': s_file['bucketName'], + 'name': s_file['objectName'], + 'md5Hash': md5Hash}) + gapi_storage.copy_objects(objects, + target_bucket, + target_prefix) + + def downloadExport(): verifyFiles = True extractFiles = True v = buildGAPIObject() - s = gapi_storage.build_gapi() + s = gapi_storage.build() matterId = getMatterItem(v, sys.argv[3]) exportId = convertExportNameToID(v, sys.argv[4], matterId) targetFolder = GC_Values[GC_DRIVE_DIR]