gam copy vaultexport/storagebucket commands

This commit is contained in:
Jay Lee
2023-02-06 13:33:26 +00:00
parent 454778b190
commit dc3976bdda
4 changed files with 214 additions and 15 deletions

View File

@@ -12,7 +12,7 @@
}
},
"basePath": "",
"baseUrl": "https://www.googleapis.com/admin/directory/v1.1beta1/customer/",
"baseUrl": "https://admin.googleapis.com/admin/directory/v1.1beta1/customer/",
"batchPath": "batch",
"canonicalName": "cbcm",
"discoveryVersion": "v1",

View File

@@ -7442,12 +7442,21 @@ def _getCurrentProjectID():
def _getProjects(crm, pfilter):
try:
return gapi.get_all_pages(
projects = gapi.get_all_pages(
crm.projects(),
'search',
'projects',
throw_reasons=[gapi_errors.ErrorReason.BAD_REQUEST],
query=pfilter)
if projects:
return projects
if pfilter.startswith('id:'):
pfilter = pfilter[3:]
return [gapi.call(
crm.projects(),
'get',
name=f'projects/{pfilter}',
throw_reasons=[gapi_errors.ErrorReason.BAD_REQUEST])]
except gapi_errors.GapiBadRequestError as e:
controlflow.system_error_exit(2, f'Project: {pfilter}, {str(e)}')
@@ -10742,11 +10751,17 @@ OAUTH2_SCOPES = [
'subscopes': ['readonly'],
'scopes': 'https://www.googleapis.com/auth/ediscovery'
},
# off by default to avoid reauth issues with GCP APIs
# and since many admins never use Vault API.
{
'name': 'Cloud Storage (Vault Export - read only)',
'subscopes': [],
'scopes': 'https://www.googleapis.com/auth/devstorage.read_only'
},
'name': 'Cloud Storage - Vault/Takeout Download/Copy',
'subscopes': ['readonly'],
'offByDefault': True,
'restricted_scopes': {
'readonly': 'https://www.googleapis.com/auth/devstorage.read_only'
},
'scopes': 'https://www.googleapis.com/auth/devstorage.read_write'
},
{
'name': 'User Profile (Email address - read only)',
'subscopes': [],
@@ -10791,6 +10806,7 @@ class ScopeMenuOption():
is_required=False,
is_selected=False,
supported_restrictions=None,
restricted_scopes=None,
restriction=None):
"""A data structure for storing and toggling feature/API scope attributes.
@@ -10820,6 +10836,7 @@ class ScopeMenuOption():
self._restriction = None
self.scopes = oauth_scopes
self.restricted_scopes = restricted_scopes
self.description = description
self.is_required = is_required
# Required scopes must be selected
@@ -10910,7 +10927,10 @@ class ScopeMenuOption():
effective_scopes = []
for scope in self.scopes:
if self.is_restricted:
scope = f'{scope}.{self._restriction}'
if self.restricted_scopes.get(self._restriction):
scope = self.restricted_scopes.get(self._restriction)
else:
scope = f'{scope}.{self._restriction}'
effective_scopes.append(scope)
return effective_scopes
@@ -10922,7 +10942,10 @@ class ScopeMenuOption():
name: Some description of the API/feature.
subscopes: A list of compatible scope restrictions such as 'action' or
'readonly'. Each scope in the scopes list must support this
restriction text appended to the end of its normal scope text.
restriction text appended to the end of its normal scope text or
be defined in the restricted_scopes attribute.
restricted_scopes: A dict of scopes to be used for restrictions. If not
defined then {scope}.{subscope} is used.
scopes: A list of scopes that are required for the API/feature.
offByDefault: A bool indicating whether this feature/scope should be off
by default (when no prior selection has been made). Default is False
@@ -10951,8 +10974,8 @@ class ScopeMenuOption():
description=scope_definition.get('name'),
is_selected=not scope_definition.get('offByDefault'),
supported_restrictions=scope_definition.get('subscopes', []),
is_required=scope_definition.get('required', False))
is_required=scope_definition.get('required', False),
restricted_scopes=scope_definition.get('restricted_scopes', {}))
class ScopeSelectionMenu():
"""A text menu which prompts the user to select the scopes to authorize."""
@@ -12027,6 +12050,16 @@ def ProcessGAMCommand(args):
else:
controlflow.invalid_argument_exit(argument, 'gam download')
sys.exit(0)
elif command == 'copy':
argument = sys.argv[2].lower().replace('_', '')
if argument in ['export', 'vaultexport']:
gapi_vault.copyExport()
elif argument in ['storagebucket', 'bucket']:
gapi_storage.copy_bucket()
else:
controlflow.invalid_argument_exit(argument, 'gam copy')
sys.exit(0)
elif command == 'rotate':
argument = sys.argv[2].lower()
if argument in ['sakey', 'sakeys']:

View File

@@ -2,20 +2,148 @@ import base64
import os
import re
import sys
import time
import googleapiclient
import gam
from gam.gapi import errors as gapi_errors
from gam.var import *
from gam import controlflow
from gam import fileutils
from gam import gapi
from gam import utils
def build_gapi():
def build():
return gam.buildGAPIObject('storage')
def copy_bucket():
s = build()
source_bucket = None
target_bucket = None
prefix = None
i = 3
while i < len(sys.argv):
myarg = sys.argv[i].lower().replace('_', '')
if myarg == 'sourcebucket':
source_bucket = sys.argv[i+1]
i += 2
elif myarg == 'targetbucket':
target_bucket = sys.argv[i+1]
i += 2
elif myarg == 'sourceprefix':
prefix = sys.argv[i+1]
i += 2
elif myarg == 'targetprefix':
target_prefix = sys.argv[i+1]
i += 2
else:
controlflow.invalid_argument_exit(sys.argv[i],
'gam copy storagebucket')
if not target_bucket:
controlflow.missing_argument_exit('target_bucket', 'gam copy storagebucket')
if not source_bucket:
controlflow.missing_argument_exit('source_bucket', 'gam copy storagebucket')
page_message = gapi.got_total_items_msg('Storage Objects', '...\n')
objects = gapi.get_all_pages(s.objects(),
'list',
items='items',
page_message=page_message,
prefix=prefix,
bucket=source_bucket,
fields='items(name,bucket,md5Hash),nextPageToken')
copy_objects(objects,
target_bucket,
target_prefix)
def copy_objects(objects,
target_bucket,
target_prefix):
"""Copies objects to target_bucket.
Args:
objects: list of object dicts
[
{
bucket: source bucket,
name: source object name,
(optional) md5Hash: source file hash value
},
...
]
target_bucket: target bucket id
target_prefix: prefix name to prepend to target object
"""
def process_rewrite(request_id, response, exception):
file_ptr = int(request_id)
if exception:
# Poor man's backoff/retry
if exception.status_code == 429 or exception.status_code > 499:
print(f'Temporary error {exception.status_code}. Sleeping 10 seconds...')
time.sleep(10)
next_batch.add(s.objects().rewrite(**files_to_copy[file_ptr]['method']),
request_id=request_id)
return
else:
raise exception
file_count = file_ptr + 1
source_displayname = files_to_copy[file_ptr]['source_displayname']
target_displayname = files_to_copy[file_ptr]['target_displayname']
if response.get('done'):
source_md5 = files_to_copy[file_ptr]['md5Hash']
target_md5 = response['resource']['md5Hash']
if source_md5 != target_md5:
controlflow.system_error_exit(99, f'Target file {target_displayname} checksum {target_md5} does not match source {source_md5}. This should not happen')
else:
print(f'[ {file_count} / {total_files} ] 100% VERIFIED - finished copying:\n source: {source_displayname}\n dest: {target_displayname}')
else:
total_bytes = float(response.get('objectSize'))
done_bytes = float(response.get('totalBytesRewritten'))
pct = (done_bytes / total_bytes) * 100
print(f'[ {file_count} / {total_files} ] {pct:.2f}%\n source: {source_displayname}\n dest:{target_displayname}')
files_to_copy[file_ptr]['method']['rewriteToken'] = response.get('rewriteToken')
next_batch.add(s.objects().rewrite(**files_to_copy[file_ptr]['method']),
request_id=request_id)
s = build()
sbatch = s.new_batch_http_request(callback=process_rewrite)
files_to_copy = []
for object_ in objects:
files_to_copy.append(
{
'md5Hash': object_['md5Hash'],
'source_displayname': f'{object_["bucket"]}:{object_["name"]}',
'target_displayname': f'{target_bucket}:{target_prefix}{object_["name"]}',
'method': {
'destinationBucket': target_bucket,
'destinationObject': f'{target_prefix}{object_["name"]}',
'sourceBucket': object_['bucket'],
'sourceObject': object_['name'],
# 'maxBytesRewrittenPerCall': 1048576, # uncomment to easily test multiple rewrite API calls per object
},
})
i = 0
total_files = len(files_to_copy)
for file in files_to_copy:
while len(sbatch._order) == 100:
next_batch = s.new_batch_http_request(callback=process_rewrite)
sbatch.execute()
sbatch = next_batch
sbatch.add(s.objects().rewrite(**file['method']),
request_id=str(i))
i += 1
while len(sbatch._order) > 0:
next_batch = s.new_batch_http_request(callback=process_rewrite)
sbatch.execute()
sbatch = next_batch
print('All done!')
def get_cloud_storage_object(s,
bucket,
object_,
@@ -60,7 +188,7 @@ def get_cloud_storage_object(s,
def download_bucket():
bucket = sys.argv[3]
s = build_gapi()
s = build()
page_message = gapi.got_total_items_msg('Files', '...')
fields = 'nextPageToken,items(name,id,md5Hash)'
objects = gapi.get_all_pages(s.objects(),

View File

@@ -1,3 +1,4 @@
from base64 import b64encode
import datetime
import json
import sys
@@ -519,7 +520,6 @@ def getHoldInfo():
def convertExportNameToID(v, nameOrID, matterId):
nameOrID = nameOrID.lower()
cg = UID_PATTERN.match(nameOrID)
if cg:
return cg.group(1)
@@ -530,7 +530,7 @@ def convertExportNameToID(v, nameOrID, matterId):
matterId=matterId,
fields=fields)
for export in exports:
if export['name'].lower() == nameOrID:
if export['name'].lower() == nameOrID.lower():
return export['id']
controlflow.system_error_exit(
4, f'could not find export name {nameOrID} '
@@ -797,11 +797,49 @@ def getMatterInfo():
display.print_json(result)
def copyExport():
v = buildGAPIObject()
s = gapi_storage.build()
matterId = getMatterItem(v, sys.argv[3])
exportId = convertExportNameToID(v, sys.argv[4], matterId)
target_bucket = None
target_prefix = ''
i = 5
while i < len(sys.argv):
myarg = sys.argv[i].lower().replace('_', '')
if myarg == 'targetbucket':
target_bucket = sys.argv[i+1]
i += 2
elif myarg == 'targetprefix':
target_prefix = sys.argv[i+1]
i += 2
else:
controlflow.invalid_argument_exit(sys.argv[i],
'gam copy export')
if not target_bucket:
controlflow.missing_argument_exit('target_bucket', 'gam copy export')
export = gapi.call(v.matters().exports(),
'get',
matterId=matterId,
exportId=exportId)
objects = []
for s_file in export['cloudStorageSink']['files']:
# Convert to md5Hash format Storage API uses
# because OF COURSE they differ
md5Hash = b64encode(bytes.fromhex(s_file['md5Hash'])).decode()
objects.append({'bucket': s_file['bucketName'],
'name': s_file['objectName'],
'md5Hash': md5Hash})
gapi_storage.copy_objects(objects,
target_bucket,
target_prefix)
def downloadExport():
verifyFiles = True
extractFiles = True
v = buildGAPIObject()
s = gapi_storage.build_gapi()
s = gapi_storage.build()
matterId = getMatterItem(v, sys.argv[3])
exportId = convertExportNameToID(v, sys.argv[4], matterId)
targetFolder = GC_Values[GC_DRIVE_DIR]