Initial commit of a new experimental modular GAM.

This commit is contained in:
Jay Lee
2026-07-03 08:33:14 -04:00
parent 2fbc3c5c35
commit 8a89a91414
129 changed files with 88262 additions and 82716 deletions

301
src/gam/cmd/cloudstorage.py Normal file
View File

@@ -0,0 +1,301 @@
"""GAM cloud storage management."""
import re
import sys
from gamlib import glaction
from gamlib import glapi as API
from gamlib import glcfg as GC
from gamlib import glclargs
from gamlib import glentity
from gamlib import glgapi as GAPI
from gamlib import glglobals as GM
from gamlib import glindent
from gamlib import glmsgs as Msg
Act = glaction.GamAction()
Ent = glentity.GamEntity()
Ind = glindent.GamIndent()
Cmd = glclargs.GamCLArgs()
def _getMain():
return sys.modules['gam']
def __getattr__(name):
"""Fall back to gam module for any undefined names."""
main = _getMain()
try:
return getattr(main, name)
except AttributeError:
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
def _copyStorageObjects(objects, target_bucket, target_prefix):
"""Copies objects to target_bucket.
Args:
objects: list of object dicts
[
{
bucket: source bucket,
name: source object name,
(optional) md5Hash: source file hash value
},
...
]
target_bucket: target bucket id
target_prefix: prefix name to prepend to target object
"""
def process_rewrite(request_id, response, exception):
fileIndex = int(request_id)
if exception:
http_status, reason, message = _getMain().checkGAPIError(exception)
# Poor man's backoff/retry
if http_status == 429 or http_status > 499:
_getMain().writeStderr(f'Temporary error: {http_status} - {message}, Backing off: 10 seconds\n')
_getMain().flushStderr()
time.sleep(10)
next_batch.add(s.objects().rewrite(**files_to_copy[fileIndex]['method']), request_id=request_id)
return
if reason in GAPI.REASON_EXCEPTION_MAP:
raise GAPI.REASON_EXCEPTION_MAP[reason](message)
raise exception(message)
source_displayname = files_to_copy[fileIndex]['source_displayname']
target_displayname = files_to_copy[fileIndex]['target_displayname']
if response.get('done'):
source_md5 = files_to_copy[fileIndex]['md5Hash']
target_md5 = response['resource']['md5Hash']
if source_md5 != target_md5:
_getMain().systemErrorExit(_getMain().GOOGLE_API_ERROR_RC, f'Target file {target_displayname} checksum {target_md5} does not match source {source_md5}. This should not happen')
_getMain().entityActionPerformedMessage([Ent.CLOUD_STORAGE_FILE, None], f'{1:>7.2%} Complete', fileIndex+1, totalFiles)
Ind.Increment()
_getMain().writeStdout(_getMain().formatKeyValueList(Ind.Spaces(), ['Source', source_displayname], '\n'))
_getMain().writeStdout(_getMain().formatKeyValueList(Ind.Spaces(), ['Target', target_displayname], '\n'))
Ind.Decrement()
else:
total_bytes = float(response.get('objectSize'))
done_bytes = float(response.get('totalBytesRewritten'))
_getMain().entityActionPerformedMessage([Ent.CLOUD_STORAGE_FILE, None], f'{(done_bytes / total_bytes):>7.2%} Complete', fileIndex+1, totalFiles)
Ind.Increment()
_getMain().writeStdout(_getMain().formatKeyValueList(Ind.Spaces(), ['Source', source_displayname], '\n'))
_getMain().writeStdout(_getMain().formatKeyValueList(Ind.Spaces(), ['Target', target_displayname], '\n'))
Ind.Decrement()
files_to_copy[fileIndex]['method']['rewriteToken'] = response.get('rewriteToken')
next_batch.add(s.objects().rewrite(**files_to_copy[fileIndex]['method']), request_id=request_id)
action = Act.Get()
Act.Set(Act.COPY)
s = _getMain().buildGAPIObject(API.STORAGEWRITE)
sbatch = s.new_batch_http_request(callback=process_rewrite)
files_to_copy = []
for object_ in objects:
files_to_copy.append(
{
'md5Hash': object_['md5Hash'],
'source_displayname': f'{object_["bucket"]}:{object_["name"]}',
'target_displayname': f'{target_bucket}:{target_prefix}{object_["name"]}',
'method': {
'destinationBucket': target_bucket,
'destinationObject': f'{target_prefix}{object_["name"]}',
'sourceBucket': object_['bucket'],
'sourceObject': object_['name'],
'maxBytesRewrittenPerCall': 1048576, # uncomment to easily test multiple rewrite API calls per object
},
})
totalFiles = len(files_to_copy)
i = 0
try:
for file in files_to_copy:
while len(sbatch._order) == 100:
next_batch = s.new_batch_http_request(callback=process_rewrite)
sbatch.execute()
sbatch = next_batch
sbatch.add(s.objects().rewrite(**file['method']), request_id=str(i))
i += 1
while len(sbatch._order) > 0:
next_batch = s.new_batch_http_request(callback=process_rewrite)
sbatch.execute()
sbatch = next_batch
except GAPI.notFound:
Act.Set(action)
_getMain().entityDoesNotExistExit(Ent.CLOUD_STORAGE_BUCKET, target_bucket)
except Exception:
ClientAPIAccessDeniedExit()
Act.Set(action)
def md5MatchesFile(filename, expected_md5, j=0, jcount=0):
action = Act.Get()
Act.Set(Act.VERIFY)
try:
f = _getMain().openFile(filename, 'rb')
hash_md5 = hashlib.md5()
for chunk in iter(lambda: f.read(4096), b""):
hash_md5.update(chunk)
_getMain().closeFile(f)
actual_hash = hash_md5.hexdigest()
if actual_hash == expected_md5:
_getMain().entityActionPerformed([Ent.FILE, filename, Ent.MD5HASH, expected_md5], j, jcount)
Act.Set(action)
return True
_getMain().entityActionFailedWarning([Ent.FILE, filename, Ent.MD5HASH, expected_md5], Msg.DOES_NOT_MATCH.format(actual_hash), j, jcount)
Act.Set(action)
return False
except IOError as e:
_getMain().systemErrorExit(_getMain().FILE_ERROR_RC, _getMain().fileErrorMessage(filename, e))
def _getCloudStorageObject(s, bucket, s_object, localFilename, expectedMd5=None, zipToStdout=False, j=0, jcount=0):
if not zipToStdout:
localFilename = _getMain().cleanFilepath(localFilename)
entityValueList = [Ent.DRIVE_FILE, localFilename]
if os.path.exists(localFilename):
_getMain().printEntityMessage(entityValueList, Msg.EXISTS)
if not expectedMd5:
return # nothing to verify, just assume we're good.
if md5MatchesFile(localFilename, expectedMd5):
return
_getMain().printEntityMessage(entityValueList, Msg.DOWNLOADING_AGAIN_AND_OVER_WRITING)
_getMain().entityPerformAction(entityValueList)
file_path = os.path.dirname(localFilename)
if not os.path.exists(file_path):
os.makedirs(file_path)
f = _getMain().openFile(localFilename, 'wb')
else:
f = _getMain().openFile('-', 'wb')
try:
request = s.objects().get_media(bucket=bucket, object=s_object)
downloader = googleapiclient.http.MediaIoBaseDownload(f, request)
done = False
while not done:
status, done = downloader.next_chunk()
if not zipToStdout and status.progress() < 1.0:
_getMain().entityActionPerformedMessage([Ent.CLOUD_STORAGE_FILE, s_object], f'{status.progress():>7.2%}', j, jcount)
if not zipToStdout:
_getMain().entityModifierNewValueActionPerformed([Ent.CLOUD_STORAGE_FILE, s_object], Act.MODIFIER_TO, localFilename, j, jcount)
_getMain().closeFile(f, True)
if expectedMd5 and not md5MatchesFile(localFilename, expectedMd5):
_getMain().systemErrorExit(_getMain().FILE_ERROR_RC, _getMain().fileErrorMessage(localFilename, Msg.CORRUPT_FILE))
except googleapiclient.http.HttpError as e:
mg = _getMain().HTTP_ERROR_PATTERN.match(str(e))
_getMain().entityModifierNewValueActionFailedWarning([Ent.CLOUD_STORAGE_FILE, s_object], Act.MODIFIER_TO, localFilename, mg.group(1) if mg else str(e), j, jcount)
TAKEOUT_EXPORT_PATTERN = re.compile(r'(takeout-export-[a-f,0-9,-]*)')
# gam copy storagebucket sourcebucket <StorageBucketName> targetbucket <StorageBucketName>
# [sourceprefix <String>] [targetprefix <String>]
def doCopyCloudStorageBucket():
s = _getMain().buildGAPIObject(API.STORAGEREAD)
source_bucket = None
target_bucket = None
source_prefix = None
target_prefix = ''
while Cmd.ArgumentsRemaining():
myarg = _getMain().getArgument()
if myarg == 'sourcebucket':
source_bucket = _getMain().getString(Cmd.OB_URL)
elif myarg == 'targetbucket':
target_bucket = _getMain().getString(Cmd.OB_URL)
elif myarg == 'sourceprefix':
source_prefix = _getMain().getString(Cmd.OB_STRING, minLen=0)
elif myarg == 'targetprefix':
target_prefix = _getMain().getString(Cmd.OB_STRING, minLen=0)
else:
_getMain().unknownArgumentExit()
if not target_bucket:
_getMain().missingArgumentExit('targetbucket')
if not source_bucket:
_getMain().missingArgumentExit('sourcebucket')
_getMain().printGettingAllAccountEntities(Ent.FILE)
pageMessage = _getMain().getPageMessage()
try:
objects = _getMain().callGAPIpages(s.objects(), 'list', 'items',
pageMessage=pageMessage,
throwReasons=[GAPI.NOT_FOUND, GAPI.FORBIDDEN],
bucket=source_bucket, prefix=source_prefix,
fields='nextPageToken,items(name,bucket,md5Hash)')
except GAPI.notFound:
_getMain().entityDoesNotExistExit(Ent.CLOUD_STORAGE_BUCKET, source_bucket)
except GAPI.forbidden as e:
_getMain().entityActionFailedExit([Ent.CLOUD_STORAGE_BUCKET, source_bucket], str(e))
_copyStorageObjects(objects, target_bucket, target_prefix)
# gam download storagebucket <TakeoutBucketName>
# [targetfolder <FilePath>]
def doDownloadCloudStorageBucket():
bucket_url = _getMain().getString(Cmd.OB_STRING)
targetFolder = GC.Values[GC.DRIVE_DIR]
while Cmd.ArgumentsRemaining():
myarg = _getMain().getArgument()
if myarg == 'targetfolder':
targetFolder = _getMain().setFilePath(_getMain().getString(Cmd.OB_FILE_PATH), GC.DRIVE_DIR)
if not os.path.isdir(targetFolder):
os.makedirs(targetFolder)
else:
_getMain().unknownArgumentExit()
bucket_match = re.search(TAKEOUT_EXPORT_PATTERN, bucket_url)
if not bucket_match:
_getMain().systemErrorExit(_getMain().ACTION_NOT_PERFORMED_RC, f'Could not find a takeout-export-* bucket in {bucket_url}')
bucket = bucket_match.group(1)
s = _getMain().buildGAPIObject(API.STORAGEREAD)
_getMain().printGettingAllAccountEntities(Ent.FILE)
pageMessage = _getMain().getPageMessage()
try:
objects = _getMain().callGAPIpages(s.objects(), 'list', 'items',
pageMessage=pageMessage,
throwReasons=[GAPI.NOT_FOUND, GAPI.FORBIDDEN],
bucket=bucket, projection='noAcl', fields='nextPageToken,items(name,md5Hash)')
except GAPI.notFound:
_getMain().entityDoesNotExistExit(Ent.CLOUD_STORAGE_BUCKET, bucket)
except GAPI.forbidden as e:
_getMain().entityActionFailedExit([Ent.CLOUD_STORAGE_BUCKET, bucket], str(e))
count = len(objects)
i = 0
for s_object in objects:
i += 1
_getMain().printGettingEntityItem(Ent.FILE, s_object['name'], i, count)
expectedMd5 = base64.b64decode(s_object['md5Hash']).hex()
_getCloudStorageObject(s, bucket, s_object['name'], os.path.join(targetFolder, s_object['name']),
expectedMd5=expectedMd5)
# gam download storagefile <StorageBucketObjectName>
# [targetfolder <FilePath>] [overwrite [<Boolean>]] [nogcspath [Boolean>]]
def doDownloadCloudStorageFile():
bucket, s_object, bucketObject = _getMain().getBucketObjectName()
targetFolder = GC.Values[GC.DRIVE_DIR]
overwrite = nogcspath = False
while Cmd.ArgumentsRemaining():
myarg = _getMain().getArgument()
if myarg == 'targetfolder':
targetFolder = _getMain().setFilePath(_getMain().getString(Cmd.OB_FILE_PATH), GC.DRIVE_DIR)
if not os.path.isdir(targetFolder):
os.makedirs(targetFolder)
elif myarg == 'overwrite':
overwrite = _getMain().getBoolean()
elif myarg == 'nogcspath':
nogcspath = _getMain().getBoolean()
else:
_getMain().unknownArgumentExit()
s_obpaths = s_object.rsplit('/', 1)
s_obfile = s_obpaths[-1]
if len(s_obpaths) > 1 and not nogcspath:
targetFolder = os.path.join(targetFolder, s_obpaths[0])
filename, _ = _getMain().uniqueFilename(targetFolder, s_obfile, overwrite)
filepath = os.path.dirname(filename)
if not os.path.exists(filepath):
os.makedirs(filepath)
s = _getMain().buildGAPIObject(API.STORAGEREAD)
_getMain().printGettingEntityItem(Ent.FILE, s_object)
f = _getMain().openFile(filename, 'wb')
try:
request = s.objects().get_media(bucket=bucket, object=s_object)
downloader = googleapiclient.http.MediaIoBaseDownload(f, request)
done = False
while not done:
_, done = downloader.next_chunk()
_getMain().entityModifierNewValueActionPerformed([Ent.CLOUD_STORAGE_FILE, s_object], Act.MODIFIER_TO, filename)
_getMain().closeFile(f, True)
except googleapiclient.http.HttpError as e:
mg = _getMain().HTTP_ERROR_PATTERN.match(str(e))
_getMain().entityActionFailedWarning([Ent.CLOUD_STORAGE_FILE, bucketObject], mg.group(1) if mg else str(e))