mirror of
https://github.com/GAM-team/GAM.git
synced 2026-07-04 04:41:35 +00:00
Some checks are pending
Build and test GAM / build (false, build, 1, Build Intel Ubuntu Jammy, ubuntu-22.04) (push) Waiting to run
Build and test GAM / build (false, build, 10, Build x86_64 macOS 15, macos-15-intel) (push) Waiting to run
Build and test GAM / build (false, build, 11, Build x86_64 macOS 26, macos-26-intel) (push) Waiting to run
Build and test GAM / build (false, build, 12, Build Arm MacOS 26, macos-26) (push) Waiting to run
Build and test GAM / build (false, build, 13, Build Intel Windows, windows-2025-vs2026) (push) Waiting to run
Build and test GAM / build (false, build, 14, Build Arm Windows, windows-11-arm) (push) Waiting to run
Build and test GAM / build (false, build, 2, Build Intel Ubuntu Noble, ubuntu-24.04) (push) Waiting to run
Build and test GAM / build (false, build, 3, Build Arm Ubuntu Noble, ubuntu-24.04-arm) (push) Waiting to run
Build and test GAM / build (false, build, 4, Build Arm Ubuntu Jammy, ubuntu-22.04-arm) (push) Waiting to run
Build and test GAM / build (false, build, 5, Build Intel StaticX Legacy, ubuntu-22.04, yes) (push) Waiting to run
Build and test GAM / build (false, build, 6, Build Arm StaticX Legacy, ubuntu-22.04-arm, yes) (push) Waiting to run
Build and test GAM / build (false, build, 8, Build Arm MacOS 14, macos-14) (push) Waiting to run
Build and test GAM / build (false, build, 9, Build Arm MacOS 15, macos-15) (push) Waiting to run
Build and test GAM / build (false, test, 16, Test Python 3.11, ubuntu-24.04, 3.11) (push) Waiting to run
Build and test GAM / build (false, test, 17, Test Python 3.12, ubuntu-24.04, 3.12) (push) Waiting to run
Build and test GAM / build (false, test, 18, Test Python 3.13, ubuntu-24.04, 3.13) (push) Waiting to run
Build and test GAM / build (false, test, 19, Test Python 3.15-dev, ubuntu-24.04, 3.15-dev) (push) Waiting to run
Build and test GAM / build (true, test, 20, Test Python 3.14 freethread, ubuntu-24.04, 3.14) (push) Waiting to run
Build and test GAM / publish (push) Blocked by required conditions
CodeQL / Analyze (python) (push) Waiting to run
276 lines
12 KiB
Python
276 lines
12 KiB
Python
"""GAM GDoc/GSheet/Cloud Storage data retrieval utilities.
|
|
|
|
Reading data from Google Docs, Google Sheets, Cloud Storage
|
|
buckets, and CSV files.
|
|
"""
|
|
|
|
import csv
|
|
import re
|
|
import sys
|
|
import time
|
|
from tempfile import TemporaryFile
|
|
from urllib.parse import unquote
|
|
|
|
import googleapiclient.http
|
|
import httplib2
|
|
import google.auth.exceptions
|
|
|
|
from gamlib import glapi as API
|
|
from gamlib import glcfg as GC
|
|
from gamlib import glentity
|
|
Ent = glentity.GamEntity()
|
|
from gamlib import glgapi as GAPI
|
|
from gamlib import glglobals as GM
|
|
from gamlib import glmsgs as Msg
|
|
|
|
|
|
_gam = lambda: sys.modules['gam']
|
|
|
|
|
|
GDOC_FORMAT_MIME_TYPES = {
|
|
'gcsv': 'text/csv',
|
|
'gdoc': 'text/plain',
|
|
'ghtml': 'text/html',
|
|
}
|
|
|
|
# gdoc <EmailAddress> <DriveFileIDEntity>|<DriveFileNameEntity>
|
|
def getGDocData(gformat):
|
|
mimeType = GDOC_FORMAT_MIME_TYPES[gformat]
|
|
user = _gam().getEmailAddress()
|
|
fileIdEntity = _gam().getDriveFileEntity(queryShortcutsOK=False)
|
|
if not GC.Values[GC.COMMANDDATA_CLIENTACCESS]:
|
|
_, drive = _gam().buildGAPIServiceObject(API.DRIVE3, user)
|
|
else:
|
|
drive = _gam().buildGAPIObject(API.DRIVE3)
|
|
if not drive:
|
|
sys.exit(GM.Globals[GM.SYSEXITRC])
|
|
_, _, jcount = _gam()._validateUserGetFileIDs(user, 0, 0, fileIdEntity, drive=drive)
|
|
if jcount == 0:
|
|
_gam().getGDocSheetDataFailedExit([Ent.USER, user], Msg.NO_ENTITIES_FOUND.format(Ent.Singular(Ent.DRIVE_FILE)))
|
|
if jcount > 1:
|
|
_gam().getGDocSheetDataFailedExit([Ent.USER, user], Msg.MULTIPLE_ENTITIES_FOUND.format(Ent.Plural(Ent.DRIVE_FILE), jcount, ','.join(fileIdEntity['list'])))
|
|
fileId = fileIdEntity['list'][0]
|
|
f = None
|
|
try:
|
|
result = _gam().callGAPI(drive.files(), 'get',
|
|
throwReasons=GAPI.DRIVE_GET_THROW_REASONS,
|
|
fileId=fileId, fields='name,mimeType,exportLinks',
|
|
supportsAllDrives=True)
|
|
# Google Doc
|
|
if 'exportLinks' in result:
|
|
if mimeType not in result['exportLinks']:
|
|
_gam().getGDocSheetDataFailedExit([Ent.USER, user, Ent.DRIVE_FILE, result['name']],
|
|
Msg.INVALID_MIMETYPE.format(result['mimeType'], mimeType))
|
|
f = TemporaryFile(mode='w+', encoding=_gam().UTF8)
|
|
_, content = drive._http.request(uri=result['exportLinks'][mimeType], method='GET')
|
|
f.write(content.decode(_gam().UTF8_SIG))
|
|
f.seek(0)
|
|
return f
|
|
# Drive File
|
|
if result['mimeType'] != mimeType:
|
|
_gam().getGDocSheetDataFailedExit([Ent.USER, user, Ent.DRIVE_FILE, result['name']],
|
|
Msg.INVALID_MIMETYPE.format(result['mimeType'], mimeType))
|
|
fb = TemporaryFile(mode='wb+')
|
|
request = drive.files().get_media(fileId=fileId)
|
|
downloader = googleapiclient.http.MediaIoBaseDownload(fb, request)
|
|
done = False
|
|
while not done:
|
|
_, done = downloader.next_chunk()
|
|
f = TemporaryFile(mode='w+', encoding=_gam().UTF8)
|
|
fb.seek(0)
|
|
f.write(fb.read().decode(_gam().UTF8_SIG))
|
|
fb.close()
|
|
f.seek(0)
|
|
return f
|
|
except GAPI.fileNotFound:
|
|
_gam().getGDocSheetDataFailedExit([Ent.USER, user, Ent.DOCUMENT, fileId], Msg.DOES_NOT_EXIST)
|
|
except (IOError, httplib2.HttpLib2Error, google.auth.exceptions.TransportError, RuntimeError) as e:
|
|
if f:
|
|
f.close()
|
|
_gam().getGDocSheetDataFailedExit([Ent.USER, user, Ent.DOCUMENT, fileId], str(e))
|
|
except (GAPI.serviceNotAvailable, GAPI.authError, GAPI.domainPolicy) as e:
|
|
_gam().userDriveServiceNotEnabledWarning(user, str(e))
|
|
sys.exit(GM.Globals[GM.SYSEXITRC])
|
|
|
|
HTML_TITLE_PATTERN = re.compile(r'.*<title>(.+)</title>')
|
|
|
|
# gsheet <EmailAddress> <DriveFileIDEntity>|<DriveFileNameEntity> <SheetEntity>
|
|
def getGSheetData():
|
|
user = _gam().getEmailAddress()
|
|
fileIdEntity = _gam().getDriveFileEntity(queryShortcutsOK=False)
|
|
sheetEntity = _gam().getSheetEntity(False)
|
|
if not GC.Values[GC.COMMANDDATA_CLIENTACCESS]:
|
|
user, drive = _gam().buildGAPIServiceObject(API.DRIVE3, user)
|
|
else:
|
|
drive = _gam().buildGAPIObject(API.DRIVE3)
|
|
if not drive:
|
|
sys.exit(GM.Globals[GM.SYSEXITRC])
|
|
_, _, jcount = _gam()._validateUserGetFileIDs(user, 0, 0, fileIdEntity, drive=drive)
|
|
if jcount == 0:
|
|
_gam().getGDocSheetDataFailedExit([Ent.USER, user], Msg.NO_ENTITIES_FOUND.format(Ent.Singular(Ent.DRIVE_FILE)))
|
|
if jcount > 1:
|
|
_gam().getGDocSheetDataFailedExit([Ent.USER, user], Msg.MULTIPLE_ENTITIES_FOUND.format(Ent.Plural(Ent.DRIVE_FILE), jcount, ','.join(fileIdEntity['list'])))
|
|
if not GC.Values[GC.COMMANDDATA_CLIENTACCESS]:
|
|
_, sheet = _gam().buildGAPIServiceObject(API.SHEETS, user)
|
|
else:
|
|
sheet = _gam().buildGAPIObject(API.SHEETS)
|
|
if not sheet:
|
|
sys.exit(GM.Globals[GM.SYSEXITRC])
|
|
fileId = fileIdEntity['list'][0]
|
|
f = None
|
|
try:
|
|
result = _gam().callGAPI(drive.files(), 'get',
|
|
throwReasons=GAPI.DRIVE_GET_THROW_REASONS,
|
|
fileId=fileId, fields='name,mimeType', supportsAllDrives=True)
|
|
if result['mimeType'] != _gam().MIMETYPE_GA_SPREADSHEET:
|
|
_gam().getGDocSheetDataFailedExit([Ent.USER, user, Ent.DRIVE_FILE, result['name']],
|
|
Msg.INVALID_MIMETYPE.format(result['mimeType'], _gam().MIMETYPE_GA_SPREADSHEET))
|
|
spreadsheet = _gam().callGAPI(sheet.spreadsheets(), 'get',
|
|
throwReasons=GAPI.SHEETS_ACCESS_THROW_REASONS,
|
|
spreadsheetId=fileId, fields='spreadsheetUrl,sheets(properties(sheetId,title))')
|
|
sheetId = _gam().getSheetIdFromSheetEntity(spreadsheet, sheetEntity)
|
|
if sheetId is None:
|
|
_gam().getGDocSheetDataFailedExit([Ent.USER, user, Ent.SPREADSHEET, result['name'], sheetEntity['sheetType'], sheetEntity['sheetValue']], Msg.NOT_FOUND)
|
|
spreadsheetUrl = f'{re.sub("/edit.*$", "/export", spreadsheet["spreadsheetUrl"])}?format=csv&id={fileId}&gid={sheetId}'
|
|
f = TemporaryFile(mode='w+', encoding=_gam().UTF8)
|
|
if GC.Values[GC.DEBUG_LEVEL] > 0:
|
|
sys.stderr.write(f'Debug: spreadsheetUrl: {spreadsheetUrl}\n')
|
|
triesLimit = 3
|
|
for n in range(1, triesLimit+1):
|
|
_, content = drive._http.request(uri=spreadsheetUrl, method='GET')
|
|
# Check for HTML error message instead of data
|
|
if content[0:15] != b'<!DOCTYPE html>':
|
|
break
|
|
tg = HTML_TITLE_PATTERN.match(content[0:600].decode('utf-8'))
|
|
errMsg = tg.group(1) if tg else 'Unknown error'
|
|
_gam().getGDocSheetDataRetryWarning([Ent.USER, user, Ent.SPREADSHEET, result['name'], sheetEntity['sheetType'], sheetEntity['sheetValue']], errMsg, n, triesLimit)
|
|
time.sleep(20)
|
|
else:
|
|
_gam().getGDocSheetDataFailedExit([Ent.USER, user, Ent.SPREADSHEET, result['name'], sheetEntity['sheetType'], sheetEntity['sheetValue']], errMsg)
|
|
f.write(content.decode(_gam().UTF8_SIG))
|
|
f.seek(0)
|
|
return f
|
|
except GAPI.fileNotFound:
|
|
_gam().getGDocSheetDataFailedExit([Ent.USER, user, Ent.SPREADSHEET, fileId], Msg.DOES_NOT_EXIST)
|
|
except (GAPI.notFound, GAPI.forbidden, GAPI.permissionDenied,
|
|
GAPI.internalError, GAPI.insufficientFilePermissions, GAPI.badRequest,
|
|
GAPI.invalid, GAPI.invalidArgument, GAPI.failedPrecondition) as e:
|
|
_gam().getGDocSheetDataFailedExit([Ent.USER, user, Ent.SPREADSHEET, fileId, sheetEntity['sheetType'], sheetEntity['sheetValue']], str(e))
|
|
except (IOError, httplib2.HttpLib2Error) as e:
|
|
if f:
|
|
f.close()
|
|
_gam().getGDocSheetDataFailedExit([Ent.USER, user, Ent.SPREADSHEET, fileId, sheetEntity['sheetType'], sheetEntity['sheetValue']], str(e))
|
|
except (GAPI.serviceNotAvailable, GAPI.authError, GAPI.domainPolicy) as e:
|
|
_gam().userDriveServiceNotEnabledWarning(user, str(e))
|
|
sys.exit(GM.Globals[GM.SYSEXITRC])
|
|
|
|
|
|
BUCKET_OBJECT_PATTERNS = [
|
|
{'pattern': re.compile(r'https://storage.(?:googleapis|cloud.google).com/(.+?)/(.+)'), 'unquote': True},
|
|
{'pattern': re.compile(r'gs://(.+?)/(.+)'), 'unquote': False},
|
|
{'pattern': re.compile(r'(.+?)/(.+)'), 'unquote': False},
|
|
]
|
|
|
|
def getBucketObjectName():
|
|
Cmd = _gam().Cmd
|
|
uri = _gam().getString(Cmd.OB_STRING)
|
|
for pattern in BUCKET_OBJECT_PATTERNS:
|
|
mg = re.search(pattern['pattern'], uri)
|
|
if mg:
|
|
bucket = mg.group(1)
|
|
s_object = mg.group(2) if not pattern['unquote'] else unquote(mg.group(2))
|
|
return (bucket, s_object, f'{bucket}/{s_object}')
|
|
_gam().systemErrorExit(_gam().ACTION_NOT_PERFORMED_RC, f'Invalid <StorageBucketObjectName>: {uri}')
|
|
|
|
GCS_FORMAT_MIME_TYPES = {
|
|
'gcscsv': 'text/csv',
|
|
'gcsdoc': 'text/plain',
|
|
'gcshtml': 'text/html',
|
|
}
|
|
|
|
# gcscsv|gcshtml|gcsdoc <StorageBucketObjectName>
|
|
def getStorageFileData(gcsformat, returnData=True):
|
|
mimeType = GCS_FORMAT_MIME_TYPES[gcsformat]
|
|
bucket, s_object, bucketObject = getBucketObjectName()
|
|
s = _gam().buildGAPIObject(API.STORAGEREAD)
|
|
try:
|
|
result = _gam().callGAPI(s.objects(), 'get',
|
|
throwReasons=[GAPI.NOT_FOUND, GAPI.FORBIDDEN],
|
|
bucket=bucket, object=s_object, projection='noAcl', fields='contentType')
|
|
except GAPI.notFound:
|
|
_gam().entityDoesNotExistExit(Ent.CLOUD_STORAGE_FILE, bucketObject)
|
|
except GAPI.forbidden as e:
|
|
_gam().entityActionFailedExit([Ent.CLOUD_STORAGE_FILE, bucketObject], str(e))
|
|
if result['contentType'] != mimeType:
|
|
_gam().getGDocSheetDataFailedExit([Ent.CLOUD_STORAGE_FILE, bucketObject],
|
|
Msg.INVALID_MIMETYPE.format(result['contentType'], mimeType))
|
|
fb = TemporaryFile(mode='wb+')
|
|
try:
|
|
request = s.objects().get_media(bucket=bucket, object=s_object)
|
|
downloader = googleapiclient.http.MediaIoBaseDownload(fb, request)
|
|
done = False
|
|
while not done:
|
|
_, done = downloader.next_chunk()
|
|
fb.seek(0)
|
|
if returnData:
|
|
data = fb.read().decode(_gam().UTF8)
|
|
fb.close()
|
|
return data
|
|
f = TemporaryFile(mode='w+', encoding=_gam().UTF8)
|
|
f.write(fb.read().decode(_gam().UTF8_SIG))
|
|
fb.close()
|
|
f.seek(0)
|
|
return f
|
|
except googleapiclient.http.HttpError as e:
|
|
mg = _gam().HTTP_ERROR_PATTERN.match(str(e))
|
|
_gam().getGDocSheetDataFailedExit([Ent.CLOUD_STORAGE_FILE, bucketObject], mg.group(1) if mg else str(e))
|
|
|
|
# <CSVFileInput>
|
|
def openCSVFileReader(filename, fieldnames=None):
|
|
Cmd = _gam().Cmd
|
|
filenameLower = filename.lower()
|
|
if filenameLower == 'gsheet':
|
|
f = getGSheetData()
|
|
_gam().getCharSet()
|
|
elif filenameLower in {'gcsv', 'gdoc'}:
|
|
f = getGDocData(filenameLower)
|
|
_gam().getCharSet()
|
|
elif filenameLower in {'gcscsv', 'gcsdoc'}:
|
|
f = getStorageFileData(filenameLower, False)
|
|
_gam().getCharSet()
|
|
else:
|
|
encoding = _gam().getCharSet()
|
|
filename = _gam().setFilePath(filename, GC.INPUT_DIR)
|
|
f = _gam().openFile(filename, mode=_gam().DEFAULT_CSV_READ_MODE, encoding=encoding)
|
|
if _gam().checkArgumentPresent('warnifnodata'):
|
|
loc = f.tell()
|
|
try:
|
|
if not f.readline() or not f.readline():
|
|
_gam().stderrWarningMsg(_gam().fileErrorMessage(filename, Msg.NO_CSV_FILE_DATA_FOUND))
|
|
sys.exit(_gam().NO_ENTITIES_FOUND_RC)
|
|
f.seek(loc)
|
|
except (IOError, UnicodeDecodeError, UnicodeError) as e:
|
|
_gam().systemErrorExit(_gam().FILE_ERROR_RC, _gam().fileErrorMessage(filename, e))
|
|
if _gam().checkArgumentPresent('columndelimiter'):
|
|
columnDelimiter = _gam().getCharacter()
|
|
else:
|
|
columnDelimiter = GC.Values[GC.CSV_INPUT_COLUMN_DELIMITER]
|
|
if _gam().checkArgumentPresent('noescapechar'):
|
|
noEscapeChar = _gam().getBoolean()
|
|
else:
|
|
noEscapeChar = GC.Values[GC.CSV_INPUT_NO_ESCAPE_CHAR]
|
|
if _gam().checkArgumentPresent('quotechar'):
|
|
quotechar = _gam().getCharacter()
|
|
else:
|
|
quotechar = GC.Values[GC.CSV_INPUT_QUOTE_CHAR]
|
|
if not _gam().checkArgumentPresent('endcsv') and _gam().checkArgumentPresent('fields'):
|
|
fieldnames = _gam().shlexSplitList(_gam().getString(Cmd.OB_FIELD_NAME_LIST))
|
|
try:
|
|
csvFile = csv.DictReader(f, fieldnames=fieldnames,
|
|
delimiter=columnDelimiter,
|
|
escapechar='\\' if not noEscapeChar else None,
|
|
quotechar=quotechar)
|
|
return (f, csvFile, csvFile.fieldnames if csvFile.fieldnames is not None else [])
|
|
except (csv.Error, UnicodeDecodeError, UnicodeError) as e:
|
|
_gam().systemErrorExit(_gam().FILE_ERROR_RC, e)
|