dc-ml-emea-ar/utils/s3_util.py

40 lines
1.6 KiB
Python

"""Upload one directory from the current working directory to an AWS S3 bucket."""
from pathlib import Path
import os
import glob
import boto3
def _build_s3_key(aws_init_dir, relative_name):
    """Join the S3 'directory' prefix and a relative file name into an object key.

    The result always uses '/' as separator, and ``aws_init_dir`` is never
    discarded even if ``relative_name`` still begins with a slash.
    Returns an empty string when ``relative_name`` is empty.
    """
    # S3 keys are '/'-delimited regardless of the local OS path separator.
    relative_name = relative_name.replace(os.sep, "/").lstrip("/")
    aws_init_dir = os.fspath(aws_init_dir)
    if not relative_name or not aws_init_dir:
        return relative_name
    if aws_init_dir.endswith("/"):
        return aws_init_dir + relative_name
    return f"{aws_init_dir}/{relative_name}"


def upload_dir(local_dir, aws_init_dir, bucket_name, tag, prefix='/'):
    """
    Upload 'local_dir' and all of its sub-contents (files in every
    subdirectory) from the current working directory to an AWS S3 bucket.

    Each object key is ``aws_init_dir`` + '/' + the local file path with the
    leading ``prefix`` removed. Local file paths are built from the current
    working directory, so with the default prefix the key contains the full
    local path without its leading '/'.

    Parameters
    ----------
    local_dir : local directory to be uploaded, with respect to the current
        working directory
    aws_init_dir : prefix 'directory' in aws
    bucket_name : bucket in aws
    tag : glob pattern selecting the files in each directory, like *png
        NOTE: when the tag comes from the command line it must be quoted,
        like --tag '*txt', so it reaches argparse unexpanded
    prefix : leading text to strip from every local file path (by default the
        initial '/'); files whose path does not start with it are skipped

    Returns
    -------
    None
    """
    s3_client = boto3.resource('s3').meta.client
    root = Path.cwd() / local_dir

    # Depending on the Python version a trailing '**' may yield files as well
    # as directories; only directories can be globbed into below.
    directories = [entry for entry in root.glob('**') if entry.is_dir()]

    for directory in directories:
        for file_name in glob.glob(os.path.join(directory, tag)):
            if os.path.isdir(file_name):
                continue

            if not file_name.startswith(prefix):
                # Report the skip instead of dropping the file silently.
                print(f"skipping {file_name}: does not start with prefix {prefix!r}")
                continue

            print(f"fileName {file_name}")
            s3_key = _build_s3_key(aws_init_dir, file_name[len(prefix):])
            if not s3_key:
                # Nothing is left after removing the prefix: no usable key.
                continue

            s3_client.upload_file(file_name, bucket_name, s3_key)