import os
import time

import boto3
import dotenv

from utils.logger import logger

# Load variables from a local .env file into the process environment
# (e.g. OPENAI_API_KEY, plus the ACCESS_KEY / SECRET_KEY / BUCKET_NAME
# values read further down in this module).
dotenv.load_dotenv()


def try_ntimes(func, params, success_msg='', error_msg='', error_res=False, ntimes=1, interval=1):
    """Call ``func(**params)``, retrying on any exception.

    Args:
        func: Callable to invoke.
        params: Mapping of keyword arguments passed to ``func``.
        success_msg: Text printed once ``func`` returns without raising.
        error_msg: Text printed when the final attempt has failed.
        error_res: Value returned when every attempt has failed.
        ntimes: Maximum number of attempts. Values below 1 behave like 1:
            the call is attempted once and never retried.
        interval: Seconds to sleep between two attempts.

    Returns:
        Whatever ``func`` returns on the first successful attempt, or
        ``error_res`` if all attempts raised.
    """
    count = 1
    while True:
        try:
            res = func(**params)
        except Exception as e:
            # ``>=`` rather than ``==``: with ntimes <= 0 an equality test
            # would never match and a failing call would be retried forever.
            if count >= ntimes:
                print(error_msg)
                return error_res
            # NOTE: this hint is AWS-specific although the helper itself
            # retries any callable; the wording is kept for log compatibility.
            print(f'Please Set AWS environment variables at first, error: {e}')
            print("Having tried {} times and trying one more time...".format(count))
            time.sleep(interval)
            count += 1
        else:
            print(success_msg)
            return res


def download_pdf_from_documents_warehouse(pdf_directory: str, doc_id: str):
    """Ensure ``<pdf_directory>/<doc_id>.pdf`` exists locally, fetching it from S3 if needed.

    The S3 object key is ``doc_id`` itself; the bucket name and the
    credentials are read from the ``BUCKET_NAME``, ``ACCESS_KEY`` and
    ``SECRET_KEY`` environment variables.

    Args:
        pdf_directory: Local directory for the PDF; created if missing.
        doc_id: Document identifier, used as the S3 key and as the local
            file name (with a ``.pdf`` suffix).

    Returns:
        The local path of the PDF when it already exists or was downloaded
        successfully; ``None`` when ``pdf_directory`` is empty, when
        ``BUCKET_NAME`` is not set, or when the download failed after all
        retries.
    """
    if not pdf_directory:
        logger.error("pdf_directory is not provided")
        return None

    os.makedirs(pdf_directory, exist_ok=True)
    pdf_file_path = os.path.join(pdf_directory, f"{doc_id}.pdf")

    if os.path.exists(pdf_file_path):
        logger.info(f"PDF file for {os.path.basename(pdf_file_path)} already exists. Skipping...")
        return pdf_file_path

    bucket_name = os.getenv('BUCKET_NAME')
    if not bucket_name:
        # Without a bucket every attempt is bound to fail; bail out before
        # spending the retry sleeps on it.
        logger.error("BUCKET_NAME environment variable is not set")
        return None

    session = boto3.Session(
        aws_access_key_id=os.getenv('ACCESS_KEY'),
        aws_secret_access_key=os.getenv('SECRET_KEY'),
    )
    s3 = session.client('s3')

    params = {'Bucket': bucket_name, 'Key': doc_id, 'Filename': pdf_file_path}
    success_msg = f'file downloaded from S3 successfully: {doc_id}'
    error_msg = f'failed to download file {doc_id} from S3'
    error_res = '__process_failed__'

    res = try_ntimes(
        func=s3.download_file,
        params=params,
        success_msg=success_msg,
        error_msg=error_msg,
        error_res=error_res,
        ntimes=3,
        interval=5,
    )

    # try_ntimes hands back the sentinel when every attempt raised; in that
    # case there is no file to point at, so do not return its path.
    if res == error_res:
        logger.error(f"PDF file for {doc_id} could not be downloaded from S3")
        return None

    return pdf_file_path