import os
import platform
import time
from typing import Optional

import boto3
import certifi
import dotenv

from utils.logger import logger

# Load variables from a local .env file into the process environment; the
# functions below read ACCESS_KEY, SECRET_KEY, AWS_SESSION_TOKEN and
# BUCKET_NAME through os.getenv.
dotenv.load_dotenv()


def try_ntimes(func, params, success_msg='', error_msg='', error_res=False,
               ntimes=1, interval=1):
    """Call ``func(**params)``, retrying when it raises.

    Args:
        func: Callable to invoke.
        params: Mapping of keyword arguments passed to ``func``.
        success_msg: Text printed after a successful call.
        error_msg: Text printed once all attempts have failed.
        error_res: Value returned once all attempts have failed.
        ntimes: Maximum number of attempts. Values below 1 behave like 1
            (the call is always tried once).
        interval: Seconds to sleep between attempts.

    Returns:
        Whatever ``func`` returns on the first successful attempt, or
        ``error_res`` if every attempt raised.
    """
    count = 1
    while True:
        try:
            res = func(**params)
            print(success_msg)
            return res
        except Exception as e:
            # ">=" rather than "==": with ntimes <= 0 an equality test can
            # never match and a persistently failing call would loop forever.
            if count >= ntimes:
                print(error_msg)
                return error_res
            print(f'Please Set AWS environment variables at first, error: {e}')
            print("Having tried {} times and trying one more time...".format(count))
            time.sleep(interval)
            count += 1


def download_pdf_from_documents_warehouse(pdf_directory: str,
                                          doc_id: str) -> Optional[str]:
    """Download ``doc_id`` from S3 into ``pdf_directory`` as ``<doc_id>.pdf``.

    The bucket name is read from the ``BUCKET_NAME`` environment variable and
    ``doc_id`` is used as the S3 object key. If the target file already
    exists locally, the download is skipped.

    Args:
        pdf_directory: Directory to store the PDF in; created if missing.
        doc_id: S3 object key, also used as the local file stem.

    Returns:
        Path of the local PDF on success (or when it already existed);
        ``None`` if ``pdf_directory`` is empty, ``BUCKET_NAME`` is not set,
        or the download failed after all retries.
    """
    if not pdf_directory:
        logger.error("pdf_directory is not provided")
        return None

    os.makedirs(pdf_directory, exist_ok=True)
    pdf_file_path = os.path.join(pdf_directory, f"{doc_id}.pdf")

    if os.path.exists(pdf_file_path):
        logger.info(f"PDF file for {os.path.basename(pdf_file_path)} already exists. Skipping...")
        return pdf_file_path

    bucket_name = os.getenv('BUCKET_NAME')
    if not bucket_name:
        # Fail fast instead of burning every retry (and its sleeps) on a
        # request that cannot succeed without a bucket.
        logger.error("BUCKET_NAME environment variable is not set")
        return None

    if platform.system().lower() == "windows":
        # On Windows the client is built from explicit credentials taken from
        # the environment and uses certifi's CA bundle for TLS verification.
        s3 = boto3.client(
            "s3",
            region_name="us-east-1",
            verify=certifi.where(),
            aws_access_key_id=os.getenv('ACCESS_KEY'),
            aws_secret_access_key=os.getenv('SECRET_KEY'),
            aws_session_token=os.getenv('AWS_SESSION_TOKEN'),
        )
    else:
        # Elsewhere boto3 resolves credentials and region on its own.
        s3 = boto3.client('s3')

    params = {'Bucket': bucket_name, 'Key': doc_id, 'Filename': pdf_file_path}
    success_msg = f'file downloaded from S3 successfully: {doc_id}'
    error_msg = f'failed to download file {doc_id} from S3'
    error_res = '__process_failed__'

    res = try_ntimes(func=s3.download_file, params=params,
                     success_msg=success_msg, error_msg=error_msg,
                     error_res=error_res, ntimes=3, interval=5)

    # try_ntimes hands back error_res only when every attempt raised; in that
    # case there is no usable file, so do not report a path for it.
    if res == error_res:
        logger.error(error_msg)
        return None
    return pdf_file_path