import json
import os
import time
from urllib import parse, request

import dotenv
import pandas as pd

# Load variables from a local .env file into the process environment.
# This module reads SQL_PASS_KEY from the environment (see
# query_data_by_biz_type).
dotenv.load_dotenv()

# Retry policy shared by every query helper in this module.
_MAX_ATTEMPTS = 5
_RETRY_DELAY_SECONDS = 3


def _query_mapping_with_cache(biztype, para, rerun, output_folder):
    """Run a named query for *para*, using an Excel file as an optional cache.

    Shared implementation behind query_document_fund_mapping and
    query_investment_by_provider.

    Args:
        biztype: Name of the query passed to query_data_by_biz_type.
        para: Query parameter; also used as the cache file name
            (``<para>.xlsx``).
        rerun: Only when this is exactly ``False`` is an existing cache file
            read instead of querying the service.
        output_folder: Folder holding the cache files. ``None`` or an empty
            value disables both reading and writing of the cache.

    Returns:
        A DataFrame sorted by ``FundName`` and ``ShareClassName`` with a fresh
        index; an empty DataFrame when the query yields no rows; or ``None``
        when every attempt raised an exception.
    """
    for attempt in range(1, _MAX_ATTEMPTS + 1):
        try:
            output_file = None
            if output_folder:
                output_file = os.path.join(output_folder, f"{para}.xlsx")

            mapping_df = pd.DataFrame()
            if rerun is False and output_file and os.path.exists(output_file):
                mapping_df = pd.read_excel(output_file)

            # No cache, or an empty one: fall back to the remote query.
            if len(mapping_df) == 0:
                mapping_df = query_data_by_biz_type(
                    biztype=biztype, para=para, return_df=True
                ).drop_duplicates()
                if len(mapping_df) == 0:
                    # Nothing to sort or persist; empty results are not cached.
                    return mapping_df

            mapping_df = mapping_df.sort_values(
                by=["FundName", "ShareClassName"]
            ).reset_index(drop=True)

            if output_file:
                os.makedirs(output_folder, exist_ok=True)
                with pd.ExcelWriter(output_file) as writer:
                    mapping_df.to_excel(writer, index=False)
            return mapping_df
        except Exception as e:
            # Best effort: report the failure and retry the whole sequence.
            print(e)
            if attempt < _MAX_ATTEMPTS:
                time.sleep(_RETRY_DELAY_SECONDS)
    # All attempts failed; callers must be prepared for None.
    return None


def query_document_fund_mapping(
    doc_id, rerun=True, output_folder=r"/data/emea_ar/output/mapping/document/"
):
    """Return the result of the "getFundInfoByDocId" query for a document.

    Args:
        doc_id: Document identifier passed as the query parameter and used as
            the cache file name (``<doc_id>.xlsx``).
        rerun: When exactly ``False``, an existing cache file in
            *output_folder* is read instead of querying the service.
        output_folder: Folder for the Excel cache; ``None`` or empty disables
            caching.

    Returns:
        A DataFrame sorted by ``FundName`` and ``ShareClassName``, an empty
        DataFrame when the query returns no rows, or ``None`` when all
        attempts failed.
    """
    return _query_mapping_with_cache(
        "getFundInfoByDocId", doc_id, rerun, output_folder
    )


def query_investment_by_provider(
    company_id: str,
    rerun=True,
    output_folder=r"/data/emea_ar/output/mapping/provider/",
):
    """Return the result of the "getInvestmentByProvider" query for a company.

    Args:
        company_id: Company identifier passed as the query parameter and used
            as the cache file name (``<company_id>.xlsx``).
        rerun: When exactly ``False``, an existing cache file in
            *output_folder* is read instead of querying the service.
        output_folder: Folder for the Excel cache; ``None`` or empty disables
            caching.

    Returns:
        A DataFrame sorted by ``FundName`` and ``ShareClassName``, an empty
        DataFrame when the query returns no rows, or ``None`` when all
        attempts failed.
    """
    return _query_mapping_with_cache(
        "getInvestmentByProvider", company_id, rerun, output_folder
    )


def query_data_by_biz_type(biztype: str, para, return_df: bool):
    """Call the SQL-pass endpoint for the named query and return its result.

    Args:
        biztype: Value sent as the ``sqlName`` query parameter.
        para: Value sent (stringified) as the ``params`` query parameter.
        return_df: When true, wrap the result in a pandas DataFrame.

    Returns:
        The ``result`` payload from query_data_by_url (``None`` when the
        request failed), or a DataFrame built from it when *return_df* is
        true.
    """
    sqlpass_url = "https://api.morningstar.com/sqlpassapi/v1/sql"
    # urlencode percent-escapes both values and joins them with a real "&";
    # the separator had previously been corrupted into a pilcrow character.
    query = parse.urlencode({"sqlName": biztype, "params": str(para)})
    url = f"{sqlpass_url}?{query}"
    headers = {"ApiKey": os.getenv("SQL_PASS_KEY")}

    result = query_data_by_url(url, headers)
    if return_df:
        return pd.DataFrame(result)
    return result


def query_data_by_url(url, headers):
    """Fetch *url* and return the ``result`` entry of its JSON body.

    The request is attempted up to five times, printing each error and
    pausing three seconds between attempts.

    Args:
        url: Address to request.
        headers: Mapping of HTTP headers sent with the request.

    Returns:
        The value stored under ``"result"`` in the decoded JSON document, or
        ``None`` when every attempt failed.

    Raises:
        json.JSONDecodeError: If the body is not valid JSON.
        KeyError: If the JSON document has no ``"result"`` key.
    """
    body = None
    for attempt in range(1, _MAX_ATTEMPTS + 1):
        try:
            req = request.Request(url=url, headers=headers)
            # The context manager closes the connection even if read() fails.
            with request.urlopen(req) as response:
                body = response.read().decode(encoding="utf-8", errors="ignore")
            break
        except Exception as e:
            print(e)
            if attempt < _MAX_ATTEMPTS:
                time.sleep(_RETRY_DELAY_SECONDS)

    if body is None:
        return None
    return json.loads(body)["result"]