Merge branches 'aus_prospectus_ravi' and 'aus_prospectus_ravi' of https://msstash.morningstar.com/scm/dc/dc-ml-emea-ar into aus_prospectus_ravi

This commit is contained in:
Ravi Maheshwari 2025-03-13 17:34:35 +05:30
commit 1f6b781b12
3 changed files with 37 additions and 4 deletions

View File

@ -896,6 +896,8 @@ def calculate_metrics_based_db_data_file(audit_file_path: str = r"/data/aus_pros
verify_file_name = os.path.basename(verify_file_path).replace(".xlsx", "")
if is_for_all:
verify_file_name = f"{verify_file_name}_all"
if zero_equal_none:
verify_file_name = f"{verify_file_name}_zero_equal_none"
metrics_file_name = f"metrics_{verify_file_name}_{len(document_id_list)}_documents_4_dps_not_strict.xlsx"
output_file = os.path.join(output_folder, metrics_file_name)
with pd.ExcelWriter(output_file) as writer:
@ -1426,7 +1428,7 @@ if __name__ == "__main__":
audit_file_path: str = r"/data/aus_prospectus/ground_truth/phase2_file/46_documents/46_documents_ground_truth_with_mapping.xlsx"
audit_data_sheet: str = "Sheet1"
verify_file_path: str = r"/data/aus_prospectus/output/mapping_data/total/merged_mapping_data_info_46_documents_by_text.xlsx"
verify_file_path: str = r"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250313024715.xlsx"
verify_data_sheet: str = "total_mapping_data"
# verify_document_list_file: str = "./sample_documents/aus_prospectus_29_documents_sample.txt"
verify_document_list_file_list = [None,

View File

@ -237,6 +237,11 @@
"---Example 3 End---",
"The relevant values, 0.67 and 1.17, are presented as a range and should be ignored, so the output should be:",
"{\"data\": []}",
"---Example 4 Start---",
"Type of fee or cost Amount 2 How and when paid \nOngoing annual fees and costs 3 \nManagement fees and costs \n0.82% to 1.22% p.a. (estimated) \n",
"---Example 4 End---",
"The relevant values, 0.82 and 1.22, are presented as a range and should be ignored, so the output should be:",
"{\"data\": []}",
"\n",
"H. If the management fee and costs value includes the performance fee, please exclude or subtract the performance fee value and output only the management fee and costs value.",
"---Example 1 Start---",
@ -260,6 +265,8 @@
"The column \"Total of management fees and costs and performance fees (% p.a.)\" means the value is the sum of \"Management fee and costs\" and \"performance fee\"; we should ignore this column's values.",
"The column \"Management fees and costs (% p.a.)\" is the value of \"Management fee and costs\".",
"Both of management_fee and management_fee_and_costs are the values for \"Management fees and costs (% p.a.)\" for this case.",
"If there are 3 decimal numbers, the 2nd decimal number is the management_fee_and_costs and management_fee, the 3rd decimal number is the buy_spread and sell_spread.",
"If there are 4 decimal numbers, the 2nd decimal number is the management_fee_and_costs and management_fee, the 3rd decimal number is the performance_fee_costs, the 4th decimal number is buy_spread and sell_spread.",
"So the output should be:",
"{\"data\": [{\"fund name\": \"CFS Real Return Class A\", \"share name\": \"CFS Real Return Class A\", \"management_fee_and_costs\": 0.87, \"management_fee\": 0.87, \"buy_spread\": 0.15, \"sell_spread\": 0.15}, {\"fund name\": \"CFS Defensive Builder\", \"share name\": \"CFS Defensive Builder\", \"management_fee_and_costs\": 0.67, \"management_fee\": 0.67, \"performance_fee_costs\": 0.01, \"buy_spread\": 0.15, \"sell_spread\": 0.15}]}",
"\n",
@ -356,13 +363,37 @@
"buy_spread": [
"A. Exclude reported name",
"Please don't extract data by the reported names for buy_spread or sell_spread, they are: ",
"Transaction costs buy/sell spread recovery, Transaction costs reducing return of the investment option (net transaction costs)",
"Transaction costs buy/sell spread recovery, Transaction costs reducing return of the investment option (net transaction costs), ",
"Estimated transaction costs offset by buy/sell spreads (% pa), ",
"---Example Start---",
"Option name \nTotal estimated \ntransaction costs \n(% pa) \nEstimated transaction costs \noffset by buy/sell spreads \n(% pa) \nEstimated transaction costs \nborne by the option \n(% pa) \nGenerations Defensive \n0.21 \n0.04 \n0.17 \n",
"---Example End---",
"The data should be excluded, the output should be:",
"{\"data\": []}",
"B. Simple case with simple table structure:",
"---Example 1 Start---",
"Investment option Buy cost Sell cost \nLifestyle Growth 0% 0%\nLifestyle Balanced 0% 0%\nProperty 0.10% 0.10%\n",
"---Example 1 End---",
"The output should be:",
"{\"data\": [{\"fund name\": \"Lifestyle Growth\", \"share name\": \"Lifestyle Growth\", \"buy_spread\": 0, \"sell_spread\": 0}, {\"fund name\": \"Lifestyle Balanced\", \"share name\": \"Lifestyle Balanced\", \"buy_spread\": 0, \"sell_spread\": 0}, {\"fund name\": \"Property\", \"share name\": \"Property\", \"buy_spread\": 0.1, \"sell_spread\": 0.1}]}"
"{\"data\": [{\"fund name\": \"Lifestyle Growth\", \"share name\": \"Lifestyle Growth\", \"buy_spread\": 0, \"sell_spread\": 0}, {\"fund name\": \"Lifestyle Balanced\", \"share name\": \"Lifestyle Balanced\", \"buy_spread\": 0, \"sell_spread\": 0}, {\"fund name\": \"Property\", \"share name\": \"Property\", \"buy_spread\": 0.1, \"sell_spread\": 0.1}]}",
"\n",
"---Example 2 Start---",
"Fund name \nManagement fees \nand costs (% p.a.) \n1 \nTransaction costs \n(% p.a.) \n1 \nBuy/sell spread \n(%) \n2 \nEveryday Investing Balanced Fund 0.35 0.05 0.00\n",
"---Example 2 End---",
"The output should be:",
"{\"data\": [{\"fund name\": \"Everyday Investing Balanced Fund\", \"share name\": \"Everyday Investing Balanced Fund\", \"management_fee_and_costs\": 0.35, \"management_fee\": 0.35, \"buy_spread\": 0, \"sell_spread\": 0}]}",
"\n",
"---Example 3 Start---",
"Fund name \nManagement \nfees and costs \n(p.a.) 1 \nBuy/sell \nspread \n(%) 2 \nAUSTRALIAN SHARE \nFirst Sentier Australian Share Fund 0.96% 0.10\nFirst Sentier Concentrated Australian \nShare Fund 0.96% 0.10\nFirst Sentier Imputation Fund 0.97% 0.15\nAUSTRALIAN SHARE SMALL COMPANIES \nFirst Sentier Australian Small \nCompanies Fund 1.12% 0.15\nGLOBAL SHARE \nStewart Investors Worldwide Leaders \nSustainability Fund 1.17% 0.10\nStewart Investors Worldwide Sustainability \nFund 1.02% 0.10\n",
"---Example 3 End---",
"The output should be:",
"{\"data\": [{\"fund name\": \"First Sentier Australian Share Fund\", \"share name\": \"First Sentier Australian Share Fund\", \"management_fee_and_costs\": 0.96, \"management_fee\": 0.96, \"buy_spread\": 0.1, \"sell_spread\": 0.1}, {\"fund name\": \"First Sentier Concentrated Australian Share Fund\", \"share name\": \"First Sentier Concentrated Australian Share Fund\", \"management_fee_and_costs\": 0.96, \"management_fee\": 0.96, \"buy_spread\": 0.1, \"sell_spread\": 0.1}, {\"fund name\": \"First Sentier Imputation Fund\", \"share name\": \"First Sentier Imputation Fund\", \"management_fee_and_costs\": 0.97, \"management_fee\": 0.97, \"buy_spread\": 0.15, \"sell_spread\": 0.15}, {\"fund name\": \"First Sentier Australian Small Companies Fund\", \"share name\": \"First Sentier Australian Small Companies Fund\", \"management_fee_and_costs\": 1.12, \"management_fee\": 1.12, \"buy_spread\": 0.15, \"sell_spread\": 0.15}, {\"fund name\": \"Stewart Investors Worldwide Leaders Sustainability Fund\", \"share name\": \"Stewart Investors Worldwide Leaders Sustainability Fund\", \"management_fee_and_costs\": 1.17, \"management_fee\": 1.17, \"buy_spread\": 0.1, \"sell_spread\": 0.1}, {\"fund name\": \"Stewart Investors Worldwide Sustainability Fund\", \"share name\": \"Stewart Investors Worldwide Sustainability Fund\", \"management_fee_and_costs\": 1.02, \"management_fee\": 1.02, \"buy_spread\": 0.1, \"sell_spread\": 0.1}]}",
"\n",
"---Example 4 Start---",
"\n\nInvestment option \nGross total \ntransaction costs 1 \n% p.a. \nNet total transaction \ncosts 2 \n% p.a. \nBuy-sell \nspread (ITC) 3 \n% \nAllan Gray Australian Equity Fund Class A 0.06 0.00 0.40\nAlphinity Sustainable Share Fund 0.15 0.02 0.40\n",
"---Example 4 End---",
"The output should be:",
"{\"data\": [{\"fund name\": \"Allan Gray Australian Equity Fund Class A\", \"share name\": \"Allan Gray Australian Equity Fund Class A\", \"buy_spread\": 0.4, \"sell_spread\": 0.4}, {\"fund name\": \"Alphinity Sustainable Share Fund\", \"share name\": \"Alphinity Sustainable Share Fund\", \"buy_spread\": 0.4, \"sell_spread\": 0.4}]}"
],
"performance_fee_costs": [
"Performance fees is share class level data.",

View File

@ -1560,7 +1560,7 @@ if __name__ == "__main__":
# "544886057",
# "550769189",
# "553449663"]
# special_doc_id_list = ["414751292"]
# special_doc_id_list = ["446324179"]
# special_doc_id_list = ["391080133", "391080140", "401212184", "412778803", "420339794", "454036250", "414751292"]
pdf_folder: str = r"/data/aus_prospectus/pdf/"
output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"