Merge branches 'aus_prospectus_ravi' and 'aus_prospectus_ravi' of https://msstash.morningstar.com/scm/dc/dc-ml-emea-ar into aus_prospectus_ravi
This commit is contained in:
commit
1f6b781b12
|
|
@ -896,6 +896,8 @@ def calculate_metrics_based_db_data_file(audit_file_path: str = r"/data/aus_pros
|
||||||
verify_file_name = os.path.basename(verify_file_path).replace(".xlsx", "")
|
verify_file_name = os.path.basename(verify_file_path).replace(".xlsx", "")
|
||||||
if is_for_all:
|
if is_for_all:
|
||||||
verify_file_name = f"{verify_file_name}_all"
|
verify_file_name = f"{verify_file_name}_all"
|
||||||
|
if zero_equal_none:
|
||||||
|
verify_file_name = f"{verify_file_name}_zero_equal_none"
|
||||||
metrics_file_name = f"metrics_{verify_file_name}_{len(document_id_list)}_documents_4_dps_not_strict.xlsx"
|
metrics_file_name = f"metrics_{verify_file_name}_{len(document_id_list)}_documents_4_dps_not_strict.xlsx"
|
||||||
output_file = os.path.join(output_folder, metrics_file_name)
|
output_file = os.path.join(output_folder, metrics_file_name)
|
||||||
with pd.ExcelWriter(output_file) as writer:
|
with pd.ExcelWriter(output_file) as writer:
|
||||||
|
|
@ -1426,7 +1428,7 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
audit_file_path: str = r"/data/aus_prospectus/ground_truth/phase2_file/46_documents/46_documents_ground_truth_with_mapping.xlsx"
|
audit_file_path: str = r"/data/aus_prospectus/ground_truth/phase2_file/46_documents/46_documents_ground_truth_with_mapping.xlsx"
|
||||||
audit_data_sheet: str = "Sheet1"
|
audit_data_sheet: str = "Sheet1"
|
||||||
verify_file_path: str = r"/data/aus_prospectus/output/mapping_data/total/merged_mapping_data_info_46_documents_by_text.xlsx"
|
verify_file_path: str = r"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250313024715.xlsx"
|
||||||
verify_data_sheet: str = "total_mapping_data"
|
verify_data_sheet: str = "total_mapping_data"
|
||||||
# verify_document_list_file: str = "./sample_documents/aus_prospectus_29_documents_sample.txt"
|
# verify_document_list_file: str = "./sample_documents/aus_prospectus_29_documents_sample.txt"
|
||||||
verify_document_list_file_list = [None,
|
verify_document_list_file_list = [None,
|
||||||
|
|
|
||||||
|
|
@ -237,6 +237,11 @@
|
||||||
"---Example 3 End---",
|
"---Example 3 End---",
|
||||||
"The relevant values: 0.67 and 1.17, are in the range, should ignore, so the output should be:",
|
"The relevant values: 0.67 and 1.17, are in the range, should ignore, so the output should be:",
|
||||||
"{\"data\": []}",
|
"{\"data\": []}",
|
||||||
|
"---Example 4 Start---",
|
||||||
|
"Type of fee or cost Amount 2 How and when paid \nOngoing annual fees and costs 3 \nManagement fees and costs \n0.82% to 1.22% p.a. (estimated) \n",
|
||||||
|
"---Example 4 End---",
|
||||||
|
"The relevant values: 0.82 and 1.22, are in the range, should ignore, so the output should be:",
|
||||||
|
"{\"data\": []}",
|
||||||
"\n",
|
"\n",
|
||||||
"H. If the management fee and costs value including the performance fee, please exclude or subtract the performance fee value, just output the management fee and costs value.",
|
"H. If the management fee and costs value including the performance fee, please exclude or subtract the performance fee value, just output the management fee and costs value.",
|
||||||
"---Example 1 Start---",
|
"---Example 1 Start---",
|
||||||
|
|
@ -260,6 +265,8 @@
|
||||||
"The column: \"Total of management fees and costs and performance fees (% p.a.)\", meaning the value is the sum of \"Management fee and costs\" and \"performance fee\", We should ignore this column values.",
|
"The column: \"Total of management fees and costs and performance fees (% p.a.)\", meaning the value is the sum of \"Management fee and costs\" and \"performance fee\", We should ignore this column values.",
|
||||||
"The column \"Management fees and costs (% p.a.)\" is the value of \"Management fee and costs\".",
|
"The column \"Management fees and costs (% p.a.)\" is the value of \"Management fee and costs\".",
|
||||||
"Both of management_fee and management_fee_and_costs are the values for \"Management fees and costs (% p.a.)\" for this case.",
|
"Both of management_fee and management_fee_and_costs are the values for \"Management fees and costs (% p.a.)\" for this case.",
|
||||||
|
"If there are 3 decimal numbers, the 2nd decimal number is the management_fee_and_costs and management_fee, the 3rd decimal number is the buy_spread and sell_spread.",
|
||||||
|
"If there are 4 decimal numbers, the 2nd decimal number is the management_fee_and_costs and management_fee, the 3rd decimal number is the performance_fee_costs, the 4th decimal number is buy_spread and sell_spread.",
|
||||||
"So the output should be:",
|
"So the output should be:",
|
||||||
"{\"data\": [{\"fund name\": \"CFS Real Return – Class A\", \"share name\": \"CFS Real Return – Class A\", \"management_fee_and_costs\": 0.87, \"management_fee\": 0.87, \"buy_spread\": 0.15, \"sell_spread\": 0.15}, {\"fund name\": \"CFS Defensive Builder\", \"share name\": \"CFS Defensive Builder\", \"management_fee_and_costs\": 0.67, \"management_fee\": 0.67, \"performance_fee_costs\": 0.01, \"buy_spread\": 0.15, \"sell_spread\": 0.15}]}",
|
"{\"data\": [{\"fund name\": \"CFS Real Return – Class A\", \"share name\": \"CFS Real Return – Class A\", \"management_fee_and_costs\": 0.87, \"management_fee\": 0.87, \"buy_spread\": 0.15, \"sell_spread\": 0.15}, {\"fund name\": \"CFS Defensive Builder\", \"share name\": \"CFS Defensive Builder\", \"management_fee_and_costs\": 0.67, \"management_fee\": 0.67, \"performance_fee_costs\": 0.01, \"buy_spread\": 0.15, \"sell_spread\": 0.15}]}",
|
||||||
"\n",
|
"\n",
|
||||||
|
|
@ -356,13 +363,37 @@
|
||||||
"buy_spread": [
|
"buy_spread": [
|
||||||
"A. Exclude reported name",
|
"A. Exclude reported name",
|
||||||
"Please don't extract data by the reported names for buy_spread or sell_spread, they are: ",
|
"Please don't extract data by the reported names for buy_spread or sell_spread, they are: ",
|
||||||
"Transaction costs buy/sell spread recovery, Transaction costs reducing return of the investment option (net transaction costs)",
|
"Transaction costs buy/sell spread recovery, Transaction costs reducing return of the investment option (net transaction costs), ",
|
||||||
|
"Estimated transaction costs offset by buy/sell spreads (% pa), ",
|
||||||
|
"---Example Start---",
|
||||||
|
"Option name \nTotal estimated \ntransaction costs \n(% pa) \nEstimated transaction costs \noffset by buy/sell spreads \n(% pa) \nEstimated transaction costs \nborne by the option \n(% pa) \nGenerations Defensive \n0.21 \n0.04 \n0.17 \n",
|
||||||
|
"---Example End---",
|
||||||
|
"The data should be excluded, the output should be:",
|
||||||
|
"{\"data\": []}",
|
||||||
"B. Simple case with simple table structure:",
|
"B. Simple case with simple table structure:",
|
||||||
"---Example 1 Start---",
|
"---Example 1 Start---",
|
||||||
"Investment option Buy cost Sell cost \nLifestyle Growth 0% 0%\nLifestyle Balanced 0% 0%\nProperty 0.10% 0.10%\n",
|
"Investment option Buy cost Sell cost \nLifestyle Growth 0% 0%\nLifestyle Balanced 0% 0%\nProperty 0.10% 0.10%\n",
|
||||||
"---Example 1 End---",
|
"---Example 1 End---",
|
||||||
"The output should be:",
|
"The output should be:",
|
||||||
"{\"data\": [{\"fund name\": \"Lifestyle Growth\", \"share name\": \"Lifestyle Growth\", \"buy_spread\": 0, \"sell_spread\": 0}, {\"fund name\": \"Lifestyle Balanced\", \"share name\": \"Lifestyle Balanced\", \"buy_spread\": 0, \"sell_spread\": 0}, {\"fund name\": \"Property\", \"share name\": \"Property\", \"buy_spread\": 0.1, \"sell_spread\": 0.1}]}"
|
"{\"data\": [{\"fund name\": \"Lifestyle Growth\", \"share name\": \"Lifestyle Growth\", \"buy_spread\": 0, \"sell_spread\": 0}, {\"fund name\": \"Lifestyle Balanced\", \"share name\": \"Lifestyle Balanced\", \"buy_spread\": 0, \"sell_spread\": 0}, {\"fund name\": \"Property\", \"share name\": \"Property\", \"buy_spread\": 0.1, \"sell_spread\": 0.1}]}",
|
||||||
|
"\n",
|
||||||
|
"---Example 2 Start---",
|
||||||
|
"Fund name \nManagement fees \nand costs (% p.a.) \n1 \nTransaction costs \n(% p.a.) \n1 \nBuy/sell spread \n(%) \n2 \nEveryday Investing Balanced Fund 0.35 0.05 0.00\n",
|
||||||
|
"---Example 2 End---",
|
||||||
|
"The output should be:",
|
||||||
|
"{\"data\": [{\"fund name\": \"Everyday Investing Balanced Fund\", \"share name\": \"Everyday Investing Balanced Fund\", \"management_fee_and_costs\": 0.35, \"management_fee\": 0.35, \"buy_spread\": 0, \"sell_spread\": 0}]}",
|
||||||
|
"\n",
|
||||||
|
"---Example 3 Start---",
|
||||||
|
"Fund name \nManagement \nfees and costs \n(p.a.) 1 \nBuy/sell \nspread \n(%) 2 \nAUSTRALIAN SHARE \nFirst Sentier Australian Share Fund 0.96% 0.10\nFirst Sentier Concentrated Australian \nShare Fund 0.96% 0.10\nFirst Sentier Imputation Fund 0.97% 0.15\nAUSTRALIAN SHARE – SMALL COMPANIES \nFirst Sentier Australian Small \nCompanies Fund 1.12% 0.15\nGLOBAL SHARE \nStewart Investors Worldwide Leaders \nSustainability Fund 1.17% 0.10\nStewart Investors Worldwide Sustainability \nFund 1.02% 0.10\n",
|
||||||
|
"---Example 3 End---",
|
||||||
|
"The output should be:",
|
||||||
|
"{\"data\": [{\"fund name\": \"First Sentier Australian Share Fund\", \"share name\": \"First Sentier Australian Share Fund\", \"management_fee_and_costs\": 0.96, \"management_fee\": 0.96, \"buy_spread\": 0.1, \"sell_spread\": 0.1}, {\"fund name\": \"First Sentier Concentrated Australian Share Fund\", \"share name\": \"First Sentier Concentrated Australian Share Fund\", \"management_fee_and_costs\": 0.96, \"management_fee\": 0.96, \"buy_spread\": 0.1, \"sell_spread\": 0.1}, {\"fund name\": \"First Sentier Imputation Fund\", \"share name\": \"First Sentier Imputation Fund\", \"management_fee_and_costs\": 0.97, \"management_fee\": 0.97, \"buy_spread\": 0.15, \"sell_spread\": 0.15}, {\"fund name\": \"First Sentier Australian Small Companies Fund\", \"share name\": \"First Sentier Australian Small Companies Fund\", \"management_fee_and_costs\": 1.12, \"management_fee\": 1.12, \"buy_spread\": 0.15, \"sell_spread\": 0.15}, {\"fund name\": \"Stewart Investors Worldwide Leaders Sustainability Fund\", \"share name\": \"Stewart Investors Worldwide Leaders Sustainability Fund\", \"management_fee_and_costs\": 1.17, \"management_fee\": 1.17, \"buy_spread\": 0.1, \"sell_spread\": 0.1}, {\"fund name\": \"Stewart Investors Worldwide Sustainability Fund\", \"share name\": \"Stewart Investors Worldwide Sustainability Fund\", \"management_fee_and_costs\": 1.02, \"management_fee\": 1.02, \"buy_spread\": 0.1, \"sell_spread\": 0.1}]}",
|
||||||
|
"\n",
|
||||||
|
"---Example 4 Start---",
|
||||||
|
"\n\nInvestment option \nGross total \ntransaction costs 1 \n% p.a. \nNet total transaction \ncosts 2 \n% p.a. \nBuy-sell \nspread (ITC) 3 \n% \nAllan Gray Australian Equity Fund – Class A 0.06 0.00 0.40\nAlphinity Sustainable Share Fund 0.15 0.02 0.40\n",
|
||||||
|
"---Example 4 End---",
|
||||||
|
"The output should be:",
|
||||||
|
"{\"data\": [{\"fund name\": \"Allan Gray Australian Equity Fund – Class A\", \"share name\": \"Allan Gray Australian Equity Fund – Class A\", \"buy_spread\": 0.4, \"sell_spread\": 0.4}, {\"fund name\": \"Alphinity Sustainable Share Fund\", \"share name\": \"Alphinity Sustainable Share Fund\", \"buy_spread\": 0.4, \"sell_spread\": 0.4}]}"
|
||||||
],
|
],
|
||||||
"performance_fee_costs": [
|
"performance_fee_costs": [
|
||||||
"Performance fees is share class level data.",
|
"Performance fees is share class level data.",
|
||||||
|
|
|
||||||
2
main.py
2
main.py
|
|
@ -1560,7 +1560,7 @@ if __name__ == "__main__":
|
||||||
# "544886057",
|
# "544886057",
|
||||||
# "550769189",
|
# "550769189",
|
||||||
# "553449663"]
|
# "553449663"]
|
||||||
# special_doc_id_list = ["414751292"]
|
# special_doc_id_list = ["446324179"]
|
||||||
# special_doc_id_list = ["391080133", "391080140", "401212184", "412778803", "420339794", "454036250", "414751292"]
|
# special_doc_id_list = ["391080133", "391080140", "401212184", "412778803", "420339794", "454036250", "414751292"]
|
||||||
pdf_folder: str = r"/data/aus_prospectus/pdf/"
|
pdf_folder: str = r"/data/aus_prospectus/pdf/"
|
||||||
output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"
|
output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue