update instructions for total_annual_dollar_based_charges
This commit is contained in:
parent
0ce604021c
commit
b3941ee4b3
|
|
@ -1376,44 +1376,53 @@ def clean_text(text: str):
|
||||||
|
|
||||||
|
|
||||||
def merge_inference_data():
|
def merge_inference_data():
|
||||||
file1 = r"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250308220117.xlsx"
|
file1 = r"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250317_Ravi.xlsx"
|
||||||
file2 = r"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_5_documents_by_text_20250311165607.xlsx"
|
file2 = r"/data/aus_prospectus/output/merged_data/docs/excel/merged_420339794.xlsx"
|
||||||
columns = [
|
columns = [
|
||||||
"doc_id",
|
"doc_id",
|
||||||
|
"effective_date",
|
||||||
"raw_fund_name",
|
"raw_fund_name",
|
||||||
|
"raw_share_name",
|
||||||
|
"raw_name",
|
||||||
"fund_id",
|
"fund_id",
|
||||||
"fund_name",
|
"fund_name",
|
||||||
"raw_share_name",
|
|
||||||
"sec_id",
|
"sec_id",
|
||||||
"sec_name",
|
"sec_name",
|
||||||
|
"page_index",
|
||||||
"management_fee_and_costs",
|
"management_fee_and_costs",
|
||||||
"management_fee",
|
"management_fee",
|
||||||
"administration_fees",
|
"administration_fees",
|
||||||
"minimum_initial_investment",
|
|
||||||
"benchmark_name",
|
|
||||||
"performance_fee_costs",
|
"performance_fee_costs",
|
||||||
"interposed_vehicle_performance_fee_cost",
|
"interposed_vehicle_performance_fee_cost",
|
||||||
"buy_spread",
|
"buy_spread",
|
||||||
"sell_spread",
|
"sell_spread",
|
||||||
"total_annual_dollar_based_charges"
|
"total_annual_dollar_based_charges",
|
||||||
|
"minimum_initial_investment",
|
||||||
|
"benchmark_name",
|
||||||
|
"indirect_costs",
|
||||||
|
"recoverable_expenses",
|
||||||
|
"change_recoverable_expenses"
|
||||||
]
|
]
|
||||||
|
|
||||||
file1_data_df = pd.read_excel(file1, sheet_name="total_mapping_data")
|
file1_data_df = pd.read_excel(file1, sheet_name="total_mapping_data")
|
||||||
file1_data_df = file1_data_df[columns]
|
file1_data_df = file1_data_df[columns]
|
||||||
file2_data_df = pd.read_excel(file2, sheet_name="total_mapping_data")
|
# remove the rows which doc_id is 420339794 from file1_data_df
|
||||||
|
file1_data_df = file1_data_df[file1_data_df["doc_id"] != 420339794]
|
||||||
|
|
||||||
|
file2_data_df = pd.read_excel(file2, sheet_name="merged_data")
|
||||||
file2_data_df = file2_data_df[columns]
|
file2_data_df = file2_data_df[columns]
|
||||||
total_data_df = pd.concat([file1_data_df, file2_data_df])
|
total_data_df = pd.concat([file1_data_df, file2_data_df])
|
||||||
total_data_df.reset_index(drop=True, inplace=True)
|
total_data_df.reset_index(drop=True, inplace=True)
|
||||||
|
|
||||||
output_folder = r"/data/aus_prospectus/output/mapping_data/total/"
|
output_folder = r"/data/aus_prospectus/output/mapping_data/total/"
|
||||||
output_file = os.path.join(output_folder, "merged_mapping_data_info_46_documents_by_text.xlsx")
|
output_file = os.path.join(output_folder, "mapping_data_info_46_documents_by_text_20250317_Ravi_modified.xlsx")
|
||||||
with pd.ExcelWriter(output_file) as f:
|
with pd.ExcelWriter(output_file) as f:
|
||||||
total_data_df.to_excel(f, index=False, sheet_name="total_mapping_data")
|
total_data_df.to_excel(f, index=False, sheet_name="total_mapping_data")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# merge_inference_data()
|
merge_inference_data()
|
||||||
# adjust_column_order()
|
# adjust_column_order()
|
||||||
# set_mapping_to_data_side_documents_data()
|
# set_mapping_to_data_side_documents_data()
|
||||||
|
|
||||||
|
|
@ -1436,14 +1445,14 @@ if __name__ == "__main__":
|
||||||
"./sample_documents/aus_prospectus_17_documents_sample.txt"]
|
"./sample_documents/aus_prospectus_17_documents_sample.txt"]
|
||||||
zero_equal_none = False
|
zero_equal_none = False
|
||||||
is_for_all = True
|
is_for_all = True
|
||||||
for verify_document_list_file in verify_document_list_file_list:
|
# for verify_document_list_file in verify_document_list_file_list:
|
||||||
calculate_metrics_based_db_data_file(audit_file_path=audit_file_path,
|
# calculate_metrics_based_db_data_file(audit_file_path=audit_file_path,
|
||||||
audit_data_sheet=audit_data_sheet,
|
# audit_data_sheet=audit_data_sheet,
|
||||||
verify_file_path=verify_file_path,
|
# verify_file_path=verify_file_path,
|
||||||
verify_data_sheet=verify_data_sheet,
|
# verify_data_sheet=verify_data_sheet,
|
||||||
verify_document_list_file = verify_document_list_file,
|
# verify_document_list_file = verify_document_list_file,
|
||||||
is_for_all=is_for_all,
|
# is_for_all=is_for_all,
|
||||||
zero_equal_none=zero_equal_none)
|
# zero_equal_none=zero_equal_none)
|
||||||
|
|
||||||
# for verify_document_list_file in verify_document_list_file_list:
|
# for verify_document_list_file in verify_document_list_file_list:
|
||||||
# calculate_metrics_by_provider(audit_file_path=audit_file_path,
|
# calculate_metrics_by_provider(audit_file_path=audit_file_path,
|
||||||
|
|
|
||||||
|
|
@ -793,7 +793,7 @@ class DataExtraction:
|
||||||
previous_page_datapoints = []
|
previous_page_datapoints = []
|
||||||
previous_page_fund_name = None
|
previous_page_fund_name = None
|
||||||
for page_num, page_text in self.page_text_dict.items():
|
for page_num, page_text in self.page_text_dict.items():
|
||||||
# if page_num not in [25]:
|
# if page_num not in [4, 5]:
|
||||||
# continue
|
# continue
|
||||||
if page_num in handled_page_num_list:
|
if page_num in handled_page_num_list:
|
||||||
continue
|
continue
|
||||||
|
|
|
||||||
|
|
@ -351,12 +351,11 @@
|
||||||
"total_annual_dollar_based_charges": [
|
"total_annual_dollar_based_charges": [
|
||||||
"Total annual dollar-based charges are share class level data.",
|
"Total annual dollar-based charges are share class level data.",
|
||||||
"A. Its value corresponds to the administration fees and costs that are charged on a weekly basis.",
|
"A. Its value corresponds to the administration fees and costs that are charged on a weekly basis.",
|
||||||
"----Example 1 Start----",
|
"----Example Start----",
|
||||||
"MLC MasterKey Super & Pension Fundamentals\nType of fee or cost \nOngoing annual fees and costs 1 \nAmount \nHow and when paid \nOther administration costs paid from \nreserves of 0.00% pa of your account \nbalance. \nPlus \nA fixed fee of $1.50 per week \nThis fee is deducted monthly if your account balance is below $50,000 \nwhen the percentage administration fee is deducted. \nInvestment fees and \ncosts 2 \nInvestment fees and estimated costs \nfor MLC Horizon 4 Balanced Portfolio, \n1.20% pa. \nYou won ’ t see these fees and costs as direct charges to your account. \nThey're reflected in the daily unit price of each investment option and will \nreduce the net return on your investment \nInvestment fees and estimated costs \nfor other investment options, ranges \nfrom 0.00% pa to 2.84% pa \n(estimated). \nTransaction costs \nMLC Horizon 4 Balanced Portfolio, \n0.06% pa (estimated). \nOther investment options, ranges \nfrom 0.00% pa to 0.24% pa \n(estimated). \nYou won ’ t see these costs as direct charges to your account. They're \nreflected in the daily unit price of each investment option and will reduce \nthe net return on your investment. \nMember activity related fees and costs \nBuy-sell spread \nYou won ’ t see this fee as a direct charge to your account. It ’ s reflected in \nthe buy and sell unit price of each investment option when there ’ s a \ntransaction on your account. \nMLC Horizon 4 Balanced Portfolio, \n0.10%/0.10% \nOther investment options, ranges \nfrom 0.00%/0.00% to 0.30%/0.30% \nThe current buy-sell spreads of an investment option are available at \nmlc.com.au/buysellspreads \n",
|
"MLC MasterKey Super & Pension Fundamentals\nType of fee or cost \nOngoing annual fees and costs 1 \nAmount \nHow and when paid \nOther administration costs paid from \nreserves of 0.00% pa of your account \nbalance. \nPlus \nA fixed fee of $1.50 per week \nThis fee is deducted monthly if your account balance is below $50,000 \nwhen the percentage administration fee is deducted. \nInvestment fees and \ncosts 2 \nInvestment fees and estimated costs \nfor MLC Horizon 4 Balanced Portfolio, \n1.20% pa. \nYou won ’ t see these fees and costs as direct charges to your account. \nThey're reflected in the daily unit price of each investment option and will \nreduce the net return on your investment \nInvestment fees and estimated costs \nfor other investment options, ranges \nfrom 0.00% pa to 2.84% pa \n(estimated). \nTransaction costs \nMLC Horizon 4 Balanced Portfolio, \n0.06% pa (estimated). \nOther investment options, ranges \nfrom 0.00% pa to 0.24% pa \n(estimated). \nYou won ’ t see these costs as direct charges to your account. They're \nreflected in the daily unit price of each investment option and will reduce \nthe net return on your investment. \nMember activity related fees and costs \nBuy-sell spread \nYou won ’ t see this fee as a direct charge to your account. It ’ s reflected in \nthe buy and sell unit price of each investment option when there ’ s a \ntransaction on your account. \nMLC Horizon 4 Balanced Portfolio, \n0.10%/0.10% \nOther investment options, ranges \nfrom 0.00%/0.00% to 0.30%/0.30% \nThe current buy-sell spreads of an investment option are available at \nmlc.com.au/buysellspreads \n",
|
||||||
"----Example 1 End----",
|
"----Example End----",
|
||||||
"According to example, the fixed fee is $1.50 per week, so total_annual_dollar_based_charges is 1.50 * 52 = 78",
|
"According to example, the fixed fee is $1.50 per week, so total_annual_dollar_based_charges is 1.50 * 52 = 78",
|
||||||
"In the context, also with management fees and costs, management fee, buy_spread and sell_spread for specific fund: MLC Horizon 4 Balanced Portfolio.",
|
"In the example, also with management fees and costs, management fee, buy_spread and sell_spread for specific fund: MLC Horizon 4 Balanced Portfolio.",
|
||||||
"Please output the relevant values based on specific fund name.",
|
|
||||||
"The output should be:",
|
"The output should be:",
|
||||||
"{\"data\": [{\"fund name\": \"MLC MasterKey Super & Pension Fundamentals\", \"share name\": \"MLC MasterKey Super & Pension Fundamentals\", \"total_annual_dollar_based_charges\": 78}, {\"fund name\": \"MLC Horizon 4 Balanced Portfolio\", \"share name\": \"MLC Horizon 4 Balanced Portfolio\", \"management_fee_and_costs\": 1.2, \"management_fee\": 1.2, \"buy_spread\": 0.1, \"sell_spread\": 0.1}]}",
|
"{\"data\": [{\"fund name\": \"MLC MasterKey Super & Pension Fundamentals\", \"share name\": \"MLC MasterKey Super & Pension Fundamentals\", \"total_annual_dollar_based_charges\": 78}, {\"fund name\": \"MLC Horizon 4 Balanced Portfolio\", \"share name\": \"MLC Horizon 4 Balanced Portfolio\", \"management_fee_and_costs\": 1.2, \"management_fee\": 1.2, \"buy_spread\": 0.1, \"sell_spread\": 0.1}]}",
|
||||||
"\n",
|
"\n",
|
||||||
|
|
|
||||||
2
main.py
2
main.py
|
|
@ -1538,7 +1538,7 @@ if __name__ == "__main__":
|
||||||
with open(document_sample_file, "r", encoding="utf-8") as f:
|
with open(document_sample_file, "r", encoding="utf-8") as f:
|
||||||
special_doc_id_list = [doc_id.strip() for doc_id in f.readlines()]
|
special_doc_id_list = [doc_id.strip() for doc_id in f.readlines()]
|
||||||
document_mapping_file = r"/data/aus_prospectus/basic_information/46_documents/aus_prospectus_46_documents_mapping.xlsx"
|
document_mapping_file = r"/data/aus_prospectus/basic_information/46_documents/aus_prospectus_46_documents_mapping.xlsx"
|
||||||
# special_doc_id_list = ["441280757", "454036250"]
|
# special_doc_id_list = ["420339794"]
|
||||||
pdf_folder: str = r"/data/aus_prospectus/pdf/"
|
pdf_folder: str = r"/data/aus_prospectus/pdf/"
|
||||||
output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"
|
output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"
|
||||||
output_extract_data_child_folder: str = (
|
output_extract_data_child_folder: str = (
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 18,
|
"execution_count": 2,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
|
@ -30,7 +30,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 34,
|
"execution_count": 3,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
|
|
@ -44,53 +44,53 @@
|
||||||
"All Providers Results: \n",
|
"All Providers Results: \n",
|
||||||
"Document List File - None\n",
|
"Document List File - None\n",
|
||||||
"Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n",
|
"Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n",
|
||||||
"management_fee_and_costs \t0.9169 \t0.8581 \t0.9843 \t0.8465 \t442 \t375 \t0 \t62 \t6 \n",
|
"management_fee_and_costs \t0.9204 \t0.8852 \t0.9586 \t0.8533 \t406 \t347 \t2 \t45 \t15 \n",
|
||||||
"management_fee \t0.9351 \t0.8902 \t0.9848 \t0.8781 \t442 \t389 \t0 \t48 \t6 \n",
|
"management_fee \t0.9415 \t0.9235 \t0.9602 \t0.8900 \t406 \t362 \t2 \t30 \t15 \n",
|
||||||
"performance_fee_costs \t0.8653 \t0.8426 \t0.8893 \t0.8194 \t309 \t257 \t106 \t48 \t32 \n",
|
"performance_fee_costs \t0.8953 \t0.9277 \t0.8652 \t0.8680 \t281 \t231 \t124 \t18 \t36 \n",
|
||||||
"interposed_vehicle_performance_fee_cost \t0.9412 \t0.8889 \t1.0000 \t0.9797 \t73 \t72 \t362 \t9 \t0 \n",
|
"interposed_vehicle_performance_fee_cost \t0.9600 \t0.9231 \t1.0000 \t0.9853 \t73 \t72 \t331 \t6 \t0 \n",
|
||||||
"administration_fees \t0.9811 \t0.9873 \t0.9750 \t0.9932 \t80 \t78 \t362 \t1 \t2 \n",
|
"administration_fees \t0.8319 \t0.9592 \t0.7344 \t0.9535 \t64 \t47 \t343 \t2 \t17 \n",
|
||||||
"total_annual_dollar_based_charges \t0.9857 \t0.9718 \t1.0000 \t0.9955 \t69 \t69 \t372 \t2 \t0 \n",
|
"total_annual_dollar_based_charges \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t66 \t66 \t343 \t0 \t0 \n",
|
||||||
"buy_spread \t0.9129 \t0.8879 \t0.9392 \t0.8668 \t363 \t309 \t75 \t39 \t20 \n",
|
"buy_spread \t0.9359 \t0.9235 \t0.9486 \t0.8949 \t349 \t314 \t52 \t26 \t17 \n",
|
||||||
"sell_spread \t0.9176 \t0.8966 \t0.9398 \t0.8736 \t363 \t312 \t75 \t36 \t20 \n",
|
"sell_spread \t0.9407 \t0.9324 \t0.9491 \t0.9022 \t349 \t317 \t52 \t23 \t17 \n",
|
||||||
"minimum_initial_investment \t0.9532 \t0.9641 \t0.9425 \t0.9345 \t313 \t295 \t119 \t11 \t18 \n",
|
"minimum_initial_investment \t0.9737 \t0.9642 \t0.9834 \t0.9609 \t301 \t296 \t97 \t11 \t5 \n",
|
||||||
"benchmark_name \t0.8100 \t0.7847 \t0.8370 \t0.8804 \t148 \t113 \t277 \t31 \t22 \n",
|
"benchmark_name \t0.8047 \t0.8175 \t0.7923 \t0.8778 \t141 \t103 \t256 \t23 \t27 \n",
|
||||||
"TOTAL \t0.9219 \t0.8972 \t0.9492 \t0.9068 \t2602 \t2269 \t1748 \t287 \t126 \n",
|
"TOTAL \t0.9204 \t0.9256 \t0.9192 \t0.9186 \t2436 \t2155 \t1602 \t184 \t149 \n",
|
||||||
"Total Funds Matched - 443\n",
|
"Total Funds Matched - 409\n",
|
||||||
"Total Funds Not Matched - 122\n",
|
"Total Funds Not Matched - 156\n",
|
||||||
"Percentage of Funds Matched - 78.40707964601769\n",
|
"Percentage of Funds Matched - 72.38938053097344\n",
|
||||||
"All Providers Results: \n",
|
"All Providers Results: \n",
|
||||||
"Document List File - ./sample_documents/aus_prospectus_29_documents_sample.txt\n",
|
"Document List File - ./sample_documents/aus_prospectus_29_documents_sample.txt\n",
|
||||||
"Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n",
|
"Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n",
|
||||||
"management_fee_and_costs \t0.9412 \t0.9040 \t0.9816 \t0.8889 \t179 \t160 \t0 \t17 \t3 \n",
|
"management_fee_and_costs \t0.9457 \t0.8970 \t1.0000 \t0.8970 \t164 \t148 \t0 \t17 \t0 \n",
|
||||||
"management_fee \t0.9744 \t0.9661 \t0.9828 \t0.9500 \t179 \t171 \t0 \t6 \t3 \n",
|
"management_fee \t0.9783 \t0.9576 \t1.0000 \t0.9576 \t164 \t158 \t0 \t7 \t0 \n",
|
||||||
"performance_fee_costs \t0.7876 \t0.8172 \t0.7600 \t0.7722 \t102 \t76 \t63 \t17 \t24 \n",
|
"performance_fee_costs \t0.8263 \t0.8846 \t0.7753 \t0.8242 \t95 \t69 \t67 \t9 \t20 \n",
|
||||||
"interposed_vehicle_performance_fee_cost \t0.9286 \t0.8667 \t1.0000 \t0.9556 \t53 \t52 \t120 \t8 \t0 \n",
|
"interposed_vehicle_performance_fee_cost \t0.9455 \t0.8966 \t1.0000 \t0.9636 \t53 \t52 \t107 \t6 \t0 \n",
|
||||||
"administration_fees \t0.9231 \t1.0000 \t0.8571 \t0.9889 \t14 \t12 \t166 \t0 \t2 \n",
|
"administration_fees \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t1 \t1 \t164 \t0 \t0 \n",
|
||||||
"buy_spread \t0.9217 \t0.9053 \t0.9387 \t0.8556 \t177 \t153 \t1 \t16 \t10 \n",
|
"buy_spread \t0.9812 \t0.9752 \t0.9874 \t0.9636 \t162 \t157 \t2 \t4 \t2 \n",
|
||||||
"sell_spread \t0.9281 \t0.9172 \t0.9394 \t0.8667 \t177 \t155 \t1 \t14 \t10 \n",
|
"sell_spread \t0.9876 \t0.9876 \t0.9876 \t0.9758 \t162 \t159 \t2 \t2 \t2 \n",
|
||||||
"minimum_initial_investment \t0.9118 \t0.9538 \t0.8732 \t0.8667 \t142 \t124 \t32 \t6 \t18 \n",
|
"minimum_initial_investment \t0.9569 \t0.9531 \t0.9606 \t0.9333 \t127 \t122 \t32 \t6 \t5 \n",
|
||||||
"benchmark_name \t0.8280 \t0.8333 \t0.8228 \t0.8500 \t87 \t65 \t88 \t13 \t14 \n",
|
"benchmark_name \t0.7651 \t0.7808 \t0.7500 \t0.7879 \t85 \t57 \t73 \t16 \t19 \n",
|
||||||
"TOTAL \t0.9049 \t0.9071 \t0.9062 \t0.8883 \t1110 \t968 \t471 \t97 \t210 \n",
|
"TOTAL \t0.9318 \t0.9258 \t0.9401 \t0.9226 \t1013 \t923 \t447 \t67 \t197 \n",
|
||||||
"Total Funds Matched - 180\n",
|
"Total Funds Matched - 165\n",
|
||||||
"Total Funds Not Matched - 16\n",
|
"Total Funds Not Matched - 31\n",
|
||||||
"Percentage of Funds Matched - 91.83673469387756\n",
|
"Percentage of Funds Matched - 84.18367346938776\n",
|
||||||
"All Providers Results: \n",
|
"All Providers Results: \n",
|
||||||
"Document List File - ./sample_documents/aus_prospectus_17_documents_sample.txt\n",
|
"Document List File - ./sample_documents/aus_prospectus_17_documents_sample.txt\n",
|
||||||
"Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n",
|
"Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n",
|
||||||
"management_fee_and_costs \t0.8996 \t0.8269 \t0.9862 \t0.8175 \t263 \t215 \t0 \t45 \t3 \n",
|
"management_fee_and_costs \t0.9025 \t0.8767 \t0.9299 \t0.8238 \t242 \t199 \t2 \t28 \t15 \n",
|
||||||
"management_fee \t0.9064 \t0.8385 \t0.9864 \t0.8289 \t263 \t218 \t0 \t42 \t3 \n",
|
"management_fee \t0.9148 \t0.8987 \t0.9315 \t0.8443 \t242 \t204 \t2 \t23 \t15 \n",
|
||||||
"performance_fee_costs \t0.9027 \t0.8538 \t0.9577 \t0.8517 \t207 \t181 \t43 \t31 \t8 \n",
|
"performance_fee_costs \t0.9284 \t0.9474 \t0.9101 \t0.8975 \t186 \t162 \t57 \t9 \t16 \n",
|
||||||
"interposed_vehicle_performance_fee_cost \t0.9756 \t0.9524 \t1.0000 \t0.9962 \t20 \t20 \t242 \t1 \t0 \n",
|
"interposed_vehicle_performance_fee_cost \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t20 \t20 \t224 \t0 \t0 \n",
|
||||||
"administration_fees \t0.9925 \t0.9851 \t1.0000 \t0.9962 \t66 \t66 \t196 \t1 \t0 \n",
|
"administration_fees \t0.8288 \t0.9583 \t0.7302 \t0.9221 \t63 \t46 \t179 \t2 \t17 \n",
|
||||||
"total_annual_dollar_based_charges \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t69 \t69 \t194 \t0 \t0 \n",
|
"total_annual_dollar_based_charges \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t66 \t66 \t178 \t0 \t0 \n",
|
||||||
"buy_spread \t0.9043 \t0.8715 \t0.9398 \t0.8745 \t186 \t156 \t74 \t23 \t10 \n",
|
"buy_spread \t0.8946 \t0.8771 \t0.9128 \t0.8484 \t187 \t157 \t50 \t22 \t15 \n",
|
||||||
"sell_spread \t0.9075 \t0.8771 \t0.9401 \t0.8783 \t186 \t157 \t74 \t22 \t10 \n",
|
"sell_spread \t0.8977 \t0.8827 \t0.9133 \t0.8525 \t187 \t158 \t50 \t21 \t15 \n",
|
||||||
"minimum_initial_investment \t0.9856 \t0.9716 \t1.0000 \t0.9810 \t171 \t171 \t87 \t5 \t0 \n",
|
"minimum_initial_investment \t0.9858 \t0.9721 \t1.0000 \t0.9795 \t174 \t174 \t65 \t5 \t0 \n",
|
||||||
"benchmark_name \t0.7869 \t0.7273 \t0.8571 \t0.9011 \t61 \t48 \t189 \t18 \t8 \n",
|
"benchmark_name \t0.8598 \t0.8679 \t0.8519 \t0.9385 \t56 \t46 \t183 \t7 \t8 \n",
|
||||||
"TOTAL \t0.9261 \t0.8904 \t0.9667 \t0.9125 \t1492 \t1301 \t1099 \t188 \t252 \n",
|
"TOTAL \t0.9212 \t0.9281 \t0.9180 \t0.9107 \t1423 \t1232 \t990 \t117 \t298 \n",
|
||||||
"Total Funds Matched - 263\n",
|
"Total Funds Matched - 244\n",
|
||||||
"Total Funds Not Matched - 106\n",
|
"Total Funds Not Matched - 125\n",
|
||||||
"Percentage of Funds Matched - 71.27371273712737\n"
|
"Percentage of Funds Matched - 66.12466124661248\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|
@ -114,7 +114,8 @@
|
||||||
"\"\"\"\n",
|
"\"\"\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"path_ground_truth = r\"/data/aus_prospectus/ground_truth/phase2_file/46_documents/46_documents_ground_truth_with_mapping.xlsx\"\n",
|
"path_ground_truth = r\"/data/aus_prospectus/ground_truth/phase2_file/46_documents/46_documents_ground_truth_with_mapping.xlsx\"\n",
|
||||||
"path_generated_results = r\"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250314113438.xlsx\"\n",
|
"path_generated_results = r\"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250317_Ravi_modified.xlsx\"\n",
|
||||||
|
"# path_generated_results = r\"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250317_Ravi.xlsx\"\n",
|
||||||
"provider_mapping_file_path = r\"/data/aus_prospectus/ground_truth/phase2_file/46_documents/TopProvidersBiz.xlsx\"\n",
|
"provider_mapping_file_path = r\"/data/aus_prospectus/ground_truth/phase2_file/46_documents/TopProvidersBiz.xlsx\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"funds_matched = 0\n",
|
"funds_matched = 0\n",
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue