diff --git a/calc_metrics.py b/calc_metrics.py index 366c50b..7bc50e0 100644 --- a/calc_metrics.py +++ b/calc_metrics.py @@ -1376,8 +1376,8 @@ def clean_text(text: str): def merge_inference_data(): - file1 = r"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250318203253.xlsx" - file2 = r"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_2_documents_by_text_20250318220840.xlsx" + file1 = r"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250319000625.xlsx" + file2 = r"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_1_documents_by_text_20250319004903.xlsx" columns = [ "doc_id", "effective_date", @@ -1424,7 +1424,7 @@ def merge_inference_data(): total_extract_data_df.reset_index(drop=True, inplace=True) output_folder = r"/data/aus_prospectus/output/mapping_data/total/" - output_file = os.path.join(output_folder, "mapping_data_info_46_documents_by_text_20250318203253_new.xlsx") + output_file = os.path.join(output_folder, "mapping_data_info_46_documents_by_text_20250319000625.xlsx") with pd.ExcelWriter(output_file) as f: total_mapping_data_df.to_excel(f, index=False, sheet_name="total_mapping_data") total_extract_data_df.to_excel(f, index=False, sheet_name="total_extract_data") diff --git a/core/data_extraction.py b/core/data_extraction.py index b470792..6796661 100644 --- a/core/data_extraction.py +++ b/core/data_extraction.py @@ -285,6 +285,7 @@ class DataExtraction: data_dict["completion_token"] = result.get("completion_token", 0) data_dict["total_token"] = result.get("total_token", 0) """ + data_list = self.check_benchmark(data_list) data_list = self.supplement_ttr_pension(data_list) data_list = self.align_fund_share_name(data_list) data_list = self.supplement_minimum_initial_investment(data_list) @@ -294,7 +295,6 @@ class DataExtraction: data_list = self.post_adjust_management_fee_costs(data_list) data_list = self.check_administration_fees(data_list) - data_list = self.check_benchmark(data_list) return data_list def check_benchmark(self, data_list: list): @@ -327,8 +327,7 @@ class DataExtraction: data_item.pop("benchmark_name") elif benchmark_name[0].isalpha() and not benchmark_name[0].isupper(): data_item.pop("benchmark_name") - elif benchmark_name.lower() in ["benchmark", "composite benchmark", - "fund’s composite benchmark", "long term benchmark"]: + elif benchmark_name.lower().endswith("benchmark"): data_item.pop("benchmark_name") else: pass diff --git a/main.py b/main.py index cc27f9c..bc2d7eb 100644 --- a/main.py +++ b/main.py @@ -1538,7 +1538,7 @@ if __name__ == "__main__": with open(document_sample_file, "r", encoding="utf-8") as f: special_doc_id_list = [doc_id.strip() for doc_id in f.readlines()] document_mapping_file = r"/data/aus_prospectus/basic_information/46_documents/aus_prospectus_46_documents_mapping.xlsx" - # special_doc_id_list = ["412778803", "471206458", "420339794", "441280757", "454036250", "397107472"] + # special_doc_id_list = ["441280757"] pdf_folder: str = r"/data/aus_prospectus/pdf/" output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/" output_extract_data_child_folder: str = ( diff --git a/performance.ipynb b/performance.ipynb index 38dd6f4..b1b3d9a 100644 --- a/performance.ipynb +++ b/performance.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 10, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -36,14 +36,14 @@ "\n", "path_ground_truth = r\"/data/aus_prospectus/ground_truth/phase2_file/46_documents/46_documents_ground_truth_with_mapping.xlsx\"\n", "# path_generated_results = r\"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250317.xlsx\"\n", - "path_generated_results = r\"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250318203253_new.xlsx\"\n", + "path_generated_results = r\"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250319000625.xlsx\"\n", "provider_mapping_file_path = r\"/data/aus_prospectus/ground_truth/phase2_file/46_documents/TopProvidersBiz.xlsx\"\n", "\n" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -316,7 +316,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -330,53 +330,53 @@ "All Providers Results: \n", "Document List File - None\n", "Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n", - "management_fee_and_costs \t0.9354 \t0.8870 \t0.9893 \t0.8786 \t419 \t369 \t0 \t47 \t4 \n", - "management_fee \t0.9591 \t0.9303 \t0.9898 \t0.9214 \t419 \t387 \t0 \t29 \t4 \n", - "performance_fee_costs \t0.9261 \t0.8955 \t0.9590 \t0.9024 \t285 \t257 \t122 \t30 \t11 \n", - "interposed_vehicle_performance_fee_cost \t0.9863 \t0.9730 \t1.0000 \t0.9952 \t73 \t72 \t346 \t2 \t0 \n", - "administration_fees \t0.9940 \t0.9881 \t1.0000 \t0.9976 \t83 \t83 \t336 \t1 \t0 \n", - "total_annual_dollar_based_charges \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t70 \t70 \t350 \t0 \t0 \n", - "buy_spread \t0.9486 \t0.9171 \t0.9822 \t0.9143 \t362 \t332 \t52 \t30 \t6 \n", - "sell_spread \t0.9516 \t0.9227 \t0.9824 \t0.9190 \t362 \t334 \t52 \t28 \t6 \n", - "minimum_initial_investment \t0.9544 \t0.9638 \t0.9452 \t0.9333 \t310 \t293 \t99 \t11 \t17 \n", - "benchmark_name \t0.8971 \t0.8652 \t0.9313 \t0.9333 \t142 \t122 \t270 \t19 \t9 \n", - "TOTAL \t0.9553 \t0.9343 \t0.9779 \t0.9395 \t2525 \t2319 \t1627 \t197 \t57 \n", - "Total Funds Matched - 420\n", - "Total Funds Not Matched - 145\n", - "Percentage of Funds Matched - 74.33628318584071\n", + "management_fee_and_costs \t0.9123 \t0.8465 \t0.9891 \t0.8387 \t433 \t364 \t0 \t66 \t4 \n", + "management_fee \t0.9284 \t0.8744 \t0.9895 \t0.8664 \t433 \t376 \t0 \t54 \t4 \n", + "performance_fee_costs \t0.9217 \t0.8691 \t0.9811 \t0.8986 \t291 \t259 \t131 \t39 \t5 \n", + "interposed_vehicle_performance_fee_cost \t0.9536 \t0.9114 \t1.0000 \t0.9839 \t73 \t72 \t355 \t7 \t0 \n", + "administration_fees \t0.9857 \t0.9857 \t0.9857 \t0.9954 \t70 \t69 \t363 \t1 \t1 \n", + "total_annual_dollar_based_charges \t0.9920 \t0.9841 \t1.0000 \t0.9977 \t62 \t62 \t371 \t1 \t0 \n", + "buy_spread \t0.9483 \t0.9187 \t0.9798 \t0.9147 \t370 \t339 \t58 \t30 \t7 \n", + "sell_spread \t0.9526 \t0.9268 \t0.9799 \t0.9217 \t370 \t342 \t58 \t27 \t7 \n", + "minimum_initial_investment \t0.9593 \t0.9641 \t0.9547 \t0.9424 \t309 \t295 \t114 \t11 \t14 \n", + "benchmark_name \t0.8738 \t0.8084 \t0.9507 \t0.9101 \t157 \t135 \t260 \t32 \t7 \n", + "TOTAL \t0.9428 \t0.9089 \t0.9810 \t0.9270 \t2568 \t2313 \t1710 \t268 \t49 \n", + "Total Funds Matched - 434\n", + "Total Funds Not Matched - 131\n", + "Percentage of Funds Matched - 76.8141592920354\n", "All Providers Results: \n", "Document List File - ./sample_documents/aus_prospectus_29_documents_sample.txt\n", "Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n", - "management_fee_and_costs \t0.9408 \t0.8883 \t1.0000 \t0.8883 \t178 \t159 \t0 \t20 \t0 \n", - "management_fee \t0.9742 \t0.9497 \t1.0000 \t0.9497 \t178 \t170 \t0 \t9 \t0 \n", - "performance_fee_costs \t0.9082 \t0.8900 \t0.9271 \t0.8994 \t100 \t89 \t72 \t11 \t7 \n", - "interposed_vehicle_performance_fee_cost \t0.9905 \t0.9811 \t1.0000 \t0.9944 \t53 \t52 \t126 \t1 \t0 \n", - "administration_fees \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t15 \t15 \t164 \t0 \t0 \n", - "buy_spread \t0.9799 \t0.9716 \t0.9884 \t0.9609 \t176 \t171 \t1 \t5 \t2 \n", - "sell_spread \t0.9829 \t0.9773 \t0.9885 \t0.9665 \t176 \t172 \t1 \t4 \t2 \n", - "minimum_initial_investment \t0.9151 \t0.9538 \t0.8794 \t0.8715 \t141 \t124 \t32 \t6 \t17 \n", - "benchmark_name \t0.8957 \t0.8488 \t0.9481 \t0.9050 \t85 \t73 \t89 \t13 \t4 \n", - "TOTAL \t0.9541 \t0.9401 \t0.9702 \t0.9373 \t1102 \t1025 \t485 \t69 \t89 \n", - "Total Funds Matched - 179\n", - "Total Funds Not Matched - 17\n", - "Percentage of Funds Matched - 91.3265306122449\n", + "management_fee_and_costs \t0.9462 \t0.9027 \t0.9940 \t0.8978 \t185 \t167 \t0 \t18 \t1 \n", + "management_fee \t0.9724 \t0.9514 \t0.9944 \t0.9462 \t185 \t176 \t0 \t9 \t1 \n", + "performance_fee_costs \t0.9239 \t0.8750 \t0.9785 \t0.9194 \t99 \t91 \t80 \t13 \t2 \n", + "interposed_vehicle_performance_fee_cost \t0.9369 \t0.8814 \t1.0000 \t0.9624 \t53 \t52 \t127 \t7 \t0 \n", + "administration_fees \t0.9412 \t1.0000 \t0.8889 \t0.9946 \t9 \t8 \t177 \t0 \t1 \n", + "buy_spread \t0.9779 \t0.9672 \t0.9888 \t0.9570 \t183 \t177 \t1 \t6 \t2 \n", + "sell_spread \t0.9835 \t0.9781 \t0.9890 \t0.9677 \t183 \t179 \t1 \t4 \t2 \n", + "minimum_initial_investment \t0.9306 \t0.9571 \t0.9054 \t0.8925 \t148 \t134 \t32 \t6 \t14 \n", + "benchmark_name \t0.9206 \t0.8878 \t0.9560 \t0.9194 \t99 \t87 \t84 \t11 \t4 \n", + "TOTAL \t0.9481 \t0.9334 \t0.9661 \t0.9397 \t1144 \t1071 \t502 \t74 \t76 \n", + "Total Funds Matched - 186\n", + "Total Funds Not Matched - 10\n", + "Percentage of Funds Matched - 94.89795918367348\n", "All Providers Results: \n", "Document List File - ./sample_documents/aus_prospectus_17_documents_sample.txt\n", "Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n", - "management_fee_and_costs \t0.9313 \t0.8861 \t0.9813 \t0.8714 \t241 \t210 \t0 \t27 \t4 \n", - "management_fee \t0.9476 \t0.9156 \t0.9819 \t0.9004 \t241 \t217 \t0 \t20 \t4 \n", - "performance_fee_costs \t0.9359 \t0.8984 \t0.9767 \t0.9046 \t185 \t168 \t50 \t19 \t4 \n", - "interposed_vehicle_performance_fee_cost \t0.9756 \t0.9524 \t1.0000 \t0.9959 \t20 \t20 \t220 \t1 \t0 \n", - "administration_fees \t0.9927 \t0.9855 \t1.0000 \t0.9959 \t68 \t68 \t172 \t1 \t0 \n", - "total_annual_dollar_based_charges \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t70 \t70 \t171 \t0 \t0 \n", - "buy_spread \t0.9174 \t0.8656 \t0.9758 \t0.8797 \t186 \t161 \t51 \t25 \t4 \n", - "sell_spread \t0.9205 \t0.8710 \t0.9759 \t0.8838 \t186 \t162 \t51 \t24 \t4 \n", - "minimum_initial_investment \t0.9854 \t0.9713 \t1.0000 \t0.9793 \t169 \t169 \t67 \t5 \t0 \n", - "benchmark_name \t0.8991 \t0.8909 \t0.9074 \t0.9544 \t57 \t49 \t181 \t6 \t5 \n", - "TOTAL \t0.9505 \t0.9237 \t0.9799 \t0.9365 \t1423 \t1294 \t963 \t128 \t114 \n", - "Total Funds Matched - 241\n", - "Total Funds Not Matched - 128\n", - "Percentage of Funds Matched - 65.31165311653116\n" + "management_fee_and_costs \t0.8854 \t0.8041 \t0.9850 \t0.7944 \t248 \t197 \t0 \t48 \t3 \n", + "management_fee \t0.8929 \t0.8163 \t0.9852 \t0.8065 \t248 \t200 \t0 \t45 \t3 \n", + "performance_fee_costs \t0.9205 \t0.8660 \t0.9825 \t0.8831 \t192 \t168 \t51 \t26 \t3 \n", + "interposed_vehicle_performance_fee_cost \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t20 \t20 \t228 \t0 \t0 \n", + "administration_fees \t0.9919 \t0.9839 \t1.0000 \t0.9960 \t61 \t61 \t186 \t1 \t0 \n", + "total_annual_dollar_based_charges \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t62 \t62 \t186 \t0 \t0 \n", + "buy_spread \t0.9178 \t0.8710 \t0.9701 \t0.8831 \t187 \t162 \t57 \t24 \t5 \n", + "sell_spread \t0.9209 \t0.8763 \t0.9702 \t0.8871 \t187 \t163 \t57 \t23 \t5 \n", + "minimum_initial_investment \t0.9847 \t0.9699 \t1.0000 \t0.9798 \t161 \t161 \t82 \t5 \t0 \n", + "benchmark_name \t0.8000 \t0.6957 \t0.9412 \t0.9032 \t58 \t48 \t176 \t21 \t3 \n", + "TOTAL \t0.9314 \t0.8883 \t0.9834 \t0.9133 \t1424 \t1242 \t1023 \t193 \t98 \n", + "Total Funds Matched - 248\n", + "Total Funds Not Matched - 121\n", + "Percentage of Funds Matched - 67.20867208672087\n" ] } ], @@ -478,7 +478,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -486,9 +486,16 @@ "output_type": "stream", "text": [ "{'data_point': 'performance_fee_costs', 'doc_id': 377377369, 'sec_name': 'SPDR® S&P Emerging Markets Carbon Control Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'ANZ OA Inv-OnePath Multi Asset Income NEF', 'truth': '0', 'generated': '0.11', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OA Investment Portfolio-BlackRock Tactical Growth NE', 'truth': '0', 'generated': '0.33', 'error': 'Truth is not equal with generated'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OneAnswer Investment Portfolio - OnePath Growth Index -NE', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OA Inv-Greencape Broadcap NEF', 'truth': '0.33', 'generated': '0', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OA IP- Pendal Monthly Income Plus-NEF', 'truth': '0', 'generated': '0.02', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OA IP-Alternatives Growth Fund-NEF', 'truth': '0.41', 'generated': '0.13', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OA IP-Perpetual Balanced Growth Trust-NEF', 'truth': '0', 'generated': '0.15', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OA IP-Perpetual Conservative Growth Trust-NEF', 'truth': '0', 'generated': '0.03', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OneAnswer Investment Portfolio - BlackRock Diversified ESG Growth -NE', 'truth': '0', 'generated': '0.15', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OneAnswer Investment Portfolio - OnePath Balanced Index -NE', 'truth': '0', 'generated': '0.01', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OneAnswer Investment Portfolio - OnePath Growth Index -NE', 'truth': '0', 'generated': '0.01', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OA IP-Ausbil Australian Emerging Leaders Trust-NEF', 'truth': '0', 'generated': '0.03', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 409723592, 'sec_name': 'Vanguard Index Australian Shares Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 409723592, 'sec_name': 'Vanguard High Yield Australian Shares Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 409723592, 'sec_name': 'Vanguard Index Australian Property Securities Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", @@ -503,51 +510,51 @@ "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum International Healthcare Fund', 'truth': '0.86', 'generated': '0', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum European Fund', 'truth': '0.24', 'generated': '0', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum Japan Fund', 'truth': '0.15', 'generated': '0', 'error': 'Truth is not equal with generated'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 397107472, 'sec_name': 'AMP Capital Specialist Diversified Fixed Income Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MKPFPR - Fairview Eq Ptnr Emg Comp', 'truth': '0.56', 'generated': '0.54', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MasterKey Pension Fundamentals (Pre Retirement) - Perpetual Smll Co Fund No.2', 'truth': '0', 'generated': '0.56', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 446324179, 'sec_name': 'Lifeplan Investment Bond - Allan Gray Australian Equity Fund Class A', 'truth': '0.28', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 446324179, 'sec_name': 'Lifeplan Investment Bond MLC Horizon 2-Capital Stable Open', 'truth': '0.05', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Australian Value Trust - Active ETF', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539241700, 'sec_name': 'North Professional Balanced', 'truth': '0', 'generated': '0.05', 'error': 'Truth is not equal with generated'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 539261734, 'sec_name': 'ipac life choices Income Generator', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 521606716, 'sec_name': 'CFS Enhanced Index Balanced-Class A', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 539266874, 'sec_name': 'SUMMIT Select - Active Balanced', 'truth': '0', 'generated': '0.05', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 506913190, 'sec_name': 'FC W Pen-CFS TTR Defensive', 'truth': '', 'generated': '0.15', 'error': 'Truth is null and generated is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 523516443, 'sec_name': 'CFS MIF-Strategic Cash', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539266874, 'sec_name': 'SUMMIT Select - Active High Growth Units', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539266874, 'sec_name': 'SUMMIT Select - Active Moderately Defensive', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539266874, 'sec_name': 'SUMMIT Select - Active Growth Units', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539266874, 'sec_name': 'SUMMIT Select - Active Balanced', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539266874, 'sec_name': 'SUMMIT Select - Active Defensive Units', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 527969661, 'sec_name': 'JPMorgan Global Equity Premium Income (Hedged) Complex ETF', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 557526129, 'sec_name': 'Fortlake Real-Income Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Global Core Equity Tr AUDHdg', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Global Core Equity Trust', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 557526129, 'sec_name': 'Fortlake Real-Higher Income Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Australian Value Trust - Active ETF', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Global Value Trust -Active ETF', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Global Small Company Trust', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 544886057, 'sec_name': 'CFS Growth Builder', 'truth': '0.01', 'generated': '0.04', 'error': 'Truth is not equal with generated'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 541356150, 'sec_name': 'JPMorgan Global Research Enhanced Index Equity Trust - Class I', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 541356150, 'sec_name': 'JPMorgan Global Research Enhanced Index Equity Trust - Class I (Hedged)', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 557362553, 'sec_name': 'JPMorgan Global Select Equity Active ETF', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 550522985, 'sec_name': 'RQI Global Value – Class A', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 553449663, 'sec_name': 'AMP Capital Specialist International Share (Hedged) Fund - Class A', 'truth': '0', 'generated': '0.07', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Australian Value Trust - Active ETF', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539241700, 'sec_name': 'North Professional Balanced', 'truth': '0', 'generated': '0.05', 'error': 'Truth is not equal with generated'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 539261734, 'sec_name': 'ipac life choices Income Generator', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 521606716, 'sec_name': 'CFS Enhanced Index Balanced-Class A', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 539266874, 'sec_name': 'SUMMIT Select - Active Balanced', 'truth': '0', 'generated': '0.05', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 506913190, 'sec_name': 'FC W Pen-CFS TTR Defensive', 'truth': '', 'generated': '0.15', 'error': 'Truth is null and generated is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 523516443, 'sec_name': 'CFS MIF-Strategic Cash', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539266874, 'sec_name': 'SUMMIT Select - Active High Growth Units', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539266874, 'sec_name': 'SUMMIT Select - Active Moderately Defensive', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539266874, 'sec_name': 'SUMMIT Select - Active Growth Units', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539266874, 'sec_name': 'SUMMIT Select - Active Balanced', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539266874, 'sec_name': 'SUMMIT Select - Active Defensive Units', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 527969661, 'sec_name': 'JPMorgan Global Equity Premium Income (Hedged) Complex ETF', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 557526129, 'sec_name': 'Fortlake Real-Income Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Global Core Equity Tr AUDHdg', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Global Core Equity Trust', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 557526129, 'sec_name': 'Fortlake Real-Higher Income Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Australian Value Trust - Active ETF', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Global Value Trust -Active ETF', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Global Small Company Trust', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 544886057, 'sec_name': 'CFS Growth Builder', 'truth': '0.01', 'generated': '0.04', 'error': 'Truth is not equal with generated'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 541356150, 'sec_name': 'JPMorgan Global Research Enhanced Index Equity Trust - Class I', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 541356150, 'sec_name': 'JPMorgan Global Research Enhanced Index Equity Trust - Class I (Hedged)', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 557362553, 'sec_name': 'JPMorgan Global Select Equity Active ETF', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 550522985, 'sec_name': 'RQI Global Value – Class A', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 553449663, 'sec_name': 'AMP Capital Specialist International Share (Hedged) Fund - Class A', 'truth': '0', 'generated': '0.07', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 377377369, 'sec_name': 'SPDR® S&P Emerging Markets Carbon Control Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'ANZ OA Inv-OnePath Multi Asset Income NEF', 'truth': '0', 'generated': '0.11', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OA Investment Portfolio-BlackRock Tactical Growth NE', 'truth': '0', 'generated': '0.33', 'error': 'Truth is not equal with generated'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OneAnswer Investment Portfolio - OnePath Growth Index -NE', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OA Inv-Greencape Broadcap NEF', 'truth': '0.33', 'generated': '0', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OA IP- Pendal Monthly Income Plus-NEF', 'truth': '0', 'generated': '0.02', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OA IP-Alternatives Growth Fund-NEF', 'truth': '0.41', 'generated': '0.13', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OA IP-Perpetual Balanced Growth Trust-NEF', 'truth': '0', 'generated': '0.15', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OA IP-Perpetual Conservative Growth Trust-NEF', 'truth': '0', 'generated': '0.03', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OneAnswer Investment Portfolio - BlackRock Diversified ESG Growth -NE', 'truth': '0', 'generated': '0.15', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OneAnswer Investment Portfolio - OnePath Balanced Index -NE', 'truth': '0', 'generated': '0.01', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OneAnswer Investment Portfolio - OnePath Growth Index -NE', 'truth': '0', 'generated': '0.01', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OA IP-Ausbil Australian Emerging Leaders Trust-NEF', 'truth': '0', 'generated': '0.03', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 409723592, 'sec_name': 'Vanguard Index Australian Shares Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 409723592, 'sec_name': 'Vanguard High Yield Australian Shares Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 409723592, 'sec_name': 'Vanguard Index Australian Property Securities Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", @@ -562,7 +569,6 @@ "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum International Healthcare Fund', 'truth': '0.86', 'generated': '0', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum European Fund', 'truth': '0.24', 'generated': '0', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum Japan Fund', 'truth': '0.15', 'generated': '0', 'error': 'Truth is not equal with generated'}\n", - "{'data_point': 'performance_fee_costs', 'doc_id': 397107472, 'sec_name': 'AMP Capital Specialist Diversified Fixed Income Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MKPFPR - Fairview Eq Ptnr Emg Comp', 'truth': '0.56', 'generated': '0.54', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MasterKey Pension Fundamentals (Pre Retirement) - Perpetual Smll Co Fund No.2', 'truth': '0', 'generated': '0.56', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 446324179, 'sec_name': 'Lifeplan Investment Bond - Allan Gray Australian Equity Fund Class A', 'truth': '0.28', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", @@ -578,7 +584,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 21, "metadata": {}, "outputs": [ { diff --git a/utils/benchmark_names.py b/utils/benchmark_names.py index b76b411..0b45a98 100644 --- a/utils/benchmark_names.py +++ b/utils/benchmark_names.py @@ -11,7 +11,6 @@ benchmark_keywords =[ "CSI300 index", "S&P/ASX Small Resources Accumulation Index", "Bloomberg AusBond Composite 0+ Yr Index", - "Composite Benchmark", "Bloomberg AusBond Bank Bill Index", "Bloomberg Barclays Global Aggregate Bond Index (fully hedged) in Australian dollars", "MSCI World ex Australia (Standard) Index (Net Dividends) in Australian dollars",