def check_total_annual_dollar_based_charges(self, data_list: list):
    """
    Re-label the first periodic-looking total_annual_dollar_based_charges entry.

    Walks every item under ``data_dict["extract_data"]["data"]`` and looks for
    the first item that has a non-empty ``fund_name`` and a
    ``total_annual_dollar_based_charges`` value which, divided by 52 or by 12,
    is unchanged by rounding to 4 decimal places. For that item, ``fund_name``
    and ``share_name`` are overwritten with ``self.document_production``.
    Only the first matching item in the whole ``data_list`` is changed.

    Args:
        data_list: list of dicts; each may hold an ``extract_data`` dict whose
            ``data`` entry is a list of data-point dicts. Modified in place.

    Returns:
        The same ``data_list`` object that was passed in.
    """
    datapoint = "total_annual_dollar_based_charges"
    for data_dict in data_list:
        # "or" fallbacks also cover keys that are present but set to None.
        extract_data = data_dict.get("extract_data") or {}
        for data_item in extract_data.get("data") or []:
            # Items without a fund name (missing, empty or None) are skipped.
            if not data_item.get("fund_name"):
                continue
            if datapoint not in data_item:
                continue
            try:
                value = float(data_item[datapoint])
            except (TypeError, ValueError):
                # None, "" or non-numeric text: nothing to divide, so skip the
                # item instead of raising TypeError on the division below.
                continue
            # Same criterion as before: the per-period amount has at most
            # 4 decimal places when the value is split into 52 or 12 parts.
            if any(
                value / periods == round(value / periods, 4)
                for periods in (52, 12)
            ):
                data_item["fund_name"] = self.document_production
                data_item["share_name"] = self.document_production
                # Stop at the first match across all documents in data_list.
                return data_list
    return data_list
diff --git a/main.py b/main.py index 8d2eeda..c2e4fed 100644 --- a/main.py +++ b/main.py @@ -1448,7 +1448,7 @@ def get_aus_prospectus_document_category(): def test_post_adjust_extract_data(): - doc_id = "462780211" + doc_id = "448576924" pdf_folder: str = r"/data/aus_prospectus/pdf/" output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/" output_extract_data_child_folder: str = ( @@ -1538,7 +1538,7 @@ if __name__ == "__main__": with open(document_sample_file, "r", encoding="utf-8") as f: special_doc_id_list = [doc_id.strip() for doc_id in f.readlines() if len(doc_id.strip()) > 0] - # special_doc_id_list = ["420339794"] + # special_doc_id_list = ["448576924"] pdf_folder: str = r"/data/aus_prospectus/pdf/" output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/" output_extract_data_child_folder: str = ( diff --git a/performance.ipynb b/performance.ipynb index 6a2f94c..1a2ea6e 100644 --- a/performance.ipynb +++ b/performance.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -15,51 +15,51 @@ "from utils.similarity import Similarity\n", "\n", "\n", - "# imp_datapoints = [\"Management Fee and Costs\", \"Management Fee\", \"Performance fee and cost\", \"Interposed vehicle Performance fee and Costs\",\n", - "# \"Administration Fee and costs\", \"Total Annual Dollar Based Charges\", \"Buy Spread\", \"Sell Spread\", \"Performance Fee\",\n", - "# \"Minimum Initial Investment\", \"Benchmark\"]\n", - "\n", - "\n", - "# imp_datapoints_mapping = {\n", - "# \"Management Fee and Costs\": \"management_fee_and_costs\",\n", - "# \"Management Fee\": \"management_fee\",\n", - "# \"Performance fee and cost\": \"performance_fee_costs\",\n", - "# \"Interposed vehicle Performance fee and Costs\": \"interposed_vehicle_performance_fee_cost\",\n", - "# \"Administration Fee and costs\": \"administration_fees\",\n", - "# \"Total Annual Dollar Based Charges\": 
\"total_annual_dollar_based_charges\",\n", - "# \"Buy Spread\": \"buy_spread\",\n", - "# \"Sell Spread\": \"sell_spread\",\n", - "# \"Performance Fee\": \"PerformanceFeeCharged\",\n", - "# \"Minimum Initial Investment\": \"minimum_initial_investment\",\n", - "# \"Benchmark\": \"benchmark_name\"\n", - "# }\n", - "\n", - "imp_datapoints = [\"Management Fee and Costs\", \"Management Fee\", \"Performance fee and cost\",\n", - " \"Administration Fee and costs\", \"Total Annual Dollar Based Charges\", \"Buy Spread\", \"Sell Spread\"]\n", + "imp_datapoints = [\"Management Fee and Costs\", \"Management Fee\", \"Performance fee and cost\", \"Interposed vehicle Performance fee and Costs\",\n", + " \"Administration Fee and costs\", \"Total Annual Dollar Based Charges\", \"Buy Spread\", \"Sell Spread\", \"Performance Fee\",\n", + " \"Minimum Initial Investment\", \"Benchmark\"]\n", "\n", "\n", "imp_datapoints_mapping = {\n", " \"Management Fee and Costs\": \"management_fee_and_costs\",\n", " \"Management Fee\": \"management_fee\",\n", " \"Performance fee and cost\": \"performance_fee_costs\",\n", + " \"Interposed vehicle Performance fee and Costs\": \"interposed_vehicle_performance_fee_cost\",\n", " \"Administration Fee and costs\": \"administration_fees\",\n", " \"Total Annual Dollar Based Charges\": \"total_annual_dollar_based_charges\",\n", " \"Buy Spread\": \"buy_spread\",\n", - " \"Sell Spread\": \"sell_spread\"\n", + " \"Sell Spread\": \"sell_spread\",\n", + " \"Performance Fee\": \"PerformanceFeeCharged\",\n", + " \"Minimum Initial Investment\": \"minimum_initial_investment\",\n", + " \"Benchmark\": \"benchmark_name\"\n", "}\n", "\n", - "# path_ground_truth = r\"/data/aus_prospectus/ground_truth/phase2_file/46_documents/46_documents_ground_truth_with_mapping.xlsx\"\n", - "path_ground_truth = r\"/data/aus_prospectus/ground_truth/phase2_file/next_round/next_round_6_documents_ground_truth_with_mapping.xlsx\"\n", + "# imp_datapoints = [\"Management Fee and Costs\", 
\"Management Fee\", \"Performance fee and cost\",\n", + "# \"Administration Fee and costs\", \"Total Annual Dollar Based Charges\", \"Buy Spread\", \"Sell Spread\"]\n", + "\n", + "\n", + "# imp_datapoints_mapping = {\n", + "# \"Management Fee and Costs\": \"management_fee_and_costs\",\n", + "# \"Management Fee\": \"management_fee\",\n", + "# \"Performance fee and cost\": \"performance_fee_costs\",\n", + "# \"Administration Fee and costs\": \"administration_fees\",\n", + "# \"Total Annual Dollar Based Charges\": \"total_annual_dollar_based_charges\",\n", + "# \"Buy Spread\": \"buy_spread\",\n", + "# \"Sell Spread\": \"sell_spread\"\n", + "# }\n", + "\n", + "path_ground_truth = r\"/data/aus_prospectus/ground_truth/phase2_file/46_documents/46_documents_ground_truth_with_mapping.xlsx\"\n", + "# path_ground_truth = r\"/data/aus_prospectus/ground_truth/phase2_file/next_round/next_round_6_documents_ground_truth_with_mapping.xlsx\"\n", "# path_generated_results = r\"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250317.xlsx\"\n", - "# path_generated_results = r\"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250327230323.xlsx\"\n", - "path_generated_results = r\"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_6_documents_by_text_20250328004858.xlsx\"\n", + "path_generated_results = r\"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250328010350.xlsx\"\n", + "# path_generated_results = r\"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_6_documents_by_text_20250328004858.xlsx\"\n", "provider_mapping_file_path = r\"/data/aus_prospectus/ground_truth/phase2_file/46_documents/TopProvidersBiz.xlsx\"\n", "\n" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -349,7 +349,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 14, 
"metadata": {}, "outputs": [ { @@ -363,18 +363,56 @@ "All Providers Results: \n", "Document List File - None\n", "Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n", - "management_fee_and_costs \t0.9515 \t0.9074 \t1.0000 \t0.9074 \t54 \t49 \t0 \t5 \t0 \n", - "management_fee \t0.9515 \t0.9074 \t1.0000 \t0.9074 \t54 \t49 \t0 \t5 \t0 \n", - "performance_fee_costs \t0.9796 \t0.9796 \t0.9796 \t0.9630 \t50 \t48 \t4 \t1 \t1 \n", - "administration_fees \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t54 \t54 \t0 \t0 \t0 \n", - "total_annual_dollar_based_charges \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t16 \t16 \t38 \t0 \t0 \n", - "buy_spread \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t20 \t20 \t34 \t0 \t0 \n", - "sell_spread \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t20 \t20 \t34 \t0 \t0 \n", - "TOTAL \t0.9832 \t0.9706 \t0.9971 \t0.9683 \t268 \t256 \t110 \t11 \t1 \n", - "Total Shares Matched - 54\n", - "Total Shares Not Matched - 16\n", - "Percentage of Shares Matched - 77.14285714285715\n", - "Not Matched Shares Name List - ['Vision Balanced Growth Pen', 'CFS FC W PSup-FirstRate Term Dep (10yr)', 'CFS FC W PSup-FirstRate Term Dep (15yr)', 'CFS FC W PSup-FirstRate Term Dep (2yr)', 'CFS FC W PSup-FirstRate Term Dep (3yr)', 'CFS FC W PSup-FirstRate Term Dep (5yr)', 'CFS FC W PSup-FirstRate Term Dep (7yr)', 'AV Australian Shares TTR', 'AV Balanced Growth TTR', 'AV Cash TTR', 'AV Conservative Growth TTR', 'AV Diversified Index TTR', 'AV Growth TTR', 'AV High Growth TTR', 'AV International Shares TTR', 'AV Stable Growth TTR']\n" + "management_fee_and_costs \t0.9324 \t0.8811 \t0.9901 \t0.8734 \t458 \t400 \t0 \t54 \t4 \n", + "management_fee \t0.9615 \t0.9339 \t0.9907 \t0.9258 \t458 \t424 \t0 \t30 \t4 \n", + "performance_fee_costs \t0.9165 \t0.9088 \t0.9244 \t0.8930 \t306 \t269 \t140 \t27 \t22 \n", + "interposed_vehicle_performance_fee_cost \t0.9536 \t0.9114 \t1.0000 \t0.9847 \t73 \t72 \t379 \t7 \t0 \n", + "administration_fees \t0.9878 \t0.9759 \t1.0000 \t0.9956 \t81 \t81 
\t375 \t2 \t0 \n", + "total_annual_dollar_based_charges \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t68 \t68 \t390 \t0 \t0 \n", + "buy_spread \t0.9346 \t0.9081 \t0.9628 \t0.8974 \t377 \t336 \t75 \t34 \t13 \n", + "sell_spread \t0.9331 \t0.9054 \t0.9626 \t0.8952 \t377 \t335 \t75 \t35 \t13 \n", + "minimum_initial_investment \t0.9635 \t0.9814 \t0.9463 \t0.9476 \t335 \t317 \t117 \t6 \t18 \n", + "benchmark_name \t0.9298 \t0.8968 \t0.9653 \t0.9541 \t153 \t139 \t298 \t16 \t5 \n", + "TOTAL \t0.9513 \t0.9303 \t0.9742 \t0.9367 \t2686 \t2441 \t1849 \t211 \t79 \n", + "Total Shares Matched - 411\n", + "Total Shares Not Matched - 106\n", + "Percentage of Shares Matched - 79.49709864603481\n", + "Not Matched Shares Name List - ['SPDR® S&P World ex Australia Carbon Control Fund', 'BT-BlackRock Scientific Diversified Growth', 'Mercer Multi-manager Balanced Fund – Retail Units', 'Mercer Multi-manager Conservative Fund – Retail Units', 'Mercer Multi-manager Growth Fund – Retail Units', 'Mercer Multi-manager High Growth Fund – Retail Units', 'ANZ OA Inv-OnePath Multi Asset Income EF', 'ANZ OA Inv-OnePath Multi Asset Income NEF', 'ANZ OA IP-OP Diversified Credit EF', 'ANZ OA IP-OP Diversified Credit NE', 'OnePath ANZ OA IP-T. Rowe Price Dyna Gl Bond EF', 'OnePath ANZ OA IP-T. 
Rowe Price Dyna Gl Bond NE', 'OnePath OA Inv-Nikko AM Australian Shares EF', 'OnePath OA Inv-Nikko AM Australian Shares NEF', 'OnePath OA IP- Pendal Monthly Income Plus-EF/Sel', 'OnePath OA IP-ANZ Cash Advantage-EF/Sel', 'OnePath OA IP-ANZ Cash Advantage-NEF', 'OnePath OA IP-Bentham Global Income Trust-EF/Sel', 'OnePath OA IP-Bentham Global Income Trust-NEF', 'OnePath OA IP-Kapstream Absolute Return Income Trust-EF/Sel', 'OnePath OA IP-Kapstream Absolute Return Income Trust-NEF', 'OnePath OA IP-OnePath Active Growth Trust-NEF', 'OnePath OA IP-OnePath High Growth Trust-EF/Sel', 'OnePath OA IP-OnePath High Growth Trust-NEF', 'OnePath OA IP-OnePath Managed Growth Trust-EF/Sel', 'OnePath OA IP-OnePath Managed Growth Trust-NEF', 'OnePath OA IP-OptiMix Australian Fixed Interest Trust-EF/Sel', 'OnePath OA IP-OptiMix Australian Fixed Interest Trust-NEF', 'OnePath OA IP-OptiMix Australian Share Trust-EF/Sel', 'OnePath OA IP-OptiMix Australian Share Trust-NEF', 'OnePath OA IP-OptiMix Global Emerging Markets Share-EF/Sel', 'OnePath OA IP-OptiMix Global Emerging Markets Share-NEF', 'OnePath OA IP-OptiMIx Global Share Trust-EF/Sel', 'OnePath OA IP-OptiMIx Global Share Trust-NEF', 'OnePath OA IP-OptiMix High Growth Trust-EF/Sel', 'OnePath OA IP-OptiMix High Growth Trust-NEF', 'OnePath OA IP-OptiMix Property Securities Trust-EF/Sel', 'OnePath OA IP-OptiMix Property Securities Trust-NEF', 'OnePath OA IP-Perpetual Conservative Growth Trust-EF/Sel', 'OnePath OA IP-Perpetual Conservative Growth Trust-NEF', 'OnePath OA IP-Platinum International Trust-EF/Sel', 'OnePath OA IP-Platinum International Trust-NEF', 'OnePath OA IP-Schroder Fixed Income-EF/Sel', 'OnePath OA IP-Schroder Fixed Income-NEF', 'OnePath OA IP-UBS Defensive Trust-EF/Sel', 'OnePath OA IP-UBS Defensive Trust-NEF', 'OnePath OA IP-UBS Diversified Fixed Income Trust-EF/Sel', 'OnePath OA IP-UBS Diversified Fixed Income Trust-NEF', 'OnePath OneAnswer Investment Portfolio - Ardea Real Outcome -EF/Sel', 'OnePath OneAnswer 
Investment Portfolio - Ardea Real Outcome -NE', 'OnePath OneAnswer Investment Portfolio - Barrow Hanley Concentrated Global Shares Hedged -EF/Sel', 'OnePath OneAnswer Investment Portfolio - Barrow Hanley Concentrated Global Shares Hedged -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Balanced Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Balanced Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Conservative Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Conservative Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Diversified Bond Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Diversified Bond Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath International Shares Index (Hedged) -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath International Shares Index (Hedged) -NE', 'OnePath Schroder Real Return Trust (Entry Fee)', 'OnePath Schroder Real Return Trust (Nil Entry Fee)', 'Telstra Growth Pen', 'First Sentier Concentrated Aus Share', 'First Sentier Australian Small Companies', 'First Sentier Imputation', 'First Sentier Global Property Securities', 'First Sentier Australian Share', 'CFS FC-Investors Mutual Future Leaders', 'Stewart Worldwide Leaders Sustainability', 'First Sentier Property Securities', 'MyNorth Index Defensive', 'MLC MKPFPR - Altrinsic Global Eq Trust', 'MLC MKPFPR - BlackRock Global Allocation', 'MLC MKPF - Hedged Global Share Fund', 'MLC MKPF - Inflation Plus - Conservative', 'MLC MKPFPR - MLC - Platinum Global Fund', 'MLC MasterKey Pension Fundamentals - Perpetual Australian Share', 'MLC MasterKey Super Fundamentals - Perpetual Australian Share', 'MLC MKPF - Perpetual WS Ethical SRI Fund', 'MLC MKSF - Perpetual WS Ethical SRI Fund', 'MLC MasterKey Pension Fundamentals (Pre Retirement) - Perpetual Smll Co Fund No.2', 'MLC MasterKey Super Fundamentals - Perpetual Small Co Fund No.2', 'MLC MKPF - PIMCO Div. 
Fixed Interest Wholesale Class', 'MLC MKSF - PIMCO Div. Fixed Interest Wholesale Class', 'MLC MKPF - PIMCO Global Bond Wholesale Class', 'MLC MKPFPR - Platinum Asia Fund', 'MLC MKSF - Platinum Asia Fund', 'MLC MKPF - Platinum International Fund', 'MLC MKSF - Platinum International Fund', 'MLC MKPF - PM CAPITAL Global Companies', 'MLC MKSF - PM CAPITAL Global Companies', 'MLC MKPF - Schroder WS Australian Equity', 'MLC MKSF - Schroder WS Australian Equity', 'MLC MasterKey Pension Fundamentals (Pre Retirement) - MLC Aust Property Index', 'MLC MasterKey Super Fundamentals - MLC Australian Property Index', 'MLC MKSF - Vanguard Intl Shr Indx (Hgd)', 'MLC MKSF - Vanguard Intl Shr Indx', 'HOSTPLUS Fixed Interest Indexed Super', 'Australian Unity Inv Wholesale Deposits Fund', 'Lifeplan Investment Bond Lifeplan Capital Guaranteed', 'Lifeplan Investment Bond MLC Horizon 2-Capital Stable Open', 'Dimensional Australian Core Equity Trust', 'CFS MIF-Geared Share NEF', 'BT Imputation Shares Retail', 'Dimensional Australia Core Equity Trust - Active ETF']\n", + "All Providers Results: \n", + "Document List File - ./sample_documents/aus_prospectus_29_documents_sample.txt\n", + "Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n", + "management_fee_and_costs \t0.9505 \t0.9058 \t1.0000 \t0.9058 \t191 \t173 \t0 \t18 \t0 \n", + "management_fee \t0.9867 \t0.9738 \t1.0000 \t0.9738 \t191 \t186 \t0 \t5 \t0 \n", + "performance_fee_costs \t0.8832 \t0.8700 \t0.8969 \t0.8796 \t99 \t87 \t81 \t13 \t10 \n", + "interposed_vehicle_performance_fee_cost \t0.9369 \t0.8814 \t1.0000 \t0.9634 \t53 \t52 \t132 \t7 \t0 \n", + "administration_fees \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t15 \t15 \t176 \t0 \t0 \n", + "buy_spread \t0.9812 \t0.9683 \t0.9946 \t0.9634 \t189 \t183 \t1 \t6 \t1 \n", + "sell_spread \t0.9757 \t0.9577 \t0.9945 \t0.9529 \t189 \t181 \t1 \t8 \t1 \n", + "minimum_initial_investment \t0.9189 \t0.9577 \t0.8831 \t0.8743 \t154 \t136 \t31 \t6 \t18 \n", + 
"benchmark_name \t0.9271 \t0.8812 \t0.9780 \t0.9267 \t99 \t89 \t88 \t12 \t2 \n", + "TOTAL \t0.9512 \t0.9329 \t0.9719 \t0.9378 \t1180 \t1102 \t510 \t75 \t111 \n", + "Total Shares Matched - 186\n", + "Total Shares Not Matched - 4\n", + "Percentage of Shares Matched - 97.89473684210527\n", + "Not Matched Shares Name List - ['Dimensional Australian Core Equity Trust', 'CFS MIF-Geared Share NEF', 'BT Imputation Shares Retail', 'Dimensional Australia Core Equity Trust - Active ETF']\n", + "All Providers Results: \n", + "Document List File - ./sample_documents/aus_prospectus_17_documents_sample.txt\n", + "Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n", + "management_fee_and_costs \t0.9190 \t0.8631 \t0.9827 \t0.8502 \t267 \t227 \t0 \t36 \t4 \n", + "management_fee \t0.9426 \t0.9049 \t0.9835 \t0.8914 \t267 \t238 \t0 \t25 \t4 \n", + "performance_fee_costs \t0.9333 \t0.9286 \t0.9381 \t0.9026 \t207 \t182 \t59 \t14 \t12 \n", + "interposed_vehicle_performance_fee_cost \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t20 \t20 \t247 \t0 \t0 \n", + "administration_fees \t0.9851 \t0.9706 \t1.0000 \t0.9925 \t66 \t66 \t199 \t2 \t0 \n", + "total_annual_dollar_based_charges \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t68 \t68 \t199 \t0 \t0 \n", + "buy_spread \t0.8844 \t0.8453 \t0.9273 \t0.8502 \t188 \t153 \t74 \t28 \t12 \n", + "sell_spread \t0.8876 \t0.8508 \t0.9277 \t0.8539 \t188 \t154 \t74 \t27 \t12 \n", + "minimum_initial_investment \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t181 \t181 \t86 \t0 \t0 \n", + "benchmark_name \t0.9346 \t0.9259 \t0.9434 \t0.9738 \t54 \t50 \t210 \t4 \t3 \n", + "TOTAL \t0.9487 \t0.9289 \t0.9703 \t0.9315 \t1506 \t1339 \t1148 \t136 \t158 \n", + "Total Shares Matched - 267\n", + "Total Shares Not Matched - 102\n", + "Percentage of Shares Matched - 72.35772357723577\n", + "Not Matched Shares Name List - ['SPDR® S&P World ex Australia Carbon Control Fund', 'BT-BlackRock Scientific Diversified Growth', 'Mercer Multi-manager Balanced Fund – Retail 
Units', 'Mercer Multi-manager Conservative Fund – Retail Units', 'Mercer Multi-manager Growth Fund – Retail Units', 'Mercer Multi-manager High Growth Fund – Retail Units', 'ANZ OA Inv-OnePath Multi Asset Income EF', 'ANZ OA Inv-OnePath Multi Asset Income NEF', 'ANZ OA IP-OP Diversified Credit EF', 'ANZ OA IP-OP Diversified Credit NE', 'OnePath ANZ OA IP-T. Rowe Price Dyna Gl Bond EF', 'OnePath ANZ OA IP-T. Rowe Price Dyna Gl Bond NE', 'OnePath OA Inv-Nikko AM Australian Shares EF', 'OnePath OA Inv-Nikko AM Australian Shares NEF', 'OnePath OA IP- Pendal Monthly Income Plus-EF/Sel', 'OnePath OA IP-ANZ Cash Advantage-EF/Sel', 'OnePath OA IP-ANZ Cash Advantage-NEF', 'OnePath OA IP-Bentham Global Income Trust-EF/Sel', 'OnePath OA IP-Bentham Global Income Trust-NEF', 'OnePath OA IP-Kapstream Absolute Return Income Trust-EF/Sel', 'OnePath OA IP-Kapstream Absolute Return Income Trust-NEF', 'OnePath OA IP-OnePath Active Growth Trust-NEF', 'OnePath OA IP-OnePath High Growth Trust-EF/Sel', 'OnePath OA IP-OnePath High Growth Trust-NEF', 'OnePath OA IP-OnePath Managed Growth Trust-EF/Sel', 'OnePath OA IP-OnePath Managed Growth Trust-NEF', 'OnePath OA IP-OptiMix Australian Fixed Interest Trust-EF/Sel', 'OnePath OA IP-OptiMix Australian Fixed Interest Trust-NEF', 'OnePath OA IP-OptiMix Australian Share Trust-EF/Sel', 'OnePath OA IP-OptiMix Australian Share Trust-NEF', 'OnePath OA IP-OptiMix Global Emerging Markets Share-EF/Sel', 'OnePath OA IP-OptiMix Global Emerging Markets Share-NEF', 'OnePath OA IP-OptiMIx Global Share Trust-EF/Sel', 'OnePath OA IP-OptiMIx Global Share Trust-NEF', 'OnePath OA IP-OptiMix High Growth Trust-EF/Sel', 'OnePath OA IP-OptiMix High Growth Trust-NEF', 'OnePath OA IP-OptiMix Property Securities Trust-EF/Sel', 'OnePath OA IP-OptiMix Property Securities Trust-NEF', 'OnePath OA IP-Perpetual Conservative Growth Trust-EF/Sel', 'OnePath OA IP-Perpetual Conservative Growth Trust-NEF', 'OnePath OA IP-Platinum International Trust-EF/Sel', 'OnePath OA IP-Platinum 
International Trust-NEF', 'OnePath OA IP-Schroder Fixed Income-EF/Sel', 'OnePath OA IP-Schroder Fixed Income-NEF', 'OnePath OA IP-UBS Defensive Trust-EF/Sel', 'OnePath OA IP-UBS Defensive Trust-NEF', 'OnePath OA IP-UBS Diversified Fixed Income Trust-EF/Sel', 'OnePath OA IP-UBS Diversified Fixed Income Trust-NEF', 'OnePath OneAnswer Investment Portfolio - Ardea Real Outcome -EF/Sel', 'OnePath OneAnswer Investment Portfolio - Ardea Real Outcome -NE', 'OnePath OneAnswer Investment Portfolio - Barrow Hanley Concentrated Global Shares Hedged -EF/Sel', 'OnePath OneAnswer Investment Portfolio - Barrow Hanley Concentrated Global Shares Hedged -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Balanced Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Balanced Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Conservative Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Conservative Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Diversified Bond Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Diversified Bond Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath International Shares Index (Hedged) -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath International Shares Index (Hedged) -NE', 'OnePath Schroder Real Return Trust (Entry Fee)', 'OnePath Schroder Real Return Trust (Nil Entry Fee)', 'Telstra Growth Pen', 'First Sentier Concentrated Aus Share', 'First Sentier Australian Small Companies', 'First Sentier Imputation', 'First Sentier Global Property Securities', 'First Sentier Australian Share', 'CFS FC-Investors Mutual Future Leaders', 'Stewart Worldwide Leaders Sustainability', 'First Sentier Property Securities', 'MyNorth Index Defensive', 'MLC MKPFPR - Altrinsic Global Eq Trust', 'MLC MKPFPR - BlackRock Global Allocation', 'MLC MKPF - Hedged Global Share Fund', 'MLC MKPF - Inflation Plus - Conservative', 'MLC MKPFPR - MLC - Platinum Global Fund', 'MLC MasterKey Pension 
Fundamentals - Perpetual Australian Share', 'MLC MasterKey Super Fundamentals - Perpetual Australian Share', 'MLC MKPF - Perpetual WS Ethical SRI Fund', 'MLC MKSF - Perpetual WS Ethical SRI Fund', 'MLC MasterKey Pension Fundamentals (Pre Retirement) - Perpetual Smll Co Fund No.2', 'MLC MasterKey Super Fundamentals - Perpetual Small Co Fund No.2', 'MLC MKPF - PIMCO Div. Fixed Interest Wholesale Class', 'MLC MKSF - PIMCO Div. Fixed Interest Wholesale Class', 'MLC MKPF - PIMCO Global Bond Wholesale Class', 'MLC MKPFPR - Platinum Asia Fund', 'MLC MKSF - Platinum Asia Fund', 'MLC MKPF - Platinum International Fund', 'MLC MKSF - Platinum International Fund', 'MLC MKPF - PM CAPITAL Global Companies', 'MLC MKSF - PM CAPITAL Global Companies', 'MLC MKPF - Schroder WS Australian Equity', 'MLC MKSF - Schroder WS Australian Equity', 'MLC MasterKey Pension Fundamentals (Pre Retirement) - MLC Aust Property Index', 'MLC MasterKey Super Fundamentals - MLC Australian Property Index', 'MLC MKSF - Vanguard Intl Shr Indx (Hgd)', 'MLC MKSF - Vanguard Intl Shr Indx', 'HOSTPLUS Fixed Interest Indexed Super', 'Australian Unity Inv Wholesale Deposits Fund', 'Lifeplan Investment Bond Lifeplan Capital Guaranteed', 'Lifeplan Investment Bond MLC Horizon 2-Capital Stable Open']\n" ] } ], @@ -430,8 +468,8 @@ "print(\"\\n\")\n", "print(\"\\n\")\n", "document_list_file_list = [None, \n", - " # \"./sample_documents/aus_prospectus_29_documents_sample.txt\", \n", - " # \"./sample_documents/aus_prospectus_17_documents_sample.txt\"\n", + " \"./sample_documents/aus_prospectus_29_documents_sample.txt\", \n", + " \"./sample_documents/aus_prospectus_17_documents_sample.txt\"\n", " ]\n", "# document_list_file_list = [None]\n", "for document_list_file in document_list_file_list:\n",