From 427a379b3b2c9a0cc8f4252885e4ba3f63c9d3c4 Mon Sep 17 00:00:00 2001 From: Blade He Date: Wed, 2 Apr 2025 16:34:41 -0500 Subject: [PATCH] 1. support re-call ChatGPT API to match non-matched prediction fund/ share names 2. If document fund amount less than 3, cancel the production name judgment logic --- core/auz_nz/hybrid_solution_script.py | 165 +++++++++++------- core/data_extraction.py | 6 +- .../data_extraction_prompts_config.json | 8 +- main.py | 7 +- performance.ipynb | 96 +++++----- ...spectus_87_vision_cfs_documents_sample.txt | 87 +++++++++ 6 files changed, 258 insertions(+), 111 deletions(-) create mode 100644 sample_documents/aus_prospectus_87_vision_cfs_documents_sample.txt diff --git a/core/auz_nz/hybrid_solution_script.py b/core/auz_nz/hybrid_solution_script.py index 3ff701a..e693963 100644 --- a/core/auz_nz/hybrid_solution_script.py +++ b/core/auz_nz/hybrid_solution_script.py @@ -653,66 +653,113 @@ def final_function_to_match(doc_id, pred_list, db_list, provider_name, doc_sourc llm_result = json_repair.loads(llm_response['response']) except: llm_result = {} - # try: - # llm_result = ast.literal_eval(llm_response['response'].replace('\n','')) - # except Exception as e: - # logger.info(f"error: {e}") - # cleaned_response = llm_response['response'].strip("```json").strip("```").replace('\n', '') - # llm_result = json.loads(cleaned_response) - # logger.info(f"\n\n llm_result: {llm_result}") - for pred_name,db_name in llm_result.items(): - # print("k: ",k) - # print("v: ",v) - og_db_index=-1 - # og_pred_index = -1 - og_pred_index_list = [] - if pred_name in cleaned_unmatched_pred_list: - for c_idx, c_item in enumerate(cleaned_unmatched_pred_list): - if c_item==pred_name: - og_pred_index_list.append(c_idx) - # og_pred_index = cleaned_unmatched_pred_list.index(k) + unmantched_pred_index_list = post_handle_fund_matching_call(llm_result, + unmatched_pred_list, + cleaned_unmatched_pred_list, + unmatched_db_list, + cleaned_unmatched_db_list, + df_data, + final_result, + record_empty=False) + """ + For some cases, same document, + perhaps same funds/ shares are with different raw names in different pages. + e.g. High Growth Fund in page 8, Vision High Growth Fund in page 10, and they are same fund. + But if only call ChatGPT API one time, it will not be able to match all of them. + """ + if len(unmantched_pred_index_list)>0: + unmatched_pred_list = [unmatched_pred_list[i] for i in unmantched_pred_index_list] + cleaned_unmatched_pred_list = [cleaned_unmatched_pred_list[i] for i in unmantched_pred_index_list] + prompt_context = f""" + {prompt_instruction} + + provider_name: {provider_name} + + prediction_fund: + {cleaned_unmatched_pred_list} - if len(og_pred_index_list) == 0: - # sometimes, the raw name and db name reversed from the LLM response - if db_name in cleaned_unmatched_pred_list and pred_name in cleaned_unmatched_db_list: - for c_idx, c_item in enumerate(cleaned_unmatched_pred_list): - if c_item==db_name: - og_pred_index_list.append(c_idx) - # og_pred_index = cleaned_unmatched_pred_list.index(v) - og_db_index = cleaned_unmatched_db_list.index(pred_name) - # v and k are swapped - temp = db_name - db_name = pred_name - pred_name = temp - if len(og_pred_index_list)==0: - continue - # og_db_index = cleaned_unmatched_db_list.index(v) - if og_db_index == -1 and db_name in cleaned_unmatched_db_list: - og_db_index = cleaned_unmatched_db_list.index(db_name) - # print("og_db_index: ",og_db_index, cleaned_unmatched_db_list) - # print("unmatched_db_list: ",unmatched_db_list) - - for i in df_data: - for og_pred_index in og_pred_index_list: - if i['pred_fund']==unmatched_pred_list[og_pred_index]: - if og_db_index!=-1: - i['db_fund']=unmatched_db_list[og_db_index] - i['cleaned_db_fund_name'] = db_name - final_result.update({unmatched_pred_list[og_pred_index]:unmatched_db_list[og_db_index]}) - else: - i['db_fund'] = '' - i['cleaned_db_fund_name'] = '' - final_result.update({unmatched_pred_list[og_pred_index]:""}) - i['llm_clean_pred_list'] = cleaned_unmatched_pred_list - i['llm_clean_db_list'] = cleaned_unmatched_db_list, - i['llm_pred_fund'] = pred_name - i['llm_matched_db_name'] = db_name - i['llm_result'] = llm_result - break - - - # break - return final_result + true_fund: + {cleaned_unmatched_db_list} + """ + llm_response, with_error = chat( + prompt=prompt_context, system_prompt=system_prompt, response_format={"type": "json_object"} + ) + # logger.info(f"fund matching LLM Response: {llm_response}") + if 'response' in llm_response.keys(): + try: + llm_result = json.loads(llm_response['response']) + except: + try: + llm_result = json_repair.loads(llm_response['response']) + except: + llm_result = {} + unmantched_pred_index_list = post_handle_fund_matching_call(llm_result, + unmatched_pred_list, + cleaned_unmatched_pred_list, + unmatched_db_list, + cleaned_unmatched_db_list, + df_data, + final_result, + record_empty=True) + + return final_result + + +def post_handle_fund_matching_call(llm_result, + unmatched_pred_list, + cleaned_unmatched_pred_list, + unmatched_db_list, + cleaned_unmatched_db_list, + df_data, + final_result, + record_empty: bool = False): + unmantched_pred_index_list = [] + for pred_name,db_name in llm_result.items(): + og_db_index=-1 + og_pred_index_list = [] + if pred_name in cleaned_unmatched_pred_list: + for c_idx, c_item in enumerate(cleaned_unmatched_pred_list): + if c_item==pred_name: + og_pred_index_list.append(c_idx) + + if len(og_pred_index_list) == 0: + # sometimes, the raw name and db name reversed from the LLM response + if db_name in cleaned_unmatched_pred_list and pred_name in cleaned_unmatched_db_list: + for c_idx, c_item in enumerate(cleaned_unmatched_pred_list): + if c_item==db_name: + og_pred_index_list.append(c_idx) + og_db_index = cleaned_unmatched_db_list.index(pred_name) + # v and k are swapped + temp = db_name + db_name = pred_name + pred_name = temp + if len(og_pred_index_list)==0: + continue + if og_db_index == -1 and db_name in cleaned_unmatched_db_list: + og_db_index = cleaned_unmatched_db_list.index(db_name) + + for i in df_data: + for og_pred_index in og_pred_index_list: + if i['pred_fund']==unmatched_pred_list[og_pred_index]: + if og_db_index!=-1: + i['db_fund']=unmatched_db_list[og_db_index] + i['cleaned_db_fund_name'] = db_name + final_result.update({unmatched_pred_list[og_pred_index]:unmatched_db_list[og_db_index]}) + else: + unmantched_pred_index_list.append(og_pred_index) + i['db_fund'] = '' + i['cleaned_db_fund_name'] = '' + if record_empty: + final_result.update({unmatched_pred_list[og_pred_index]:""}) + i['llm_clean_pred_list'] = cleaned_unmatched_pred_list + i['llm_clean_db_list'] = cleaned_unmatched_db_list, + i['llm_pred_fund'] = pred_name + i['llm_matched_db_name'] = db_name + i['llm_result'] = llm_result + break + return unmantched_pred_index_list + + def api_for_fund_matching_call(doc_id, api_response, providerName, all_investment_db_names): result = api_response['data'] diff --git a/core/data_extraction.py b/core/data_extraction.py index 7d2ea56..fab74ab 100644 --- a/core/data_extraction.py +++ b/core/data_extraction.py @@ -560,6 +560,8 @@ class DataExtraction: """ raw_name_dict = self.get_raw_name_dict(data_list) raw_name_list = list(raw_name_dict.keys()) + if len(raw_name_list) < 3: + return data_list, [] raw_name_as_production_name = None for raw_name in raw_name_list: if self.is_production_name(raw_name): @@ -716,6 +718,8 @@ class DataExtraction: raw_name = self.get_raw_name(fund_name, share_name) if len(raw_name) == 0: continue + if raw_name.lower() in ["the fund", "sample fund"]: + continue # if isinstance(self.document_production, str) and \ # raw_name.lower() in self.document_production.lower(): # continue @@ -1732,7 +1736,7 @@ class DataExtraction: continue found_regex_text = False for regex_text in regex_text_list: - regex_search = re.search(regex_text, page_text) + regex_search = re.search(regex_text, page_text, re.IGNORECASE) if regex_search is not None: found_regex_text = True break diff --git a/instructions/aus_prospectus/data_extraction_prompts_config.json b/instructions/aus_prospectus/data_extraction_prompts_config.json index ac2bd3b..37489a6 100644 --- a/instructions/aus_prospectus/data_extraction_prompts_config.json +++ b/instructions/aus_prospectus/data_extraction_prompts_config.json @@ -499,7 +499,13 @@ "\n\nInvestment option \nGross total \ntransaction costs 1 \n% p.a. \nNet total transaction \ncosts 2 \n% p.a. \nBuy-sell \nspread (ITC) 3 \n% \nAllan Gray Australian Equity Fund – Class A 0.06 0.00 0.40\nAlphinity Sustainable Share Fund 0.15 0.02 0.40\n", "---Example 4 End---", "The output should be:", - "{\"data\": [{\"fund name\": \"Allan Gray Australian Equity Fund – Class A\", \"share name\": \"Allan Gray Australian Equity Fund – Class A\", \"buy_spread\": 0.4, \"sell_spread\": 0.4}, {\"fund name\": \"Alphinity Sustainable Share Fund\", \"share name\": \"Alphinity Sustainable Share Fund\", \"buy_spread\": 0.4, \"sell_spread\": 0.4}]}" + "{\"data\": [{\"fund name\": \"Allan Gray Australian Equity Fund – Class A\", \"share name\": \"Allan Gray Australian Equity Fund – Class A\", \"buy_spread\": 0.4, \"sell_spread\": 0.4}, {\"fund name\": \"Alphinity Sustainable Share Fund\", \"share name\": \"Alphinity Sustainable Share Fund\", \"buy_spread\": 0.4, \"sell_spread\": 0.4}]}", + "\n", + "---Example 5 Start---", + "Fees and costs \n\nFund name \nManagement fees \nand costs (p.a.) \n1 \nBuy/sell spread \n(%) \n2 \nBaillie Gifford Sustainable \nGrowth Fund – Class A \n0.88% 0.10%\nBaillie Gifford Long Term \nGlobal Growth Fund – Class A \n0.96% 0.05%\n\n", + "---Example 5 End---", + "The output should be:", + "{\"data\": [{\"fund name\": \"Baillie Gifford Sustainable Growth Fund – Class A\", \"share name\": \"Baillie Gifford Sustainable Growth Fund – Class A\", \"management_fee_and_costs\": 0.88, \"management_fee\": 0.88, \"buy_spread\": 0.1, \"sell_spread\": 0.1}, {\"fund name\": \"Baillie Gifford Long Term Global Growth Fund – Class A\", \"share name\": \"Baillie Gifford Long Term Global Growth Fund – Class A\", \"management_fee_and_costs\": 0.96, \"management_fee\": 0.96, \"buy_spread\": 0.05, \"sell_spread\": 0.05}]}" ], "performance_fee_costs": [ "### Performance fees", diff --git a/main.py b/main.py index 5a24076..0a61596 100644 --- a/main.py +++ b/main.py @@ -1448,7 +1448,7 @@ def get_aus_prospectus_document_category(): def test_post_adjust_extract_data(): - doc_id = "448576924" + doc_id = "480854121" pdf_folder: str = r"/data/aus_prospectus/pdf/" output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/" output_extract_data_child_folder: str = ( @@ -1534,11 +1534,14 @@ if __name__ == "__main__": document_sample_file = ( r"./sample_documents/aus_prospectus_46_documents_sample.txt" ) + # document_sample_file = ( + # r"./sample_documents/aus_prospectus_87_vision_cfs_documents_sample.txt" + # ) logger.info(f"Start to run document sample file: {document_sample_file}") with open(document_sample_file, "r", encoding="utf-8") as f: special_doc_id_list = [doc_id.strip() for doc_id in f.readlines() if len(doc_id.strip()) > 0] - # special_doc_id_list = ["384508026"] + # special_doc_id_list = ["527969661"] pdf_folder: str = r"/data/aus_prospectus/pdf/" output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/" output_extract_data_child_folder: str = ( diff --git a/performance.ipynb b/performance.ipynb index 73aaa37..4391eae 100644 --- a/performance.ipynb +++ b/performance.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 29, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -50,7 +50,7 @@ "\n", "path_ground_truth = r\"/data/aus_prospectus/ground_truth/phase2_file/46_documents/46_documents_ground_truth_with_mapping.xlsx\"\n", "# path_ground_truth = r\"/data/aus_prospectus/ground_truth/phase2_file/next_round/next_round_6_documents_ground_truth_with_mapping.xlsx\"\n", - "path_generated_results = r\"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250331220152.xlsx\"\n", + "path_generated_results = r\"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250402120421.xlsx\"\n", "# path_generated_results = r\"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250328035602.xlsx\"\n", "# path_generated_results = r\"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_6_documents_by_text_20250331180753.xlsx\"\n", "provider_mapping_file_path = r\"/data/aus_prospectus/ground_truth/phase2_file/46_documents/TopProvidersBiz.xlsx\"\n", @@ -59,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -349,7 +349,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -363,56 +363,56 @@ "All Providers Results: \n", "Document List File - None\n", "Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n", - "management_fee_and_costs \t0.9369 \t0.8988 \t0.9785 \t0.8814 \t413 \t364 \t0 \t41 \t8 \n", - "management_fee \t0.9478 \t0.9185 \t0.9789 \t0.9007 \t413 \t372 \t0 \t33 \t8 \n", - "performance_fee_costs \t0.9160 \t0.9231 \t0.9091 \t0.8935 \t273 \t240 \t129 \t20 \t24 \n", - "interposed_vehicle_performance_fee_cost \t0.9114 \t0.8372 \t1.0000 \t0.9661 \t73 \t72 \t327 \t14 \t0 \n", - "administration_fees \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t52 \t52 \t361 \t0 \t0 \n", - "total_annual_dollar_based_charges \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t52 \t52 \t361 \t0 \t0 \n", - "buy_spread \t0.9290 \t0.8920 \t0.9691 \t0.8838 \t359 \t314 \t51 \t38 \t10 \n", - "sell_spread \t0.9258 \t0.8864 \t0.9689 \t0.8789 \t359 \t312 \t51 \t40 \t10 \n", - "minimum_initial_investment \t0.9733 \t0.9799 \t0.9669 \t0.9613 \t302 \t292 \t105 \t6 \t10 \n", - "benchmark_name \t0.9109 \t0.8790 \t0.9452 \t0.9346 \t155 \t138 \t248 \t19 \t8 \n", - "TOTAL \t0.9451 \t0.9215 \t0.9717 \t0.9300 \t2451 \t2208 \t1633 \t211 \t78 \n", - "Total Shares Matched - 365\n", - "Total Shares Not Matched - 142\n", - "Percentage of Shares Matched - 71.99211045364892\n", - "Not Matched Shares Name List - ['SPDR® S&P World ex Australia Carbon Control Fund', 'Mercer Multi-manager Growth Fund – Retail Units', 'Mercer Multi-manager High Growth Fund – Retail Units', 'ANZ OA IP-OP Diversified Credit EF', 'ANZ OA IP-OP Diversified Credit NE', 'OnePath ANZ OA IP-T. Rowe Price Dyna Gl Bond EF', 'OnePath ANZ OA IP-T. Rowe Price Dyna Gl Bond NE', 'OnePath OA IP- Pendal Monthly Income Plus-EF/Sel', 'OnePath OA IP-ANZ Cash Advantage-EF/Sel', 'OnePath OA IP-ANZ Cash Advantage-NEF', 'OnePath OA IP-Ausbil Australian Emerging Leaders Trust-EF/Sel', 'OnePath OA IP-Bentham Global Income Trust-EF/Sel', 'OnePath OA IP-Bentham Global Income Trust-NEF', 'OnePath OA IP-Fidelity Australian Equities-EF/Sel', 'OnePath OA IP-Fidelity Australian Equities-NEF', 'OnePath OA IP-Investors Mutual Australian Share Trust- EF/Sel', 'OnePath OA IP-Investors Mutual Australian Share Trust- NEF', 'OnePath OA IP-Kapstream Absolute Return Income Trust-EF/Sel', 'OnePath OA IP-Kapstream Absolute Return Income Trust-NEF', 'OnePath OA IP-Merlon Australian Share Income-EF/Sel', 'OnePath OA IP-OnePath Active Growth Trust-NEF', 'OnePath OA IP-OnePath High Growth Trust-EF/Sel', 'OnePath OA IP-OnePath High Growth Trust-NEF', 'OnePath OA IP-OnePath Managed Growth Trust-EF/Sel', 'OnePath OA IP-OnePath Managed Growth Trust-NEF', 'OnePath OA IP-OptiMix Australian Fixed Interest Trust-EF/Sel', 'OnePath OA IP-OptiMix Australian Fixed Interest Trust-NEF', 'OnePath OA IP-OptiMix Australian Share Trust-EF/Sel', 'OnePath OA IP-OptiMix Australian Share Trust-NEF', 'OnePath OA IP-OptiMix Global Emerging Markets Share-EF/Sel', 'OnePath OA IP-OptiMix Global Emerging Markets Share-NEF', 'OnePath OA IP-OptiMIx Global Share Trust-EF/Sel', 'OnePath OA IP-OptiMIx Global Share Trust-NEF', 'OnePath OA IP-OptiMix High Growth Trust-EF/Sel', 'OnePath OA IP-OptiMix High Growth Trust-NEF', 'OnePath OA IP-OptiMix Property Securities Trust-EF/Sel', 'OnePath OA IP-OptiMix Property Securities Trust-NEF', 'OnePath OA IP-Perpetual Balanced Growth Trust-EF/Sel', 'OnePath OA IP-Perpetual Balanced Growth Trust-NEF', 'OnePath OA IP-Perpetual Conservative Growth Trust-EF/Sel', 'OnePath OA IP-Perpetual Conservative Growth Trust-NEF', 'OnePath OA IP-Platinum International Trust-EF/Sel', 'OnePath OA IP-Platinum International Trust-NEF', 'OnePath OA IP-UBS Balanced Trust-EF/Sel', 'OnePath OA IP-UBS Balanced Trust-NEF', 'OnePath OA IP-UBS Defensive Trust-EF/Sel', 'OnePath OA IP-UBS Defensive Trust-NEF', 'OnePath OA IP-UBS Diversified Fixed Income Trust-EF/Sel', 'OnePath OA IP-UBS Diversified Fixed Income Trust-NEF', 'OnePath OneAnswer Investment Portfolio - Ardea Real Outcome -EF/Sel', 'OnePath OneAnswer Investment Portfolio - Ardea Real Outcome -NE', 'OnePath OneAnswer Investment Portfolio - Barrow Hanley Concentrated Global Shares Hedged -EF/Sel', 'OnePath OneAnswer Investment Portfolio - Barrow Hanley Concentrated Global Shares Hedged -NE', 'OnePath OneAnswer Investment Portfolio - BlackRock Advantage Australian Equity -EF/Sel', 'OnePath OneAnswer Investment Portfolio - BlackRock Advantage Australian Equity -NE', 'OnePath OneAnswer Investment Portfolio - BlackRock Diversified ESG Growth -EF/Sel', 'OnePath OneAnswer Investment Portfolio - BlackRock Diversified ESG Growth -NE', 'OnePath OneAnswer Investment Portfolio - First Sentier Imputation -EF/Sel', 'OnePath OneAnswer Investment Portfolio - First Sentier Imputation -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Australian Shares Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Australian Shares Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Balanced Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Balanced Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Conservative Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Conservative Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Diversified Bond Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Diversified Bond Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Growth Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Growth Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath High Growth Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath High Growth Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath International Shares Index (Hedged) -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath International Shares Index (Hedged) -NE', 'OnePath Schroder Real Return Trust (Entry Fee)', 'OnePath Schroder Real Return Trust (Nil Entry Fee)', 'OnePath OA IP-Ausbil Australian Emerging Leaders Trust-NEF', 'Telstra Growth Pen', 'First Sentier Concentrated Aus Share', 'First Sentier Australian Small Companies', 'First Sentier Imputation', 'First Sentier Global Property Securities', 'First Sentier Australian Share', 'CFS FC-Investors Mutual Future Leaders', 'Stewart Worldwide Leaders Sustainability', 'First Sentier Property Securities', 'MyNorth Index Defensive', 'MLC MKPFPR - Altrinsic Global Eq Trust', 'MLC MKPFPR - BlackRock Global Allocation', 'MLC MKPFPR - MLC - Platinum Global Fund', 'MLC MasterKey Pension Fundamentals - Perpetual Australian Share', 'MLC MasterKey Super Fundamentals - Perpetual Australian Share', 'MLC MKPF - Perpetual WS Ethical SRI Fund', 'MLC MKSF - Perpetual WS Ethical SRI Fund', 'MLC MasterKey Pension Fundamentals (Pre Retirement) - Perpetual Smll Co Fund No.2', 'MLC MasterKey Super Fundamentals - Perpetual Small Co Fund No.2', 'MLC MKSF - PIMCO Div. Fixed Interest Wholesale Class', 'MLC MKPFPR - Platinum Asia Fund', 'MLC MKSF - Platinum Asia Fund', 'MLC MKPF - Platinum International Fund', 'MLC MKSF - Platinum International Fund', 'MLC MKPF - PM CAPITAL Global Companies', 'MLC MKSF - PM CAPITAL Global Companies', 'MLC MKPF - Schroder WS Australian Equity', 'MLC MKSF - Schroder WS Australian Equity', 'MLC MasterKey Super Fundamentals - MLC Australian Property Index', 'MLC MKSF - Vanguard Intl Shr Indx (Hgd)', 'MLC MKSF - Vanguard Intl Shr Indx', 'HOSTPLUS Fixed Interest Indexed Super', 'Australian Unity Inv Wholesale Deposits Fund', 'Lifeplan Investment Bond Lifeplan Capital Guaranteed', 'Lifeplan Investment Bond Perpetual Balanced Growth', 'Lifeplan Investment Bond Perpetual Conservative Growth', 'Lifeplan Investment Bond Perpetual Industrial Share', 'Legal Super High Growth Pen', 'Legal Super Balanced Socially Responsible Pen', 'Legal Super Growth Pen', 'Legal Super Conservative Pen', 'Legal Super Conservative Balanced Pen', 'Legal Super Balanced Index Pen', 'Legal Super Balanced Pen', 'Legal Super Cash Pen', 'Legal Super Australian Shares Pen', 'Legal Super Overseas Shares Pen', 'Legal Super Enhanced Cash Pen', 'Dimensional Australian Core Equity Trust', 'CFS FC ESup-CFS Diversified Fix Int', 'FC W Pen-CFS TTR Conservative', 'FC W Pen-CFS TTR Diversified', 'FC W Pen-CFS TTR High Growth', 'FC W Pen-CFS TTR Australian Share', 'FC W Pen-CFS TTR Property Securities', 'FC W Pen-CFS TTR Moderate', 'FC W Pen-CFS TTR Balanced', 'FC W Pen-CFS TTR Growth', 'FC W Pen-CFS TTR Australian Small Companies', 'FC W Pen-CFS TTR Global Infrastructure Securities', 'FC W Pen-CFS TTR Fixed Interest', 'FC W Pen-CFS TTR Global Share', 'FC W Pen-CFS TTR Emerging Markets', 'FC W Pen-CFS TTR Defensive', 'CFS MIF-Geared Share NEF', 'Dimensional Australia Core Equity Trust - Active ETF']\n", + "management_fee_and_costs \t0.9318 \t0.8859 \t0.9826 \t0.8722 \t454 \t396 \t0 \t51 \t7 \n", + "management_fee \t0.9442 \t0.9083 \t0.9831 \t0.8943 \t454 \t406 \t0 \t41 \t7 \n", + "performance_fee_costs \t0.9347 \t0.9394 \t0.9300 \t0.9141 \t314 \t279 \t136 \t18 \t21 \n", + "interposed_vehicle_performance_fee_cost \t0.9605 \t0.9241 \t1.0000 \t0.9868 \t73 \t73 \t375 \t6 \t0 \n", + "administration_fees \t0.9878 \t0.9759 \t1.0000 \t0.9956 \t81 \t81 \t371 \t2 \t0 \n", + "total_annual_dollar_based_charges \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t82 \t82 \t372 \t0 \t0 \n", + "buy_spread \t0.9407 \t0.9142 \t0.9688 \t0.9053 \t379 \t341 \t70 \t32 \t11 \n", + "sell_spread \t0.9378 \t0.9088 \t0.9686 \t0.9009 \t379 \t339 \t70 \t34 \t11 \n", + "minimum_initial_investment \t0.9704 \t0.9811 \t0.9599 \t0.9581 \t324 \t311 \t124 \t6 \t13 \n", + "benchmark_name \t0.9212 \t0.8994 \t0.9441 \t0.9427 \t169 \t152 \t276 \t17 \t9 \n", + "TOTAL \t0.9529 \t0.9337 \t0.9737 \t0.9370 \t2709 \t2460 \t1794 \t207 \t79 \n", + "Total Shares Matched - 406\n", + "Total Shares Not Matched - 111\n", + "Percentage of Shares Matched - 78.52998065764024\n", + "Not Matched Shares Name List - ['SPDR® S&P World ex Australia Carbon Control Fund', 'Mercer Multi-manager Growth Fund – Retail Units', 'Mercer Multi-manager High Growth Fund – Retail Units', 'ANZ OA IP-OP Diversified Credit EF', 'ANZ OA IP-OP Diversified Credit NE', 'OnePath ANZ OA IP-T. Rowe Price Dyna Gl Bond EF', 'OnePath ANZ OA IP-T. Rowe Price Dyna Gl Bond NE', 'OnePath OA IP- Pendal Monthly Income Plus-EF/Sel', 'OnePath OA IP-ANZ Cash Advantage-EF/Sel', 'OnePath OA IP-ANZ Cash Advantage-NEF', 'OnePath OA IP-Ausbil Australian Emerging Leaders Trust-EF/Sel', 'OnePath OA IP-Bentham Global Income Trust-EF/Sel', 'OnePath OA IP-Bentham Global Income Trust-NEF', 'OnePath OA IP-Investors Mutual Australian Share Trust- EF/Sel', 'OnePath OA IP-Investors Mutual Australian Share Trust- NEF', 'OnePath OA IP-Kapstream Absolute Return Income Trust-EF/Sel', 'OnePath OA IP-Kapstream Absolute Return Income Trust-NEF', 'OnePath OA IP-Merlon Australian Share Income-EF/Sel', 'OnePath OA IP-OnePath Active Growth Trust-NEF', 'OnePath OA IP-OnePath High Growth Trust-EF/Sel', 'OnePath OA IP-OnePath High Growth Trust-NEF', 'OnePath OA IP-OnePath Managed Growth Trust-EF/Sel', 'OnePath OA IP-OnePath Managed Growth Trust-NEF', 'OnePath OA IP-OptiMix Australian Fixed Interest Trust-EF/Sel', 'OnePath OA IP-OptiMix Australian Fixed Interest Trust-NEF', 'OnePath OA IP-OptiMix Australian Share Trust-EF/Sel', 'OnePath OA IP-OptiMix Australian Share Trust-NEF', 'OnePath OA IP-OptiMix Global Emerging Markets Share-EF/Sel', 'OnePath OA IP-OptiMix Global Emerging Markets Share-NEF', 'OnePath OA IP-OptiMIx Global Share Trust-EF/Sel', 'OnePath OA IP-OptiMIx Global Share Trust-NEF', 'OnePath OA IP-OptiMix High Growth Trust-EF/Sel', 'OnePath OA IP-OptiMix High Growth Trust-NEF', 'OnePath OA IP-OptiMix Property Securities Trust-EF/Sel', 'OnePath OA IP-OptiMix Property Securities Trust-NEF', 'OnePath OA IP-Perpetual Conservative Growth Trust-EF/Sel', 'OnePath OA IP-Perpetual Conservative Growth Trust-NEF', 'OnePath OA IP-Platinum International Trust-EF/Sel', 'OnePath OA IP-Platinum International Trust-NEF', 'OnePath OA IP-Schroder Fixed Income-EF/Sel', 'OnePath OA IP-Schroder Fixed Income-NEF', 'OnePath OA IP-UBS Defensive Trust-EF/Sel', 'OnePath OA IP-UBS Defensive Trust-NEF', 'OnePath OA IP-UBS Diversified Fixed Income Trust-EF/Sel', 'OnePath OA IP-UBS Diversified Fixed Income Trust-NEF', 'OnePath OneAnswer Investment Portfolio - Ardea Real Outcome -EF/Sel', 'OnePath OneAnswer Investment Portfolio - Ardea Real Outcome -NE', 'OnePath OneAnswer Investment Portfolio - Barrow Hanley Concentrated Global Shares Hedged -EF/Sel', 'OnePath OneAnswer Investment Portfolio - Barrow Hanley Concentrated Global Shares Hedged -NE', 'OnePath OneAnswer Investment Portfolio - BlackRock Advantage Australian Equity -EF/Sel', 'OnePath OneAnswer Investment Portfolio - BlackRock Advantage Australian Equity -NE', 'OnePath OneAnswer Investment Portfolio - BlackRock Diversified ESG Growth -EF/Sel', 'OnePath OneAnswer Investment Portfolio - BlackRock Diversified ESG Growth -NE', 'OnePath OneAnswer Investment Portfolio - First Sentier Imputation -EF/Sel', 'OnePath OneAnswer Investment Portfolio - First Sentier Imputation -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Balanced Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Balanced Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Conservative Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Conservative Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Diversified Bond Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Diversified Bond Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath International Shares Index (Hedged) -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath International Shares Index (Hedged) -NE', 'OnePath Schroder Real Return Trust (Entry Fee)', 'OnePath Schroder Real Return Trust (Nil Entry Fee)', 'OnePath OA IP-Ausbil Australian Emerging Leaders Trust-NEF', 'Telstra Growth Pen', 'First Sentier Concentrated Aus Share', 'First Sentier Australian Small Companies', 'First Sentier Imputation', 'First Sentier Global Property Securities', 'First Sentier Australian Share', 'CFS FC-Investors Mutual Future Leaders', 'Stewart Worldwide Leaders Sustainability', 'First Sentier Property Securities', 'MyNorth Index Defensive', 'MLC MKPF - Inflation Plus - Conservative', 'MLC MasterKey Super Fundamentals - Perpetual Australian Share', 'MLC MKSF - Perpetual WS Ethical SRI Fund', 'MLC MasterKey Super Fundamentals - Perpetual Small Co Fund No.2', 'MLC MKSF - PIMCO Div. Fixed Interest Wholesale Class', 'MLC MKSF - Platinum Asia Fund', 'MLC MKSF - Platinum International Fund', 'MLC MKSF - PM CAPITAL Global Companies', 'MLC MKSF - Schroder WS Australian Equity', 'MLC MasterKey Pension Fundamentals (Pre Retirement) - MLC Aust Property Index', 'MLC MasterKey Super Fundamentals - MLC Australian Property Index', 'MLC MKSF - Vanguard Intl Shr Indx (Hgd)', 'MLC MKSF - Vanguard Intl Shr Indx', 'Australian Unity Inv Wholesale Deposits Fund', 'Lifeplan Investment Bond Lifeplan Capital Guaranteed', 'Lifeplan Investment Bond Perpetual Balanced Growth', 'Lifeplan Investment Bond Perpetual Conservative Growth', 'Lifeplan Investment Bond Perpetual Industrial Share', 'Lifeplan Investment Bond Vanguard® Australian Shares Index', 'Dimensional Australian Core Equity Trust', 'CFS FC ESup-CFS Diversified Fix Int', 'FC W Pen-CFS TTR Conservative', 'FC W Pen-CFS TTR Diversified', 'FC W Pen-CFS TTR High Growth', 'FC W Pen-CFS TTR Australian Share', 'FC W Pen-CFS TTR Property Securities', 'FC W Pen-CFS TTR Moderate', 'FC W Pen-CFS TTR Balanced', 'FC W Pen-CFS TTR Growth', 'FC W Pen-CFS TTR Australian Small Companies', 'FC W Pen-CFS TTR Fixed Interest', 'FC W Pen-CFS TTR Global Share', 'FC W Pen-CFS TTR Emerging Markets', 'FC W Pen-CFS TTR Defensive', 'CFS MIF-Geared Share NEF']\n", "All Providers Results: \n", "Document List File - ./sample_documents/aus_prospectus_29_documents_sample.txt\n", "Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n", - "management_fee_and_costs \t0.9529 \t0.9101 \t1.0000 \t0.9101 \t178 \t162 \t0 \t16 \t0 \n", - "management_fee \t0.9829 \t0.9663 \t1.0000 \t0.9663 \t178 \t172 \t0 \t6 \t0 \n", - "performance_fee_costs \t0.8830 \t0.8646 \t0.9022 \t0.8764 \t95 \t83 \t73 \t13 \t9 \n", - "interposed_vehicle_performance_fee_cost \t0.8814 \t0.7879 \t1.0000 \t0.9213 \t53 \t52 \t112 \t14 \t0 \n", - "administration_fees \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t1 \t1 \t177 \t0 \t0 \n", - "buy_spread \t0.9856 \t0.9716 \t1.0000 \t0.9719 \t176 \t171 \t2 \t5 \t0 \n", - "sell_spread \t0.9767 \t0.9545 \t1.0000 \t0.9551 \t176 \t168 \t2 \t8 \t0 \n", - "minimum_initial_investment \t0.9611 \t0.9577 \t0.9645 \t0.9382 \t141 \t136 \t31 \t6 \t5 \n", - "benchmark_name \t0.9184 \t0.8654 \t0.9783 \t0.9101 \t100 \t90 \t72 \t14 \t2 \n", - "TOTAL \t0.9491 \t0.9198 \t0.9828 \t0.9388 \t1098 \t1035 \t469 \t82 \t94 \n", - "Total Shares Matched - 173\n", - "Total Shares Not Matched - 18\n", - "Percentage of Shares Matched - 90.57591623036649\n", - "Not Matched Shares Name List - ['Dimensional Australian Core Equity Trust', 'CFS FC ESup-CFS Diversified Fix Int', 'FC W Pen-CFS TTR Conservative', 'FC W Pen-CFS TTR Diversified', 'FC W Pen-CFS TTR High Growth', 'FC W Pen-CFS TTR Australian Share', 'FC W Pen-CFS TTR Property Securities', 'FC W Pen-CFS TTR Moderate', 'FC W Pen-CFS TTR Balanced', 'FC W Pen-CFS TTR Growth', 'FC W Pen-CFS TTR Australian Small Companies', 'FC W Pen-CFS TTR Global Infrastructure Securities', 'FC W Pen-CFS TTR Fixed Interest', 'FC W Pen-CFS TTR Global Share', 'FC W Pen-CFS TTR Emerging Markets', 'FC W Pen-CFS TTR Defensive', 'CFS MIF-Geared Share NEF', 'Dimensional Australia Core Equity Trust - Active ETF']\n", + "management_fee_and_costs \t0.9412 \t0.8889 \t1.0000 \t0.8889 \t180 \t160 \t0 \t20 \t0 \n", + "management_fee \t0.9888 \t0.9778 \t1.0000 \t0.9778 \t180 \t176 \t0 \t4 \t0 \n", + "performance_fee_costs \t0.8939 \t0.9091 \t0.8791 \t0.8944 \t96 \t80 \t81 \t8 \t11 \n", + "interposed_vehicle_performance_fee_cost \t0.9464 \t0.8983 \t1.0000 \t0.9667 \t53 \t53 \t121 \t6 \t0 \n", + "administration_fees \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t2 \t2 \t178 \t0 \t0 \n", + "buy_spread \t0.9801 \t0.9718 \t0.9885 \t0.9611 \t178 \t172 \t1 \t5 \t2 \n", + "sell_spread \t0.9713 \t0.9548 \t0.9883 \t0.9444 \t178 \t169 \t1 \t8 \t2 \n", + "minimum_initial_investment \t0.9507 \t0.9574 \t0.9441 \t0.9222 \t143 \t135 \t31 \t6 \t8 \n", + "benchmark_name \t0.9246 \t0.8762 \t0.9787 \t0.9167 \t101 \t92 \t73 \t13 \t2 \n", + "TOTAL \t0.9552 \t0.9371 \t0.9754 \t0.9414 \t1111 \t1039 \t486 \t70 \t104 \n", + "Total Shares Matched - 175\n", + "Total Shares Not Matched - 16\n", + "Percentage of Shares Matched - 91.62303664921467\n", + "Not Matched Shares Name List - ['Dimensional Australian Core Equity Trust', 'CFS FC ESup-CFS Diversified Fix Int', 'FC W Pen-CFS TTR Conservative', 'FC W Pen-CFS TTR Diversified', 'FC W Pen-CFS TTR High Growth', 'FC W Pen-CFS TTR Australian Share', 'FC W Pen-CFS TTR Property Securities', 'FC W Pen-CFS TTR Moderate', 'FC W Pen-CFS TTR Balanced', 'FC W Pen-CFS TTR Growth', 'FC W Pen-CFS TTR Australian Small Companies', 'FC W Pen-CFS TTR Fixed Interest', 'FC W Pen-CFS TTR Global Share', 'FC W Pen-CFS TTR Emerging Markets', 'FC W Pen-CFS TTR Defensive', 'CFS MIF-Geared Share NEF']\n", "All Providers Results: \n", "Document List File - ./sample_documents/aus_prospectus_17_documents_sample.txt\n", "Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n", - "management_fee_and_costs \t0.9245 \t0.8899 \t0.9619 \t0.8596 \t235 \t202 \t0 \t25 \t8 \n", - "management_fee \t0.9195 \t0.8811 \t0.9615 \t0.8511 \t235 \t200 \t0 \t27 \t8 \n", - "performance_fee_costs \t0.9345 \t0.9573 \t0.9128 \t0.9064 \t178 \t157 \t56 \t7 \t15 \n", - "interposed_vehicle_performance_fee_cost \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t20 \t20 \t215 \t0 \t0 \n", - "administration_fees \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t51 \t51 \t184 \t0 \t0 \n", - "total_annual_dollar_based_charges \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t52 \t52 \t183 \t0 \t0 \n", - "buy_spread \t0.8693 \t0.8125 \t0.9346 \t0.8170 \t183 \t143 \t49 \t33 \t10 \n", - "sell_spread \t0.8727 \t0.8182 \t0.9351 \t0.8213 \t183 \t144 \t49 \t32 \t10 \n", - "minimum_initial_investment \t0.9842 \t1.0000 \t0.9689 \t0.9787 \t161 \t156 \t74 \t0 \t5 \n", - "benchmark_name \t0.8972 \t0.9057 \t0.8889 \t0.9532 \t55 \t48 \t176 \t5 \t6 \n", - "TOTAL \t0.9402 \t0.9265 \t0.9564 \t0.9187 \t1353 \t1173 \t986 \t129 \t156 \n", - "Total Shares Matched - 235\n", - "Total Shares Not Matched - 124\n", - "Percentage of Shares Matched - 65.45961002785515\n", - "Not Matched Shares Name List - ['SPDR® S&P World ex Australia Carbon Control Fund', 'Mercer Multi-manager Growth Fund – Retail Units', 'Mercer Multi-manager High Growth Fund – Retail Units', 'ANZ OA IP-OP Diversified Credit EF', 'ANZ OA IP-OP Diversified Credit NE', 'OnePath ANZ OA IP-T. Rowe Price Dyna Gl Bond EF', 'OnePath ANZ OA IP-T. Rowe Price Dyna Gl Bond NE', 'OnePath OA IP- Pendal Monthly Income Plus-EF/Sel', 'OnePath OA IP-ANZ Cash Advantage-EF/Sel', 'OnePath OA IP-ANZ Cash Advantage-NEF', 'OnePath OA IP-Ausbil Australian Emerging Leaders Trust-EF/Sel', 'OnePath OA IP-Bentham Global Income Trust-EF/Sel', 'OnePath OA IP-Bentham Global Income Trust-NEF', 'OnePath OA IP-Fidelity Australian Equities-EF/Sel', 'OnePath OA IP-Fidelity Australian Equities-NEF', 'OnePath OA IP-Investors Mutual Australian Share Trust- EF/Sel', 'OnePath OA IP-Investors Mutual Australian Share Trust- NEF', 'OnePath OA IP-Kapstream Absolute Return Income Trust-EF/Sel', 'OnePath OA IP-Kapstream Absolute Return Income Trust-NEF', 'OnePath OA IP-Merlon Australian Share Income-EF/Sel', 'OnePath OA IP-OnePath Active Growth Trust-NEF', 'OnePath OA IP-OnePath High Growth Trust-EF/Sel', 'OnePath OA IP-OnePath High Growth Trust-NEF', 'OnePath OA IP-OnePath Managed Growth Trust-EF/Sel', 'OnePath OA IP-OnePath Managed Growth Trust-NEF', 'OnePath OA IP-OptiMix Australian Fixed Interest Trust-EF/Sel', 'OnePath OA IP-OptiMix Australian Fixed Interest Trust-NEF', 'OnePath OA IP-OptiMix Australian Share Trust-EF/Sel', 'OnePath OA IP-OptiMix Australian Share Trust-NEF', 'OnePath OA IP-OptiMix Global Emerging Markets Share-EF/Sel', 'OnePath OA IP-OptiMix Global Emerging Markets Share-NEF', 'OnePath OA IP-OptiMIx Global Share Trust-EF/Sel', 'OnePath OA IP-OptiMIx Global Share Trust-NEF', 'OnePath OA IP-OptiMix High Growth Trust-EF/Sel', 'OnePath OA IP-OptiMix High Growth Trust-NEF', 'OnePath OA IP-OptiMix Property Securities Trust-EF/Sel', 'OnePath OA IP-OptiMix Property Securities Trust-NEF', 'OnePath OA IP-Perpetual Balanced Growth Trust-EF/Sel', 'OnePath OA IP-Perpetual Balanced Growth Trust-NEF', 'OnePath OA IP-Perpetual Conservative Growth Trust-EF/Sel', 'OnePath OA IP-Perpetual Conservative Growth Trust-NEF', 'OnePath OA IP-Platinum International Trust-EF/Sel', 'OnePath OA IP-Platinum International Trust-NEF', 'OnePath OA IP-UBS Balanced Trust-EF/Sel', 'OnePath OA IP-UBS Balanced Trust-NEF', 'OnePath OA IP-UBS Defensive Trust-EF/Sel', 'OnePath OA IP-UBS Defensive Trust-NEF', 'OnePath OA IP-UBS Diversified Fixed Income Trust-EF/Sel', 'OnePath OA IP-UBS Diversified Fixed Income Trust-NEF', 'OnePath OneAnswer Investment Portfolio - Ardea Real Outcome -EF/Sel', 'OnePath OneAnswer Investment Portfolio - Ardea Real Outcome -NE', 'OnePath OneAnswer Investment Portfolio - Barrow Hanley Concentrated Global Shares Hedged -EF/Sel', 'OnePath OneAnswer Investment Portfolio - Barrow Hanley Concentrated Global Shares Hedged -NE', 'OnePath OneAnswer Investment Portfolio - BlackRock Advantage Australian Equity -EF/Sel', 'OnePath OneAnswer Investment Portfolio - BlackRock Advantage Australian Equity -NE', 'OnePath OneAnswer Investment Portfolio - BlackRock Diversified ESG Growth -EF/Sel', 'OnePath OneAnswer Investment Portfolio - BlackRock Diversified ESG Growth -NE', 'OnePath OneAnswer Investment Portfolio - First Sentier Imputation -EF/Sel', 'OnePath OneAnswer Investment Portfolio - First Sentier Imputation -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Australian Shares Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Australian Shares Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Balanced Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Balanced Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Conservative Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Conservative Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Diversified Bond Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Diversified Bond Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Growth Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Growth Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath High Growth Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath High Growth Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath International Shares Index (Hedged) -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath International Shares Index (Hedged) -NE', 'OnePath Schroder Real Return Trust (Entry Fee)', 'OnePath Schroder Real Return Trust (Nil Entry Fee)', 'OnePath OA IP-Ausbil Australian Emerging Leaders Trust-NEF', 'Telstra Growth Pen', 'First Sentier Concentrated Aus Share', 'First Sentier Australian Small Companies', 'First Sentier Imputation', 'First Sentier Global Property Securities', 'First Sentier Australian Share', 'CFS FC-Investors Mutual Future Leaders', 'Stewart Worldwide Leaders Sustainability', 'First Sentier Property Securities', 'MyNorth Index Defensive', 'MLC MKPFPR - Altrinsic Global Eq Trust', 'MLC MKPFPR - BlackRock Global Allocation', 'MLC MKPFPR - MLC - Platinum Global Fund', 'MLC MasterKey Pension Fundamentals - Perpetual Australian Share', 'MLC MasterKey Super Fundamentals - Perpetual Australian Share', 'MLC MKPF - Perpetual WS Ethical SRI Fund', 'MLC MKSF - Perpetual WS Ethical SRI Fund', 'MLC MasterKey Pension Fundamentals (Pre Retirement) - Perpetual Smll Co Fund No.2', 'MLC MasterKey Super Fundamentals - Perpetual Small Co Fund No.2', 'MLC MKSF - PIMCO Div. Fixed Interest Wholesale Class', 'MLC MKPFPR - Platinum Asia Fund', 'MLC MKSF - Platinum Asia Fund', 'MLC MKPF - Platinum International Fund', 'MLC MKSF - Platinum International Fund', 'MLC MKPF - PM CAPITAL Global Companies', 'MLC MKSF - PM CAPITAL Global Companies', 'MLC MKPF - Schroder WS Australian Equity', 'MLC MKSF - Schroder WS Australian Equity', 'MLC MasterKey Super Fundamentals - MLC Australian Property Index', 'MLC MKSF - Vanguard Intl Shr Indx (Hgd)', 'MLC MKSF - Vanguard Intl Shr Indx', 'HOSTPLUS Fixed Interest Indexed Super', 'Australian Unity Inv Wholesale Deposits Fund', 'Lifeplan Investment Bond Lifeplan Capital Guaranteed', 'Lifeplan Investment Bond Perpetual Balanced Growth', 'Lifeplan Investment Bond Perpetual Conservative Growth', 'Lifeplan Investment Bond Perpetual Industrial Share', 'Legal Super High Growth Pen', 'Legal Super Balanced Socially Responsible Pen', 'Legal Super Growth Pen', 'Legal Super Conservative Pen', 'Legal Super Conservative Balanced Pen', 'Legal Super Balanced Index Pen', 'Legal Super Balanced Pen', 'Legal Super Cash Pen', 'Legal Super Australian Shares Pen', 'Legal Super Overseas Shares Pen', 'Legal Super Enhanced Cash Pen']\n" + "management_fee_and_costs \t0.9255 \t0.8839 \t0.9712 \t0.8613 \t274 \t236 \t0 \t31 \t7 \n", + "management_fee \t0.9127 \t0.8614 \t0.9705 \t0.8394 \t274 \t230 \t0 \t37 \t7 \n", + "performance_fee_costs \t0.9522 \t0.9522 \t0.9522 \t0.9270 \t218 \t199 \t55 \t10 \t10 \n", + "interposed_vehicle_performance_fee_cost \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t20 \t20 \t254 \t0 \t0 \n", + "administration_fees \t0.9875 \t0.9753 \t1.0000 \t0.9927 \t79 \t79 \t193 \t2 \t0 \n", + "total_annual_dollar_based_charges \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t82 \t82 \t192 \t0 \t0 \n", + "buy_spread \t0.9037 \t0.8622 \t0.9494 \t0.8686 \t201 \t169 \t69 \t27 \t9 \n", + "sell_spread \t0.9067 \t0.8673 \t0.9497 \t0.8723 \t201 \t170 \t69 \t26 \t9 \n", + "minimum_initial_investment \t0.9860 \t1.0000 \t0.9724 \t0.9818 \t181 \t176 \t93 \t0 \t5 \n", + "benchmark_name \t0.9160 \t0.9375 \t0.8955 \t0.9599 \t68 \t60 \t203 \t4 \t7 \n", + "TOTAL \t0.9490 \t0.9340 \t0.9661 \t0.9303 \t1598 \t1421 \t1128 \t137 \t158 \n", + "Total Shares Matched - 274\n", + "Total Shares Not Matched - 95\n", + "Percentage of Shares Matched - 74.25474254742548\n", + "Not Matched Shares Name List - ['SPDR® S&P World ex Australia Carbon Control Fund', 'Mercer Multi-manager Growth Fund – Retail Units', 'Mercer Multi-manager High Growth Fund – Retail Units', 'ANZ OA IP-OP Diversified Credit EF', 'ANZ OA IP-OP Diversified Credit NE', 'OnePath ANZ OA IP-T. Rowe Price Dyna Gl Bond EF', 'OnePath ANZ OA IP-T. Rowe Price Dyna Gl Bond NE', 'OnePath OA IP- Pendal Monthly Income Plus-EF/Sel', 'OnePath OA IP-ANZ Cash Advantage-EF/Sel', 'OnePath OA IP-ANZ Cash Advantage-NEF', 'OnePath OA IP-Ausbil Australian Emerging Leaders Trust-EF/Sel', 'OnePath OA IP-Bentham Global Income Trust-EF/Sel', 'OnePath OA IP-Bentham Global Income Trust-NEF', 'OnePath OA IP-Investors Mutual Australian Share Trust- EF/Sel', 'OnePath OA IP-Investors Mutual Australian Share Trust- NEF', 'OnePath OA IP-Kapstream Absolute Return Income Trust-EF/Sel', 'OnePath OA IP-Kapstream Absolute Return Income Trust-NEF', 'OnePath OA IP-Merlon Australian Share Income-EF/Sel', 'OnePath OA IP-OnePath Active Growth Trust-NEF', 'OnePath OA IP-OnePath High Growth Trust-EF/Sel', 'OnePath OA IP-OnePath High Growth Trust-NEF', 'OnePath OA IP-OnePath Managed Growth Trust-EF/Sel', 'OnePath OA IP-OnePath Managed Growth Trust-NEF', 'OnePath OA IP-OptiMix Australian Fixed Interest Trust-EF/Sel', 'OnePath OA IP-OptiMix Australian Fixed Interest Trust-NEF', 'OnePath OA IP-OptiMix Australian Share Trust-EF/Sel', 'OnePath OA IP-OptiMix Australian Share Trust-NEF', 'OnePath OA IP-OptiMix Global Emerging Markets Share-EF/Sel', 'OnePath OA IP-OptiMix Global Emerging Markets Share-NEF', 'OnePath OA IP-OptiMIx Global Share Trust-EF/Sel', 'OnePath OA IP-OptiMIx Global Share Trust-NEF', 'OnePath OA IP-OptiMix High Growth Trust-EF/Sel', 'OnePath OA IP-OptiMix High Growth Trust-NEF', 'OnePath OA IP-OptiMix Property Securities Trust-EF/Sel', 'OnePath OA IP-OptiMix Property Securities Trust-NEF', 'OnePath OA IP-Perpetual Conservative Growth Trust-EF/Sel', 'OnePath OA IP-Perpetual Conservative Growth Trust-NEF', 'OnePath OA IP-Platinum International Trust-EF/Sel', 'OnePath OA IP-Platinum International Trust-NEF', 'OnePath OA IP-Schroder Fixed Income-EF/Sel', 'OnePath OA IP-Schroder Fixed Income-NEF', 'OnePath OA IP-UBS Defensive Trust-EF/Sel', 'OnePath OA IP-UBS Defensive Trust-NEF', 'OnePath OA IP-UBS Diversified Fixed Income Trust-EF/Sel', 'OnePath OA IP-UBS Diversified Fixed Income Trust-NEF', 'OnePath OneAnswer Investment Portfolio - Ardea Real Outcome -EF/Sel', 'OnePath OneAnswer Investment Portfolio - Ardea Real Outcome -NE', 'OnePath OneAnswer Investment Portfolio - Barrow Hanley Concentrated Global Shares Hedged -EF/Sel', 'OnePath OneAnswer Investment Portfolio - Barrow Hanley Concentrated Global Shares Hedged -NE', 'OnePath OneAnswer Investment Portfolio - BlackRock Advantage Australian Equity -EF/Sel', 'OnePath OneAnswer Investment Portfolio - BlackRock Advantage Australian Equity -NE', 'OnePath OneAnswer Investment Portfolio - BlackRock Diversified ESG Growth -EF/Sel', 'OnePath OneAnswer Investment Portfolio - BlackRock Diversified ESG Growth -NE', 'OnePath OneAnswer Investment Portfolio - First Sentier Imputation -EF/Sel', 'OnePath OneAnswer Investment Portfolio - First Sentier Imputation -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Balanced Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Balanced Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Conservative Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Conservative Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Diversified Bond Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Diversified Bond Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath International Shares Index (Hedged) -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath International Shares Index (Hedged) -NE', 'OnePath Schroder Real Return Trust (Entry Fee)', 'OnePath Schroder Real Return Trust (Nil Entry Fee)', 'OnePath OA IP-Ausbil Australian Emerging Leaders Trust-NEF', 'Telstra Growth Pen', 'First Sentier Concentrated Aus Share', 'First Sentier Australian Small Companies', 'First Sentier Imputation', 'First Sentier Global Property Securities', 'First Sentier Australian Share', 'CFS FC-Investors Mutual Future Leaders', 'Stewart Worldwide Leaders Sustainability', 'First Sentier Property Securities', 'MyNorth Index Defensive', 'MLC MKPF - Inflation Plus - Conservative', 'MLC MasterKey Super Fundamentals - Perpetual Australian Share', 'MLC MKSF - Perpetual WS Ethical SRI Fund', 'MLC MasterKey Super Fundamentals - Perpetual Small Co Fund No.2', 'MLC MKSF - PIMCO Div. Fixed Interest Wholesale Class', 'MLC MKSF - Platinum Asia Fund', 'MLC MKSF - Platinum International Fund', 'MLC MKSF - PM CAPITAL Global Companies', 'MLC MKSF - Schroder WS Australian Equity', 'MLC MasterKey Pension Fundamentals (Pre Retirement) - MLC Aust Property Index', 'MLC MasterKey Super Fundamentals - MLC Australian Property Index', 'MLC MKSF - Vanguard Intl Shr Indx (Hgd)', 'MLC MKSF - Vanguard Intl Shr Indx', 'Australian Unity Inv Wholesale Deposits Fund', 'Lifeplan Investment Bond Lifeplan Capital Guaranteed', 'Lifeplan Investment Bond Perpetual Balanced Growth', 'Lifeplan Investment Bond Perpetual Conservative Growth', 'Lifeplan Investment Bond Perpetual Industrial Share', 'Lifeplan Investment Bond Vanguard® Australian Shares Index']\n" ] } ], diff --git a/sample_documents/aus_prospectus_87_vision_cfs_documents_sample.txt b/sample_documents/aus_prospectus_87_vision_cfs_documents_sample.txt new file mode 100644 index 0000000..a661078 --- /dev/null +++ b/sample_documents/aus_prospectus_87_vision_cfs_documents_sample.txt @@ -0,0 +1,87 @@ +430229604 +430249980 +434533711 +448576798 +448576868 +448576914 +448576924 +448577874 +448577877 +448578148 +448701586 +448906715 +448906720 +448906722 +448907811 +451234748 +454947973 +454947982 +454948291 +454948296 +455232983 +455235248 +462770987 +470958290 +470958296 +478920274 +478946988 +479996914 +479996918 +480713037 +480726184 +480726185 +480854103 +480854105 +480854113 +480854115 +480854118 +480854120 +480854121 +480854129 +481877313 +484628699 +484628701 +484628702 +484628703 +495516375 +495547519 +500579230 +506913190 +509581748 +520698753 +520702746 +520703007 +521591949 +521606716 +521606755 +523516443 +525464665 +528208796 +534933875 +539999907 +539999916 +540028470 +542294088 +544886057 +548035617 +550533961 +550769189 +552727485 +555377021 +556527310 +557362550 +557526104 +557526108 +557526111 +557526129 +557526130 +557526143 +557526145 +562753667 +562753673 +562754590 +570781265 +572302455 +572302463 +573372424 +577949367 \ No newline at end of file