From ad371f6584b069ac9153b550769e7664d6164f81 Mon Sep 17 00:00:00 2001 From: Ravi Maheshwari Date: Tue, 18 Mar 2025 16:43:55 +0530 Subject: [PATCH] Changed Performance matrix code to get all anomilities to analyze and Prompt to get better accuracy --- .../data_extraction_prompts_config.json | 6 +- performance.ipynb | 361 ++++++++++++------ 2 files changed, 250 insertions(+), 117 deletions(-) diff --git a/instructions/aus_prospectus/data_extraction_prompts_config.json b/instructions/aus_prospectus/data_extraction_prompts_config.json index f4e832a..e1db089 100644 --- a/instructions/aus_prospectus/data_extraction_prompts_config.json +++ b/instructions/aus_prospectus/data_extraction_prompts_config.json @@ -441,7 +441,11 @@ "The output should be", "{\"data\": [{\"fund name\": \"Platinum International Fund\", \"share name\": \"C Class\", \"performance_fee_costs\": 0}, {\"fund name\": \"Platinum International Fund\", \"share name\": \"E Class\", \"performance_fee_costs\": 0}, {\"fund name\": \"Platinum International Fund\", \"share name\": \"P Class\", \"performance_fee_costs\": 0.15}, {\"fund name\": \"Platinum Global Fund (Long Only)\", \"share name\": \"C Class\", \"performance_fee_costs\": 0}, {\"fund name\": \"Platinum Global Fund (Long Only)\", \"share name\": \"E Class\", \"performance_fee_costs\": 0}, {\"fund name\": \"Platinum Global Fund (Long Only)\", \"share name\": \"P Class\", \"performance_fee_costs\": 0.24}]}", "D. Identify the value of performance fee and if it is written 0% or 0.00% or 0 or 0.00 then extract the same as 0 do not assume null for the same and return its values as 0", - "E. If for performacen fee specifically Nil is written in the value then return NULL('') for the same", + "---Example Start---", + "Fund/Investment Option \nManagement Fees \nand Costs \n(% pa) \n1 \nPerformance Fees 2 \n(% pa) \nTransaction Costs 3 \n(% pa) \nBT American Share Fund 1.08 0.00 0.00\nBT Asian Share Fund 1.10 0.00 0.10", + "---Example End---", + "a. For this example, as Performance fee mentioned as 0.00 so return 0 as performance fee datapoint value.", + "E. If for performance fee specifically Nil is written in the value then return NULL('') for the same", "---Example Start---", "Vanguard Investor Short Term Fixed Interest Fund PLUS Performance fees Nil \nAnd, you will be charged or have deducted \nfrom your investment $0 in performance fees \neach year.", "---Example End---", diff --git a/performance.ipynb b/performance.ipynb index 4869a68..e7a0050 100644 --- a/performance.ipynb +++ b/performance.ipynb @@ -2,10 +2,17 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ + "import openpyxl\n", + "from collections import defaultdict\n", + "import pandas as pd\n", + "import statistics\n", + "import os\n", + "import re\n", + "from utils.similarity import Similarity\n", "\n", "\n", "imp_datapoints = [\"Management Fee and Costs\", \"Management Fee\", \"Performance fee and cost\", \"Interposed vehicle Performance fee and Costs\",\n", @@ -25,101 +32,23 @@ " \"Performance Fee\": \"PerformanceFeeCharged\",\n", " \"Minimum Initial Investment\": \"minimum_initial_investment\",\n", " \"Benchmark\": \"benchmark_name\"\n", - "}\n" + "}\n", + "\n", + "path_ground_truth = r\"/data/aus_prospectus/ground_truth/46_documents_ground_truth_with_mapping.xlsx\"\n", + "path_generated_results = r\"C:\\Users\\rmahesh\\Downloads\\mapping_data_info_46_documents_by_text_20250317.xlsx\"\n", + "provider_mapping_file_path = r\"/data/aus_prospectus/ground_truth/phase2_file/46_documents/TopProvidersBiz.xlsx\"\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 9, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\n", - "\n", - "All Providers Results: \n", - "Document List File - None\n", - "Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n", - "management_fee_and_costs \t0.9204 \t0.8852 \t0.9586 \t0.8533 \t406 \t347 \t2 \t45 \t15 \n", - "management_fee \t0.9415 \t0.9235 \t0.9602 \t0.8900 \t406 \t362 \t2 \t30 \t15 \n", - "performance_fee_costs \t0.8953 \t0.9277 \t0.8652 \t0.8680 \t281 \t231 \t124 \t18 \t36 \n", - "interposed_vehicle_performance_fee_cost \t0.9600 \t0.9231 \t1.0000 \t0.9853 \t73 \t72 \t331 \t6 \t0 \n", - "administration_fees \t0.8319 \t0.9592 \t0.7344 \t0.9535 \t64 \t47 \t343 \t2 \t17 \n", - "total_annual_dollar_based_charges \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t66 \t66 \t343 \t0 \t0 \n", - "buy_spread \t0.9359 \t0.9235 \t0.9486 \t0.8949 \t349 \t314 \t52 \t26 \t17 \n", - "sell_spread \t0.9407 \t0.9324 \t0.9491 \t0.9022 \t349 \t317 \t52 \t23 \t17 \n", - "minimum_initial_investment \t0.9737 \t0.9642 \t0.9834 \t0.9609 \t301 \t296 \t97 \t11 \t5 \n", - "benchmark_name \t0.8047 \t0.8175 \t0.7923 \t0.8778 \t141 \t103 \t256 \t23 \t27 \n", - "TOTAL \t0.9204 \t0.9256 \t0.9192 \t0.9186 \t2436 \t2155 \t1602 \t184 \t149 \n", - "Total Funds Matched - 409\n", - "Total Funds Not Matched - 156\n", - "Percentage of Funds Matched - 72.38938053097344\n", - "All Providers Results: \n", - "Document List File - ./sample_documents/aus_prospectus_29_documents_sample.txt\n", - "Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n", - "management_fee_and_costs \t0.9457 \t0.8970 \t1.0000 \t0.8970 \t164 \t148 \t0 \t17 \t0 \n", - "management_fee \t0.9783 \t0.9576 \t1.0000 \t0.9576 \t164 \t158 \t0 \t7 \t0 \n", - "performance_fee_costs \t0.8263 \t0.8846 \t0.7753 \t0.8242 \t95 \t69 \t67 \t9 \t20 \n", - "interposed_vehicle_performance_fee_cost \t0.9455 \t0.8966 \t1.0000 \t0.9636 \t53 \t52 \t107 \t6 \t0 \n", - "administration_fees \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t1 \t1 \t164 \t0 \t0 \n", - "buy_spread \t0.9812 \t0.9752 \t0.9874 \t0.9636 \t162 \t157 \t2 \t4 \t2 \n", - "sell_spread \t0.9876 \t0.9876 \t0.9876 \t0.9758 \t162 \t159 \t2 \t2 \t2 \n", - "minimum_initial_investment \t0.9569 \t0.9531 \t0.9606 \t0.9333 \t127 \t122 \t32 \t6 \t5 \n", - "benchmark_name \t0.7651 \t0.7808 \t0.7500 \t0.7879 \t85 \t57 \t73 \t16 \t19 \n", - "TOTAL \t0.9318 \t0.9258 \t0.9401 \t0.9226 \t1013 \t923 \t447 \t67 \t197 \n", - "Total Funds Matched - 165\n", - "Total Funds Not Matched - 31\n", - "Percentage of Funds Matched - 84.18367346938776\n", - "All Providers Results: \n", - "Document List File - ./sample_documents/aus_prospectus_17_documents_sample.txt\n", - "Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n", - "management_fee_and_costs \t0.9025 \t0.8767 \t0.9299 \t0.8238 \t242 \t199 \t2 \t28 \t15 \n", - "management_fee \t0.9148 \t0.8987 \t0.9315 \t0.8443 \t242 \t204 \t2 \t23 \t15 \n", - "performance_fee_costs \t0.9284 \t0.9474 \t0.9101 \t0.8975 \t186 \t162 \t57 \t9 \t16 \n", - "interposed_vehicle_performance_fee_cost \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t20 \t20 \t224 \t0 \t0 \n", - "administration_fees \t0.8288 \t0.9583 \t0.7302 \t0.9221 \t63 \t46 \t179 \t2 \t17 \n", - "total_annual_dollar_based_charges \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t66 \t66 \t178 \t0 \t0 \n", - "buy_spread \t0.8946 \t0.8771 \t0.9128 \t0.8484 \t187 \t157 \t50 \t22 \t15 \n", - "sell_spread \t0.8977 \t0.8827 \t0.9133 \t0.8525 \t187 \t158 \t50 \t21 \t15 \n", - "minimum_initial_investment \t0.9858 \t0.9721 \t1.0000 \t0.9795 \t174 \t174 \t65 \t5 \t0 \n", - "benchmark_name \t0.8598 \t0.8679 \t0.8519 \t0.9385 \t56 \t46 \t183 \t7 \t8 \n", - "TOTAL \t0.9212 \t0.9281 \t0.9180 \t0.9107 \t1423 \t1232 \t990 \t117 \t298 \n", - "Total Funds Matched - 244\n", - "Total Funds Not Matched - 125\n", - "Percentage of Funds Matched - 66.12466124661248\n" - ] - } - ], + "outputs": [], "source": [ - "import openpyxl\n", - "from collections import defaultdict\n", - "import pandas as pd\n", - "import statistics\n", - "import os\n", - "import re\n", - "from utils.similarity import Similarity\n", "\n", - "\"\"\"\n", - "Blade's updates\n", - "1. Set the secondary key to be the share class name, instead of the fund name\n", - "2. Remove the data point which support is 0 to calculate the metrics\n", - "3. Add the message list to store the error message\n", - "4. Support save metrics/ error message to excel file\n", - "5. Support statistics for different document list\n", - "6. Set F1-Score to the first column in the metrics table\n", - "\"\"\"\n", - "\n", - "path_ground_truth = r\"/data/aus_prospectus/ground_truth/phase2_file/46_documents/46_documents_ground_truth_with_mapping.xlsx\"\n", - "path_generated_results = r\"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250317_Ravi_modified.xlsx\"\n", - "# path_generated_results = r\"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250317_Ravi.xlsx\"\n", - "provider_mapping_file_path = r\"/data/aus_prospectus/ground_truth/phase2_file/46_documents/TopProvidersBiz.xlsx\"\n", - "\n", - "funds_matched = 0\n", - "funds_not_matched = 0\n", + "message_list = []\n", + "total_fn = []\n", "def load_excel(filepath, header_row_index):\n", " \"\"\"Load an Excel file and use the specified row as the header.\"\"\"\n", " wb = openpyxl.load_workbook(filepath, data_only=True)\n", @@ -169,6 +98,7 @@ " value1 = convert_if_number(value1)\n", " value2 = convert_if_number(value2)\n", " return value1 == value2\n", + "\n", "def compare_data(ground_truth, generated_results, headers, doc_id_index, fund_name_index, intersection_list, funds_matched, funds_not_matched, document_list):\n", " \"\"\"Compare data from two indexed sets, with the focus on matching generated results against ground truth.\"\"\"\n", " results = {}\n", @@ -186,7 +116,6 @@ " # Iterate over the generated results instead of the ground truth\n", " \n", " total = 0\n", - " message_list = []\n", " # print(document_list)\n", " for doc_id, secs in ground_truth.items():\n", " if document_list is not None and str(doc_id) not in document_list:\n", @@ -260,25 +189,8 @@ " jacard_score = similarity.jaccard_similarity(source_text.lower().split(), target_text.lower().split())\n", " if jacard_score > 0.8:\n", " return True\n", - "\n", - "# Load the files\n", - "headers_gt, ground_truth_data = load_excel(path_ground_truth, 0)\n", - "headers_gen, generated_results_data = load_excel(path_generated_results, 0)\n", - "\n", - "# Assuming doc_id is the first column and fund_name is the second column\n", - "doc_id_index = 0\n", - "fund_name_index = 1\n", - "\n", - "# Index the data\n", - "ground_truth_indexed = index_data_by_key(ground_truth_data, doc_id_index, fund_name_index, headers_gt)\n", - "generated_results_indexed = index_data_by_key(generated_results_data, doc_id_index, fund_name_index, headers_gen)\n", - "\n", - "intersection = set(headers_gen).intersection(headers_gt)\n", - "\n", - "# Convert the result back to a list (if you need it as a list)\n", - "intersection_list = list(intersection)\n", - "\n", - "total_fn = []\n", + " \n", + " \n", "def calculate_metrics(tp, tn, fp, fn):\n", " \"\"\"Calculate precision, recall, accuracy, and F1-score.\"\"\"\n", " precision = tp / (tp + fp) if (tp + fp) != 0 else 0\n", @@ -398,12 +310,112 @@ " for doc_id in doc_list:\n", " if doc_id in results:\n", " provider_res[doc_id] = results[doc_id]\n", - " return provider_res\n", + " return provider_res\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "\n", + "All Providers Results: \n", + "Document List File - None\n", + "Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n", + "management_fee_and_costs \t0.9211 \t0.8861 \t0.9589 \t0.8544 \t409 \t350 \t2 \t45 \t15 \n", + "management_fee \t0.9419 \t0.9241 \t0.9605 \t0.8908 \t409 \t365 \t2 \t30 \t15 \n", + "performance_fee_costs \t0.8987 \t0.9325 \t0.8672 \t0.8714 \t284 \t235 \t124 \t17 \t36 \n", + "interposed_vehicle_performance_fee_cost \t0.9600 \t0.9231 \t1.0000 \t0.9854 \t73 \t72 \t334 \t6 \t0 \n", + "administration_fees \t0.9853 \t0.9710 \t1.0000 \t0.9951 \t67 \t67 \t343 \t2 \t0 \n", + "total_annual_dollar_based_charges \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t69 \t69 \t343 \t0 \t0 \n", + "buy_spread \t0.9365 \t0.9242 \t0.9491 \t0.8956 \t352 \t317 \t52 \t26 \t17 \n", + "sell_spread \t0.9412 \t0.9329 \t0.9496 \t0.9029 \t352 \t320 \t52 \t23 \t17 \n", + "minimum_initial_investment \t0.9737 \t0.9642 \t0.9834 \t0.9612 \t301 \t296 \t100 \t11 \t5 \n", + "benchmark_name \t0.8092 \t0.8217 \t0.7970 \t0.8786 \t144 \t106 \t256 \t23 \t27 \n", + "TOTAL \t0.9367 \t0.9280 \t0.9466 \t0.9235 \t2460 \t2197 \t1608 \t183 \t132 \n", + "Total Funds Matched - 412\n", + "Total Funds Not Matched - 153\n", + "Percentage of Funds Matched - 72.9203539823009\n", + "All Providers Results: \n", + "Document List File - ./sample_documents/aus_prospectus_29_documents_sample.txt\n", + "Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n", + "management_fee_and_costs \t0.9457 \t0.8970 \t1.0000 \t0.8970 \t164 \t148 \t0 \t17 \t0 \n", + "management_fee \t0.9783 \t0.9576 \t1.0000 \t0.9576 \t164 \t158 \t0 \t7 \t0 \n", + "performance_fee_costs \t0.8263 \t0.8846 \t0.7753 \t0.8242 \t95 \t69 \t67 \t9 \t20 \n", + "interposed_vehicle_performance_fee_cost \t0.9455 \t0.8966 \t1.0000 \t0.9636 \t53 \t52 \t107 \t6 \t0 \n", + "administration_fees \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t1 \t1 \t164 \t0 \t0 \n", + "buy_spread \t0.9812 \t0.9752 \t0.9874 \t0.9636 \t162 \t157 \t2 \t4 \t2 \n", + "sell_spread \t0.9876 \t0.9876 \t0.9876 \t0.9758 \t162 \t159 \t2 \t2 \t2 \n", + "minimum_initial_investment \t0.9569 \t0.9531 \t0.9606 \t0.9333 \t127 \t122 \t32 \t6 \t5 \n", + "benchmark_name \t0.7651 \t0.7808 \t0.7500 \t0.7879 \t85 \t57 \t73 \t16 \t19 \n", + "TOTAL \t0.9318 \t0.9258 \t0.9401 \t0.9226 \t1013 \t923 \t447 \t67 \t180 \n", + "Total Funds Matched - 165\n", + "Total Funds Not Matched - 31\n", + "Percentage of Funds Matched - 84.18367346938776\n", + "All Providers Results: \n", + "Document List File - ./sample_documents/aus_prospectus_17_documents_sample.txt\n", + "Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n", + "management_fee_and_costs \t0.9038 \t0.8783 \t0.9309 \t0.8259 \t245 \t202 \t2 \t28 \t15 \n", + "management_fee \t0.9159 \t0.9000 \t0.9324 \t0.8462 \t245 \t207 \t2 \t23 \t15 \n", + "performance_fee_costs \t0.9326 \t0.9540 \t0.9121 \t0.9028 \t189 \t166 \t57 \t8 \t16 \n", + "interposed_vehicle_performance_fee_cost \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t20 \t20 \t227 \t0 \t0 \n", + "administration_fees \t0.9851 \t0.9706 \t1.0000 \t0.9919 \t66 \t66 \t179 \t2 \t0 \n", + "total_annual_dollar_based_charges \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t69 \t69 \t178 \t0 \t0 \n", + "buy_spread \t0.8964 \t0.8791 \t0.9143 \t0.8502 \t190 \t160 \t50 \t22 \t15 \n", + "sell_spread \t0.8994 \t0.8846 \t0.9148 \t0.8543 \t190 \t161 \t50 \t21 \t15 \n", + "minimum_initial_investment \t0.9858 \t0.9721 \t1.0000 \t0.9798 \t174 \t174 \t68 \t5 \t0 \n", + "benchmark_name \t0.8673 \t0.8750 \t0.8596 \t0.9393 \t59 \t49 \t183 \t7 \t8 \n", + "TOTAL \t0.9386 \t0.9314 \t0.9464 \t0.9190 \t1447 \t1274 \t996 \t116 \t264 \n", + "Total Funds Matched - 247\n", + "Total Funds Not Matched - 122\n", + "Percentage of Funds Matched - 66.93766937669376\n" + ] + } + ], + "source": [ "\n", + "\"\"\"\n", + "Blade's updates\n", + "1. Set the secondary key to be the share class name, instead of the fund name\n", + "2. Remove the data point which support is 0 to calculate the metrics\n", + "3. Add the message list to store the error message\n", + "4. Support save metrics/ error message to excel file\n", + "5. Support statistics for different document list\n", + "6. Set F1-Score to the first column in the metrics table\n", + "\"\"\"\n", "\n", - "df_provider_mapping = get_provider_mapping(provider_mapping_file_path)\n", + "funds_matched = 0\n", + "funds_not_matched = 0\n", "\n", - "all_provider_dict = get_provider_names(generated_results_indexed, df_provider_mapping)\n", + "# Load the files\n", + "headers_gt, ground_truth_data = load_excel(path_ground_truth, 0)\n", + "headers_gen, generated_results_data = load_excel(path_generated_results, 0)\n", + "\n", + "# Assuming doc_id is the first column and fund_name is the second column\n", + "doc_id_index = 0\n", + "fund_name_index = 1\n", + "\n", + "# Index the data\n", + "ground_truth_indexed = index_data_by_key(ground_truth_data, doc_id_index, fund_name_index, headers_gt)\n", + "generated_results_indexed = index_data_by_key(generated_results_data, doc_id_index, fund_name_index, headers_gen)\n", + "\n", + "intersection = set(headers_gen).intersection(headers_gt)\n", + "\n", + "# Convert the result back to a list (if you need it as a list)\n", + "intersection_list = list(intersection)\n", + "\n", + "total_fn = []\n", + "\n", + "# df_provider_mapping = get_provider_mapping(provider_mapping_file_path)\n", + "\n", + "# all_provider_dict = get_provider_names(generated_results_indexed, df_provider_mapping)\n", "\n", "\n", "# for provider_name in all_provider_dict:\n", @@ -465,10 +477,127 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'ANZ OA Inv-OnePath Multi Asset Income NEF', 'truth': '0', 'generated': '0.11', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OneAnswer Investment Portfolio - OnePath Growth Index -NE', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 411062815, 'sec_name': 'Perpetual WFP-Macquarie Income Opps', 'truth': '0.03', 'generated': '0.12', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 411062815, 'sec_name': 'Perpetual WFP-Perpetual Diversified Inc', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 411062815, 'sec_name': 'Perpetual WFP-Perpetual Share Plus L/S', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 411062815, 'sec_name': 'Perpetual WFP-Schroder Fixed Income', 'truth': '0', 'generated': '0.01', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 412778803, 'sec_name': 'Telstra Balanced Pen', 'truth': '0.24', 'generated': '0.23', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 412778803, 'sec_name': 'Telstra Property Pension', 'truth': '0.67', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 412778803, 'sec_name': 'Telstra Cash Pension', 'truth': '0.01', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 412778803, 'sec_name': 'Telstra Australian shares Pension', 'truth': '0.01', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 412778803, 'sec_name': 'Telstra Defensive growth Pension', 'truth': '0.14', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 412778803, 'sec_name': 'Telstra Growth Pen', 'truth': '0.24', 'generated': '0.23', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 412778803, 'sec_name': 'Telstra Conservative Pen', 'truth': '0.15', 'generated': '0.13', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 412778803, 'sec_name': 'Telstra International shares Pension', 'truth': '0.01', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum Global Fund (Long Only)', 'truth': '0.24', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum Global Fund (Long Only) P Class', 'truth': '0.24', 'generated': '0', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum International Fund', 'truth': '0.15', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum Asia Fund', 'truth': '0.27', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum International Brands Fund', 'truth': '0.03', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum International Technology Fund', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum European Fund', 'truth': '0.24', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum Japan Fund', 'truth': '0.15', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 446324179, 'sec_name': 'Lifeplan Investment Bond - Allan Gray Australian Equity Fund Class A', 'truth': '0.28', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 446324179, 'sec_name': 'Lifeplan Investment Bond MLC Horizon 2-Capital Stable Open', 'truth': '0.05', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Global Small Company Trust', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Global Value Trust -Active ETF', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Australian Value Trust - Active ETF', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Global Core Equity Trust', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Global Core Equity Tr AUDHdg', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 550522985, 'sec_name': 'RQI Global Value – Class A', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539241700, 'sec_name': 'North Professional Balanced', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539241700, 'sec_name': 'North Professional High Growth', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539241700, 'sec_name': 'North Professional Conservative', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539241700, 'sec_name': 'North Professional Growth', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539241700, 'sec_name': 'North Professional Moderately Conservative', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539261734, 'sec_name': 'ipac life choices Income Generator', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 526200514, 'sec_name': 'BT Future Goals Retail', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 526200514, 'sec_name': 'BT Property Securities Retail', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 526200514, 'sec_name': 'BT Asian Share Retail', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 526200514, 'sec_name': 'BT International Share Retail', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 526200514, 'sec_name': 'BT Smaller Companies Retail', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 526200514, 'sec_name': 'BT Technology Retail', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 526200514, 'sec_name': 'BT European Share Retail', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 526200514, 'sec_name': 'BT American Share Retail', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 526200514, 'sec_name': 'BT Imputation Shares Retail', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 523516443, 'sec_name': 'CFS MIF-Strategic Cash', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 557526129, 'sec_name': 'Fortlake Real-Income Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Global Core Equity Tr AUDHdg', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Global Core Equity Trust', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Australian Value Trust - Active ETF', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Global Value Trust -Active ETF', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Global Small Company Trust', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 544886057, 'sec_name': 'CFS Growth Builder', 'truth': '0.01', 'generated': '0.04', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Global Small Company Trust', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Global Value Trust -Active ETF', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Australian Value Trust - Active ETF', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Global Core Equity Trust', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Global Core Equity Tr AUDHdg', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 550522985, 'sec_name': 'RQI Global Value – Class A', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539241700, 'sec_name': 'North Professional Balanced', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539241700, 'sec_name': 'North Professional High Growth', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539241700, 'sec_name': 'North Professional Conservative', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539241700, 'sec_name': 'North Professional Growth', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539241700, 'sec_name': 'North Professional Moderately Conservative', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 539261734, 'sec_name': 'ipac life choices Income Generator', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 526200514, 'sec_name': 'BT Future Goals Retail', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 526200514, 'sec_name': 'BT Property Securities Retail', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 526200514, 'sec_name': 'BT Asian Share Retail', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 526200514, 'sec_name': 'BT International Share Retail', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 526200514, 'sec_name': 'BT Smaller Companies Retail', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 526200514, 'sec_name': 'BT Technology Retail', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 526200514, 'sec_name': 'BT European Share Retail', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 526200514, 'sec_name': 'BT American Share Retail', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 526200514, 'sec_name': 'BT Imputation Shares Retail', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 523516443, 'sec_name': 'CFS MIF-Strategic Cash', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 557526129, 'sec_name': 'Fortlake Real-Income Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Global Core Equity Tr AUDHdg', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Global Core Equity Trust', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Australian Value Trust - Active ETF', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Global Value Trust -Active ETF', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Global Small Company Trust', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 544886057, 'sec_name': 'CFS Growth Builder', 'truth': '0.01', 'generated': '0.04', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'ANZ OA Inv-OnePath Multi Asset Income NEF', 'truth': '0', 'generated': '0.11', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OneAnswer Investment Portfolio - OnePath Growth Index -NE', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 411062815, 'sec_name': 'Perpetual WFP-Macquarie Income Opps', 'truth': '0.03', 'generated': '0.12', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 411062815, 'sec_name': 'Perpetual WFP-Perpetual Diversified Inc', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 411062815, 'sec_name': 'Perpetual WFP-Perpetual Share Plus L/S', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 411062815, 'sec_name': 'Perpetual WFP-Schroder Fixed Income', 'truth': '0', 'generated': '0.01', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 412778803, 'sec_name': 'Telstra Balanced Pen', 'truth': '0.24', 'generated': '0.23', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 412778803, 'sec_name': 'Telstra Property Pension', 'truth': '0.67', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 412778803, 'sec_name': 'Telstra Cash Pension', 'truth': '0.01', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 412778803, 'sec_name': 'Telstra Australian shares Pension', 'truth': '0.01', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 412778803, 'sec_name': 'Telstra Defensive growth Pension', 'truth': '0.14', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 412778803, 'sec_name': 'Telstra Growth Pen', 'truth': '0.24', 'generated': '0.23', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 412778803, 'sec_name': 'Telstra Conservative Pen', 'truth': '0.15', 'generated': '0.13', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 412778803, 'sec_name': 'Telstra International shares Pension', 'truth': '0.01', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum Global Fund (Long Only)', 'truth': '0.24', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum Global Fund (Long Only) P Class', 'truth': '0.24', 'generated': '0', 'error': 'Truth is not equal with generated'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum International Fund', 'truth': '0.15', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum Asia Fund', 'truth': '0.27', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum International Brands Fund', 'truth': '0.03', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum International Technology Fund', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum European Fund', 'truth': '0.24', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum Japan Fund', 'truth': '0.15', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 446324179, 'sec_name': 'Lifeplan Investment Bond - Allan Gray Australian Equity Fund Class A', 'truth': '0.28', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", + "{'data_point': 'performance_fee_costs', 'doc_id': 446324179, 'sec_name': 'Lifeplan Investment Bond MLC Horizon 2-Capital Stable Open', 'truth': '0.05', 'generated': '', 'error': 'Generated is null and truth is not null'}\n" + ] + } + ], + "source": [ + "for message_list_element in message_list:\n", + " if message_list_element[\"data_point\"] == \"performance_fee_costs\":\n", + " print(message_list_element)" + ] }, { "cell_type": "code", @@ -515,7 +644,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.6" + "version": "3.12.4" }, "orig_nbformat": 4 },