{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import openpyxl\n", "from collections import defaultdict\n", "import pandas as pd\n", "import statistics\n", "import os\n", "import re\n", "from utils.similarity import Similarity\n", "\n", "\n", "imp_datapoints = [\"Management Fee and Costs\", \"Management Fee\", \"Performance fee and cost\", \"Interposed vehicle Performance fee and Costs\",\n", " \"Administration Fee and costs\", \"Total Annual Dollar Based Charges\", \"Buy Spread\", \"Sell Spread\", \"Performance Fee\",\n", " \"Minimum Initial Investment\", \"Benchmark\"]\n", "\n", "\n", "imp_datapoints_mapping = {\n", " \"Management Fee and Costs\": \"management_fee_and_costs\",\n", " \"Management Fee\": \"management_fee\",\n", " \"Performance fee and cost\": \"performance_fee_costs\",\n", " \"Interposed vehicle Performance fee and Costs\": \"interposed_vehicle_performance_fee_cost\",\n", " \"Administration Fee and costs\": \"administration_fees\",\n", " \"Total Annual Dollar Based Charges\": \"total_annual_dollar_based_charges\",\n", " \"Buy Spread\": \"buy_spread\",\n", " \"Sell Spread\": \"sell_spread\",\n", " \"Performance Fee\": \"PerformanceFeeCharged\",\n", " \"Minimum Initial Investment\": \"minimum_initial_investment\",\n", " \"Benchmark\": \"benchmark_name\"\n", "}\n", "\n", "# imp_datapoints = [\"Management Fee and Costs\", \"Management Fee\", \"Performance fee and cost\",\n", "# \"Administration Fee and costs\", \"Total Annual Dollar Based Charges\", \"Buy Spread\", \"Sell Spread\"]\n", "\n", "\n", "# imp_datapoints_mapping = {\n", "# \"Management Fee and Costs\": \"management_fee_and_costs\",\n", "# \"Management Fee\": \"management_fee\",\n", "# \"Performance fee and cost\": \"performance_fee_costs\",\n", "# \"Administration Fee and costs\": \"administration_fees\",\n", "# \"Total Annual Dollar Based Charges\": \"total_annual_dollar_based_charges\",\n", "# \"Buy Spread\": \"buy_spread\",\n", "# 
#     "Sell Spread": "sell_spread"
# }

path_ground_truth = r"/data/aus_prospectus/ground_truth/phase2_file/46_documents/46_documents_ground_truth_with_mapping.xlsx"
# path_ground_truth = r"/data/aus_prospectus/ground_truth/phase2_file/next_round/next_round_6_documents_ground_truth_with_mapping.xlsx"
# path_generated_results = r"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250317.xlsx"
path_generated_results = r"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250326224343.xlsx"
# path_generated_results = r"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_6_documents_by_text_20250326203744.xlsx"
provider_mapping_file_path = r"/data/aus_prospectus/ground_truth/phase2_file/46_documents/TopProvidersBiz.xlsx"


# Kept only so existing references to these module-level names keep working.
# The helpers below no longer append to them: sharing them across calls made
# every run after the first report the accumulated totals of earlier runs.
message_list = []
total_fn = []


def _resolve_datapoints(datapoints_mapping=None):
    """Return the tracked datapoints as an ordered list of (label, column) pairs.

    When ``datapoints_mapping`` (label -> column name) is given it is used as is;
    otherwise the module-level ``imp_datapoints`` / ``imp_datapoints_mapping``
    configuration is used, skipping labels that have no mapping entry.
    """
    if datapoints_mapping is not None:
        return list(datapoints_mapping.items())
    return [
        (label, imp_datapoints_mapping[label])
        for label in imp_datapoints
        if label in imp_datapoints_mapping
    ]


def load_excel(filepath, header_row_index):
    """Load the active sheet of an Excel file.

    The row at ``header_row_index`` (0-based) becomes the header; every later
    row becomes a data row. Rows before the header are ignored and empty cells
    (``None``) are replaced by ``""``.

    Returns:
        (headers, data): a list of header cells and a list of row lists.
    """
    wb = openpyxl.load_workbook(filepath, data_only=True)
    try:
        sheet = wb.active
        headers = []
        data = []
        for index, row in enumerate(sheet.iter_rows(values_only=True)):
            cells = ["" if cell is None else cell for cell in row]
            if index == header_row_index:
                headers = cells
            elif index > header_row_index:
                data.append(cells)
    finally:
        # Release the underlying file handle even if reading fails.
        wb.close()

    return headers, data


def index_data_by_key(data, header):
    """Index rows as ``{doc_id: {sec_name: {column: value}}}``.

    ``doc_id`` is converted to ``int`` and ``sec_name`` (the share class name)
    to ``str``; every other cell goes through ``convert_if_number``. Rows with
    an empty ``sec_name`` or an empty ``doc_id`` (e.g. blank trailing rows) are
    skipped. A later row with the same keys overwrites an earlier one.

    Raises:
        KeyError: if there is data but ``header`` lacks ``doc_id`` or ``sec_name``.
        ValueError: if a non-empty ``doc_id`` cannot be converted to ``int``.
    """
    if data and not {"doc_id", "sec_name"} <= set(header):
        raise KeyError("header must contain 'doc_id' and 'sec_name' columns")

    indexed_data = defaultdict(dict)
    for row in data:
        # Reset per row so a row can never inherit the previous row's keys.
        primary_key = None
        secondary_key = None
        row_data = {}
        for column_name, cell in zip(header, row):
            if column_name == "doc_id":
                primary_key = cell
            elif column_name == "sec_name":
                # share class should be the comparison level and key
                secondary_key = str(cell)
            else:
                row_data[column_name] = convert_if_number(cell)

        if not secondary_key:
            continue
        if primary_key is None or (isinstance(primary_key, str) and not primary_key.strip()):
            continue
        indexed_data[int(primary_key)][secondary_key] = row_data
    return indexed_data


def convert_if_number(value):
    """Normalise numeric-looking values, leaving everything else untouched.

    A value that parses as a float is rounded to 2 decimals and returned as an
    ``int`` when it is whole, otherwise as a ``float``. Values that cannot be
    converted (including NaN and infinities) are returned unchanged.
    """
    try:
        float_value = round(float(value), 2)
        int_value = int(float_value)
        return int_value if int_value == float_value else float_value
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int(inf); ValueError: non-numeric text or int(nan).
        return value


def compare_values(value1, value2):
    """Return True when both values are equal after ``convert_if_number``."""
    return convert_if_number(value1) == convert_if_number(value2)


def _classify_pair(column, truth, generated):
    """Classify one truth/generated pair.

    Returns ``(outcome, error)`` where ``outcome`` is one of "TP", "TN", "FP",
    "FN" and ``error`` is the message text for mismatches (``None`` otherwise).
    """
    if truth == "":
        if generated == "":
            return "TN", None
        return "FP", "Truth is null and generated is not null"
    if truth == generated:
        return "TP", None
    if generated == "":
        return "FN", "Generated is null and truth is not null"
    # Benchmark names are free text, so they get a fuzzy comparison.
    if column == "benchmark_name" and compare_text(truth, generated):
        return "TP", None
    return "FP", "Truth is not equal with generated"


def compare_data(ground_truth, generated_results, headers, intersection_list, document_list,
                 datapoints_mapping=None):
    """Score generated results against the ground truth, per datapoint column.

    Args:
        ground_truth: ``{doc_id: {sec_name: {column: value}}}`` reference data.
        generated_results: same structure, holding the values to evaluate.
        headers: column names; each one except ``doc_id`` gets a counter dict.
        intersection_list: columns present in both files; only those that are
            also tracked datapoints are compared.
        document_list: optional collection of doc ids (as strings) to restrict
            the evaluation to; ``None`` evaluates every ground-truth document.
        datapoints_mapping: optional label -> column mapping overriding the
            module-level ``imp_datapoints`` configuration.

    Returns:
        ``(results, messages, share_matched, share_not_matched,
        not_matched_share_name_list)``. ``messages`` is a fresh list for each
        call, so reports of successive runs do not leak into each other.
        Share names are counted once per distinct name across documents.
    """
    tracked_columns = {column for _, column in _resolve_datapoints(datapoints_mapping)}
    columns_to_compare = [column for column in intersection_list if column in tracked_columns]
    allowed_doc_ids = None if document_list is None else set(document_list)

    # Initialize result dictionaries for each column except 'doc_id'
    results = {
        column: {"TP": 0, "TN": 0, "FP": 0, "FN": 0, "SUPPORT": 0}
        for column in headers
        if column != "doc_id"
    }
    messages = []
    seen_share_names = set()
    unmatched_seen = set()
    not_matched_share_name_list = []
    share_matched, share_not_matched = 0, 0

    # Iterate over the ground truth and look each share class up in the generated results
    for doc_id, secs in ground_truth.items():
        if allowed_doc_ids is not None and str(doc_id) not in allowed_doc_ids:
            continue

        if doc_id not in generated_results:
            # The whole document is missing; its share classes are not counted.
            messages.append({"data_point": "Document", "doc_id": doc_id, "sec_name": "",
                             "truth": "", "generated": "",
                             "error": "Document not found in generated results"})
            continue

        generated_doc = generated_results[doc_id]
        for sec_name, truth_values in secs.items():
            if sec_name not in generated_doc:
                seen_share_names.add(sec_name)
                if sec_name not in unmatched_seen:
                    # Share class missing from the generated results
                    messages.append({"data_point": "Share Class", "doc_id": doc_id, "sec_name": sec_name,
                                     "truth": "", "generated": "",
                                     "error": "Share class not found in generated results"})
                    share_not_matched += 1
                    unmatched_seen.add(sec_name)
                    not_matched_share_name_list.append(sec_name)
                continue

            generated_values = generated_doc[sec_name]
            for column in columns_to_compare:
                truth = str(truth_values[column]).strip()
                generated = str(generated_values[column]).strip()
                outcome, error = _classify_pair(column, truth, generated)
                results[column][outcome] += 1
                if truth != "":
                    # SUPPORT counts the cases where the ground truth has a value.
                    results[column]["SUPPORT"] += 1
                if error is not None:
                    messages.append({"data_point": column, "doc_id": doc_id, "sec_name": sec_name,
                                     "truth": truth, "generated": generated, "error": error})

            if sec_name not in seen_share_names:
                seen_share_names.add(sec_name)
                share_matched += 1

    return results, messages, share_matched, share_not_matched, not_matched_share_name_list


def clean_text(text: str):
    """Replace non-word characters with spaces, collapse whitespace and trim.

    ``None`` and ``""`` are returned unchanged.
    """
    if text is None or len(text) == 0:
        return text
    text = re.sub(r"\W", " ", text)
    text = re.sub(r"\s+", " ", text)
    return text.strip()


def compare_text(source_text, target_text):
    """Fuzzy text comparison; always returns a bool.

    Two texts match when, after ``clean_text``, they are equal, one contains
    the other, or the Jaccard similarity of their lower-cased tokens (computed
    by the project's ``Similarity`` helper) exceeds 0.8.
    """
    source_text = clean_text(source_text)
    target_text = clean_text(target_text)
    if not source_text or not target_text:
        # An empty/punctuation-only text is a substring of everything, so it
        # must not be allowed to "match" a real value.
        return source_text == target_text
    if source_text == target_text or source_text in target_text or target_text in source_text:
        return True
    similarity = Similarity()
    jacard_score = similarity.jaccard_similarity(source_text.lower().split(), target_text.lower().split())
    return jacard_score > 0.8


def calculate_metrics(tp, tn, fp, fn):
    """Return ``(precision, recall, accuracy, f1_score)``; a ratio with a zero denominator is 0."""
    precision = tp / (tp + fp) if (tp + fp) != 0 else 0
    recall = tp / (tp + fn) if (tp + fn) != 0 else 0
    accuracy = (tp + tn) / (tp + tn + fp + fn) if (tp + tn + fp + fn) != 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0
    return precision, recall, accuracy, f1_score


def print_metrics_table(data, datapoints_mapping=None):
    """Print a per-datapoint metrics table plus a TOTAL row and return the rows.

    Args:
        data: ``{column: {"TP", "TN", "FP", "FN", "SUPPORT"}}`` as produced by
            ``compare_data``.
        datapoints_mapping: optional label -> column mapping overriding the
            module-level ``imp_datapoints`` configuration.

    Datapoints that are missing from ``data`` or have SUPPORT 0 are skipped.
    The TOTAL row holds the mean of the per-datapoint ratios and the sum of the
    counts, all computed from this call only. With no datapoint to report, the
    TOTAL row is all zeros.

    Returns:
        list of row dicts (one per datapoint, then the TOTAL row).
    """
    header_format = "{:<50}\t{:<10}\t{:<10}\t{:<10}\t{:<10}\t{:<10}\t{:<10}\t{:<10}\t{:<10}\t{:<10}"
    row_format = "{:<50}\t{:<10.4f}\t{:<10.4f}\t{:<10.4f}\t{:<10.4f}\t{:<10.0f}\t{:<10.0f}\t{:<10.0f}\t{:<10.0f}\t{:<10.0f}"
    ratio_names = ("F1-Score", "Precision", "Recall", "Accuracy")
    count_names = ("SUPPORT", "TP", "TN", "FP", "FN")

    # Print table headers
    print(header_format.format("Metric", *ratio_names, *count_names))

    metrics_list = []
    for _, column in _resolve_datapoints(datapoints_mapping):
        values = data.get(column)
        if not values or values["SUPPORT"] == 0:
            continue
        tp, tn, fp, fn = values["TP"], values["TN"], values["FP"], values["FN"]
        precision, recall, accuracy, f1_score = calculate_metrics(tp, tn, fp, fn)
        metrics = {"Datapoint": column, "F1-Score": f1_score, "Precision": precision, "Recall": recall,
                   "Accuracy": accuracy, "SUPPORT": values["SUPPORT"], "TP": tp, "TN": tn, "FP": fp, "FN": fn}
        metrics_list.append(metrics)
        if column:
            print(row_format.format(column, *(metrics[name] for name in ratio_names + count_names)))

    total_metrics = {"Datapoint": "TOTAL"}
    for name in ratio_names:
        # statistics.mean raises on an empty sequence, so fall back to 0.
        total_metrics[name] = statistics.mean([row[name] for row in metrics_list]) if metrics_list else 0
    for name in count_names:
        total_metrics[name] = sum(row[name] for row in metrics_list)

    metrics_list.append(total_metrics)
    print(row_format.format("TOTAL", *(total_metrics[name] for name in ratio_names + count_names)))
    return metrics_list


def create_metrics_df(data, datapoints_mapping=None):
    """Build, print and return a DataFrame of metrics for datapoints with SUPPORT > 0.

    Rows are labelled with the human-readable datapoint label. Datapoints that
    are missing from ``data`` are skipped.
    """
    rows = []
    for label, column in _resolve_datapoints(datapoints_mapping):
        values = data.get(column)
        if values is None:
            continue
        # Only add rows where SUPPORT > 0
        if values["SUPPORT"] <= 0:
            continue
        precision, recall, accuracy, f1_score = calculate_metrics(
            values["TP"], values["TN"], values["FP"], values["FN"]
        )
        rows.append({
            "Metric": label,
            "Precision": precision,
            "Recall": recall,
            "Accuracy": accuracy,
            "F1-Score": f1_score,
            "SUPPORT": values["SUPPORT"],
        })

    df_metrics = pd.DataFrame(rows)
    print(df_metrics)
    return df_metrics


def get_provider_mapping(file_path):
    """Read the provider workbook and return the distinct (Docid, ProviderName) pairs."""
    df = pd.read_excel(file_path)
    df = df.groupby(["Docid", "ProviderName"]).first().reset_index()
    return df[["Docid", "ProviderName"]]


def get_provider_names(generated_results_indexed, df_provider_mapping):
    """Group document ids by provider name.

    Documents with no row in ``df_provider_mapping`` are left out. When a
    document has several rows, the first one is used.

    Returns:
        ``{provider_name: [doc_id, ...]}`` in iteration order of the input.
    """
    providers_dict = {}
    for doc_id in generated_results_indexed:
        matches = df_provider_mapping.loc[df_provider_mapping["Docid"] == doc_id, "ProviderName"]
        if matches.empty:
            continue
        providers_dict.setdefault(matches.values[0], []).append(doc_id)
    return providers_dict


def get_specified_doc_data(results, doc_list):
    """Return the subset of ``results`` whose doc ids appear in ``doc_list``."""
    return {doc_id: results[doc_id] for doc_id in doc_list if doc_id in results}
\t0.8471 \t1.0000 \t0.9696 \t73 \t72 \t342 \t13 \t0 \n", "administration_fees \t0.9081 \t0.8317 \t1.0000 \t0.9602 \t84 \t84 \t326 \t17 \t0 \n", "total_annual_dollar_based_charges \t0.9930 \t0.9861 \t1.0000 \t0.9977 \t71 \t71 \t355 \t1 \t0 \n", "buy_spread \t0.9291 \t0.8930 \t0.9681 \t0.8806 \t376 \t334 \t42 \t40 \t11 \n", "sell_spread \t0.9291 \t0.8930 \t0.9681 \t0.8806 \t376 \t334 \t42 \t40 \t11 \n", "minimum_initial_investment \t0.9507 \t0.9633 \t0.9383 \t0.9297 \t308 \t289 \t108 \t11 \t19 \n", "benchmark_name \t0.9139 \t0.8846 \t0.9452 \t0.9391 \t156 \t138 \t263 \t18 \t8 \n", "TOTAL \t0.9316 \t0.8994 \t0.9680 \t0.9201 \t2583 \t2333 \t1596 \t249 \t92 \n", "Total Shares Matched - 379\n", "Total Shares Not Matched - 128\n", "Percentage of Shares Matched - 74.7534516765286\n", "Not Matched Shares Name List - ['SPDR® S&P World ex Australia Carbon Control Fund', 'Mercer Multi-manager Growth Fund – Retail Units', 'Mercer Multi-manager High Growth Fund – Retail Units', 'ANZ OA Inv-OnePath Multi Asset Income EF', 'ANZ OA IP-OnePath Australian Shares', 'ANZ OA IP-OnePath Diversified Fixed Interest', 'ANZ OA IP-OP Diversified Credit EF', 'ANZ OA IP-OP Diversified Credit NE', 'OneAnswer Investment Portfolio - Schroder Strategic Growth -NE', 'OnePath ANZ OA IP-T. Rowe Price Dyna Gl Bond EF', 'OnePath ANZ OA IP-T. 
Rowe Price Dyna Gl Bond NE', 'OnePath OA Investment Portfolio-BlackRock Tactical Growth EF', 'OnePath OA Inv-Greencape Broadcap EF', 'OnePath OA Inv-Nikko AM Australian Shares EF', 'OnePath OA IP- Pendal Monthly Income Plus-EF/Sel', 'OnePath OA IP- Pendal Monthly Income Plus-NEF', 'OnePath OA IP-Alternatives Growth Fund-EF/Sel', 'OnePath OA IP-Alternatives Growth Fund-NEF', 'OnePath OA IP-ANZ Cash Advantage-EF/Sel', 'OnePath OA IP-ANZ Cash Advantage-NEF', 'OnePath OA IP-Ausbil Australian Emerging Leaders Trust-EF/Sel', 'OnePath OA IP-Bennelong Australian Equities-EF/Sel', 'OnePath OA IP-Bentham Global Income Trust-EF/Sel', 'OnePath OA IP-Bentham Global Income Trust-NEF', 'OnePath OA IP-Fidelity Australian Equities-EF/Sel', 'OnePath OA IP-Investors Mutual Australian Share Trust- EF/Sel', 'OnePath OA IP-Kapstream Absolute Return Income Trust-EF/Sel', 'OnePath OA IP-Kapstream Absolute Return Income Trust-NEF', 'OnePath OA IP-Merlon Australian Share Income-EF/Sel', 'OnePath OA IP-OnePath Active Growth Trust-NEF', 'OnePath OA IP-OnePath High Growth Trust-EF/Sel', 'OnePath OA IP-OnePath High Growth Trust-NEF', 'OnePath OA IP-OnePath Managed Growth Trust-EF/Sel', 'OnePath OA IP-OnePath Managed Growth Trust-NEF', 'OnePath OA IP-OptiMix Australian Fixed Interest Trust-EF/Sel', 'OnePath OA IP-OptiMix Australian Fixed Interest Trust-NEF', 'OnePath OA IP-OptiMix Australian Share Trust-EF/Sel', 'OnePath OA IP-OptiMix Australian Share Trust-NEF', 'OnePath OA IP-OptiMix Global Emerging Markets Share-EF/Sel', 'OnePath OA IP-OptiMix Global Emerging Markets Share-NEF', 'OnePath OA IP-OptiMIx Global Share Trust-EF/Sel', 'OnePath OA IP-OptiMIx Global Share Trust-NEF', 'OnePath OA IP-OptiMix High Growth Trust-EF/Sel', 'OnePath OA IP-OptiMix High Growth Trust-NEF', 'OnePath OA IP-OptiMix Property Securities Trust-EF/Sel', 'OnePath OA IP-OptiMix Property Securities Trust-NEF', 'OnePath OA IP-Perpetual Balanced Growth Trust-EF/Sel', 'OnePath OA IP-Perpetual Balanced Growth Trust-NEF', 
'OnePath OA IP-Perpetual Conservative Growth Trust-EF/Sel', 'OnePath OA IP-Perpetual Conservative Growth Trust-NEF', 'OnePath OA IP-Schroder Fixed Income-EF/Sel', 'OnePath OA IP-Schroder Fixed Income-NEF', 'OnePath OA IP-UBS Balanced Trust-EF/Sel', 'OnePath OA IP-UBS Balanced Trust-NEF', 'OnePath OA IP-UBS Defensive Trust-EF/Sel', 'OnePath OA IP-UBS Defensive Trust-NEF', 'OnePath OA IP-UBS Diversified Fixed Income Trust-EF/Sel', 'OnePath OA IP-UBS Diversified Fixed Income Trust-NEF', 'OnePath OneAnswer Investment Portfolio - Ardea Real Outcome -EF/Sel', 'OnePath OneAnswer Investment Portfolio - Ardea Real Outcome -NE', 'OnePath OneAnswer Investment Portfolio - Barrow Hanley Concentrated Global Shares Hedged -EF/Sel', 'OnePath OneAnswer Investment Portfolio - Barrow Hanley Concentrated Global Shares Hedged -NE', 'OnePath OneAnswer Investment Portfolio - BlackRock Advantage Australian Equity -EF/Sel', 'OnePath OneAnswer Investment Portfolio - BlackRock Advantage Australian Equity -NE', 'OnePath OneAnswer Investment Portfolio - BlackRock Diversified ESG Growth -EF/Sel', 'OnePath OneAnswer Investment Portfolio - BlackRock Diversified ESG Growth -NE', 'OnePath OneAnswer Investment Portfolio - First Sentier Imputation -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Australian Shares Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Balanced Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Balanced Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Conservative Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Conservative Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Diversified Bond Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Diversified Bond Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Growth Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath High Growth Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath High 
Growth Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath International Shares Index (Hedged) -EF/Sel', 'OnePath OneAnswer Investment Portfolio - Schroder Strategic Growth -EF/Sel', 'OnePath Schroder Real Return Trust (Entry Fee)', 'OnePath Schroder Real Return Trust (Nil Entry Fee)', 'OnePath OA IP-Ausbil Australian Emerging Leaders Trust-NEF', 'Telstra Growth Pen', 'First Sentier Concentrated Aus Share', 'First Sentier Australian Small Companies', 'First Sentier Imputation', 'First Sentier Global Property Securities', 'First Sentier Australian Share', 'CFS FC-Investors Mutual Future Leaders', 'Stewart Worldwide Leaders Sustainability', 'First Sentier Property Securities', 'MyNorth Index Defensive', 'MLC MKPF - Inflation Plus - Conservative', 'MLC MasterKey Super Fundamentals - Perpetual Australian Share', 'MLC MKSF - Perpetual WS Ethical SRI Fund', 'MLC MasterKey Super Fundamentals - Perpetual Small Co Fund No.2', 'MLC MKSF - PIMCO Div. Fixed Interest Wholesale Class', 'MLC MKSF - Platinum Asia Fund', 'MLC MKSF - Platinum International Fund', 'MLC MKSF - PM CAPITAL Global Companies', 'MLC MKSF - Schroder WS Australian Equity', 'MLC MasterKey Pension Fundamentals (Pre Retirement) - MLC Aust Property Index', 'MLC MasterKey Super Fundamentals - MLC Australian Property Index', 'MLC MKSF - Vanguard Intl Shr Indx (Hgd)', 'MLC MKSF - Vanguard Intl Shr Indx', 'HOSTPLUS Fixed Interest Indexed Super', 'Lifeplan Investment Bond Perpetual Balanced Growth', 'Lifeplan Investment Bond Perpetual Conservative Growth', 'Lifeplan Investment Bond Perpetual Industrial Share', 'Lifeplan Investment Bond Vanguard® Australian Shares Index', 'Dimensional Australian Core Equity Trust', 'FC W Pen-CFS TTR Global Infrastructure Securities', 'CFS MIF-High Growth', 'CFS MIF-Property Securities', 'CFS MIF-Geared Share NEF', 'CFS MIF-Australian Share', 'CFS MIF-Geared Global Share', 'CFS MIF-Global Tech & Comm', 'CFS MIF-Stewart Inv Worldwide Leaders Sustainability', 'CFS MIF-Geared 
Share', 'CFS MIF-Diversified', 'CFS MIF-Janus Henderson Global Natural Resources Fund', 'CFS MIF-Macquarie Australian Emerging Companies', 'CFS MIF-Balanced', 'CFS MIF-Conservative', 'CFS MIF-Imputation', 'CFS MIF-Global Health & Biotech', 'Dimensional Australia Core Equity Trust - Active ETF']\n", "All Providers Results: \n", "Document List File - ./sample_documents/aus_prospectus_29_documents_sample.txt\n", "Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n", "management_fee_and_costs \t0.9621 \t0.9270 \t1.0000 \t0.9270 \t177 \t165 \t0 \t13 \t0 \n", "management_fee \t0.9886 \t0.9775 \t1.0000 \t0.9775 \t177 \t174 \t0 \t4 \t0 \n", "performance_fee_costs \t0.8557 \t0.8037 \t0.9149 \t0.8371 \t100 \t86 \t63 \t21 \t8 \n", "interposed_vehicle_performance_fee_cost \t0.8966 \t0.8125 \t1.0000 \t0.9326 \t53 \t52 \t114 \t12 \t0 \n", "administration_fees \t0.9655 \t0.9333 \t1.0000 \t0.9944 \t14 \t14 \t163 \t1 \t0 \n", "buy_spread \t0.9496 \t0.9091 \t0.9938 \t0.9045 \t175 \t160 \t1 \t16 \t1 \n", "sell_spread \t0.9464 \t0.9034 \t0.9938 \t0.8989 \t175 \t159 \t1 \t17 \t1 \n", "minimum_initial_investment \t0.9064 \t0.9528 \t0.8643 \t0.8596 \t140 \t121 \t32 \t6 \t19 \n", "benchmark_name \t0.9186 \t0.8587 \t0.9875 \t0.9213 \t89 \t79 \t85 \t13 \t1 \n", "TOTAL \t0.9322 \t0.8976 \t0.9727 \t0.9170 \t1100 \t1010 \t459 \t103 \t122 \n", "Total Shares Matched - 173\n", "Total Shares Not Matched - 18\n", "Percentage of Shares Matched - 90.57591623036649\n", "Not Matched Shares Name List - ['Dimensional Australian Core Equity Trust', 'FC W Pen-CFS TTR Global Infrastructure Securities', 'CFS MIF-High Growth', 'CFS MIF-Property Securities', 'CFS MIF-Geared Share NEF', 'CFS MIF-Australian Share', 'CFS MIF-Geared Global Share', 'CFS MIF-Global Tech & Comm', 'CFS MIF-Stewart Inv Worldwide Leaders Sustainability', 'CFS MIF-Geared Share', 'CFS MIF-Diversified', 'CFS MIF-Janus Henderson Global Natural Resources Fund', 'CFS MIF-Macquarie Australian Emerging Companies', 
'CFS MIF-Balanced', 'CFS MIF-Conservative', 'CFS MIF-Imputation', 'CFS MIF-Global Health & Biotech', 'Dimensional Australia Core Equity Trust - Active ETF']\n", "All Providers Results: \n", "Document List File - ./sample_documents/aus_prospectus_17_documents_sample.txt\n", "Metric \tF1-Score \tPrecision \tRecall \tAccuracy \tSUPPORT \tTP \tTN \tFP \tFN \n", "management_fee_and_costs \t0.9190 \t0.8787 \t0.9633 \t0.8514 \t247 \t210 \t2 \t29 \t8 \n", "management_fee \t0.9330 \t0.9038 \t0.9643 \t0.8755 \t247 \t216 \t2 \t23 \t8 \n", "performance_fee_costs \t0.8939 \t0.8939 \t0.8939 \t0.8474 \t191 \t160 \t51 \t19 \t19 \n", "interposed_vehicle_performance_fee_cost \t0.9756 \t0.9524 \t1.0000 \t0.9960 \t20 \t20 \t228 \t1 \t0 \n", "administration_fees \t0.8974 \t0.8140 \t1.0000 \t0.9357 \t70 \t70 \t163 \t16 \t0 \n", "total_annual_dollar_based_charges \t1.0000 \t1.0000 \t1.0000 \t1.0000 \t71 \t71 \t178 \t0 \t0 \n", "buy_spread \t0.9110 \t0.8788 \t0.9457 \t0.8635 \t201 \t174 \t41 \t24 \t10 \n", "sell_spread \t0.9138 \t0.8838 \t0.9459 \t0.8675 \t201 \t175 \t41 \t23 \t10 \n", "minimum_initial_investment \t0.9853 \t0.9711 \t1.0000 \t0.9799 \t168 \t168 \t76 \t5 \t0 \n", "benchmark_name \t0.9077 \t0.9219 \t0.8939 \t0.9518 \t67 \t59 \t178 \t5 \t7 \n", "TOTAL \t0.9337 \t0.9098 \t0.9607 \t0.9169 \t1483 \t1323 \t960 \t145 \t184 \n", "Total Shares Matched - 249\n", "Total Shares Not Matched - 110\n", "Percentage of Shares Matched - 69.35933147632312\n", "Not Matched Shares Name List - ['SPDR® S&P World ex Australia Carbon Control Fund', 'Mercer Multi-manager Growth Fund – Retail Units', 'Mercer Multi-manager High Growth Fund – Retail Units', 'ANZ OA Inv-OnePath Multi Asset Income EF', 'ANZ OA IP-OnePath Australian Shares', 'ANZ OA IP-OnePath Diversified Fixed Interest', 'ANZ OA IP-OP Diversified Credit EF', 'ANZ OA IP-OP Diversified Credit NE', 'OneAnswer Investment Portfolio - Schroder Strategic Growth -NE', 'OnePath ANZ OA IP-T. Rowe Price Dyna Gl Bond EF', 'OnePath ANZ OA IP-T. 
Rowe Price Dyna Gl Bond NE', 'OnePath OA Investment Portfolio-BlackRock Tactical Growth EF', 'OnePath OA Inv-Greencape Broadcap EF', 'OnePath OA Inv-Nikko AM Australian Shares EF', 'OnePath OA IP- Pendal Monthly Income Plus-EF/Sel', 'OnePath OA IP- Pendal Monthly Income Plus-NEF', 'OnePath OA IP-Alternatives Growth Fund-EF/Sel', 'OnePath OA IP-Alternatives Growth Fund-NEF', 'OnePath OA IP-ANZ Cash Advantage-EF/Sel', 'OnePath OA IP-ANZ Cash Advantage-NEF', 'OnePath OA IP-Ausbil Australian Emerging Leaders Trust-EF/Sel', 'OnePath OA IP-Bennelong Australian Equities-EF/Sel', 'OnePath OA IP-Bentham Global Income Trust-EF/Sel', 'OnePath OA IP-Bentham Global Income Trust-NEF', 'OnePath OA IP-Fidelity Australian Equities-EF/Sel', 'OnePath OA IP-Investors Mutual Australian Share Trust- EF/Sel', 'OnePath OA IP-Kapstream Absolute Return Income Trust-EF/Sel', 'OnePath OA IP-Kapstream Absolute Return Income Trust-NEF', 'OnePath OA IP-Merlon Australian Share Income-EF/Sel', 'OnePath OA IP-OnePath Active Growth Trust-NEF', 'OnePath OA IP-OnePath High Growth Trust-EF/Sel', 'OnePath OA IP-OnePath High Growth Trust-NEF', 'OnePath OA IP-OnePath Managed Growth Trust-EF/Sel', 'OnePath OA IP-OnePath Managed Growth Trust-NEF', 'OnePath OA IP-OptiMix Australian Fixed Interest Trust-EF/Sel', 'OnePath OA IP-OptiMix Australian Fixed Interest Trust-NEF', 'OnePath OA IP-OptiMix Australian Share Trust-EF/Sel', 'OnePath OA IP-OptiMix Australian Share Trust-NEF', 'OnePath OA IP-OptiMix Global Emerging Markets Share-EF/Sel', 'OnePath OA IP-OptiMix Global Emerging Markets Share-NEF', 'OnePath OA IP-OptiMIx Global Share Trust-EF/Sel', 'OnePath OA IP-OptiMIx Global Share Trust-NEF', 'OnePath OA IP-OptiMix High Growth Trust-EF/Sel', 'OnePath OA IP-OptiMix High Growth Trust-NEF', 'OnePath OA IP-OptiMix Property Securities Trust-EF/Sel', 'OnePath OA IP-OptiMix Property Securities Trust-NEF', 'OnePath OA IP-Perpetual Balanced Growth Trust-EF/Sel', 'OnePath OA IP-Perpetual Balanced Growth Trust-NEF', 
'OnePath OA IP-Perpetual Conservative Growth Trust-EF/Sel', 'OnePath OA IP-Perpetual Conservative Growth Trust-NEF', 'OnePath OA IP-Schroder Fixed Income-EF/Sel', 'OnePath OA IP-Schroder Fixed Income-NEF', 'OnePath OA IP-UBS Balanced Trust-EF/Sel', 'OnePath OA IP-UBS Balanced Trust-NEF', 'OnePath OA IP-UBS Defensive Trust-EF/Sel', 'OnePath OA IP-UBS Defensive Trust-NEF', 'OnePath OA IP-UBS Diversified Fixed Income Trust-EF/Sel', 'OnePath OA IP-UBS Diversified Fixed Income Trust-NEF', 'OnePath OneAnswer Investment Portfolio - Ardea Real Outcome -EF/Sel', 'OnePath OneAnswer Investment Portfolio - Ardea Real Outcome -NE', 'OnePath OneAnswer Investment Portfolio - Barrow Hanley Concentrated Global Shares Hedged -EF/Sel', 'OnePath OneAnswer Investment Portfolio - Barrow Hanley Concentrated Global Shares Hedged -NE', 'OnePath OneAnswer Investment Portfolio - BlackRock Advantage Australian Equity -EF/Sel', 'OnePath OneAnswer Investment Portfolio - BlackRock Advantage Australian Equity -NE', 'OnePath OneAnswer Investment Portfolio - BlackRock Diversified ESG Growth -EF/Sel', 'OnePath OneAnswer Investment Portfolio - BlackRock Diversified ESG Growth -NE', 'OnePath OneAnswer Investment Portfolio - First Sentier Imputation -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Australian Shares Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Balanced Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Balanced Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Conservative Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Conservative Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Diversified Bond Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath Diversified Bond Index -NE', 'OnePath OneAnswer Investment Portfolio - OnePath Growth Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath High Growth Index -EF/Sel', 'OnePath OneAnswer Investment Portfolio - OnePath High 
# Evaluation cell: compare the generated results with the ground truth and
# save one metrics workbook per document list.
#
# Changelog (Blade's updates):
# 1. The secondary key is the share class name instead of the fund name.
# 2. Data points whose support is 0 are left out when the metrics are calculated.
# 3. Error messages are collected in a message list.
# 4. Metrics and error messages can be saved to an Excel file.
# 5. Statistics can be produced for different document lists.
# 6. F1-Score is the first column of the metrics table.

# Load both workbooks; row 0 is used as the header row.
headers_gt, ground_truth_data = load_excel(path_ground_truth, 0)
headers_gen, generated_results_data = load_excel(path_generated_results, 0)

# Index the rows of each workbook by their key columns.
ground_truth_indexed = index_data_by_key(ground_truth_data, headers_gt)
generated_results_indexed = index_data_by_key(generated_results_data, headers_gen)

# Columns that exist in both workbooks.
intersection = set(headers_gen).intersection(headers_gt)
# Keep the ground-truth column order (de-duplicated) rather than list(set):
# the iteration order of a set of strings can differ from one run to the next.
intersection_list = [header for header in dict.fromkeys(headers_gt) if header in intersection]

total_fn = []

# A per-provider breakdown (get_provider_mapping / get_provider_names /
# get_specified_doc_data) used to run here. It is disabled; its commented-out
# call passed a different argument list to compare_data than the call below.

print("\n")
print("\n")
document_list_file_list = [None,
                           "./sample_documents/aus_prospectus_29_documents_sample.txt",
                           "./sample_documents/aus_prospectus_17_documents_sample.txt"]
# document_list_file_list = [None]

# Output location and file-name stem do not depend on the document list, so
# they are prepared once. makedirs(exist_ok=True) either succeeds or raises,
# which makes a follow-up os.path.exists check unnecessary.
output_metrics_folder = r"/data/aus_prospectus/output/metrics_data/"
os.makedirs(output_metrics_folder, exist_ok=True)
# splitext removes only the trailing extension; str.replace(".xlsx", "") would
# also remove the substring if it appeared in the middle of the name.
generated_file_base_name = os.path.splitext(os.path.basename(path_generated_results))[0]

for document_list_file in document_list_file_list:
    # None means "evaluate every document".
    document_list = None
    if document_list_file is not None:
        with open(document_list_file, "r", encoding="utf-8") as f:
            # Skip blank lines so they neither become empty document ids nor
            # inflate the document count used in the output file name.
            document_list = [line.strip() for line in f if line.strip()]

    print("All Providers Results: ")
    print("Document List File - ", document_list_file)
    # NOTE(review): the saved output of the next cell lists every message twice,
    # which suggests compare_data keeps appending to one shared list across
    # iterations. Confirm; if so, each workbook's message_data sheet also
    # contains the messages of earlier iterations.
    comparison_results, message_list, share_matched, \
        share_not_matched, not_matched_share_name_list = compare_data(ground_truth_indexed,
                                                                      generated_results_indexed,
                                                                      headers_gt,
                                                                      intersection_list,
                                                                      document_list)
    metrics_list = print_metrics_table(comparison_results)
    print("Total Shares Matched - " + str(share_matched) + "\nTotal Shares Not Matched - " + str(share_not_matched))
    # Guard the percentage: a document list that matches nothing would otherwise
    # raise ZeroDivisionError and abort the remaining document lists.
    total_shares = share_matched + share_not_matched
    matched_percentage = (share_matched / total_shares) * 100 if total_shares else 0.0
    print("Percentage of Shares Matched - " + str(matched_percentage))
    print("Not Matched Shares Name List - ", not_matched_share_name_list)

    metrics_df = pd.DataFrame(metrics_list)
    message_df = pd.DataFrame(message_list)
    share_matched_data = {"share_matched": share_matched,
                          "share_not_matched": share_not_matched,
                          "not_matched_share_name_list": not_matched_share_name_list}
    share_matched_df = pd.DataFrame([share_matched_data])

    # File name: metrics_<generated file stem>_<N>_documents.xlsx for a document
    # list, metrics_<generated file stem>_all_documents.xlsx otherwise.
    metrics_file_name = f"metrics_{generated_file_base_name}"
    if document_list_file is not None:
        metrics_file_name = f"{metrics_file_name}_{len(document_list)}_documents.xlsx"
    else:
        metrics_file_name = f"{metrics_file_name}_all_documents.xlsx"
    metrics_file_path = os.path.join(output_metrics_folder, metrics_file_name)

    # One workbook per document list, with one sheet per result table.
    with pd.ExcelWriter(metrics_file_path) as writer:
        metrics_df.to_excel(writer, sheet_name="metrics_data", index=False)
        message_df.to_excel(writer, sheet_name="message_data", index=False)
        share_matched_df.to_excel(writer, sheet_name="share_matched_data", index=False)
'sec_name': 'Vanguard Index Australian Shares Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 409723592, 'sec_name': 'Vanguard High Yield Australian Shares Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 409723592, 'sec_name': 'Vanguard Index Australian Property Securities Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 411062815, 'sec_name': 'Perpetual WFP-Macquarie Income Opps', 'truth': '0.03', 'generated': '0.12', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 411062815, 'sec_name': 'Perpetual WFP-Perpetual Diversified Inc', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 411062815, 'sec_name': 'Perpetual WFP-Schroder Fixed Income', 'truth': '0', 'generated': '0.01', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 411062815, 'sec_name': 'Perpetual WFP-Perpetual Share Plus L/S', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum Global Fund (Long Only)', 'truth': '0.24', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum Global Fund (Long Only) P Class', 'truth': '0.24', 'generated': '0', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum International Fund', 'truth': '0.15', 'generated': '0', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 
'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum Asia Fund', 'truth': '0.27', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum International Brands Fund', 'truth': '0.03', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum International Brands Fund P Class', 'truth': '0.03', 'generated': '0', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum International Healthcare Fund', 'truth': '0.86', 'generated': '0', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum International Technology Fund', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum European Fund', 'truth': '0.24', 'generated': '0', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum Japan Fund', 'truth': '0.15', 'generated': '0', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 397107472, 'sec_name': 'AMP Capital Specialist Diversified Fixed Income Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MKPFPR - Ausbil Aus. Emrging Leaders', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MKPFPR - Investors Mutual Aus. 
Shre', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MKPFPR - Macquarie Inc Opportunities', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MasterKey Pension Fundamentals (Pre Retirement) - MLC Cash', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MKPFPR - Global Share Fund', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MKPFPR - IncomeBuilder', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MKPF - Hedged Global Share Fund', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MKPFPR - Hedged Global Share Fund', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MKPF - PIMCO Div. 
Fixed Interest Wholesale Class', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MKPF - PIMCO Global Bond Wholesale Class', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MKPFPR - PIMCO Global Bond Wholesale Class', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MKPF - PM CAPITAL Global Companies', 'truth': '1.54', 'generated': '1.45', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 446324179, 'sec_name': 'Lifeplan Investment Bond - Allan Gray Australian Equity Fund Class A', 'truth': '0.28', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 446324179, 'sec_name': 'Lifeplan Investment Bond MLC Horizon 2-Capital Stable Open', 'truth': '0.05', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Global Core Equity Trust', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Global Small Company Trust', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Global Value Trust -Active ETF', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Australian Value Trust - Active ETF', 'truth': '0', 'generated': '', 'error': 'Generated is 
null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Global Core Equity Tr AUDHdg', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539266893, 'sec_name': 'AMP - Generations - BlackRock Australian Fixed Interest Index', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539266893, 'sec_name': 'AMP - Generations - BlackRock Australian Equity Index', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539266893, 'sec_name': 'AMP Generations - AMP Cash Mgmt', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539266893, 'sec_name': 'AMP - Generations - BlackRock Property Securities Index', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539266893, 'sec_name': 'AMP - Generations - BlackRock International Equity Index (Unhedged)', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539266893, 'sec_name': 'AMP - Generations - BlackRock International Equity Index (Hedged)', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539241700, 'sec_name': 'North Professional Balanced', 'truth': '0', 'generated': '0.05', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539261734, 'sec_name': 'ipac life choices Income Generator', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 
539266874, 'sec_name': 'SUMMIT Select - Active High Growth Units', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539266874, 'sec_name': 'SUMMIT Select - Active Moderately Defensive', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539266874, 'sec_name': 'SUMMIT Select - Active Growth Units', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539266874, 'sec_name': 'SUMMIT Select - Active Balanced', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539266874, 'sec_name': 'SUMMIT Select - Active Defensive Units', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 527969661, 'sec_name': 'JPMorgan Global Equity Premium Income (Hedged) Complex ETF', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 557526129, 'sec_name': 'Fortlake Real-Income Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 557526129, 'sec_name': 'Fortlake Real-Higher Income Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Australian Value Trust - Active ETF', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Global Small Company Trust', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 
'performance_fee_costs', 'doc_id': 541356150, 'sec_name': 'JPMorgan Global Research Enhanced Index Equity Trust - Class I', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 541356150, 'sec_name': 'JPMorgan Global Research Enhanced Index Equity Trust - Class I (Hedged)', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 557362553, 'sec_name': 'JPMorgan Global Select Equity Active ETF', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 550522985, 'sec_name': 'RQI Global Value – Class A', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 557362556, 'sec_name': 'JPMorgan Global Select Equity Fund - Class A (Hedged) Units', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 557362556, 'sec_name': 'JPMorgan Global Select Equity Fund - Class A Units', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Global Core Equity Trust', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Global Small Company Trust', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Global Value Trust -Active ETF', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Australian Value 
Trust - Active ETF', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 530101994, 'sec_name': 'Dimensional Global Core Equity Tr AUDHdg', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539266893, 'sec_name': 'AMP - Generations - BlackRock Australian Fixed Interest Index', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539266893, 'sec_name': 'AMP - Generations - BlackRock Australian Equity Index', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539266893, 'sec_name': 'AMP Generations - AMP Cash Mgmt', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539266893, 'sec_name': 'AMP - Generations - BlackRock Property Securities Index', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539266893, 'sec_name': 'AMP - Generations - BlackRock International Equity Index (Unhedged)', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539266893, 'sec_name': 'AMP - Generations - BlackRock International Equity Index (Hedged)', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539241700, 'sec_name': 'North Professional Balanced', 'truth': '0', 'generated': '0.05', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539261734, 'sec_name': 'ipac life choices Income Generator', 'truth': '0', 'generated': '', 'error': 'Generated is null and 
truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539266874, 'sec_name': 'SUMMIT Select - Active High Growth Units', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539266874, 'sec_name': 'SUMMIT Select - Active Moderately Defensive', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539266874, 'sec_name': 'SUMMIT Select - Active Growth Units', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539266874, 'sec_name': 'SUMMIT Select - Active Balanced', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 539266874, 'sec_name': 'SUMMIT Select - Active Defensive Units', 'truth': '0', 'generated': '0.06', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 527969661, 'sec_name': 'JPMorgan Global Equity Premium Income (Hedged) Complex ETF', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 557526129, 'sec_name': 'Fortlake Real-Income Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 557526129, 'sec_name': 'Fortlake Real-Higher Income Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Australian Value Trust - Active ETF', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 531373053, 'sec_name': 'Dimensional Global Small Company Trust', 'truth': '0', 'generated': '', 
'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 541356150, 'sec_name': 'JPMorgan Global Research Enhanced Index Equity Trust - Class I', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 541356150, 'sec_name': 'JPMorgan Global Research Enhanced Index Equity Trust - Class I (Hedged)', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 557362553, 'sec_name': 'JPMorgan Global Select Equity Active ETF', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 550522985, 'sec_name': 'RQI Global Value – Class A', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 557362556, 'sec_name': 'JPMorgan Global Select Equity Fund - Class A (Hedged) Units', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 557362556, 'sec_name': 'JPMorgan Global Select Equity Fund - Class A Units', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 377377369, 'sec_name': 'SPDR® S&P Emerging Markets Carbon Control Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'ANZ OA Inv-OnePath Multi Asset Income NEF', 'truth': '0', 'generated': '0.11', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'ANZ OA IP-OnePath Australian Shares NE', 'truth': '0', 'generated': '0.07', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 
'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OA Investment Portfolio-BlackRock Tactical Growth NE', 'truth': '0', 'generated': '0.33', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 401212184, 'sec_name': 'OnePath OneAnswer Investment Portfolio - OnePath Growth Index -NE', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 409723592, 'sec_name': 'Vanguard Index Diversified Bond Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 409723592, 'sec_name': 'Vanguard Index Australian Shares Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 409723592, 'sec_name': 'Vanguard High Yield Australian Shares Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 409723592, 'sec_name': 'Vanguard Index Australian Property Securities Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 411062815, 'sec_name': 'Perpetual WFP-Macquarie Income Opps', 'truth': '0.03', 'generated': '0.12', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 411062815, 'sec_name': 'Perpetual WFP-Perpetual Diversified Inc', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 411062815, 'sec_name': 'Perpetual WFP-Schroder Fixed Income', 'truth': '0', 'generated': '0.01', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 411062815, 'sec_name': 'Perpetual WFP-Perpetual Share Plus L/S', 'truth': '', 
'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum Global Fund (Long Only)', 'truth': '0.24', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum Global Fund (Long Only) P Class', 'truth': '0.24', 'generated': '0', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum International Fund', 'truth': '0.15', 'generated': '0', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum Asia Fund', 'truth': '0.27', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum International Brands Fund', 'truth': '0.03', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum International Brands Fund P Class', 'truth': '0.03', 'generated': '0', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum International Healthcare Fund', 'truth': '0.86', 'generated': '0', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum International Technology Fund', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum European Fund', 'truth': '0.24', 'generated': '0', 'error': 'Truth is not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 414751292, 'sec_name': 'Platinum Japan Fund', 'truth': '0.15', 'generated': '0', 'error': 'Truth is 
not equal with generated'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 397107472, 'sec_name': 'AMP Capital Specialist Diversified Fixed Income Fund', 'truth': '', 'generated': '0', 'error': 'Truth is null and generated is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MKPFPR - Ausbil Aus. Emrging Leaders', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MKPFPR - Investors Mutual Aus. Shre', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MKPFPR - Macquarie Inc Opportunities', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MasterKey Pension Fundamentals (Pre Retirement) - MLC Cash', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MKPFPR - Global Share Fund', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MKPFPR - IncomeBuilder', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MKPF - Hedged Global Share Fund', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MKPFPR - Hedged Global Share Fund', 'truth': '0', 'generated': '', 'error': 'Generated is null and truth is not null'}\n", "{'data_point': 'performance_fee_costs', 'doc_id': 420339794, 'sec_name': 'MLC MKPF - PIMCO Div. 
# --- Cell: show the recorded mismatches for one data point ---
# Each element of message_list is a dict; only those whose "data_point" is
# "performance_fee_costs" are printed.
for message_list_element in message_list:
    if message_list_element["data_point"] == "performance_fee_costs":
        print(message_list_element)


# --- Cell: export all recorded mismatches to Excel for error analysis ---
# pandas (pd) and os come from the import cell at the top of the notebook.
output_folder = "/data/aus_prospectus/output/error_analysis/"
output_filename = r"/data/aus_prospectus/output/error_analysis/anomalies_found.xlsx"

if message_list:
    # The saved output of the cell above shows every record twice, so exact
    # duplicate rows are removed before the report is written.
    df = pd.DataFrame(message_list).drop_duplicates()

    # A stable sort keeps the original order of the messages within a doc_id.
    df_sorted = df.sort_values(by=['doc_id'], kind="stable")

    # Save DataFrame to Excel file
    os.makedirs(output_folder, exist_ok=True)
    df_sorted.to_excel(output_filename, index=False)

    print(f"Excel file '{output_filename}' has been created successfully.")
else:
    # An empty list yields a DataFrame without a 'doc_id' column, and sorting
    # by it would raise KeyError; report the empty case instead.
    print("No messages recorded; no Excel file was written.")