1. update performance_fee name to performance_fee_costs

2. support extracting data for total_annual_dollar_based_charges
This commit is contained in:
Blade He 2025-03-11 17:15:39 -05:00
parent b7506c78f3
commit c7c36dbdd2
11 changed files with 191 additions and 100 deletions

View File

@ -576,7 +576,7 @@ def calculate_metrics_based_db_data_file(audit_file_path: str = r"/data/aus_pros
"administration_fees",
"minimum_initial_investment",
"benchmark_name",
"performance_fee",
"performance_fee_costs",
"interposed_vehicle_performance_fee_cost",
"buy_spread",
"sell_spread",
@ -613,7 +613,7 @@ def calculate_metrics_based_db_data_file(audit_file_path: str = r"/data/aus_pros
"administration_fees",
"minimum_initial_investment",
"benchmark_name",
"performance_fee",
"performance_fee_costs",
"interposed_vehicle_performance_fee_cost",
"buy_spread",
"sell_spread",
@ -649,8 +649,8 @@ def calculate_metrics_based_db_data_file(audit_file_path: str = r"/data/aus_pros
gt_benchmark_name_list = []
pred_benchmark_name_list = []
if is_for_all:
gt_performance_fee_list = []
pred_performance_fee_list = []
gt_performance_fee_costs_list = []
pred_performance_fee_costs_list = []
gt_interposed_vehicle_performance_fee_cost_list = []
pred_interposed_vehicle_performance_fee_cost_list = []
gt_buy_spread_list = []
@ -701,7 +701,7 @@ def calculate_metrics_based_db_data_file(audit_file_path: str = r"/data/aus_pros
minimum_initial_investment = str(row["minimum_initial_investment"])
benchmark_name = str(row["benchmark_name"])
if is_for_all:
performance_fee = str(row["performance_fee"])
performance_fee_costs = str(row["performance_fee_costs"])
interposed_vehicle_performance_fee_cost = str(row["interposed_vehicle_performance_fee_cost"])
buy_spread = str(row["buy_spread"])
sell_spread = str(row["sell_spread"])
@ -720,7 +720,7 @@ def calculate_metrics_based_db_data_file(audit_file_path: str = r"/data/aus_pros
v_minimum_initial_investment = str(doc_verify_sec_row["minimum_initial_investment"])
v_benchmark_name = str(doc_verify_sec_row["benchmark_name"])
if is_for_all:
v_performance_fee = str(doc_verify_sec_row["performance_fee"])
v_performance_fee_costs = str(doc_verify_sec_row["performance_fee_costs"])
v_interposed_vehicle_performance_fee_cost = str(doc_verify_sec_row["interposed_vehicle_performance_fee_cost"])
v_buy_spread = str(doc_verify_sec_row["buy_spread"])
v_sell_spread = str(doc_verify_sec_row["sell_spread"])
@ -744,8 +744,8 @@ def calculate_metrics_based_db_data_file(audit_file_path: str = r"/data/aus_pros
message = get_gt_pred_by_compare_values(benchmark_name, v_benchmark_name, gt_benchmark_name_list, pred_benchmark_name_list, data_point="benchmark_name")
message_list.append(generate_message(message, document_id, sec_id, fund_name, raw_fund_name, raw_share_name, "benchmark_name"))
if is_for_all:
message = get_gt_pred_by_compare_values(performance_fee, v_performance_fee, gt_performance_fee_list, pred_performance_fee_list)
message_list.append(generate_message(message, document_id, sec_id, fund_name, raw_fund_name, raw_share_name, "performance_fee"))
message = get_gt_pred_by_compare_values(performance_fee_costs, v_performance_fee_costs, gt_performance_fee_costs_list, pred_performance_fee_costs_list)
message_list.append(generate_message(message, document_id, sec_id, fund_name, raw_fund_name, raw_share_name, "performance_fee_costs"))
message = get_gt_pred_by_compare_values(interposed_vehicle_performance_fee_cost, v_interposed_vehicle_performance_fee_cost,
gt_interposed_vehicle_performance_fee_cost_list, pred_interposed_vehicle_performance_fee_cost_list)
message_list.append(generate_message(message, document_id, sec_id, fund_name, raw_fund_name, raw_share_name, "interposed_vehicle_performance_fee_cost"))
@ -803,11 +803,11 @@ def calculate_metrics_based_db_data_file(audit_file_path: str = r"/data/aus_pros
support_benchmark_name = sum(gt_benchmark_name_list)
if is_for_all:
precision_performance_fee = precision_score(gt_performance_fee_list, pred_performance_fee_list)
recall_performance_fee = recall_score(gt_performance_fee_list, pred_performance_fee_list)
f1_performance_fee = f1_score(gt_performance_fee_list, pred_performance_fee_list)
accuracy_performance_fee = accuracy_score(gt_performance_fee_list, pred_performance_fee_list)
support_performance_fee = sum(gt_performance_fee_list)
precision_performance_fee_costs = precision_score(gt_performance_fee_costs_list, pred_performance_fee_costs_list)
recall_performance_fee_costs = recall_score(gt_performance_fee_costs_list, pred_performance_fee_costs_list)
f1_performance_fee_costs = f1_score(gt_performance_fee_costs_list, pred_performance_fee_costs_list)
accuracy_performance_fee_costs = accuracy_score(gt_performance_fee_costs_list, pred_performance_fee_costs_list)
support_performance_fee_costs = sum(gt_performance_fee_costs_list)
precision_interposed_vehicle_performance_fee_cost = precision_score(gt_interposed_vehicle_performance_fee_cost_list, pred_interposed_vehicle_performance_fee_cost_list)
recall_interposed_vehicle_performance_fee_cost = recall_score(gt_interposed_vehicle_performance_fee_cost_list, pred_interposed_vehicle_performance_fee_cost_list)
@ -856,7 +856,7 @@ def calculate_metrics_based_db_data_file(audit_file_path: str = r"/data/aus_pros
{"item": "administration_fees", "precision": precision_administration_fees, "recall": recall_administration_fees, "f1": f1_administration_fees, "accuracy": accuracy_administration_fees, "support": support_administration_fees},
{"item": "minimum_initial_investment", "precision": precision_miminimum_initial_investment, "recall": recall_miminimum_initial_investment, "f1": f1_miminimum_initial_investment, "accuracy": accuracy_miminimum_initial_investment, "support": support_miminimum_initial_investment},
{"item": "benchmark_name", "precision": precision_benchmark_name, "recall": recall_benchmark_name, "f1": f1_benchmark_name, "accuracy": accuracy_benchmark_name, "support": support_benchmark_name},
{"item": "performance_fee", "precision": precision_performance_fee, "recall": recall_performance_fee, "f1": f1_performance_fee, "accuracy": accuracy_performance_fee, "support": support_performance_fee},
{"item": "performance_fee_costs", "precision": precision_performance_fee_costs, "recall": recall_performance_fee_costs, "f1": f1_performance_fee_costs, "accuracy": accuracy_performance_fee_costs, "support": support_performance_fee_costs},
{"item": "interposed_vehicle_performance_fee_cost", "precision": precision_interposed_vehicle_performance_fee_cost, "recall": recall_interposed_vehicle_performance_fee_cost,
"f1": f1_interposed_vehicle_performance_fee_cost, "accuracy": accuracy_interposed_vehicle_performance_fee_cost, "support": support_interposed_vehicle_performance_fee_cost},
{"item": "buy_spread", "precision": precision_buy_spread, "recall": recall_buy_spread, "f1": f1_buy_spread, "accuracy": accuracy_buy_spread, "support": support_buy_spread},
@ -924,7 +924,7 @@ def calculate_metrics_by_provider(audit_file_path: str = r"/data/aus_prospectus/
"administration_fees",
"minimum_initial_investment",
"benchmark_name",
"performance_fee",
"performance_fee_costs",
"interposed_vehicle_performance_fee_cost",
"buy_spread",
"sell_spread",
@ -942,7 +942,7 @@ def calculate_metrics_by_provider(audit_file_path: str = r"/data/aus_prospectus/
audit_data_df.reset_index(drop=True, inplace=True)
verify_fields = [
"DocumentId",
"doc_id",
"raw_fund_name",
"fund_id",
"fund_name",
@ -954,7 +954,7 @@ def calculate_metrics_by_provider(audit_file_path: str = r"/data/aus_prospectus/
"administration_fees",
"minimum_initial_investment",
"benchmark_name",
"performance_fee",
"performance_fee_costs",
"interposed_vehicle_performance_fee_cost",
"buy_spread",
"sell_spread",
@ -963,7 +963,6 @@ def calculate_metrics_by_provider(audit_file_path: str = r"/data/aus_prospectus/
verify_data_df = pd.read_excel(verify_file_path, sheet_name=verify_data_sheet)
verify_data_df = verify_data_df[verify_fields]
verify_data_df = verify_data_df.drop_duplicates()
verify_data_df = verify_data_df.rename(columns={"DocumentId": "doc_id"})
verify_data_df.fillna("", inplace=True)
verify_data_df.reset_index(drop=True, inplace=True)
@ -1002,8 +1001,8 @@ def calculate_metrics_by_provider(audit_file_path: str = r"/data/aus_prospectus/
"gt_benchmark_name_list": [],
"pred_benchmark_name_list": []}
if is_for_all:
provider_gt_pred_data[provider_id].update({"gt_performance_fee_list": [],
"pred_performance_fee_list": [],
provider_gt_pred_data[provider_id].update({"gt_performance_fee_costs_list": [],
"pred_performance_fee_costs_list": [],
"gt_interposed_vehicle_performance_fee_cost_list": [],
"pred_interposed_vehicle_performance_fee_cost_list": [],
"gt_buy_spread_list": [],
@ -1026,7 +1025,7 @@ def calculate_metrics_by_provider(audit_file_path: str = r"/data/aus_prospectus/
minimum_initial_investment = str(row["minimum_initial_investment"])
benchmark_name = str(row["benchmark_name"])
if is_for_all:
performance_fee = str(row["performance_fee"])
performance_fee_costs = str(row["performance_fee_costs"])
interposed_vehicle_performance_fee_cost = str(row["interposed_vehicle_performance_fee_cost"])
buy_spread = str(row["buy_spread"])
sell_spread = str(row["sell_spread"])
@ -1045,7 +1044,7 @@ def calculate_metrics_by_provider(audit_file_path: str = r"/data/aus_prospectus/
v_minimum_initial_investment = str(doc_verify_sec_row["minimum_initial_investment"])
v_benchmark_name = str(doc_verify_sec_row["benchmark_name"])
if is_for_all:
v_performance_fee = str(doc_verify_sec_row["performance_fee"])
v_performance_fee_costs = str(doc_verify_sec_row["performance_fee_costs"])
v_interposed_vehicle_performance_fee_cost = str(doc_verify_sec_row["interposed_vehicle_performance_fee_cost"])
v_buy_spread = str(doc_verify_sec_row["buy_spread"])
v_sell_spread = str(doc_verify_sec_row["sell_spread"])
@ -1082,12 +1081,12 @@ def calculate_metrics_by_provider(audit_file_path: str = r"/data/aus_prospectus/
data_point="benchmark_name")
message_list.append(generate_message(message, document_id, sec_id, fund_name, raw_fund_name, raw_share_name, "benchmark_name"))
if is_for_all:
message = get_gt_pred_by_compare_values(performance_fee,
v_performance_fee,
provider_gt_pred_data[provider_id]["gt_performance_fee_list"],
provider_gt_pred_data[provider_id]["pred_performance_fee_list"],
data_point="performance_fee")
message_list.append(generate_message(message, document_id, sec_id, fund_name, raw_fund_name, raw_share_name, "performance_fee"))
message = get_gt_pred_by_compare_values(performance_fee_costs,
v_performance_fee_costs,
provider_gt_pred_data[provider_id]["gt_performance_fee_costs_list"],
provider_gt_pred_data[provider_id]["pred_performance_fee_costs_list"],
data_point="performance_fee_costs")
message_list.append(generate_message(message, document_id, sec_id, fund_name, raw_fund_name, raw_share_name, "performance_fee_costs"))
message = get_gt_pred_by_compare_values(interposed_vehicle_performance_fee_cost,
v_interposed_vehicle_performance_fee_cost,
provider_gt_pred_data[provider_id]["gt_interposed_vehicle_performance_fee_cost_list"],
@ -1165,15 +1164,15 @@ def calculate_metrics_by_provider(audit_file_path: str = r"/data/aus_prospectus/
support_benchmark_name = sum(gt_pred_data["gt_benchmark_name_list"])
if is_for_all:
precision_performance_fee = precision_score(gt_pred_data["gt_performance_fee_list"],
gt_pred_data["pred_performance_fee_list"])
recall_performance_fee = recall_score(gt_pred_data["gt_performance_fee_list"],
gt_pred_data["pred_performance_fee_list"])
f1_performance_fee = f1_score(gt_pred_data["gt_performance_fee_list"],
gt_pred_data["pred_performance_fee_list"])
accuracy_performance_fee = accuracy_score(gt_pred_data["gt_performance_fee_list"],
gt_pred_data["pred_performance_fee_list"])
support_performance_fee = sum(gt_pred_data["gt_performance_fee_list"])
precision_performance_fee_costs = precision_score(gt_pred_data["gt_performance_fee_costs_list"],
gt_pred_data["pred_performance_fee_costs_list"])
recall_performance_fee_costs = recall_score(gt_pred_data["gt_performance_fee_costs_list"],
gt_pred_data["pred_performance_fee_costs_list"])
f1_performance_fee_costs = f1_score(gt_pred_data["gt_performance_fee_costs_list"],
gt_pred_data["pred_performance_fee_costs_list"])
accuracy_performance_fee_costs = accuracy_score(gt_pred_data["gt_performance_fee_costs_list"],
gt_pred_data["pred_performance_fee_costs_list"])
support_performance_fee_costs = sum(gt_pred_data["gt_performance_fee_costs_list"])
precision_interposed_vehicle_performance_fee_cost = precision_score(gt_pred_data["gt_interposed_vehicle_performance_fee_cost_list"],
gt_pred_data["pred_interposed_vehicle_performance_fee_cost_list"])
@ -1221,7 +1220,7 @@ def calculate_metrics_by_provider(audit_file_path: str = r"/data/aus_prospectus/
{"provider_id": provider_id, "provider_name": provider_name, "item": "administration_fees", "precision": precision_administration_fees, "recall": recall_administration_fees, "f1": f1_administration_fees, "accuracy": accuracy_administration_fees, "support": support_administration_fees},
{"provider_id": provider_id, "provider_name": provider_name, "item": "minimum_initial_investment", "precision": precision_miminimum_initial_investment, "recall": recall_miminimum_initial_investment, "f1": f1_miminimum_initial_investment, "accuracy": accuracy_miminimum_initial_investment, "support": support_miminimum_initial_investment},
{"provider_id": provider_id, "provider_name": provider_name, "item": "benchmark_name", "precision": precision_benchmark_name, "recall": recall_benchmark_name, "f1": f1_benchmark_name, "accuracy": accuracy_benchmark_name, "support": support_benchmark_name},
{"provider_id": provider_id, "provider_name": provider_name, "item": "performance_fee", "precision": precision_performance_fee, "recall": recall_performance_fee, "f1": f1_performance_fee, "accuracy": accuracy_performance_fee, "support": support_performance_fee},
{"provider_id": provider_id, "provider_name": provider_name, "item": "performance_fee_costs", "precision": precision_performance_fee_costs, "recall": recall_performance_fee_costs, "f1": f1_performance_fee_costs, "accuracy": accuracy_performance_fee_costs, "support": support_performance_fee_costs},
{"provider_id": provider_id, "provider_name": provider_name, "item": "interposed_vehicle_performance_fee_cost", "precision": precision_interposed_vehicle_performance_fee_cost, "recall": recall_interposed_vehicle_performance_fee_cost,
"f1": f1_interposed_vehicle_performance_fee_cost, "accuracy": accuracy_interposed_vehicle_performance_fee_cost, "support": support_interposed_vehicle_performance_fee_cost},
{"provider_id": provider_id, "provider_name": provider_name, "item": "buy_spread", "precision": precision_buy_spread, "recall": recall_buy_spread, "f1": f1_buy_spread, "accuracy": accuracy_buy_spread, "support": support_buy_spread},
@ -1306,12 +1305,24 @@ def get_gt_pred_by_compare_values(gt_value, pred_value, gt_list, pred_list, data
def is_equal(gt_value, pred_value, data_point: str = ""):
if gt_value is not None and len(str(gt_value)) > 0 and \
pred_value is not None and len(str(pred_value)) > 0:
if gt_value is not None and len(str(gt_value).strip()) > 0 and \
pred_value is not None and len(str(pred_value).strip()) > 0:
if gt_value == "0.0":
gt_value = "0"
if pred_value == "0.0":
pred_value = "0"
if data_point not in ["benchmark_name"]:
try:
gt_num = float(gt_value)
# round to 4 decimal places
gt_value = round(gt_num, 4)
except Exception as e:
pass
try:
pred_value = float(pred_value)
pred_value = round(pred_value, 4)
except Exception as e:
pass
if gt_value == pred_value:
return True
if data_point == "benchmark_name":
@ -1334,7 +1345,45 @@ def clean_text(text: str):
return text
def merge_inference_data():
    """Merge two inference mapping workbooks into a single Excel file.

    Reads the "total_mapping_data" sheet from each of the two hard-coded
    input workbooks, keeps only the selected columns, stacks the rows in
    input order with a fresh 0-based index, and writes the result to
    "merged_mapping_data_info_46_documents_by_text.xlsx" in the output
    folder under the same sheet name.
    """
    sheet_name = "total_mapping_data"
    source_files = (
        r"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250308220117.xlsx",
        r"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_5_documents_by_text_20250311165607.xlsx",
    )
    selected_columns = [
        "doc_id",
        "raw_fund_name",
        "fund_id",
        "fund_name",
        "raw_share_name",
        "sec_id",
        "sec_name",
        "management_fee_and_costs",
        "management_fee",
        "administration_fees",
        "minimum_initial_investment",
        "benchmark_name",
        "performance_fee_costs",
        "interposed_vehicle_performance_fee_cost",
        "buy_spread",
        "sell_spread",
        "total_annual_dollar_based_charges"
    ]

    # Load each workbook and narrow it to the shared column set, in order.
    frames = [
        pd.read_excel(path, sheet_name=sheet_name)[selected_columns]
        for path in source_files
    ]

    # Stack the rows and discard the per-file indexes.
    merged_df = pd.concat(frames).reset_index(drop=True)

    destination = os.path.join(
        r"/data/aus_prospectus/output/mapping_data/total/",
        "merged_mapping_data_info_46_documents_by_text.xlsx",
    )
    with pd.ExcelWriter(destination) as writer:
        merged_df.to_excel(writer, index=False, sheet_name=sheet_name)
if __name__ == "__main__":
# merge_inference_data()
# adjust_column_order()
# set_mapping_to_data_side_documents_data()
@ -1349,7 +1398,7 @@ if __name__ == "__main__":
audit_file_path: str = r"/data/aus_prospectus/ground_truth/phase2_file/46_documents/46_documents_ground_truth_with_mapping.xlsx"
audit_data_sheet: str = "Sheet1"
verify_file_path: str = r"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_46_documents_by_text_20250308220117.xlsx"
verify_file_path: str = r"/data/aus_prospectus/output/mapping_data/total/merged_mapping_data_info_46_documents_by_text.xlsx"
verify_data_sheet: str = "total_mapping_data"
# verify_document_list_file: str = "./sample_documents/aus_prospectus_29_documents_sample.txt"
verify_document_list_file_list = [None,

View File

@ -1,8 +1,8 @@
{
"total_annual_dollar_based_charges": {"english": ["total annual dollar based charges", "total annual dollar based charges ($)","total annual dollar"]},
"total_annual_dollar_based_charges": {"english": ["total annual dollar based charges", "total annual dollar based charges ($)","total annual dollar", "administration fees and costs", "Administration fee", "Administration fees"]},
"management_fee_and_costs": {"english": ["management fee", "management fees", "investment management fees", "management fees and cost", "management fees and costs", "investment fees and costs", "Management costs", "investment fee and costs", "Investment fees", "investment option management costs", "investment option management costs1"]},
"management_fee": {"english": ["management fee", "management fees", "investment management fees", "management fees and cost", "management fees and costs", "investment fees and costs", "Management costs", "investment fee and costs", "Investment fees", "investment option management costs", "investment option management costs1"]},
"performance_fee": {"english": ["performance fee", "performance fees"]},
"performance_fee_costs": {"english": ["performance fee", "performance fees"]},
"buy_spread": {"english": ["buy-spread", "buy spread", "buy/sell spreads", "BUY-SELL SPREAD"]},
"sell_spread": {"english": ["sell-spread", "sell spread", "buy/sell spreads", "BUY-SELL SPREAD", "Buy:", "Sell:"]},
"administration_fees": {"english": ["administration fee", "administration fees","admin fee"]},

View File

@ -2,7 +2,7 @@
"total_annual_dollar_based_charges": "share_level",
"management_fee_and_costs": "share_level",
"management_fee": "share_level",
"performance_fee": "share_level",
"performance_fee_costs": "share_level",
"buy_spread": "share_level",
"sell_spread": "share_level",
"administration_fees": "share_level",

View File

@ -2,7 +2,7 @@
"management_fee_and_costs": "management fee and costs",
"management_fee": "management fee",
"administration_fees": "administration fee",
"performance_fee": "performance fee",
"performance_fee_costs": "performance fee",
"interposed_vehicle_performance_fee_cost": "interposed vehicle performance fee cost",
"buy_spread": "buy spread",
"sell_spread": "sell spread",

View File

@ -1,8 +1,8 @@
{
"total_annual_dollar_based_charges": {"english": ["total annual dollar based charges", "total annual dollar based charges ($)","total annual dollar"]},
"total_annual_dollar_based_charges": {"english": ["total annual dollar based charges", "total annual dollar based charges ($)","total annual dollar", "administration fees and costs", "Administration fee", "Administration fees"]},
"management_fee_and_costs": {"english": ["management fees and cost", "management fees and costs", "management fee and cost", "Plus other investment fees and costs", "Management costs", "investment fees and costs", "investment fee and cost", "Investment fees"]},
"management_fee": {"english": ["management fee", "management fees","investment management fees","management fees and cost", "investment option management costs", "investment option management costs1", "Plus other investment fees and costs", "Management costs", "investment fees and costs", "investment fee and cost", "Investment fees"]},
"performance_fee": {"english": ["performance fee", "performance fees"]},
"performance_fee_costs": {"english": ["performance fee", "performance fees"]},
"buy_spread": {"english": ["buy-spread", "buy spread", "buy/sell spreads", "BUY-SELL SPREAD"]},
"sell_spread": {"english": ["sell-spread", "sell spread", "buy/sell spreads", "BUY-SELL SPREAD", "Buy:", "Sell:"]},
"administration_fees": {"english": ["administration fee", "administration fees","admin fee"]},

View File

@ -2,7 +2,7 @@
"total_annual_dollar_based_charges": "float",
"management_fee_and_costs": "float",
"management_fee": "float",
"performance_fee": "float",
"performance_fee_costs": "float",
"buy_spread": "float",
"sell_spread": "float",
"administration_fees": "float",

View File

@ -24,7 +24,7 @@
"total_annual_dollar_based_charges",
"management_fee_and_costs",
"management_fee",
"performance_fee",
"performance_fee_costs",
"buy_spread",
"sell_spread",
"administration_fees",

View File

@ -117,6 +117,8 @@ class DataExtraction:
if self.doc_source == "aus_prospectus" and self.document_category.upper() == "MIS":
if "administration_fees" in list(datapoint_page_info.keys()):
datapoint_page_info.pop("administration_fees")
if "total_annual_dollar_based_charges" in list(datapoint_page_info.keys()):
datapoint_page_info.pop("total_annual_dollar_based_charges")
return datapoint_page_info
def get_investment_objective_pages(self):
@ -282,6 +284,8 @@ class DataExtraction:
keys = list(data_item.keys())
if "administration_fees" in keys:
data_item.pop("administration_fees")
if "total_annual_dollar_based_charges" in keys:
data_item.pop("total_annual_dollar_based_charges")
keys = [key for key in list(data_item.keys()) if key not in ["fund_name", "share_name"]]
if len(keys) == 0:
remove_items.append(data_item)
@ -327,7 +331,9 @@ class DataExtraction:
dp_keys = [key for key in keys if key not in ["fund_name",
"share_name",
"management_fee_and_costs",
"management_fee"]]
"management_fee",
"buy_spread",
"sell_spread"]]
for dp_key in dp_keys:
if dp_key not in datapoint_list_with_production_name:
datapoint_list_with_production_name.append(dp_key)
@ -608,7 +614,7 @@ class DataExtraction:
previous_page_datapoints = []
previous_page_fund_name = None
for page_num, page_text in self.page_text_dict.items():
# if page_num != 16:
# if page_num not in [4, 5]:
# continue
if page_num in handled_page_num_list:
continue

View File

@ -70,7 +70,6 @@
"management_fee_and_costs": "Management fee and costs is share level data.",
"management_fee": "Management fee is share level data.",
"performance_fee_costs": "Performance fee costs is share class level data.",
"performance_fee": "Performance fees is share class level data.",
"buy_spread": "Buy spread is share class level data.",
"sell_spread": "Sell spread is share class level data.",
"establishment_fee": "Establishment fee is share class level data.",
@ -97,7 +96,6 @@
"management_fee_and_costs": "Management fee and costs is belong to percentage number, the value should be less than 100.",
"management_fee": "Management fee is belong to percentage number, the value should be less than 100.",
"performance_fee_costs": "Performance fees costs is belong to percentage number, the value should be less than 100.",
"performance_fee": "Performance fees is belong to percentage number, the value should be less than 100.",
"buy_spread": "Buy spread is belong to percentage number, the value should be less than 100.",
"sell_spread": "Sell spread is belong to percentage number, the value should be less than 100.",
"establishment_fee": "Establishment fee is belong to percentage number, the value should be less than 100.",
@ -168,7 +166,7 @@
"\n\nManagement fees \nManagement fees and costs \nIndirect Fee \nPerformance Fees \nTransaction Costs \nTotal \nMLC diversified investment \noption \nMLC Horizon 2 \nIncome Portfolio \n1.35% p.a. \n0.07% p.a. \n0.06% p.a. \n0.01% p.a. \n1.49% p.a. \n",
"---Example End---",
"The output should be:",
"{\"data\": [{\"fund name\": \"MLC Horizon 2 Income Portfolio\", \"share name\": \"MLC Horizon 2 Income Portfolio\", \"management_fee_and_costs\": 1.42, \"management_fee\": 1.35, \"indirect_costs\": 0.07, \"performance_fee\": 0.06}]}",
"{\"data\": [{\"fund name\": \"MLC Horizon 2 Income Portfolio\", \"share name\": \"MLC Horizon 2 Income Portfolio\", \"management_fee_and_costs\": 1.42, \"management_fee\": 1.35, \"indirect_costs\": 0.07, \"performance_fee_costs\": 0.06}]}",
"\n",
"C.2 With \"Total management cost (% pa)\" = \"Management fee (% pa)\" + \"Estimated other indirect costs\" + \"Estimated expense recoveries\" + \"Estimated Regulatory Change Expense Recovery\".",
"The management_fee is the value of \"Management fee (% pa)\".",
@ -177,7 +175,7 @@
"Fund/Investment\nOption\nManagement\nfee (% pa)\nEstimated \nPerformance \n-related \nfees \nEstimated\nother\nindirect\ncosts\nEstimated\nexpense\nrecoveries\nEstimated\nRegulatory\nChange\nExpense\nRecovery\nTotal\nmanagement\ncost (% pa)\nEstimated\nbuy-sell\nspread (%)\nBT Future \nGoals Fund \n1.33 0.000.04 0.000.01 1.38 0.31\n1.29 0.000.00 0.000.01 1.30 0.29\n",
"---Example End---",
"The output should be:",
"{\"data\": [{\"fund name\": \"BT Future Goals Fund\", \"share name\": \"BT Future Goals Fund\", \"management_fee_and_costs\": 1.38, \"management_fee\": 1.33, \"indirect_costs\": 0.04, \"recoverable_expenses\": 0, \"change_recoverable_expenses\": 0.01, \"performance_fee\": 0, \"buy_spread\": 0.31, \"sell_spread\": 0.31}]}",
"{\"data\": [{\"fund name\": \"BT Future Goals Fund\", \"share name\": \"BT Future Goals Fund\", \"management_fee_and_costs\": 1.38, \"management_fee\": 1.33, \"indirect_costs\": 0.04, \"recoverable_expenses\": 0, \"change_recoverable_expenses\": 0.01, \"performance_fee_costs\": 0, \"buy_spread\": 0.31, \"sell_spread\": 0.31}]}",
"\n",
"D. If only find \"Management fees and costs\", please output the relevant same value for both of data point keys: \"management_fee_and_costs\" and \"management_fee\".",
"---Example 1 Start---",
@ -209,9 +207,9 @@
"---Example 1 End---",
"For this example, please ignore the \"Total investment fees and costs\" and \"Transaction costs\" columns, ",
"just output the values from \"Investment fees and costs (excl Performance Fees)\" as management_fee and management_fee_and_costs, ",
"output the values from \"Performance Fee\" as performance_fee.",
"output the values from \"Performance Fee\" as performance_fee_costs.",
"The output should be:",
"{\"data\": [{\"fund name\": \"Balanced\", \"share name\": \"Balanced\", \"management_fee_and_costs\": 0.53, \"management_fee\": 0.53, \"performance_fee\": 0.43}, {\"fund name\": \"Capital Stable\", \"share name\": \"Capital Stable\", \"management_fee_and_costs\": 0.32, \"management_fee\": 0.32, \"performance_fee\": 0.18}]}",
"{\"data\": [{\"fund name\": \"Balanced\", \"share name\": \"Balanced\", \"management_fee_and_costs\": 0.53, \"management_fee\": 0.53, \"performance_fee_costs\": 0.43}, {\"fund name\": \"Capital Stable\", \"share name\": \"Capital Stable\", \"management_fee_and_costs\": 0.32, \"management_fee\": 0.32, \"performance_fee_costs\": 0.18}]}",
"\n",
"G. If the management fee/ management fee and costs is with the range, e.g. 0.05% to 1.00% or 0.55%-1.00%, please ignore and output empty.",
"---Example 1 Start---",
@ -236,22 +234,22 @@
"---Example 1 End---",
"The column: \"Equals investment fees and costs\" is the sum of \"Performance fee\" and \"Plus other investment fees and costs\", we should ignore the \"Performance fee\" value, just output the \"Plus other investment fees and costs\" value.",
"The \"Plus other investment fees and costs\" could be the values for both of \"management fee\" and \"management fee and costs\", so the output should be:",
"{\"data\": [{\"fund name\": \"MLC Inflation Plus Conservative Portfolio\", \"share name\": \"Super & Pension pre-retirement phase\", \"management_fee_and_costs\": 0.77, \"management_fee\": 0.77, \"performance_fee\": 0.18, \"buy_spread\": 0.1, \"sell_spread\": 0.1}, {\"fund name\": \"MLC Inflation Plus Conservative Portfolio\", \"share name\": \"Retirement Phase\", \"management_fee_and_costs\": 0.77, \"management_fee\": 0.77, \"performance_fee\": 0.18, \"buy_spread\": 0.1, \"sell_spread\": 0.1}]}",
"{\"data\": [{\"fund name\": \"MLC Inflation Plus Conservative Portfolio\", \"share name\": \"Super & Pension pre-retirement phase\", \"management_fee_and_costs\": 0.77, \"management_fee\": 0.77, \"performance_fee_costs\": 0.18, \"buy_spread\": 0.1, \"sell_spread\": 0.1}, {\"fund name\": \"MLC Inflation Plus Conservative Portfolio\", \"share name\": \"Retirement Phase\", \"management_fee_and_costs\": 0.77, \"management_fee\": 0.77, \"performance_fee_costs\": 0.18, \"buy_spread\": 0.1, \"sell_spread\": 0.1}]}",
"---Example 2 Start---",
"MANAGEMENT COSTS AND TRANSACTION COSTS \n\nOption name Management costs \nEstimated \nperformance \nfee (pa) 1 \nTotal management\ncosts (including\nestimated performance\nfee) pa\nTransaction costs \nper transaction (%) \nMULTI-MANAGER MULTI-SECTOR (These investment options are located in the Investment Options Menu on pages 18 to 19.) \nFirstChoice Wholesale Defensive 0.85% 0.85% 0.15\nFirstChoice Wholesale Conservative 0.90% 0.02%1 0.92% 1 0.15 \n",
"---Example 2 End---",
"The column: \"Total management costs (including estimated performance fee) pa\" is the sum of \"Management costs\" and \"Estimated performance fee (pa)\", we should ignore the \"Estimated performance fee (pa)\" value, just output the \"Management costs\" value.",
"Both of management_fee and management_fee_and_costs are the values for \"Management costs\", so the output should be:",
"{\"data\": [{\"fund name\": \"FirstChoice Wholesale Defensive\", \"share name\": \"FirstChoice Wholesale Defensive\", \"management_fee_and_costs\": 0.85, \"management_fee\": 0.85}, {\"fund name\": \"FirstChoice Wholesale Conservative\", \"share name\": \"FirstChoice Wholesale Conservative\", \"management_fee_and_costs\": 0.9, \"management_fee\": 0.9, \"performance_fee\": 0.02}]}",
"{\"data\": [{\"fund name\": \"FirstChoice Wholesale Defensive\", \"share name\": \"FirstChoice Wholesale Defensive\", \"management_fee_and_costs\": 0.85, \"management_fee\": 0.85}, {\"fund name\": \"FirstChoice Wholesale Conservative\", \"share name\": \"FirstChoice Wholesale Conservative\", \"management_fee_and_costs\": 0.9, \"management_fee\": 0.9, \"performance_fee_costs\": 0.02}]}",
"---Example 3 Start---",
"Investment \noption \nInvestment fees and \ncosts (p.a.) \n1 \nTransaction \ncosts (p.a.) \nMySuper/ \nBalanced \n0.38% (including 0.09% \nPerformance fee) \n0.18% \nManaged \nGrowth \n0.38% (including 0.11% \nPerformance fee) \n0.08% \n",
"---Example 3 End---",
"The column: \"Investment fees and costs (p.a.)\", \"including Performance fee\", meaning the value is the sum of \"Management costs\" and \"performance fee\", We should subtract the \"performance fee\" value, just output the \"Management costs\" value.",
"Both of management_fee and management_fee_and_costs are the values for \"Management costs\".",
"So, for fund: MySuper/Balanced, the value 0.38, including 0.09 Performance fee, so the Management costs is 0.38 - 0.09 = 0.29, performance_fee is 0.09.",
"For fund: Managed Growth, the value 0.38, including 0.11 Performance fee, so the Management costs is 0.38 - 0.11 = 0.27, performance_fee is 0.11.",
"So, for fund: MySuper/Balanced, the value 0.38, including 0.09 Performance fee, so the Management costs is 0.38 - 0.09 = 0.29, performance_fee_costs is 0.09.",
"For fund: Managed Growth, the value 0.38, including 0.11 Performance fee, so the Management costs is 0.38 - 0.11 = 0.27, performance_fee_costs is 0.11.",
"So the output should be:",
"{\"data\": [{\"fund name\": \"MySuper/Balanced\", \"share name\": \"MySuper/Balanced\", \"management_fee_and_costs\": 0.29, \"management_fee\": 0.29, \"performance_fee\": 0.09}, {\"fund name\": \"Managed Growth\", \"share name\": \"Managed Growth\", \"management_fee_and_costs\": 0.27, \"management_fee\": 0.27, \"performance_fee\": 0.11}]}",
"{\"data\": [{\"fund name\": \"MySuper/Balanced\", \"share name\": \"MySuper/Balanced\", \"management_fee_and_costs\": 0.29, \"management_fee\": 0.29, \"performance_fee_costs\": 0.09}, {\"fund name\": \"Managed Growth\", \"share name\": \"Managed Growth\", \"management_fee_and_costs\": 0.27, \"management_fee\": 0.27, \"performance_fee_costs\": 0.11}]}",
"---Example 4 Start---",
"Fund name \nTotal of management \nfees and costs and \nperformance \nfees (% p.a.) \n= \nManagement \nfees and costs \n(% p.a.) \n+ \nPerformance \nfee (% p.a.) \nBuy/sell \nspread \nCFS Real Return Class A 1 \n0.87% \n0.87% \n0.15% \nCFS Defensive Builder \n0.68% \n0.67% \n0.01% \n0.15% \n",
"---Example 4 End---",
@ -259,7 +257,7 @@
"The column \"Management fees and costs (% p.a.)\" is the value of \"Management fee and costs\".",
"Both of management_fee and management_fee_and_costs are the values for \"Management fees and costs (% p.a.)\" for this case.",
"So the output should be:",
"{\"data\": [{\"fund name\": \"CFS Real Return Class A\", \"share name\": \"CFS Real Return Class A\", \"management_fee_and_costs\": 0.87, \"management_fee\": 0.87, \"buy_spread\": 0.15, \"sell_spread\": 0.15}, {\"fund name\": \"CFS Defensive Builder\", \"share name\": \"CFS Defensive Builder\", \"management_fee_and_costs\": 0.67, \"management_fee\": 0.67, \"performance_fee\": 0.01, \"buy_spread\": 0.15, \"sell_spread\": 0.15}]}",
"{\"data\": [{\"fund name\": \"CFS Real Return Class A\", \"share name\": \"CFS Real Return Class A\", \"management_fee_and_costs\": 0.87, \"management_fee\": 0.87, \"buy_spread\": 0.15, \"sell_spread\": 0.15}, {\"fund name\": \"CFS Defensive Builder\", \"share name\": \"CFS Defensive Builder\", \"management_fee_and_costs\": 0.67, \"management_fee\": 0.67, \"performance_fee_costs\": 0.01, \"buy_spread\": 0.15, \"sell_spread\": 0.15}]}",
"\n",
"I. Some table is very complex, with many data points columns, please extract the relevant values.",
"---Example 1 Start---",
@ -267,12 +265,12 @@
"---Example 1 End---",
"For this table, there are \"Administration fees and costs (p.a.)\" as administration_fees, ",
"\"Investment fees and costs (p.a.)\" as management_fee_and_costs and management_fee, ",
"\"Performance fee (p.a.)\" as performance_fee, ",
"\"Performance fee (p.a.)\" as performance_fee_costs, ",
"\"Buy/sell spread (%)\" as buy_spread and sell_spread.",
"If one row has 5 decimal numbers, ",
"the 2nd decimal number is the administration_fees, ",
"the 3rd decimal number is the management_fee_and_costs and management_fee, ",
"the 4th decimal number is the performance_fee, ",
"the 4th decimal number is the performance_fee_costs, ",
"the 5th decimal number is the buy_spread and sell_spread.",
"If one row has 4 decimal numbers, ",
"the 2nd decimal number is the administration_fees, ",
@ -280,7 +278,7 @@
"the 4th decimal number is the buy_spread and sell_spread.",
"Please always ignore the 1st decimal number, we need not the total sum values.",
"The output should be:",
"{\"data\": [{\"fund name\": \"CFS Multi-Manager Multi-Sector\", \"share name\": \"CFS Defensive\", \"management_fee_and_costs\": 0.74, \"management_fee\": 0.74, \"administration_fees\": 0.2, \"buy_spread\": 0.15, \"sell_spread\": 0.15}, {\"fund name\": \"CFS Multi-Manager Multi-Sector\", \"share name\": \"CFS Conservative\", \"management_fee_and_costs\": 0.81, \"management_fee\": 0.81, \"administration_fees\": 0.20, \"performance_fee\": 0.03, \"buy_spread\": 0.15, \"sell_spread\": 0.15}]}",
"{\"data\": [{\"fund name\": \"CFS Multi-Manager Multi-Sector\", \"share name\": \"CFS Defensive\", \"management_fee_and_costs\": 0.74, \"management_fee\": 0.74, \"administration_fees\": 0.2, \"buy_spread\": 0.15, \"sell_spread\": 0.15}, {\"fund name\": \"CFS Multi-Manager Multi-Sector\", \"share name\": \"CFS Conservative\", \"management_fee_and_costs\": 0.81, \"management_fee\": 0.81, \"administration_fees\": 0.20, \"performance_fee_costs\": 0.03, \"buy_spread\": 0.15, \"sell_spread\": 0.15}]}",
"J. If exist **\"Maximum management fee\"** in context, please ignore relevant values.",
"---Example Start---",
"Fund name \nMaximum \nmanagement \nfee (p.a.) \nLOWER VOLATILITY SHARE \nFirst Sentier Wholesale Equity Income Fund 3.075% \nAUSTRALIAN SHARE \nFirst Sentier Wholesale Australian Share Fund 1.538%",
@ -290,40 +288,72 @@
"{\"data\": []}"
],
"administration_fees":[
"Administration fees and costs is share class level data.",
"Administration fees and costs and total annual dollar-based charges are share class level data.",
"Simple case:",
"----Example 1 Start----",
"Fees and costs summary \n\nLegalsuper Pension \n\nType of fee or cost Amount How and when paid \nOngoing annual fees and costs \n1 \nAdministration fees and \ncosts \n$67.60 pa ($1.30 per week) plus 0.29% pa \nof your account balance \n",
"----Example 1 End----",
"According to example, the administration fee is $1.30 per week plus 0.29% pa, so administration_fees is 0.29, ",
"total_annual_dollar_based_charges is 1.30 * 52 = 67.6",
"The output should be:",
"{\"data\": [{\"fund name\": \"Legalsuper Pension\", \"share name\": \"Legalsuper Pension\", \"administration_fees\": 0.29}]}",
"{\"data\": [{\"fund name\": \"Legalsuper Pension\", \"share name\": \"Legalsuper Pension\", \"administration_fees\": 0.29, \"total_annual_dollar_based_charges\": 67.6}]}",
"\n",
"----Example 2 Start----",
"At a glance summary \n\nImportant information about TelstraSuper RetireAccess income streams \n\nAdministration fee • \n• \n$1.00 per week plus 0.17% pa - if you have more than one account the $1.00 per \nweek fee will only apply to one account \nA fee rebate applies if your balance exceeds $1m, or if your and your spouses \ncombined account balances exceed $969,410 (conditions apply)",
"----Example 2 End----",
"The administration fee is $1.00 per week plus 0.17% pa, so the output should be:",
"{\"data\": [{\"fund name\": \"TelstraSuper RetireAccess\", \"share name\": \"TelstraSuper RetireAccess\", \"administration_fees\": 0.17}]}",
"According to example, the administration fee is $1.00 per week plus 0.17% pa, so administration_fees is 0.17, ",
"total_annual_dollar_based_charges is 1 * 52 = 52",
"The output should be:",
"{\"data\": [{\"fund name\": \"TelstraSuper RetireAccess\", \"share name\": \"TelstraSuper RetireAccess\", \"administration_fees\": 0.17, \"total_annual_dollar_based_charges\": 52}]}",
"---Example 3 Start---",
"\nPrime Super Income Stream\nType of fee \nor cost \nAmount How and when paid \nOngoing annual fees and costs \n1 \nAdministration \nfees and costs \nAdministration \nfees of $1.30 \nper week \nPlus \n0.50% p.a. of \nyour account \nbalance, capped \nat $500 p.a. \nDeducted from your \naccount on the last \nbusiness day of each \nmonth, except if you \nare leaving Prime \nSuper, in which case \nit is deducted prior to \nyour exit from Prime \nSuper. \nInvestment \nfees and costs \n2 \n0.07% to 1.00% \nof assets p.a. \ndepending on \nthe investment \noption \nTaken into account \nprior to the declaration \nof weekly earning \nrates. This cost is not \ndeducted directly from \nyour account. \n",
"---Example 3 End---",
"The administration fee is $1.30 per week plus 0.50% p.a., so the output should be:",
"{\"data\": [{\"fund name\": \"Prime Super Income Stream\", \"share name\": \"Prime Super Income Stream\", \"administration_fees\": 0.50}]}",
"According to example, the administration fee is $1.30 per week plus 0.50% p.a., so administration_fees is 0.5, ",
"total_annual_dollar_based_charges is 1.30 * 52 = 67.6",
"The output should be:",
"{\"data\": [{\"fund name\": \"Prime Super Income Stream\", \"share name\": \"Prime Super Income Stream\", \"administration_fees\": 0.5, \"total_annual_dollar_based_charges\": 67.6}]}",
"---Example 4 Start---",
"At a glance summary \n\nImportant information about TelstraSuper RetireAccess income streams \n\nTTR income stream Retirement income stream Reference \nAdministration fee • \n• \n$1.00 per week plus 0.17% pa - if you have more than one account the $1.00 per \nweek fee will only apply to one account \nA fee rebate applies if your balance exceeds $1m, or if your and your spouses \ncombined account balances exceed $969,410 (conditions apply) \nRefer to the Fees and \nother costs section on \npages 40-46 for details \n",
"---Example 4 End---",
"According to example, the administration fee is $1.00 per week plus 0.17% pa, so administration_fees is 0.17, ",
"total_annual_dollar_based_charges is 1 * 52 = 52",
"The output should be:",
"{\"data\": [{\"fund name\": \"TelstraSuper RetireAccess\", \"share name\": \"TelstraSuper RetireAccess\", \"administration_fees\": 0.17, \"total_annual_dollar_based_charges\": 52}]}",
"\n",
"Complex cases:",
"A. Need to add multiple numbers together.",
"----Example 1 Start----",
"---Example 1 Start---",
"MLC MasterKey Super & Pension Fundamentals \n\nType of fee or cost \nOngoing annual fees and costs 1 \n\nAdministration fees and \ncosts \n\nAccount balance \n\nFirst $150,000 \n\nRemaining balance \nover $150,000 \n\nThe percentage Administration fee \ncharged to each account you have \n(excluding the fixed fee and Trustee \nLevy) is capped at $2,500 pa. \n\nPlus \n\nTrustee Levy of 0.02% pa of your \naccount balance. \n\nPlus \n\nAmount \n\nHow and when paid \n\nPercentage fee \n(% pa) \n\n0.30 \n\n0.10 \n\nAdministration fee \n\nThe Administration fee is deducted monthly from your account and will \nbe rounded off to 2 decimal points. As a result of the rounding, the total \nannual amount may slightly differ. \n\nThe percentage fee for each month is calculated using your average Super \nand Pension account balance for the previous month. \n\nThe Trustee Levy will be deducted monthly from your account balance. \n\nThe levy amount for each month is calculated using your account balance \nat the date it's deducted. \n\nYou won't see these costs as direct charges to your account. They reduce \nthe balance held in reserves used to cover certain costs related to the \nrunning of the MLC Super Fund. \n\n4 \n\nMLC MasterKey Super & Pension Fundamentals Product Disclosure Statement",
"----Example 1 End----",
"---Example 1 End---",
"For this case, the relevant values: first: 0.30%, remaining balance over: 0.10%, Plus Trustee Levy: 0.02%.",
"Please ignore the remaining balance over 0.10%, add first: 0.30% and Plus Trustee Levy: 0.02% = 0.32%",
"The output should be:",
"{\"data\": [{\"fund name\": \"MLC MasterKey Super & Pension Fundamentals\", \"share name\": \"MLC MasterKey Super & Pension Fundamentals\", \"administration_fees\": 0.32}]}"
"{\"data\": [{\"fund name\": \"MLC MasterKey Super & Pension Fundamentals\", \"share name\": \"MLC MasterKey Super & Pension Fundamentals\", \"administration_fees\": 0.32}]}",
"---Example 2 Start---",
"Fees and costs summary\n\nHostplus Superannuation and Personal Super Plan \n\nType of fee \nAmount \nHow and when paid \nOngoing annual fees and costs1 \nAdministration \nfees and costs \n$78.00 p.a. \n($1.50 per week) \nplus $32.24 p.a. \nDeducted monthly from \nyour account. \nDeducted from the Funds \nAdministration Reserve \nthroughout the year (and \nnot from your account). \nplus trustee fee \nof 0.0165% p.a. \nof your account \nbalance. \n",
"---Example 2 End---",
"According to example, the total annual dollar-based charges is $78.00 p.a. ($1.50 per week), so total_annual_dollar_based_charges is 78.",
"Attention: about plus trustee fee of 0.0165% p.a. of your account balance., it's only part of administration_fees, missing the \"first\" part, so please ignore this part.",
"The output should be:",
"{\"data\": [{\"fund name\": \"Hostplus Superannuation and Personal Super Plan\", \"share name\": \"Hostplus Superannuation and Personal Super Plan\", \"total_annual_dollar_based_charges\": 78}]}"
],
"total_annual_dollar_based_charges": [
"Total annual dollar-based charges are share class level data.",
"Its value corresponds to the administration fees and costs that are charged on a weekly basis.",
"----Example 1 Start----",
"MLC MasterKey Super & Pension Fundamentals\nType of fee or cost \nOngoing annual fees and costs 1 \nAmount \nHow and when paid \nOther administration costs paid from \nreserves of 0.00% pa of your account \nbalance. \nPlus \nA fixed fee of $1.50 per week \nThis fee is deducted monthly if your account balance is below $50,000 \nwhen the percentage administration fee is deducted. \nInvestment fees and \ncosts 2 \nInvestment fees and estimated costs \nfor MLC Horizon 4 Balanced Portfolio, \n1.20% pa. \nYou won t see these fees and costs as direct charges to your account. \nThey're reflected in the daily unit price of each investment option and will \nreduce the net return on your investment \nInvestment fees and estimated costs \nfor other investment options, ranges \nfrom 0.00% pa to 2.84% pa \n(estimated). \nTransaction costs \nMLC Horizon 4 Balanced Portfolio, \n0.06% pa (estimated). \nOther investment options, ranges \nfrom 0.00% pa to 0.24% pa \n(estimated). \nYou won t see these costs as direct charges to your account. They're \nreflected in the daily unit price of each investment option and will reduce \nthe net return on your investment. \nMember activity related fees and costs \nBuy-sell spread \nYou won t see this fee as a direct charge to your account. It s reflected in \nthe buy and sell unit price of each investment option when there s a \ntransaction on your account. \nMLC Horizon 4 Balanced Portfolio, \n0.10%/0.10% \nOther investment options, ranges \nfrom 0.00%/0.00% to 0.30%/0.30% \nThe current buy-sell spreads of an investment option are available at \nmlc.com.au/buysellspreads \n",
"----Example 1 End----",
"According to example, the fixed fee is $1.50 per week, so total_annual_dollar_based_charges is 1.50 * 52 = 78",
"In the context, also with management fees and costs, management fee, buy_spread and sell_spread for specific fund: MLC Horizon 4 Balanced Portfolio.",
"Please output the relevant values based on specific fund name.",
"The output should be:",
"{\"data\": [{\"fund name\": \"MLC MasterKey Super & Pension Fundamentals\", \"share name\": \"MLC MasterKey Super & Pension Fundamentals\", \"total_annual_dollar_based_charges\": 78}, {\"fund name\": \"MLC Horizon 4 Balanced Portfolio\", \"share name\": \"MLC Horizon 4 Balanced Portfolio\", \"management_fee_and_costs\": 1.2, \"management_fee\": 1.2, \"buy_spread\": 0.1, \"sell_spread\": 0.1}]}"
],
"buy_spread": [
"Please don't extract data by the reported names for buy_spread or sell_spread, they are: ",
"Transaction costs buy/sell spread recovery, Transaction costs reducing return of the investment option (net transaction costs)"
],
"performance_fee": [
"performance_fee_costs": [
"Performance fees is share class level data.",
"If the performance fees is with the range, please ignore and output empty.",
"---Example 1 Start---",
@ -404,7 +434,7 @@
"The administration_fees is \"Administration fees\"",
"The management_fee is \"Investment fees\".",
"The management_fee_and_costs is \"Investment fees\" + \"Estimated other investment costs\".",
"The performance_fee is \"Estimated performance fees\"",
"The performance_fee_costs is \"Estimated performance fees\"",
"---Example 1 Start---",
"\nInvestment option \nAdministration fees \nEstimated administration costs \nInvestment fees \nEstimated performance fees \nEstimated other investment costs \nEstimated transaction costs \nEstimated total ongoing annual fees and costs \nCash \nPerpetual Cash \n0.10% \n0.00% \n0.00% \nn/a \n0.00% \n0.02% \n0.12% \nFixed income and credit \nBentham Global \nIncome \n0.25% \n0.00% \n0.67% \nn/a \n0.00% \n0.05% \n0.97% \nInternetional shares \nPerpetual Global \nInnovation Share \n0.25% \n0.00% \n0.99% \n2.30 \n0.01% \n0.27% \n3.82% \n",
"---Example 1 End---",
@ -412,9 +442,9 @@
"Please pay attention below information",
"Assume the column sequence number is from 1.",
"\"Administration fees\" values are as the column 1 numbers, \"Investment fees\" values are as the column 3 numbers, \"Estimated other investment costs\" values are as the column 5 numbers, \"Estimated performance fees\" values are as the column 4 numbers.",
"For fund: Perpetual Global Innovation Share, the administration_fees should be the column 1 number: 0.25, the management_fee should be the column 3 number: 0.99, the management_fee_and_costs should be 1 = 0.99(the column 3 number) + 0.01 (the column 5 number), the performance_fee should be 2.3 (the column 4 number)",
"For fund: Perpetual Global Innovation Share, the administration_fees should be the column 1 number: 0.25, the management_fee should be the column 3 number: 0.99, the management_fee_and_costs should be 1 = 0.99(the column 3 number) + 0.01 (the column 5 number), the performance_fee_costs should be 2.3 (the column 4 number)",
"Therefore, the output should be:",
"{\"data\": [{\"fund name\": \"Perpetual Cash\", \"share name\": \"Perpetual Cash\", \"management_fee_and_costs\": 0, \"management_fee\": 0, \"administration_fees\": 0.10}, {\"fund name\": \"Bentham Global Income\", \"share name\": \"Bentham Global Income\", \"management_fee_and_costs\": 0.67, \"management_fee\": 0.67, \"administration_fees\": 0.25}]}, {\"fund name\": \"Perpetual Global Innovation Share\", \"share name\": \"Perpetual Global Innovation Share\", \"management_fee_and_costs\": 1, \"management_fee\": 0.99, \"administration_fees\": 0.25, \"performance_fee\": 2.3}"
"{\"data\": [{\"fund name\": \"Perpetual Cash\", \"share name\": \"Perpetual Cash\", \"management_fee_and_costs\": 0, \"management_fee\": 0, \"administration_fees\": 0.10}, {\"fund name\": \"Bentham Global Income\", \"share name\": \"Bentham Global Income\", \"management_fee_and_costs\": 0.67, \"management_fee\": 0.67, \"administration_fees\": 0.25}]}, {\"fund name\": \"Perpetual Global Innovation Share\", \"share name\": \"Perpetual Global Innovation Share\", \"management_fee_and_costs\": 1, \"management_fee\": 0.99, \"administration_fees\": 0.25, \"performance_fee_costs\": 2.3}"
]
},
{
@ -422,7 +452,7 @@
"prompts": ["Complex management fee and costs rule:",
"If the table with columns:",
"\"Entry Fee option\", \"Nil Entry option\", \"Estimated Other investment costs\", \"Estimated Performance fees\"",
"The performance_fee is \"Estimated Performance fees\"",
"The performance_fee_costs is \"Estimated Performance fees\"",
"The fund name's tail is \"Entry Fee\" for \"Entry Fee option\", e.g. if fund name is \"MultiSeries 30\", the Entry Fee fund name is \"MultiSeries 30 Entry Fee\"",
"The fund name's tail is \"Nil Entry\" for \"Nil Entry option\", e.g. if fund name is \"MultiSeries 30\", the Nil Entry fund name is \"MultiSeries 30 Nil Entry\".",
"For Entry Fee fund, both of management_fee and management_fee_and_costs are \"Entry Fee option\" + \"Estimated other investment costs\".",
@ -435,10 +465,10 @@
"Assume the column sequence number is from 1.",
"\"Entry Fee option\" values are as the column 1 numbers, \"Nil Entry option\" values are as the column 2 numbers, \"Estimated other investment costs\" values are as the column 3 numbers, \"Estimated Performance fees\" values are as the column 4 numbers.",
"For main fund: Platinum Asia with values: 2.14 2.990.02 0.000.21 2.37 3.22, ",
"the fund: Platinum Asia Entry Fee, both of management_fee and management_fee_and_costs should be 2.16 = 2.14(the column 1 number) + 0.02 (the column 3 number), performance_fee is 0 (the column 4 number)",
"the fund: Platinum Asia Nil Entry, both of management_fee and management_fee_and_costs should be 3.01 = 2.99(the column 2 number) + 0.02 (the column 3 number), performance_fee is 0 (the column 4 number)",
"the fund: Platinum Asia Entry Fee, both of management_fee and management_fee_and_costs should be 2.16 = 2.14(the column 1 number) + 0.02 (the column 3 number), performance_fee_costs is 0 (the column 4 number)",
"the fund: Platinum Asia Nil Entry, both of management_fee and management_fee_and_costs should be 3.01 = 2.99(the column 2 number) + 0.02 (the column 3 number), performance_fee_costs is 0 (the column 4 number)",
"Therefore, the output should be:",
"{\"data\": [{\"fund name\": \"OnePath International Shares Index (Hedged) Entry Fee\", \"share name\": \"OnePath International Shares Index (Hedged) Entry Fee\", \"management_fee_and_costs\": 0.47, \"management_fee\": 0.47, \"performance_fee\": 0},{\"fund name\": \"OnePath International Shares Index (Hedged) Nil Entry\", \"share name\": \"OnePath International Shares Index (Hedged) Nil Entry\", \"management_fee_and_costs\": 1.32, \"management_fee\": 1.32, \"performance_fee\": 0}, {\"fund name\": \"Pendal Concentrated Global Shares Hedged II Entry Fee\", \"share name\": \"Pendal Concentrated Global Shares Hedged II Entry Fee\", \"management_fee_and_costs\": 1.44, \"management_fee\": 1.44, \"performance_fee\": 0}]}, {\"fund name\": \"Pendal Concentrated Global Shares Hedged II Nil Entry\", \"share name\": \"Pendal Concentrated Global Shares Hedged II Nil Entry\", \"management_fee_and_costs\": 2.29, \"management_fee\": 2.29, \"performance_fee\": 0}]}, {\"fund name\": \"Platinum Asia Entry Fee\", \"share name\": \"Platinum Asia Entry Fee\", \"management_fee_and_costs\": 2.16, \"management_fee\": 2.16, \"performance_fee\": 0}, {\"fund name\": \"Platinum Asia Nil Entry\", \"share name\": \"Platinum Asia Nil Entry\", \"management_fee_and_costs\": 3.01, \"management_fee\": 3.01, \"performance_fee\": 0}"
"{\"data\": [{\"fund name\": \"OnePath International Shares Index (Hedged) Entry Fee\", \"share name\": \"OnePath International Shares Index (Hedged) Entry Fee\", \"management_fee_and_costs\": 0.47, \"management_fee\": 0.47, \"performance_fee_costs\": 0},{\"fund name\": \"OnePath International Shares Index (Hedged) Nil Entry\", \"share name\": \"OnePath International Shares Index (Hedged) Nil Entry\", \"management_fee_and_costs\": 1.32, \"management_fee\": 1.32, \"performance_fee_costs\": 0}, {\"fund name\": \"Pendal Concentrated Global Shares Hedged II Entry Fee\", \"share name\": \"Pendal Concentrated Global Shares Hedged II Entry Fee\", \"management_fee_and_costs\": 1.44, \"management_fee\": 1.44, \"performance_fee_costs\": 0}]}, {\"fund name\": \"Pendal Concentrated Global Shares Hedged II Nil Entry\", \"share name\": \"Pendal Concentrated Global Shares Hedged II Nil Entry\", \"management_fee_and_costs\": 2.29, \"management_fee\": 2.29, \"performance_fee_costs\": 0}]}, {\"fund name\": \"Platinum Asia Entry Fee\", \"share name\": \"Platinum Asia Entry Fee\", \"management_fee_and_costs\": 2.16, \"management_fee\": 2.16, \"performance_fee_costs\": 0}, {\"fund name\": \"Platinum Asia Nil Entry\", \"share name\": \"Platinum Asia Nil Entry\", \"management_fee_and_costs\": 3.01, \"management_fee\": 3.01, \"performance_fee_costs\": 0}"
]
},
{
@ -470,7 +500,7 @@
"The management_fee_costs is \"Management fee (% pa)\" + \"Recoverable expenses\" + \"Estimated other indirect costs\".",
"The recoverable_expenses is \"Recoverable expenses\"",
"The indirect_costs is \"Estimated other indirect costs\"",
"The performance_fee is \"Peformance fees charged to the Investment Option by underlying managers\".",
"The performance_fee_costs is \"Peformance fees charged to the Investment Option by underlying managers\".",
"The interposed_vehicle_performance_fee_cost is \"Performance fees charged by interposed vehicles\"",
"The buy_spread and sell_spread are \"Buy/sell spreads\".",
"---Example 1 Start---",
@ -483,7 +513,7 @@
"The 1st number: 0.62 is the management_fee,",
"the 2nd number: 0.18 is the recoverable_expenses,",
"the 3rd number: 0.05 is the indirect_costs",
"the 4th number: 0.00 is the performance_fee,",
"the 4th number: 0.00 is the performance_fee_costs,",
"the 5th number: 0.00 is the interposed_vehicle_performance_fee_cost, ",
"the 6th number: 0.14 is the Transaction costs (% pa).",
"the 7th number: 0.08 is the buy_spread, ",
@ -491,7 +521,7 @@
"The management_fee_and_costs is Management fee (i) + Recoverable expenses + Estimated other indirect costs = 0.62 + 0.18 + 0.05= 0.85",
"**Attention: Ignore Transaction costs (% pa), the 6th number, DO NOT APPLY ITS VALUE TO CALCULATE management_fee_and_costs!!!**",
"The output should be: ",
"{\"data\": [{\"fund name\": \"North Active Defensive\", \"share name\": \"North Active Defensive\", \"management_fee_and_costs\": 0.85, \"management_fee\": 0.62, \"recoverable_expenses\": 0.18, \"indirect_costs\": 0.05, \"performance_fee\": 0, \"interposed_vehicle_performance_fee_cost\": 0, \"buy_spread\": 0.08, \"sell_spread\": 0.08}, {\"fund name\": \"North Active Moderately Defensive\", \"share name\": \"Active Moderately Defensive\", \"management_fee_and_costs\": 0.83, \"management_fee\": 0.72, \"recoverable_expenses\": 0.07, \"indirect_costs\": 0.04,\"performance_fee\": 0, \"interposed_vehicle_performance_fee_cost\": 0.01, \"buy_spread\": 0.09, \"sell_spread\": 0.09}, {\"fund name\": \"North Index Growth\", \"share name\": \"North Index Growth\", \"management_fee_and_costs\": 0.45, \"management_fee\": 0.45, \"recoverable_expenses\": 0, \"indirect_costs\": 0,\"performance_fee\": 0, \"interposed_vehicle_performance_fee_cost\": 0, \"buy_spread\": 0.06, \"sell_spread\": 0.06}, {\"fund name\": \"North Index High Growth\", \"share name\": \"North Index High Growth\", \"management_fee_and_costs\": 0.46, \"management_fee\": 0.45, \"recoverable_expenses\": 0, \"indirect_costs\": 0.01,\"performance_fee\": 0, \"interposed_vehicle_performance_fee_cost\": 0, \"buy_spread\": 0.06, \"sell_spread\": 0.07}]}",
"{\"data\": [{\"fund name\": \"North Active Defensive\", \"share name\": \"North Active Defensive\", \"management_fee_and_costs\": 0.85, \"management_fee\": 0.62, \"recoverable_expenses\": 0.18, \"indirect_costs\": 0.05, \"performance_fee_costs\": 0, \"interposed_vehicle_performance_fee_cost\": 0, \"buy_spread\": 0.08, \"sell_spread\": 0.08}, {\"fund name\": \"North Active Moderately Defensive\", \"share name\": \"Active Moderately Defensive\", \"management_fee_and_costs\": 0.83, \"management_fee\": 0.72, \"recoverable_expenses\": 0.07, \"indirect_costs\": 0.04,\"performance_fee_costs\": 0, \"interposed_vehicle_performance_fee_cost\": 0.01, \"buy_spread\": 0.09, \"sell_spread\": 0.09}, {\"fund name\": \"North Index Growth\", \"share name\": \"North Index Growth\", \"management_fee_and_costs\": 0.45, \"management_fee\": 0.45, \"recoverable_expenses\": 0, \"indirect_costs\": 0,\"performance_fee_costs\": 0, \"interposed_vehicle_performance_fee_cost\": 0, \"buy_spread\": 0.06, \"sell_spread\": 0.06}, {\"fund name\": \"North Index High Growth\", \"share name\": \"North Index High Growth\", \"management_fee_and_costs\": 0.46, \"management_fee\": 0.45, \"recoverable_expenses\": 0, \"indirect_costs\": 0.01,\"performance_fee_costs\": 0, \"interposed_vehicle_performance_fee_cost\": 0, \"buy_spread\": 0.06, \"sell_spread\": 0.07}]}",
"---Example 2 Start---",
"Investment Option \nManagement fee (% pa) \nRecoverable expenses \nEstimated other indirect costs \nPerformance fees charged to the Fund by underlying managers \nPerformance fees charged by interposed vehicles \nTransaction costs (% pa) \nBuy/sell spreads (%) \n0.20 \n0.01 \n0.00 \n0.00 \n0.00 \n0.00 \n0.08/0.08 \nMyNorth \nAustralian Fixed \nInterest Index \niv \n0.25 \n0.01 \n0.00 \n0.00 \n0.00 \n0.07 \n0.10/0.10 \nMyNorth \nInternational \nFixed Interest \nIndex - Hedged \n",
"---Example 2 End---",
@ -500,7 +530,7 @@
"b. The algorithm to calculate management_fee_and_costs is same as Example 1.",
"c. The difference is **the fund name is after the data row, e.g. the fund name of the first data row is: MyNorth Australian Fixed Interest Index**",
"The output should be: ",
"{\"data\": [{\"fund name\": \"MyNorth Australian Fixed Interest Index\", \"share name\": \"MyNorth Australian Fixed Interest Index\", \"management_fee_and_costs\": 0.21, \"management_fee\": 0.20, \"recoverable_expenses\": 0, \"indirect_costs\": 0, \"performance_fee\": 0, \"interposed_vehicle_performance_fee_cost\": 0, \"buy_spread\": 0.08, \"sell_spread\": 0.08}, {\"fund name\": \"MyNorth International Fixed Interest Index - Hedged\", \"share name\": \"MyNorth International Fixed Interest Index - Hedged\", \"management_fee_and_costs\": 0.26, \"management_fee\": 0.25, \"recoverable_expenses\": 0, \"indirect_costs\": 0, \"performance_fee\": 0, \"interposed_vehicle_performance_fee_cost\": 0, \"buy_spread\": 0.1, \"sell_spread\": 0.1}]}"
"{\"data\": [{\"fund name\": \"MyNorth Australian Fixed Interest Index\", \"share name\": \"MyNorth Australian Fixed Interest Index\", \"management_fee_and_costs\": 0.21, \"management_fee\": 0.20, \"recoverable_expenses\": 0, \"indirect_costs\": 0, \"performance_fee_costs\": 0, \"interposed_vehicle_performance_fee_cost\": 0, \"buy_spread\": 0.08, \"sell_spread\": 0.08}, {\"fund name\": \"MyNorth International Fixed Interest Index - Hedged\", \"share name\": \"MyNorth International Fixed Interest Index - Hedged\", \"management_fee_and_costs\": 0.26, \"management_fee\": 0.25, \"recoverable_expenses\": 0, \"indirect_costs\": 0, \"performance_fee_costs\": 0, \"interposed_vehicle_performance_fee_cost\": 0, \"buy_spread\": 0.1, \"sell_spread\": 0.1}]}"
]
}
]
@ -594,11 +624,10 @@
"share 2",
"share 3"
],
"total_annual_dollar_based_charges_value": [125.00, 95.00, 26.00],
"total_annual_dollar_based_charges_value": [65, 57, 67.6],
"management_fee_and_costs_value": [2.63, 1.58, 2.55],
"management_fee_value": [0.85, 1.10, 0.23],
"performance_fee_value": [0.03, 0.21, 0.08],
"performance_fee_costs_value": [0.05, 0.25, 0.09],
"performance_fee_costs_value": [0.03, 0.21, 0.08],
"buy_spread_value": [0.10, 0.15, 0.12],
"sell_spread_value": [0.10, 0.10, 0.15],
"establishment_fee_value": [0.75, 1.20, 0.25],
@ -623,7 +652,7 @@
"total_annual_dollar_based_charges": "Total annual dollar based charges",
"management_fee_and_costs": "Management fee and costs",
"management_fee": "Management fee",
"performance_fee": "Performance fee",
"performance_fee_costs": "Performance fee",
"buy_spread": "Buy spread",
"sell_spread": "Sell spread",
"administration_fees": "Administration fee",

View File

@ -1526,8 +1526,8 @@ if __name__ == "__main__":
# special_doc_id_list = ["553242411"]
re_run_extract_data = True
re_run_mapping_data = True
re_run_extract_data = False
re_run_mapping_data = False
force_save_total_data = True
doc_source = "aus_prospectus"
# doc_source = "emea_ar"
@ -1560,7 +1560,8 @@ if __name__ == "__main__":
# "544886057",
# "550769189",
# "553449663"]
# special_doc_id_list = ["411062815"]
special_doc_id_list = ["420339794", "441280757", "454036250", "471206458", "412778803"]
# special_doc_id_list = ["441280757"]
pdf_folder: str = r"/data/aus_prospectus/pdf/"
output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"
output_extract_data_child_folder: str = (

View File

@ -1727,13 +1727,19 @@ def set_provider_to_ground_truth(groud_truth_file: str,
ground_truth_df.to_excel(file, index=False)
def update_data_by_latest_ground_truth():
# TODO: update current ground truth data by the latest version
latest_ground_truth_file = r"/data/aus_prospectus/ground_truth/phase2_file/46_documents/46_documents_ground_truth_with_mapping.xlsx"
if __name__ == "__main__":
set_provider_to_ground_truth(
groud_truth_file=r"/data/aus_prospectus/ground_truth/phase2_file/46_documents/46_documents_ground_truth_with_mapping.xlsx",
ground_truth_sheet="Sheet1",
document_mapping_file=r"/data/aus_prospectus/basic_information/46_documents/aus_prospectus_46_documents_mapping.xlsx",
document_mapping_sheet="document_mapping"
)
update_data_by_latest_ground_truth()
# set_provider_to_ground_truth(
# groud_truth_file=r"/data/aus_prospectus/ground_truth/phase2_file/46_documents/46_documents_ground_truth_with_mapping.xlsx",
# ground_truth_sheet="Sheet1",
# document_mapping_file=r"/data/aus_prospectus/basic_information/46_documents/aus_prospectus_46_documents_mapping.xlsx",
# document_mapping_sheet="document_mapping"
# )
# set_mapping_to_data_side_documents_data()