optimize performance_fee_costs by document 391080133
This commit is contained in:
parent
c7c36dbdd2
commit
765772e5a8
|
|
@ -30,6 +30,12 @@
|
||||||
["Investment\\s*option\\s*Administration fees[\\s\\S]*?administration\\s*costs\\s*Investment\\s*fees[\\s\\S]*?investment\\s*costs\\s*Administration\\s*fees[\\s\\S]*?Investment\\s*fees[\\s\\S]*?Estimated\\s*administration[\\s\\S]*?transaction\\s*costs[\\s\\S]*?annual\\s*fees\\s*and\\s*costs\\s*\\(\\%\\s*pa\\)\\s*\\n"],
|
["Investment\\s*option\\s*Administration fees[\\s\\S]*?administration\\s*costs\\s*Investment\\s*fees[\\s\\S]*?investment\\s*costs\\s*Administration\\s*fees[\\s\\S]*?Investment\\s*fees[\\s\\S]*?Estimated\\s*administration[\\s\\S]*?transaction\\s*costs[\\s\\S]*?annual\\s*fees\\s*and\\s*costs\\s*\\(\\%\\s*pa\\)\\s*\\n"],
|
||||||
"replace_text": "\nInvestment option \nAdministration fees \nEstimated administration costs \nInvestment fees \nEstimated performance fees \nEstimated other investment costs \nEstimated transaction costs \nEstimated total ongoing annual fees and costs \n",
|
"replace_text": "\nInvestment option \nAdministration fees \nEstimated administration costs \nInvestment fees \nEstimated performance fees \nEstimated other investment costs \nEstimated transaction costs \nEstimated total ongoing annual fees and costs \n",
|
||||||
"comments": ["item 0: document 411062815, page 17"]
|
"comments": ["item 0: document 411062815, page 17"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"regex_all_list":
|
||||||
|
["\\nFund\\s*name\\s*Management\\s*fee\\s*Indirect\\s*costs\\s*Recoverable\\s*expenses[\\s\\S]*?performance.*\\s*fee\\s*Estimated\\s*other\\s*indirect\\s*costs\\s*\\n"],
|
||||||
|
"replace_text": "\nFund name \nManagement fee \nRecoverable expenses \nEstimated performance-related fee \nEstimated other indirect costs \n",
|
||||||
|
"comments": ["item 0: document 391080133, page 21"]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
@ -614,7 +614,7 @@ class DataExtraction:
|
||||||
previous_page_datapoints = []
|
previous_page_datapoints = []
|
||||||
previous_page_fund_name = None
|
previous_page_fund_name = None
|
||||||
for page_num, page_text in self.page_text_dict.items():
|
for page_num, page_text in self.page_text_dict.items():
|
||||||
# if page_num not in [4, 5]:
|
# if page_num not in [20]:
|
||||||
# continue
|
# continue
|
||||||
if page_num in handled_page_num_list:
|
if page_num in handled_page_num_list:
|
||||||
continue
|
continue
|
||||||
|
|
|
||||||
|
|
@ -171,11 +171,21 @@
|
||||||
"C.2 With \"Total management cost (% pa)\" = \"Management fee (% pa)\" + \"Estimated other indirect costs\" + \"Estimated expense recoveries\" + \"Estimated Regulatory Change Expense Recovery\".",
|
"C.2 With \"Total management cost (% pa)\" = \"Management fee (% pa)\" + \"Estimated other indirect costs\" + \"Estimated expense recoveries\" + \"Estimated Regulatory Change Expense Recovery\".",
|
||||||
"The management_fee is the value of \"Management fee (% pa)\".",
|
"The management_fee is the value of \"Management fee (% pa)\".",
|
||||||
"The management_fee_and_costs is the value of \"Total management cost (% pa)\".",
|
"The management_fee_and_costs is the value of \"Total management cost (% pa)\".",
|
||||||
"---Example Start---",
|
"---Example 1 Start---",
|
||||||
"Fund/Investment\nOption\nManagement\nfee (% pa)\nEstimated \nPerformance \n-related \nfees \nEstimated\nother\nindirect\ncosts\nEstimated\nexpense\nrecoveries\nEstimated\nRegulatory\nChange\nExpense\nRecovery\nTotal\nmanagement\ncost (% pa)\nEstimated\nbuy-sell\nspread (%)\nBT Future \nGoals Fund \n1.33 0.000.04 0.000.01 1.38 0.31\n1.29 0.000.00 0.000.01 1.30 0.29\n",
|
"Fund/Investment\nOption\nManagement\nfee (% pa)\nEstimated \nPerformance \n-related \nfees \nEstimated\nother\nindirect\ncosts\nEstimated\nexpense\nrecoveries\nEstimated\nRegulatory\nChange\nExpense\nRecovery\nTotal\nmanagement\ncost (% pa)\nEstimated\nbuy-sell\nspread (%)\nBT Future \nGoals Fund \n1.33 0.000.04 0.000.01 1.38 0.31\n1.29 0.000.00 0.000.01 1.30 0.29\n",
|
||||||
"---Example End---",
|
"---Example 1 End---",
|
||||||
"The output should be:",
|
"The output should be:",
|
||||||
"{\"data\": [{\"fund name\": \"BT Future Goals Fund\", \"share name\": \"BT Future Goals Fund\", \"management_fee_and_costs\": 1.38, \"management_fee\": 1.33, \"indirect_costs\": 0.04, \"recoverable_expenses\": 0, \"change_recoverable_expenses\": 0.01, \"performance_fee_costs\": 0, \"buy_spread\": 0.31, \"sell_spread\": 0.31}]}",
|
"{\"data\": [{\"fund name\": \"BT Future Goals Fund\", \"share name\": \"BT Future Goals Fund\", \"management_fee_and_costs\": 1.38, \"management_fee\": 1.33, \"indirect_costs\": 0.04, \"recoverable_expenses\": 0, \"change_recoverable_expenses\": 0.01, \"performance_fee_costs\": 0, \"buy_spread\": 0.31, \"sell_spread\": 0.31}]}",
|
||||||
|
"---Example 2 Start---",
|
||||||
|
"\nFund name \nManagement fee \nRecoverable expenses \nEstimated performance-related fee \nEstimated other indirect costs \nPathways 30 \n1.16% pa \n0.02% pa \n0.05% pa \n0.05% pa \nPathways 70 \n1.30% pa \n0.01% pa \n0.06% pa \n0.04% pa \n",
|
||||||
|
"---Example 2 End---",
|
||||||
|
"The management_fee_and_costs is the value of \"Management fee\" + \"Recoverable expenses\" + \"Estimated other indirect costs\".",
|
||||||
|
"The management_fee is the value of \"Management fee\".",
|
||||||
|
"The performance_fee_costs is the value of \"Estimated performance-related fee\".",
|
||||||
|
"The indirect_costs is the value of \"Estimated other indirect costs\".",
|
||||||
|
"The recoverable_expenses is the value of \"Recoverable expenses\".",
|
||||||
|
"The output should be:",
|
||||||
|
"{\"data\": [{\"fund name\": \"Pathways 30\", \"share name\": \"Pathways 30\", \"management_fee_and_costs\": 1.23, \"management_fee\": 1.16, \"recoverable_expenses\": 0.02, \"performance_fee_costs\": 0.05, \"indirect_costs\": 0.05}, {\"fund name\": \"Pathways 70\", \"share name\": \"Pathways 70\", \"management_fee_and_costs\": 1.35, \"management_fee\": 1.3, \"recoverable_expenses\": 0.01, \"performance_fee_costs\": 0.06, \"indirect_costs\": 0.04}]}",
|
||||||
"\n",
|
"\n",
|
||||||
"D. If only find \"Management fees and costs\", please output the relevant same value for both of data point keys: \"management_fee_and_costs\" and \"management_fee\".",
|
"D. If only find \"Management fees and costs\", please output the relevant same value for both of data point keys: \"management_fee_and_costs\" and \"management_fee\".",
|
||||||
"---Example 1 Start---",
|
"---Example 1 Start---",
|
||||||
|
|
|
||||||
8
main.py
8
main.py
|
|
@ -1526,8 +1526,8 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
# special_doc_id_list = ["553242411"]
|
# special_doc_id_list = ["553242411"]
|
||||||
|
|
||||||
re_run_extract_data = False
|
re_run_extract_data = True
|
||||||
re_run_mapping_data = False
|
re_run_mapping_data = True
|
||||||
force_save_total_data = True
|
force_save_total_data = True
|
||||||
doc_source = "aus_prospectus"
|
doc_source = "aus_prospectus"
|
||||||
# doc_source = "emea_ar"
|
# doc_source = "emea_ar"
|
||||||
|
|
@ -1560,8 +1560,8 @@ if __name__ == "__main__":
|
||||||
# "544886057",
|
# "544886057",
|
||||||
# "550769189",
|
# "550769189",
|
||||||
# "553449663"]
|
# "553449663"]
|
||||||
special_doc_id_list = ["420339794", "441280757", "454036250", "471206458", "412778803"]
|
special_doc_id_list = ["391080133"]
|
||||||
# special_doc_id_list = ["441280757"]
|
# special_doc_id_list = ["391080133", ""]
|
||||||
pdf_folder: str = r"/data/aus_prospectus/pdf/"
|
pdf_folder: str = r"/data/aus_prospectus/pdf/"
|
||||||
output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"
|
output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"
|
||||||
output_extract_data_child_folder: str = (
|
output_extract_data_child_folder: str = (
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue