optimize instructions for performance fees.

This commit is contained in:
Blade He 2024-09-13 16:10:44 -05:00
parent e17414173a
commit 0f6dbd27eb
3 changed files with 16 additions and 13 deletions

View File

@@ -35,6 +35,9 @@
"- With both of \"Synthetic TER\" and \"Fund TER\", if \"Synthetic TER\" with value(s), pick up the value(s) from \"Synthetic TER\", otherwise, pick up the value(s) from \"Fund TER\".", "- With both of \"Synthetic TER\" and \"Fund TER\", if \"Synthetic TER\" with value(s), pick up the value(s) from \"Synthetic TER\", otherwise, pick up the value(s) from \"Fund TER\".",
"- With both of \"Net TER (including reimbursement)\" and \"Capped Expense Ratio\", the priority is \"Capped Expense Ratio\", please exclude the column: \"Net TER (including reimbursement)\", only pick up the values from \"Capped Expense Ratio\".", "- With both of \"Net TER (including reimbursement)\" and \"Capped Expense Ratio\", the priority is \"Capped Expense Ratio\", please exclude the column: \"Net TER (including reimbursement)\", only pick up the values from \"Capped Expense Ratio\".",
"Please ignore TER values which with the exception of performance fees or excluded performance fees." "Please ignore TER values which with the exception of performance fees or excluded performance fees."
],
"performance_fee": [
"The performance fees should not be the presence of the rates at which the performance fees are calculated."
] ]
} }
}, },

View File

@@ -505,7 +505,7 @@ if __name__ == "__main__":
# doc_id = "476492237" # doc_id = "476492237"
# extract_data(doc_id, pdf_folder, output_extract_data_child_folder, re_run) # extract_data(doc_id, pdf_folder, output_extract_data_child_folder, re_run)
special_doc_id_list = ["458291624"] special_doc_id_list = ["491593469"]
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/" output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/" output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
re_run_mapping_data = True re_run_mapping_data = True

View File

@@ -240,31 +240,31 @@ def replace_abbrevation(text: str):
text_splits = text.split() text_splits = text.split()
new_text_splits = [] new_text_splits = []
for split in text_splits: for split in text_splits:
if split.lower() in ['acc']: if split.lower() in ['acc', 'acc.']:
new_text_splits.append('Accumulation') new_text_splits.append('Accumulation')
elif split.lower() in ['inc']: elif split.lower() in ['inc', 'inc.']:
new_text_splits.append('Income') new_text_splits.append('Income')
elif split.lower() in ['dist']: elif split.lower() in ['dist', 'dist.']:
new_text_splits.append('Distribution') new_text_splits.append('Distribution')
elif split.lower() in ['inv']: elif split.lower() in ['inv', 'inv.']:
new_text_splits.append('Investor') new_text_splits.append('Investor')
elif split.lower() in ['inst', 'inst', 'institution']: elif split.lower() in ['inst', 'inst.', 'institution']:
new_text_splits.append('Institutional') new_text_splits.append('Institutional')
elif split.lower() in ['cap']: elif split.lower() in ['cap', 'cap.']:
new_text_splits.append('Capitalisation') new_text_splits.append('Capitalisation')
elif split.lower() in ['adm']: elif split.lower() in ['adm', 'adm.']:
new_text_splits.append('Admin') new_text_splits.append('Admin')
elif split.lower() in ['adv']: elif split.lower() in ['adv', 'adv.']:
new_text_splits.append('Advantage') new_text_splits.append('Advantage')
elif split.lower() in ['hdg', 'hgd', '(h)']: elif split.lower() in ['hdg', 'hgd', 'hdg.', 'hgd.', '(h)']:
new_text_splits.append('Hedged') new_text_splits.append('Hedged')
elif split.lower() in ['cl']: elif split.lower() in ['cl', 'cl.']:
new_text_splits.append('Class') new_text_splits.append('Class')
elif split.lower() in ['ser']: elif split.lower() in ['ser', 'ser.']:
new_text_splits.append('Series') new_text_splits.append('Series')
elif split.lower() in ['u.s.']: elif split.lower() in ['u.s.']:
new_text_splits.append('US') new_text_splits.append('US')
elif split.lower() in ['nc']: elif split.lower() in ['nc', 'nc.']:
new_text_splits.append('no trail') new_text_splits.append('no trail')
else: else:
new_text_splits.append(split) new_text_splits.append(split)