optimize instructions for performance fees.

This commit is contained in:
Blade He 2024-09-13 16:10:44 -05:00
parent e17414173a
commit 0f6dbd27eb
3 changed files with 16 additions and 13 deletions

View File

@ -35,6 +35,9 @@
"- With both of \"Synthetic TER\" and \"Fund TER\", if \"Synthetic TER\" with value(s), pick up the value(s) from \"Synthetic TER\", otherwise, pick up the value(s) from \"Fund TER\".",
"- With both of \"Net TER (including reimbursement)\" and \"Capped Expense Ratio\", the priority is \"Capped Expense Ratio\", please exclude the column: \"Net TER (including reimbursement)\", only pick up the values from \"Capped Expense Ratio\".",
"Please ignore TER values that are stated with the exception of performance fees or that exclude performance fees."
],
"performance_fee": [
"The mere presence of the rates at which the performance fees are calculated should not be treated as the performance fees."
]
}
},

View File

@ -505,7 +505,7 @@ if __name__ == "__main__":
# doc_id = "476492237"
# extract_data(doc_id, pdf_folder, output_extract_data_child_folder, re_run)
special_doc_id_list = ["458291624"]
special_doc_id_list = ["491593469"]
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
re_run_mapping_data = True

View File

@ -240,31 +240,31 @@ def replace_abbrevation(text: str):
text_splits = text.split()
new_text_splits = []
for split in text_splits:
if split.lower() in ['acc']:
if split.lower() in ['acc', 'acc.']:
new_text_splits.append('Accumulation')
elif split.lower() in ['inc']:
elif split.lower() in ['inc', 'inc.']:
new_text_splits.append('Income')
elif split.lower() in ['dist']:
elif split.lower() in ['dist', 'dist.']:
new_text_splits.append('Distribution')
elif split.lower() in ['inv']:
elif split.lower() in ['inv', 'inv.']:
new_text_splits.append('Investor')
elif split.lower() in ['inst', 'inst', 'institution']:
elif split.lower() in ['inst', 'inst.', 'institution']:
new_text_splits.append('Institutional')
elif split.lower() in ['cap']:
elif split.lower() in ['cap', 'cap.']:
new_text_splits.append('Capitalisation')
elif split.lower() in ['adm']:
elif split.lower() in ['adm', 'adm.']:
new_text_splits.append('Admin')
elif split.lower() in ['adv']:
elif split.lower() in ['adv', 'adv.']:
new_text_splits.append('Advantage')
elif split.lower() in ['hdg', 'hgd', '(h)']:
elif split.lower() in ['hdg', 'hgd', 'hdg.', 'hgd.', '(h)']:
new_text_splits.append('Hedged')
elif split.lower() in ['cl']:
elif split.lower() in ['cl', 'cl.']:
new_text_splits.append('Class')
elif split.lower() in ['ser']:
elif split.lower() in ['ser', 'ser.']:
new_text_splits.append('Series')
elif split.lower() in ['u.s.']:
new_text_splits.append('US')
elif split.lower() in ['nc']:
elif split.lower() in ['nc', 'nc.']:
new_text_splits.append('no trail')
else:
new_text_splits.append(split)