optimize instructions for performance fees.
This commit is contained in:
parent
e17414173a
commit
0f6dbd27eb
|
|
@ -35,6 +35,9 @@
|
|||
"- When both \"Synthetic TER\" and \"Fund TER\" are present, if \"Synthetic TER\" has value(s), pick up the value(s) from \"Synthetic TER\"; otherwise, pick up the value(s) from \"Fund TER\".",
|
||||
"- When both \"Net TER (including reimbursement)\" and \"Capped Expense Ratio\" are present, \"Capped Expense Ratio\" takes priority: please exclude the column \"Net TER (including reimbursement)\" and only pick up the values from \"Capped Expense Ratio\".",
|
||||
"Please ignore TER values that are stated with the exception of performance fees or that exclude performance fees."
|
||||
],
|
||||
"performance_fee": [
|
||||
"The performance fees should not be the rates at which the performance fees are calculated."
|
||||
]
|
||||
}
|
||||
},
|
||||
|
|
|
|||
2
main.py
2
main.py
|
|
@ -505,7 +505,7 @@ if __name__ == "__main__":
|
|||
|
||||
# doc_id = "476492237"
|
||||
# extract_data(doc_id, pdf_folder, output_extract_data_child_folder, re_run)
|
||||
special_doc_id_list = ["458291624"]
|
||||
special_doc_id_list = ["491593469"]
|
||||
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
|
||||
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
|
||||
re_run_mapping_data = True
|
||||
|
|
|
|||
|
|
@ -240,31 +240,31 @@ def replace_abbrevation(text: str):
|
|||
text_splits = text.split()
|
||||
new_text_splits = []
|
||||
for split in text_splits:
|
||||
if split.lower() in ['acc']:
|
||||
if split.lower() in ['acc', 'acc.']:
|
||||
new_text_splits.append('Accumulation')
|
||||
elif split.lower() in ['inc']:
|
||||
elif split.lower() in ['inc', 'inc.']:
|
||||
new_text_splits.append('Income')
|
||||
elif split.lower() in ['dist']:
|
||||
elif split.lower() in ['dist', 'dist.']:
|
||||
new_text_splits.append('Distribution')
|
||||
elif split.lower() in ['inv']:
|
||||
elif split.lower() in ['inv', 'inv.']:
|
||||
new_text_splits.append('Investor')
|
||||
elif split.lower() in ['inst', 'inst', 'institution']:
|
||||
elif split.lower() in ['inst', 'inst.', 'institution']:
|
||||
new_text_splits.append('Institutional')
|
||||
elif split.lower() in ['cap']:
|
||||
elif split.lower() in ['cap', 'cap.']:
|
||||
new_text_splits.append('Capitalisation')
|
||||
elif split.lower() in ['adm']:
|
||||
elif split.lower() in ['adm', 'adm.']:
|
||||
new_text_splits.append('Admin')
|
||||
elif split.lower() in ['adv']:
|
||||
elif split.lower() in ['adv', 'adv.']:
|
||||
new_text_splits.append('Advantage')
|
||||
elif split.lower() in ['hdg', 'hgd', '(h)']:
|
||||
elif split.lower() in ['hdg', 'hgd', 'hdg.', 'hgd.', '(h)']:
|
||||
new_text_splits.append('Hedged')
|
||||
elif split.lower() in ['cl']:
|
||||
elif split.lower() in ['cl', 'cl.']:
|
||||
new_text_splits.append('Class')
|
||||
elif split.lower() in ['ser']:
|
||||
elif split.lower() in ['ser', 'ser.']:
|
||||
new_text_splits.append('Series')
|
||||
elif split.lower() in ['u.s.']:
|
||||
new_text_splits.append('US')
|
||||
elif split.lower() in ['nc']:
|
||||
elif split.lower() in ['nc', 'nc.']:
|
||||
new_text_splits.append('no trail')
|
||||
else:
|
||||
new_text_splits.append(split)
|
||||
|
|
|
|||
Loading…
Reference in New Issue