optimize instructions for performance fees.
This commit is contained in:
parent
e17414173a
commit
0f6dbd27eb
|
|
@ -35,6 +35,9 @@
|
||||||
"- With both of \"Synthetic TER\" and \"Fund TER\", if \"Synthetic TER\" with value(s), pick up the value(s) from \"Synthetic TER\", otherwise, pick up the value(s) from \"Fund TER\".",
|
"- With both of \"Synthetic TER\" and \"Fund TER\", if \"Synthetic TER\" with value(s), pick up the value(s) from \"Synthetic TER\", otherwise, pick up the value(s) from \"Fund TER\".",
|
||||||
"- With both of \"Net TER (including reimbursement)\" and \"Capped Expense Ratio\", the priority is \"Capped Expense Ratio\", please exclude the column: \"Net TER (including reimbursement)\", only pick up the values from \"Capped Expense Ratio\".",
|
"- With both of \"Net TER (including reimbursement)\" and \"Capped Expense Ratio\", the priority is \"Capped Expense Ratio\", please exclude the column: \"Net TER (including reimbursement)\", only pick up the values from \"Capped Expense Ratio\".",
|
||||||
"Please ignore TER values that are stated with the exception of performance fees or excluding performance fees."
|
"Please ignore TER values that are stated with the exception of performance fees or excluding performance fees."
|
||||||
|
],
|
||||||
|
"performance_fee": [
|
||||||
|
"The performance fees should not be the rates at which the performance fees are calculated."
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
|
||||||
2
main.py
2
main.py
|
|
@ -505,7 +505,7 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
# doc_id = "476492237"
|
# doc_id = "476492237"
|
||||||
# extract_data(doc_id, pdf_folder, output_extract_data_child_folder, re_run)
|
# extract_data(doc_id, pdf_folder, output_extract_data_child_folder, re_run)
|
||||||
special_doc_id_list = ["458291624"]
|
special_doc_id_list = ["491593469"]
|
||||||
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
|
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
|
||||||
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
|
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
|
||||||
re_run_mapping_data = True
|
re_run_mapping_data = True
|
||||||
|
|
|
||||||
|
|
@ -240,31 +240,31 @@ def replace_abbrevation(text: str):
|
||||||
text_splits = text.split()
|
text_splits = text.split()
|
||||||
new_text_splits = []
|
new_text_splits = []
|
||||||
for split in text_splits:
|
for split in text_splits:
|
||||||
if split.lower() in ['acc']:
|
if split.lower() in ['acc', 'acc.']:
|
||||||
new_text_splits.append('Accumulation')
|
new_text_splits.append('Accumulation')
|
||||||
elif split.lower() in ['inc']:
|
elif split.lower() in ['inc', 'inc.']:
|
||||||
new_text_splits.append('Income')
|
new_text_splits.append('Income')
|
||||||
elif split.lower() in ['dist']:
|
elif split.lower() in ['dist', 'dist.']:
|
||||||
new_text_splits.append('Distribution')
|
new_text_splits.append('Distribution')
|
||||||
elif split.lower() in ['inv']:
|
elif split.lower() in ['inv', 'inv.']:
|
||||||
new_text_splits.append('Investor')
|
new_text_splits.append('Investor')
|
||||||
elif split.lower() in ['inst', 'inst', 'institution']:
|
elif split.lower() in ['inst', 'inst.', 'institution']:
|
||||||
new_text_splits.append('Institutional')
|
new_text_splits.append('Institutional')
|
||||||
elif split.lower() in ['cap']:
|
elif split.lower() in ['cap', 'cap.']:
|
||||||
new_text_splits.append('Capitalisation')
|
new_text_splits.append('Capitalisation')
|
||||||
elif split.lower() in ['adm']:
|
elif split.lower() in ['adm', 'adm.']:
|
||||||
new_text_splits.append('Admin')
|
new_text_splits.append('Admin')
|
||||||
elif split.lower() in ['adv']:
|
elif split.lower() in ['adv', 'adv.']:
|
||||||
new_text_splits.append('Advantage')
|
new_text_splits.append('Advantage')
|
||||||
elif split.lower() in ['hdg', 'hgd', '(h)']:
|
elif split.lower() in ['hdg', 'hgd', 'hdg.', 'hgd.', '(h)']:
|
||||||
new_text_splits.append('Hedged')
|
new_text_splits.append('Hedged')
|
||||||
elif split.lower() in ['cl']:
|
elif split.lower() in ['cl', 'cl.']:
|
||||||
new_text_splits.append('Class')
|
new_text_splits.append('Class')
|
||||||
elif split.lower() in ['ser']:
|
elif split.lower() in ['ser', 'ser.']:
|
||||||
new_text_splits.append('Series')
|
new_text_splits.append('Series')
|
||||||
elif split.lower() in ['u.s.']:
|
elif split.lower() in ['u.s.']:
|
||||||
new_text_splits.append('US')
|
new_text_splits.append('US')
|
||||||
elif split.lower() in ['nc']:
|
elif split.lower() in ['nc', 'nc.']:
|
||||||
new_text_splits.append('no trail')
|
new_text_splits.append('no trail')
|
||||||
else:
|
else:
|
||||||
new_text_splits.append(split)
|
new_text_splits.append(split)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue