parent
53dadf61f4
commit
fa763f4f14
|
|
@ -103,7 +103,14 @@
|
|||
"The output should be:",
|
||||
"{\"data\": [{\"fund name\": \"TerreNeuve\", \"share name\": \"N A EUR SH X1\", \"ter\": 1.61, \"performance_fee\": 0.01}, {\"fund name\": \"TerreNeuve\", \"share name\": \"N D GBP SH\", \"ter\": 1.85, \"performance_fee\": 0}]}",
|
||||
"Summary: \nIf there are several data value columns in the table, please extract the data from the latest date column(s).",
|
||||
"If you are not sure which column is the latest date column, please extract the data from the first 1 - 2 data value columns."
|
||||
"If you are not sure which column is the latest date column, please extract the data from the first 1 - 2 data value columns.",
|
||||
"Case 3:",
|
||||
"If the value of column with latest date is N/A or -, please ignore.",
|
||||
"-----Example Start-----",
|
||||
"I-class income shares\n\n31.10.22\n30.04.22\n30.04.21\n30.04.20\n\npence per share\npence per share\npence per share\npence per share\nOther information\nOperating charges**\nN/A\n—\n0.90%\n0.90%",
|
||||
"-----Example End-----",
|
||||
"The output should be:",
|
||||
"{\"data\": []}"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
|
|||
4
main.py
4
main.py
|
|
@ -854,10 +854,10 @@ if __name__ == "__main__":
|
|||
]
|
||||
# special_doc_id_list = check_mapping_doc_id_list
|
||||
special_doc_id_list = check_db_mapping_doc_id_list
|
||||
special_doc_id_list = ["543243654"]
|
||||
special_doc_id_list = ["423418540"]
|
||||
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
|
||||
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
|
||||
re_run_extract_data = False
|
||||
re_run_extract_data = True
|
||||
re_run_mapping_data = True
|
||||
force_save_total_data = False
|
||||
calculate_metrics = False
|
||||
|
|
|
|||
|
|
@ -812,8 +812,6 @@ def replace_abbrevation(text: str):
|
|||
new_text_splits.append('Advantage')
|
||||
elif split.lower() in ['hdg', 'hgd', 'hdg.', 'hgd.', '(h)']:
|
||||
new_text_splits.append('Hedged')
|
||||
elif split.lower() in ['unhgd']:
|
||||
split = ""
|
||||
elif split.lower() in ['cl', 'cl.']:
|
||||
new_text_splits.append('Class')
|
||||
elif split.lower() in ['ser', 'ser.']:
|
||||
|
|
@ -824,8 +822,11 @@ def replace_abbrevation(text: str):
|
|||
new_text_splits.append('no trail')
|
||||
elif split.lower() in ['non']:
|
||||
new_text_splits.append('Not')
|
||||
elif split.lower() in ['net', 'unhgd']:
|
||||
new_text_splits.append('')
|
||||
else:
|
||||
new_text_splits.append(split)
|
||||
|
||||
new_text = ' '.join(new_text_splits)
|
||||
new_text = re.sub(r'\s+', ' ', new_text).strip()
|
||||
return new_text
|
||||
Loading…
Reference in New Issue