support more performance fee keywords
This commit is contained in:
parent
65e752e25a
commit
9348e32caa
|
|
@ -398,6 +398,8 @@
|
|||
"Performancegebühren",
|
||||
"Performancevergütung",
|
||||
"Anlageerfolgsprämie",
|
||||
"Anlageerfolgs-prämie",
|
||||
"Anlageerfolgs- prämie",
|
||||
"TER in % (inkl.",
|
||||
"TER % (inkl.",
|
||||
"TER in % (exkl.",
|
||||
|
|
|
|||
|
|
@ -223,6 +223,14 @@
|
|||
"The output should be:",
|
||||
"{\"data\": [{\"fund name\": \"PIANO 400 Fund\", \"ter\": 0.58, \"performance_fee\": 0}]}",
|
||||
"The performance fees value is TER % (inkl. Anlageerfolgsprämie) - TER % (exkl. Anlageerfolgsprämie) = 0,58 - 0,58 = 0",
|
||||
"Example 4:",
|
||||
"-----Example Start-----",
|
||||
"Fonds \nTER % \n(einschließlich \nAnlageerfolgs- \nprämie) \nTER % \n(ohne \nAnlageerfolgs-\nprämie) \ndb x-trackers EUR Liquid Corporate 12.5 UCITS ETF \n \n \nKlasse 1C \n0,35 % \n0,35 %",
|
||||
"-----Example End-----",
|
||||
"The output should be:",
|
||||
"{\"data\": [{\"fund name\": \"db x-trackers EUR Liquid Corporate 12.5 UCITS ETF\", \"share name\": \"Klasse 1C\", \"ter\": 0.35, \"performance_fee\": 0}]}",
|
||||
"The performance fees value is TER % (einschließlich Anlageerfolgsprämie) - TER % (ohne Anlageerfolgsprämie) = 0,35 - 0,35 = 0",
|
||||
"or TER % (einschließlich Anlageerfolgs- \nprämie) - TER % (ohne Anlageerfolgs- \nprämie) = 0,35 - 0,35 = 0",
|
||||
"Case 2:",
|
||||
"If some table is with three value columns: \"TER including performance fees\", \"TER excluding performance fees\", \"Performance fees\", ",
|
||||
"The Performance fees value in column: Performance fees, should be \"-\", because of \"TER including performance fees\" - \"TER excluding performance fees\" = 0, ",
|
||||
|
|
|
|||
11
main.py
11
main.py
|
|
@ -869,7 +869,7 @@ def replace_rerun_data(new_data_file: str, original_data_file: str):
|
|||
new_extract_data.to_excel(writer, index=False, sheet_name=extract_data_sheet)
|
||||
|
||||
|
||||
def batch_run_documents():
|
||||
def batch_run_documents(special_doc_id_list: list = None):
|
||||
sample_document_list_folder = r'./sample_documents/'
|
||||
document_list_files = glob(sample_document_list_folder + "*.txt")
|
||||
|
||||
|
|
@ -887,8 +887,8 @@ def batch_run_documents():
|
|||
calculate_metrics = False
|
||||
|
||||
extract_way = "text"
|
||||
special_doc_id_list = []
|
||||
if len(special_doc_id_list) == 0:
|
||||
# special_doc_id_list = []
|
||||
if special_doc_id_list is None or len(special_doc_id_list) == 0:
|
||||
force_save_total_data = True
|
||||
# file_base_name_candidates = ["sample_document_complex", "emea_case_from_word_complex"]
|
||||
file_base_name_candidates = ["sample_documents_12_11"]
|
||||
|
|
@ -1031,9 +1031,10 @@ if __name__ == "__main__":
|
|||
data_file_path = r"/data/emea_ar/ground_truth/data_extraction/verify/20241211/sample_documents_12_11_mapping_data_info_44_documents_by_text_20241211185546.xlsx"
|
||||
document_mapping_file_path = r"/data/emea_ar/basic_information/sample_documents/sample_doc/sample_documents_12_11/doc_mapping_12_11.xlsx"
|
||||
output_data_file_path = r"/data/emea_ar/ground_truth/data_extraction/verify/20241211/sample_documents_12_11_merged_data_info.xlsx"
|
||||
merge_output_data(data_file_path, document_mapping_file_path, output_data_file_path)
|
||||
# merge_output_data(data_file_path, document_mapping_file_path, output_data_file_path)
|
||||
# batch_initial_document()
|
||||
# batch_run_documents()
|
||||
special_doc_id_list = ["553242411"]
|
||||
batch_run_documents(special_doc_id_list)
|
||||
|
||||
# new_data_file = r"/data/emea_ar/output/mapping_data/total/mapping_data_info_15_documents_by_text_20241121154243.xlsx"
|
||||
# original_data_file = r"/data/emea_ar/ground_truth/data_extraction/verify/mapping_data_info_30_documents_all_4_datapoints_20241106_verify_mapping.xlsx"
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ def add_slash_to_text_as_regex(text: str):
|
|||
continue
|
||||
replace = r"\{0}".format(special_iter.group())
|
||||
if replace not in text:
|
||||
text = re.sub(replace, r"\\W", text)
|
||||
text = re.sub(replace, r"\\W*", text)
|
||||
text = re.sub(r"( ){2,}", " ", text)
|
||||
text = text.replace(" ", r"\s*")
|
||||
return text
|
||||
|
|
|
|||
Loading…
Reference in New Issue