1. Optimize instructions: do not fetch values that are qualified by an "up to" statement.
2. Add an exception handler to the update_for_currency function.
This commit is contained in:
parent
bc32860f87
commit
a11a99fdc3
|
|
@ -35,6 +35,7 @@
|
||||||
"Ratio de gastos totales"
|
"Ratio de gastos totales"
|
||||||
],
|
],
|
||||||
"german": [
|
"german": [
|
||||||
|
"Mit anteiliger Performance Fee in %",
|
||||||
"TER inkl. Performance-Fee in % **)",
|
"TER inkl. Performance-Fee in % **)",
|
||||||
"Gesamtgebühren",
|
"Gesamtgebühren",
|
||||||
"Kostenpauschale",
|
"Kostenpauschale",
|
||||||
|
|
@ -45,7 +46,6 @@
|
||||||
"kostenquote",
|
"kostenquote",
|
||||||
"Gesamt kostenquote",
|
"Gesamt kostenquote",
|
||||||
"Betriebskostenquote des Fonds",
|
"Betriebskostenquote des Fonds",
|
||||||
"Pauschalgebühr",
|
|
||||||
"Total Expense Ratio in Prozent",
|
"Total Expense Ratio in Prozent",
|
||||||
"Annualisierte TER in % (Mit Gebührenverzicht)"
|
"Annualisierte TER in % (Mit Gebührenverzicht)"
|
||||||
],
|
],
|
||||||
|
|
|
||||||
|
|
@ -813,8 +813,11 @@ class DataExtraction:
|
||||||
instructions.append("Special cases:\n")
|
instructions.append("Special cases:\n")
|
||||||
special_cases = self.instructions_config.get("special_cases", {})
|
special_cases = self.instructions_config.get("special_cases", {})
|
||||||
special_cases_common_list = special_cases.get("common", [])
|
special_cases_common_list = special_cases.get("common", [])
|
||||||
|
special_cases_number = 1
|
||||||
for special_cases_common in special_cases_common_list:
|
for special_cases_common in special_cases_common_list:
|
||||||
title = special_cases_common.get("title", "")
|
title = special_cases_common.get("title", "")
|
||||||
|
title = f"{special_cases_number}. {title} "
|
||||||
|
special_cases_number += 1
|
||||||
instructions.append(title)
|
instructions.append(title)
|
||||||
instructions.append("\n")
|
instructions.append("\n")
|
||||||
contents_list = special_cases_common.get("contents", [])
|
contents_list = special_cases_common.get("contents", [])
|
||||||
|
|
@ -826,6 +829,8 @@ class DataExtraction:
|
||||||
special_case_list = special_cases.get(datapoint, [])
|
special_case_list = special_cases.get(datapoint, [])
|
||||||
for special_case in special_case_list:
|
for special_case in special_case_list:
|
||||||
title = special_case.get("title", "")
|
title = special_case.get("title", "")
|
||||||
|
title = f"{special_cases_number}. {title} "
|
||||||
|
special_cases_number += 1
|
||||||
instructions.append(title)
|
instructions.append(title)
|
||||||
instructions.append("\n")
|
instructions.append("\n")
|
||||||
contents_list = special_case.get("contents", [])
|
contents_list = special_case.get("contents", [])
|
||||||
|
|
|
||||||
|
|
@ -70,6 +70,7 @@
|
||||||
"- With \"TER including Performance Fee\" and \"TER excluding Performance Fee\", pick up the values from \"TER including Performance Fee\".",
|
"- With \"TER including Performance Fee\" and \"TER excluding Performance Fee\", pick up the values from \"TER including Performance Fee\".",
|
||||||
"- With \"TER inkl. Performance-Fee in % **)\" and \"TER exkl. Performance-Fee in % **)\", pick up the values from \"TER inkl. Performance-Fee in % **)\".",
|
"- With \"TER inkl. Performance-Fee in % **)\" and \"TER exkl. Performance-Fee in % **)\", pick up the values from \"TER inkl. Performance-Fee in % **)\".",
|
||||||
"- With \"TER inkl. Performance-Fee in % **)\" and \"TER inkl. Performance-Fee in % (inkl. Zielfonds)\", pick up the values from \"TER inkl. Performance-Fee in % **)\".",
|
"- With \"TER inkl. Performance-Fee in % **)\" and \"TER inkl. Performance-Fee in % (inkl. Zielfonds)\", pick up the values from \"TER inkl. Performance-Fee in % **)\".",
|
||||||
|
"- With \"Mit anteiliger Performance Fee in %\" and \"Ohne anteilige Performance-Fee in %\", pick up the values from \"Mit anteiliger Performance Fee in %\".",
|
||||||
"- With both of \"Synthetic TER\" and \"Fund TER\", if \"Synthetic TER\" with value(s), pick up the value(s) from \"Synthetic TER\", otherwise, pick up the value(s) from \"Fund TER\".",
|
"- With both of \"Synthetic TER\" and \"Fund TER\", if \"Synthetic TER\" with value(s), pick up the value(s) from \"Synthetic TER\", otherwise, pick up the value(s) from \"Fund TER\".",
|
||||||
"- With both of \"Net TER\" and \"Capped Expense Ratio\", the priority is \"Net TER\", please exclude the column: \"Capped Expense Ratio\", only pick up the values from \"Net TER\".",
|
"- With both of \"Net TER\" and \"Capped Expense Ratio\", the priority is \"Net TER\", please exclude the column: \"Capped Expense Ratio\", only pick up the values from \"Net TER\".",
|
||||||
"- With \"Gross TER\", \"Waiver\", \"Net TER\", \"Capped Expense Ratio\" as column titles, pick up the values from \"Net TER\".",
|
"- With \"Gross TER\", \"Waiver\", \"Net TER\", \"Capped Expense Ratio\" as column titles, pick up the values from \"Net TER\".",
|
||||||
|
|
@ -132,6 +133,26 @@
|
||||||
"The output should be:",
|
"The output should be:",
|
||||||
"{\"data\": []}"
|
"{\"data\": []}"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Don't fetch data with \"up to\" statement",
|
||||||
|
"contents":[
|
||||||
|
"If the value is with \"up to\" statement, please ignore the value.",
|
||||||
|
"Example 1:",
|
||||||
|
"-----Example Start-----",
|
||||||
|
"A-Class\nB-Class\nC-Class\n",
|
||||||
|
"TER\nUp to 1.00%\nUp to 1.20%\nUp to 1.50%\n",
|
||||||
|
"-----Example End-----",
|
||||||
|
"The output should be:",
|
||||||
|
"{\"data\": []}",
|
||||||
|
"Example 2:",
|
||||||
|
"-----Example Start-----",
|
||||||
|
"A-Aktien\nB-Aktien\nC-Aktien\n",
|
||||||
|
"TER\nbis zu 1,20 % p.a.\nbis zu 2,20 % p.a.\nbis zu 1,00 % p.a.\n",
|
||||||
|
"-----Example End-----",
|
||||||
|
"The output should be:",
|
||||||
|
"{\"data\": []}"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"ter": [
|
"ter": [
|
||||||
|
|
|
||||||
2
main.py
2
main.py
|
|
@ -1197,7 +1197,7 @@ if __name__ == "__main__":
|
||||||
"534535767"
|
"534535767"
|
||||||
]
|
]
|
||||||
special_doc_id_list = check_db_mapping_doc_id_list
|
special_doc_id_list = check_db_mapping_doc_id_list
|
||||||
special_doc_id_list = ["451878128"]
|
special_doc_id_list = ["532998065"]
|
||||||
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
|
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
|
||||||
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
|
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
|
||||||
re_run_extract_data = True
|
re_run_extract_data = True
|
||||||
|
|
|
||||||
|
|
@ -377,7 +377,8 @@ def replace_share_name_for_multilingual(text: str, share_name: str):
|
||||||
multilingual_share_list = ["Catégorie de parts", "Classe di quote",
|
multilingual_share_list = ["Catégorie de parts", "Classe di quote",
|
||||||
"Kategorie Anteile", "Kategorie anteile",
|
"Kategorie Anteile", "Kategorie anteile",
|
||||||
"Clase de participaciones", "Aandelenklasse",
|
"Clase de participaciones", "Aandelenklasse",
|
||||||
"aandelenklasse", "Anteilklasse", "anteilklasse"]
|
"aandelenklasse", "Anteilklasse", "anteilklasse",
|
||||||
|
"Aktien", "Aktienklasse", "aktien", "aktienklasse"]
|
||||||
for multilingual_share in multilingual_share_list:
|
for multilingual_share in multilingual_share_list:
|
||||||
if multilingual_share in text:
|
if multilingual_share in text:
|
||||||
text = text.replace(multilingual_share, "Class")
|
text = text.replace(multilingual_share, "Class")
|
||||||
|
|
@ -531,113 +532,116 @@ def get_currency_from_text(text: str):
|
||||||
|
|
||||||
|
|
||||||
def update_for_currency(text: str, share_name: str, compare_list: list):
|
def update_for_currency(text: str, share_name: str, compare_list: list):
|
||||||
currency_in_text = get_currency_from_text(text)
|
try:
|
||||||
with_currency = False
|
currency_in_text = get_currency_from_text(text)
|
||||||
if currency_in_text is not None:
|
with_currency = False
|
||||||
with_currency = True
|
if currency_in_text is not None:
|
||||||
|
with_currency = True
|
||||||
with_currency_list = []
|
|
||||||
without_currency_list = []
|
with_currency_list = []
|
||||||
for index, compare in enumerate(compare_list):
|
without_currency_list = []
|
||||||
# compare_split = compare.split()
|
for index, compare in enumerate(compare_list):
|
||||||
with_currency_compare = False
|
# compare_split = compare.split()
|
||||||
currecy_in_compare = get_currency_from_text(compare)
|
with_currency_compare = False
|
||||||
if currecy_in_compare is not None:
|
currecy_in_compare = get_currency_from_text(compare)
|
||||||
with_currency_compare = True
|
if currecy_in_compare is not None:
|
||||||
|
with_currency_compare = True
|
||||||
if with_currency_compare:
|
|
||||||
with_currency_list.append(index)
|
if with_currency_compare:
|
||||||
else:
|
with_currency_list.append(index)
|
||||||
without_currency_list.append(index)
|
|
||||||
if not with_currency and len(with_currency_list) == 0:
|
|
||||||
pass
|
|
||||||
elif not with_currency and len(with_currency_list) > 0:
|
|
||||||
share_short_name_list = []
|
|
||||||
if share_name is not None and len(share_name.strip()) > 0:
|
|
||||||
share_short_name_list = get_share_short_name_from_text(share_name)
|
|
||||||
updated = False
|
|
||||||
if len(share_short_name_list) > 0:
|
|
||||||
if len(without_currency_list) > 0:
|
|
||||||
for index in without_currency_list:
|
|
||||||
all_in_list = True
|
|
||||||
compare_split = [split.upper() for split in compare_list[index].split()]
|
|
||||||
for share_shot_name in share_short_name_list:
|
|
||||||
if share_shot_name not in compare_split:
|
|
||||||
all_in_list = False
|
|
||||||
break
|
|
||||||
if all_in_list:
|
|
||||||
text = text + ' ' + 'USD'
|
|
||||||
if share_name is not None:
|
|
||||||
share_name = share_name + ' ' + 'USD'
|
|
||||||
updated = True
|
|
||||||
break
|
|
||||||
if not updated:
|
|
||||||
currency_list = []
|
|
||||||
for index in with_currency_list:
|
|
||||||
all_in_list = True
|
|
||||||
compare_split = [split.upper() for split in compare_list[index].split()]
|
|
||||||
for share_shot_name in share_short_name_list:
|
|
||||||
if share_shot_name not in compare_split:
|
|
||||||
all_in_list = False
|
|
||||||
break
|
|
||||||
if all_in_list:
|
|
||||||
current_currency_list = [split for split in compare_split
|
|
||||||
if split.upper() in total_currency_list]
|
|
||||||
if len(current_currency_list) > 0:
|
|
||||||
currency_list.append(current_currency_list[-1])
|
|
||||||
if len(currency_list) == 1:
|
|
||||||
text = text + ' ' + currency_list[0]
|
|
||||||
if share_name is not None:
|
|
||||||
share_name = share_name + ' ' + currency_list[0]
|
|
||||||
updated = True
|
|
||||||
|
|
||||||
for index in without_currency_list:
|
|
||||||
compare_list[index] = compare_list[index] + ' ' + 'USD'
|
|
||||||
|
|
||||||
if not updated:
|
|
||||||
text = text + ' ' + 'USD'
|
|
||||||
if share_name is not None:
|
|
||||||
share_name = share_name + ' ' + 'USD'
|
|
||||||
# return text, share_name, compare_list
|
|
||||||
elif with_currency and len(without_currency_list) == 0:
|
|
||||||
for index in without_currency_list:
|
|
||||||
compare_list[index] = compare_list[index] + ' ' + 'USD'
|
|
||||||
# return text, share_name, compare_list
|
|
||||||
else:
|
|
||||||
# return text, share_name, compare_list
|
|
||||||
pass
|
|
||||||
default_currency = 'USD'
|
|
||||||
if with_currency:
|
|
||||||
share_name_split = share_name.split()
|
|
||||||
share_name_currency = get_currency_from_text(share_name)
|
|
||||||
if share_name_currency is not None and share_name_currency in total_currency_list:
|
|
||||||
for split in share_name_split:
|
|
||||||
if split in total_currency_list and split != share_name_currency:
|
|
||||||
default_currency = split
|
|
||||||
break
|
|
||||||
new_share_name = ' '.join([split for split in share_name_split
|
|
||||||
if split not in total_currency_list
|
|
||||||
or (split == share_name_currency)])
|
|
||||||
if share_name in text:
|
|
||||||
text = text.replace(share_name, new_share_name)
|
|
||||||
else:
|
else:
|
||||||
text = ' '.join([split for split in text.split()
|
without_currency_list.append(index)
|
||||||
if split not in total_currency_list
|
if not with_currency and len(with_currency_list) == 0:
|
||||||
or (split == share_name_currency)])
|
pass
|
||||||
share_name = new_share_name
|
elif not with_currency and len(with_currency_list) > 0:
|
||||||
|
share_short_name_list = []
|
||||||
for c_i in range(len(compare_list)):
|
if share_name is not None and len(share_name.strip()) > 0:
|
||||||
compare = compare_list[c_i]
|
share_short_name_list = get_share_short_name_from_text(share_name)
|
||||||
compare_share_part = get_share_part_list([compare])[0]
|
updated = False
|
||||||
compare_share_part_split = compare_share_part.split()
|
if len(share_short_name_list) > 0:
|
||||||
compare_share_part_currency_list = []
|
if len(without_currency_list) > 0:
|
||||||
for split in compare_share_part_split:
|
for index in without_currency_list:
|
||||||
if split.upper() in total_currency_list and split.upper() not in compare_share_part_currency_list:
|
all_in_list = True
|
||||||
compare_share_part_currency_list.append(split)
|
compare_split = [split.upper() for split in compare_list[index].split()]
|
||||||
if len(compare_share_part_currency_list) > 1 and default_currency in compare_share_part_currency_list:
|
for share_shot_name in share_short_name_list:
|
||||||
compare_share_part_split = [split for split in compare_share_part_split if split.upper() != default_currency]
|
if share_shot_name not in compare_split:
|
||||||
new_compare_share_part = ' '.join(compare_share_part_split)
|
all_in_list = False
|
||||||
compare_list[c_i] = compare.replace(compare_share_part, new_compare_share_part)
|
break
|
||||||
|
if all_in_list:
|
||||||
|
text = text + ' ' + 'USD'
|
||||||
|
if share_name is not None:
|
||||||
|
share_name = share_name + ' ' + 'USD'
|
||||||
|
updated = True
|
||||||
|
break
|
||||||
|
if not updated:
|
||||||
|
currency_list = []
|
||||||
|
for index in with_currency_list:
|
||||||
|
all_in_list = True
|
||||||
|
compare_split = [split.upper() for split in compare_list[index].split()]
|
||||||
|
for share_shot_name in share_short_name_list:
|
||||||
|
if share_shot_name not in compare_split:
|
||||||
|
all_in_list = False
|
||||||
|
break
|
||||||
|
if all_in_list:
|
||||||
|
current_currency_list = [split for split in compare_split
|
||||||
|
if split.upper() in total_currency_list]
|
||||||
|
if len(current_currency_list) > 0:
|
||||||
|
currency_list.append(current_currency_list[-1])
|
||||||
|
if len(currency_list) == 1:
|
||||||
|
text = text + ' ' + currency_list[0]
|
||||||
|
if share_name is not None:
|
||||||
|
share_name = share_name + ' ' + currency_list[0]
|
||||||
|
updated = True
|
||||||
|
|
||||||
|
for index in without_currency_list:
|
||||||
|
compare_list[index] = compare_list[index] + ' ' + 'USD'
|
||||||
|
|
||||||
|
if not updated:
|
||||||
|
text = text + ' ' + 'USD'
|
||||||
|
if share_name is not None:
|
||||||
|
share_name = share_name + ' ' + 'USD'
|
||||||
|
# return text, share_name, compare_list
|
||||||
|
elif with_currency and len(without_currency_list) == 0:
|
||||||
|
for index in without_currency_list:
|
||||||
|
compare_list[index] = compare_list[index] + ' ' + 'USD'
|
||||||
|
# return text, share_name, compare_list
|
||||||
|
else:
|
||||||
|
# return text, share_name, compare_list
|
||||||
|
pass
|
||||||
|
default_currency = 'USD'
|
||||||
|
if with_currency and share_name is not None:
|
||||||
|
share_name_split = share_name.split()
|
||||||
|
share_name_currency = get_currency_from_text(share_name)
|
||||||
|
if share_name_currency is not None and share_name_currency in total_currency_list:
|
||||||
|
for split in share_name_split:
|
||||||
|
if split in total_currency_list and split != share_name_currency:
|
||||||
|
default_currency = split
|
||||||
|
break
|
||||||
|
new_share_name = ' '.join([split for split in share_name_split
|
||||||
|
if split not in total_currency_list
|
||||||
|
or (split == share_name_currency)])
|
||||||
|
if share_name in text:
|
||||||
|
text = text.replace(share_name, new_share_name)
|
||||||
|
else:
|
||||||
|
text = ' '.join([split for split in text.split()
|
||||||
|
if split not in total_currency_list
|
||||||
|
or (split == share_name_currency)])
|
||||||
|
share_name = new_share_name
|
||||||
|
|
||||||
|
for c_i in range(len(compare_list)):
|
||||||
|
compare = compare_list[c_i]
|
||||||
|
compare_share_part = get_share_part_list([compare])[0]
|
||||||
|
compare_share_part_split = compare_share_part.split()
|
||||||
|
compare_share_part_currency_list = []
|
||||||
|
for split in compare_share_part_split:
|
||||||
|
if split.upper() in total_currency_list and split.upper() not in compare_share_part_currency_list:
|
||||||
|
compare_share_part_currency_list.append(split)
|
||||||
|
if len(compare_share_part_currency_list) > 1 and default_currency in compare_share_part_currency_list:
|
||||||
|
compare_share_part_split = [split for split in compare_share_part_split if split.upper() != default_currency]
|
||||||
|
new_compare_share_part = ' '.join(compare_share_part_split)
|
||||||
|
compare_list[c_i] = compare.replace(compare_share_part, new_compare_share_part)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in update_for_currency: {e}")
|
||||||
return text, share_name, compare_list
|
return text, share_name, compare_list
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue