1. Optimize instructions: do not fetch values that are qualified by an "up to" statement.

2. Add an exception handler to the update_for_currency function.
This commit is contained in:
Blade He 2024-12-03 11:27:28 -06:00
parent bc32860f87
commit a11a99fdc3
5 changed files with 139 additions and 109 deletions

View File

@ -35,6 +35,7 @@
"Ratio de gastos totales" "Ratio de gastos totales"
], ],
"german": [ "german": [
"Mit anteiliger Performance Fee in %",
"TER inkl. Performance-Fee in % **)", "TER inkl. Performance-Fee in % **)",
"Gesamtgebühren", "Gesamtgebühren",
"Kostenpauschale", "Kostenpauschale",
@ -45,7 +46,6 @@
"kostenquote", "kostenquote",
"Gesamt kostenquote", "Gesamt kostenquote",
"Betriebskostenquote des Fonds", "Betriebskostenquote des Fonds",
"Pauschalgebühr",
"Total Expense Ratio in Prozent", "Total Expense Ratio in Prozent",
"Annualisierte TER in % (Mit Gebührenverzicht)" "Annualisierte TER in % (Mit Gebührenverzicht)"
], ],

View File

@ -813,8 +813,11 @@ class DataExtraction:
instructions.append("Special cases:\n") instructions.append("Special cases:\n")
special_cases = self.instructions_config.get("special_cases", {}) special_cases = self.instructions_config.get("special_cases", {})
special_cases_common_list = special_cases.get("common", []) special_cases_common_list = special_cases.get("common", [])
special_cases_number = 1
for special_cases_common in special_cases_common_list: for special_cases_common in special_cases_common_list:
title = special_cases_common.get("title", "") title = special_cases_common.get("title", "")
title = f"{special_cases_number}. {title} "
special_cases_number += 1
instructions.append(title) instructions.append(title)
instructions.append("\n") instructions.append("\n")
contents_list = special_cases_common.get("contents", []) contents_list = special_cases_common.get("contents", [])
@ -826,6 +829,8 @@ class DataExtraction:
special_case_list = special_cases.get(datapoint, []) special_case_list = special_cases.get(datapoint, [])
for special_case in special_case_list: for special_case in special_case_list:
title = special_case.get("title", "") title = special_case.get("title", "")
title = f"{special_cases_number}. {title} "
special_cases_number += 1
instructions.append(title) instructions.append(title)
instructions.append("\n") instructions.append("\n")
contents_list = special_case.get("contents", []) contents_list = special_case.get("contents", [])

View File

@ -70,6 +70,7 @@
"- With \"TER including Performance Fee\" and \"TER excluding Performance Fee\", pick up the values from \"TER including Performance Fee\".", "- With \"TER including Performance Fee\" and \"TER excluding Performance Fee\", pick up the values from \"TER including Performance Fee\".",
"- With \"TER inkl. Performance-Fee in % **)\" and \"TER exkl. Performance-Fee in % **)\", pick up the values from \"TER inkl. Performance-Fee in % **)\".", "- With \"TER inkl. Performance-Fee in % **)\" and \"TER exkl. Performance-Fee in % **)\", pick up the values from \"TER inkl. Performance-Fee in % **)\".",
"- With \"TER inkl. Performance-Fee in % **)\" and \"TER inkl. Performance-Fee in % (inkl. Zielfonds)\", pick up the values from \"TER inkl. Performance-Fee in % **)\".", "- With \"TER inkl. Performance-Fee in % **)\" and \"TER inkl. Performance-Fee in % (inkl. Zielfonds)\", pick up the values from \"TER inkl. Performance-Fee in % **)\".",
"- With \"Mit anteiliger Performance Fee in %\" and \"Ohne anteilige Performance-Fee in %\", pick up the values from \"Mit anteiliger Performance Fee in %\".",
"- With both of \"Synthetic TER\" and \"Fund TER\", if \"Synthetic TER\" with value(s), pick up the value(s) from \"Synthetic TER\", otherwise, pick up the value(s) from \"Fund TER\".", "- With both of \"Synthetic TER\" and \"Fund TER\", if \"Synthetic TER\" with value(s), pick up the value(s) from \"Synthetic TER\", otherwise, pick up the value(s) from \"Fund TER\".",
"- With both of \"Net TER\" and \"Capped Expense Ratio\", the priority is \"Net TER\", please exclude the column: \"Capped Expense Ratio\", only pick up the values from \"Net TER\".", "- With both of \"Net TER\" and \"Capped Expense Ratio\", the priority is \"Net TER\", please exclude the column: \"Capped Expense Ratio\", only pick up the values from \"Net TER\".",
"- With \"Gross TER\", \"Waiver\", \"Net TER\", \"Capped Expense Ratio\" as column titles, pick up the values from \"Net TER\".", "- With \"Gross TER\", \"Waiver\", \"Net TER\", \"Capped Expense Ratio\" as column titles, pick up the values from \"Net TER\".",
@ -132,6 +133,26 @@
"The output should be:", "The output should be:",
"{\"data\": []}" "{\"data\": []}"
] ]
},
{
"title": "Don't fetch data with \"up to\" statement",
"contents":[
"If the value is with \"up to\" statement, please ignore the value.",
"Example 1:",
"-----Example Start-----",
"A-Class\nB-Class\nC-Class\n",
"TER\nUp to 1.00%\nUp to 1.20%\nUp to 1.50%\n",
"-----Example End-----",
"The output should be:",
"{\"data\": []}",
"Example 2:",
"-----Example Start-----",
"A-Aktien\nB-Aktien\nC-Aktien\n",
"TER\nbis zu 1,20 % p.a.\nbis zu 2,20 % p.a.\nbis zu 1,00 % p.a.\n",
"-----Example End-----",
"The output should be:",
"{\"data\": []}"
]
} }
], ],
"ter": [ "ter": [

View File

@ -1197,7 +1197,7 @@ if __name__ == "__main__":
"534535767" "534535767"
] ]
special_doc_id_list = check_db_mapping_doc_id_list special_doc_id_list = check_db_mapping_doc_id_list
special_doc_id_list = ["451878128"] special_doc_id_list = ["532998065"]
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/" output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/" output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
re_run_extract_data = True re_run_extract_data = True

View File

@ -377,7 +377,8 @@ def replace_share_name_for_multilingual(text: str, share_name: str):
multilingual_share_list = ["Catégorie de parts", "Classe di quote", multilingual_share_list = ["Catégorie de parts", "Classe di quote",
"Kategorie Anteile", "Kategorie anteile", "Kategorie Anteile", "Kategorie anteile",
"Clase de participaciones", "Aandelenklasse", "Clase de participaciones", "Aandelenklasse",
"aandelenklasse", "Anteilklasse", "anteilklasse"] "aandelenklasse", "Anteilklasse", "anteilklasse",
"Aktien", "Aktienklasse", "aktien", "aktienklasse"]
for multilingual_share in multilingual_share_list: for multilingual_share in multilingual_share_list:
if multilingual_share in text: if multilingual_share in text:
text = text.replace(multilingual_share, "Class") text = text.replace(multilingual_share, "Class")
@ -531,6 +532,7 @@ def get_currency_from_text(text: str):
def update_for_currency(text: str, share_name: str, compare_list: list): def update_for_currency(text: str, share_name: str, compare_list: list):
try:
currency_in_text = get_currency_from_text(text) currency_in_text = get_currency_from_text(text)
with_currency = False with_currency = False
if currency_in_text is not None: if currency_in_text is not None:
@ -607,7 +609,7 @@ def update_for_currency(text: str, share_name: str, compare_list: list):
# return text, share_name, compare_list # return text, share_name, compare_list
pass pass
default_currency = 'USD' default_currency = 'USD'
if with_currency: if with_currency and share_name is not None:
share_name_split = share_name.split() share_name_split = share_name.split()
share_name_currency = get_currency_from_text(share_name) share_name_currency = get_currency_from_text(share_name)
if share_name_currency is not None and share_name_currency in total_currency_list: if share_name_currency is not None and share_name_currency in total_currency_list:
@ -638,6 +640,8 @@ def update_for_currency(text: str, share_name: str, compare_list: list):
compare_share_part_split = [split for split in compare_share_part_split if split.upper() != default_currency] compare_share_part_split = [split for split in compare_share_part_split if split.upper() != default_currency]
new_compare_share_part = ' '.join(compare_share_part_split) new_compare_share_part = ' '.join(compare_share_part_split)
compare_list[c_i] = compare.replace(compare_share_part, new_compare_share_part) compare_list[c_i] = compare.replace(compare_share_part, new_compare_share_part)
except Exception as e:
logger.error(f"Error in update_for_currency: {e}")
return text, share_name, compare_list return text, share_name, compare_list