1. Update the mapping of multilingual share class names.
2. Optimize the logic for extracting the currency from text.
This commit is contained in:
parent
843bbbd13f
commit
5b9f9416de
4
main.py
4
main.py
|
|
@ -1203,10 +1203,10 @@ if __name__ == "__main__":
|
|||
"501380497",
|
||||
"514636959",
|
||||
"508981020"]
|
||||
special_doc_id_list = ["514636993"]
|
||||
special_doc_id_list = ["514636953"]
|
||||
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
|
||||
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
|
||||
re_run_extract_data = True
|
||||
re_run_extract_data = False
|
||||
re_run_mapping_data = True
|
||||
force_save_total_data = False
|
||||
calculate_metrics = False
|
||||
|
|
|
|||
|
|
@ -97,6 +97,8 @@ def get_most_similar_name(text: str,
|
|||
|
||||
for i in range(len(copy_name_list)):
|
||||
copy_name = copy_name_list[i]
|
||||
if matching_type == "share":
|
||||
copy_name, _ = replace_share_name_for_multilingual(copy_name, None)
|
||||
share_part = get_share_part_list([copy_name])[0]
|
||||
if '-' in share_part:
|
||||
copy_name = copy_name.replace('-', ' ')
|
||||
|
|
@ -128,6 +130,9 @@ def get_most_similar_name(text: str,
|
|||
share_name = remove_special_characters(share_name)
|
||||
share_name = replace_abbrevation(share_name)
|
||||
|
||||
text, share_name = replace_share_name_for_multilingual(text, share_name)
|
||||
|
||||
|
||||
text_splits = text.split()
|
||||
if len(text_splits) == 1:
|
||||
text = split_words_without_space(text)
|
||||
|
|
@ -332,6 +337,23 @@ def get_most_similar_name(text: str,
|
|||
return None, 0.0
|
||||
|
||||
|
||||
def replace_share_name_for_multilingual(text: str, share_name: str):
    """Replace a non-English "share class" phrase with the English word "Class".

    The known phrases are tried in a fixed order. The first phrase found in
    ``text`` is replaced by ``"Class"`` everywhere it occurs in ``text`` and,
    when ``share_name`` is not blank, in ``share_name`` as well; later phrases
    are then not tried. Matching ignores letter case, so variants such as
    "ANTEILKLASSE" or "catégorie de parts" are handled without having to list
    every capitalization explicitly.

    Args:
        text: Name to normalize. ``None`` or a blank string is returned as is.
        share_name: Optional companion name. It is only rewritten when
            ``text`` itself contained the phrase; ``None`` or a blank value is
            returned unchanged.

    Returns:
        A ``(text, share_name)`` tuple with the replacement applied.
    """
    # Imported locally so the function does not rely on a module-level import.
    import re

    if text is None or len(text.strip()) == 0:
        return text, share_name

    # One entry per phrase: case variants are covered by re.IGNORECASE below.
    multilingual_share_list = (
        "Catégorie de parts",
        "Classe di quote",
        "Kategorie Anteile",
        "Clase de participaciones",
        "Aandelenklasse",
        "Anteilklasse",
    )
    has_share_name = share_name is not None and len(share_name.strip()) > 0

    for multilingual_share in multilingual_share_list:
        pattern = re.compile(re.escape(multilingual_share), re.IGNORECASE)
        replaced_text, hit_count = pattern.subn("Class", text)
        if hit_count == 0:
            continue
        text = replaced_text
        if has_share_name:
            share_name = pattern.sub("Class", share_name)
        # Only the first phrase that matches is normalized.
        break
    return text, share_name
|
||||
|
||||
|
||||
def compare_both_short_name(text_short_name_list: list, compare_short_name_list: list):
|
||||
copy_text_short_name_list = deepcopy(text_short_name_list)
|
||||
copy_compare_short_name_list = deepcopy(compare_short_name_list)
|
||||
|
|
@ -448,12 +470,20 @@ def get_currency_from_text(text: str):
|
|||
text = text.strip()
|
||||
text_split = text.split()
|
||||
count = 0
|
||||
currency_list = []
|
||||
for split in text_split[::-1]:
|
||||
if count == 4:
|
||||
break
|
||||
if split.upper() in total_currency_list:
|
||||
return split
|
||||
currency_list.append(split.upper())
|
||||
count += 1
|
||||
if len(currency_list) > 1:
|
||||
# remove the first currency from currency list
|
||||
currency_list.pop(0)
|
||||
return currency_list[0]
|
||||
elif len(currency_list) == 1:
|
||||
return currency_list[0]
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue