Optimize mapping algorithm: check whether a "-" is used to connect share names
This commit is contained in:
parent
035f028155
commit
f06355e0c8
9
main.py
9
main.py
|
|
@ -574,8 +574,9 @@ def test_data_extraction_metrics():
|
|||
|
||||
|
||||
def test_mapping_raw_name():
|
||||
doc_id = "333207452"
|
||||
raw_name = "Rathbone SICAV Income Fund L ACC GBP"
|
||||
doc_id = "481475385"
|
||||
raw_name = "Emerging Markets Fund A-ACC Shares USD"
|
||||
raw_share_name = "A-ACC Shares USD"
|
||||
output_folder = r"/data/emea_ar/output/mapping_data/docs/by_text/"
|
||||
data_mapping = DataMapping(
|
||||
doc_id,
|
||||
|
|
@ -587,7 +588,7 @@ def test_mapping_raw_name():
|
|||
process_cache = {}
|
||||
mapping_info = data_mapping.matching_with_database(
|
||||
raw_name=raw_name,
|
||||
raw_share_name=None,
|
||||
raw_share_name=raw_share_name,
|
||||
parent_id=None,
|
||||
matching_type="share",
|
||||
process_cache=process_cache
|
||||
|
|
@ -704,7 +705,7 @@ if __name__ == "__main__":
|
|||
]
|
||||
# special_doc_id_list = check_mapping_doc_id_list
|
||||
special_doc_id_list = check_db_mapping_doc_id_list
|
||||
# special_doc_id_list = ["479042269"]
|
||||
# special_doc_id_list = ["481475385"]
|
||||
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
|
||||
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
|
||||
re_run_extract_data = False
|
||||
|
|
|
|||
|
|
@ -90,8 +90,13 @@ def get_most_similar_name(text: str,
|
|||
copy_name_list is None or len(copy_name_list) == 0:
|
||||
return None, None
|
||||
|
||||
copy_name_list = [replace_abbrevation(copy_name) for copy_name
|
||||
in copy_name_list]
|
||||
for i in range(len(copy_name_list)):
|
||||
copy_name = copy_name_list[i]
|
||||
share_part = get_share_part_list([copy_name])[0]
|
||||
if '-' in share_part:
|
||||
copy_name = copy_name.replace('-', ' ')
|
||||
copy_name = replace_abbrevation(copy_name)
|
||||
copy_name_list[i] = copy_name
|
||||
|
||||
# get common words in fund_name_list
|
||||
common_word_list = []
|
||||
|
|
|
|||
Loading…
Reference in New Issue