Optimize mapping algorithm: check whether a "-" is used to connect share names
This commit is contained in:
parent
035f028155
commit
f06355e0c8
9
main.py
9
main.py
|
|
@ -574,8 +574,9 @@ def test_data_extraction_metrics():
|
||||||
|
|
||||||
|
|
||||||
def test_mapping_raw_name():
|
def test_mapping_raw_name():
|
||||||
doc_id = "333207452"
|
doc_id = "481475385"
|
||||||
raw_name = "Rathbone SICAV Income Fund L ACC GBP"
|
raw_name = "Emerging Markets Fund A-ACC Shares USD"
|
||||||
|
raw_share_name = "A-ACC Shares USD"
|
||||||
output_folder = r"/data/emea_ar/output/mapping_data/docs/by_text/"
|
output_folder = r"/data/emea_ar/output/mapping_data/docs/by_text/"
|
||||||
data_mapping = DataMapping(
|
data_mapping = DataMapping(
|
||||||
doc_id,
|
doc_id,
|
||||||
|
|
@ -587,7 +588,7 @@ def test_mapping_raw_name():
|
||||||
process_cache = {}
|
process_cache = {}
|
||||||
mapping_info = data_mapping.matching_with_database(
|
mapping_info = data_mapping.matching_with_database(
|
||||||
raw_name=raw_name,
|
raw_name=raw_name,
|
||||||
raw_share_name=None,
|
raw_share_name=raw_share_name,
|
||||||
parent_id=None,
|
parent_id=None,
|
||||||
matching_type="share",
|
matching_type="share",
|
||||||
process_cache=process_cache
|
process_cache=process_cache
|
||||||
|
|
@ -704,7 +705,7 @@ if __name__ == "__main__":
|
||||||
]
|
]
|
||||||
# special_doc_id_list = check_mapping_doc_id_list
|
# special_doc_id_list = check_mapping_doc_id_list
|
||||||
special_doc_id_list = check_db_mapping_doc_id_list
|
special_doc_id_list = check_db_mapping_doc_id_list
|
||||||
# special_doc_id_list = ["479042269"]
|
# special_doc_id_list = ["481475385"]
|
||||||
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
|
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
|
||||||
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
|
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
|
||||||
re_run_extract_data = False
|
re_run_extract_data = False
|
||||||
|
|
|
||||||
|
|
@ -90,8 +90,13 @@ def get_most_similar_name(text: str,
|
||||||
copy_name_list is None or len(copy_name_list) == 0:
|
copy_name_list is None or len(copy_name_list) == 0:
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
copy_name_list = [replace_abbrevation(copy_name) for copy_name
|
for i in range(len(copy_name_list)):
|
||||||
in copy_name_list]
|
copy_name = copy_name_list[i]
|
||||||
|
share_part = get_share_part_list([copy_name])[0]
|
||||||
|
if '-' in share_part:
|
||||||
|
copy_name = copy_name.replace('-', ' ')
|
||||||
|
copy_name = replace_abbrevation(copy_name)
|
||||||
|
copy_name_list[i] = copy_name
|
||||||
|
|
||||||
# get common words in fund_name_list
|
# get common words in fund_name_list
|
||||||
common_word_list = []
|
common_word_list = []
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue