Optimize mapping: choose the proper candidate mapping list.
This commit is contained in:
parent
60a26377e5
commit
18174bf1cf
|
|
@ -283,17 +283,25 @@ class DataMapping:
|
||||||
doc_compare_mapping = self.doc_fund_class_mapping[
|
doc_compare_mapping = self.doc_fund_class_mapping[
|
||||||
self.doc_fund_class_mapping["FundId"] == parent_id
|
self.doc_fund_class_mapping["FundId"] == parent_id
|
||||||
]
|
]
|
||||||
|
provider_compare_mapping = self.provider_fund_class_mapping\
|
||||||
|
[self.provider_fund_class_mapping["FundId"] == parent_id]
|
||||||
if len(doc_compare_mapping) == 0:
|
if len(doc_compare_mapping) == 0:
|
||||||
|
if len(provider_compare_mapping) == 0:
|
||||||
doc_compare_name_list = self.doc_share_name_list
|
doc_compare_name_list = self.doc_share_name_list
|
||||||
doc_compare_mapping = self.doc_fund_class_mapping
|
doc_compare_mapping = self.doc_fund_class_mapping
|
||||||
|
provider_compare_name_list = self.provider_share_name_list
|
||||||
|
provider_compare_mapping = self.provider_fund_class_mapping
|
||||||
|
else:
|
||||||
|
provider_compare_name_list = (
|
||||||
|
provider_compare_mapping["ShareClassName"].unique().tolist()
|
||||||
|
)
|
||||||
|
doc_compare_name_list = []
|
||||||
|
doc_compare_mapping = pd.DataFrame()
|
||||||
else:
|
else:
|
||||||
doc_compare_name_list = (
|
doc_compare_name_list = (
|
||||||
doc_compare_mapping["ShareClassName"].unique().tolist()
|
doc_compare_mapping["ShareClassName"].unique().tolist()
|
||||||
)
|
)
|
||||||
|
|
||||||
provider_compare_mapping = self.provider_fund_class_mapping[
|
|
||||||
self.provider_fund_class_mapping["FundId"] == parent_id
|
|
||||||
]
|
|
||||||
if len(provider_compare_mapping) == 0 or \
|
if len(provider_compare_mapping) == 0 or \
|
||||||
len(provider_compare_mapping) < len(doc_compare_mapping):
|
len(provider_compare_mapping) < len(doc_compare_mapping):
|
||||||
provider_compare_name_list = doc_compare_name_list
|
provider_compare_name_list = doc_compare_name_list
|
||||||
|
|
|
||||||
10
main.py
10
main.py
|
|
@ -574,8 +574,8 @@ def test_data_extraction_metrics():
|
||||||
|
|
||||||
|
|
||||||
def test_mapping_raw_name():
|
def test_mapping_raw_name():
|
||||||
doc_id = "445102363"
|
doc_id = "469138353"
|
||||||
raw_name = "Danske Invest SICAV Global Portfolio Solution – Defensive Class X"
|
raw_name = "Manulife Global Fund ASEAN Equity Fund I USD"
|
||||||
output_folder = r"/data/emea_ar/output/mapping_data/docs/by_text/"
|
output_folder = r"/data/emea_ar/output/mapping_data/docs/by_text/"
|
||||||
data_mapping = DataMapping(
|
data_mapping = DataMapping(
|
||||||
doc_id,
|
doc_id,
|
||||||
|
|
@ -697,11 +697,13 @@ if __name__ == "__main__":
|
||||||
"405803396",
|
"405803396",
|
||||||
"445102363",
|
"445102363",
|
||||||
"445256897",
|
"445256897",
|
||||||
"448265376"
|
"448265376",
|
||||||
|
"449555622",
|
||||||
|
"449623976",
|
||||||
]
|
]
|
||||||
# special_doc_id_list = check_mapping_doc_id_list
|
# special_doc_id_list = check_mapping_doc_id_list
|
||||||
special_doc_id_list = check_db_mapping_doc_id_list
|
special_doc_id_list = check_db_mapping_doc_id_list
|
||||||
# special_doc_id_list = ["391736837"]
|
# special_doc_id_list = ["469138353"]
|
||||||
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
|
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
|
||||||
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
|
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
|
||||||
re_run_extract_data = False
|
re_run_extract_data = False
|
||||||
|
|
|
||||||
|
|
@ -568,6 +568,8 @@ def replace_abbrevation(text: str):
|
||||||
new_text_splits.append('Institutional')
|
new_text_splits.append('Institutional')
|
||||||
elif split.lower() in ['cap', 'cap.']:
|
elif split.lower() in ['cap', 'cap.']:
|
||||||
new_text_splits.append('Capitalisation')
|
new_text_splits.append('Capitalisation')
|
||||||
|
elif split.lower() in ['div', 'div.']:
|
||||||
|
new_text_splits.append('Dividend')
|
||||||
elif split.lower() in ['adm', 'adm.']:
|
elif split.lower() in ['adm', 'adm.']:
|
||||||
new_text_splits.append('Admin')
|
new_text_splits.append('Admin')
|
||||||
elif split.lower() in ['adv', 'adv.']:
|
elif split.lower() in ['adv', 'adv.']:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue