Optimize mapping: choose the proper candidate mapping list.
This commit is contained in:
parent
60a26377e5
commit
18174bf1cf
|
|
@ -283,17 +283,25 @@ class DataMapping:
|
|||
doc_compare_mapping = self.doc_fund_class_mapping[
|
||||
self.doc_fund_class_mapping["FundId"] == parent_id
|
||||
]
|
||||
provider_compare_mapping = self.provider_fund_class_mapping\
|
||||
[self.provider_fund_class_mapping["FundId"] == parent_id]
|
||||
if len(doc_compare_mapping) == 0:
|
||||
doc_compare_name_list = self.doc_share_name_list
|
||||
doc_compare_mapping = self.doc_fund_class_mapping
|
||||
if len(provider_compare_mapping) == 0:
|
||||
doc_compare_name_list = self.doc_share_name_list
|
||||
doc_compare_mapping = self.doc_fund_class_mapping
|
||||
provider_compare_name_list = self.provider_share_name_list
|
||||
provider_compare_mapping = self.provider_fund_class_mapping
|
||||
else:
|
||||
provider_compare_name_list = (
|
||||
provider_compare_mapping["ShareClassName"].unique().tolist()
|
||||
)
|
||||
doc_compare_name_list = []
|
||||
doc_compare_mapping = pd.DataFrame()
|
||||
else:
|
||||
doc_compare_name_list = (
|
||||
doc_compare_mapping["ShareClassName"].unique().tolist()
|
||||
)
|
||||
|
||||
provider_compare_mapping = self.provider_fund_class_mapping[
|
||||
self.provider_fund_class_mapping["FundId"] == parent_id
|
||||
]
|
||||
|
||||
if len(provider_compare_mapping) == 0 or \
|
||||
len(provider_compare_mapping) < len(doc_compare_mapping):
|
||||
provider_compare_name_list = doc_compare_name_list
|
||||
|
|
|
|||
10
main.py
10
main.py
|
|
@ -574,8 +574,8 @@ def test_data_extraction_metrics():
|
|||
|
||||
|
||||
def test_mapping_raw_name():
|
||||
doc_id = "445102363"
|
||||
raw_name = "Danske Invest SICAV Global Portfolio Solution – Defensive Class X"
|
||||
doc_id = "469138353"
|
||||
raw_name = "Manulife Global Fund ASEAN Equity Fund I USD"
|
||||
output_folder = r"/data/emea_ar/output/mapping_data/docs/by_text/"
|
||||
data_mapping = DataMapping(
|
||||
doc_id,
|
||||
|
|
@ -697,11 +697,13 @@ if __name__ == "__main__":
|
|||
"405803396",
|
||||
"445102363",
|
||||
"445256897",
|
||||
"448265376"
|
||||
"448265376",
|
||||
"449555622",
|
||||
"449623976",
|
||||
]
|
||||
# special_doc_id_list = check_mapping_doc_id_list
|
||||
special_doc_id_list = check_db_mapping_doc_id_list
|
||||
# special_doc_id_list = ["391736837"]
|
||||
# special_doc_id_list = ["469138353"]
|
||||
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
|
||||
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
|
||||
re_run_extract_data = False
|
||||
|
|
|
|||
|
|
@ -568,6 +568,8 @@ def replace_abbrevation(text: str):
|
|||
new_text_splits.append('Institutional')
|
||||
elif split.lower() in ['cap', 'cap.']:
|
||||
new_text_splits.append('Capitalisation')
|
||||
elif split.lower() in ['div', 'div.']:
|
||||
new_text_splits.append('Dividend')
|
||||
elif split.lower() in ['adm', 'adm.']:
|
||||
new_text_splits.append('Admin')
|
||||
elif split.lower() in ['adv', 'adv.']:
|
||||
|
|
|
|||
Loading…
Reference in New Issue