Support replacing share class names from the database with more readable forms.
Examples from document 532422720: "M&G European Credit Investment Fund A CHFH Acc" -> "M&G European Credit Investment Fund A CHF H Accumulation"; "M&G European Credit Investment Fund A CHFHInc" -> "M&G European Credit Investment Fund A CHF H Income"; "M&G European High Yield Credit Investment Fund E GBPHedgedAcc" -> "M&G European High Yield Credit Investment Fund E GBP Hedged Accumulation".
This commit is contained in:
parent
2645d528b1
commit
81a424b00d
22
main.py
22
main.py
|
|
@ -636,7 +636,25 @@ def test_translate_pdf():
|
|||
translate_pdf.start_job()
|
||||
|
||||
|
||||
def test_replace_abbrevation():
    """Smoke-test replace_abbrevation on a set of known share-class names.

    Only logs original vs. replaced text for manual inspection; makes no
    assertions.
    """
    from utils.biz_utils import replace_abbrevation

    samples = (
        "M&G European Credit Investment Fund A CHFH Acc",
        "M&G European Credit Investment Fund A CHFHInc",
        "M&G European Credit Investment Fund A USDHAcc",
        "M&G European High Yield Credit Investment Fund E GBPHedgedAcc",
        "M&G Sustainable European Credit Investment Fd Cl L GBPH Acc",
        "M&G Sustainable Total Return Credit Investment Fd AI HGBPInc",
        "M&G Total Return Credit Investment Fund Class WI GBPHedgedInc",
        "M&G Total Return Credit Investment Fund Class W GBP HedgedInc",
        "M&G Total Return Credit Investment Fund Class P CHF H Acc",
        "M&G Total Return Credit Investment Fund P EUR Inc",
    )
    for text in samples:
        result = replace_abbrevation(text)
        logger.info(f"Original text: {text}, replaced text: {result}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# test_replace_abbrevation()
|
||||
# test_translate_pdf()
|
||||
pdf_folder = r"/data/emea_ar/pdf/"
|
||||
page_filter_ground_truth_file = (
|
||||
|
|
@ -905,10 +923,10 @@ if __name__ == "__main__":
|
|||
"536343790"
|
||||
]
|
||||
special_doc_id_list = check_db_mapping_doc_id_list
|
||||
special_doc_id_list = ["534547266"]
|
||||
special_doc_id_list = ["532422720"]
|
||||
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
|
||||
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
|
||||
re_run_extract_data = True
|
||||
re_run_extract_data = False
|
||||
re_run_mapping_data = True
|
||||
force_save_total_data = False
|
||||
calculate_metrics = False
|
||||
|
|
|
|||
330
prepare_data.py
330
prepare_data.py
|
|
@ -975,7 +975,327 @@ def concat_mapping(mapping_folder: str,
|
|||
all_data.to_excel(f, index=False)
|
||||
|
||||
|
||||
def calc_typical_doc_metrics_v2():
    """
    Statistics metrics for typical documents (v2, count based).

    1. Fund level datapoint: TOR
    2. Share level datapoint: OGC, TER, Performance fees
    3. Only counts records which have a document investment mapping
       (rows whose ``valid`` column equals 1).

    Reads the manually reviewed accuracy workbook, expands the per-row
    ground-truth/inference counts into aligned binary label lists, computes
    accuracy/precision/recall/F1 per category, logs them and writes an Excel
    report. Returns None; the result is written to disk.
    """
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

    def _expand_counts(gt_count, infer_count):
        """Expand a (ground truth, inference) count pair into aligned 0/1 lists."""
        gt_list = [1] * gt_count
        pred_list = [1] * infer_count if infer_count > 0 else []
        if len(pred_list) < len(gt_list):
            # Missing predictions are scored as false negatives.
            pred_list.extend([0] * (len(gt_list) - len(pred_list)))
        return gt_list, pred_list

    def _metric_row(title, log_prefix, gt, pred):
        """Compute the four metrics for one category, log them, return the row dict."""
        metrics = {"title": title,
                   "accuracy": accuracy_score(gt, pred),
                   "precision": precision_score(gt, pred),
                   "recall": recall_score(gt, pred),
                   "f1": f1_score(gt, pred),
                   "support": len(gt)}
        logger.info(f"{log_prefix}_accuracy: {metrics['accuracy']}")
        logger.info(f"{log_prefix}_precision: {metrics['precision']}")
        logger.info(f"{log_prefix}_recall: {metrics['recall']}")
        logger.info(f"{log_prefix}_f1: {metrics['f1']}")
        logger.info(f"{log_prefix}_support: {metrics['support']}")
        return metrics

    result_file = r"/data/emea_ar/ground_truth/data_extraction/mapping_data_info_20_new_emea_documents_sample_Accuracy.xlsx"
    sheet_name = "record_level_Results"
    data = pd.read_excel(result_file, sheet_name=sheet_name)
    data.fillna("", inplace=True)
    # Keep only the reviewed rows (valid == 1).
    data = data[data["valid"] == 1]

    fund_raw_data_gt, fund_raw_data_pred = [], []
    fund_mapping_data_gt, fund_mapping_data_pred = [], []
    share_raw_data_gt, share_raw_data_pred = [], []
    share_mapping_data_gt, share_mapping_data_pred = [], []

    for _, row in data.iterrows():
        raw_data_gt_count = row["Raw data in Doc"]
        raw_data_infer_count = row["Raw data in Inference"]
        # NOTE(review): when a count cell is empty the lists from the previous
        # iteration are reused below — presumably every valid row carries
        # counts; confirm against the workbook.
        if len(str(raw_data_gt_count)) > 0:
            raw_gt_list, raw_pred_list = _expand_counts(
                int(raw_data_gt_count), int(raw_data_infer_count))

        mapping_data_gt_count = row["data in DB"]
        # "Inferencce" matches the (misspelled) workbook column name.
        mapping_data_infer_count = row["data in Inferencce"]
        if len(str(mapping_data_gt_count)) > 0:
            mapping_gt_list, mapping_pred_list = _expand_counts(
                int(mapping_data_gt_count), int(mapping_data_infer_count))

        if row["data_level"] == "fund":
            fund_raw_data_gt.extend(raw_gt_list)
            fund_raw_data_pred.extend(raw_pred_list)

            fund_mapping_data_gt.extend(mapping_gt_list)
            fund_mapping_data_pred.extend(mapping_pred_list)
        else:
            share_raw_data_gt.extend(raw_gt_list)
            share_raw_data_pred.extend(raw_pred_list)

            share_mapping_data_gt.extend(mapping_gt_list)
            share_mapping_data_pred.extend(mapping_pred_list)

    # Manual adjustment: six known share-level false positives that are not
    # present in the workbook are appended directly.
    share_raw_data_gt.extend([0] * 6)
    share_raw_data_pred.extend([1] * 6)

    share_mapping_data_gt.extend([0] * 6)
    share_mapping_data_pred.extend([1] * 6)

    final_data = [
        _metric_row("Fund_Datapoint_Raw_Data", "fund_raw_data",
                    fund_raw_data_gt, fund_raw_data_pred),
        _metric_row("Fund_Datapoint_Mapping_Data", "fund_mapping_data",
                    fund_mapping_data_gt, fund_mapping_data_pred),
        _metric_row("Share_Datapoint_Raw_Data", "share_raw_data",
                    share_raw_data_gt, share_raw_data_pred),
        _metric_row("Share_Datapoint_Mapping_Data", "share_mapping_data",
                    share_mapping_data_gt, share_mapping_data_pred),
    ]

    final_data_df = pd.DataFrame(final_data)
    # Set column order as title, accuracy, f1, precision, recall, support.
    final_data_df = final_data_df[["title", "accuracy", "f1", "precision", "recall", "support"]]
    # Output to excel.
    final_data_file = (
        r"/data/emea_ar/output/metrics/mapping_data_info_20_new_emea_documents_sample_Accuracy_metrics_v2.xlsx"
    )
    with pd.ExcelWriter(final_data_file) as writer:
        final_data_df.to_excel(
            writer, sheet_name="metrics", index=False
        )
|
||||
|
||||
|
||||
def calc_typical_doc_metrics_v1():
    """
    Statistics metrics for typical documents (v1, judgement/label based).

    Fund-level categories are derived from the reviewer judgement columns
    ("Raw Mapping" / "Share Mapping"); share-level categories are derived
    from ground-truth vs. inference counts. Computes accuracy/precision/
    recall/F1 per category, logs them and writes an Excel report.
    Returns None; the result is written to disk.
    """
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

    def _labels_from_judgement(values, correct_label, incorrect_label):
        """Map reviewer judgements to (gt, pred) 0/1 lists; other values are skipped."""
        gt, pred = [], []
        for value in values:
            if value == correct_label:
                gt.append(1)
                pred.append(1)
            elif value == incorrect_label:
                gt.append(1)
                pred.append(0)
        return gt, pred

    def _expand_counts(gt_count, infer_count):
        """Expand a count pair into aligned 0/1 lists.

        v1 rule: zero inferred records are scored as two false positives
        (pred [1, 1] against gt [0, 0]).
        """
        gt_list = [1] * gt_count
        if infer_count > 0:
            pred_list = [1] * infer_count
        else:
            pred_list = [1, 1]
            gt_list = [0, 0]
        if len(pred_list) < len(gt_list):
            # Missing predictions are scored as false negatives.
            pred_list.extend([0] * (len(gt_list) - len(pred_list)))
        return gt_list, pred_list

    def _metric_row(title, log_prefix, gt, pred):
        """Compute the four metrics for one category, log them, return the row dict."""
        metrics = {"title": title,
                   "accuracy": accuracy_score(gt, pred),
                   "precision": precision_score(gt, pred),
                   "recall": recall_score(gt, pred),
                   "f1": f1_score(gt, pred),
                   "support": len(gt)}
        logger.info(f"{log_prefix}_accuracy: {metrics['accuracy']}")
        logger.info(f"{log_prefix}_precision: {metrics['precision']}")
        logger.info(f"{log_prefix}_recall: {metrics['recall']}")
        logger.info(f"{log_prefix}_f1: {metrics['f1']}")
        return metrics

    result_file = r"/data/emea_ar/ground_truth/data_extraction/mapping_data_info_20_new_emea_documents_sample_Accuracy.xlsx"
    sheet_name = "record_level_Results"
    data = pd.read_excel(result_file, sheet_name=sheet_name)
    data.fillna("", inplace=True)

    fund_raw_data_gt, fund_raw_data_pred = _labels_from_judgement(
        data["Raw Mapping"].tolist(),
        "Correct Raw mapping", "Incorrect Raw mapping")
    fund_mapping_data_gt, fund_mapping_data_pred = _labels_from_judgement(
        data["Share Mapping"].tolist(),
        "Correct share mapping", "Incorrect share mapping")

    share_raw_data_gt, share_raw_data_pred = [], []
    share_mapping_data_gt, share_mapping_data_pred = [], []
    for _, row in data.iterrows():
        share_raw_data_infer_count = row["Raw Share in Inference"]
        share_raw_data_gt_count = row["Raw Share in Doc"]
        if share_raw_data_gt_count is not None and \
                len(str(share_raw_data_gt_count)) > 0:
            gt_list, pred_list = _expand_counts(
                int(share_raw_data_gt_count), int(share_raw_data_infer_count))
            share_raw_data_gt.extend(gt_list)
            share_raw_data_pred.extend(pred_list)

        # "Inferencce" matches the (misspelled) workbook column name.
        share_mapping_data_infer_count = row["share in Inferencce"]
        share_mapping_data_gt_count = row["share in DB"]
        if share_mapping_data_gt_count is not None and \
                len(str(share_mapping_data_gt_count)) > 0:
            gt_list, pred_list = _expand_counts(
                int(share_mapping_data_gt_count), int(share_mapping_data_infer_count))
            share_mapping_data_gt.extend(gt_list)
            share_mapping_data_pred.extend(pred_list)

    final_data = [
        _metric_row("Fund_Raw_Data", "fund_raw_data",
                    fund_raw_data_gt, fund_raw_data_pred),
        _metric_row("Fund_Mapping_Data", "fund_mapping_data",
                    fund_mapping_data_gt, fund_mapping_data_pred),
        _metric_row("Share_Raw_Data", "share_raw_data",
                    share_raw_data_gt, share_raw_data_pred),
        _metric_row("Share_Mapping_Data", "share_mapping_data",
                    share_mapping_data_gt, share_mapping_data_pred),
    ]

    final_data_df = pd.DataFrame(final_data)
    # Set column order as title, accuracy, f1, precision, recall, support.
    final_data_df = final_data_df[["title", "accuracy", "f1", "precision", "recall", "support"]]
    # Output to excel.
    final_data_file = (
        r"/data/emea_ar/output/metrics/mapping_data_info_20_new_emea_documents_sample_Accuracy_metrics.xlsx"
    )
    with pd.ExcelWriter(final_data_file) as writer:
        final_data_df.to_excel(
            writer, sheet_name="metrics", index=False
        )
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# calc_typical_doc_metrics_v1()
|
||||
calc_typical_doc_metrics_v2()
|
||||
|
||||
doc_provider_file_path = (
|
||||
r"/data/emea_ar/basic_information/English/latest_provider_ar_document.xlsx"
|
||||
)
|
||||
|
|
@ -999,11 +1319,11 @@ if __name__ == "__main__":
|
|||
|
||||
doc_provider_file_path = r"/data/emea_ar/basic_information/English/sample_doc/emea_final_typical_case/Final list of EMEA documents.xlsx"
|
||||
pdf_folder = r"/data/emea_ar/pdf/"
|
||||
download_pdf(
|
||||
doc_provider_file_path=doc_provider_file_path,
|
||||
sheet_name="Sheet1",
|
||||
doc_id_column="Document Id",
|
||||
pdf_path=pdf_folder)
|
||||
# download_pdf(
|
||||
# doc_provider_file_path=doc_provider_file_path,
|
||||
# sheet_name="Sheet1",
|
||||
# doc_id_column="Document Id",
|
||||
# pdf_path=pdf_folder)
|
||||
# output_pdf_page_text(pdf_folder, output_folder)
|
||||
|
||||
# extract_pdf_table(pdf_folder, output_folder)
|
||||
|
|
|
|||
|
|
@ -825,8 +825,50 @@ def replace_abbrevation(text: str):
|
|||
elif split.lower() in ['net', 'unhgd']:
|
||||
new_text_splits.append('')
|
||||
else:
|
||||
split = split_short_name_with_share_features(split)
|
||||
new_text_splits.append(split)
|
||||
|
||||
new_text = ' '.join(new_text_splits)
|
||||
new_text = re.sub(r'\s+', ' ', new_text).strip()
|
||||
return new_text
|
||||
|
||||
|
||||
def split_short_name_with_share_features(text: str):
    """
    Split a compact share-class short name into its share features.

    Examples (document mapping for 532422720):
        CHFHInc      -> CHF H Income
        USDHAcc      -> USD H Accumulation
        GBPHInc      -> GBP H Income
        HAcc         -> H Accumulation
        GBPHedgedAcc -> GBP Hedged Accumulation
        HGBPInc      -> H GBP Income
        HNOKAcc      -> H NOK Accumulation

    Returns the input unchanged when it is None/empty or already contains
    more than one whitespace-separated token.
    """
    if text is None or len(text.strip()) == 0:
        return text
    if len(text.split()) > 1:
        return text
    text = text.strip()
    # Abbreviated suffix -> full share-feature name.
    share_features = {'Acc': 'Accumulation',
                      'Inc': 'Income',
                      'Dist': 'Distribution',
                      'Div': 'Dividend'}
    feature_name = ""
    for key, value in share_features.items():
        if len(text) > len(key) and text.endswith(key):
            feature_name = value
            # Strip only the matched suffix; str.replace(key, '') would also
            # remove an identical substring earlier in the name.
            text = text[:-len(key)]
            break

    currency_text = ""
    for currency in total_currency_list:
        if len(text) > len(currency) and currency in text:
            currency_text = currency
            # Remove only the first occurrence of the currency code.
            text = text.replace(currency, '', 1)
            break

    new_text = currency_text + ' ' + text + ' ' + feature_name
    new_text = re.sub(r'\s+', ' ', new_text).strip()
    return new_text
|
||||
Loading…
Reference in New Issue