Fix issue when removing an item from a list: check membership before calling remove()

This commit is contained in:
Blade He 2025-01-21 17:24:05 -06:00
parent e2b9bcbdbc
commit 350550d1b0
1 changed file with 30 additions and 10 deletions

View File

@ -458,8 +458,12 @@ def final_function_to_match(doc_id, pred_list, db_list, provider_name):
step0_matched_db_name_cosine= all_matched_fund_names_[0], step0_matched_db_name_jacc= all_matched_fund_names_[1], step0_matched_db_name_leven= all_matched_fund_names_[2], step0_matched_db_name_cosine= all_matched_fund_names_[0], step0_matched_db_name_jacc= all_matched_fund_names_[1], step0_matched_db_name_leven= all_matched_fund_names_[2],
step0_cosine=all_scores_[0], step0_jaccard=all_scores_[1], step0_levenshtein=all_scores_[2], step0_cosine=all_scores_[0], step0_jaccard=all_scores_[1], step0_levenshtein=all_scores_[2],
llm_flag=False)) llm_flag=False))
if db_list[matched_index] in unmatched_db_list:
unmatched_db_list.remove(db_list[matched_index]) unmatched_db_list.remove(db_list[matched_index])
# unmatched_db_list.remove(db_list[matched_index])
if pred_list[index] in unmatched_pred_list:
unmatched_pred_list.remove(pred_list[index]) unmatched_pred_list.remove(pred_list[index])
# unmatched_pred_list.remove(pred_list[index])
else: else:
### STEP-1 Abbreviation replacement ### STEP-1 Abbreviation replacement
cleaned_pred_name1 = replace_abbrevs_in_fundnames([pred_fund])[0] cleaned_pred_name1 = replace_abbrevs_in_fundnames([pred_fund])[0]
@ -479,8 +483,12 @@ def final_function_to_match(doc_id, pred_list, db_list, provider_name):
step1_pred_name=cleaned_pred_name1, step1_db_name=cleaned_db_list1, step1_pred_name=cleaned_pred_name1, step1_db_name=cleaned_db_list1,
step1_matched_db_name_cosine= all_matched_fund_names1_[0], step1_matched_db_name_jacc= all_matched_fund_names1_[1], step1_matched_db_name_leven= all_matched_fund_names1_[2], step1_matched_db_name_cosine= all_matched_fund_names1_[0], step1_matched_db_name_jacc= all_matched_fund_names1_[1], step1_matched_db_name_leven= all_matched_fund_names1_[2],
step1_cosine=all_scores1_[0], step1_jaccard=all_scores1_[1], step1_levenshtein=all_scores1_[2], llm_flag=False)) step1_cosine=all_scores1_[0], step1_jaccard=all_scores1_[1], step1_levenshtein=all_scores1_[2], llm_flag=False))
if db_list[matched_index] in unmatched_db_list:
unmatched_db_list.remove(db_list[matched_index]) unmatched_db_list.remove(db_list[matched_index])
# unmatched_db_list.remove(db_list[matched_index])
if pred_list[index] in unmatched_pred_list:
unmatched_pred_list.remove(pred_list[index]) unmatched_pred_list.remove(pred_list[index])
# unmatched_pred_list.remove(pred_list[index])
else: else:
### STEP-2 Remove Stopwords ### STEP-2 Remove Stopwords
cleaned_pred_name2 = remove_stopwords_nltk([cleaned_pred_name1])[0] cleaned_pred_name2 = remove_stopwords_nltk([cleaned_pred_name1])[0]
@ -503,8 +511,12 @@ def final_function_to_match(doc_id, pred_list, db_list, provider_name):
step2_pred_name=cleaned_pred_name2, step2_db_name=cleaned_db_list2, step2_pred_name=cleaned_pred_name2, step2_db_name=cleaned_db_list2,
step2_matched_db_name_cosine= all_matched_fund_names2_[0], step2_matched_db_name_jacc= all_matched_fund_names2_[1], step2_matched_db_name_leven= all_matched_fund_names2_[2], step2_matched_db_name_cosine= all_matched_fund_names2_[0], step2_matched_db_name_jacc= all_matched_fund_names2_[1], step2_matched_db_name_leven= all_matched_fund_names2_[2],
step2_cosine=all_scores2_[0], step2_jaccard=all_scores2_[1], step2_levenshtein=all_scores2_[2],llm_flag=False)) step2_cosine=all_scores2_[0], step2_jaccard=all_scores2_[1], step2_levenshtein=all_scores2_[2],llm_flag=False))
if db_list[matched_index] in unmatched_db_list:
unmatched_db_list.remove(db_list[matched_index]) unmatched_db_list.remove(db_list[matched_index])
# unmatched_db_list.remove(db_list[matched_index])
if pred_list[index] in unmatched_pred_list:
unmatched_pred_list.remove(pred_list[index]) unmatched_pred_list.remove(pred_list[index])
# unmatched_pred_list.remove(pred_list[index])
else: else:
### STEP-3 Special Character Removal ### STEP-3 Special Character Removal
cleaned_pred_name3 = remove_special_characters([cleaned_pred_name2])[0] cleaned_pred_name3 = remove_special_characters([cleaned_pred_name2])[0]
@ -529,8 +541,12 @@ def final_function_to_match(doc_id, pred_list, db_list, provider_name):
step3_pred_name=cleaned_pred_name3, step3_db_name=cleaned_db_list3, step3_pred_name=cleaned_pred_name3, step3_db_name=cleaned_db_list3,
step3_matched_db_name_cosine= all_matched_fund_names3_[0], step3_matched_db_name_jacc= all_matched_fund_names3_[1], step3_matched_db_name_leven= all_matched_fund_names3_[2], step3_matched_db_name_cosine= all_matched_fund_names3_[0], step3_matched_db_name_jacc= all_matched_fund_names3_[1], step3_matched_db_name_leven= all_matched_fund_names3_[2],
step3_cosine=all_scores3_[0], step3_jaccard=all_scores3_[1], step3_levenshtein=all_scores3_[2],llm_flag=False)) step3_cosine=all_scores3_[0], step3_jaccard=all_scores3_[1], step3_levenshtein=all_scores3_[2],llm_flag=False))
if db_list[matched_index] in unmatched_db_list:
unmatched_db_list.remove(db_list[matched_index]) unmatched_db_list.remove(db_list[matched_index])
# unmatched_db_list.remove(db_list[matched_index])
if pred_list[index] in unmatched_pred_list:
unmatched_pred_list.remove(pred_list[index]) unmatched_pred_list.remove(pred_list[index])
# unmatched_pred_list.remove(pred_list[index])
else: else:
### STEP-4 Common Words Removal ### STEP-4 Common Words Removal
cleaned_db_list4, _ = remove_common_words(cleaned_db_list3) cleaned_db_list4, _ = remove_common_words(cleaned_db_list3)
@ -567,8 +583,12 @@ def final_function_to_match(doc_id, pred_list, db_list, provider_name):
# print("unmatched_pred_list: ",unmatched_pred_list) # print("unmatched_pred_list: ",unmatched_pred_list)
# print("db_list[matched_index]: ",db_list[matched_index]) # print("db_list[matched_index]: ",db_list[matched_index])
# print("pred_list[index]: ",pred_list[index]) # print("pred_list[index]: ",pred_list[index])
if db_list[matched_index] in unmatched_db_list:
unmatched_db_list.remove(db_list[matched_index]) unmatched_db_list.remove(db_list[matched_index])
# unmatched_db_list.remove(db_list[matched_index])
if pred_list[index] in unmatched_pred_list:
unmatched_pred_list.remove(pred_list[index]) unmatched_pred_list.remove(pred_list[index])
# unmatched_pred_list.remove(pred_list[index])
else: else:
df_data.append(format_response(doc_id, pred_list[index], db_list[matched_index], cleaned_pred_name4, df_data.append(format_response(doc_id, pred_list[index], db_list[matched_index], cleaned_pred_name4,
db_list[matched_index], db_list[matched_index],