diff --git a/core/data_mapping.py b/core/data_mapping.py
index a3dfc40..79cbeaf 100644
--- a/core/data_mapping.py
+++ b/core/data_mapping.py
@@ -174,6 +174,7 @@ class DataMapping:
                             investment_info = self.matching_with_database(
                                 raw_name=raw_name, 
                                 raw_share_name=raw_share_name, 
+                                raw_fund_name=raw_fund_name,
                                 parent_id=fund_id, 
                                 matching_type="share",
                                 process_cache=process_cache
@@ -254,6 +255,7 @@ class DataMapping:
         self, 
         raw_name: str, 
         raw_share_name: str = None, 
+        raw_fund_name: str = None,
         parent_id: str = None, 
         matching_type: str = "fund",
         process_cache: dict = {}
@@ -328,9 +330,14 @@ class DataMapping:
                     raw_name, 
                     doc_compare_name_list, 
                     share_name=raw_share_name, 
+                    fund_name=raw_fund_name,
                     matching_type=matching_type,
                     process_cache=process_cache)
-                if max_similarity is not None and max_similarity >= 0.9:
+                if matching_type == "fund":
+                    threshold = 0.7
+                else:
+                    threshold = 0.9
+                if max_similarity is not None and max_similarity >= threshold:
                     data_info["id"] = doc_compare_mapping[
                         doc_compare_mapping[compare_name_dp] == max_similarity_name
                     ][compare_id_dp].values[0]
@@ -344,6 +351,7 @@ class DataMapping:
                     raw_name, 
                     provider_compare_name_list, 
                     share_name=raw_share_name,
+                    fund_name=raw_fund_name,
                     matching_type=matching_type, 
                     pre_common_word_list=pre_common_word_list,
                     process_cache=process_cache
diff --git a/main.py b/main.py
index 9dd739b..f50f738 100644
--- a/main.py
+++ b/main.py
@@ -338,7 +338,7 @@ def batch_start_job(
 
 
         if calculate_metrics:
-            prediction_sheet_name = "mapping_data"
+            prediction_sheet_name = "total_mapping_data"
             ground_truth_file = r"/data/emea_ar/ground_truth/data_extraction/mapping_data_info_73_documents.xlsx"
             ground_truth_sheet_name = "mapping_data"
             metrics_output_folder = r"/data/emea_ar/output/metrics/"
@@ -600,9 +600,9 @@ def test_data_extraction_metrics():
 
 
 def test_mapping_raw_name():
-    doc_id = "394778487"
-    raw_name = "Invesco Global Real Assets Fund FCP-RAIF Invesco Global Property Plus Fund Z Gross QD USD"
-    raw_share_name = "Z Gross QD USD"
+    doc_id = "382366116"
+    raw_name = "SPARINVEST SICAV - ETHICAL EMERGING MARKETS VALUE EUR I"
+    raw_share_name = "EUR I"
     output_folder = r"/data/emea_ar/output/mapping_data/docs/by_text/"
     data_mapping = DataMapping(
         doc_id,
@@ -615,7 +615,7 @@ def test_mapping_raw_name():
     mapping_info = data_mapping.matching_with_database(
         raw_name=raw_name,
         raw_share_name=raw_share_name,
-        parent_id="FS0000H1C9", 
+        parent_id=None, 
         matching_type="share",
         process_cache=process_cache
     )
@@ -697,100 +697,102 @@ if __name__ == "__main__":
     #     "479793787",
     #     "471641628",
     # ]
-    # check_db_mapping_doc_id_list = [
-    #     "292989214",
-    #     "316237292",
-    #     "321733631",
-    #     "323390570",
-    #     "327956364",
-    #     "332223498",
-    #     "333207452",
-    #     "334718372",
-    #     "344636875",
-    #     "362246081",
-    #     "366179419",
-    #     "380945052",
-    #     "382366116",
-    #     "387202452",
-    #     "389171486",
-    #     "391456740",
-    #     "391736837",
-    #     "394778487",
-    #     "401684600",
-    #     "402113224",
-    #     "402181770",
-    #     "402397014",
-    #     "405803396",
-    #     "445102363",
-    #     "445256897",
-    #     "448265376",
-    #     "449555622",
-    #     "449623976",
-    #     "458291624",
-    #     "458359181",
-    #     "463081566",
-    #     "469138353",
-    #     "471641628",
-    #     "476492237",
-    #     "478585901",
-    #     "478586066",
-    #     "479042264",
-    #     "479042269",
-    #     "479793787",
-    #     "481475385",
-    #     "483617247",
-    #     "486378555",
-    #     "486383912",
-    #     "492121213",
-    #     "497497599",
-    #     "502693599"
-    # ]
-    
     check_db_mapping_doc_id_list = [
-        "334584772",
-        "406913630",
-        "407275419",
-        "337937633",
-        "337293427",
-        "334584772",
-        "404712928",
-        "451063582",
-        "451878128",
-        "425595958",
-        "536344026",
-        "532422548",
-        "423418540",
-        "423418395",
-        "532998065",
-        "540307575",
-        "423395975",
-        "508704368",
-        "481482392",
-        "466580448",
-        "423365707",
-        "423364758",
-        "422761666",
-        "422760156",
-        "422760148",
-        "422686965",
-        "492029971",
-        "510300817",
-        "512745032",
-        "514213638",
-        "527525440",
-        "534535767"
+        "292989214",
+        "316237292",
+        "321733631",
+        "323390570",
+        "327956364",
+        "332223498",
+        "333207452",
+        "334718372",
+        "344636875",
+        "362246081",
+        "366179419",
+        "380945052",
+        "382366116",
+        "387202452",
+        "389171486",
+        "391456740",
+        "391736837",
+        "394778487",
+        "401684600",
+        "402113224",
+        "402181770",
+        "402397014",
+        "405803396",
+        "445102363",
+        "445256897",
+        "448265376",
+        "449555622",
+        "449623976",
+        "458291624",
+        "458359181",
+        "463081566",
+        "469138353",
+        "471641628",
+        "476492237",
+        "478585901",
+        "478586066",
+        "479042264",
+        "479042269",
+        "479793787",
+        "481475385",
+        "483617247",
+        "486378555",
+        "486383912",
+        "492121213",
+        "497497599",
+        "502693599"
     ]
+    
+    # check_db_mapping_doc_id_list = [
+    #     "334584772",
+    #     "406913630",
+    #     "407275419",
+    #     "337937633",
+    #     "337293427",
+    #     "334584772",
+    #     "404712928",
+    #     "451063582",
+    #     "451878128",
+    #     "425595958",
+    #     "536344026",
+    #     "532422548",
+    #     "423418540",
+    #     "423418395",
+    #     "532998065",
+    #     "540307575",
+    #     "423395975",
+    #     "508704368",
+    #     "481482392",
+    #     "466580448",
+    #     "423365707",
+    #     "423364758",
+    #     "422761666",
+    #     "422760156",
+    #     "422760148",
+    #     "422686965",
+    #     "492029971",
+    #     "510300817",
+    #     "512745032",
+    #     "514213638",
+    #     "527525440",
+    #     "534535767"
+    # ]
     # special_doc_id_list = check_mapping_doc_id_list
     special_doc_id_list = check_db_mapping_doc_id_list
-    # special_doc_id_list = ["337937633"]
+    # special_doc_id_list = ["394778487"]
     output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
     output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
     re_run_extract_data = False
-    re_run_mapping_data = False
+    re_run_mapping_data = True
     force_save_total_data = True
-    calculate_metrics = False
+    calculate_metrics = True
 
     extract_ways = ["text"]
+    pdf_folder = r"/data/emea_ar/small_pdf/"
+    # pdf_folder = r"/data/emea_ar/pdf/"
     for extract_way in extract_ways:
         batch_start_job(
             pdf_folder,
diff --git a/utils/biz_utils.py b/utils/biz_utils.py
index 4fa8539..f9ef772 100644
--- a/utils/biz_utils.py
+++ b/utils/biz_utils.py
@@ -81,6 +81,7 @@ def clean_text(text: str) -> str:
 def get_most_similar_name(text: str, 
                           name_list: list, 
                           share_name: str = None, 
+                          fund_name: str = None,
                           matching_type="share", 
                           pre_common_word_list: list = None,
                           process_cache: dict = None) -> str:
@@ -116,6 +117,12 @@ def get_most_similar_name(text: str,
         text = text.strip()
         text = remove_special_characters(text)
         text = replace_abbrevation(text)
+        raw_fund_name_split = []
+        if fund_name is not None and len(fund_name.strip()) > 0:
+            fund_name = fund_name.strip()
+            fund_name = remove_special_characters(fund_name)
+            raw_fund_name_split = fund_name.upper().split()
+            
         if share_name is not None:
             share_name = remove_special_characters(share_name)
             share_name = replace_abbrevation(share_name)
@@ -171,11 +178,13 @@ def get_most_similar_name(text: str,
                     text_currency = cache.get("share_currency")
                 else:
                     if share_name is not None and len(share_name.strip()) > 0:
-                        text_share_short_name_list = get_share_short_name_from_text(share_name)
+                        text_share_short_name_list = get_share_short_name_from_text(share_name,
+                                                                                    confirm_text_share=True)
                         text_feature = get_share_feature_from_text(share_name)
                         text_currency = get_currency_from_text(share_name)
                     else:
-                        text_share_short_name_list = get_share_short_name_from_text(text)
+                        text_share_short_name_list = get_share_short_name_from_text(text,
+                                                                                    confirm_text_share=True)
                         text_feature = get_share_feature_from_text(text)
                         text_currency = get_currency_from_text(text)
                     # sort text_share_short_name_list
@@ -187,12 +196,14 @@ def get_most_similar_name(text: str,
                     }
             else:
                 if share_name is not None and len(share_name.strip()) > 0:
-                    text_share_short_name_list = get_share_short_name_from_text(share_name)
+                    text_share_short_name_list = get_share_short_name_from_text(share_name,
+                                                                                confirm_text_share=True)
                     text_share_short_name_list.sort()
                     text_feature = get_share_feature_from_text(share_name)
                     text_currency = get_currency_from_text(share_name)
                 else:
-                    text_share_short_name_list = get_share_short_name_from_text(text)
+                    text_share_short_name_list = get_share_short_name_from_text(text,
+                                                                                confirm_text_share=True)
                     text_feature = get_share_feature_from_text(text)
                     text_currency = get_currency_from_text(text)
         
@@ -203,6 +214,52 @@ def get_most_similar_name(text: str,
                 continue
             copy_name = remove_special_characters(copy_name)
             copy_name = split_words_without_space(copy_name)
+            copy_name_short_name_list = None
+            copy_name_feature = None
+            copy_name_currency = None
+            if matching_type == "share":
+                if  process_cache is not None and isinstance(process_cache, dict):
+                    if process_cache.get(copy_name, None) is not None:
+                        cache = process_cache.get(copy_name)
+                        copy_name_short_name_list = cache.get("share_short_name")
+                        copy_name_feature = cache.get("share_feature")
+                        copy_name_currency = cache.get("share_currency")
+                    else:
+                        copy_name_short_name_list = get_share_short_name_from_text(copy_share_name)
+                        if copy_name_short_name_list is not None:
+                            copy_name_short_name_list.sort()
+                        copy_name_feature = get_share_feature_from_text(copy_share_name)
+                        copy_name_currency = get_currency_from_text(copy_share_name)
+                        process_cache[copy_name] = {
+                            "share_short_name": copy_name_short_name_list,
+                            "share_feature": copy_name_feature,
+                            "share_currency": copy_name_currency
+                        }
+                else:
+                    copy_name_short_name_list = get_share_short_name_from_text(copy_share_name)
+                    copy_name_short_name_list.sort()
+                    copy_name_feature = get_share_feature_from_text(copy_share_name)
+                    copy_name_currency = get_currency_from_text(copy_share_name)
+                try:
+                    if text_share_short_name_list is not None and len(text_share_short_name_list) > 0 and \
+                        copy_name_short_name_list is not None and len(copy_name_short_name_list) > 0:
+                        updated_text_share_short_name_list, updated_copy_name_short_name_list = \
+                            compare_both_short_name(text_share_short_name_list, copy_name_short_name_list)
+                    
+                        if updated_text_share_short_name_list != text_share_short_name_list:
+                            text = ' '.join([split for split in text.split()
+                                                if split not in text_share_short_name_list])
+                            text += ' ' + ' '.join(updated_text_share_short_name_list)
+                            text_share_short_name_list = updated_text_share_short_name_list
+                        
+                        if updated_copy_name_short_name_list != copy_name_short_name_list:
+                            copy_name = ' '.join([split for split in copy_name.split()
+                                                if split not in copy_name_short_name_list])
+                            copy_name += ' ' + ' '.join(updated_copy_name_short_name_list)
+                            copy_name_short_name_list = updated_copy_name_short_name_list
+                except Exception as e:
+                    print(e)
+            
             try:
                 similarity = get_jacard_similarity(text,
                                                 copy_name,
@@ -221,30 +278,7 @@ def get_most_similar_name(text: str,
                 if similarity_2 > similarity:
                     similarity = similarity_2
             if similarity > max_similarity:
-                if matching_type == "share":
-                    if  process_cache is not None and isinstance(process_cache, dict):
-                        if process_cache.get(copy_name, None) is not None:
-                            cache = process_cache.get(copy_name)
-                            copy_name_short_name_list = cache.get("share_short_name")
-                            copy_name_feature = cache.get("share_feature")
-                            copy_name_currency = cache.get("share_currency")
-                        else:
-                            copy_name_short_name_list = get_share_short_name_from_text(copy_share_name)
-                            if copy_name_short_name_list is not None:
-                                copy_name_short_name_list.sort()
-                            copy_name_feature = get_share_feature_from_text(copy_share_name)
-                            copy_name_currency = get_currency_from_text(copy_share_name)
-                            process_cache[copy_name] = {
-                                "share_short_name": copy_name_short_name_list,
-                                "share_feature": copy_name_feature,
-                                "share_currency": copy_name_currency
-                            }
-                    else:
-                        copy_name_short_name_list = get_share_short_name_from_text(copy_share_name)
-                        copy_name_short_name_list.sort()
-                        copy_name_feature = get_share_feature_from_text(copy_share_name)
-                        copy_name_currency = get_currency_from_text(copy_share_name)
-                        
+                if matching_type == "share":                       
                     if text_currency is not None and len(text_currency) > 0 and \
                         copy_name_currency is not None and len(copy_name_currency) > 0:
                         if text_currency != copy_name_currency:
@@ -257,12 +291,18 @@ def get_most_similar_name(text: str,
                     if matching_type == "share":
                         if text_share_short_name_list is not None and len(text_share_short_name_list) > 0 and \
                             copy_name_short_name_list is not None and len(copy_name_short_name_list) > 0:
-                                raw_short_not_in_compare = False
+                                short_name_invalid = False
                                 for short in text_share_short_name_list:
                                     if short not in copy_name_short_name_list:
-                                        raw_short_not_in_compare = True
+                                        short_name_invalid = True
                                         break
-                                if raw_short_not_in_compare:
+                                for compare_short in copy_name_short_name_list:
+                                    if compare_short not in text_share_short_name_list:
+                                        # A short word may belong to the fund name, not the share name.
+                                        if compare_short.upper() not in raw_fund_name_split:
+                                            short_name_invalid = True
+                                            break
+                                if short_name_invalid:
                                     continue
                 max_similarity = similarity
                 max_similarity_full_name = full_name
@@ -289,6 +329,43 @@ def get_most_similar_name(text: str,
         return None, 0.0
 
 
+def compare_both_short_name(text_short_name_list: list, compare_short_name_list: list):
+    """Return adjusted deep copies of both short-name lists; the arguments are not modified."""
+    text_copy = deepcopy(text_short_name_list)
+    compare_copy = deepcopy(compare_short_name_list)
+    # The compare side is checked against the text side as already updated by the first call.
+    text_copy = verify_short_name_container(text_copy, compare_copy)
+    compare_copy = verify_short_name_container(compare_copy, text_copy)
+    return text_copy, compare_copy
+
+
+def verify_short_name_container(left_short_name_list: list, right_short_name_list: list):
+    """Fold the one-character names of the left list into a matching right-list name.
+
+    When the left list has two or more one-character entries, every right-list
+    entry whose length equals that number and which contains each of those
+    characters is appended to the left list, and the one-character entries are
+    removed, e.g. left ["A", "B"] with right ["AB"] gives ["AB"].
+
+    The left list is modified in place and returned; the right list is only read.
+    """
+    single_chars = [name for name in left_short_name_list if len(name) == 1]
+    # Nothing to fold unless at least two one-character names are present.
+    if len(single_chars) <= 1:
+        return left_short_name_list
+
+    for candidate in right_short_name_list:
+        if len(candidate) != len(single_chars):
+            continue
+        if not all(char in candidate for char in single_chars):
+            continue
+        for char in single_chars:
+            if char in left_short_name_list:
+                left_short_name_list.remove(char)
+        left_short_name_list.append(candidate)
+    return left_short_name_list
+
+
 def get_share_part_list(text_list: list):
     share_part_list = []
     for text in text_list:
@@ -312,7 +389,7 @@ def get_share_part_list(text_list: list):
     return share_part_list
     
 
-def get_share_short_name_from_text(text: str):
+def get_share_short_name_from_text(text: str, confirm_text_share: bool = False):
     if text is None or len(text.strip()) == 0:
         return None
     text = remove_special_characters(text.strip())
@@ -321,15 +398,18 @@ def get_share_short_name_from_text(text: str):
     
     count = 0
     share_short_name_list = []
-    
+    if confirm_text_share:
+        count_threshold = 6
+    else:
+        count_threshold = 4
     for split in text_split[::-1]:
-        if count == 4:
+        if count == count_threshold:
             break
         if split.lower() not in temp_share_features and \
             split.upper() not in total_currency_list:
             if len(split) <= 3:
                 share_short_name_list.append(split.upper())
-        count += 1
+            count += 1
     
     if len(share_short_name_list) > 1:
         remove_number = []