diff --git a/main.py b/main.py
index df8ed8e..d3891f2 100644
--- a/main.py
+++ b/main.py
@@ -564,10 +564,8 @@ def test_data_extraction_metrics():
 
 
 def test_mapping_raw_name():
-    doc_id = "344636875"
-    raw_fund_name = ""
-    raw_share_name = ""
-    raw_name = "Aberdeen Standard Alpha Global Loans I QInc USD"
+    doc_id = "481475385"
+    raw_name = "Emerging Markets Fund Y-DIST Shares (USD)"
     output_folder = r"/data/emea_ar/output/mapping_data/docs/by_text/"
     data_mapping = DataMapping(
         doc_id,
@@ -578,7 +576,7 @@ def test_mapping_raw_name():
     )
     mapping_info = data_mapping.matching_with_database(
         raw_name=raw_name,
-        parent_id="FS0000DA0E", 
+        parent_id=None, 
         matching_type="share"
     )
     print(mapping_info)
@@ -657,16 +655,15 @@ if __name__ == "__main__":
         "486378555",
         "506559375",
         "479793787",
-        "333207452",
         "471641628",
     ]
     special_doc_id_list = check_mapping_doc_id_list
-    # special_doc_id_list = ["402181770"]
+    special_doc_id_list = ["402113224"]
     output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
     output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
     re_run_extract_data = False
     re_run_mapping_data = True
-    force_save_total_data = True
+    force_save_total_data = False
 
     extract_ways = ["text"]
     for extract_way in extract_ways:
diff --git a/utils/biz_utils.py b/utils/biz_utils.py
index 1ca1697..346fd1d 100644
--- a/utils/biz_utils.py
+++ b/utils/biz_utils.py
@@ -2,6 +2,54 @@ import re
 from copy import deepcopy
 from traceback import print_exc
 
+
+total_currency_list = [  # upper-case currency codes; callers test `word.upper() in total_currency_list`
+    "USD",
+    "EUR",
+    "AUD",
+    "JPY",
+    "CHF",
+    "GBP",
+    "SEK",
+    "CNY",
+    "NZD",
+    "CNH",
+    "NOK",
+    "SGD",
+    "HKD",
+    "ZAR",
+    "PLN",
+    "CAD",
+    "CZK",
+    "HUF",
+    "DKK",
+    "BRL",
+    "SKK",
+    "RON",
+    "TRY",
+    "BGN",
+    "CUP",
+    "MXN",
+    "CLF",
+    "XCD",
+    "ISK",
+    "IDR",
+    "MNT",
+    "AED",
+    "AFN",
+    "INR",
+    "ESP",
+    "RUB",
+    "CLP",
+    "KRW",
+    "ETB",
+    "DZD",
+    "XEU",
+    "XFO",
+]
+# Feature words matched case-insensitively by get_share_feature_from_text.
+share_features = ['Accumulation', 'Income', 'Distribution', 'Investor', 'Institutional', 'Capitalisation', 'Admin', 'Advantage']
+
 def add_slash_to_text_as_regex(text: str):
     if text is None or len(text) == 0:
         return text
@@ -29,18 +77,18 @@ def get_most_similar_name(text: str, name_list: list, pre_common_word_list: list
     Get the most similar fund name from fund_name_list by jacard similarity
     """
     try:
-        copy_fund_name_list = deepcopy(name_list)
+        copy_name_list = deepcopy(name_list)
         if text is None or len(text.split()) == 0 or \
-                copy_fund_name_list is None or len(copy_fund_name_list) == 0:
+                copy_name_list is None or len(copy_name_list) == 0:
             return None, None
         
-        copy_fund_name_list = [replace_abbrevation(copy_fund_name) for copy_fund_name 
-                               in copy_fund_name_list]
+        copy_name_list = [replace_abbrevation(copy_name) for copy_name 
+                               in copy_name_list]
 
         # get common words in fund_name_list
         common_word_list = []
         if len(name_list) > 1:
-            _, common_word_list = remove_common_word(copy_fund_name_list)
+            _, common_word_list = remove_common_word(copy_name_list)
         if pre_common_word_list is not None and len(pre_common_word_list) > 0:
             common_word_list.extend([word for word in pre_common_word_list
                                      if word not in common_word_list])
@@ -63,14 +111,14 @@ def get_most_similar_name(text: str, name_list: list, pre_common_word_list: list
             for word in common_word_list:
                 if word not in lower_new_splits:
                     # remove word in fund_name_list
-                    for i in range(len(copy_fund_name_list)):
-                        temp_splits = copy_fund_name_list[i].split()
-                        copy_fund_name_list[i] = ' '.join([split for split in temp_splits 
+                    for i in range(len(copy_name_list)):
+                        temp_splits = copy_name_list[i].split()
+                        copy_name_list[i] = ' '.join([split for split in temp_splits 
                                                            if remove_special_characters(split).lower() != word])
 
-            for i in range(len(copy_fund_name_list)):
-                temp_splits = copy_fund_name_list[i].split()
-                copy_fund_name_list[i] = ' '.join([split for split in temp_splits
+            for i in range(len(copy_name_list)):
+                temp_splits = copy_name_list[i].split()
+                copy_name_list[i] = ' '.join([split for split in temp_splits
                                                    if remove_special_characters(split).lower() not in ['fund', 'portfolio', 'class', 'share', 'shares']])
             final_splits = []
             for split in new_splits:
@@ -79,38 +127,72 @@ def get_most_similar_name(text: str, name_list: list, pre_common_word_list: list
 
             text = ' '.join(final_splits)
         max_similarity = 0
-        max_similarity_fund_name = None
+        max_similarity_full_name = None
         text = remove_special_characters(text)
-        text, copy_fund_name_list = update_for_currency(text, copy_fund_name_list)
-        for fund_name, copy_fund_name in zip(name_list , copy_fund_name_list):
-            copy_fund_name = remove_special_characters(copy_fund_name)
-            copy_fund_name = split_words_without_space(copy_fund_name)
+        text, copy_name_list = update_for_currency(text, copy_name_list)
+        text_currencty = get_currency_from_text(text)
+        text_feature = get_share_feature_from_text(text)
+        for full_name, copy_name in zip(name_list , copy_name_list):
+            copy_name = remove_special_characters(copy_name)
+            copy_name = split_words_without_space(copy_name)
             similarity = get_jacard_similarity(text,
-                                            copy_fund_name,
+                                            copy_name,
                                             need_remove_numeric_characters=False)
+            copy_name_2 = replace_abbrevation(copy_name)
+            if copy_name != copy_name_2:
+                similarity_2 = get_jacard_similarity(text,
+                                            copy_name_2,
+                                            need_remove_numeric_characters=False)
+                if similarity_2 > similarity:
+                    similarity = similarity_2
             if similarity > max_similarity:
+                copy_name_currency = get_currency_from_text(copy_name)
+                if text_currencty is not None and copy_name_currency is not None:
+                    if text_currencty != copy_name_currency:
+                        continue
+                copy_name_feature = get_share_feature_from_text(copy_name)
+                if text_feature is not None and copy_name_feature is not None:
+                    if text_feature != copy_name_feature:
+                        continue
                 max_similarity = similarity
-                max_similarity_fund_name = fund_name
+                max_similarity_full_name = full_name
             if max_similarity == 1:
                 break
         if max_similarity < 0.35:
             return None, max_similarity
-        return max_similarity_fund_name, max_similarity
+        return max_similarity_full_name, max_similarity
     except Exception as e:
         print(e)
         print_exc()
         return None, 0.0
 
+def get_share_feature_from_text(text: str):
+    """Return the right-most word of ``text`` that is a share feature, or None.
+
+    Matching is case-insensitive and the returned word is lower-cased.
+    """
+    if text is None or not text.strip():
+        return None
+    known_features = [feature.lower() for feature in share_features]
+    # Scan from the end so the last feature mentioned in the text wins.
+    words = reversed(text.strip().lower().split())
+    return next((word for word in words if word in known_features), None)
+
+def get_currency_from_text(text: str):
+    """Return the right-most currency code in ``text``, lower-cased, or None.
+
+    A word counts as a currency when its upper-case form is in total_currency_list.
+    """
+    if text is None or not text.strip():
+        return None
+    # Scan from the end so the last currency mentioned in the text wins.
+    words = reversed(text.strip().lower().split())
+    return next((word for word in words if word.upper() in total_currency_list), None)
+
 
 def update_for_currency(text: str, compare_list: list):
     text_split = text.split()
     with_currency = False
-    total_currency_list = ['USD', 'EUR', 'AUD', 'JPY', 'CHF', 'GBP', 'SEK', 'CNY', 
-                           'NZD', 'CNH', 'NOK', 'SGD', 'HKD', 'ZAR', 'PLN', 'CAD', 
-                           'CZK', 'HUF', 'DKK', 'BRL', 'SKK', 'RON', 'TRY', 'BGN', 
-                           'CUP', 'MXN', 'TOP', 'ILS', 'CLF', 'XCD', 'ISK', 'IDR', 
-                           'MNT', 'AED', 'AFN', 'INR', 'ESP', 'RUB', 'CLP', 'KRW', 
-                           'ETB', 'DZD', 'XEU', 'XFO']
     for split in text_split:
         if split.upper() in total_currency_list:
             with_currency = True
@@ -198,6 +280,16 @@ def remove_common_word(text_list: list):
             else:
                 common_word_list = list(
                     set(common_word_list).intersection(set(new_text_splits_list[j])))
+    
+    remove_list = []
+    # Currency codes must not be treated as common words: drop them from the list.
+    for word in common_word_list:
+        if word.upper() in total_currency_list:
+            remove_list.append(word)
+    for remove in remove_list:
+        if remove in common_word_list:
+            common_word_list.remove(remove)
+    
     common_word_list = list(set(common_word_list))
     for i in range(len(new_text_splits_list)):
         for common_word in common_word_list:
@@ -219,12 +311,22 @@ def split_words_without_space(text: str):
     # if len(splits) > 1:
     #     return text
     # find all words with capital letter + lower letter
-    regex = r'[A-Z][a-z]+'
+    regex = r"[A-Z][a-z]+"
+    regex2 = r"[A-Z]{2,}[a-z]+"
     word_list = re.findall(regex, text)
+    word_list2 = re.findall(regex2, text)
     if len(word_list) > 0:
         for word in word_list:
-            text = text.replace(word, ' ' + word + ' ')
-        text = re.sub(r'(\s)+', ' ', text)
+            if len(word_list2) > 0:
+                word_exists_in_word2 = False
+                for word2 in word_list2:
+                    if word in word2:
+                        word_exists_in_word2 = True
+                        break
+                if word_exists_in_word2:
+                    continue
+            text = text.replace(word, " " + word + " ")
+        text = re.sub(r"(\s)+", " ", text)
     return text.strip()
 
 
@@ -332,6 +434,8 @@ def replace_abbrevation(text: str):
         text = re.sub(r'swedish\s+krona', 'SEK', text, flags=re.IGNORECASE)
     elif 'swedish kronor' in text.lower():
         text = re.sub(r'swedish\s+kronor', 'SEK', text, flags=re.IGNORECASE)
+    elif "GPB" in text.split():
+        text = re.sub(r"GPB", "GBP", text, flags=re.IGNORECASE)
     elif 'sterling' in text.lower().split():
         text = re.sub(r'sterling', 'GBP', text, flags=re.IGNORECASE)
     elif 'euro' in text.lower().split():
@@ -342,7 +446,7 @@ def replace_abbrevation(text: str):
         text = re.sub(r'\$', 'USD', text, flags=re.IGNORECASE)
     elif '£' in text.lower().split():
         text = re.sub(r'\£', 'GBP', text, flags=re.IGNORECASE)
-    elif 'RMB' in text.lower().split():
+    elif 'RMB' in text.split():
         text = re.sub(r'RMB', 'CNY', text, flags=re.IGNORECASE)
     else:
         pass