diff --git a/configuration/aus_prospectus/datapoint_keyword.json b/configuration/aus_prospectus/datapoint_keyword.json
index f8773c8..696a982 100644
--- a/configuration/aus_prospectus/datapoint_keyword.json
+++ b/configuration/aus_prospectus/datapoint_keyword.json
@@ -21,7 +21,7 @@
   "date_of_last_hwm_reset": {"english": ["date of last hwm reset"]},
   "date_of_last_performance_fee_restructure": {"english": ["date of last performance fee restructure"]},
   "high_water_mark_type": {"english": ["high-water mark type", "high water mark type"]},
-  "minimum_initial_investment": {"english": ["minimum initial investment","inital investment"]},
+  "minimum_initial_investment": {"english": ["minimum initial investment","inital investment", "initial investment amount"]},
   "recoverable_expenses": {"english": ["recoverable expenses","recoverable cost","expense recoveries"]},
   "indirect_costs": {"english": ["indirect cost","indirect fees","indirect fee","indirect costs"]}
 }
\ No newline at end of file
diff --git a/configuration/aus_prospectus/datapoint_level.json b/configuration/aus_prospectus/datapoint_level.json
index 9847066..5453133 100644
--- a/configuration/aus_prospectus/datapoint_level.json
+++ b/configuration/aus_prospectus/datapoint_level.json
@@ -21,7 +21,7 @@
     "date_of_last_hwm_reset": "share_level",
     "date_of_last_performance_fee_restructure": "share_level",
     "high_water_mark_type": "share_level",
-    "minimum_initial_investment": "share_level",
+    "minimum_initial_investment": "fund_level",
     "recoverable_expenses": "share_level",
     "indirect_costs": "share_level"
 }
\ No newline at end of file
diff --git a/configuration/aus_prospectus/datapoint_reported_name.json b/configuration/aus_prospectus/datapoint_reported_name.json
index c119485..115122a 100644
--- a/configuration/aus_prospectus/datapoint_reported_name.json
+++ b/configuration/aus_prospectus/datapoint_reported_name.json
@@ -21,7 +21,7 @@
     "date_of_last_hwm_reset": {"english": ["date of last hwm reset"]},
     "date_of_last_performance_fee_restructure": {"english": ["date of last performance fee restructure"]},
     "high_water_mark_type": {"english": ["high-water mark type", "high water mark type"]},
-    "minimum_initial_investment": {"english": ["minimum initial investment","inital investment"]},
+    "minimum_initial_investment": {"english": ["minimum initial investment","inital investment", "initial investment amount"]},
     "recoverable_expenses": {"english": ["recoverable expenses","recoverable cost","expense recoveries"]},
     "indirect_costs": {"english": ["indirect cost","indirect fees","indirect fee","indirect costs"]}
   }
\ No newline at end of file
diff --git a/configuration/aus_prospectus/datapoint_type.json b/configuration/aus_prospectus/datapoint_type.json
new file mode 100644
index 0000000..fa841b9
--- /dev/null
+++ b/configuration/aus_prospectus/datapoint_type.json
@@ -0,0 +1,27 @@
+{
+    "total_annual_dollar_based_charges": "float",
+    "management_fee_and_costs": "float",
+    "management_fee": "float",
+    "performance_fee": "float",
+    "performance_fee_costs": "float",
+    "buy_spread": "float",
+    "sell_spread": "float",
+    "establishment_fee": "float",
+    "contribution_fee": "float",
+    "withdrawal_fee": "float",
+    "switching_fee": "float",
+    "activity_fee": "float",
+    "exit_fee": "float", 
+    "administration_fees": "float",
+    "interposed_vehicle_performance_fee_cost": "float", 
+    "additional_hurdle": "text",
+    "benchmark_name": "text",
+    "reference_rate": "float",
+    "crystallisation_frequency": "text",
+    "date_of_last_hwm_reset": "text",
+    "date_of_last_performance_fee_restructure": "text",
+    "high_water_mark_type": "text",
+    "minimum_initial_investment": "integer",
+    "recoverable_expenses": "float",
+    "indirect_costs": "float"
+}
\ No newline at end of file
diff --git a/configuration/emea_ar/datapoint_type.json b/configuration/emea_ar/datapoint_type.json
new file mode 100644
index 0000000..2f729c8
--- /dev/null
+++ b/configuration/emea_ar/datapoint_type.json
@@ -0,0 +1,6 @@
+{
+    "tor": "float",
+    "ogc": "float",
+    "ter": "float",
+    "performance_fee": "float"
+}
\ No newline at end of file
diff --git a/core/data_extraction.py b/core/data_extraction.py
index ea5c7e7..715aa56 100644
--- a/core/data_extraction.py
+++ b/core/data_extraction.py
@@ -185,9 +185,64 @@ class DataExtraction:
             data_list = self.extract_data_by_image()
         else:
             data_list = self.extract_data_by_text()
+        if self.doc_source == "aus_prospectus":
+            data_list = self.post_supplement_data(data_list)
         # data_list = remove_abundant_data(data_list)
         self.output_data_to_file(data_list)
         return data_list
+    
+    def post_supplement_data(self, data_list: list) -> list:
+        """
+        Copy the first extracted minimum initial investment to every fund.
+
+        Each entry of ``data_list`` is a per-page dict whose
+        ``extract_data["data"]`` is a list of item dicts.  The first item
+        holding ``minimum_initial_investment`` supplies the value; it is then
+        assigned to every other fund name found anywhere in ``data_list``.
+
+        Only the page that supplied the value is rewritten: its items holding
+        ``minimum_initial_investment`` are replaced by one item per fund and
+        its remaining items are kept.  Nothing changes when the value is the
+        ``-1`` placeholder or when no other fund name exists.
+
+        :param data_list: per-page extraction results, updated in place.
+        :return: the same ``data_list`` object.
+        """
+        mii_key = "minimum_initial_investment"
+        mii_dict = mii_item = None
+        for data_dict in data_list:
+            page_data = (data_dict.get("extract_data") or {}).get("data") or []
+            mii_item = next((item for item in page_data if mii_key in item), None)
+            if mii_item is not None:
+                mii_dict = data_dict
+                break
+        if mii_item is None or mii_item[mii_key] == -1:
+            return data_list
+        minimum_initial_investment = mii_item[mii_key]
+        mii_fund_name = mii_item.get("fund_name", "")
+
+        # Distinct fund names in first-seen order (dict keys keep insertion order).
+        # The source item's own name is skipped (presumably not a fund - TODO confirm).
+        fund_names = {}
+        for data_dict in data_list:
+            page_data = (data_dict.get("extract_data") or {}).get("data") or []
+            for item in page_data:
+                fund_name = item.get("fund_name", "")
+                if fund_name and fund_name != mii_fund_name:
+                    fund_names.setdefault(fund_name, None)
+        if not fund_names:
+            # No other fund to assign to: keep the extracted record instead of
+            # replacing the page's data with an empty list.
+            return data_list
+
+        page_items = mii_dict["extract_data"]["data"]
+        kept_items = [item for item in page_items if mii_key not in item]
+        new_items = [
+            {"fund_name": name, mii_key: minimum_initial_investment}
+            for name in fund_names
+        ]
+        mii_dict["extract_data"]["data"] = kept_items + new_items
+        return data_list
 
     def extract_data_by_text(self) -> dict:
         """
diff --git a/core/page_filter.py b/core/page_filter.py
index e64bfe6..93c7d07 100644
--- a/core/page_filter.py
+++ b/core/page_filter.py
@@ -23,6 +23,7 @@ class FilterPages:
         self.pdf_file = pdf_file
         self.output_pdf_text_folder = output_pdf_text_folder
         self.configuration_folder = f"./configuration/{doc_source}/"
+        self.doc_source = doc_source
         misc_config_file = os.path.join(self.configuration_folder, "misc_config.json")
         if os.path.exists(misc_config_file):
             with open(misc_config_file, "r", encoding="utf-8") as file:
@@ -119,6 +120,7 @@ class FilterPages:
         domicile_datapoint_config_file = os.path.join(self.configuration_folder, "domicile_datapoints.json")
         datapoint_keywords_config_file = os.path.join(self.configuration_folder, "datapoint_keyword.json")
         datapoint_exclude_keywords_config_file = os.path.join(self.configuration_folder, "datapoint_exclude_keyword.json")
+        datapoint_type_config_file = os.path.join(self.configuration_folder, "datapoint_type.json")
         
         with open(language_config_file, "r", encoding="utf-8") as file:
             self.language_config = json.load(file)
@@ -130,6 +132,10 @@ class FilterPages:
             datapoint_exclude_keywords_config_file, "r", encoding="utf-8"
         ) as file:
             self.datapoint_exclude_keywords_config = json.load(file)
+        with open(
+            datapoint_type_config_file, "r", encoding="utf-8"
+        ) as file:
+            self.datapoint_type_config = json.load(file)
 
     def get_doc_info(self) -> dict:
         if len(self.document_mapping_info_df) == 0:
@@ -224,7 +230,8 @@ class FilterPages:
             if page_index < 2:
                 continue
             page_num = page_index + 1
-            if self.document_dp_pages is not None and len(self.document_dp_pages) > 0 and page_num not in self.document_dp_pages:
+            if self.document_dp_pages is not None and len(self.document_dp_pages) > 0 and \
+                page_num not in self.document_dp_pages:
                 continue
             
             page_text = clean_text(page_text)
@@ -237,7 +244,8 @@ class FilterPages:
             language = self.doc_info.get("language", None)
             if language is None:
                 language = "english"
-            if language == "english" and re.search(self.percentage_regex, text) is None:
+            if self.doc_source == "emea_ar" and language == "english" and \
+                re.search(self.percentage_regex, text) is None:
                 continue
             for datapoint, keywords in self.datapoint_config.items():
                 find_datapoint = False
@@ -257,10 +265,12 @@ class FilterPages:
                                     break
                             if need_exclude:
                                 continue
-
-                        is_valid = self.search_in_sentence_is_valid(search_text, text)
-                        if not is_valid:
-                            continue
+                        is_valid = True
+                        data_type = self.datapoint_type_config.get(datapoint, "float")
+                        if data_type == "float":
+                            is_valid = self.search_in_sentence_is_valid(search_text, text)
+                            if not is_valid:
+                                continue
                         result[datapoint].append(page_index)
                         detail = {
                             "doc_id": self.doc_id,
diff --git a/instructions/aus_prospectus/data_extraction_prompts_config.json b/instructions/aus_prospectus/data_extraction_prompts_config.json
index ccb7903..12025ce 100644
--- a/instructions/aus_prospectus/data_extraction_prompts_config.json
+++ b/instructions/aus_prospectus/data_extraction_prompts_config.json
@@ -136,21 +136,35 @@
 				"The output should be:",
 				"{\"data\": [{\"fund name\": \"MLC Horizon 2 Income Portfolio\", \"share name\": \"MLC Horizon 2 Income Portfolio\", \"management_fee_and_costs\": 1.42, \"management_fee\": 1.35, \"indirect_costs\": 0.07, \"performance_fee\": 0.06}]",
 				"\n",
-				"C. If only find \"Management fees and costs\", please output the relevant as data point key: \"management_fee_and_costs\", instead of \"management_fee\".",
+				"C. If only \"Management fees and costs\" is found, please output the same value for both data point keys: \"management_fee_and_costs\" and \"management_fee\".",
 				"---Example 1 Start---",
 				"The fees and costs for managing \nyour investment \nManagement fees and costs \n1 \n• \nSPDR World: 0.30% per annum of net asset \nvalue. This is reduced to 0.18% per annum of net \nasset value with effect from 14 February 2022.",
 				"---Example 1 End---",
 				"The output should be:",
-				"{\"data\": [{\"fund name\": \"SPDR World\", \"share name\": \"SPDR World\", \"management_fee_and_costs\": 0.18}]",
+				"{\"data\": [{\"fund name\": \"SPDR World\", \"share name\": \"SPDR World\", \"management_fee_and_costs\": 0.18, \"management_fee\": 0.18}]",
 				"---Example 2 Start---",
 				"Management Fees and Costs \n\nAs at the date of this PDS, Management Fees and Costs will be capped at: \n\n• 0.18% pa of net asset value for SPDR World \n\n• 0.21% pa of net asset value for SPDR World (Hedged) \n\n",
 				"---Example 2 End---",
 				"The output should be:",
-				"{\"data\": [{\"fund name\": \"SPDR World\", \"share name\": \"SPDR World\", \"management_fee_and_costs\": 0.18}, {\"fund name\": \"SPDR World (Hedged)\", \"share name\": \"SPDR World (Hedged)\", \"management_fee_and_costs\": 0.21}]"
+				"{\"data\": [{\"fund name\": \"SPDR World\", \"share name\": \"SPDR World\", \"management_fee_and_costs\": 0.18, \"management_fee\": 0.18}, {\"fund name\": \"SPDR World (Hedged)\", \"share name\": \"SPDR World (Hedged)\", \"management_fee_and_costs\": 0.21, \"management_fee\": 0.21}]"
 			],
 			"buy_spread": [
 				"Please don't extract data by the reported names for buy_spread or sell_spread, they are: ",
 				"Transaction costs buy/sell spread recovery, Transaction costs reducing return of the investment option (net transaction costs)"
+			],
+			"minimum_initial_investment": [
+				"Minimum initial investment is fund-level data and is an integer; example values are 100, 1,000, 5,000, 10,000, etc.",
+				"---Example 1 Start---",
+				"The minimum investment per Pension Plan account is \n$20,000. The minimum initial investment in any \ninvestment option is $5,000.\n\nPerpetual WealthFocus Pension Plan",
+				"---Example 1 End---",
+				"The output should be:",
+				"{\"data\": [{\"fund name\": \"Perpetual WealthFocus Pension Plan\", \"share name\": \"\", \"minimum_initial_investment\": 5000}]",
+				"\n",
+				"---Example 2 Start---",
+				"Prime Super \n\n5 Initial investment amount \n\nThe minimum net total initial investment amount is $10,000. Please note before you open your pension account: If you \nhave made personal contributions into super and wish to claim a tax deduction, you will have to lodge a Notice of \nIntent to Claim form with the relevant super fund (including Prime Super) before you roll your super into the Income \nStreams account.",
+				"---Example 2 End---",
+				"The output should be:",
+				"{\"data\": [{\"fund name\": \"Prime Super\", \"share name\": \"\", \"minimum_initial_investment\": 10000}]"
 			]
 		}
 	},
diff --git a/main.py b/main.py
index f6dc138..4074d4c 100644
--- a/main.py
+++ b/main.py
@@ -1,5 +1,6 @@
 import os
 import json
+import numpy as np
 import pandas as pd
 from glob import glob
 from tqdm import tqdm
@@ -1043,7 +1044,7 @@ def batch_run_documents(
     )
     re_run_extract_data = True
     re_run_mapping_data = True
-    force_save_total_data = False
+    force_save_total_data = True
     calculate_metrics = False
 
     extract_way = "text"
@@ -1309,6 +1310,7 @@ def merge_output_data_aus_prospectus(
         fund_doc_data_df = data_df[
             (data_df["doc_id"] == doc_id) & (data_df["investment_type"] == 33)
         ]
+        fund_doc_data_df.fillna("", inplace=True)
         for index, row in fund_doc_data_df.iterrows():
             doc_id = str(row["doc_id"])
             page_index = int(row["page_index"])
@@ -1319,7 +1321,6 @@ def merge_output_data_aus_prospectus(
             value = row["value"]
             fund_id = row["investment_id"]
             fund_legal_name = row["investment_name"]
-
             exist = False
             if fund_id != "":
                 for data in doc_data_list:
@@ -1331,7 +1332,14 @@ def merge_output_data_aus_prospectus(
                         if page_index not in data["page_index"]:
                             data["page_index"].append(page_index)
                         exist = True
-
+            else:
+                for data in doc_data_list:
+                    if data["raw_name"] == raw_name:
+                        update_key = datapoint
+                        data[update_key] = value
+                        if page_index not in data["page_index"]:
+                            data["page_index"].append(page_index)
+                        exist = True
             if not exist:
                 data = {
                     "DocumentId": doc_id,