diff --git a/configuration/datapoint_exclude_keyword.json b/configuration/aus_prospectus/datapoint_exclude_keyword.json similarity index 100% rename from configuration/datapoint_exclude_keyword.json rename to configuration/aus_prospectus/datapoint_exclude_keyword.json diff --git a/configuration/datapoint_keyword.json b/configuration/aus_prospectus/datapoint_keyword.json similarity index 100% rename from configuration/datapoint_keyword.json rename to configuration/aus_prospectus/datapoint_keyword.json diff --git a/configuration/datapoint_level.json b/configuration/aus_prospectus/datapoint_level.json similarity index 100% rename from configuration/datapoint_level.json rename to configuration/aus_prospectus/datapoint_level.json diff --git a/configuration/datapoint_name.json b/configuration/aus_prospectus/datapoint_name.json similarity index 100% rename from configuration/datapoint_name.json rename to configuration/aus_prospectus/datapoint_name.json diff --git a/configuration/datapoint_reported_name.json b/configuration/aus_prospectus/datapoint_reported_name.json similarity index 100% rename from configuration/datapoint_reported_name.json rename to configuration/aus_prospectus/datapoint_reported_name.json diff --git a/configuration/document_dp_pages.json b/configuration/aus_prospectus/document_dp_pages.json similarity index 93% rename from configuration/document_dp_pages.json rename to configuration/aus_prospectus/document_dp_pages.json index 4fe058c..940dc20 100644 --- a/configuration/document_dp_pages.json +++ b/configuration/aus_prospectus/document_dp_pages.json @@ -6,7 +6,6 @@ "552505237": [16, 17, 18, 19, 25, 26, 27], "552505278": [12, 13, 14, 15, 16, 17, 18, 19, 20, 27, 28, 29], "554431052": [34, 35, 36, 41, 42, 43], - "554851189": [12, 13, 14], "555377021": [21, 22, 23, 24, 25, 26], "555654388": [35, 36] } diff --git a/configuration/domicile_datapoints.json b/configuration/aus_prospectus/domicile_datapoints.json similarity index 100% rename from 
configuration/domicile_datapoints.json rename to configuration/aus_prospectus/domicile_datapoints.json diff --git a/configuration/language.json b/configuration/aus_prospectus/language.json similarity index 100% rename from configuration/language.json rename to configuration/aus_prospectus/language.json diff --git a/configuration/aus_prospectus/misc_config.json b/configuration/aus_prospectus/misc_config.json new file mode 100644 index 0000000..1cd6c97 --- /dev/null +++ b/configuration/aus_prospectus/misc_config.json @@ -0,0 +1,3 @@ +{ + "apply_pdf2html": true +} \ No newline at end of file diff --git a/configuration/objective_strategy_regex.json b/configuration/aus_prospectus/objective_strategy_regex.json similarity index 100% rename from configuration/objective_strategy_regex.json rename to configuration/aus_prospectus/objective_strategy_regex.json diff --git a/core/data_extraction.py b/core/data_extraction.py index 5175264..51b9cf1 100644 --- a/core/data_extraction.py +++ b/core/data_extraction.py @@ -14,6 +14,7 @@ from utils.biz_utils import add_slash_to_text_as_regex, clean_text, get_most_sim class DataExtraction: def __init__( self, + doc_source: str, doc_id: str, pdf_file: str, output_data_folder: str, @@ -24,8 +25,11 @@ class DataExtraction: extract_way: str = "text", output_image_folder: str = None, ) -> None: + self.doc_source = doc_source self.doc_id = doc_id self.pdf_file = pdf_file + self.configuration_folder = f"./configuration/{doc_source}/" + self.instruction_folder = f"./instructions/{doc_source}/" if output_data_folder is None or len(output_data_folder) == 0: output_data_folder = r"/data/emea_ar/output/extract_data/docs/" os.makedirs(output_data_folder, exist_ok=True) @@ -75,7 +79,7 @@ class DataExtraction: def get_investment_objective_pages(self): investment_objective_pages = [] if self.document_type == 1: - objective_strategy_regex_config_file = r"./configuration/objective_strategy_regex.json" + objective_strategy_regex_config_file = 
os.path.join(self.configuration_folder, "objective_strategy_regex.json") with open(objective_strategy_regex_config_file, "r", encoding="utf-8") as f: objective_strategy_regex_config = json.load(f) objective_start_regex = objective_strategy_regex_config.get("objective_strategy", {}).get("start", "") @@ -89,7 +93,7 @@ class DataExtraction: return investment_objective_pages def get_datapoint_reported_name(self): - language_config_file = r"./configuration/language.json" + language_config_file = os.path.join(self.configuration_folder, "language.json") self.language_config = {} with open(language_config_file, "r", encoding="utf-8") as file: self.language_config = json.load(file) @@ -97,7 +101,7 @@ class DataExtraction: self.language_id = self.document_mapping_info_df["Language"].iloc[0] self.language = self.language_config.get(self.language_id, None) - datapoint_reported_name_config_file = r"./configuration/datapoint_reported_name.json" + datapoint_reported_name_config_file = os.path.join(self.configuration_folder, "datapoint_reported_name.json") all_datapoint_reported_name = {} with open(datapoint_reported_name_config_file, "r", encoding="utf-8") as file: all_datapoint_reported_name = json.load(file) @@ -139,19 +143,19 @@ class DataExtraction: output_folder=self.output_image_folder) def get_instructions_config(self) -> dict: - instructions_config_file = r"./instructions/data_extraction_prompts_config.json" + instructions_config_file = os.path.join(self.instruction_folder, "data_extraction_prompts_config.json") with open(instructions_config_file, "r", encoding="utf-8") as f: instructions_config = json.load(f) return instructions_config def get_datapoint_level(self) -> dict: - datapoint_level_file = r"./configuration/datapoint_level.json" + datapoint_level_file = os.path.join(self.configuration_folder, "datapoint_level.json") with open(datapoint_level_file, "r", encoding="utf-8") as f: datapoint_level = json.load(f) return datapoint_level def get_datapoint_name(self) -> 
dict: - datapoint_name_file = r"./configuration/datapoint_name.json" + datapoint_name_file = os.path.join(self.configuration_folder, "datapoint_name.json") with open(datapoint_name_file, "r", encoding="utf-8") as f: datapoint_name = json.load(f) return datapoint_name @@ -463,10 +467,10 @@ class DataExtraction: """ logger.info(f"Extracting data from page {page_num}") if self.document_type == 1: - pre_context = f"The document type is prospectus. \nThe fund names in this document are {', '.join(self.fund_name_list)}." - if pre_context in page_text: - page_text = page_text.replace(pre_context, "\n").strip() - + # pre_context = f"The document type is prospectus. \nThe fund names in this document are {', '.join(self.fund_name_list)}." + # if pre_context in page_text: + # page_text = page_text.replace(pre_context, "\n").strip() + pre_context = "" if len(self.investment_objective_pages) > 0: # Get the page number of the most recent investment objective at the top of the current page. diff_pages = [page_num - investment_objective_page for investment_objective_page @@ -479,7 +483,7 @@ class DataExtraction: page_text = page_text.replace(top_nearest_investment_objective_text, "").strip() pre_context = f"\nThe most recent investment objective page text which maybe with fund name is: \n{top_nearest_investment_objective_text}.\n" # If can't find previous investment objective text, add the fund names to be the pre-fix of page text - page_text = f"{pre_context}\n{page_text}" + page_text = f"{pre_context}\n{page_text}".strip() instructions = self.get_instructions_by_datapoints( page_text, diff --git a/core/data_mapping.py b/core/data_mapping.py index 6288d36..f76bf54 100644 --- a/core/data_mapping.py +++ b/core/data_mapping.py @@ -17,9 +17,11 @@ class DataMapping: raw_document_data_list: list, document_mapping_info_df: pd.DataFrame, output_data_folder: str, + doc_source: str = "emea_ar" ): self.doc_id = doc_id self.datapoints = datapoints + self.doc_source = doc_source 
self.raw_document_data_list = raw_document_data_list if document_mapping_info_df is None or len(document_mapping_info_df) == 0: self.document_mapping_info_df = query_document_fund_mapping(doc_id, rerun=False) diff --git a/core/page_filter.py b/core/page_filter.py index 748ed12..534720c 100644 --- a/core/page_filter.py +++ b/core/page_filter.py @@ -12,11 +12,18 @@ from utils.pdf_util import get_pdf_pages_by_html class FilterPages: def __init__( - self, doc_id: str, pdf_file: str, document_mapping_info_df: pd.DataFrame, apply_pdf2html: bool = False + self, doc_id: str, pdf_file: str, document_mapping_info_df: pd.DataFrame, doc_source: str = "emea_ar" ) -> None: self.doc_id = doc_id self.pdf_file = pdf_file - self.apply_pdf2html = apply_pdf2html + self.configuration_folder = f"./configuration/{doc_source}/" + misc_config_file = os.path.join(self.configuration_folder, "misc_config.json") + if os.path.exists(misc_config_file): + with open(misc_config_file, "r", encoding="utf-8") as file: + self.misc_config = json.load(file) + self.apply_pdf2html = self.misc_config.get("apply_pdf2html", False) + else: + self.apply_pdf2html = False self.page_text_dict = self.get_pdf_page_text_dict() if document_mapping_info_df is None or len(document_mapping_info_df) == 0: self.document_mapping_info_df = query_document_fund_mapping(doc_id, rerun=False) @@ -31,7 +38,7 @@ class FilterPages: self.percentage_regex = r"\b\d{1,3}\.\d+\b|\b\d{1,3}(\,\d+)\b%" def get_document_dp_pages(self) -> dict: - document_dp_pages_file = r"./configuration/document_dp_pages.json" + document_dp_pages_file = os.path.join(self.configuration_folder, "document_dp_pages.json") with open(document_dp_pages_file, "r", encoding="utf-8") as file: self.document_dp_pages_config = json.load(file) self.document_dp_pages = self.document_dp_pages_config.get(self.doc_id, []) @@ -43,6 +50,7 @@ class FilterPages: if len(page_text_dict.keys()) == 0: pdf_util = PDFUtil(self.pdf_file) success, text, page_text_dict = 
pdf_util.extract_text() + self.apply_pdf2html = False else: pdf_util = PDFUtil(self.pdf_file) success, text, page_text_dict = pdf_util.extract_text() @@ -52,12 +60,11 @@ class FilterPages: """ Remark: remove the """ - language_config_file = r"./configuration/language.json" - domicile_datapoint_config_file = r"./configuration/domicile_datapoints.json" - datapoint_keywords_config_file = r"./configuration/datapoint_keyword.json" - datapoint_exclude_keywords_config_file = ( - r"./configuration/datapoint_exclude_keyword.json" - ) + language_config_file = os.path.join(self.configuration_folder, "language.json") + domicile_datapoint_config_file = os.path.join(self.configuration_folder, "domicile_datapoints.json") + datapoint_keywords_config_file = os.path.join(self.configuration_folder, "datapoint_keyword.json") + datapoint_exclude_keywords_config_file = os.path.join(self.configuration_folder, "datapoint_exclude_keyword.json") + with open(language_config_file, "r", encoding="utf-8") as file: self.language_config = json.load(file) with open(domicile_datapoint_config_file, "r", encoding="utf-8") as file: diff --git a/instructions/data_extraction_image_prompts.txt b/instructions/aus_prospectus/data_extraction_image_prompts.txt similarity index 100% rename from instructions/data_extraction_image_prompts.txt rename to instructions/aus_prospectus/data_extraction_image_prompts.txt diff --git a/instructions/data_extraction_prompts.txt b/instructions/aus_prospectus/data_extraction_prompts.txt similarity index 100% rename from instructions/data_extraction_prompts.txt rename to instructions/aus_prospectus/data_extraction_prompts.txt diff --git a/instructions/data_extraction_prompts_config.json b/instructions/aus_prospectus/data_extraction_prompts_config.json similarity index 100% rename from instructions/data_extraction_prompts_config.json rename to instructions/aus_prospectus/data_extraction_prompts_config.json diff --git a/instructions/table_extraction_image_optimize_prompts.txt 
b/instructions/aus_prospectus/table_extraction_image_optimize_prompts.txt similarity index 100% rename from instructions/table_extraction_image_optimize_prompts.txt rename to instructions/aus_prospectus/table_extraction_image_optimize_prompts.txt diff --git a/instructions/table_extraction_image_prompts.txt b/instructions/aus_prospectus/table_extraction_image_prompts.txt similarity index 100% rename from instructions/table_extraction_image_prompts.txt rename to instructions/aus_prospectus/table_extraction_image_prompts.txt diff --git a/instructions/table_extraction_image_prompts_v2.txt b/instructions/aus_prospectus/table_extraction_image_prompts_v2.txt similarity index 100% rename from instructions/table_extraction_image_prompts_v2.txt rename to instructions/aus_prospectus/table_extraction_image_prompts_v2.txt diff --git a/instructions/table_extraction_prompts.txt b/instructions/aus_prospectus/table_extraction_prompts.txt similarity index 100% rename from instructions/table_extraction_prompts.txt rename to instructions/aus_prospectus/table_extraction_prompts.txt diff --git a/instructions/text_extraction_image_prompts.txt b/instructions/aus_prospectus/text_extraction_image_prompts.txt similarity index 100% rename from instructions/text_extraction_image_prompts.txt rename to instructions/aus_prospectus/text_extraction_image_prompts.txt diff --git a/main.py b/main.py index d8e1acf..1ebcf98 100644 --- a/main.py +++ b/main.py @@ -23,15 +23,16 @@ class EMEA_AR_Parsing: def __init__( self, doc_id: str, + doc_source: str = "emea_ar", pdf_folder: str = r"/data/emea_ar/pdf/", output_pdf_text_folder: str = r"/data/emea_ar/output/pdf_text/", output_extract_data_folder: str = r"/data/emea_ar/output/extract_data/docs/", output_mapping_data_folder: str = r"/data/emea_ar/output/mapping_data/docs/", extract_way: str = "text", - apply_pdf2html: bool = False, drilldown_folder: str = r"/data/emea_ar/output/drilldown/", ) -> None: self.doc_id = doc_id + self.doc_source = doc_source 
self.pdf_folder = pdf_folder os.makedirs(self.pdf_folder, exist_ok=True) self.pdf_file = self.download_pdf() @@ -70,12 +71,12 @@ class EMEA_AR_Parsing: os.makedirs(self.output_mapping_data_folder, exist_ok=True) self.filter_pages = FilterPages( - self.doc_id, self.pdf_file, self.document_mapping_info_df, apply_pdf2html + self.doc_id, self.pdf_file, self.document_mapping_info_df, self.doc_source ) self.page_text_dict = self.filter_pages.page_text_dict try: os.makedirs(output_pdf_text_folder, exist_ok=True) - if apply_pdf2html: + if self.filter_pages.apply_pdf2html: output_pdf_text_folder = os.path.join(output_pdf_text_folder, "pdf2html/") else: output_pdf_text_folder = os.path.join(output_pdf_text_folder, "pymupdf/") @@ -132,6 +133,7 @@ class EMEA_AR_Parsing: if not found_data: try: data_extraction = DataExtraction( + self.doc_source, self.doc_id, self.pdf_file, self.output_extract_data_folder, @@ -263,15 +265,16 @@ class EMEA_AR_Parsing: return data_mapping.mapping_raw_data() -def filter_pages(doc_id: str, pdf_folder: str) -> None: +def filter_pages(doc_id: str, pdf_folder: str, doc_source: str) -> None: logger.info(f"Filter EMEA AR PDF pages for doc_id: {doc_id}") - emea_ar_parsing = EMEA_AR_Parsing(doc_id, pdf_folder) + emea_ar_parsing = EMEA_AR_Parsing(doc_id, doc_source=doc_source, pdf_folder=pdf_folder) datapoint_page_info, result_details = emea_ar_parsing.get_datapoint_page_info() return datapoint_page_info, result_details def extract_data( doc_id: str, + doc_source: str, pdf_folder: str, output_data_folder: str, extract_way: str = "text", @@ -280,7 +283,8 @@ def extract_data( logger.info(f"Extract EMEA AR data for doc_id: {doc_id}") emea_ar_parsing = EMEA_AR_Parsing( doc_id, - pdf_folder, + doc_source=doc_source, + pdf_folder=pdf_folder, output_extract_data_folder=output_data_folder, extract_way=extract_way, ) @@ -294,8 +298,8 @@ def mapping_data( output_pdf_text_folder: str, output_extract_data_folder: str, output_mapping_folder: str, + doc_source: str = 
"emea_ar", extract_way: str = "text", - apply_pdf2html: bool = False, drilldown_folder: str = r"/data/emea_ar/output/drilldown/", re_run_extract_data: bool = False, re_run_mapping_data: bool = False, @@ -303,12 +307,12 @@ def mapping_data( logger.info(f"Extract EMEA AR data for doc_id: {doc_id}") emea_ar_parsing = EMEA_AR_Parsing( doc_id, - pdf_folder, + doc_source=doc_source, + pdf_folder=pdf_folder, output_pdf_text_folder=output_pdf_text_folder, output_extract_data_folder=output_extract_data_folder, output_mapping_data_folder=output_mapping_folder, extract_way=extract_way, - apply_pdf2html=apply_pdf2html, drilldown_folder=drilldown_folder, ) doc_data_from_gpt, annotation_list = emea_ar_parsing.extract_data(re_run=re_run_extract_data) @@ -320,6 +324,7 @@ def mapping_data( def batch_extract_data( pdf_folder: str, + doc_source: str = "emea_ar", doc_data_excel_file: str = None, output_child_folder: str = r"/data/emea_ar/output/extract_data/docs/", output_total_folder: str = r"/data/emea_ar/output/extract_data/total/", @@ -349,6 +354,7 @@ def batch_extract_data( continue data_from_gpt = extract_data( doc_id=doc_id, + doc_source=doc_source, pdf_folder=pdf_folder, output_data_folder=output_child_folder, extract_way=extract_way, @@ -372,7 +378,8 @@ def batch_extract_data( def batch_start_job( - pdf_folder: str, + doc_source: str = "emea_ar", + pdf_folder: str = "/data/emea_ar/pdf/", output_pdf_text_folder: str = r"/data/emea_ar/output/pdf_text/", doc_data_excel_file: str = None, output_extract_data_child_folder: str = r"/data/emea_ar/output/extract_data/docs/", @@ -380,7 +387,6 @@ def batch_start_job( output_extract_data_total_folder: str = r"/data/emea_ar/output/extract_data/total/", output_mapping_total_folder: str = r"/data/emea_ar/output/mapping_data/total/", extract_way: str = "text", - apply_pdf2html: bool = False, drilldown_folder: str = r"/data/emea_ar/output/drilldown/", special_doc_id_list: list = None, re_run_extract_data: bool = False, @@ -418,8 +424,8 @@ def 
batch_start_job( output_pdf_text_folder=output_pdf_text_folder, output_extract_data_folder=output_extract_data_child_folder, output_mapping_folder=output_mapping_child_folder, + doc_source=doc_source, extract_way=extract_way, - apply_pdf2html=apply_pdf2html, drilldown_folder=drilldown_folder, re_run_extract_data=re_run_extract_data, re_run_mapping_data=re_run_mapping_data, @@ -531,6 +537,7 @@ def only_output_mapping_data_in_db(mapping_data: pd.DataFrame) -> None: def batch_filter_pdf_files( pdf_folder: str, + doc_source: str = "emea_ar", doc_data_excel_file: str = None, output_folder: str = r"/data/emea_ar/output/filter_pages/", special_doc_id_list: list = None, @@ -556,7 +563,7 @@ def batch_filter_pdf_files( if doc_list is not None and doc_id not in doc_list: continue doc_datapoint_page_info, doc_result_details = filter_pages( - doc_id=doc_id, pdf_folder=pdf_folder + doc_id=doc_id, pdf_folder=pdf_folder, doc_source=doc_source ) result_list.append(doc_datapoint_page_info) result_details.extend(doc_result_details) @@ -631,7 +638,7 @@ def test_auto_generate_instructions(): datapoint_list.remove("doc_id") data_extraction = DataExtraction( - doc_id, pdf_file, page_text_dict, datapoint_page_info, document_mapping_info_df + "emea_ar", doc_id, pdf_file, page_text_dict, datapoint_page_info, document_mapping_info_df ) page_index_list = list(page_text_dict.keys()) if len(page_index_list) > 0: @@ -898,15 +905,15 @@ def replace_rerun_data(new_data_file: str, original_data_file: str): new_extract_data.to_excel(writer, index=False, sheet_name=extract_data_sheet) -def batch_run_documents(special_doc_id_list: list = None, +def batch_run_documents(doc_source: str = "emea_ar", + special_doc_id_list: list = None, pdf_folder:str = r"/data/emea_ar/pdf/", output_pdf_text_folder: str = r"/data/emea_ar/output/pdf_text/", output_extract_data_child_folder:str = r"/data/emea_ar/output/extract_data/docs/", output_extract_data_total_folder:str = r"/data/emea_ar/output/extract_data/total/",
output_mapping_child_folder:str = r"/data/emea_ar/output/mapping_data/docs/", output_mapping_total_folder:str = r"/data/emea_ar/output/mapping_data/total/", - drilldown_folder: str = r"/data/emea_ar/output/drilldown/", - apply_pdf2html: bool = False): + drilldown_folder: str = r"/data/emea_ar/output/drilldown/"): sample_document_list_folder = r'./sample_documents/' document_list_files = glob(sample_document_list_folder + "*.txt") page_filter_ground_truth_file = ( @@ -914,7 +921,7 @@ def batch_run_documents(special_doc_id_list: list = None, ) re_run_extract_data = True re_run_mapping_data = True - force_save_total_data = True + force_save_total_data = False calculate_metrics = False extract_way = "text" @@ -932,6 +939,7 @@ def batch_run_documents(special_doc_id_list: list = None, doc_id_list = f.readlines() doc_id_list = [doc_id.strip() for doc_id in doc_id_list] batch_start_job( + doc_source, pdf_folder, output_pdf_text_folder, page_filter_ground_truth_file, @@ -940,7 +948,6 @@ def batch_run_documents(special_doc_id_list: list = None, output_extract_data_total_folder, output_mapping_total_folder, extract_way, - apply_pdf2html, drilldown_folder, doc_id_list, re_run_extract_data, @@ -951,6 +958,7 @@ def batch_run_documents(special_doc_id_list: list = None, ) else: batch_start_job( + doc_source, pdf_folder, output_pdf_text_folder, page_filter_ground_truth_file, @@ -959,7 +967,6 @@ def batch_run_documents(special_doc_id_list: list = None, output_extract_data_total_folder, output_mapping_total_folder, extract_way, - apply_pdf2html, drilldown_folder, special_doc_id_list, re_run_extract_data, @@ -971,6 +978,7 @@ def batch_run_documents(special_doc_id_list: list = None, def batch_initial_document(sample_document_list_folder: str = r'./sample_documents/', document_list_file: str = "sample_document_complex.txt", + doc_source: str = "emea_ar", pdf_folder: str = r"/data/emea_ar/pdf/", output_extract_data_child_folder: str = r"/data/emea_ar/output/extract_data/docs/", 
output_mapping_child_folder: str = r"/data/emea_ar/output/mapping_data/docs/"): @@ -981,6 +989,7 @@ def batch_initial_document(sample_document_list_folder: str = r'./sample_documen for doc_id in tqdm(doc_id_list): logger.info(f"Start to initial document: {doc_id}") emea_ar_parsing = EMEA_AR_Parsing(doc_id=doc_id, + doc_source=doc_source, pdf_folder=pdf_folder, output_extract_data_folder=output_extract_data_child_folder, output_mapping_data_folder=output_mapping_child_folder) @@ -1154,27 +1163,31 @@ if __name__ == "__main__": # special_doc_id_list = ["553242411"] - special_doc_id_list: list = ["539790009", - "542300403", - "542301117", - "542306317", - "547567013", - "552505237", - "552505278", - "554431052", - "554851189", - "555377021", - "555654388"] - # special_doc_id_list: list = ["542301117"] - pdf_folder:str = r"/data/aus_prospectus/pdf/" - output_pdf_text_folder:str = r"/data/aus_prospectus/output/pdf_text/" - output_extract_data_child_folder:str = r"/data/aus_prospectus/output/extract_data/docs/" - output_extract_data_total_folder:str = r"/data/aus_prospectus/output/extract_data/total/" - output_mapping_child_folder:str = r"/data/aus_prospectus/output/mapping_data/docs/" - output_mapping_total_folder:str = r"/data/aus_prospectus/output/mapping_data/total/" - drilldown_folder = r"/data/aus_prospectus/output/drilldown/" - apply_pdf2html = True - batch_run_documents(special_doc_id_list=special_doc_id_list, + doc_source = "aus_prospectus" + if doc_source == "aus_prospectus": + special_doc_id_list: list = ["539790009", + "542300403", + "542301117", + "542306317", + "547567013", + "552505237", + "552505278", + "554431052", + "554851189", + "555377021", + "555654388"] + special_doc_id_list: list = ["554851189"] + pdf_folder:str = r"/data/aus_prospectus/pdf/" + output_pdf_text_folder:str = r"/data/aus_prospectus/output/pdf_text/" + output_extract_data_child_folder:str = r"/data/aus_prospectus/output/extract_data/docs/" + output_extract_data_total_folder:str = 
r"/data/aus_prospectus/output/extract_data/total/" + output_mapping_child_folder:str = r"/data/aus_prospectus/output/mapping_data/docs/" + output_mapping_total_folder:str = r"/data/aus_prospectus/output/mapping_data/total/" + drilldown_folder = r"/data/aus_prospectus/output/drilldown/" + elif doc_source == "emea_ar": + special_doc_id_list = ["553242411"] + batch_run_documents(doc_source=doc_source, + special_doc_id_list=special_doc_id_list, pdf_folder=pdf_folder, output_pdf_text_folder=output_pdf_text_folder, output_extract_data_child_folder=output_extract_data_child_folder, @@ -1182,7 +1195,6 @@ if __name__ == "__main__": output_mapping_child_folder=output_mapping_child_folder, output_mapping_total_folder=output_mapping_total_folder, drilldown_folder=drilldown_folder, - apply_pdf2html=apply_pdf2html ) # new_data_file = r"/data/emea_ar/output/mapping_data/total/mapping_data_info_15_documents_by_text_20241121154243.xlsx" diff --git a/test_specific_biz_logic.py b/test_specific_biz_logic.py index 32f5d2d..f1006f3 100644 --- a/test_specific_biz_logic.py +++ b/test_specific_biz_logic.py @@ -22,6 +22,7 @@ def test_validate_extraction_data(): datapoint_page_info, result_details = get_datapoint_page_info(filter_pages) datapoints = get_datapoints_from_datapoint_page_info(datapoint_page_info) data_extraction = DataExtraction( + doc_source="emea_ar", doc_id=document_id, pdf_file=pdf_file, output_data_folder=output_extract_data_child_folder,