From e2b9bcbdbca5f1757b84c128130fbaf80a070764 Mon Sep 17 00:00:00 2001
From: Blade He <Blade.He@morningstar.com>
Date: Tue, 21 Jan 2025 17:09:45 -0600
Subject: [PATCH] initial abbreviation configurations

---
 core/auz_nz/hybrid_solution_script.py | 8 +++++---
 main.py                               | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/core/auz_nz/hybrid_solution_script.py b/core/auz_nz/hybrid_solution_script.py
index 8f316fd..1ec5c93 100644
--- a/core/auz_nz/hybrid_solution_script.py
+++ b/core/auz_nz/hybrid_solution_script.py
@@ -34,14 +34,16 @@ ABB_JSON = dict()
 
 def get_abb_json():
     global ABB_JSON
-    with open("abbreviation_records.json", "r") as file:
-        # Load the JSON and convert keys to lowercase
-        ABB_JSON = {key.lower(): value for key, value in json.load(file).items()}
+    if len(ABB_JSON.keys()) == 0:
+        with open("./configuration/aus_prospectus/abbreviation_records.json", "r") as file:
+            # Load the JSON and convert keys to lowercase
+            ABB_JSON = {key.lower(): value for key, value in json.load(file).items()}
 
 def get_abbre_format_str(fundname):
     """Replaces abbreviations in a fund name with their expanded forms."""
     # Convert fund name to lowercase while matching
     f_list = fundname.lower().split()
+    get_abb_json()
     updated_doc_fname_words = [ABB_JSON.get(word, word).lower() for word in f_list]
     return " ".join(updated_doc_fname_words)
 
diff --git a/main.py b/main.py
index b07ef3f..2052e49 100644
--- a/main.py
+++ b/main.py
@@ -1016,7 +1016,7 @@ def batch_run_documents(
         r"/data/emea_ar/ground_truth/page_filter/datapoint_page_info_88_documents.xlsx"
     )
     re_run_extract_data = False
-    re_run_mapping_data = False
+    re_run_mapping_data = True
     force_save_total_data = True
     calculate_metrics = False