initial abbreviation configurations
This commit is contained in:
parent
b15d260a58
commit
e2b9bcbdbc
|
|
@ -34,14 +34,16 @@ ABB_JSON = dict()
|
||||||
|
|
||||||
def get_abb_json():
|
def get_abb_json():
|
||||||
global ABB_JSON
|
global ABB_JSON
|
||||||
with open("abbreviation_records.json", "r") as file:
|
if len(ABB_JSON.keys()) == 0:
|
||||||
# Load the JSON and convert keys to lowercase
|
with open("./configuration/aus_prospectus/abbreviation_records.json", "r") as file:
|
||||||
ABB_JSON = {key.lower(): value for key, value in json.load(file).items()}
|
# Load the JSON and convert keys to lowercase
|
||||||
|
ABB_JSON = {key.lower(): value for key, value in json.load(file).items()}
|
||||||
|
|
||||||
def get_abbre_format_str(fundname):
|
def get_abbre_format_str(fundname):
|
||||||
"""Replaces abbreviations in a fund name with their expanded forms."""
|
"""Replaces abbreviations in a fund name with their expanded forms."""
|
||||||
# Convert fund name to lowercase while matching
|
# Convert fund name to lowercase while matching
|
||||||
f_list = fundname.lower().split()
|
f_list = fundname.lower().split()
|
||||||
|
get_abb_json()
|
||||||
updated_doc_fname_words = [ABB_JSON.get(word, word).lower() for word in f_list]
|
updated_doc_fname_words = [ABB_JSON.get(word, word).lower() for word in f_list]
|
||||||
return " ".join(updated_doc_fname_words)
|
return " ".join(updated_doc_fname_words)
|
||||||
|
|
||||||
|
|
|
||||||
2
main.py
2
main.py
|
|
@ -1016,7 +1016,7 @@ def batch_run_documents(
|
||||||
r"/data/emea_ar/ground_truth/page_filter/datapoint_page_info_88_documents.xlsx"
|
r"/data/emea_ar/ground_truth/page_filter/datapoint_page_info_88_documents.xlsx"
|
||||||
)
|
)
|
||||||
re_run_extract_data = False
|
re_run_extract_data = False
|
||||||
re_run_mapping_data = False
|
re_run_mapping_data = True
|
||||||
force_save_total_data = True
|
force_save_total_data = True
|
||||||
calculate_metrics = False
|
calculate_metrics = False
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue