From 551f7543798db28aa2242522a1ffe302c8b55013 Mon Sep 17 00:00:00 2001 From: Blade He Date: Thu, 13 Mar 2025 18:36:04 -0500 Subject: [PATCH] Fix issue when saving data extraction data --- core/data_extraction.py | 1 + main.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/core/data_extraction.py b/core/data_extraction.py index fc31131..b31f32b 100644 --- a/core/data_extraction.py +++ b/core/data_extraction.py @@ -313,6 +313,7 @@ class DataExtraction: for remove_item in remove_items: if remove_item in extract_data["data"]: extract_data["data"].remove(remove_item) + return data_list def align_fund_share_name(self, data_list: list): """ diff --git a/main.py b/main.py index 6baf7d1..f7d3310 100644 --- a/main.py +++ b/main.py @@ -1526,8 +1526,8 @@ if __name__ == "__main__": # special_doc_id_list = ["553242411"] - re_run_extract_data = False - re_run_mapping_data = False + re_run_extract_data = True + re_run_mapping_data = True force_save_total_data = True doc_source = "aus_prospectus" # doc_source = "emea_ar" @@ -1560,7 +1560,7 @@ if __name__ == "__main__": # "544886057", # "550769189", # "553449663"] - special_doc_id_list = ["521606755"] + # special_doc_id_list = ["521606755"] # special_doc_id_list = ["391080133", "391080140", "401212184", "412778803", "420339794", "454036250", "414751292"] pdf_folder: str = r"/data/aus_prospectus/pdf/" output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"