Fix issue when saving data extraction results

This commit is contained in:
Blade He 2025-03-13 18:36:04 -05:00
parent a48af9ddf0
commit 551f754379
2 changed files with 4 additions and 3 deletions

View File

@@ -313,6 +313,7 @@ class DataExtraction:
for remove_item in remove_items: for remove_item in remove_items:
if remove_item in extract_data["data"]: if remove_item in extract_data["data"]:
extract_data["data"].remove(remove_item) extract_data["data"].remove(remove_item)
return data_list
def align_fund_share_name(self, data_list: list): def align_fund_share_name(self, data_list: list):
""" """

View File

@@ -1526,8 +1526,8 @@ if __name__ == "__main__":
# special_doc_id_list = ["553242411"] # special_doc_id_list = ["553242411"]
re_run_extract_data = False re_run_extract_data = True
re_run_mapping_data = False re_run_mapping_data = True
force_save_total_data = True force_save_total_data = True
doc_source = "aus_prospectus" doc_source = "aus_prospectus"
# doc_source = "emea_ar" # doc_source = "emea_ar"
@@ -1560,7 +1560,7 @@ if __name__ == "__main__":
# "544886057", # "544886057",
# "550769189", # "550769189",
# "553449663"] # "553449663"]
special_doc_id_list = ["521606755"] # special_doc_id_list = ["521606755"]
# special_doc_id_list = ["391080133", "391080140", "401212184", "412778803", "420339794", "454036250", "414751292"] # special_doc_id_list = ["391080133", "391080140", "401212184", "412778803", "420339794", "454036250", "414751292"]
pdf_folder: str = r"/data/aus_prospectus/pdf/" pdf_folder: str = r"/data/aus_prospectus/pdf/"
output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/" output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"