Fix issue when saving data extraction data
This commit is contained in:
parent
a48af9ddf0
commit
551f754379
|
|
@ -313,6 +313,7 @@ class DataExtraction:
|
|||
for remove_item in remove_items:
|
||||
if remove_item in extract_data["data"]:
|
||||
extract_data["data"].remove(remove_item)
|
||||
return data_list
|
||||
|
||||
def align_fund_share_name(self, data_list: list):
|
||||
"""
|
||||
|
|
|
|||
6
main.py
6
main.py
|
|
@ -1526,8 +1526,8 @@ if __name__ == "__main__":
|
|||
|
||||
# special_doc_id_list = ["553242411"]
|
||||
|
||||
re_run_extract_data = False
|
||||
re_run_mapping_data = False
|
||||
re_run_extract_data = True
|
||||
re_run_mapping_data = True
|
||||
force_save_total_data = True
|
||||
doc_source = "aus_prospectus"
|
||||
# doc_source = "emea_ar"
|
||||
|
|
@ -1560,7 +1560,7 @@ if __name__ == "__main__":
|
|||
# "544886057",
|
||||
# "550769189",
|
||||
# "553449663"]
|
||||
special_doc_id_list = ["521606755"]
|
||||
# special_doc_id_list = ["521606755"]
|
||||
# special_doc_id_list = ["391080133", "391080140", "401212184", "412778803", "420339794", "454036250", "414751292"]
|
||||
pdf_folder: str = r"/data/aus_prospectus/pdf/"
|
||||
output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"
|
||||
|
|
|
|||
Loading…
Reference in New Issue