recover main.py

This commit is contained in:
Blade He 2025-03-31 17:16:05 -05:00
parent a42033f848
commit 50e51e0894
1 changed file with 1 addition and 1 deletion

View File

@@ -1538,7 +1538,7 @@ if __name__ == "__main__":
  with open(document_sample_file, "r", encoding="utf-8") as f:
  special_doc_id_list = [doc_id.strip() for doc_id in f.readlines()
  if len(doc_id.strip()) > 0]
- special_doc_id_list = ["573372424", "455235248", "462780211"]
+ # special_doc_id_list = ["573372424", "455235248", "462780211"]
  pdf_folder: str = r"/data/aus_prospectus/pdf/"
  output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"
  output_extract_data_child_folder: str = (