diff --git a/main.py b/main.py index 9cdebf3..9f1a69c 100644 --- a/main.py +++ b/main.py @@ -1538,7 +1538,7 @@ if __name__ == "__main__": with open(document_sample_file, "r", encoding="utf-8") as f: special_doc_id_list = [doc_id.strip() for doc_id in f.readlines() if len(doc_id.strip()) > 0] - special_doc_id_list = ["573372424", "455235248", "462780211"] + # special_doc_id_list = ["573372424", "455235248", "462780211"] pdf_folder: str = r"/data/aus_prospectus/pdf/" output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/" output_extract_data_child_folder: str = (