From 50e51e0894cc60ade78ba11dd35074ce2857fbd5 Mon Sep 17 00:00:00 2001
From: Blade He
Date: Mon, 31 Mar 2025 17:16:05 -0500
Subject: [PATCH] recover main.py

---
 main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.py b/main.py
index 9cdebf3..9f1a69c 100644
--- a/main.py
+++ b/main.py
@@ -1538,7 +1538,7 @@ if __name__ == "__main__":
     with open(document_sample_file, "r", encoding="utf-8") as f:
         special_doc_id_list = [doc_id.strip() for doc_id in f.readlines()
                                if len(doc_id.strip()) > 0]
-    special_doc_id_list = ["573372424", "455235248", "462780211"]
+    # special_doc_id_list = ["573372424", "455235248", "462780211"]
     pdf_folder: str = r"/data/aus_prospectus/pdf/"
     output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"
     output_extract_data_child_folder: str = (