From 7f37f3532fa224d79fb848768c72491843397cd1 Mon Sep 17 00:00:00 2001 From: Blade He Date: Mon, 27 Jan 2025 14:59:26 -0600 Subject: [PATCH] switch example document --- main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index bf07834..e8e0543 100644 --- a/main.py +++ b/main.py @@ -1032,7 +1032,7 @@ def batch_run_documents( page_filter_ground_truth_file = ( r"/data/emea_ar/ground_truth/page_filter/datapoint_page_info_88_documents.xlsx" ) - re_run_extract_data = False + re_run_extract_data = True re_run_mapping_data = True force_save_total_data = False calculate_metrics = False @@ -1375,7 +1375,7 @@ if __name__ == "__main__": # special_doc_id_list = ["553242411"] - doc_source = "emea_ar" + doc_source = "aus_prospectus" if doc_source == "aus_prospectus": document_sample_file = ( r"./sample_documents/aus_prospectus_100_documents_multi_fund_sample.txt" @@ -1396,7 +1396,7 @@ if __name__ == "__main__": # "555377021", # "555654388", # ] - # special_doc_id_list: list = ["534287518"] + special_doc_id_list: list = ["539790009"] pdf_folder: str = r"/data/aus_prospectus/pdf/" output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/" output_extract_data_child_folder: str = (