switch example document

This commit is contained in:
Blade He 2025-01-27 14:59:26 -06:00
parent 6f831e241c
commit 7f37f3532f
1 changed file with 3 additions and 3 deletions

View File

@@ -1032,7 +1032,7 @@ def batch_run_documents(
page_filter_ground_truth_file = ( page_filter_ground_truth_file = (
r"/data/emea_ar/ground_truth/page_filter/datapoint_page_info_88_documents.xlsx" r"/data/emea_ar/ground_truth/page_filter/datapoint_page_info_88_documents.xlsx"
) )
re_run_extract_data = False re_run_extract_data = True
re_run_mapping_data = True re_run_mapping_data = True
force_save_total_data = False force_save_total_data = False
calculate_metrics = False calculate_metrics = False
@@ -1375,7 +1375,7 @@ if __name__ == "__main__":
# special_doc_id_list = ["553242411"] # special_doc_id_list = ["553242411"]
doc_source = "emea_ar" doc_source = "aus_prospectus"
if doc_source == "aus_prospectus": if doc_source == "aus_prospectus":
document_sample_file = ( document_sample_file = (
r"./sample_documents/aus_prospectus_100_documents_multi_fund_sample.txt" r"./sample_documents/aus_prospectus_100_documents_multi_fund_sample.txt"
@@ -1396,7 +1396,7 @@ if __name__ == "__main__":
# "555377021", # "555377021",
# "555654388", # "555654388",
# ] # ]
# special_doc_id_list: list = ["534287518"] special_doc_id_list: list = ["539790009"]
pdf_folder: str = r"/data/aus_prospectus/pdf/" pdf_folder: str = r"/data/aus_prospectus/pdf/"
output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/" output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"
output_extract_data_child_folder: str = ( output_extract_data_child_folder: str = (