update metrics

This commit is contained in:
Blade He 2025-03-26 23:14:28 -05:00
parent ff2325c72d
commit dc560e1e01
2 changed files with 89 additions and 45 deletions

10
main.py
View File

@@ -1532,13 +1532,17 @@ if __name__ == "__main__":
doc_source = "aus_prospectus"
# doc_source = "emea_ar"
if doc_source == "aus_prospectus":
# document_sample_file = (
# r"./sample_documents/aus_prospectus_verify_6_documents_sample.txt"
# )
document_sample_file = (
r"./sample_documents/aus_prospectus_verify_6_documents_sample.txt"
r"./sample_documents/aus_prospectus_46_documents_sample.txt"
)
with open(document_sample_file, "r", encoding="utf-8") as f:
special_doc_id_list = [doc_id.strip() for doc_id in f.readlines()]
document_mapping_file = r"/data/aus_prospectus/basic_information/next_round/next_round_6_documents_mapping.xlsx"
special_doc_id_list = ["462780211"]
# document_mapping_file = r"/data/aus_prospectus/basic_information/next_round/next_round_6_documents_mapping.xlsx"
document_mapping_file = r"/data/aus_prospectus/basic_information/46_documents/aus_prospectus_46_documents_mapping.xlsx"
# special_doc_id_list = ["553449169"]
pdf_folder: str = r"/data/aus_prospectus/pdf/"
output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"
output_extract_data_child_folder: str = (

File diff suppressed because one or more lines are too long