update metrics
This commit is contained in:
parent
ff2325c72d
commit
dc560e1e01
10
main.py
10
main.py
|
|
@ -1532,13 +1532,17 @@ if __name__ == "__main__":
|
|||
doc_source = "aus_prospectus"
|
||||
# doc_source = "emea_ar"
|
||||
if doc_source == "aus_prospectus":
|
||||
# document_sample_file = (
|
||||
# r"./sample_documents/aus_prospectus_verify_6_documents_sample.txt"
|
||||
# )
|
||||
document_sample_file = (
|
||||
r"./sample_documents/aus_prospectus_verify_6_documents_sample.txt"
|
||||
r"./sample_documents/aus_prospectus_46_documents_sample.txt"
|
||||
)
|
||||
with open(document_sample_file, "r", encoding="utf-8") as f:
|
||||
special_doc_id_list = [doc_id.strip() for doc_id in f.readlines()]
|
||||
document_mapping_file = r"/data/aus_prospectus/basic_information/next_round/next_round_6_documents_mapping.xlsx"
|
||||
special_doc_id_list = ["462780211"]
|
||||
# document_mapping_file = r"/data/aus_prospectus/basic_information/next_round/next_round_6_documents_mapping.xlsx"
|
||||
document_mapping_file = r"/data/aus_prospectus/basic_information/46_documents/aus_prospectus_46_documents_mapping.xlsx"
|
||||
# special_doc_id_list = ["553449169"]
|
||||
pdf_folder: str = r"/data/aus_prospectus/pdf/"
|
||||
output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"
|
||||
output_extract_data_child_folder: str = (
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue