update metrics
This commit is contained in:
parent
ff2325c72d
commit
dc560e1e01
10
main.py
10
main.py
|
|
@ -1532,13 +1532,17 @@ if __name__ == "__main__":
|
||||||
doc_source = "aus_prospectus"
|
doc_source = "aus_prospectus"
|
||||||
# doc_source = "emea_ar"
|
# doc_source = "emea_ar"
|
||||||
if doc_source == "aus_prospectus":
|
if doc_source == "aus_prospectus":
|
||||||
|
# document_sample_file = (
|
||||||
|
# r"./sample_documents/aus_prospectus_verify_6_documents_sample.txt"
|
||||||
|
# )
|
||||||
document_sample_file = (
|
document_sample_file = (
|
||||||
r"./sample_documents/aus_prospectus_verify_6_documents_sample.txt"
|
r"./sample_documents/aus_prospectus_46_documents_sample.txt"
|
||||||
)
|
)
|
||||||
with open(document_sample_file, "r", encoding="utf-8") as f:
|
with open(document_sample_file, "r", encoding="utf-8") as f:
|
||||||
special_doc_id_list = [doc_id.strip() for doc_id in f.readlines()]
|
special_doc_id_list = [doc_id.strip() for doc_id in f.readlines()]
|
||||||
document_mapping_file = r"/data/aus_prospectus/basic_information/next_round/next_round_6_documents_mapping.xlsx"
|
# document_mapping_file = r"/data/aus_prospectus/basic_information/next_round/next_round_6_documents_mapping.xlsx"
|
||||||
special_doc_id_list = ["462780211"]
|
document_mapping_file = r"/data/aus_prospectus/basic_information/46_documents/aus_prospectus_46_documents_mapping.xlsx"
|
||||||
|
# special_doc_id_list = ["553449169"]
|
||||||
pdf_folder: str = r"/data/aus_prospectus/pdf/"
|
pdf_folder: str = r"/data/aus_prospectus/pdf/"
|
||||||
output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"
|
output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"
|
||||||
output_extract_data_child_folder: str = (
|
output_extract_data_child_folder: str = (
|
||||||
|
|
|
||||||
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue