diff --git a/core/data_extraction.py b/core/data_extraction.py index a73e5af..aab9b7a 100644 --- a/core/data_extraction.py +++ b/core/data_extraction.py @@ -253,6 +253,7 @@ class DataExtraction: exclude_data: list) -> list: """ If occur error, split the context to two parts and try to get data from the two parts + Relevant document: 503194284 """ try: logger.info(f"Split context to get data to fix issue which output length is over 4K tokens") diff --git a/main.py b/main.py index 4159e8c..8659cdb 100644 --- a/main.py +++ b/main.py @@ -505,7 +505,7 @@ if __name__ == "__main__": # doc_id = "476492237" # extract_data(doc_id, pdf_folder, output_extract_data_child_folder, re_run) - special_doc_id_list = ["503194284"] + special_doc_id_list = ["508854243"] output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/" output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/" re_run_mapping_data = True