a little change
This commit is contained in:
parent
932870f406
commit
50e6c3c19d
|
|
@ -253,6 +253,7 @@ class DataExtraction:
|
||||||
exclude_data: list) -> list:
|
exclude_data: list) -> list:
|
||||||
"""
|
"""
|
||||||
If an error occurs, split the context into two parts and try to get data from each part
|
If an error occurs, split the context into two parts and try to get data from each part
|
||||||
|
Relevant document: 503194284
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
logger.info(f"Split context to get data to fix issue which output length is over 4K tokens")
|
logger.info(f"Split context to get data to fix issue which output length is over 4K tokens")
|
||||||
|
|
|
||||||
2
main.py
2
main.py
|
|
@ -505,7 +505,7 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
# doc_id = "476492237"
|
# doc_id = "476492237"
|
||||||
# extract_data(doc_id, pdf_folder, output_extract_data_child_folder, re_run)
|
# extract_data(doc_id, pdf_folder, output_extract_data_child_folder, re_run)
|
||||||
special_doc_id_list = ["503194284"]
|
special_doc_id_list = ["508854243"]
|
||||||
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
|
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
|
||||||
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
|
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
|
||||||
re_run_mapping_data = True
|
re_run_mapping_data = True
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue