Refine the span-pages calculation: treat a page as a management-fee-and-costs page only when its records contain just the management_fee_and_costs and management_fee datapoints
This commit is contained in:
parent
f333cc30f5
commit
37cf06a394
|
|
@ -786,6 +786,11 @@ class DataExtraction:
|
|||
share_name = management_fee_data.get("share_name", "")
|
||||
if fund_name == "" or share_name == "":
|
||||
continue
|
||||
remain_keys = [key for key in keys if key not in ["fund_name", "share_name",
|
||||
"management_fee_and_costs",
|
||||
"management_fee"]]
|
||||
if len(remain_keys) > 0:
|
||||
continue
|
||||
if "management_fee_and_costs" in keys:
|
||||
management_fee_and_costs = management_fee_data.get("management_fee_and_costs", -1)
|
||||
try:
|
||||
|
|
|
|||
10
main.py
10
main.py
|
|
@ -1531,18 +1531,18 @@ if __name__ == "__main__":
|
|||
# document_sample_file = (
|
||||
# r"./sample_documents/aus_prospectus_verify_6_documents_sample.txt"
|
||||
# )
|
||||
# document_sample_file = (
|
||||
# r"./sample_documents/aus_prospectus_46_documents_sample.txt"
|
||||
# )
|
||||
document_sample_file = (
|
||||
r"./sample_documents/aus_prospectus_87_vision_cfs_documents_sample.txt"
|
||||
r"./sample_documents/aus_prospectus_46_documents_sample.txt"
|
||||
)
|
||||
# document_sample_file = (
|
||||
# r"./sample_documents/aus_prospectus_87_vision_cfs_documents_sample.txt"
|
||||
# )
|
||||
logger.info(f"Start to run document sample file: {document_sample_file}")
|
||||
with open(document_sample_file, "r", encoding="utf-8") as f:
|
||||
special_doc_id_list = [doc_id.strip() for doc_id in f.readlines()
|
||||
if len(doc_id.strip()) > 0]
|
||||
# special_doc_id_list = ["470879332", "462780211", "561929947", "422100350"]
|
||||
# special_doc_id_list = ["539999907", "455235248", "448576924"]
|
||||
# special_doc_id_list = ["462780211", "539999907"]
|
||||
pdf_folder: str = r"/data/aus_prospectus/pdf/"
|
||||
output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"
|
||||
output_extract_data_child_folder: str = (
|
||||
|
|
|
|||
Loading…
Reference in New Issue