Confirm the span-pages calculation: treat a page as a management fee and costs page only when its datapoints are limited to management_fee_and_costs and management_fee
This commit is contained in:
parent
f333cc30f5
commit
37cf06a394
|
|
@ -786,6 +786,11 @@ class DataExtraction:
|
||||||
share_name = management_fee_data.get("share_name", "")
|
share_name = management_fee_data.get("share_name", "")
|
||||||
if fund_name == "" or share_name == "":
|
if fund_name == "" or share_name == "":
|
||||||
continue
|
continue
|
||||||
|
remain_keys = [key for key in keys if key not in ["fund_name", "share_name",
|
||||||
|
"management_fee_and_costs",
|
||||||
|
"management_fee"]]
|
||||||
|
if len(remain_keys) > 0:
|
||||||
|
continue
|
||||||
if "management_fee_and_costs" in keys:
|
if "management_fee_and_costs" in keys:
|
||||||
management_fee_and_costs = management_fee_data.get("management_fee_and_costs", -1)
|
management_fee_and_costs = management_fee_data.get("management_fee_and_costs", -1)
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
10
main.py
10
main.py
|
|
@ -1531,18 +1531,18 @@ if __name__ == "__main__":
|
||||||
# document_sample_file = (
|
# document_sample_file = (
|
||||||
# r"./sample_documents/aus_prospectus_verify_6_documents_sample.txt"
|
# r"./sample_documents/aus_prospectus_verify_6_documents_sample.txt"
|
||||||
# )
|
# )
|
||||||
# document_sample_file = (
|
|
||||||
# r"./sample_documents/aus_prospectus_46_documents_sample.txt"
|
|
||||||
# )
|
|
||||||
document_sample_file = (
|
document_sample_file = (
|
||||||
r"./sample_documents/aus_prospectus_87_vision_cfs_documents_sample.txt"
|
r"./sample_documents/aus_prospectus_46_documents_sample.txt"
|
||||||
)
|
)
|
||||||
|
# document_sample_file = (
|
||||||
|
# r"./sample_documents/aus_prospectus_87_vision_cfs_documents_sample.txt"
|
||||||
|
# )
|
||||||
logger.info(f"Start to run document sample file: {document_sample_file}")
|
logger.info(f"Start to run document sample file: {document_sample_file}")
|
||||||
with open(document_sample_file, "r", encoding="utf-8") as f:
|
with open(document_sample_file, "r", encoding="utf-8") as f:
|
||||||
special_doc_id_list = [doc_id.strip() for doc_id in f.readlines()
|
special_doc_id_list = [doc_id.strip() for doc_id in f.readlines()
|
||||||
if len(doc_id.strip()) > 0]
|
if len(doc_id.strip()) > 0]
|
||||||
# special_doc_id_list = ["470879332", "462780211", "561929947", "422100350"]
|
# special_doc_id_list = ["470879332", "462780211", "561929947", "422100350"]
|
||||||
# special_doc_id_list = ["539999907", "455235248", "448576924"]
|
# special_doc_id_list = ["462780211", "539999907"]
|
||||||
pdf_folder: str = r"/data/aus_prospectus/pdf/"
|
pdf_folder: str = r"/data/aus_prospectus/pdf/"
|
||||||
output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"
|
output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"
|
||||||
output_extract_data_child_folder: str = (
|
output_extract_data_child_folder: str = (
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue