1. Support dynamically showing fund-level data examples.
2. Optimize extraction of the minimum_initial_investment data point.
This commit is contained in:
parent
e60e1fd546
commit
357bb6d580
|
|
@ -9,7 +9,7 @@
|
||||||
"administration_fees": {"english": ["administration fee", "administration fees","admin fee"]},
|
"administration_fees": {"english": ["administration fee", "administration fees","admin fee"]},
|
||||||
"interposed_vehicle_performance_fee_cost": {"english": ["Performance fees charged by interposed vehicles","interposed vehicle performance fee cost", "interposed vehicle performance"]},
|
"interposed_vehicle_performance_fee_cost": {"english": ["Performance fees charged by interposed vehicles","interposed vehicle performance fee cost", "interposed vehicle performance"]},
|
||||||
"benchmark_name": {"english": ["benchmark fund","benchmark name"]},
|
"benchmark_name": {"english": ["benchmark fund","benchmark name"]},
|
||||||
"minimum_initial_investment": {"english": ["minimum initial investment","initial investment", "initial investment amount", "minimum investment"]},
|
"minimum_initial_investment": {"english": ["minimum initial investment","initial investment", "initial investment amount", "minimum investment", "contributions and access to your investment", "start your investment with"]},
|
||||||
"recoverable_expenses": {"english": ["recoverable expenses","recoverable cost","expense recoveries"]},
|
"recoverable_expenses": {"english": ["recoverable expenses","recoverable cost","expense recoveries"]},
|
||||||
"indirect_costs": {"english": ["indirect cost","indirect fees","indirect fee","indirect costs"]}
|
"indirect_costs": {"english": ["indirect cost","indirect fees","indirect fee","indirect costs"]}
|
||||||
}
|
}
|
||||||
|
|
@ -9,7 +9,7 @@
|
||||||
"administration_fees": {"english": ["administration fee", "administration fees","admin fee"]},
|
"administration_fees": {"english": ["administration fee", "administration fees","admin fee"]},
|
||||||
"interposed_vehicle_performance_fee_cost": {"english": ["Performance fees charged by interposed vehicles","interposed vehicle performance fee cost", "interposed vehicle performance"]},
|
"interposed_vehicle_performance_fee_cost": {"english": ["Performance fees charged by interposed vehicles","interposed vehicle performance fee cost", "interposed vehicle performance"]},
|
||||||
"benchmark_name": {"english": ["benchmark fund","benchmark name"]},
|
"benchmark_name": {"english": ["benchmark fund","benchmark name"]},
|
||||||
"minimum_initial_investment": {"english": ["minimum initial investment","initial investment", "initial investment amount", "minimum investment amounts"]},
|
"minimum_initial_investment": {"english": ["minimum initial investment","initial investment", "initial investment amount", "minimum investment amounts", "Contributions and access to your investment"]},
|
||||||
"recoverable_expenses": {"english": ["recoverable expenses","recoverable cost","expense recoveries"]},
|
"recoverable_expenses": {"english": ["recoverable expenses","recoverable cost","expense recoveries"]},
|
||||||
"indirect_costs": {"english": ["indirect cost","indirect fees","indirect fee","indirect costs"]}
|
"indirect_costs": {"english": ["indirect cost","indirect fees","indirect fee","indirect costs"]}
|
||||||
}
|
}
|
||||||
|
|
@ -312,39 +312,44 @@ class DataExtraction:
|
||||||
def supplement_minimum_initial_investment(self, data_list: list):
|
def supplement_minimum_initial_investment(self, data_list: list):
|
||||||
exist_minimum_initial_investment = False
|
exist_minimum_initial_investment = False
|
||||||
minimum_initial_investment = -1
|
minimum_initial_investment = -1
|
||||||
mii_fund_name = ""
|
|
||||||
mii_dict = None
|
mii_dict = None
|
||||||
for data_dict in data_list:
|
for data_dict in data_list:
|
||||||
extract_data = data_dict.get("extract_data", {})
|
extract_data = data_dict.get("extract_data", {})
|
||||||
data = extract_data.get("data", [])
|
data = extract_data.get("data", [])
|
||||||
|
remove_items = []
|
||||||
for data_item in data:
|
for data_item in data:
|
||||||
keys = list(data_item.keys())
|
keys = list(data_item.keys())
|
||||||
if "minimum_initial_investment" in keys:
|
if "minimum_initial_investment" in keys:
|
||||||
exist_minimum_initial_investment = True
|
exist_minimum_initial_investment = True
|
||||||
minimum_initial_investment = data_item.get("minimum_initial_investment", -1)
|
if minimum_initial_investment == -1:
|
||||||
mii_fund_name = data_item.get("fund_name", "")
|
minimum_initial_investment = data_item.get("minimum_initial_investment", -1)
|
||||||
mii_dict = data_dict
|
if mii_dict is None:
|
||||||
break
|
mii_dict = data_dict
|
||||||
if exist_minimum_initial_investment:
|
remove_items.append(data_item)
|
||||||
break
|
for data_item in remove_items:
|
||||||
|
try:
|
||||||
|
data_dict["extract_data"]["data"].remove(data_item)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
if exist_minimum_initial_investment and minimum_initial_investment != -1:
|
if exist_minimum_initial_investment and minimum_initial_investment != -1:
|
||||||
# get all of funds in data_list
|
# get all of funds in data_list
|
||||||
fund_name_list = []
|
fund_name_list = []
|
||||||
for data_dict in data_list:
|
for iter_data_dict in data_list:
|
||||||
extract_data = data_dict.get("extract_data", {})
|
extract_data = iter_data_dict.get("extract_data", {})
|
||||||
data = extract_data.get("data", [])
|
data = extract_data.get("data", [])
|
||||||
for data_item in data:
|
for data_item in data:
|
||||||
keys = list(data_item.keys())
|
keys = list(data_item.keys())
|
||||||
if "fund_name" in keys:
|
if "fund_name" in keys:
|
||||||
fund_name = data_item.get("fund_name", "")
|
fund_name = data_item.get("fund_name", "")
|
||||||
if len(fund_name) > 0 and fund_name not in fund_name_list and fund_name != mii_fund_name:
|
if len(fund_name) > 0 and fund_name not in fund_name_list:
|
||||||
fund_name_list.append(fund_name)
|
fund_name_list.append(fund_name)
|
||||||
# rewrite mii_dict, set each fund name with same minimum_initial_investment value
|
# rewrite mii_dict, set each fund name with same minimum_initial_investment value
|
||||||
new_mii_data_list = []
|
new_mii_data_list = []
|
||||||
for fund_name in fund_name_list:
|
for fund_name in fund_name_list:
|
||||||
new_data_dict = {"fund_name": fund_name, "minimum_initial_investment": minimum_initial_investment}
|
new_data_dict = {"fund_name": fund_name, "minimum_initial_investment": minimum_initial_investment}
|
||||||
new_mii_data_list.append(new_data_dict)
|
new_mii_data_list.append(new_data_dict)
|
||||||
mii_dict["extract_data"]["data"] = new_mii_data_list
|
|
||||||
|
mii_dict["extract_data"]["data"].extend(new_mii_data_list)
|
||||||
return data_list
|
return data_list
|
||||||
|
|
||||||
def extract_data_by_text(self) -> dict:
|
def extract_data_by_text(self) -> dict:
|
||||||
|
|
@ -1310,6 +1315,9 @@ class DataExtraction:
|
||||||
instructions.append("\n".join(output_requirement_common_list))
|
instructions.append("\n".join(output_requirement_common_list))
|
||||||
instructions.append("\n")
|
instructions.append("\n")
|
||||||
|
|
||||||
|
fund_datapoint_value_example = {}
|
||||||
|
fund_level_config = output_requirement.get("fund_level", {})
|
||||||
|
|
||||||
share_datapoint_value_example = {}
|
share_datapoint_value_example = {}
|
||||||
share_level_config = output_requirement.get("share_level", {})
|
share_level_config = output_requirement.get("share_level", {})
|
||||||
|
|
||||||
|
|
@ -1319,21 +1327,38 @@ class DataExtraction:
|
||||||
for datapoint in datapoints:
|
for datapoint in datapoints:
|
||||||
investment_level = self.datapoint_level_config.get(datapoint, "")
|
investment_level = self.datapoint_level_config.get(datapoint, "")
|
||||||
if investment_level == "fund_level":
|
if investment_level == "fund_level":
|
||||||
fund_level_example_list = output_requirement.get("fund_level", [])
|
# fund_level_example_list = output_requirement.get("fund_level", [])
|
||||||
for example in fund_level_example_list:
|
# for example in fund_level_example_list:
|
||||||
try:
|
# try:
|
||||||
sub_example_list = json.loads(example)
|
# sub_example_list = json.loads(example)
|
||||||
except:
|
# except:
|
||||||
sub_example_list = json_repair.loads(example)
|
# sub_example_list = json_repair.loads(example)
|
||||||
example_list.extend(sub_example_list)
|
# example_list.extend(sub_example_list)
|
||||||
|
fund_datapoint_value_example[datapoint] = fund_level_config.get(
|
||||||
|
f"{datapoint}_value", []
|
||||||
|
)
|
||||||
elif investment_level == "share_level":
|
elif investment_level == "share_level":
|
||||||
share_datapoint_value_example[datapoint] = share_level_config.get(
|
share_datapoint_value_example[datapoint] = share_level_config.get(
|
||||||
f"{datapoint}_value", []
|
f"{datapoint}_value", []
|
||||||
)
|
)
|
||||||
dp_reported_name[datapoint] = dp_reported_name_config.get(datapoint, "")
|
dp_reported_name[datapoint] = dp_reported_name_config.get(datapoint, "")
|
||||||
|
|
||||||
share_datapoint_list = list(share_datapoint_value_example.keys())
|
|
||||||
instructions.append(f"Example:\n")
|
instructions.append(f"Example:\n")
|
||||||
|
|
||||||
|
fund_datapoint_list = list(fund_datapoint_value_example.keys())
|
||||||
|
if len(fund_datapoint_list) > 0:
|
||||||
|
fund_name_example_list = fund_level_config.get("fund_name", [])
|
||||||
|
for index in range(len(fund_name_example_list)):
|
||||||
|
example_dict = {
|
||||||
|
"fund name": fund_name_example_list[index],
|
||||||
|
}
|
||||||
|
for fund_datapoint in fund_datapoint_list:
|
||||||
|
fund_datapoint_values = fund_datapoint_value_example[fund_datapoint]
|
||||||
|
if index < len(fund_datapoint_values):
|
||||||
|
example_dict[fund_datapoint] = fund_datapoint_values[index]
|
||||||
|
example_list.append(example_dict)
|
||||||
|
|
||||||
|
share_datapoint_list = list(share_datapoint_value_example.keys())
|
||||||
if len(share_datapoint_list) > 0:
|
if len(share_datapoint_list) > 0:
|
||||||
fund_name_example_list = share_level_config.get("fund_name", [])
|
fund_name_example_list = share_level_config.get("fund_name", [])
|
||||||
share_name_example_list = share_level_config.get("share_name", [])
|
share_name_example_list = share_level_config.get("share_name", [])
|
||||||
|
|
@ -1344,9 +1369,7 @@ class DataExtraction:
|
||||||
"share name": share_name_example_list[index],
|
"share name": share_name_example_list[index],
|
||||||
}
|
}
|
||||||
for share_datapoint in share_datapoint_list:
|
for share_datapoint in share_datapoint_list:
|
||||||
share_datapoint_values = share_datapoint_value_example[
|
share_datapoint_values = share_datapoint_value_example[share_datapoint]
|
||||||
share_datapoint
|
|
||||||
]
|
|
||||||
if index < len(share_datapoint_values):
|
if index < len(share_datapoint_values):
|
||||||
example_dict[share_datapoint] = share_datapoint_values[index]
|
example_dict[share_datapoint] = share_datapoint_values[index]
|
||||||
example_list.append(example_dict)
|
example_list.append(example_dict)
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@
|
||||||
"c. If with multiple fund names in context, please retrieve the fund name closest above the numerical value.",
|
"c. If with multiple fund names in context, please retrieve the fund name closest above the numerical value.",
|
||||||
"c.1 fund name example:",
|
"c.1 fund name example:",
|
||||||
"---- Example Start ----",
|
"---- Example Start ----",
|
||||||
"AXA World Funds ACT Emerging Markets Bonds\nAXA World Funds \n \nAdditional Unaudited Appendix \n\nƒ$GGLWLRQDO8QDXGLWHG$SSHQGL[$118$/5(3257$;$:RUOG)XQGV\nExpense Ratios (continued) \n \nCalculated TER (1) \nSwiss method \nApplied\nService Fee (2)\nOngoing \nCharges (3) \n \nwith performance \nfees \nwithout performance \nfees \n \nAXA World Funds - ACT Emerging Markets Short Duration Bonds Low Carbon \nA Capitalisation CHF Hedged \n1.26% \n1.26% \n0.26% \n1.29%",
|
"AXA World Funds ACT Emerging Markets Bonds\nAXA World Funds \n \nAdditional Unaudited Appendix \n\nExpense Ratios (continued) \n \nCalculated TER (1) \nSwiss method \nApplied\nService Fee (2)\nOngoing \nCharges (3) \n \nwith performance \nfees \nwithout performance \nfees \n \nAXA World Funds - ACT Emerging Markets Short Duration Bonds Low Carbon \nA Capitalisation CHF Hedged \n1.26% \n1.26% \n0.26% \n1.29%",
|
||||||
"---- Example End ----",
|
"---- Example End ----",
|
||||||
"Correct fund name: AXA World Funds - ACT Emerging Markets Short Duration Bonds Low Carbon",
|
"Correct fund name: AXA World Funds - ACT Emerging Markets Short Duration Bonds Low Carbon",
|
||||||
"\n",
|
"\n",
|
||||||
|
|
@ -121,15 +121,22 @@
|
||||||
"special_rule": {
|
"special_rule": {
|
||||||
"management_fee_and_costs": [
|
"management_fee_and_costs": [
|
||||||
"If there are multiple Management fee and costs reported names, here is the priority rule:",
|
"If there are multiple Management fee and costs reported names, here is the priority rule:",
|
||||||
"A. With \"Total Management fees and costs (gross)\" and \"Total Management fees and costs (net)\", pick up the values from \"Total Management fees and costs (net)\".",
|
"A.1 With \"Total Management fees and costs (gross)\" and \"Total Management fees and costs (net)\", pick up the values from \"Total Management fees and costs (net)\".",
|
||||||
"---Example Start---",
|
|
||||||
|
"---Example 1 Start---",
|
||||||
"\n Investment option \nInvestment option \nmanagement \ncosts1 \n% p.a. \n(A)\nLifeplan \nadministration fee \n(gross)2 \n% p.a. \n(B)\nLifeplan \nadministration fee \n(net) \n% p.a. \n(C)\nTotal Management \nfees and costs \n(gross) \n% p.a. \n(A + B)\nTotal Management \nfees and costs \n(net) \n% p.a. \n(A + C)\nAllan Gray Australian Equity Fund \u2013 Class A\n0.77\n0.60\n0.42\n1.37\n1.19\n",
|
"\n Investment option \nInvestment option \nmanagement \ncosts1 \n% p.a. \n(A)\nLifeplan \nadministration fee \n(gross)2 \n% p.a. \n(B)\nLifeplan \nadministration fee \n(net) \n% p.a. \n(C)\nTotal Management \nfees and costs \n(gross) \n% p.a. \n(A + B)\nTotal Management \nfees and costs \n(net) \n% p.a. \n(A + C)\nAllan Gray Australian Equity Fund \u2013 Class A\n0.77\n0.60\n0.42\n1.37\n1.19\n",
|
||||||
"---Example End---",
|
"---Example 1 End---",
|
||||||
"The output should be:",
|
"The output should be:",
|
||||||
"{\"data\": [{\"fund name\": \"Allan Gray Australian Equity Fund\", \"share name\": \"Class A\", \"management_fee_and_costs\": 1.19, \"management_fee\": 0.77, \"administration_fees\": 0.42}]",
|
"{\"data\": [{\"fund name\": \"Allan Gray Australian Equity Fund\", \"share name\": \"Class A\", \"management_fee_and_costs\": 1.19, \"management_fee\": 0.77, \"administration_fees\": 0.42}]",
|
||||||
"\n",
|
"\n",
|
||||||
"If there are multiple Management fee and costs sub-columns, here is the rule:",
|
"A.2 If a data value is reported as both gross and net, ignore the gross value and output the net value only.",
|
||||||
"B. With \"Management fees\" and \"Indirect fee\", sum the values from these two columns: \"Management fees\" + \"Indirect fee\".",
|
"---Example 2 Start---",
|
||||||
|
"Small Fund \nManagement fees \nand costs \n1.17% pa (gross)/2.51% pa (net) \n",
|
||||||
|
"---Example 2 End---",
|
||||||
|
"The output should be:",
|
||||||
|
"{\"data\": [{\"fund name\": \"Small Fund\", \"share name\": \"Small Fund\", \"management_fee_and_costs\": 2.51, \"management_fee\": 2.51}]",
|
||||||
|
"B. If there are multiple Management fee and costs sub-columns, here is the rule: ",
|
||||||
|
"With \"Management fees\" and \"Indirect fee\", sum the values from these two columns: \"Management fees\" + \"Indirect fee\".",
|
||||||
"---Example Start---",
|
"---Example Start---",
|
||||||
"\n\nManagement fees \nManagement fees and costs \nIndirect Fee \nPerformance Fees \nTransaction Costs \nTotal \nMLC diversified investment \noption \nMLC Horizon 2 \nIncome Portfolio \n1.35% p.a. \n0.07% p.a. \n0.06% p.a. \n0.01% p.a. \n1.49% p.a. \n",
|
"\n\nManagement fees \nManagement fees and costs \nIndirect Fee \nPerformance Fees \nTransaction Costs \nTotal \nMLC diversified investment \noption \nMLC Horizon 2 \nIncome Portfolio \n1.35% p.a. \n0.07% p.a. \n0.06% p.a. \n0.01% p.a. \n1.49% p.a. \n",
|
||||||
"---Example End---",
|
"---Example End---",
|
||||||
|
|
@ -170,13 +177,26 @@
|
||||||
"The minimum investment per Pension Plan account is \n$20,000. The minimum initial investment in any \ninvestment option is $5,000.\n\nPerpetual WealthFocus Pension Plan",
|
"The minimum investment per Pension Plan account is \n$20,000. The minimum initial investment in any \ninvestment option is $5,000.\n\nPerpetual WealthFocus Pension Plan",
|
||||||
"---Example 1 End---",
|
"---Example 1 End---",
|
||||||
"The output should be:",
|
"The output should be:",
|
||||||
"{\"data\": [{\"fund name\": \"Perpetual WealthFocus Pension Plan\", \"share name\": \"\", \"minimum_initial_investment\": 5000}]",
|
"{\"data\": [{\"fund name\": \"Perpetual WealthFocus Pension Plan\", \"minimum_initial_investment\": 5000}]",
|
||||||
"\n",
|
"\n",
|
||||||
"---Example 2 Start---",
|
"---Example 2 Start---",
|
||||||
"Prime Super \n\n5 Initial investment amount \n\nThe minimum net total initial investment amount is $10,000. Please note before you open your pension account: If you \nhave made personal contributions into super and wish to claim a tax deduction, you will have to lodge a Notice of \nIntent to Claim form with the relevant super fund (including Prime Super) before you roll your super into the Income \nStreams account.",
|
"Prime Super \n\n5 Initial investment amount \n\nThe minimum net total initial investment amount is $10,000. Please note before you open your pension account: If you \nhave made personal contributions into super and wish to claim a tax deduction, you will have to lodge a Notice of \nIntent to Claim form with the relevant super fund (including Prime Super) before you roll your super into the Income \nStreams account.",
|
||||||
"---Example 2 End---",
|
"---Example 2 End---",
|
||||||
"The output should be:",
|
"The output should be:",
|
||||||
"{\"data\": [{\"fund name\": \"Prime Super\", \"share name\": \"\", \"minimum_initial_investment\": 10000}]"
|
"{\"data\": [{\"fund name\": \"Prime Super\", \"minimum_initial_investment\": 10000}]",
|
||||||
|
"\n",
|
||||||
|
"---Example 3 Start---",
|
||||||
|
"Minimum \nPlatform operators \nIndirect investors \ninvestment \namounts and their platform operators \nInitial – $500,000 \nAdditional – $5,000 \nMinimum investment amounts are subject to the arrangements between indirect investors \n",
|
||||||
|
"---Example 3 End---",
|
||||||
|
"The minimum initial investment is under the \"Initial\", the value is $500,000.",
|
||||||
|
"The output should be:",
|
||||||
|
"{\"data\": [{\"fund name\": \"unknown\", \"minimum_initial_investment\": 500000}]",
|
||||||
|
"\n",
|
||||||
|
"---Example 4 Start---",
|
||||||
|
"Contributions and access \nto your investment \n• \n• \nWe provide choice and flexibility for your investment with access to your money at anytime. \nStart your investment with as little as $1,000. \n• \nEstablish a regular savings plan. \n28 \n• \nYou can switch between the investment options and also rebalance within your selected \noptions at any time. \n• \nMinimum withdrawal – $500. \n",
|
||||||
|
"---Example 4 End---",
|
||||||
|
"The output should be:",
|
||||||
|
"{\"data\": [{\"fund name\": \"unknown\", \"minimum_initial_investment\": 1000}]"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
@ -244,9 +264,19 @@
|
||||||
"Fund level data: (\"fund name\" and \"datapoint_name\") and share level data: (\"fund name\", \"share name\", \"datapoint_name\") should be output separately.",
|
"Fund level data: (\"fund name\" and \"datapoint_name\") and share level data: (\"fund name\", \"share name\", \"datapoint_name\") should be output separately.",
|
||||||
"The output should be JSON format, the format is like below example(s):"
|
"The output should be JSON format, the format is like below example(s):"
|
||||||
],
|
],
|
||||||
"fund_level": [
|
"fund_level": {
|
||||||
"[{\"fund name\": \"fund 1 - sub fund name 1\",\"benchmark_name\": \"S&P 500 Index Fund\"}, {\"fund name\": \"fund 2 - sub fund name 2\",\"benchmark_name\": \"FTSE All Share\"}]"
|
"fund_name":[
|
||||||
],
|
"fund 1",
|
||||||
|
"fund 2",
|
||||||
|
"fund 3"
|
||||||
|
],
|
||||||
|
"benchmark_name_value":[
|
||||||
|
"S&P/ASX 300 Accumulation Index plus 2% pa",
|
||||||
|
"FTSE EPRA/NAREIT Developed Index",
|
||||||
|
"Bloomberg AusBond Bank Bill Index"
|
||||||
|
],
|
||||||
|
"minimum_initial_investment_value": [1000, 5000, 10000]
|
||||||
|
},
|
||||||
"share_level": {
|
"share_level": {
|
||||||
"fund_name": [
|
"fund_name": [
|
||||||
"fund 1",
|
"fund 1",
|
||||||
|
|
@ -279,7 +309,6 @@
|
||||||
"date_of_last_hwm_reset_value": ["29 March 2023", "18 April 2024", "19 October 2021"],
|
"date_of_last_hwm_reset_value": ["29 March 2023", "18 April 2024", "19 October 2021"],
|
||||||
"date_of_last_performance_fee_restructure_value": ["12 August 2022", "15 March 2024", "11 November 2023"],
|
"date_of_last_performance_fee_restructure_value": ["12 August 2022", "15 March 2024", "11 November 2023"],
|
||||||
"high_water_mark_type_value": ["Total Return", "Excess Return", "Both TR & ER"],
|
"high_water_mark_type_value": ["Total Return", "Excess Return", "Both TR & ER"],
|
||||||
"minimum_initial_investment_value": [0, 5000, 10000],
|
|
||||||
"recoverable_expenses_value": [0.12, 0.05, 0.06],
|
"recoverable_expenses_value": [0.12, 0.05, 0.06],
|
||||||
"indirect_costs_value": [0.12, 0.16, 0.02]
|
"indirect_costs_value": [0.12, 0.16, 0.02]
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -342,9 +342,18 @@
|
||||||
"Fund level data: (\"fund name\" and \"TOR\") and share level data: (\"fund name\", \"share name\", \"ter\", \"performance fees\", \"ogc\") should be output separately.",
|
"Fund level data: (\"fund name\" and \"TOR\") and share level data: (\"fund name\", \"share name\", \"ter\", \"performance fees\", \"ogc\") should be output separately.",
|
||||||
"The output should be JSON format, the format is like below example(s):"
|
"The output should be JSON format, the format is like below example(s):"
|
||||||
],
|
],
|
||||||
"fund_level": [
|
"fund_level": {
|
||||||
"[{\"fund name\": \"fund 1 - sub fund name 1\",\"tor\": 35.26}, {\"fund name\": \"fund 2 - sub fund name 2\",\"tor\": -28.26}, {\"fund name\": \"fund 3\",\"tor\": 115.52,}]"
|
"fund_name": [
|
||||||
],
|
"fund 1 - sub fund name 1",
|
||||||
|
"fund 2 - sub fund name 2",
|
||||||
|
"fund 3"
|
||||||
|
],
|
||||||
|
"tor_value": [
|
||||||
|
35.26,
|
||||||
|
-28.26,
|
||||||
|
115.52
|
||||||
|
]
|
||||||
|
},
|
||||||
"share_level": {
|
"share_level": {
|
||||||
"fund_name": [
|
"fund_name": [
|
||||||
"fund 1",
|
"fund 1",
|
||||||
|
|
|
||||||
31
main.py
31
main.py
|
|
@ -1429,7 +1429,7 @@ def get_aus_prospectus_document_category():
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
get_aus_prospectus_document_category()
|
# get_aus_prospectus_document_category()
|
||||||
# test_data_extraction_metrics()
|
# test_data_extraction_metrics()
|
||||||
# data_file_path = r"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_17_documents_by_text_20250219123515.xlsx"
|
# data_file_path = r"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_17_documents_by_text_20250219123515.xlsx"
|
||||||
# document_mapping_file_path = r"/data/aus_prospectus/basic_information/17_documents/aus_prospectus_17_documents_mapping.xlsx"
|
# document_mapping_file_path = r"/data/aus_prospectus/basic_information/17_documents/aus_prospectus_17_documents_mapping.xlsx"
|
||||||
|
|
@ -1457,6 +1457,7 @@ if __name__ == "__main__":
|
||||||
# special_doc_id_list = ["553242411"]
|
# special_doc_id_list = ["553242411"]
|
||||||
|
|
||||||
doc_source = "aus_prospectus"
|
doc_source = "aus_prospectus"
|
||||||
|
# doc_source = "emea_ar"
|
||||||
if doc_source == "aus_prospectus":
|
if doc_source == "aus_prospectus":
|
||||||
# document_sample_file = (
|
# document_sample_file = (
|
||||||
# r"./sample_documents/aus_prospectus_100_documents_multi_fund_sample.txt"
|
# r"./sample_documents/aus_prospectus_100_documents_multi_fund_sample.txt"
|
||||||
|
|
@ -1485,7 +1486,7 @@ if __name__ == "__main__":
|
||||||
# "555377021",
|
# "555377021",
|
||||||
# "555654388",
|
# "555654388",
|
||||||
# ]
|
# ]
|
||||||
special_doc_id_list: list = ["391080133"]
|
special_doc_id_list: list = ["446324179"]
|
||||||
pdf_folder: str = r"/data/aus_prospectus/pdf/"
|
pdf_folder: str = r"/data/aus_prospectus/pdf/"
|
||||||
output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"
|
output_pdf_text_folder: str = r"/data/aus_prospectus/output/pdf_text/"
|
||||||
output_extract_data_child_folder: str = (
|
output_extract_data_child_folder: str = (
|
||||||
|
|
@ -1502,18 +1503,18 @@ if __name__ == "__main__":
|
||||||
)
|
)
|
||||||
drilldown_folder = r"/data/aus_prospectus/output/drilldown/"
|
drilldown_folder = r"/data/aus_prospectus/output/drilldown/"
|
||||||
|
|
||||||
# batch_run_documents(
|
batch_run_documents(
|
||||||
# doc_source=doc_source,
|
doc_source=doc_source,
|
||||||
# special_doc_id_list=special_doc_id_list,
|
special_doc_id_list=special_doc_id_list,
|
||||||
# pdf_folder=pdf_folder,
|
pdf_folder=pdf_folder,
|
||||||
# document_mapping_file=document_mapping_file,
|
document_mapping_file=document_mapping_file,
|
||||||
# output_pdf_text_folder=output_pdf_text_folder,
|
output_pdf_text_folder=output_pdf_text_folder,
|
||||||
# output_extract_data_child_folder=output_extract_data_child_folder,
|
output_extract_data_child_folder=output_extract_data_child_folder,
|
||||||
# output_extract_data_total_folder=output_extract_data_total_folder,
|
output_extract_data_total_folder=output_extract_data_total_folder,
|
||||||
# output_mapping_child_folder=output_mapping_child_folder,
|
output_mapping_child_folder=output_mapping_child_folder,
|
||||||
# output_mapping_total_folder=output_mapping_total_folder,
|
output_mapping_total_folder=output_mapping_total_folder,
|
||||||
# drilldown_folder=drilldown_folder,
|
drilldown_folder=drilldown_folder,
|
||||||
# )
|
)
|
||||||
elif doc_source == "emea_ar":
|
elif doc_source == "emea_ar":
|
||||||
special_doc_id_list = [
|
special_doc_id_list = [
|
||||||
"292989214",
|
"292989214",
|
||||||
|
|
@ -1569,7 +1570,7 @@ if __name__ == "__main__":
|
||||||
"520879048",
|
"520879048",
|
||||||
"529925114",
|
"529925114",
|
||||||
]
|
]
|
||||||
# special_doc_id_list = ["532438210"]
|
special_doc_id_list = ["321733631"]
|
||||||
batch_run_documents(
|
batch_run_documents(
|
||||||
doc_source=doc_source, special_doc_id_list=special_doc_id_list
|
doc_source=doc_source, special_doc_id_list=special_doc_id_list
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue