update AUS Prospectus relevant configuration
This commit is contained in:
parent
0a867dcf07
commit
91c86bb983
|
|
@ -1,7 +1,7 @@
|
||||||
{
|
{
|
||||||
"total_annual_dollar_based_charges": {"english": ["total annual dollar based charges", "total annual dollar based charges ($)","total annual dollar"]},
|
"total_annual_dollar_based_charges": {"english": ["total annual dollar based charges", "total annual dollar based charges ($)","total annual dollar"]},
|
||||||
"management_fee_and_costs": {"english": ["management fees and cost"]},
|
"management_fee_and_costs": {"english": ["management fees and cost", "Plus other investment fees and costs"]},
|
||||||
"management_fee": {"english": ["management fee", "management fees","investment management fees","management fees and cost", "investment option management costs", "investment option management costs1"]},
|
"management_fee": {"english": ["management fee", "management fees","investment management fees","management fees and cost", "investment option management costs", "investment option management costs1", "Plus other investment fees and costs"]},
|
||||||
"performance_fee": {"english": ["performance fee", "performance fees"]},
|
"performance_fee": {"english": ["performance fee", "performance fees"]},
|
||||||
"performance_fee_costs": {"english": ["performance fee costs", "performance fees costs"]},
|
"performance_fee_costs": {"english": ["performance fee costs", "performance fees costs"]},
|
||||||
"buy_spread": {"english": ["buy-spread", "buy spread", "buy/sell spreads", "BUY-SELL SPREAD"]},
|
"buy_spread": {"english": ["buy-spread", "buy spread", "buy/sell spreads", "BUY-SELL SPREAD"]},
|
||||||
|
|
@ -23,5 +23,5 @@
|
||||||
"high_water_mark_type": {"english": ["high-water mark type", "high water mark type"]},
|
"high_water_mark_type": {"english": ["high-water mark type", "high water mark type"]},
|
||||||
"minimum_initial_investment": {"english": ["minimum initial investment","inital investment"]},
|
"minimum_initial_investment": {"english": ["minimum initial investment","inital investment"]},
|
||||||
"recoverable_expenses": {"english": ["recoverable expenses","recoverable cost","expense recoveries"]},
|
"recoverable_expenses": {"english": ["recoverable expenses","recoverable cost","expense recoveries"]},
|
||||||
"indirect_costs": {"english": ["indirect cost","indirect fees","indirect costs"]}
|
"indirect_costs": {"english": ["indirect cost","indirect fees","indirect fee","indirect costs"]}
|
||||||
}
|
}
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
{
|
{
|
||||||
"total_annual_dollar_based_charges": {"english": ["total annual dollar based charges", "total annual dollar based charges ($)","total annual dollar"]},
|
"total_annual_dollar_based_charges": {"english": ["total annual dollar based charges", "total annual dollar based charges ($)","total annual dollar"]},
|
||||||
"management_fee_and_costs": {"english": ["management fees and cost", "management fees and costs", "management fee and cost"]},
|
"management_fee_and_costs": {"english": ["management fees and cost", "management fees and costs", "management fee and cost", "Plus other investment fees and costs"]},
|
||||||
"management_fee": {"english": ["management fee", "management fees","investment management fees","management fees and cost", "investment option management costs", "investment option management costs1"]},
|
"management_fee": {"english": ["management fee", "management fees","investment management fees","management fees and cost", "investment option management costs", "investment option management costs1", "Plus other investment fees and costs"]},
|
||||||
"performance_fee": {"english": ["performance fee", "performance fees"]},
|
"performance_fee": {"english": ["performance fee", "performance fees"]},
|
||||||
"performance_fee_costs": {"english": ["performance fee costs", "performance fees costs"]},
|
"performance_fee_costs": {"english": ["performance fee costs", "performance fees costs"]},
|
||||||
"buy_spread": {"english": ["buy-spread", "buy spread", "buy/sell spreads", "BUY-SELL SPREAD"]},
|
"buy_spread": {"english": ["buy-spread", "buy spread", "buy/sell spreads", "BUY-SELL SPREAD"]},
|
||||||
|
|
@ -23,5 +23,5 @@
|
||||||
"high_water_mark_type": {"english": ["high-water mark type", "high water mark type"]},
|
"high_water_mark_type": {"english": ["high-water mark type", "high water mark type"]},
|
||||||
"minimum_initial_investment": {"english": ["minimum initial investment","inital investment"]},
|
"minimum_initial_investment": {"english": ["minimum initial investment","inital investment"]},
|
||||||
"recoverable_expenses": {"english": ["recoverable expenses","recoverable cost","expense recoveries"]},
|
"recoverable_expenses": {"english": ["recoverable expenses","recoverable cost","expense recoveries"]},
|
||||||
"indirect_costs": {"english": ["indirect cost","indirect fees","indirect costs"]}
|
"indirect_costs": {"english": ["indirect cost","indirect fees","indirect fee","indirect costs"]}
|
||||||
}
|
}
|
||||||
|
|
@ -17,8 +17,8 @@
|
||||||
"data_business_features": {
|
"data_business_features": {
|
||||||
"common": [
|
"common": [
|
||||||
"General rules:",
|
"General rules:",
|
||||||
"- The data is in the context, perhaps in table(s), semi-table(s) or paragraphs.",
|
"- 1. The data is in the context, perhaps in table(s), semi-table(s) or paragraphs.",
|
||||||
"- Fund name: ",
|
"- 2. Fund name: ",
|
||||||
"a. The full fund name should be main fund name + sub-fund name, e,g, main fund name is Black Rock European, sub-fund name is Growth, the full fund name is: Black Rock European Growth.",
|
"a. The full fund name should be main fund name + sub-fund name, e,g, main fund name is Black Rock European, sub-fund name is Growth, the full fund name is: Black Rock European Growth.",
|
||||||
"b. The sub-fund name may be as the first column or first row values in the table.",
|
"b. The sub-fund name may be as the first column or first row values in the table.",
|
||||||
"b.1 fund name example:",
|
"b.1 fund name example:",
|
||||||
|
|
@ -34,7 +34,7 @@
|
||||||
"---- Example End ----",
|
"---- Example End ----",
|
||||||
"Correct fund name: AXA World Funds - ACT Emerging Markets Short Duration Bonds Low Carbon",
|
"Correct fund name: AXA World Funds - ACT Emerging Markets Short Duration Bonds Low Carbon",
|
||||||
"\n",
|
"\n",
|
||||||
"- Only extract the latest data from context:",
|
"- 3. Only extract the latest data from context:",
|
||||||
"If with multiple data values in same row, please extract the latest.",
|
"If with multiple data values in same row, please extract the latest.",
|
||||||
"\n",
|
"\n",
|
||||||
"d. Some table format, the fund name is in the end of row, please extract the fund name from the end of row.",
|
"d. Some table format, the fund name is in the end of row, please extract the fund name from the end of row.",
|
||||||
|
|
@ -51,7 +51,7 @@
|
||||||
"---Example End---",
|
"---Example End---",
|
||||||
"Correct fund name: MLC Horizon 2 Income Portfolio",
|
"Correct fund name: MLC Horizon 2 Income Portfolio",
|
||||||
"Correct share name: MLC Horizon 2 Income Portfolio",
|
"Correct share name: MLC Horizon 2 Income Portfolio",
|
||||||
"- Reported names:",
|
"- 4. Reported names:",
|
||||||
"Only output the values which with significant reported names.",
|
"Only output the values which with significant reported names.",
|
||||||
"- Multiple data columns with same reported name but different post-fix:",
|
"- Multiple data columns with same reported name but different post-fix:",
|
||||||
"If there are multiple reported names with different post-fix text, here is the priority rule:",
|
"If there are multiple reported names with different post-fix text, here is the priority rule:",
|
||||||
|
|
@ -60,7 +60,24 @@
|
||||||
"\n Investment option \nInvestment option \nmanagement \ncosts1 \n% p.a. \n(A)\nLifeplan \nadministration fee \n(gross)2 \n% p.a. \n(B)\nLifeplan \nadministration fee \n(net) \n% p.a. \n(C)\nTotal Management \nfees and costs \n(gross) \n% p.a. \n(A + B)\nTotal Management \nfees and costs \n(net) \n% p.a. \n(A + C)\nAllan Gray Australian Equity Fund \u2013 Class A\n0.77\n0.60\n0.42\n1.37\n1.19\n",
|
"\n Investment option \nInvestment option \nmanagement \ncosts1 \n% p.a. \n(A)\nLifeplan \nadministration fee \n(gross)2 \n% p.a. \n(B)\nLifeplan \nadministration fee \n(net) \n% p.a. \n(C)\nTotal Management \nfees and costs \n(gross) \n% p.a. \n(A + B)\nTotal Management \nfees and costs \n(net) \n% p.a. \n(A + C)\nAllan Gray Australian Equity Fund \u2013 Class A\n0.77\n0.60\n0.42\n1.37\n1.19\n",
|
||||||
"---Example End---",
|
"---Example End---",
|
||||||
"The output should be:",
|
"The output should be:",
|
||||||
"{\"data\": [{\"fund name\": \"Allan Gray Australian Equity Fund\", \"share name\": \"Class A\", \"management_fee_and_costs\": 1.19, \"management_fee\": 0.77, \"administration_fees\": 0.42}]"
|
"{\"data\": [{\"fund name\": \"Allan Gray Australian Equity Fund\", \"share name\": \"Class A\", \"management_fee_and_costs\": 1.19, \"management_fee\": 0.77, \"administration_fees\": 0.42}]}",
|
||||||
|
"- 5. Reverse order of data columns from table text in PDF:",
|
||||||
|
"For this case, 1. The column order is reversed, \n2. The fund name is at the end of the row, with the number values in front of the fund name.",
|
||||||
|
"---Example 1 Start---",
|
||||||
|
"Transaction\ncosts\n(gross)1\nBuy-sell\nspreads\nTransaction\ncosts (net)\nEquals\ninvestment fees and\ncosts\nThe investment fees and\ncosts are made up of\nPlus\nother\ninvestment\nfees and\ncosts\nPerformance\nfee\n% pa\nEntry %/\nExit %\n% pa\n% pa\n% pa\nReady-made portfolios\nSimple choice\n0.04\n0.10/0.10\n0.00\n0.62\n0.55\n0.07\nMLC Stable\n0.05\n0.10/0.10\n0.02\n0.80\n0.65\n0.15\nMLC Conservative Balanced",
|
||||||
|
"---Example 1 End---",
|
||||||
|
"For this case, Management fees and costs = Management fees, both taken from the same reported name: Plus\nother\ninvestment\nfees and\ncosts",
|
||||||
|
"The output should be: ",
|
||||||
|
"{\"data\": [{\"fund name\": \"MLC Stable\", \"share name\": \"MLC Stable\", \"buy_spread\": 0.10, \"sell_spread\": 0.10, \"management_fee_and_costs\": 0.55, \"management_fee\": 0.55, \"performance_fee\": 0.07}, {\"fund name\": \"MLC Conservative Balanced\", \"share name\": \"MLC Conservative Balanced\", \"buy_spread\": 0.10, \"sell_spread\": 0.10, \"management_fee_and_costs\": 0.65, \"management_fee\": 0.65, \"performance_fee\": 0.15}]}",
|
||||||
|
"\n",
|
||||||
|
"---Example 2 Start---",
|
||||||
|
"\nTotal\nTransaction Costs\nPerformance Fees\nManagement fees and costs\nIndirect Fee\nManagement fees\nMLC diversified investment\noption\n1.49% p.a.\n0.01% p.a.\n0.06% p.a.\n0.07% p.a.\n1.35% p.a.\nMLC Horizon 2\nIncome Portfolio\n",
|
||||||
|
"---Example 2 End---",
|
||||||
|
"For this case, Management fees and costs = Management fees + Indirect Fee.",
|
||||||
|
"The output should be:",
|
||||||
|
"{\"data\": [{\"fund name\": \"MLC Horizon 2 Income Portfolio\", \"share name\": \"MLC Horizon 2 Income Portfolio\", \"management_fee_and_costs\": 1.42, \"management_fee\": 1.35, \"indirect_costs\": 0.07, \"performance_fee\": 0.06}]}",
|
||||||
|
"- 6. Please ignore these words as fund names, i.e. never extract these words as fund names. They are:",
|
||||||
|
"\"Ready-made portfolios\", \"Simple choice\", \"Build-your-own portfolio\"."
|
||||||
],
|
],
|
||||||
"investment_level": {
|
"investment_level": {
|
||||||
"total_annual_dollar_based_charges": "Total annual dollar based charges is share level data.",
|
"total_annual_dollar_based_charges": "Total annual dollar based charges is share level data.",
|
||||||
|
|
@ -140,6 +157,10 @@
|
||||||
"---Example End---",
|
"---Example End---",
|
||||||
"The output should be:",
|
"The output should be:",
|
||||||
"{\"data\": [{\"fund name\": \"MLC Horizon 4 Balanced Portfolio\", \"share name\": \"MLC Horizon 4 Balanced Portfolio\", \"management_fee_and_costs\": 1.67, \"management_fee\": 1.58, \"administration_fees\": 0.09, \"performance_fee\": 0.03}]"
|
"{\"data\": [{\"fund name\": \"MLC Horizon 4 Balanced Portfolio\", \"share name\": \"MLC Horizon 4 Balanced Portfolio\", \"management_fee_and_costs\": 1.67, \"management_fee\": 1.58, \"administration_fees\": 0.09, \"performance_fee\": 0.03}]"
|
||||||
|
],
|
||||||
|
"buy_spread": [
|
||||||
|
"Please don't extract buy_spread or sell_spread data by the following reported names: ",
|
||||||
|
"Transaction costs buy/sell spread recovery, Transaction costs reducing return of the investment option (net transaction costs)"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
|
||||||
4
main.py
4
main.py
|
|
@ -887,7 +887,7 @@ def batch_run_documents(special_doc_id_list: list = None,
|
||||||
)
|
)
|
||||||
re_run_extract_data = True
|
re_run_extract_data = True
|
||||||
re_run_mapping_data = True
|
re_run_mapping_data = True
|
||||||
force_save_total_data = False
|
force_save_total_data = True
|
||||||
calculate_metrics = False
|
calculate_metrics = False
|
||||||
|
|
||||||
extract_way = "text"
|
extract_way = "text"
|
||||||
|
|
@ -1051,7 +1051,7 @@ if __name__ == "__main__":
|
||||||
special_doc_id_list: list = ["539790009",
|
special_doc_id_list: list = ["539790009",
|
||||||
"542300403",
|
"542300403",
|
||||||
"542301117",
|
"542301117",
|
||||||
# "542306317",
|
"542306317",
|
||||||
"547567013",
|
"547567013",
|
||||||
"552505237",
|
"552505237",
|
||||||
"552505278",
|
"552505278",
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue