update AUS Prospectus relevant configuration
This commit is contained in:
parent
0a867dcf07
commit
91c86bb983
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"total_annual_dollar_based_charges": {"english": ["total annual dollar based charges", "total annual dollar based charges ($)","total annual dollar"]},
|
||||
"management_fee_and_costs": {"english": ["management fees and cost"]},
|
||||
"management_fee": {"english": ["management fee", "management fees","investment management fees","management fees and cost", "investment option management costs", "investment option management costs1"]},
|
||||
"management_fee_and_costs": {"english": ["management fees and cost", "Plus other investment fees and costs"]},
|
||||
"management_fee": {"english": ["management fee", "management fees","investment management fees","management fees and cost", "investment option management costs", "investment option management costs1", "Plus other investment fees and costs"]},
|
||||
"performance_fee": {"english": ["performance fee", "performance fees"]},
|
||||
"performance_fee_costs": {"english": ["performance fee costs", "performance fees costs"]},
|
||||
"buy_spread": {"english": ["buy-spread", "buy spread", "buy/sell spreads", "BUY-SELL SPREAD"]},
|
||||
|
|
@ -23,5 +23,5 @@
|
|||
"high_water_mark_type": {"english": ["high-water mark type", "high water mark type"]},
|
||||
"minimum_initial_investment": {"english": ["minimum initial investment","inital investment"]},
|
||||
"recoverable_expenses": {"english": ["recoverable expenses","recoverable cost","expense recoveries"]},
|
||||
"indirect_costs": {"english": ["indirect cost","indirect fees","indirect costs"]}
|
||||
"indirect_costs": {"english": ["indirect cost","indirect fees","indirect fee","indirect costs"]}
|
||||
}
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"total_annual_dollar_based_charges": {"english": ["total annual dollar based charges", "total annual dollar based charges ($)","total annual dollar"]},
|
||||
"management_fee_and_costs": {"english": ["management fees and cost", "management fees and costs", "management fee and cost"]},
|
||||
"management_fee": {"english": ["management fee", "management fees","investment management fees","management fees and cost", "investment option management costs", "investment option management costs1"]},
|
||||
"management_fee_and_costs": {"english": ["management fees and cost", "management fees and costs", "management fee and cost", "Plus other investment fees and costs"]},
|
||||
"management_fee": {"english": ["management fee", "management fees","investment management fees","management fees and cost", "investment option management costs", "investment option management costs1", "Plus other investment fees and costs"]},
|
||||
"performance_fee": {"english": ["performance fee", "performance fees"]},
|
||||
"performance_fee_costs": {"english": ["performance fee costs", "performance fees costs"]},
|
||||
"buy_spread": {"english": ["buy-spread", "buy spread", "buy/sell spreads", "BUY-SELL SPREAD"]},
|
||||
|
|
@ -23,5 +23,5 @@
|
|||
"high_water_mark_type": {"english": ["high-water mark type", "high water mark type"]},
|
||||
"minimum_initial_investment": {"english": ["minimum initial investment","inital investment"]},
|
||||
"recoverable_expenses": {"english": ["recoverable expenses","recoverable cost","expense recoveries"]},
|
||||
"indirect_costs": {"english": ["indirect cost","indirect fees","indirect costs"]}
|
||||
"indirect_costs": {"english": ["indirect cost","indirect fees","indirect fee","indirect costs"]}
|
||||
}
|
||||
|
|
@ -17,8 +17,8 @@
|
|||
"data_business_features": {
|
||||
"common": [
|
||||
"General rules:",
|
||||
"- The data is in the context, perhaps in table(s), semi-table(s) or paragraphs.",
|
||||
"- Fund name: ",
|
||||
"- 1. The data is in the context, perhaps in table(s), semi-table(s) or paragraphs.",
|
||||
"- 2. Fund name: ",
|
||||
"a. The full fund name should be the main fund name + sub-fund name, e.g., if the main fund name is Black Rock European and the sub-fund name is Growth, the full fund name is: Black Rock European Growth.",
|
||||
"b. The sub-fund name may be as the first column or first row values in the table.",
|
||||
"b.1 fund name example:",
|
||||
|
|
@ -34,7 +34,7 @@
|
|||
"---- Example End ----",
|
||||
"Correct fund name: AXA World Funds - ACT Emerging Markets Short Duration Bonds Low Carbon",
|
||||
"\n",
|
||||
"- Only extract the latest data from context:",
|
||||
"- 3. Only extract the latest data from context:",
|
||||
"If there are multiple data values in the same row, please extract the latest.",
|
||||
"\n",
|
||||
"d. In some table formats, the fund name is at the end of the row; please extract the fund name from the end of the row.",
|
||||
|
|
@ -51,7 +51,7 @@
|
|||
"---Example End---",
|
||||
"Correct fund name: MLC Horizon 2 Income Portfolio",
|
||||
"Correct share name: MLC Horizon 2 Income Portfolio",
|
||||
"- Reported names:",
|
||||
"- 4. Reported names:",
|
||||
"Only output the values that have significant reported names.",
|
||||
"- Multiple data columns with same reported name but different post-fix:",
|
||||
"If there are multiple reported names with different post-fix text, here is the priority rule:",
|
||||
|
|
@ -60,7 +60,24 @@
|
|||
"\n Investment option \nInvestment option \nmanagement \ncosts1 \n% p.a. \n(A)\nLifeplan \nadministration fee \n(gross)2 \n% p.a. \n(B)\nLifeplan \nadministration fee \n(net) \n% p.a. \n(C)\nTotal Management \nfees and costs \n(gross) \n% p.a. \n(A + B)\nTotal Management \nfees and costs \n(net) \n% p.a. \n(A + C)\nAllan Gray Australian Equity Fund \u2013 Class A\n0.77\n0.60\n0.42\n1.37\n1.19\n",
|
||||
"---Example End---",
|
||||
"The output should be:",
|
||||
"{\"data\": [{\"fund name\": \"Allan Gray Australian Equity Fund\", \"share name\": \"Class A\", \"management_fee_and_costs\": 1.19, \"management_fee\": 0.77, \"administration_fees\": 0.42}]"
|
||||
"{\"data\": [{\"fund name\": \"Allan Gray Australian Equity Fund\", \"share name\": \"Class A\", \"management_fee_and_costs\": 1.19, \"management_fee\": 0.77, \"administration_fees\": 0.42}]}",
|
||||
"- 5. Reverse order of data columns from table text in PDF:",
|
||||
"For this case, 1. the column order is reversed, \n2. The fund name is at the end of the row, with the number values in front of the fund name.",
|
||||
"---Example 1 Start---",
|
||||
"Transaction\ncosts\n(gross)1\nBuy-sell\nspreads\nTransaction\ncosts (net)\nEquals\ninvestment fees and\ncosts\nThe investment fees and\ncosts are made up of\nPlus\nother\ninvestment\nfees and\ncosts\nPerformance\nfee\n% pa\nEntry %/\nExit %\n% pa\n% pa\n% pa\nReady-made portfolios\nSimple choice\n0.04\n0.10/0.10\n0.00\n0.62\n0.55\n0.07\nMLC Stable\n0.05\n0.10/0.10\n0.02\n0.80\n0.65\n0.15\nMLC Conservative Balanced",
|
||||
"---Example 1 End---",
|
||||
"For this case, Management fees and costs = Management fees with same reported name: Plus\nother\ninvestment\nfees and\ncosts",
|
||||
"The output should be: ",
|
||||
"{\"data\": [{\"fund name\": \"MLC Stable\", \"share name\": \"MLC Stable\", \"buy_spread\": 0.10, \"sell_spread\": 0.10, \"management_fee_and_costs\": 0.55, \"management_fee\": 0.55, \"performance_fee\": 0.07}, {\"fund name\": \"MLC Conservative Balanced\", \"share name\": \"MLC Conservative Balanced\", \"buy_spread\": 0.10, \"sell_spread\": 0.10, \"management_fee_and_costs\": 0.65, \"management_fee\": 0.65, \"performance_fee\": 0.15}]}",
|
||||
"\n",
|
||||
"---Example 2 Start---",
|
||||
"\nTotal\nTransaction Costs\nPerformance Fees\nManagement fees and costs\nIndirect Fee\nManagement fees\nMLC diversified investment\noption\n1.49% p.a.\n0.01% p.a.\n0.06% p.a.\n0.07% p.a.\n1.35% p.a.\nMLC Horizon 2\nIncome Portfolio\n",
|
||||
"---Example 2 End---",
|
||||
"For this case, Management fees and costs = Management fees + Indirect Fee.",
|
||||
"The output should be:",
|
||||
"{\"data\": [{\"fund name\": \"MLC Horizon 2 Income Portfolio\", \"share name\": \"MLC Horizon 2 Income Portfolio\", \"management_fee_and_costs\": 1.42, \"management_fee\": 1.35, \"indirect_costs\": 0.07, \"performance_fee\": 0.06}]}",
|
||||
"- 6. Please ignore these words as fund names, it means never extract these words as fund names. They are:",
|
||||
"\"Ready-made portfolios\", \"Simple choice\", \"Build-your-own portfolio\"."
|
||||
],
|
||||
"investment_level": {
|
||||
"total_annual_dollar_based_charges": "Total annual dollar based charges is share level data.",
|
||||
|
|
@ -140,6 +157,10 @@
|
|||
"---Example End---",
|
||||
"The output should be:",
|
||||
"{\"data\": [{\"fund name\": \"MLC Horizon 4 Balanced Portfolio\", \"share name\": \"MLC Horizon 4 Balanced Portfolio\", \"management_fee_and_costs\": 1.67, \"management_fee\": 1.58, \"administration_fees\": 0.09, \"performance_fee\": 0.03}]}"
|
||||
],
|
||||
"buy_spread": [
|
||||
"Please don't extract data by the reported names for buy_spread or sell_spread, they are: ",
|
||||
"Transaction costs buy/sell spread recovery, Transaction costs reducing return of the investment option (net transaction costs)"
|
||||
]
|
||||
}
|
||||
},
|
||||
|
|
|
|||
4
main.py
4
main.py
|
|
@ -887,7 +887,7 @@ def batch_run_documents(special_doc_id_list: list = None,
|
|||
)
|
||||
re_run_extract_data = True
|
||||
re_run_mapping_data = True
|
||||
force_save_total_data = False
|
||||
force_save_total_data = True
|
||||
calculate_metrics = False
|
||||
|
||||
extract_way = "text"
|
||||
|
|
@ -1051,7 +1051,7 @@ if __name__ == "__main__":
|
|||
special_doc_id_list: list = ["539790009",
|
||||
"542300403",
|
||||
"542301117",
|
||||
# "542306317",
|
||||
"542306317",
|
||||
"547567013",
|
||||
"552505237",
|
||||
"552505278",
|
||||
|
|
|
|||
Loading…
Reference in New Issue