optimize instructions
This commit is contained in:
parent
3f2bb38208
commit
03365227b9
|
|
@ -16,7 +16,7 @@
|
||||||
"reported_name": {
|
"reported_name": {
|
||||||
"tor": "The TOR reported name could be:\nTOR, Turnover Ratio, Portfolio Turnover, Portfolio turnover ratio, PTR, Taux de rotation corrigé - Gecorrigeerde omloopsnelheid, etc.",
|
"tor": "The TOR reported name could be:\nTOR, Turnover Ratio, Portfolio Turnover, Portfolio turnover ratio, PTR, Taux de rotation corrigé - Gecorrigeerde omloopsnelheid, etc.",
|
||||||
"ogc": "The OGC reported name could be:\nOGC, OGF, Ongoing Charge, Operation Charge, Ongoing charges in per cent, Ongoing charges in percent, Ongoing charges as a percentage, On Going Charges, Operating Charge, Ongoing Fund Charge, etc.",
|
"ogc": "The OGC reported name could be:\nOGC, OGF, Ongoing Charge, Operation Charge, Ongoing charges in per cent, Ongoing charges in percent, Ongoing charges as a percentage, On Going Charges, Operating Charge, Ongoing Fund Charge, etc.",
|
||||||
"ter": "The TER reported name could be:\nTER, Total Expense Ratio, Total expense ratio as a percentage, Total Fund Charge, Gross Expense Ratio, All in fee, Total Net Expense Ratio, Weighted Average Expense Ratio, Synthetic total Expense Ratio, Annualised TER including performance fees, Capped Expense Ratio, TER (en %) (with performance), Net TER, Total Expense Ratio in Prozent, Annualisierte TER in % (Mit Gebührenverzicht), etc.",
|
"ter": "The TER reported name could be:\nTER, Total Expense Ratio, Total expense ratio as a percentage, Total Fund Charge, Gross Expense Ratio, All in fee, Total Net Expense Ratio, Weighted Average Expense Ratio, Synthetic total Expense Ratio, Annualised TER including performance fees, Capped Expense Ratio, TER (en %) (with performance), Net TER, Total Expense Ratio in Prozent, Annualisierte TER in % (Mit Gebührenverzicht), Annualised TER % (with fee waiver), etc.",
|
||||||
"performance_fee": "The performance fees reported name could be:\nperformance fees, performance fees ratio, Performance, etc."
|
"performance_fee": "The performance fees reported name could be:\nperformance fees, performance fees ratio, Performance, etc."
|
||||||
},
|
},
|
||||||
"data_business_features": {
|
"data_business_features": {
|
||||||
|
|
@ -75,8 +75,17 @@
|
||||||
"Some data table is with multiple date columns, please extract the data from the latest date column:",
|
"Some data table is with multiple date columns, please extract the data from the latest date column:",
|
||||||
"- Get dates from column header.",
|
"- Get dates from column header.",
|
||||||
"- Only extract data from the columns which column header is as the latest date.",
|
"- Only extract data from the columns which column header is as the latest date.",
|
||||||
|
"-- common case",
|
||||||
"The latest date-time column usually is the first datapoint value column.",
|
"The latest date-time column usually is the first datapoint value column.",
|
||||||
"Here is the example:",
|
"-- special case",
|
||||||
|
"If several value columns share the same latest date and one of these column titles contains \"(c)\", please extract the data from that column.",
|
||||||
|
"---Example 1 Start---",
|
||||||
|
"Columns: \"For the year ended 31 Dec 23\", \"For the year ended 31 Dec 23\", \"For the year ended 31 Dec 23 (a)\", \"For the year ended 31 Dec 23 (b)\", \"For the year ended 31 Dec 23 (c)\", \"For the year ended 31 Dec 22\", \"For the year ended 31 Dec 21\", please extract the data from \"For the year ended 31 Dec 23 (c)\" column.",
|
||||||
|
"---Example 1 End---",
|
||||||
|
"---Example 2 Start---",
|
||||||
|
"Columns: \"For the period ended 31 Dec 23\", \"For the period ended 31 Dec 23\", \"For the period ended 31 Dec 23 (a)\", \"For the period ended 31 Dec 23 (b)\", \"For the period ended 31 Dec 23 (c)\", \"For the period ended 31 Dec 22\", \"For the period ended 31 Dec 21\", please extract the data from \"For the period ended 31 Dec 23 (c)\" column.",
|
||||||
|
"---Example 2 End---",
|
||||||
|
"More examples for extracting data from the latest date column:",
|
||||||
"-----Example Start-----",
|
"-----Example Start-----",
|
||||||
"performance fees\\nhistorical performance fees\\nhistorical performance fees\\nFrom \\n1 July \\nFrom \\n19 July \\nFrom \\n1 January \\nFrom \\n27 April \\nFrom \\n19 July \\nFrom \\n1 January \\n2021\\nFrom \\n22 May \\n2021\\nFrom \\n16 July \\n2021\\nFrom \\n21 September \\n2021\\nto 30 June 2023\\nto 31 December 2022\\nto 31 December 2021\\nAsia Total Return Fund Class I5 (CHF Hedged) Acc\\n1.73%\\n \\n-1.32%\\n \\n \\n 2.04%\\n \\n \\n \\n",
|
"performance fees\\nhistorical performance fees\\nhistorical performance fees\\nFrom \\n1 July \\nFrom \\n19 July \\nFrom \\n1 January \\nFrom \\n27 April \\nFrom \\n19 July \\nFrom \\n1 January \\n2021\\nFrom \\n22 May \\n2021\\nFrom \\n16 July \\n2021\\nFrom \\n21 September \\n2021\\nto 30 June 2023\\nto 31 December 2022\\nto 31 December 2021\\nAsia Total Return Fund Class I5 (CHF Hedged) Acc\\n1.73%\\n \\n-1.32%\\n \\n \\n 2.04%\\n \\n \\n \\n",
|
||||||
"-----Example End-----",
|
"-----Example End-----",
|
||||||
|
|
@ -140,10 +149,20 @@
|
||||||
"{\"data\": [{\"fund name\": \"GAMAX FUNDS - ASIA PACIFIC\", \"share name\": \"A\", \"ter\": 2.07, \"performance_fee\": 0}]}",
|
"{\"data\": [{\"fund name\": \"GAMAX FUNDS - ASIA PACIFIC\", \"share name\": \"A\", \"ter\": 2.07, \"performance_fee\": 0}]}",
|
||||||
"The performance fees value is TER (including Performance Fees) - TER (excluding Performance Fees) = 2.07 - 2.07 = 0",
|
"The performance fees value is TER (including Performance Fees) - TER (excluding Performance Fees) = 2.07 - 2.07 = 0",
|
||||||
"Case 2:",
|
"Case 2:",
|
||||||
"Attention: if some table is with three value columns: TER excluding performance fees, TER including performance fees, Performance fees, ",
|
"If some table is with three value columns: \"TER including performance fees\", \"TER excluding performance fees\", \"Performance fees\", ",
|
||||||
"The Performance fees value in column: Performance fees, chould be \"-\", because of TER including performance fees - TER excluding performance fees = 0, ",
|
"The Performance fees value in column: Performance fees, could be \"-\", because of \"TER including performance fees\" - \"TER excluding performance fees\" = 0, ",
|
||||||
"But it's incorrect, according to this issue, please still extract performance fees from TER including performance fees - TER excluding performance fees.",
|
"But it's incorrect, according to this issue, please still extract performance fees from \"TER including performance fees\" - \"TER excluding performance fees\".",
|
||||||
"To make sure performance fees is with actual value."
|
"To make sure performance fees is with actual value.",
|
||||||
|
"Case 3:",
|
||||||
|
"If some table is with three value columns: \"TER including performance fees\", \"TER excluding performance fees\", \"SYNTHETIC TER\", ",
|
||||||
|
"The performance fee value is still \"TER including performance fees\" - \"TER excluding performance fees\", ",
|
||||||
|
"For this scenario, please ignore the \"SYNTHETIC TER\" column.",
|
||||||
|
"Here is the example:",
|
||||||
|
"-----Example Start-----",
|
||||||
|
"As at September 30, 2022, the annualised total expense ratios of \\nthe sub-fund Pictet - Corto Europe Long Short are as follows: \\nCLASS \\nANNUALISED TER INCLUDING \\nPERFORMANCE FEES \\nANNUALISED TER EXCLUDING \\nPERFORMANCE FEES \\nSYNTHETIC TER \\nP EUR \\n1.66% \\n1.66% \\n1.98%",
|
||||||
|
"-----Example End-----",
|
||||||
|
"The output should be:",
|
||||||
|
"{\"data\": [{\"fund name\": \"Pictet Corto Europe Long Short\", \"share name\": \"P EUR\", \"ter\": 1.98, \"performance_fee\": 0}]}"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
||||||
37
main.py
37
main.py
|
|
@ -807,14 +807,47 @@ if __name__ == "__main__":
|
||||||
"467788879",
|
"467788879",
|
||||||
"470515549"
|
"470515549"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# documents in New EMEA Documents sample.xlsx as typical documents
|
||||||
|
check_db_mapping_doc_id_list = [
|
||||||
|
"511052670",
|
||||||
|
"520733219",
|
||||||
|
"524306810",
|
||||||
|
"526747539",
|
||||||
|
"528783089",
|
||||||
|
"532422720",
|
||||||
|
"532438210",
|
||||||
|
"534112077",
|
||||||
|
"534538571",
|
||||||
|
"534538682",
|
||||||
|
"535798742",
|
||||||
|
"536299372",
|
||||||
|
"539566148",
|
||||||
|
"539604165",
|
||||||
|
"540056900",
|
||||||
|
"541343431",
|
||||||
|
"541669780",
|
||||||
|
"541669996",
|
||||||
|
"541670397",
|
||||||
|
"541923319",
|
||||||
|
"542335994",
|
||||||
|
"543243585",
|
||||||
|
"543243654",
|
||||||
|
"543244170",
|
||||||
|
"543519140",
|
||||||
|
"543519615",
|
||||||
|
"543628379",
|
||||||
|
"543809340",
|
||||||
|
"543944737"
|
||||||
|
]
|
||||||
# special_doc_id_list = check_mapping_doc_id_list
|
# special_doc_id_list = check_mapping_doc_id_list
|
||||||
special_doc_id_list = check_db_mapping_doc_id_list
|
special_doc_id_list = check_db_mapping_doc_id_list
|
||||||
special_doc_id_list = ["431073795"]
|
# special_doc_id_list = ["434902020", "467595142", "528826768"]
|
||||||
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
|
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
|
||||||
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
|
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
|
||||||
re_run_extract_data = True
|
re_run_extract_data = True
|
||||||
re_run_mapping_data = True
|
re_run_mapping_data = True
|
||||||
force_save_total_data = False
|
force_save_total_data = True
|
||||||
calculate_metrics = False
|
calculate_metrics = False
|
||||||
|
|
||||||
extract_ways = ["text"]
|
extract_ways = ["text"]
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue