Optimize keywords/instructions for special-case documents.

This commit is contained in:
Blade He 2024-10-23 16:56:43 -05:00
parent 171f3b6d1f
commit 53dadf61f4
3 changed files with 4 additions and 2 deletions

View File

@ -116,6 +116,8 @@
"Kostenpauschale",
"Gesamtkostenquote",
"Gesamtaufwandsquoten",
"kostenquote",
"Gesamt kostenquote",
"Betriebskostenquote des Fonds",
"TER",
"Total Expense Ratio",

View File

@ -16,7 +16,7 @@
"reported_name": {
"tor": "The TOR reported name could be:\nTOR, Turnover Ratio, Portfolio Turnover, Portfolio turnover ratio, PTR, Taux de rotation corrigé - Gecorrigeerde omloopsnelheid, etc.",
"ogc": "The OGC reported name could be:\nOGC, OGF, OCF, Ongoing Charge, Operation Charge, Ongoing charges in per cent, Ongoing charges in percent, Ongoing charges as a percentage, On Going Charges, Operating Charge, Ongoing Fund Charge, OCF Cap Rate, Ongoing Charges Figure, etc.",
"ter": "The TER reported name could be:\nTER, Total Expense Ratio, Total expense ratio as a percentage, Total Fund Charge, Gross Expense Ratio, All in fee, Total Net Expense Ratio, Weighted Average Expense Ratio, Synthetic total Expense Ratio, Annualised TER including performance fees, Capped Expense Ratio, TER (en %) (with performance), Net TER, Total Expense Ratio in Prozent, Annualisierte TER in % (Mit Gebührenverzicht), Annualised TER % (with fee waiver), etc.",
"ter": "The TER reported name could be:\nTER, Total Expense Ratio, Total expense ratio as a percentage, Total Fund Charge, Gross Expense Ratio, All in fee, Total Net Expense Ratio, Weighted Average Expense Ratio, Synthetic total Expense Ratio, Annualised TER including performance fees, Capped Expense Ratio, TER (en %) (with performance), Net TER, Total Expense Ratio in Prozent, Annualisierte TER in % (Mit Gebührenverzicht), Annualised TER % (with fee waiver), kostenquote, Gesamt kostenquote, etc.",
"performance_fee": "The performance fees reported name could be:\nperformance fees, performance fees ratio, Performance, etc."
},
"data_business_features": {

View File

@ -854,7 +854,7 @@ if __name__ == "__main__":
]
# special_doc_id_list = check_mapping_doc_id_list
special_doc_id_list = check_db_mapping_doc_id_list
special_doc_id_list = ["536299372"]
special_doc_id_list = ["543243654"]
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
re_run_extract_data = False