instructions: explicitly state not to collect data whose value is -, *, **, N/A, N/A%, N/A %, or NONE
This commit is contained in:
parent
c4985ac75f
commit
40bcce4404
|
|
@ -129,7 +129,12 @@
|
|||
"Only output the data point which with relevant value.",
|
||||
"Don't ignore the data point which with negative value, e.g. -0.12, -1.13",
|
||||
"Don't ignore the data point which with explicit zero value, e.g. 0, 0.00",
|
||||
"Ignore the data point which value with -, *, **, N/A, N/A%, N/A %, NONE, etc.",
|
||||
"Don't extract data which values are -, *, **, N/A, N/A%, N/A %, NONE, it means the value should be NULL, please skip them.",
|
||||
"Example:",
|
||||
"Context:",
|
||||
"Sub-Funds\nClass of shares\nCurrency\nTER\nPerformance\nfees\nSwiss Life Funds (LUX) Bond Emerging Markets Corporates\nAM - Shares CHF hedged - Capitalisation\nCHF\n0.23%\n-\nAM - Shares EUR hedged - Capitalisation\nEUR\n0.23%\n0.00%\n",
|
||||
"Output:",
|
||||
"{\"data\": [{\"fund name\": \"Swiss Life Funds (LUX) Bond Emerging Markets Corporates\", \"share name\": \"AM - Shares CHF hedged - Capitalisation\", \"ter\": 0.23}, {\"fund name\": \"Swiss Life Funds (LUX) Bond Emerging Markets Corporates\", \"share name\": \"AM - Shares EUR hedged - Capitalisation\", \"ter\": 0.23, \"performance_fee\": 0}]}",
|
||||
"Fund level data: (\"fund name\" and \"TOR\") and share level data: (\"fund name\", \"share name\", \"ter\", \"performance fees\", \"ogc\") should be output separately.",
|
||||
"The output should be JSON format, the format is like below example(s):"
|
||||
],
|
||||
|
|
|
|||
4
main.py
4
main.py
|
|
@ -577,13 +577,13 @@ if __name__ == "__main__":
|
|||
# extract_way,
|
||||
# re_run_extract_data)
|
||||
|
||||
special_doc_id_list = []
|
||||
special_doc_id_list = ["349679479"]
|
||||
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
|
||||
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
|
||||
re_run_mapping_data = True
|
||||
force_save_total_data = False
|
||||
|
||||
extract_ways = ["text", "image"]
|
||||
extract_ways = ["text"]
|
||||
for extract_way in extract_ways:
|
||||
batch_start_job(
|
||||
pdf_folder,
|
||||
|
|
|
|||
Loading…
Reference in New Issue