instructions: explicitly state not to collect data points whose value is -, *, **, N/A, N/A%, N/A %, or NONE
This commit is contained in:
parent
c4985ac75f
commit
40bcce4404
|
|
@ -129,7 +129,12 @@
|
||||||
"Only output the data point which with relevant value.",
|
"Only output the data point which with relevant value.",
|
||||||
"Don't ignore the data point which with negative value, e.g. -0.12, -1.13",
|
"Don't ignore the data point which with negative value, e.g. -0.12, -1.13",
|
||||||
"Don't ignore the data point which with explicit zero value, e.g. 0, 0.00",
|
"Don't ignore the data point which with explicit zero value, e.g. 0, 0.00",
|
||||||
"Ignore the data point which value with -, *, **, N/A, N/A%, N/A %, NONE, etc.",
|
"Don't extract data which values are -, *, **, N/A, N/A%, N/A %, NONE, it means the value should be NULL, please skip them.",
|
||||||
|
"Example:",
|
||||||
|
"Context:",
|
||||||
|
"Sub-Funds\nClass of shares\nCurrency\nTER\nPerformance\nfees\nSwiss Life Funds (LUX) Bond Emerging Markets Corporates\nAM - Shares CHF hedged - Capitalisation\nCHF\n0.23%\n-\nAM - Shares EUR hedged - Capitalisation\nEUR\n0.23%\n0.00%\n",
|
||||||
|
"Output:",
|
||||||
|
"{\"data\": [{\"fund name\": \"Swiss Life Funds (LUX) Bond Emerging Markets Corporates\", \"share name\": \"AM - Shares CHF hedged - Capitalisation\", \"ter\": 0.23}, {\"fund name\": \"Swiss Life Funds (LUX) Bond Emerging Markets Corporates\", \"share name\": \"AM - Shares EUR hedged - Capitalisation\", \"ter\": 0.23, \"performance_fee\": 0}]}",
|
||||||
"Fund level data: (\"fund name\" and \"TOR\") and share level data: (\"fund name\", \"share name\", \"ter\", \"performance fees\", \"ogc\") should be output separately.",
|
"Fund level data: (\"fund name\" and \"TOR\") and share level data: (\"fund name\", \"share name\", \"ter\", \"performance fees\", \"ogc\") should be output separately.",
|
||||||
"The output should be JSON format, the format is like below example(s):"
|
"The output should be JSON format, the format is like below example(s):"
|
||||||
],
|
],
|
||||||
|
|
|
||||||
4
main.py
4
main.py
|
|
@ -577,13 +577,13 @@ if __name__ == "__main__":
|
||||||
# extract_way,
|
# extract_way,
|
||||||
# re_run_extract_data)
|
# re_run_extract_data)
|
||||||
|
|
||||||
special_doc_id_list = []
|
special_doc_id_list = ["349679479"]
|
||||||
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
|
output_mapping_child_folder = r"/data/emea_ar/output/mapping_data/docs/"
|
||||||
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
|
output_mapping_total_folder = r"/data/emea_ar/output/mapping_data/total/"
|
||||||
re_run_mapping_data = True
|
re_run_mapping_data = True
|
||||||
force_save_total_data = False
|
force_save_total_data = False
|
||||||
|
|
||||||
extract_ways = ["text", "image"]
|
extract_ways = ["text"]
|
||||||
for extract_way in extract_ways:
|
for extract_way in extract_ways:
|
||||||
batch_start_job(
|
batch_start_job(
|
||||||
pdf_folder,
|
pdf_folder,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue