fix issue
This commit is contained in:
parent
ea81197bcd
commit
7b0c825a39
|
|
@ -1112,12 +1112,14 @@ class DataExtraction:
|
||||||
def is_next_page_with_same_structure_table(self, current_page_text: str, next_page_text: str) -> bool:
|
def is_next_page_with_same_structure_table(self, current_page_text: str, next_page_text: str) -> bool:
|
||||||
with_same_structure_table = False
|
with_same_structure_table = False
|
||||||
compare_table_structure_prompts_file = os.path.join(self.instruction_folder, "compare_table_structure_prompts.json")
|
compare_table_structure_prompts_file = os.path.join(self.instruction_folder, "compare_table_structure_prompts.json")
|
||||||
|
if not os.path.exists(compare_table_structure_prompts_file):
|
||||||
|
return with_same_structure_table
|
||||||
with open(compare_table_structure_prompts_file, "r", encoding="utf-8") as f:
|
with open(compare_table_structure_prompts_file, "r", encoding="utf-8") as f:
|
||||||
compare_table_structure_prompts = "\n".join(json.load(f).get("prompts", []))
|
compare_table_structure_prompts = "\n".join(json.load(f).get("prompts", []))
|
||||||
if len(compare_table_structure_prompts) > 0:
|
if len(compare_table_structure_prompts) > 0:
|
||||||
prompts = f"Context: \ncurrent page contents:\n{current_page_text}\nnext page contents:\n{next_page_text}\nInstructions:\n{compare_table_structure_prompts}\n"
|
prompts = f"Context: \ncurrent page contents:\n{current_page_text}\nnext page contents:\n{next_page_text}\nInstructions:\n{compare_table_structure_prompts}\n"
|
||||||
result, with_error = chat(
|
result, with_error = chat(
|
||||||
prompt=prompts, text_model="qwen-plus", image_model="qwen-vl-plus"
|
prompt=prompts, text_model=self.text_model, image_model=self.image_model
|
||||||
)
|
)
|
||||||
response = result.get("response", "")
|
response = result.get("response", "")
|
||||||
if not with_error:
|
if not with_error:
|
||||||
|
|
@ -2014,7 +2016,7 @@ class DataExtraction:
|
||||||
# The reasons for applying special_rule_by_keywords are:
|
# The reasons for applying special_rule_by_keywords are:
|
||||||
# 1. The special rule is very complex, so its prompts are very long.
|
# 1. The special rule is very complex, so its prompts are very long.
|
||||||
# 2. Loading it by keywords keeps the prompts from becoming too long in simple cases.
|
# 2. Loading it by keywords keeps the prompts from becoming too long in simple cases.
|
||||||
complex_special_rule = data_business_features.get("sepcial_rule_by_keywords", "")
|
complex_special_rule = data_business_features.get("sepcial_rule_by_keywords", {})
|
||||||
with_special_rule_title = False
|
with_special_rule_title = False
|
||||||
found_sub_datapoints = []
|
found_sub_datapoints = []
|
||||||
datapoint_special_rule = {}
|
datapoint_special_rule = {}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,9 @@
|
||||||
|
{
|
||||||
|
"prompts": [
|
||||||
|
"Assume there is a data table in current page contents, is there the table with same table structure in the next page contents?",
|
||||||
|
"The meaning of \"same\" is: with totally same table columns for the table in both of current page and next page.",
|
||||||
|
"Please output JSON format, the format example is:",
|
||||||
|
"{\"answer\": \"Yes\"} or {\"answer\": \"No\"}",
|
||||||
|
"Answer:\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
@ -97,7 +97,8 @@
|
||||||
"The performance fees should not be the presence of the rates at which the performance fees are calculated.",
|
"The performance fees should not be the presence of the rates at which the performance fees are calculated.",
|
||||||
"The reported of performance fees should not be \"% based on the NAV at the end of the accounting period\""
|
"The reported of performance fees should not be \"% based on the NAV at the end of the accounting period\""
|
||||||
]
|
]
|
||||||
}
|
},
|
||||||
|
"sepcial_rule_by_keywords": {}
|
||||||
},
|
},
|
||||||
"special_cases": {
|
"special_cases": {
|
||||||
"common": [
|
"common": [
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue