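Fix issue: use the configured text/image models for the table-structure comparison and default "sepcial_rule_by_keywords" to an empty dict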

This commit is contained in:
blade 2025-11-12 14:07:55 +08:00
parent ea81197bcd
commit 7b0c825a39
3 changed files with 15 additions and 3 deletions

View File

@ -1112,12 +1112,14 @@ class DataExtraction:
def is_next_page_with_same_structure_table(self, current_page_text: str, next_page_text: str) -> bool:
with_same_structure_table = False
compare_table_structure_prompts_file = os.path.join(self.instruction_folder, "compare_table_structure_prompts.json")
if not os.path.exists(compare_table_structure_prompts_file):
return with_same_structure_table
with open(compare_table_structure_prompts_file, "r", encoding="utf-8") as f:
compare_table_structure_prompts = "\n".join(json.load(f).get("prompts", []))
if len(compare_table_structure_prompts) > 0:
prompts = f"Context: \ncurrent page contents:\n{current_page_text}\nnext page contents:\n{next_page_text}\nInstructions:\n{compare_table_structure_prompts}\n"
result, with_error = chat(
prompt=prompts, text_model="qwen-plus", image_model="qwen-vl-plus"
prompt=prompts, text_model=self.text_model, image_model=self.image_model
)
response = result.get("response", "")
if not with_error:
@ -2014,7 +2016,7 @@ class DataExtraction:
# Why special_rule_by_keywords is applied:
# 1. The special rules are very complex, so their prompts are very long.
# 2. Loading them by keywords keeps the prompts short for simple cases that do not need them.
complex_special_rule = data_business_features.get("sepcial_rule_by_keywords", "")
complex_special_rule = data_business_features.get("sepcial_rule_by_keywords", {})
with_special_rule_title = False
found_sub_datapoints = []
datapoint_special_rule = {}

View File

@ -0,0 +1,9 @@
{
"prompts": [
"Assume there is a data table in current page contents, is there the table with same table structure in the next page contents?",
"The meaning of \"same\" is: with totally same table columns for the table in both of current page and next page.",
"Please output JSON format, the format example is:",
"{\"answer\": \"Yes\"} or {\"answer\": \"No\"}",
"Answer:\n"
]
}

View File

@ -97,7 +97,8 @@
"The performance fees should not be the presence of the rates at which the performance fees are calculated.",
"The reported of performance fees should not be \"% based on the NAV at the end of the accounting period\""
]
}
},
"sepcial_rule_by_keywords": {}
},
"special_cases": {
"common": [