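Fix issue: use the configured text/image models for table-structure comparison, default "sepcial_rule_by_keywords" to an empty dict, and add compare_table_structure_prompts.json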

2025-11-12 14:07:55 +08:00 · 2025-11-12 14:07:55 +08:00 · 7b0c825a39
parent ea81197bcd
commit 7b0c825a39
3 changed files with 15 additions and 3 deletions
--- a/core/data_extraction.py
+++ b/core/data_extraction.py
@ -1112,12 +1112,14 @@ class DataExtraction:
    def is_next_page_with_same_structure_table(self, current_page_text: str, next_page_text: str) -> bool:
        with_same_structure_table = False
        compare_table_structure_prompts_file = os.path.join(self.instruction_folder, "compare_table_structure_prompts.json")
        if not os.path.exists(compare_table_structure_prompts_file):
            return with_same_structure_table
        with open(compare_table_structure_prompts_file, "r", encoding="utf-8") as f:
            compare_table_structure_prompts = "\n".join(json.load(f).get("prompts", []))
        if len(compare_table_structure_prompts) > 0:
            prompts = f"Context: \ncurrent page contents:\n{current_page_text}\nnext page contents:\n{next_page_text}\nInstructions:\n{compare_table_structure_prompts}\n"
            result, with_error = chat(
-                prompt=prompts, text_model="qwen-plus", image_model="qwen-vl-plus"
+                prompt=prompts, text_model=self.text_model, image_model=self.image_model
            )
            response = result.get("response", "")
            if not with_error:
@ -2014,7 +2016,7 @@ class DataExtraction:
        # Why special_rule_by_keywords is applied (note: the config key is actually spelled "sepcial_rule_by_keywords"):
        # 1. The special rules are very complex, so their prompts are very long.
        # 2. Loading them by keywords keeps the prompts short for simple cases.
-        complex_special_rule = data_business_features.get("sepcial_rule_by_keywords", "")
+        complex_special_rule = data_business_features.get("sepcial_rule_by_keywords", {})
        with_special_rule_title = False
        found_sub_datapoints = []
        datapoint_special_rule = {}
--- a/instructions/emea_ar/compare_table_structure_prompts.json
+++ b/instructions/emea_ar/compare_table_structure_prompts.json
@ -0,0 +1,9 @@
 {
 	"prompts": [
        "Assume there is a data table in current page contents, is there the table with same table structure in the next page contents?", 
        "The meaning of \"same\" is: with totally same table columns for the table in both of current page and next page.",
        "Please output JSON format, the format example is:",
        "{\"answer\": \"Yes\"} or {\"answer\": \"No\"}",
        "Answer:\n"
    ]
 }
--- a/instructions/emea_ar/data_extraction_prompts_config.json
+++ b/instructions/emea_ar/data_extraction_prompts_config.json
@ -97,7 +97,8 @@
 				"The performance fees should not be the presence of the rates at which the performance fees are calculated.",
 				"The reported of performance fees should not be \"% based on the NAV at the end of the accounting period\""
 			]
-		}
+		},
 		"sepcial_rule_by_keywords": {}
 	},
 	"special_cases": {
 		"common": [