Changes done for bugfix: 1. SSL issue; 2. Ignore Example tables; 3. Performance fee

This commit is contained in:
Ravi Maheshwari 2025-03-17 17:07:08 +05:30
parent dd15c1c48e
commit af3d1222a6
3 changed files with 18 additions and 5 deletions

View File

@ -441,7 +441,17 @@
"b. This example mentioned share classes, please output according to share class.",
"The output should be",
"{\"data\": [{\"fund name\": \"Platinum International Fund\", \"share name\": \"C Class\", \"performance_fee_costs\": 0}, {\"fund name\": \"Platinum International Fund\", \"share name\": \"E Class\", \"performance_fee_costs\": 0}, {\"fund name\": \"Platinum International Fund\", \"share name\": \"P Class\", \"performance_fee_costs\": 0.15}, {\"fund name\": \"Platinum Global Fund (Long Only)\", \"share name\": \"C Class\", \"performance_fee_costs\": 0}, {\"fund name\": \"Platinum Global Fund (Long Only)\", \"share name\": \"E Class\", \"performance_fee_costs\": 0}, {\"fund name\": \"Platinum Global Fund (Long Only)\", \"share name\": \"P Class\", \"performance_fee_costs\": 0.24}]}",
"D. Identify the value of performance fee and if it is written 0% or 0.00% or 0 or 0.00 then extract the same as 0 do not assume nil for the same and return its values as 0"
"D. Identify the value of performance fee and if it is written 0% or 0.00% or 0 or 0.00 then extract the same as 0 do not assume null for the same and return its values as 0",
"E. If for performance fee specifically Nil is written in the value then return NULL('') for the same",
"---Example Start---",
"Vanguard Investor Short Term Fixed Interest Fund PLUS Performance fees Nil \nAnd, you will be charged or have deducted \nfrom your investment $0 in performance fees \neach year.",
"---Example End---",
"a. For this example, as Performance fee mentioned as Nil so return NULL('') as performance fee datapoint value.",
"F. If you found Example in the header of the table then ignore that table and do not extract value from the same table",
"---Example Start---",
"Example - Vanguard Investor Short Term Fixed Interest Fund \nContribution fees Nil \nFor every additional $5,000 you put in, you \nwill be charged $0. \nPLUS Management fees and \ncosts 3,4 \n0.19% p.a. of the NAV of the Fund \nAnd, for every $500,000 you have in the \nFund, you will be charged or have deducted \nfrom your investment $950 each year. \nPLUS Performance fees Nil \nAnd, you will be charged or have deducted \nfrom your investment $0 in performance fees \neach year.",
"---Example End---",
"a. For this example, you have Example keyword in the header so you should not extract any datapoint values Like performance_fee_costs, management fee etc."
],
"minimum_initial_investment": [
"Minimum initial investment is fund level data, belong to integer number, the value examples are 100, 1,000, 5,000, 10,000, etc.",

View File

@ -19,7 +19,7 @@ from core.data_extraction import DataExtraction
from core.data_mapping import DataMapping
from core.auz_nz.hybrid_solution_script import api_for_fund_matching_call
from core.metrics import Metrics
import certifi
class EMEA_AR_Parsing:
def __init__(
@ -1507,6 +1507,8 @@ if __name__ == "__main__":
# output_data_file_path = os.path.join(merged_total_data_folder, "merged_" + data_file_base_name)
# merge_output_data_aus_prospectus(data_file_path, document_mapping_file_path, output_data_file_path)
os.environ["SSL_CERT_FILE"] = certifi.where()
doc_source = "aus_prospectus"
sample_document_list_folder: str = r'./sample_documents/'
document_list_file: str = "aus_prospectus_29_documents_sample.txt"
@ -1531,7 +1533,7 @@ if __name__ == "__main__":
# doc_source = "emea_ar"
if doc_source == "aus_prospectus":
document_sample_file = (
r"./sample_documents/aus_prospectus_46_documents_sample.txt"
r"./sample_documents\special_cases.txt"
)
with open(document_sample_file, "r", encoding="utf-8") as f:
special_doc_id_list = [doc_id.strip() for doc_id in f.readlines()]

View File

@ -6,6 +6,7 @@ import os
from time import sleep
import base64
import dotenv
import httpx
# loads .env file with your OPENAI_API_KEY
dotenv.load_dotenv()
@ -76,7 +77,7 @@ def chat(
max_tokens = 4096
client = AzureOpenAI(
azure_endpoint=azure_endpoint, api_key=api_key, api_version=api_version
azure_endpoint=azure_endpoint, api_key=api_key, api_version=api_version, http_client=httpx.Client(verify=False)
)
if (