diff --git a/app_emea_ar.py b/app_emea_ar.py
index 105be43..8aab6d4 100644
--- a/app_emea_ar.py
+++ b/app_emea_ar.py
@@ -69,7 +69,8 @@ def emea_ar_data_extract():
                                           output_extract_data_folder=output_extract_data_folder,
                                           output_mapping_data_folder=output_mapping_data_folder,
                                           extract_way=extract_way,
-                                          drilldown_folder=drilldown_folder)
+                                          drilldown_folder=drilldown_folder,
+                                          compare_with_provider=False)
         doc_data_from_gpt, annotation_list = emea_ar_parsing.extract_data(re_run=re_run_extract_data)
         doc_mapping_data = emea_ar_parsing.mapping_data(
             data_from_gpt=doc_data_from_gpt, re_run=re_run_mapping_data
diff --git a/configuration/emea_ar/abbreviation_records.json b/configuration/emea_ar/abbreviation_records.json
new file mode 100644
index 0000000..10302cb
--- /dev/null
+++ b/configuration/emea_ar/abbreviation_records.json
@@ -0,0 +1,1066 @@
+{
+    "H": "High", 
+    "(acc)": "(acc)",
+    "(dist)": "(dist)",
+    "perf": "(perf)",
+    "1167 Act Fds ICAV": "1167 Active Funds ICAV",
+    "Andln": "Aandelen",
+    "AB": "AB SICAV I",
+    "Aberdeen Global II": "Aberdeen Global II",
+    "Aberdeen Liqdty (Lux)": "Aberdeen Liquidity Fund (Lux)",
+    "Aberdeen Std": "Aberdeen Standard",
+    "AS SICAV I": "Aberdeen Standard SICAV I",
+    "ABN AMRO": "ABN AMRO Multi-Manager Funds ",
+    "AAMMF FoM": "ABN AMRO Multi-Manager Funds - Fund of Mandates",
+    "Abs": "Absolute",
+    "AI": "Absolute Insight",
+    "Acct": "Account",
+    "Acc": "Accumulation",
+    "Achrs": "Achievers",
+    "Actns": "Actions",
+    "Actv": "Active",
+    "Act": "Active",
+    "actvy": "actively",
+    "Actr": "Actuaries",
+    "Admt": "Adamant",
+    "Adj": "Adjustable",
+    "Adjs": "Adjusted",
+    "Admin": "Administrative",
+    "Adm": "Administrative",
+    "Advcd": "Advanced",
+    "Advnr": "Advancer",
+    "Advtg": "Advantage",
+    "Advts": "Advantus",
+    "Advnturs": "Adventurous",
+    "Adv": "Advisor",
+    "Advs": "Advisors",
+    "Arspc": "Aerospace",
+    "Afts": "Affiliates",
+    "Afr": "Africa",
+    "Agcy": "Agency",
+    "Aggt": "Aggregate",
+    "Agrsv": "Aggressive",
+    "AgriBsns": "AgriBusiness",
+    "Agril": "Agricultural",
+    "Agc": "Agricultural",
+    "Agltr": "Agriculture",
+    "Agr": "Agriculture",
+    "Agri": "Agrinvest",
+    "Abg": "Airbag",
+    "Air": "Airlines",
+    "Akcvy": "Akciovy",
+    "Akt": "Aktier",
+    "Albtrs": "Albatross",
+    "Allnc": "Alliance",
+    "AllncBrnstn": "AllianceBernstein",
+    "Allc": "Allocation",
+    "Allctr": "Allocator",
+    "Alp": "Alpha",
+    "Alt": "Alternative",
+    "Alts": "Alternatives",
+    "Ambt": "Ambition",
+    "Amer": "American",
+    "Am": "American",
+    "AFS": "Amundi Fund Solutions -",
+    "Amundi Fds": "Amundi Funds",
+    "Amundi Fds II": "Amundi Funds II -",
+    "Amundi IS": "Amundi Index Solutions - Amundi Index",
+    "Amundi Mny Mkt Fd": "Amundi Money Market Fund",
+    "Amundi SF": "Amundi S.F. -",
+    "Angl": "Angel",
+    "Ann": "Annual",
+    "ADis": "Annually Distribution",
+    "AD": "Annually Distribution",
+    "ATA": "ANTARCTICA",
+    "AntiBench": "Antibenchmark",
+    "App": "Appliances",
+    "Apprec": "Appreciation",
+    "Appr": "Approche",
+    "Apl": "April",
+    "Arbn": "Arabian",
+    "Arbtrg": "Arbitrage",
+    "Ar": "Area",
+    "ARM": "ARMENIA",
+    "Artfcl": "Artificial",
+    "AsiaPac": "Asia Pacific",
+    "APAC": "Asia Pacific",
+    "Asn": "Asian",
+    "Asst": "Asset",
+    "Ast": "Asset",
+    "Assts": "Assets",
+    "Assrd": "Assured",
+    "Audentia Capital": "Audentia Capital SICAV PLC",
+    "Ausgwn": "Ausgewogen",
+    "AUS": "Australian",
+    "Autcll": "Autocallable",
+    "Atmt": "Automated",
+    "Atmtn": "Automation",
+    "Avant": "Avantage",
+    "Avrg": "Average",
+    "Avg": "Average",
+    "Aviva Investors": "Aviva Investors",
+    "Awrns": "Awareness",
+    "AXAIMFIIS": "AXA IM Fixed Income Investment Strategies",
+    "AXAWF": "AXA World Funds",
+    "AXAWF II": "AXA World Funds II",
+    "Bckd": "Backed",
+    "Bkwrdt": "Backwardated",
+    "Baillie Gifford WW": "Baillie Gifford Wldwd",
+    "Bal": "Balanced",
+    "Bk": "Bank",
+    "Bkg": "Banking",
+    "BL": "Banque de Luxembourg",
+    "Bantleon sel": "Bantleon select",
+    "Bcly": "Barclays",
+    "Bsn": "Basin",
+    "Behvrl": "Behavioral",
+    "Bench": "Benchmark",
+    "Bnf": "Benefit",
+    "Bt": "Beta",
+    "Bvrg": "Beverage",
+    "Biotech": "Biotechnology",
+    "BlkRk": "BlackRock",
+    "BR": "BlackRock",
+    "Blnd": "Blended",
+    "Blmbrg": "Bloomberg",
+    "BBMSCI": "Bloomberg Barclays MSCI",
+    "Bl Chp": "Blue Chip",
+    "BNPP B Strategy": "BNP Paribas B Strategy",
+    "BNY": "BNY Mellon Liquidity Funds PLC",
+    "Bd": "Bond",
+    "Bds": "Bonds",
+    "Bnsm": "Bonusom",
+    "Bstr": "Booster",
+    "Brds": "Brands",
+    "BrdwnGLB": "BrandywineGLOBAL",
+    "BRA": "Brazil",
+    "Bnt": "Brent",
+    "Bdgw": "Bridgeway",
+    "Brd": "Broad",
+    "Bdpst": "Budapest",
+    "Bldr": "Builder",
+    "Bnd": "Bundle",
+    "CaixaBank Global": "CaixaBank Global SICAV",
+    "CA": "California",
+    "Cll": "Call",
+    "Cnd": "Canada",
+    "CAD": "Canadian Dollar ",
+    "Candriam Bds": "Candriam Bonds",
+    "Candriam Eqs B": "Candriam Equities B",
+    "Candriam Eqs L": "Candriam Equities L",
+    "Candriam Sustainable": "Candriam Sustainable",
+    "Cp": "Cap",
+    "Cptl": "Capital",
+    "CapitalatWork": "Capitalatwork Foyer Umbrella",
+    "Cap": "Capitalisation",
+    "Cpd": "Capped",
+    "Cps": "Caps",
+    "Carb": "Carbon",
+    "Cr": "Core",
+    "Carmignac Pf": "Carmignac Portfolio",
+    "Csh": "Cash",
+    "Cau": "Cautious",
+    "Cdl": "Cedola",
+    "Cntrl": "Central",
+    "Cntrc": "Centric",
+    "Crtfcts": "Certificates",
+    "Csky": "Cesky",
+    "Chg": "Change",
+    "Chrts": "Charities",
+    "CHN": "CHINA",
+    "Chns": "Chinese",
+    "CNY": "Chinese Yuan ",
+    "Chp": "Chip",
+    "Chc": "Choice",
+    "Cts": "Cities",
+    "Cl": "Classic",
+    "Clmt": "Climate",
+    "Cls": "Close",
+    "Cld": "Cloud",
+    "Cogntv": "Cognitive",
+    "Coll": "Collateralized",
+    "Cllctn": "Collection",
+    "Cllctv": "Collective",
+    "Collect": "Collectivit\u00e9s",
+    "Col": "COLOMBIA",
+    "Colord": "Colorado",
+    "Cmfrt": "Comfort",
+    "Comrcl": "Commercial",
+    "Commercial": "Commercialization",
+    "Cmdts": "Commodities",
+    "Cmdty": "Commodity",
+    "Cmd": "Commodity",
+    "CIF": "Common Investment Fund",
+    "Comm": "Communication",
+    "Cie": "Compagnie",
+    "Cies": "Compagnies",
+    "Coms": "Companies",
+    "Com": "Company",
+    "Cmp": "Compass",
+    "Cmplt": "Complete",
+    "Compnt": "Component",
+    "Comps": "Composite",
+    "Cmprhsv": "Comprehensive",
+    "Comp": "Computer",
+    "Cmptg": "Computing",
+    "Concntr": "Concentrated",
+    "Concpt": "Concept",
+    "Cndtnl": "Conditional",
+    "Cnsrv": "Conservative",
+    "Cnsv": "Conservative",
+    "CsvtCvtb": "Conservative Convertible",
+    "Convr": "Conserver",
+    "Cons": "Consolidado",
+    "Cnstnt": "Constant",
+    "Constnd": "Constrained",
+    "Constr": "Constraint",
+    "Const": "Construction",
+    "Cnsmr": "Consumer",
+    "Continen": "Continental",
+    "Contnn": "Continental",
+    "Cntgnt": "Contingent",
+    "Contra": "Contrarian",
+    "Ctrl": "Control",
+    "Contrvrsl": "Controversial",
+    "Cont": "Controversial",
+    "Cnvrt": "Convertible",
+    "Convert": "Convertibles",
+    "Cnvrts": "Convertibles",
+    "Convex": "Convexit\u00e9",
+    "Convct": "Conviction",
+    "Convict": "Convictions",
+    "Coop": "Cooper",
+    "Cor": "Core",
+    "Corp": "Corporates",
+    "CrpBdIdx": "Corporate Bond Index",
+    "Countrs": "Countries",
+    "Cntry": "Country",
+    "Cpn": "Coupon",
+    "Cov": "Covered",
+    "Crct": "Creciente",
+    "Crdt": "Credit",
+    "CS": "Credit Suisse",
+    "CSV SIF": "Credit Suisse Virtuoso SICAV - SIF",
+    "Cre": "Creek",
+    "Crsvr": "Crossover",
+    "Crd": "Crude",
+    "Cum": "Cumulative",
+    "Ccis": "Currencies",
+    "Ccy": "Currency",
+    "Cust": "Custom",
+    "Custmzd": "Customized",
+    "Cycl": "Cyclicals",
+    "Dl": "Daily",
+    "Danske FoF": "Danske Fund of Funds",
+    "Danske Invest Allc": "Danske Invest Allocation SICAV",
+    "Danske Invest": "Danske Invest SICAV",
+    "Dt": "Date",
+    "Dtd": "Dated",
+    "db": "Db",
+    "db AM": "db Advisory Multibrands",
+    "De": "Death",
+    "Dbt": "Debt",
+    "Dfnc": "Defence",
+    "Dfndr": "Defender",
+    "Defensv": "Defensive",
+    "Defesv": "Defensive",
+    "Dfnd": "Defined",
+    "Dlt": "Delta",
+    "Delta Lloyd L": "Delta Lloyd L",
+    "Dmnd": "Demand",
+    "Dmgrphcs": "Demographics",
+    "Demgrph": "Demography",
+    "Dmgr": "Demography",
+    "Dnmntd": "Denominated",
+    "Dpsts": "Deposits",
+    "Dsgntd": "Designated",
+    "Deutlnd": "Deutschland",
+    "Dev": "Developed",
+    "Devpg": "Developing",
+    "Devpmt": "Development",
+    "Dgnstcs": "Diagnostics",
+    "Dgtl": "Digital",
+    "Dimsnl": "Dimensional",
+    "Drt": "Direct",
+    "Dir": "Direct",
+    "Discplnd": "Disciplined",
+    "Dscnt": "Discount",
+    "Discvs": "Discoveries",
+    "Discv": "Discovery",
+    "Discret": "Discretion",
+    "Disctnry": "Discretionary",
+    "Disc": "Discretionary",
+    "Disrpt": "Disruptive ",
+    "Dsrpt": "Disruptive ",
+    "Dis": "Distribution",
+    "Divers": "Diversified",
+    "Div": "Dividend",
+    "Dvrs": "Diversified",
+    "Divst": "Diversity",
+    "DP": "Dividend Payout",
+    "DR": "Dividend Reinvestment",
+    "Divs": "Dividends",
+    "Dlhps": "Dluhopisu",
+    "Dbyvtl": "Dobyvatelia",
+    "Dllr": "Dollar",
+    "Domst": "Domestic",
+    "DPAM Capital B": "DPAM Capital B",
+    "Drvn": "Driven",
+    "Durb": "Durable",
+    "Drbl": "Durables",
+    "Dur": "Duration",
+    "Dyn": "Dynamic",
+    "Erns": "Earnings",
+    "Estn": "Eastern",
+    "Eaton Vance Intl (CYM)": "Eaton Vance International (Cayman Islands)",
+    "Eaton Vance Intl (IRL)": "Eaton Vance International (Ireland)",
+    "Ecomm": "Ecommerce ",
+    "Eco": "Economie",
+    "Ecos": "Economies",
+    "Ecoy": "Economy",
+    "Edu": "Education",
+    "Elevation Fds (IE)": "Elevation UCITS Funds (Ireland) ICAV",
+    "E": "Elite",
+    "Emgnt": "Emergente",
+    "Em": "Emerging",
+    "Emerg": "Emerging",
+    "Em Mkts": "Emerging Markets",
+    "EM": "Emerging Markets",
+    "Emply": "Employee",
+    "Emplmt": "Employment",
+    "Empwrmt": "Empowerment",
+    "Endwmnt": "Endowment ",
+    "Endur": "Endurance",
+    "Eggm": "Engagement",
+    "Engy": "Energy",
+    "Ey": "Energy",
+    "Eng": "English",
+    "Enh": "Enhanced",
+    "EnhFxIn": "Enhanced Fixed Income",
+    "Entrprs": "Entrepreneurs",
+    "Ents": "Enterprises",
+    "Entrepr": "Entreprendre",
+    "Entrprnrs": "Entrepreneurs",
+    "Entr": "Entreprise",
+    "Envir": "Environment",
+    "Envirtly": "Environmentally",
+    "Epch": "Epoch",
+    "Epsilon Fund": "Epsilon Fund",
+    "Eqlty": "Equality",
+    "Eqs": "Equities",
+    "Eq": "Equity",
+    "Essential Port Sel": "Essential Portfolio Selection",
+    "Estblshd": "Established",
+    "Estt": "Estate",
+    "Etcl": "Ethical",
+    "EUR": "EUR",
+    "\u20ac": "EUR",
+    "Euro": "Euro",
+    "EURO": "EURO",
+    "Eurbl": "Eurobloc",
+    "Eurodoll": "Eurodollar",
+    "Eurlnd": "Euroland",
+    "Eurp": "European",
+    "Erst": "Eurostoxx",
+    "Euroz": "Eurozone",
+    "Ev": "Event",
+    "Evol": "Evolutif",
+    "Evvg": "Evolving",
+    "Excld": "Excluding",
+    "Excl": "Exclusif",
+    "Exclsv": "Exclusive",
+    "Exm": "Exempt",
+    "Expc": "Expectations",
+    "Expts": "Expertise",
+    "Exptrs": "Exporters",
+    "Exps": "Exps",
+    "ext": "extend",
+    "Extnl": "External",
+    "Extr": "Extra",
+    "Fac": "Factor",
+    "Facs": "Factors",
+    "Flln": "Fallen",
+    "Fam": "Familiales",
+    "Fml": "Familie",
+    "FCP": "Fcp",
+    "Feb": "February",
+    "Fedrtd": "Federated",
+    "Fdr": "Founder",
+    "Fidelity": "Fidelity Funds",
+    "Fid": "Fiduciary",
+    "Finac": "Finance",
+    "Fincl": "Financial",
+    "Fincls": "Financials",
+    "Fi": "Financials",
+    "Fndr": "Finder",
+    "Fst": "First",
+    "Fxd": "Fixed",
+    "Flx": "Flex",
+    "Flex": "Flexible",
+    "Fltng": "Floating",
+    "Fl": "Floating",
+    "Flrd": "Floored",
+    "Foc": "Focus",
+    "Fcs": "Focused",
+    "Focused": "Focused SICAV",
+    "Fstg": "Forstrong",
+    "Fortn": "Fortnightly",
+    "FH Aberdeen Global": "Forvaltningshuset Aberdeen Global",
+    "Fssl Ful": "Fossil Fuel",
+    "Fndtn": "Foundation",
+    "Fdrs": "Founders",
+    "Frmlgtn": "Framlington",
+    "Frm": "Framlington",
+    "Fran": "Franchise",
+    "Frk Flx Er Agt Bd": "Franklin Flexible Euro Aggregate Bond",
+    "Frntr": "Frontier",
+    "Frtl": "Frontline ",
+    "Fd": "Fund",
+    "FoF": "Fund of Funds",
+    "Fdmtl": "Fundamental",
+    "Fdml": "Fundamental",
+    "Fds": "Funds",
+    "Fut": "Future",
+    "Futs": "Futures",
+    "GAM": "Gam",
+    "Garant": "Garantizado",
+    "Gtmr": "Gartmore",
+    "GBP": "GBP",
+    "\u00a3": "GBP",
+    "Gndr": "Gender",
+    "Gen": "General",
+    "Grmny": "Germany",
+    "Gestielle": "Gestielle Investment SICAV",
+    "Gest": "Gestion",
+    "Gnts": "Giants",
+    "Glt": "Gilts",
+    "Glbl": "Global",
+    "Glb": "Global",
+    "GDOF": "Global Dynamic Opportunities Fund Ltd.",
+    "GEI": "Global Equity Income ",
+    "GlInGd": "Global Investment Grade",
+    "Glble": "Globale",
+    "Glblnch": "Globalnich",
+    "GS": "Goldman Sachs",
+    "GSF II": "Goldman Sachs Funds II",
+    "Goodbody": "Goodbody Platform ICAV",
+    "Gouvrmntls": "Gouvernementales",
+    "Govt": "Government",
+    "Gvs": "Govies",
+    "Grd": "Grade",
+    "Grde": "Grande",
+    "Grter": "Greater",
+    "Grs Inc": "Gross Income",
+    "GI": "Gross Income",
+    "GP": "Gross Paying",
+    "Grp": "Group",
+    "Grwr": "Growers",
+    "Gr": "Growth",
+    "GSQrtx": "GSQuartix",
+    "Grtd": "Guaranteed",
+    "HY": "High Yield",
+    "Hpshr": "Hampshire",
+    "Hrd": "Hard",
+    "Hrdwr": "Hardware",
+    "Hdstrt": "Headstart",
+    "Hlth": "Health",
+    "Hlthcare": "Healthcare",
+    "Hlthcr": "Healthcare",
+    "Hdg": "Hedged",
+    "HF": "Hedged Fund",
+    "Hndrsn Pn": "Henderson Pan",
+    "Heptagon": "Heptagon Fund PLC",
+    "Hereford Fds": "Hereford Funds",
+    "Hertg": "Heritage",
+    "Hxvt": "Hexavest",
+    "Hi": "High",
+    "Hi Yld": "High Yield",
+    "Hldg": "Holding",
+    "HKD": "Hong Kong Dollar ",
+    "Hrzn": "Horizon",
+    "Hosptlty": "Hospitality",
+    "Human": "Humanisme",
+    "Hngrn": "Hungarian",
+    "Hyb": "Hybrid",
+    "Impct": "Impact",
+    "Imp": "Impact",
+    "Incld": "Including",
+    "Incl": "Including",
+    "Inc": "Income",
+    "Indep": "Independence",
+    "Idx": "Index",
+    "Idxd": "Indexed",
+    "Idxng": "Indexing",
+    "INR": "Indian Rupee ",
+    "Individualnh": "Individualniho",
+    "IDR": "Indonesian Rupiah ",
+    "Industr": "Industrialized",
+    "Indstr": "Industrials",
+    "Inds": "Industries",
+    "Infl": "Inflation",
+    "Infor": "Information",
+    "Info": "Information",
+    "Infmd": "Informed",
+    "Infras": "Infrastructure",
+    "Initl": "Initial",
+    "Innovt": "Innovation",
+    "Innvtv": "Innovative",
+    "Innovtr": "Innovators",
+    "Insnstv": "Insensitive",
+    "Insim": "Insieme",
+    "Insgts": "Insights",
+    "Inst": "Institution",
+    "Instl": "Institutional",
+    "Ins": "Insurance",
+    "Insts": "Institutions",
+    "Intllgnc": "Intelligence",
+    "Intlgc": "Intelligence",
+    "Intst": "Interest",
+    "IntrR": "Interest Rate",
+    "Intmdt": "Intermediate",
+    "Itmt": "Intermediate",
+    "Intl": "International",
+    "Internat": "Internationales",
+    "Inter": "Interval",
+    "Intrs": "Intrinsic",
+    "Invrs": "Inverse",
+    "Inv": "Investors",
+    "Investec GSF": "Investec Global Strategy Fund",
+    "Investec SIV": "Investec Series IV",
+    "Invt": "Investing",
+    "Invmt": "Investment",
+    "Invm": "Investment",
+    "IG": "Investment Grade",
+    "Invmts": "Investments",
+    "Invms": "Investments",
+    "Irl": "Ireland",
+    "IEP": "Irish Pound ",
+    "Issr": "Issuer",
+    "ITA": "Italy",
+    "JPM": "JPMorgan Liquidity Funds",
+    "Jan": "Janvier",
+    "Jpn": "Japanese",
+    "JPY": "Japanese Yen ",
+    "JPM ISF II": "JPMorgan Investment Strategies Funds II",
+    "JPMF": "JPMorgan Portfolio Strategies Funds",
+    "Jmpr": "Jumper",
+    "Jmpstr": "Jumpstart",
+    "Kairos Alpha SICAV": "Kairos Alpha SICAV",
+    "Kairos Intl SICAV": "Kairos International SICAV",
+    "KBC Eq Fd": "KBC Equity Fund",
+    "KBC Index Fd": "KBC Index Fund",
+    "KBC Instl Fd": "KBC Institutional Fund ",
+    "KBC Master Fd": "KBC Master Fund",
+    "KBC Renta": "KBC Renta",
+    "KBC Select Immo": "KBC Select Immo",
+    "KH": "Kleinwort Hambros ",
+    "Knzvtvn": "Konzervativni",
+    "Krtkdbh": "Kratkodobych",
+    "Ldrd": "Laddered",
+    "Lrg": "Large",
+    "Lgr": "Larger",
+    "LA": "Latin America",
+    "Lattd": "Latitude",
+    "Ldrs": "Leaders",
+    "Ldrsp": "Leadership",
+    "Ldg": "Leading",
+    "Lrng": "Learning",
+    "Lvl": "Level",
+    "Lvrg": "Leverage",
+    "Lvrgd": "Leveraged",
+    "LGIP": "LGIP Funds (Lux)",
+    "Lf": "Life",
+    "Lfstyl": "Lifestyle",
+    "Ltd": "Limited",
+    "Lnkd": "Linked",
+    "Liqd": "Liquid",
+    "Liq": "Liquidez",
+    "Liqdty": "Liquidity",
+    "Lqdty": "Liquidity",
+    "Lstd": "Listed",
+    "Lvstk": "Livestock",
+    "Lvg": "Living",
+    "Ln": "Loan",
+    "Lns": "Loans",
+    "Lcl": "Local",
+    "LO Funds III": "Lombard Odier Funds III",
+    "LO Selection": "Lombard Odier Selection",
+    "LSE": "London Stock Exchange",
+    "Lng": "Long",
+    "Lg": "Long",
+    "L S": "Long Short",
+    "L/S": "Long/Short",
+    "L/T": "Long Term",
+    "L-S": "Long-Short",
+    "Lw": "Low",
+    "LRWgt": "Low Risk Weighted",
+    "LUX": "Luxembourg",
+    "Lyn": "Lynch",
+    "Lyxor Invmt Fds": "Lyxor Investment Funds",
+    "Lyxor Newcits II Plc": "Lyxor Newcits II Plc",
+    "Macq": "Macquarie",
+    "Mac": "Macro",
+    "Mntn": "Maintain",
+    "MAS": "Malaysia",
+    "Mgd": "Managed",
+    "Mgmt": "Management",
+    "Mgr": "Manager",
+    "Mgrs": "Managers",
+    "Manu": "Manulife",
+    "Manulife GF": "Manulife Global Fund",
+    "Mkt": "Market",
+    "MN": "Market Neutral",
+    "MNP": "Market Neutral Portfolio",
+    "Mkts": "Markets",
+    "Mkwd": "Marketwide",
+    "Mtr": "Master",
+    "Mstr": "Masters",
+    "Machg": "Matching",
+    "Matrls": "Materials",
+    "Mat": "Maturity",
+    "Mxmsr": "Maximiser",
+    "Med": "Mediterranean",
+    "Mdm": "Medium",
+    "M/T": "Medium Term",
+    "Mgtrnd": "Megatrend",
+    "Mrg": "Merger",
+    "Mrl": "Merrill",
+    "Mtl": "Metal",
+    "Mtls": "Metals",
+    "MFS Inv": "MFS\u00ae Investment Funds",
+    "MFS Meridian": "MFS\u00ae Meridian Funds",
+    "Md": "Mid",
+    "Mdl": "Middle",
+    "Mnrs": "Miners",
+    "Min": "Minimum",
+    "Mng": "Mining",
+    "MnRsk": "MinRisk",
+    "Mirabaud": "Mirabaud Luxembourg SIF",
+    "Mitlnd": "Mittelstand",
+    "Mod": "Moderate",
+    "Mdfd": "Modified",
+    "Momt": "Momentum",
+    "Mny": "Money",
+    "Mth": "Month",
+    "Mn": "Monthly",
+    "Mthly": "Monthly",
+    "Mly": "Monthly",
+    "MDis": "Monthly Distribution",
+    "MD": "Monthly Distribution",
+    "Mnmnt": "Monument",
+    "Mt": "Monument",
+    "Moorea Fd": "Moorea Fund",
+    "MS INVF": "Morgan Stanley Investment Funds",
+    "Mortg": "Mortgage",
+    "Mlt": "Multi",
+    "Multi Challenge": "Multi Challenge SICAV",
+    "MltAdv": "Multiadvisers",
+    "Mltalt": "Multialternative",
+    "Mltast": "Multiasset",
+    "Mlt-Asst": "Multi-Asset",
+    "MA": "Multi-Asset",
+    "Multicoop": "Multicooperation",
+    "Mltfct": "Multifactor",
+    "Mlt-Mgr": "Multi-Manager",
+    "MltOpps": "Multiopportunities SICAV",
+    "Multipartner": "Multipartner SICAV",
+    "RobecoSAM": "Multipartner SICAV - RobecoSAM",
+    "Mltplr": "Multiplier",
+    "Mltsct": "Multisector",
+    "MSMM": "Multi-Style, Multi-Manager SICAV Funds plc",
+    "MU Lux": "MULTI-UNITS LUXEMBOURG",
+    "Muncpl": "Municipal",
+    "Mut": "Mutual",
+    "Myfd": "MY.fund",
+    "Ntnl": "National",
+    "Natrl": "Natural",
+    "Nat": "Naturelles",
+    "Nbg Bm": "Neuberger Berman",
+    "Nflz": "Neuflize",
+    "Netrl": "Neutral",
+    "New Capital": "New Capital Fund Lux",
+    "Nwtn": "Newton",
+    "NN (B) Invest": "NN (B) Invest",
+    "NN (L) Intl": "NN (L) International",
+    "NN (L) Pat": "NN (L) Patrimonial",
+    "NVt": "Non-Voting",
+    "Nordea 1 -": "Nordea 1 -",
+    "Nrm": "Normal",
+    "Nrth": "North",
+    "NT": "Northern Trust Ucits Common Contractual Fund",
+    "Nor": "NORWAY",
+    "NOK": "Norwegian Krone ",
+    "Nov": "November",
+    "Nvych": "Novych",
+    "O\u2019Sh": "O\u2019Shaughnessy",
+    "Oblig": "Obligatie",
+    "Obl": "Obligationer",
+    "Oct": "Octobre",
+    "Off": "Offensiv",
+    "Offsh": "Offshore",
+    "Op": "Open",
+    "Oppc": "Opportunistic",
+    "Opports": "Opportunities",
+    "Opps": "Opportunities",
+    "Opp": "Opportunity",
+    "Optm": "Optimum",
+    "Optd": "Optimised",
+    "Optr": "Optimiser",
+    "Optmzr": "Optimizer",
+    "Optimum": "Optimum Fund",
+    "Opt": "Option",
+    "Ord": "Ordinary",
+    "Ori": "Orient",
+    "Oth": "Other",
+    "Ovrs": "Overseas",
+    "Ovrwrtg": "Overwriting",
+    "Ownshp": "Ownership",
+    "Pac": "Pacific",
+    "Ps": "Paesi",
+    "Prmtrc": "Parametric",
+    "Paty": "Parity",
+    "Part": "Partenaires",
+    "Prtly": "Partially",
+    "PtH": "Partially-Hedged",
+    "Ptcpt": "Participant",
+    "Partic": "Participation",
+    "Ptnr": "Partner",
+    "Ptnrs": "Partners",
+    "PGLI": "Partners Group Listed Investments SICAV",
+    "Partners Group Listed": "Partners Group Listed Investments SICAV - Listed",
+    "Pasv": "Passive",
+    "Patrim": "Patrimoine",
+    "Patriml": "Patrimonial",
+    "Py": "Pay",
+    "Pyt": "Payout",
+    "P2P": "Peer to Peer",
+    "Pensn": "Pension",
+    "Pen": "Pension",
+    "Perf": "Performance",
+    "Perfm": "Performers",
+    "Prdic": "Periodic",
+    "Prd": "Periodo",
+    "Perpt": "Perpetual",
+    "Psnl": "Personal",
+    "Phrm": "Pharma",
+    "Phrmctls": "Pharmaceuticals",
+    "PI Inv": "PI Investment Funds",
+    "PIMCO": "PIMCO",
+    "PIMCO IRL": "PIMCO Funds Ireland PLC",
+    "PIMCO GIS": "PIMCO Funds: Global Investors Series plc",
+    "PIMCO Sel": "PIMCO Select Funds PLC",
+    "Pinr": "Pioneer",
+    "Pvvrv": "Pivovarov",
+    "Pln": "Plan",
+    "Pltnm": "Platinum",
+    "Plato IIF": "Plato Institutional Index Fund",
+    "Plyrs": "Players",
+    "plc": "plc.",
+    "Pl": "Pool",
+    "Polar Cap": "Polar Capital Funds PLC",
+    "Plcy": "Policy",
+    "Pld": "Pooled",
+    "Port": "Portfolio",
+    "Ptf": "Portfolio",
+    "Pstv": "Positive",
+    "Pwr": "Power",
+    "Prec": "Precious",
+    "PM": "Precious Metals",
+    "Prfrnc": "Preference ",
+    "Pref": "Preferred",
+    "Pre": "Premia",
+    "Prem": "Premier",
+    "Prm": "Premium",
+    "Presv": "Preservation",
+    "Prstg": "Prestige",
+    "Prc": "Price",
+    "Prcng": "Pricing",
+    "Prlztst": "Prilezitosti",
+    "Pr": "Prime",
+    "Princ": "Principal",
+    "Principal": "Principal Global Investors Funds",
+    "Priv": "Private",
+    "PBFI": "Private Bank Funds I",
+    "Privl": "Privilege",
+    "Prcss": "Process",
+    "Prod": "Products",
+    "Prfl": "Profile",
+    "Prog": "Progressif",
+    "Prgrv": "Progressive",
+    "Prpty": "Property",
+    "Protec": "Protecci\u00f3n",
+    "Protd": "protected",
+    "Prot": "Protection",
+    "Prvds": "Providus",
+    "Prdnt": "Prudente",
+    "Pru": "Prudential",
+    "PCFS": "Pure Capital Fund SICAV",
+    "Pure SIF SA": "Pure SICAV-SIF S.A.",
+    "PtWrt": "PutWrite",
+    "PW": "PutWrite",
+    "Qual": "Quality",
+    "Qul": "Quality",
+    "Qntmtl": "Quantamental",
+    "Quant": "Quantitative",
+    "Quants": "Quantitatives",
+    "Qt": "Quarterly",
+    "QDis": "Quarterly Distribution",
+    "QD": "Quarterly Distribution",
+    "Quoniam Fds Sel": "Quoniam Funds Selection SICAV",
+    "RAMS": "RAMS Investment Unit Trust",
+    "Rt": "Return",
+    "Rts": "Reuters",
+    "Rl": "Real",
+    "RE": "Real Estate",
+    "Rl Rt": "Real Return",
+    "Rms": "Reams",
+    "Rsnbl": "Reasonable",
+    "Rebal": "Rebalance",
+    "Rcvy": "Recovery",
+    "Red Arc Glb Invms": "Red Arc Global Investments (Ireland) ICAV",
+    "Rgnl": "Regional",
+    "Rglr": "Regular",
+    "Reg": "Regular",
+    "Relatv": "Relative",
+    "Rlx": "Relax",
+    "Rendim": "Rendimiento",
+    "Rnt": "Renta",
+    "RF": "Renta Fija",
+    "Renta": "Rentabilit\u00e9",
+    "Rsrch": "Research",
+    "Rsh": "Research",
+    "Rsrv": "Reserves",
+    "Res": "Resources",
+    "responsibility": "responsAbility SICAV (Lux)",
+    "Rspnb": "Responsible",
+    "Resrs": "Ressources",
+    "Restrc": "Restricted",
+    "Rstrcng": "Restructuring",
+    "Retl": "Retail",
+    "Ret": "Return",
+    "Retrs": "Reuters",
+    "Rev": "Revenue",
+    "Rvvl": "Revival",
+    "Revolt": "Revolution",
+    "Rsg": "Rising",
+    "Rsk": "Risk",
+    "Rd": "Road",
+    "Rds": "Roads",
+    "Rbtc": "Robotics",
+    "Rdn": "Rodina",
+    "RLBF II": "Royal London Bond Funds II ICVC",
+    "RCCF": "Russell Common Contractual Fund",
+    "RIQIC Fund plc": "Russell Investments Qualifying Investor China Fund plc",
+    "S&P": "S&p",
+    "Sat": "Satellites",
+    "Satsftn": "Satisfaction",
+    "Svg": "Saving",
+    "Schroder AS": "Schroder Alternative Solutions",
+    "Schroder GAIA": "Schroder GAIA",
+    "Schroder GAIA II": "Schroder GAIA II",
+    "Schroder ISF": "Schroder International Selection Fund",
+    "Schroder Invmt Fd": "Schroder Investment Fund",
+    "Schroder Sel": "Schroder Selection",
+    "Schroder SMBC Glb Bd": "Schroder SMBC Global Bond Series",
+    "Schroder SSF": "Schroder Special Situations Fund",
+    "Sci": "Scientific",
+    "Scintfc": "Scientific",
+    "Scrd": "Scored",
+    "Scrn": "Screened",
+    "Sect": "Sectors",
+    "Secu": "Secure",
+    "Secs": "Securities",
+    "Scs": "Securities",
+    "Sctsd": "Securitised",
+    "Sctzd": "Securitized",
+    "Sec": "Security",
+    "Sgrgtd": "Segregated",
+    "SEI GAF": "SEI Global Assets Fund plc - The SEI",
+    "SEI GIF": "SEI Global Investments Fund Plc - The SEI",
+    "SEI GMF ": "SEI Global Master Fund plc - The SEI",
+    "Selec": "Selecci\u00f3n",
+    "Sel": "Selectis",
+    "Slctv": "Selective",
+    "Sdis": "Semi-annual Distribution",
+    "SD": "Semi-annual Distribution",
+    "Sr": "Senior",
+    "Ser": "Series",
+    "Svc": "Service",
+    "Svd Plfm": "Serviced Platform SICAV",
+    "Svcs": "Services",
+    "Shckltn": "Shackleton",
+    "SSE": "Shanghai Stock Exchange",
+    "Shr": "Share",
+    "Shld": "Shareholder",
+    "Shrs": "Shares",
+    "Shrh": "Shariah ",
+    "ShelteR Invest": "ShelteR Invest",
+    "Shrt": "Short",
+    "Short Dur": "Short Duration",
+    "Shrt Dur": "Short Duration",
+    "S/T": "Short-Term",
+    "SICAV": "sicav",
+    "Smplty": "Simplicity",
+    "SGD": "Singapore Dollar ",
+    "Sits": "Situations",
+    "Sivek": "Sivek",
+    "Skyline": "Skyline Umbrella Fund ICAV",
+    "Slv": "Sleeve",
+    "Sm": "Small",
+    "S&M": "Small & Mid",
+    "Sm Cp": "Small Caps",
+    "SmCp": "SmallCap",
+    "Sm-Cp": "Small-Cap",
+    "Smlr": "Smaller",
+    "Smrt": "Smart",
+    "SB": "Smart Beta",
+    "Smrtfd": "Smartfund",
+    "Smd": "Smid",
+    "Sclly": "Socially",
+    "ScllyAwr": "Socially Aware",
+    "Sftwr": "Software",
+    "Solid": "Solidaire",
+    "Sodty": "Solidarity",
+    "Solu": "Solutions",
+    "Sostnbl": "Sostenible",
+    "Souv": "Souverain",
+    "Sov": "Sovereigns",
+    "Svrgn": "Sovereign",
+    "Spec": "Special",
+    "Spctm": "Spectrum",
+    "Sptlt": "Spotlight",
+    "Sqr": "Square",
+    "Stblty": "Stability",
+    "Stbl": "Stable",
+    "Std": "Standard",
+    "SLI": "Standard Life Investments Global SICAV",
+    "SLI GS II": "Standard Life Investments Global SICAV II",
+    "Stp": "Staples",
+    "Strt": "Start",
+    "Stt Strt": "State Street",
+    "Statstcl": "Statistical",
+    "Stpng": "Steepening",
+    "Stlg": "Sterling",
+    "Stwdsp": "Stewardship",
+    "Stk": "Stock",
+    "Stckpckr": "Stockpicker",
+    "Stks": "Stocks",
+    "Strat": "Strategy",
+    "Strats": "Strategies",
+    "Stgy": "Strategy",
+    "Struct": "Structured",
+    "Strctr": "Structures",
+    "Sbctnt": "Subcontinent",
+    "SbFd": "Sub-Fund",
+    "Sub": "Subsector",
+    "Skk ": "Sukuk ",
+    "Spr": "Super",
+    "ST Plus": "Super Trust Plus",
+    "Sprntnl": "Supranational",
+    "Srpls": "Surplus",
+    "Sustnby": "Sustainability",
+    "Sstby": "Sustainability",
+    "Sust": "Sustainable",
+    "Sst": "Sustainable",
+    "SEK": "Swedish Krona ",
+    "Sw": "Sweep",
+    "Swisscanto (LU) BF": "Swisscanto (LU) Bond Fund",
+    "Swisscanto (LU) EF": "Swisscanto (LU) Equity Fund",
+    "Swisscanto (LU) MMF": "Swisscanto (LU) Money Market Fund",
+    "Swisscanto (LU) PF": "Swisscanto (LU) Portfolio Fund",
+    "Switz": "Switzerland",
+    "Symphonia": "Symphonia Lux SICAV",
+    "Sntgm": "Syntagma",
+    "Sys": "System",
+    "Systmtc": "Systematic",
+    "Sysmc": "Systematic",
+    "T. Rowe Price": "T. Rowe Price Funds SICAV",
+    "Tact": "Tactical",
+    "Tailrd": "Tailored",
+    "Trgt": "Target",
+    "Tech": "Technology",
+    "Techs": "Technologies",
+    "Tchs": "Technologies",
+    "Tele": "Telecom",
+    "Telecms": "Telecommunications",
+    "Tmpltn": "Templeton",
+    "Trm": "Termine",
+    "Thm": "Thomson",
+    "Thms": "Themes",
+    "Thmsn": "Thomson",
+    "Thr Brdg Eurp": "Three Bridges Europe",
+    "Tilney ICAV": "Tilney Umbrella A ICAV",
+    "Tmng": "Timing",
+    "Ttl": "Total",
+    "TR": "Total Return",
+    "Trk": "Track",
+    "Trkr": "Tracker",
+    "Trdbl": "Tradable",
+    "Trd": "Trade",
+    "Trdg": "Trading",
+    "Trnh": "Tranche",
+    "Trsctn": "Transaction",
+    "Trans": "Transamerica",
+    "Transfmt": "Transformational",
+    "Trnsfm": "Transformational",
+    "Trsptn": "Transportation",
+    "Treas": "Treasuries",
+    "Trs": "Treasury",
+    "Trnd": "Trend",
+    "Trnds": "Trends",
+    "Trndy": "Trendy",
+    "Trhv": "Trhov",
+    "Trl": "Trials",
+    "Trif": "Triflex",
+    "Tr": "Trust",
+    "Trnard": "Turnaround",
+    "Twntyfr": "Twentyfour",
+    "Ttfr": "Twentyfour",
+    "US": "United States",
+    "UCITS": "Ucits",
+    "Ultr": "Ultra",
+    "Ulysses LT Funds": "Ulysses - L.T. Funds",
+    "Unbnd": "Unbundled",
+    "Uncons": "Unconstrained",
+    "Uncrltd": "Uncorrelated",
+    "Unhdgd": "Unhedged",
+    "UnH": "Unhedged",
+    "Unvsl": "Universal",
+    "Univ": "University",
+    "Unrstd": "Unrestricted",
+    "Unr": "Unrestricted",
+    "Upstm": "Upstream",
+    "USD": "USD",
+    "$": "USD",
+    "Utilts": "Utilities",
+    "Util": "Utility",
+    "Val": "Value",
+    "Valinvt": "Valueinvest",
+    "Var": "Variance",
+    "vhcl": "Vehicle",
+    "Active": "Vehicle",
+    "Vol": "Volatility",
+    "Volatil": "Volatility",
+    "Vontobel": "Vontobel Fund",
+    "Vyvazn": "Vyvazene",
+    "Wtr": "Water",
+    "Wlth": "Wealth",
+    "Wpns": "Weapons",
+    "Wkly": "Weekly",
+    "Wghd": "Weighed",
+    "Wtd": "Weighted",
+    "Wellington II SICAV": "Wellington Management Funds (Luxembourg) II SICAV",
+    "Wrld": "Wereld",
+    "Wstn": "Western",
+    "Wstfld": "Westfield",
+    "Wholsl": "Wholesale",
+    "Wnrs": "Winners",
+    "Wldwd": "Worldwide",
+    "WW": "Worldwide",
+    "Wld": "World",
+    "Yr": "Year",
+    "Yld": "Yield",
+    "Y": "Yield",
+    "Zr": "Zero",
+    "PLN": "Zloty"
+}
\ No newline at end of file
diff --git a/core/auz_nz/hybrid_solution_script.py b/core/auz_nz/hybrid_solution_script.py
index d97b588..514a027 100644
--- a/core/auz_nz/hybrid_solution_script.py
+++ b/core/auz_nz/hybrid_solution_script.py
@@ -32,24 +32,24 @@ from openai import AzureOpenAI
 
 ABB_JSON = dict()
 
-def get_abb_json():
+def get_abb_json(doc_source: str = "aus_prospectus"):
     global ABB_JSON
     if len(ABB_JSON.keys()) == 0:
-        with open("./configuration/aus_prospectus/abbreviation_records.json", "r") as file:
+        with open(f"./configuration/{doc_source}/abbreviation_records.json", "r") as file:
             # Load the JSON and convert keys to lowercase
             ABB_JSON = {key.lower(): value for key, value in json.load(file).items()}
 
-def get_abbre_format_str(fundname):
+def get_abbre_format_str(fundname, doc_source: str = "aus_prospectus"):
     """Replaces abbreviations in a fund name with their expanded forms."""
     # Convert fund name to lowercase while matching
     f_list = fundname.lower().split()
-    get_abb_json()
+    get_abb_json(doc_source)
     updated_doc_fname_words = [ABB_JSON.get(word, word).lower() for word in f_list]
     return " ".join(updated_doc_fname_words)
 
-def replace_abbrevs_in_fundnames(fund_names_list):
+def replace_abbrevs_in_fundnames(fund_names_list, doc_source: str = "aus_prospectus"):
     """Replaces abbreviations in a list of fund names."""
-    return [get_abbre_format_str(fund_name) for fund_name in fund_names_list]
+    return [get_abbre_format_str(fund_name, doc_source) for fund_name in fund_names_list]
 
 
 ### STEP 2 - Remove Stopwords
@@ -440,7 +440,7 @@ def format_response(doc_id, pred_fund, db_fund, clean_pred_name, clean_db_name,
     return dt
 
 
-def final_function_to_match(doc_id, pred_list, db_list, provider_name):
+def final_function_to_match(doc_id, pred_list, db_list, provider_name, doc_source: str = "aus_prospectus"):
     final_result = {}
     df_data = []
     unmatched_pred_list = pred_list.copy()
@@ -466,8 +466,8 @@ def final_function_to_match(doc_id, pred_list, db_list, provider_name):
                 # unmatched_pred_list.remove(pred_list[index])
             else:
                 ### STEP-1 Abbreviation replacement
-                cleaned_pred_name1 = replace_abbrevs_in_fundnames([pred_fund])[0]
-                cleaned_db_list1 = replace_abbrevs_in_fundnames(db_list)
+                cleaned_pred_name1 = replace_abbrevs_in_fundnames([pred_fund], doc_source)[0]
+                cleaned_db_list1 = replace_abbrevs_in_fundnames(db_list, doc_source)
                 # print("--> ",cleaned_db_list1, cleaned_pred_name1)
                 step1_result, matched_index, all_scores1_, all_matched_fund_names1_ = get_fund_match_final_score(cleaned_db_list1, cleaned_pred_name1)
                 # print(f"\nStep 1 - Abbreviation Replacement Result: {step1_result}")
@@ -617,11 +617,11 @@ def final_function_to_match(doc_id, pred_list, db_list, provider_name):
     # print("==>>> DB LIST: ",unmatched_db_list)
     # print("==>>> PRED LIST: ",unmatched_pred_list)
     if len(unmatched_pred_list)!=0:
-        cleaned_unmatched_pred_list = replace_abbrevs_in_fundnames(unmatched_pred_list)
+        cleaned_unmatched_pred_list = replace_abbrevs_in_fundnames(unmatched_pred_list, doc_source)
         cleaned_unmatched_pred_list = remove_stopwords_nltk(cleaned_unmatched_pred_list)
         cleaned_unmatched_pred_list = remove_special_characters(cleaned_unmatched_pred_list)
         
-        cleaned_unmatched_db_list = replace_abbrevs_in_fundnames(unmatched_db_list)
+        cleaned_unmatched_db_list = replace_abbrevs_in_fundnames(unmatched_db_list, doc_source)
         cleaned_unmatched_db_list = remove_stopwords_nltk(cleaned_unmatched_db_list)
         cleaned_unmatched_db_list = remove_special_characters(cleaned_unmatched_db_list)
         prompt_context = f"""
diff --git a/core/data_mapping.py b/core/data_mapping.py
index 4218b5f..8578c1c 100644
--- a/core/data_mapping.py
+++ b/core/data_mapping.py
@@ -1,6 +1,7 @@
 import os
 import json
 import pandas as pd
+from copy import deepcopy
 from utils.biz_utils import get_most_similar_name, remove_common_word
 from utils.sql_query_util import (
     query_document_fund_mapping,
@@ -18,14 +19,18 @@ class DataMapping:
         raw_document_data_list: list,
         document_mapping_info_df: pd.DataFrame,
         output_data_folder: str,
-        doc_source: str = "emea_ar"
+        doc_source: str = "emea_ar",
+        compare_with_provider: bool = True
     ):
         self.doc_id = doc_id
         self.datapoints = datapoints
         self.doc_source = doc_source
+        self.compare_with_provider = compare_with_provider
         self.raw_document_data_list = raw_document_data_list
         if document_mapping_info_df is None or len(document_mapping_info_df) == 0:
-            self.document_mapping_info_df = query_document_fund_mapping(doc_id, rerun=False)
+            self.document_mapping_info_df = query_document_fund_mapping(
+                doc_id, rerun=False
+            )
         else:
             self.document_mapping_info_df = document_mapping_info_df
 
@@ -44,7 +49,9 @@ class DataMapping:
     def set_mapping_data_by_db(self, document_mapping_info_df: pd.DataFrame):
         logger.info("Setting document mapping data")
         if document_mapping_info_df is None or len(document_mapping_info_df) == 0:
-            self.document_mapping_info_df = query_document_fund_mapping(self.doc_id, rerun=False)
+            self.document_mapping_info_df = query_document_fund_mapping(
+                self.doc_id, rerun=False
+            )
         else:
             self.document_mapping_info_df = document_mapping_info_df
         if len(self.document_mapping_info_df) == 0:
@@ -92,26 +99,27 @@ class DataMapping:
     def get_provider_mapping(self):
         if len(self.document_mapping_info_df) == 0:
             return pd.DataFrame()
-        provider_id_list = (
-            self.document_mapping_info_df["ProviderId"].unique().tolist()
-        )
+        provider_id_list = self.document_mapping_info_df["ProviderId"].unique().tolist()
         provider_mapping_list = []
         for provider_id in provider_id_list:
-            provider_mapping_list.append(query_investment_by_provider(provider_id, rerun=False))
+            provider_mapping_list.append(
+                query_investment_by_provider(provider_id, rerun=False)
+            )
         provider_mapping_df = pd.concat(provider_mapping_list)
         provider_mapping_df = provider_mapping_df.drop_duplicates()
         provider_mapping_df.reset_index(drop=True, inplace=True)
         return provider_mapping_df
-    
+
     def mapping_raw_data_entrance(self):
-        if self.doc_source == "emear_ar":
+        if self.doc_source == "emea_ar":
             return self.mapping_raw_data()
         elif self.doc_source == "aus_prospectus":
-            return self.mapping_raw_data_aus()
+            return self.mapping_raw_data_generic()
         else:
             return self.mapping_raw_data()
-        
-    def mapping_raw_data_aus(self):
+        # return self.mapping_raw_data_generic()
+
+    def mapping_raw_data_generic(self):
         logger.info(f"Mapping raw data for AUS Prospectus document {self.doc_id}")
         mapped_data_list = []
         # Generate raw name based on fund name and share name by integrate_share_name
@@ -128,7 +136,9 @@ class DataMapping:
                 raw_share_name = raw_data.get("share_name", "")
                 raw_data_keys = list(raw_data.keys())
                 if len(raw_share_name) > 0:
-                    integrated_share_name = self.integrate_share_name(raw_fund_name, raw_share_name)
+                    integrated_share_name = self.integrate_share_name(
+                        raw_fund_name, raw_share_name
+                    )
                     if integrated_share_name not in share_raw_name_list:
                         share_raw_name_list.append(integrated_share_name)
                     for datapoint in self.datapoints:
@@ -144,7 +154,7 @@ class DataMapping:
                                 "investment_type": 1,
                                 "investment_id": "",
                                 "investment_name": "",
-                                "similarity": 0
+                                "similarity": 0,
                             }
                             mapped_data_list.append(mapped_data)
                 else:
@@ -162,29 +172,38 @@ class DataMapping:
                                 "value": raw_data[datapoint],
                                 "investment_type": 33,
                                 "investment_id": "",
-                                "investment_name": ""
+                                "investment_name": "",
                             }
                             mapped_data_list.append(mapped_data)
         # Mapping raw data with database
-        iter_count = 30
+        iter_count = 60
         fund_match_result = {}
         if len(fund_raw_name_list) > 0:
-            fund_match_result = self.get_raw_name_db_match_result(fund_raw_name_list, "fund", iter_count)
-            logger.info(f"Fund match result: \n{fund_match_result}")
+            fund_match_result = self.get_raw_name_db_match_result(
+                fund_raw_name_list, "fund", iter_count
+            )
+            # logger.info(f"Fund match result: \n{fund_match_result}")
         share_match_result = {}
         if len(share_raw_name_list) > 0:
-            share_match_result = self.get_raw_name_db_match_result(share_raw_name_list, "share", iter_count)
-            logger.info(f"Share match result: \n{share_match_result}")
-        
+            share_match_result = self.get_raw_name_db_match_result(
+                share_raw_name_list, "share", iter_count
+            )
+            # logger.info(f"Share match result: \n{share_match_result}")
+
         for mapped_data in mapped_data_list:
             investment_type = mapped_data["investment_type"]
             raw_name = mapped_data["raw_name"]
             if investment_type == 33:
                 if fund_match_result.get(raw_name) is not None:
                     matched_db_fund_name = fund_match_result[raw_name]
-                    if matched_db_fund_name is not None and len(matched_db_fund_name) > 0:
+                    if (
+                        matched_db_fund_name is not None
+                        and len(matched_db_fund_name) > 0
+                    ):
                         # get FundId from self.doc_fund_mapping
-                        find_fund_df = self.doc_fund_mapping[self.doc_fund_mapping["FundName"] == matched_db_fund_name]
+                        find_fund_df = self.doc_fund_mapping[
+                            self.doc_fund_mapping["FundName"] == matched_db_fund_name
+                        ]
                         if find_fund_df is not None and len(find_fund_df) > 0:
                             fund_id = find_fund_df["FundId"].values[0]
                             mapped_data["investment_id"] = fund_id
@@ -193,38 +212,82 @@ class DataMapping:
             if investment_type == 1:
                 if share_match_result.get(raw_name) is not None:
                     matched_db_share_name = share_match_result[raw_name]
-                    if matched_db_share_name is not None and len(matched_db_share_name) > 0:
+                    if (
+                        matched_db_share_name is not None
+                        and len(matched_db_share_name) > 0
+                    ):
                         # get SecId from self.doc_fund_class_mapping
-                        find_share_df = self.doc_fund_class_mapping[self.doc_fund_class_mapping["ShareClassName"] == matched_db_share_name]
+                        find_share_df = self.doc_fund_class_mapping[
+                            self.doc_fund_class_mapping["ShareClassName"]
+                            == matched_db_share_name
+                        ]
                         if find_share_df is not None and len(find_share_df) > 0:
                             share_id = find_share_df["SecId"].values[0]
                             mapped_data["investment_id"] = share_id
                             mapped_data["investment_name"] = matched_db_share_name
                             mapped_data["similarity"] = 1
-                        
+
         self.output_mapping_file(mapped_data_list)
         return mapped_data_list
-        
-    def get_raw_name_db_match_result(self, raw_name_list, investment_type: str, iter_count: int = 30):
+
+    def get_raw_name_db_match_result(
+        self, raw_name_list, investment_type: str, iter_count: int = 30
+    ):
         # split raw_name_list into several parts which each part is with 30 elements
         # The reason to split is to avoid invoke token limitation issues from CahtGPT
-        raw_name_list_parts = [raw_name_list[i:i + iter_count] 
-                               for i in range(0, len(raw_name_list), iter_count)]
+        raw_name_list_parts = [
+            raw_name_list[i : i + iter_count]
+            for i in range(0, len(raw_name_list), iter_count)
+        ]
         all_match_result = {}
+        doc_fund_name_list = deepcopy(self.doc_fund_name_list)
+        doc_share_name_list = deepcopy(self.doc_share_name_list)
         for raw_name_list in raw_name_list_parts:
             if investment_type == "fund":
-                match_result = final_function_to_match(doc_id=self.doc_id,
-                                                       pred_list=raw_name_list,
-                                                       db_list=self.doc_fund_name_list,
-                                                       provider_name=self.provider_name)
+                match_result, doc_fund_name_list = self.get_final_function_to_match(
+                    raw_name_list, doc_fund_name_list
+                )
             else:
-                match_result = final_function_to_match(doc_id=self.doc_id,
-                                                       pred_list=raw_name_list,
-                                                       db_list=self.doc_share_name_list,
-                                                       provider_name=self.provider_name)
+                match_result, doc_share_name_list = self.get_final_function_to_match(
+                    raw_name_list, doc_share_name_list
+                )
             all_match_result.update(match_result)
         return all_match_result
+
+    def get_final_function_to_match(self, raw_name_list, db_name_list):
+        if len(db_name_list) == 0:
+            match_result = {}
+            for raw_name in raw_name_list:
+                match_result[raw_name] = ""
+        else:
+            match_result = final_function_to_match(
+                doc_id=self.doc_id,
+                pred_list=raw_name_list,
+                db_list=db_name_list,
+                provider_name=self.provider_name,
+                doc_source=self.doc_source
+            )
+            matched_name_list = list(match_result.values())
+            db_name_list = self.remove_matched_names(db_name_list, matched_name_list)
+        return match_result, db_name_list
+
+    def remove_matched_names(self, target_name_list: list, matched_name_list: list):
+        if len(matched_name_list) == 0:
+            return target_name_list
         
+        matched_name_list = list(set(matched_name_list))
+        matched_name_list = [
+            value for value in matched_name_list if value is not None and len(value) > 0
+        ]
+        for matched_name in matched_name_list:
+            if (
+                matched_name is not None
+                and len(matched_name) > 0
+                and matched_name in target_name_list
+            ):
+                target_name_list.remove(matched_name)
+        return target_name_list
+
     def mapping_raw_data(self):
         """
         doc_id, page_index, datapoint, value,
@@ -245,9 +308,14 @@ class DataMapping:
                 if raw_fund_name is None or len(raw_fund_name) == 0:
                     continue
                 raw_share_name = raw_data.get("share_name", "")
-                if len(self.doc_fund_name_list) == 0 and len(self.provider_fund_name_list) == 0:
+                if (
+                    len(self.doc_fund_name_list) == 0
+                    and len(self.provider_fund_name_list) == 0
+                ):
                     if len(raw_share_name) > 0:
-                        integrated_share_name = self.integrate_share_name(raw_fund_name, raw_share_name)
+                        integrated_share_name = self.integrate_share_name(
+                            raw_fund_name, raw_share_name
+                        )
                         raw_data_keys = list(raw_data.keys())
                         for datapoint in self.datapoints:
                             if datapoint in raw_data_keys:
@@ -262,7 +330,7 @@ class DataMapping:
                                     "investment_type": 1,
                                     "investment_id": "",
                                     "investment_name": "",
-                                    "similarity": 0
+                                    "similarity": 0,
                                 }
                                 mapped_data_list.append(mapped_data)
                     else:
@@ -279,13 +347,15 @@ class DataMapping:
                                     "value": raw_data[datapoint],
                                     "investment_type": 33,
                                     "investment_id": "",
-                                    "investment_name": ""
+                                    "investment_name": "",
                                 }
                                 mapped_data_list.append(mapped_data)
                 else:
                     raw_name = ""
                     if raw_share_name is not None and len(raw_share_name) > 0:
-                        raw_name = self.integrate_share_name(raw_fund_name, raw_share_name)
+                        raw_name = self.integrate_share_name(
+                            raw_fund_name, raw_share_name
+                        )
                         if mapped_share_cache.get(raw_name) is not None:
                             investment_info = mapped_share_cache[raw_name]
                         else:
@@ -298,14 +368,20 @@ class DataMapping:
                                 )
                                 fund_id = fund_info["id"]
                                 mapped_fund_cache[raw_fund_name] = fund_info
-                            investment_info = self.matching_with_database(
-                                raw_name=raw_name, 
-                                raw_share_name=raw_share_name, 
-                                raw_fund_name=raw_fund_name,
-                                parent_id=fund_id, 
-                                matching_type="share",
-                                process_cache=process_cache
-                            )
+                            investment_info = {}
+                            if len(fund_id) > 0:
+                                investment_info = self.mapping_unique_raw_data(fund_id=fund_id,
+                                                                               raw_fund_name=raw_fund_name,
+                                                                               raw_data_list=raw_data_list)
+                            if investment_info.get("id", None) is None or len(investment_info.get("id", "")) == 0:
+                                investment_info = self.matching_with_database(
+                                    raw_name=raw_name,
+                                    raw_share_name=raw_share_name,
+                                    raw_fund_name=raw_fund_name,
+                                    parent_id=fund_id,
+                                    matching_type="share",
+                                    process_cache=process_cache,
+                                )
                             mapped_share_cache[raw_name] = investment_info
                     elif raw_fund_name is not None and len(raw_fund_name) > 0:
                         raw_name = raw_fund_name
@@ -322,7 +398,7 @@ class DataMapping:
                             "id": "",
                             "legal_name": "",
                             "investment_type": -1,
-                            "similarity": 0
+                            "similarity": 0,
                         }
 
                     raw_data_keys = list(raw_data.keys())
@@ -339,13 +415,35 @@ class DataMapping:
                                 "investment_type": investment_info["investment_type"],
                                 "investment_id": investment_info["id"],
                                 "investment_name": investment_info["legal_name"],
-                                "similarity": investment_info["similarity"]
+                                "similarity": investment_info["similarity"],
                             }
                             mapped_data_list.append(mapped_data)
-        
+
         self.output_mapping_file(mapped_data_list)
         return mapped_data_list
     
+    def mapping_unique_raw_data(self, fund_id: str, raw_fund_name: str, raw_data_list: list):
+        share_count = 0
+        for raw_data in raw_data_list:
+            fund_name = raw_data.get("fund_name", "")
+            share_name = raw_data.get("share_name", "")
+            if fund_name == raw_fund_name and  share_name is not None and len(share_name) > 0:
+                share_count += 1
+                if share_count > 1:
+                    break
+        data_info = {}
+        if share_count == 1:
+            doc_compare_mapping = self.doc_fund_class_mapping[
+                    self.doc_fund_class_mapping["FundId"] == fund_id
+                ]
+            if len(doc_compare_mapping) == 1:
+                data_info["id"] = doc_compare_mapping["SecId"].values[0]
+                data_info["legal_name"] = doc_compare_mapping["ShareClassName"].values[0]
+                data_info["investment_type"] = 1
+                data_info["similarity"] = 1
+        return data_info
+        
+
     def output_mapping_file(self, mapped_data_list: list):
         json_data_file = os.path.join(
             self.output_data_json_folder, f"{self.doc_id}.json"
@@ -355,10 +453,10 @@ class DataMapping:
 
         extract_data_df = pd.DataFrame(self.raw_document_data_list)
         extract_data_df.reset_index(drop=True, inplace=True)
-        
+
         mapping_data_df = pd.DataFrame(mapped_data_list)
         mapping_data_df.reset_index(drop=True, inplace=True)
-        
+
         excel_data_file = os.path.join(
             self.output_data_excel_folder, f"{self.doc_id}.xlsx"
         )
@@ -373,7 +471,7 @@ class DataMapping:
         raw_name = ""
         if raw_share_name is not None and len(raw_share_name) > 0:
             raw_name = raw_share_name
-            # some share names are very short, 
+            # some share names are very short,
             # so we need to combine with fund name
             raw_name_splits = raw_name.split()
             raw_fund_name_splits = raw_fund_name.split()
@@ -384,13 +482,13 @@ class DataMapping:
         return raw_name
 
     def matching_with_database(
-        self, 
-        raw_name: str, 
-        raw_share_name: str = None, 
+        self,
+        raw_name: str,
+        raw_share_name: str = None,
         raw_fund_name: str = None,
-        parent_id: str = None, 
+        parent_id: str = None,
         matching_type: str = "fund",
-        process_cache: dict = {}
+        process_cache: dict = {},
     ):
         if len(self.doc_fund_name_list) == 0 and len(self.provider_fund_name_list) == 0:
             data_info["id"] = ""
@@ -402,7 +500,7 @@ class DataMapping:
             data_info["investment_type"] = investment_type
             data_info["similarity"] = 0
             return data_info
-        
+
         if matching_type == "fund":
             doc_compare_name_list = self.doc_fund_name_list
             doc_compare_mapping = self.doc_fund_mapping
@@ -417,8 +515,9 @@ class DataMapping:
                 doc_compare_mapping = self.doc_fund_class_mapping[
                     self.doc_fund_class_mapping["FundId"] == parent_id
                 ]
-                provider_compare_mapping = self.provider_fund_class_mapping\
-                        [self.provider_fund_class_mapping["FundId"] == parent_id]
+                provider_compare_mapping = self.provider_fund_class_mapping[
+                    self.provider_fund_class_mapping["FundId"] == parent_id
+                ]
                 if len(doc_compare_mapping) == 0:
                     if len(provider_compare_mapping) == 0:
                         doc_compare_name_list = self.doc_share_name_list
@@ -435,9 +534,10 @@ class DataMapping:
                     doc_compare_name_list = (
                         doc_compare_mapping["ShareClassName"].unique().tolist()
                     )
-                    
-                if len(provider_compare_mapping) == 0 or \
-                    len(provider_compare_mapping) < len(doc_compare_mapping):
+
+                if len(provider_compare_mapping) == 0 or len(
+                    provider_compare_mapping
+                ) < len(doc_compare_mapping):
                     provider_compare_name_list = doc_compare_name_list
                     provider_compare_mapping = doc_compare_mapping
                 else:
@@ -459,58 +559,68 @@ class DataMapping:
             if doc_compare_name_list is not None and len(doc_compare_name_list) > 0:
                 _, pre_common_word_list = remove_common_word(doc_compare_name_list)
                 max_similarity_name, max_similarity = get_most_similar_name(
-                    raw_name, 
-                    doc_compare_name_list, 
-                    share_name=raw_share_name, 
+                    raw_name,
+                    doc_compare_name_list,
+                    share_name=raw_share_name,
                     fund_name=raw_fund_name,
                     matching_type=matching_type,
-                    process_cache=process_cache)
+                    process_cache=process_cache,
+                )
                 if matching_type == "fund":
                     threshold = 0.7
                 else:
-                    threshold = 0.9
+                    if self.compare_with_provider:
+                        threshold = 0.9
+                    else:
+                        threshold = 0.6
                 if max_similarity is not None and max_similarity >= threshold:
                     data_info["id"] = doc_compare_mapping[
                         doc_compare_mapping[compare_name_dp] == max_similarity_name
                     ][compare_id_dp].values[0]
                     data_info["legal_name"] = max_similarity_name
                     data_info["similarity"] = max_similarity
-                    
+
             if data_info.get("id", None) is None or len(data_info.get("id", "")) == 0:
                 # set pre_common_word_list, reason: the document mapping for same fund maybe different with provider mapping
                 # the purpose is to get the most common word list, to improve the similarity.
-                max_similarity_name, max_similarity = get_most_similar_name(
-                    raw_name, 
-                    provider_compare_name_list, 
-                    share_name=raw_share_name,
-                    fund_name=raw_fund_name,
-                    matching_type=matching_type, 
-                    pre_common_word_list=pre_common_word_list,
-                    process_cache=process_cache
-                )
-                threshold = 0.7
-                if matching_type == "share":
-                    threshold = 0.5
-                round_similarity = 0
-                if max_similarity is not None and isinstance(max_similarity, float):
-                    round_similarity = round(max_similarity, 1)
-                if round_similarity is not None and round_similarity >= threshold:
-                    data_info["id"] = provider_compare_mapping[
-                        provider_compare_mapping[compare_name_dp] == max_similarity_name
-                    ][compare_id_dp].values[0]
-                    data_info["legal_name"] = max_similarity_name
-                    data_info["similarity"] = max_similarity
-                else:
-                    if len(doc_compare_name_list) == 1:
-                        data_info["id"] = doc_compare_mapping[
-                            doc_compare_mapping[compare_name_dp] == doc_compare_name_list[0]
+                if self.compare_with_provider:
+                    max_similarity_name, max_similarity = get_most_similar_name(
+                        raw_name,
+                        provider_compare_name_list,
+                        share_name=raw_share_name,
+                        fund_name=raw_fund_name,
+                        matching_type=matching_type,
+                        pre_common_word_list=pre_common_word_list,
+                        process_cache=process_cache,
+                    )
+                    threshold = 0.7
+                    if matching_type == "share":
+                        threshold = 0.5
+                    round_similarity = 0
+                    if max_similarity is not None and isinstance(max_similarity, float):
+                        round_similarity = round(max_similarity, 1)
+                    if round_similarity is not None and round_similarity >= threshold:
+                        data_info["id"] = provider_compare_mapping[
+                            provider_compare_mapping[compare_name_dp] == max_similarity_name
                         ][compare_id_dp].values[0]
-                        data_info["legal_name"] = doc_compare_name_list[0]
-                        data_info["similarity"] = 1
+                        data_info["legal_name"] = max_similarity_name
+                        data_info["similarity"] = max_similarity
                     else:
-                        data_info["id"] = ""
-                        data_info["legal_name"] = ""
-                        data_info["similarity"] = 0
+                        if len(doc_compare_name_list) == 1:
+                            data_info["id"] = doc_compare_mapping[
+                                doc_compare_mapping[compare_name_dp]
+                                == doc_compare_name_list[0]
+                            ][compare_id_dp].values[0]
+                            data_info["legal_name"] = doc_compare_name_list[0]
+                            data_info["similarity"] = 1
+                        else:
+                            data_info["id"] = ""
+                            data_info["legal_name"] = ""
+                            data_info["similarity"] = 0
+                else:
+                    data_info["id"] = ""
+                    data_info["legal_name"] = ""
+                    data_info["similarity"] = 0
             data_info["investment_type"] = investment_type
         else:
             data_info["id"] = ""
diff --git a/main.py b/main.py
index 2052e49..bf07834 100644
--- a/main.py
+++ b/main.py
@@ -31,11 +31,14 @@ class EMEA_AR_Parsing:
         output_mapping_data_folder: str = r"/data/emea_ar/output/mapping_data/docs/",
         extract_way: str = "text",
         drilldown_folder: str = r"/data/emea_ar/output/drilldown/",
+        compare_with_provider: bool = True
     ) -> None:
         self.doc_id = doc_id
         self.doc_source = doc_source
         self.pdf_folder = pdf_folder
         os.makedirs(self.pdf_folder, exist_ok=True)
+        self.compare_with_provider = compare_with_provider
+        
         self.pdf_file = self.download_pdf()
         self.document_mapping_info_df = query_document_fund_mapping(doc_id, rerun=False)
 
@@ -72,11 +75,11 @@ class EMEA_AR_Parsing:
         os.makedirs(self.output_mapping_data_folder, exist_ok=True)
 
         self.filter_pages = FilterPages(
-            self.doc_id, 
-            self.pdf_file, 
-            self.document_mapping_info_df, 
+            self.doc_id,
+            self.pdf_file,
+            self.document_mapping_info_df,
             self.doc_source,
-            output_pdf_text_folder
+            output_pdf_text_folder,
         )
         self.page_text_dict = self.filter_pages.page_text_dict
 
@@ -87,7 +90,9 @@ class EMEA_AR_Parsing:
             drilldown_folder = r"/data/emea_ar/output/drilldown/"
         os.makedirs(drilldown_folder, exist_ok=True)
         self.drilldown_folder = drilldown_folder
-        misc_config_file = os.path.join(f"./configuration/{doc_source}/", "misc_config.json")
+        misc_config_file = os.path.join(
+            f"./configuration/{doc_source}/", "misc_config.json"
+        )
         if os.path.exists(misc_config_file):
             with open(misc_config_file, "r", encoding="utf-8") as f:
                 misc_config = json.load(f)
@@ -278,7 +283,8 @@ class EMEA_AR_Parsing:
             data_from_gpt,
             self.document_mapping_info_df,
             self.output_mapping_data_folder,
-            self.doc_source
+            self.doc_source,
+            compare_with_provider=self.compare_with_provider
         )
         return data_mapping.mapping_raw_data_entrance()
 
@@ -334,6 +340,7 @@ def mapping_data(
         output_mapping_data_folder=output_mapping_folder,
         extract_way=extract_way,
         drilldown_folder=drilldown_folder,
+        compare_with_provider=False
     )
     doc_data_from_gpt, annotation_list = emea_ar_parsing.extract_data(
         re_run=re_run_extract_data
@@ -501,19 +508,29 @@ def batch_start_job(
             result_extract_data_df.to_excel(
                 writer, index=False, sheet_name="extract_data"
             )
-        
-        if document_mapping_file is not None and len(document_mapping_file) > 0 and os.path.exists(document_mapping_file):
+
+        if (
+            document_mapping_file is not None
+            and len(document_mapping_file) > 0
+            and os.path.exists(document_mapping_file)
+        ):
             try:
-                merged_total_data_folder = os.path.join(output_mapping_total_folder, "merged/")
+                merged_total_data_folder = os.path.join(
+                    output_mapping_total_folder, "merged/"
+                )
                 os.makedirs(merged_total_data_folder, exist_ok=True)
                 data_file_base_name = os.path.basename(output_file)
-                output_merged_data_file_path = os.path.join(merged_total_data_folder, "merged_" + data_file_base_name)
-                merge_output_data_aus_prospectus(output_file, document_mapping_file, output_merged_data_file_path)
+                output_merged_data_file_path = os.path.join(
+                    merged_total_data_folder, "merged_" + data_file_base_name
+                )
+                merge_output_data_aus_prospectus(
+                    output_file, document_mapping_file, output_merged_data_file_path
+                )
             except Exception as e:
                 logger.error(f"Error: {e}")
 
         if calculate_metrics:
-            prediction_sheet_name = "total_mapping_data"
+            prediction_sheet_name = "data_in_doc_mapping"
             ground_truth_file = r"/data/emea_ar/ground_truth/data_extraction/mapping_data_info_73_documents.xlsx"
             ground_truth_sheet_name = "mapping_data"
             metrics_output_folder = r"/data/emea_ar/output/metrics/"
@@ -770,11 +787,11 @@ def test_auto_generate_instructions():
 
 
 def test_data_extraction_metrics():
-    data_type = "data_extraction"
+    data_type = "document_mapping_in_db"
     # prediction_file = r"/data/emea_ar/output/mapping_data/total/mapping_data_info_88_documents_by_image_20240920033929.xlsx"
-    prediction_file = r"/data/emea_ar/output/mapping_data/total/mapping_data_info_88_documents_by_text_20240922152517.xlsx"
+    prediction_file = r"/data/emea_ar/output/mapping_data/total/mapping_data_info_51_documents_by_text_20250127104008.xlsx"
     # prediction_file = r"/data/emea_ar/output/mapping_data/docs/by_text/excel/481475385.xlsx"
-    prediction_sheet_name = "mapping_data"
+    prediction_sheet_name = "data_in_doc_mapping"
     ground_truth_file = r"/data/emea_ar/ground_truth/data_extraction/mapping_data_info_73_documents.xlsx"
     ground_truth_sheet_name = "mapping_data"
     metrics_output_folder = r"/data/emea_ar/output/metrics/"
@@ -1017,7 +1034,7 @@ def batch_run_documents(
     )
     re_run_extract_data = False
     re_run_mapping_data = True
-    force_save_total_data = True
+    force_save_total_data = False
     calculate_metrics = False
 
     extract_way = "text"
@@ -1194,13 +1211,17 @@ def merge_output_data_aus_prospectus(
 ):
     # TODO: merge output data for aus prospectus, plan to realize it on 2025-01-16
     data_df = pd.read_excel(data_file_path, sheet_name="total_mapping_data")
-    document_mapping_df = pd.read_excel(document_mapping_file, sheet_name="document_mapping")
+    document_mapping_df = pd.read_excel(
+        document_mapping_file, sheet_name="document_mapping"
+    )
     # set doc_id to be string type
     data_df["doc_id"] = data_df["doc_id"].astype(str)
     document_mapping_df["DocumentId"] = document_mapping_df["DocumentId"].astype(str)
 
     doc_id_list = data_df["doc_id"].unique().tolist()
-    datapoint_keyword_config_file = r"./configuration/aus_prospectus/datapoint_name.json"
+    datapoint_keyword_config_file = (
+        r"./configuration/aus_prospectus/datapoint_name.json"
+    )
     with open(datapoint_keyword_config_file, "r", encoding="utf-8") as f:
         datapoint_keyword_config = json.load(f)
     datapoint_name_list = list(datapoint_keyword_config.keys())
@@ -1212,7 +1233,9 @@ def merge_output_data_aus_prospectus(
                 "EffectiveDate"
             ].values[0]
         )[0:10]
-        share_doc_data_df = data_df[(data_df["doc_id"] == doc_id) & (data_df["investment_type"] == 1)]
+        share_doc_data_df = data_df[
+            (data_df["doc_id"] == doc_id) & (data_df["investment_type"] == 1)
+        ]
         exist_raw_name_list = []
         for index, row in share_doc_data_df.iterrows():
             doc_id = str(row["doc_id"])
@@ -1228,7 +1251,9 @@ def merge_output_data_aus_prospectus(
             fund_id = ""
             fund_legal_name = ""
             if share_class_id != "":
-                record_row = document_mapping_df[document_mapping_df["FundClassId"] == share_class_id]
+                record_row = document_mapping_df[
+                    document_mapping_df["FundClassId"] == share_class_id
+                ]
                 if len(record_row) > 0:
                     fund_id = record_row["FundId"].values[0]
                     fund_legal_name = record_row["FundLegalName"].values[0]
@@ -1265,16 +1290,16 @@ def merge_output_data_aus_prospectus(
                 doc_data_list.append(data)
             # find data from total_data_list by raw_name
             for data in doc_data_list:
-                if (
-                    data["raw_name"] == raw_name
-                ):
+                if data["raw_name"] == raw_name:
                     update_key = datapoint
                     data[update_key] = value
                     if page_index not in data["page_index"]:
                         data["page_index"].append(page_index)
                     break
-        
-        fund_doc_data_df = data_df[(data_df["doc_id"] == doc_id) & (data_df["investment_type"] == 33)]
+
+        fund_doc_data_df = data_df[
+            (data_df["doc_id"] == doc_id) & (data_df["investment_type"] == 33)
+        ]
         for index, row in fund_doc_data_df.iterrows():
             doc_id = str(row["doc_id"])
             page_index = int(row["page_index"])
@@ -1285,12 +1310,13 @@ def merge_output_data_aus_prospectus(
             value = row["value"]
             fund_id = row["investment_id"]
             fund_legal_name = row["investment_name"]
-            
+
             exist = False
             if fund_id != "":
                 for data in doc_data_list:
-                    if (fund_id != "" and data["fund_id"] == fund_id) or \
-                        (data["raw_fund_name"] == raw_fund_name):
+                    if (fund_id != "" and data["fund_id"] == fund_id) or (
+                        data["raw_fund_name"] == raw_fund_name
+                    ):
                         update_key = datapoint
                         data[update_key] = value
                         if page_index not in data["page_index"]:
@@ -1323,6 +1349,7 @@ def merge_output_data_aus_prospectus(
 
 
 if __name__ == "__main__":
+    # test_data_extraction_metrics()
     # data_file_path = r"/data/aus_prospectus/output/mapping_data/total/mapping_data_info_11_documents_by_text_20250116220811.xlsx"
     # document_mapping_file_path = r"/data/aus_prospectus/basic_information/11_documents/document_mapping.xlsx"
     # merged_total_data_folder = r'/data/aus_prospectus/output/mapping_data/total/merged/'
@@ -1347,10 +1374,12 @@ if __name__ == "__main__":
     #                        output_mapping_child_folder=output_mapping_child_folder)
 
     # special_doc_id_list = ["553242411"]
-    
-    doc_source = "aus_prospectus"
+
+    doc_source = "emea_ar"
     if doc_source == "aus_prospectus":
-        document_sample_file = r"./sample_documents/aus_prospectus_100_documents_multi_fund_sample.txt"
+        document_sample_file = (
+            r"./sample_documents/aus_prospectus_100_documents_multi_fund_sample.txt"
+        )
         with open(document_sample_file, "r", encoding="utf-8") as f:
             special_doc_id_list = [doc_id.strip() for doc_id in f.readlines()]
         document_mapping_file = r"/data/aus_prospectus/basic_information/from_2024_documents/aus_100_document_prospectus_multi_fund.xlsx"
@@ -1383,7 +1412,7 @@ if __name__ == "__main__":
             r"/data/aus_prospectus/output/mapping_data/total/"
         )
         drilldown_folder = r"/data/aus_prospectus/output/drilldown/"
-        
+
         batch_run_documents(
             doc_source=doc_source,
             special_doc_id_list=special_doc_id_list,
@@ -1397,7 +1426,61 @@ if __name__ == "__main__":
             drilldown_folder=drilldown_folder,
         )
     elif doc_source == "emea_ar":
-        special_doc_id_list = ["553242408"]
+        special_doc_id_list = [
+            "292989214",
+            "316237292",
+            "321733631",
+            "323390570",
+            "327956364",
+            "333207452",
+            "334718372",
+            "344636875",
+            "362246081",
+            "366179419",
+            "380945052",
+            "382366116",
+            "387202452",
+            "389171486",
+            "391456740",
+            "391736837",
+            "394778487",
+            "401684600",
+            "402113224",
+            "402181770",
+            "402397014",
+            "405803396",
+            "445102363",
+            "445256897",
+            "448265376",
+            "449555622",
+            "449623976",
+            "458291624",
+            "458359181",
+            "463081566",
+            "469138353",
+            "471641628",
+            "476492237",
+            "478585901",
+            "478586066",
+            "479042264",
+            "479793787",
+            "481475385",
+            "483617247",
+            "486378555",
+            "486383912",
+            "492121213",
+            "497497599",
+            "502693599",
+            "502821436",
+            "503194284",
+            "506559375",
+            "507967525",
+            "508854243",
+            "509845549",
+            "520879048",
+            "529925114",
+        ]
+        special_doc_id_list = ["471641628"]
         batch_run_documents(
             doc_source=doc_source, special_doc_id_list=special_doc_id_list
         )