dc-ml-emea-ar/playground.ipynb

605 lines
22 KiB
Plaintext
Raw Normal View History

2024-08-28 15:21:26 +00:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"from utils.biz_utils import add_slash_to_text_as_regex\n",
"import json\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"regex = r\"Turnover \\n\""
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Turnover\\\\s+\\\\n'"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"add_slash_to_text_as_regex(regex)"
]
},
{
"cell_type": "code",
2024-08-29 22:05:58 +00:00
"execution_count": 46,
2024-08-28 15:21:26 +00:00
"metadata": {},
"outputs": [],
"source": [
2024-08-29 22:05:58 +00:00
"text = \"What was the share of investments made in transitional and enabling activities? \\nTaxonomy-aligned\\nactivities are expressed \\nas a share of\\n\\u2022\\t Turnover reflects the\\n\""
2024-08-28 15:21:26 +00:00
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<re.Match object; span=(141, 151), match='Turnover \\n'>"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"re.search(regex, text)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"datapoint_keywords_config_file = r\"./configuration/datapoint_keyword.json\""
]
},
{
"cell_type": "code",
2024-08-29 22:05:58 +00:00
"execution_count": 47,
2024-08-28 15:21:26 +00:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TOR no match\n",
2024-08-29 22:05:58 +00:00
"Turnover\\*\\s+\\n no match\n",
"Turnover\\s+\\n no match\n",
2024-08-28 15:21:26 +00:00
"Turnover\\s+Ratio no match\n",
"Turnover\\s+Rate no match\n",
"Portfolio\\s+Turnover no match\n",
"Portfolio\\s+turnover\\s+ratio no match\n",
"Portfolio\\s+turnover\\s+rate no match\n",
"PTR no match\n",
"Annual\\s+Portfolio\\s+Turnover\\s+Ratio no match\n"
]
}
],
"source": [
"# Load the datapoint keyword configuration (JSON) from the path set in an earlier cell.\n",
"with open(datapoint_keywords_config_file, \"r\", encoding=\"utf-8\") as config_fp:\n",
"    datapoint_keywords_config = json.load(config_fp)\n",
"\n",
"# English keyword list for the \"tor\" datapoint; empty list when either key is absent.\n",
"tor_regex_list = datapoint_keywords_config.get(\"tor\", {}).get(\"english\", [])\n",
"\n",
"# Report, per keyword, whether its converted pattern is found in `text`.\n",
"# The pattern is kept in a cell-local name so the notebook-level `regex`\n",
"# assigned in an earlier cell is not overwritten by this loop.\n",
"for tor_regex in tor_regex_list:\n",
"    tor_pattern = add_slash_to_text_as_regex(tor_regex)\n",
"    tor_match = re.search(tor_pattern, text)\n",
"    if tor_match:\n",
"        print(f\"{tor_pattern} match {tor_match.group()}\")\n",
"    else:\n",
"        print(f\"{tor_pattern} no match\")"
]
},
2024-08-29 22:05:58 +00:00
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"from utils.sql_query_util import query_investment_by_provider, query_document_fund_mapping\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"provider_mapping = query_investment_by_provider(company_id=\"0C00008QVP\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ProviderId</th>\n",
" <th>ProviderName</th>\n",
" <th>FundId</th>\n",
" <th>FundName</th>\n",
" <th>ISIN</th>\n",
" <th>SecId</th>\n",
" <th>CurrencyId</th>\n",
" <th>ShareClassName</th>\n",
" <th>ShareClassStatus</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>840</th>\n",
" <td>0C00008QVP</td>\n",
" <td>T. Rowe Price (Luxembourg) Management S.à r.l.</td>\n",
" <td>FS0000DUH4</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Credit O...</td>\n",
" <td>LU1053597990</td>\n",
" <td>F000010MEE</td>\n",
" <td>USD</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Credit O...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>841</th>\n",
" <td>0C00008QVP</td>\n",
" <td>T. Rowe Price (Luxembourg) Management S.à r.l.</td>\n",
" <td>FS0000DUH4</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Credit O...</td>\n",
" <td>LU1053597727</td>\n",
" <td>F000010MEF</td>\n",
" <td>USD</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Credit O...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>842</th>\n",
" <td>0C00008QVP</td>\n",
" <td>T. Rowe Price (Luxembourg) Management S.à r.l.</td>\n",
" <td>FS0000DUH5</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>LU0993574440</td>\n",
" <td>F000010MEG</td>\n",
" <td>USD</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>843</th>\n",
" <td>0C00008QVP</td>\n",
" <td>T. Rowe Price (Luxembourg) Management S.à r.l.</td>\n",
" <td>FS0000DUH5</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>LU1805616171</td>\n",
" <td>F000010PUN</td>\n",
" <td>CHF</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>844</th>\n",
" <td>0C00008QVP</td>\n",
" <td>T. Rowe Price (Luxembourg) Management S.à r.l.</td>\n",
" <td>FS0000DUH5</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>LU1076358073</td>\n",
" <td>F000010MEH</td>\n",
" <td>EUR</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>845</th>\n",
" <td>0C00008QVP</td>\n",
" <td>T. Rowe Price (Luxembourg) Management S.à r.l.</td>\n",
" <td>FS0000DUH5</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>LU2046740358</td>\n",
" <td>F0000143Y8</td>\n",
" <td>USD</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>846</th>\n",
" <td>0C00008QVP</td>\n",
" <td>T. Rowe Price (Luxembourg) Management S.à r.l.</td>\n",
" <td>FS0000DUH5</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>LU2046740432</td>\n",
" <td>F0000143Y9</td>\n",
" <td>USD</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>847</th>\n",
" <td>0C00008QVP</td>\n",
" <td>T. Rowe Price (Luxembourg) Management S.à r.l.</td>\n",
" <td>FS0000DUH5</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>LU0993569101</td>\n",
" <td>F00001564H</td>\n",
" <td>USD</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>848</th>\n",
" <td>0C00008QVP</td>\n",
" <td>T. Rowe Price (Luxembourg) Management S.à r.l.</td>\n",
" <td>FS0000DUH5</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>LU2122516821</td>\n",
" <td>F000014UPK</td>\n",
" <td>AUD</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ProviderId ProviderName FundId \\\n",
"840 0C00008QVP T. Rowe Price (Luxembourg) Management S.à r.l. FS0000DUH4 \n",
"841 0C00008QVP T. Rowe Price (Luxembourg) Management S.à r.l. FS0000DUH4 \n",
"842 0C00008QVP T. Rowe Price (Luxembourg) Management S.à r.l. FS0000DUH5 \n",
"843 0C00008QVP T. Rowe Price (Luxembourg) Management S.à r.l. FS0000DUH5 \n",
"844 0C00008QVP T. Rowe Price (Luxembourg) Management S.à r.l. FS0000DUH5 \n",
"845 0C00008QVP T. Rowe Price (Luxembourg) Management S.à r.l. FS0000DUH5 \n",
"846 0C00008QVP T. Rowe Price (Luxembourg) Management S.à r.l. FS0000DUH5 \n",
"847 0C00008QVP T. Rowe Price (Luxembourg) Management S.à r.l. FS0000DUH5 \n",
"848 0C00008QVP T. Rowe Price (Luxembourg) Management S.à r.l. FS0000DUH5 \n",
"\n",
" FundName ISIN \\\n",
"840 T. Rowe Price Funds Series II SICAV - Credit O... LU1053597990 \n",
"841 T. Rowe Price Funds Series II SICAV - Credit O... LU1053597727 \n",
"842 T. Rowe Price Funds Series II SICAV - Floating... LU0993574440 \n",
"843 T. Rowe Price Funds Series II SICAV - Floating... LU1805616171 \n",
"844 T. Rowe Price Funds Series II SICAV - Floating... LU1076358073 \n",
"845 T. Rowe Price Funds Series II SICAV - Floating... LU2046740358 \n",
"846 T. Rowe Price Funds Series II SICAV - Floating... LU2046740432 \n",
"847 T. Rowe Price Funds Series II SICAV - Floating... LU0993569101 \n",
"848 T. Rowe Price Funds Series II SICAV - Floating... LU2122516821 \n",
"\n",
" SecId CurrencyId ShareClassName \\\n",
"840 F000010MEE USD T. Rowe Price Funds Series II SICAV - Credit O... \n",
"841 F000010MEF USD T. Rowe Price Funds Series II SICAV - Credit O... \n",
"842 F000010MEG USD T. Rowe Price Funds Series II SICAV - Floating... \n",
"843 F000010PUN CHF T. Rowe Price Funds Series II SICAV - Floating... \n",
"844 F000010MEH EUR T. Rowe Price Funds Series II SICAV - Floating... \n",
"845 F0000143Y8 USD T. Rowe Price Funds Series II SICAV - Floating... \n",
"846 F0000143Y9 USD T. Rowe Price Funds Series II SICAV - Floating... \n",
"847 F00001564H USD T. Rowe Price Funds Series II SICAV - Floating... \n",
"848 F000014UPK AUD T. Rowe Price Funds Series II SICAV - Floating... \n",
"\n",
" ShareClassStatus \n",
"840 0 \n",
"841 0 \n",
"842 1 \n",
"843 0 \n",
"844 0 \n",
"845 0 \n",
"846 0 \n",
"847 0 \n",
"848 0 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Literal substring filter: regex=False keeps the \".\" in the fund name from acting as a\n",
"# regex wildcard, and na=False treats rows with a missing FundName as non-matching.\n",
"provider_mapping[provider_mapping[\"FundName\"].str.contains(\"T. Rowe Price Funds Series II SICAV\", regex=False, na=False)]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"document_mapping = query_document_fund_mapping(doc_id=\"486378555\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>DocumentId</th>\n",
" <th>EffectiveDate</th>\n",
" <th>DocumentType</th>\n",
" <th>Format</th>\n",
" <th>Language</th>\n",
" <th>DocumentStatus</th>\n",
" <th>ProviderId</th>\n",
" <th>ProviderName</th>\n",
" <th>FundId</th>\n",
" <th>FundName</th>\n",
" <th>Domicile</th>\n",
" <th>SecId</th>\n",
" <th>CurrencyId</th>\n",
" <th>ShareClassName</th>\n",
" <th>ISIN</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>486378555</td>\n",
" <td>2022-06-30</td>\n",
" <td>4</td>\n",
" <td>PDF</td>\n",
" <td>0L00000122</td>\n",
" <td>1</td>\n",
" <td>0C00008QVP</td>\n",
" <td>T. Rowe Price (Luxembourg) Management S.à r.l.</td>\n",
" <td>FS0000DUH5</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>LUX</td>\n",
" <td>F000010MEG</td>\n",
" <td>USD</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>LU0993574440</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>486378555</td>\n",
" <td>2022-06-30</td>\n",
" <td>4</td>\n",
" <td>PDF</td>\n",
" <td>0L00000122</td>\n",
" <td>1</td>\n",
" <td>0C00008QVP</td>\n",
" <td>T. Rowe Price (Luxembourg) Management S.à r.l.</td>\n",
" <td>FS0000DUH5</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>LUX</td>\n",
" <td>F000010PUN</td>\n",
" <td>CHF</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>LU1805616171</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>486378555</td>\n",
" <td>2022-06-30</td>\n",
" <td>4</td>\n",
" <td>PDF</td>\n",
" <td>0L00000122</td>\n",
" <td>1</td>\n",
" <td>0C00008QVP</td>\n",
" <td>T. Rowe Price (Luxembourg) Management S.à r.l.</td>\n",
" <td>FS0000DUH5</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>LUX</td>\n",
" <td>F000010MEH</td>\n",
" <td>EUR</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>LU1076358073</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>486378555</td>\n",
" <td>2022-06-30</td>\n",
" <td>4</td>\n",
" <td>PDF</td>\n",
" <td>0L00000122</td>\n",
" <td>1</td>\n",
" <td>0C00008QVP</td>\n",
" <td>T. Rowe Price (Luxembourg) Management S.à r.l.</td>\n",
" <td>FS0000DUH5</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>LUX</td>\n",
" <td>F0000143Y8</td>\n",
" <td>USD</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>LU2046740358</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>486378555</td>\n",
" <td>2022-06-30</td>\n",
" <td>4</td>\n",
" <td>PDF</td>\n",
" <td>0L00000122</td>\n",
" <td>1</td>\n",
" <td>0C00008QVP</td>\n",
" <td>T. Rowe Price (Luxembourg) Management S.à r.l.</td>\n",
" <td>FS0000DUH5</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>LUX</td>\n",
" <td>F0000143Y9</td>\n",
" <td>USD</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>LU2046740432</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>486378555</td>\n",
" <td>2022-06-30</td>\n",
" <td>4</td>\n",
" <td>PDF</td>\n",
" <td>0L00000122</td>\n",
" <td>1</td>\n",
" <td>0C00008QVP</td>\n",
" <td>T. Rowe Price (Luxembourg) Management S.à r.l.</td>\n",
" <td>FS0000DUH5</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>LUX</td>\n",
" <td>F000014UPK</td>\n",
" <td>AUD</td>\n",
" <td>T. Rowe Price Funds Series II SICAV - Floating...</td>\n",
" <td>LU2122516821</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" DocumentId EffectiveDate DocumentType Format Language DocumentStatus \\\n",
"0 486378555 2022-06-30 4 PDF 0L00000122 1 \n",
"1 486378555 2022-06-30 4 PDF 0L00000122 1 \n",
"2 486378555 2022-06-30 4 PDF 0L00000122 1 \n",
"3 486378555 2022-06-30 4 PDF 0L00000122 1 \n",
"4 486378555 2022-06-30 4 PDF 0L00000122 1 \n",
"5 486378555 2022-06-30 4 PDF 0L00000122 1 \n",
"\n",
" ProviderId ProviderName FundId \\\n",
"0 0C00008QVP T. Rowe Price (Luxembourg) Management S.à r.l. FS0000DUH5 \n",
"1 0C00008QVP T. Rowe Price (Luxembourg) Management S.à r.l. FS0000DUH5 \n",
"2 0C00008QVP T. Rowe Price (Luxembourg) Management S.à r.l. FS0000DUH5 \n",
"3 0C00008QVP T. Rowe Price (Luxembourg) Management S.à r.l. FS0000DUH5 \n",
"4 0C00008QVP T. Rowe Price (Luxembourg) Management S.à r.l. FS0000DUH5 \n",
"5 0C00008QVP T. Rowe Price (Luxembourg) Management S.à r.l. FS0000DUH5 \n",
"\n",
" FundName Domicile SecId \\\n",
"0 T. Rowe Price Funds Series II SICAV - Floating... LUX F000010MEG \n",
"1 T. Rowe Price Funds Series II SICAV - Floating... LUX F000010PUN \n",
"2 T. Rowe Price Funds Series II SICAV - Floating... LUX F000010MEH \n",
"3 T. Rowe Price Funds Series II SICAV - Floating... LUX F0000143Y8 \n",
"4 T. Rowe Price Funds Series II SICAV - Floating... LUX F0000143Y9 \n",
"5 T. Rowe Price Funds Series II SICAV - Floating... LUX F000014UPK \n",
"\n",
" CurrencyId ShareClassName ISIN \n",
"0 USD T. Rowe Price Funds Series II SICAV - Floating... LU0993574440 \n",
"1 CHF T. Rowe Price Funds Series II SICAV - Floating... LU1805616171 \n",
"2 EUR T. Rowe Price Funds Series II SICAV - Floating... LU1076358073 \n",
"3 USD T. Rowe Price Funds Series II SICAV - Floating... LU2046740358 \n",
"4 USD T. Rowe Price Funds Series II SICAV - Floating... LU2046740432 \n",
"5 AUD T. Rowe Price Funds Series II SICAV - Floating... LU2122516821 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"document_mapping"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['T. Rowe Price Funds Series II SICAV - Floating Rate Loan Fund I Cap',\n",
" 'T. Rowe Price Funds Series II SICAV - Floating Rate Loan Fund Ih (CHF) Cap',\n",
" 'T. Rowe Price Funds Series II SICAV - Floating Rate Loan Fund Ih (EUR) Cap',\n",
" 'T. Rowe Price Funds Series II SICAV - Floating Rate Loan Fund Q (USD) Cap',\n",
" 'T. Rowe Price Funds Series II SICAV - Floating Rate Loan Fund Qd (USD) Dis',\n",
" 'T. Rowe Price Funds Series II SICAV - Floating Rate Loan Fund Sdn (AUD) Dis']"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distinct share class names for the document, in order of first appearance.\n",
"document_mapping[\"ShareClassName\"].unique().tolist()"
]
},
2024-08-28 15:21:26 +00:00
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "torch2_real",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}