crypto_quant/core/db_huge_volume_data.py

547 lines
20 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import logging
from typing import Optional, List, Dict, Any, Union
from core.db_manager import DBData
from core.utils import check_date_time_format, datetime_to_timestamp
# Configure the root logger when this module is imported: INFO level,
# messages formatted as "<time> <level>: <message>".
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s")
class DBHugeVolumeData:
def __init__(self, db_url: str):
    """
    Set up the accessor for the ``crypto_huge_volume`` table.

    Stores the connection URL, the table name and the ordered column list,
    then creates the ``DBData`` manager that the other methods delegate to.

    :param db_url: database connection URL, passed unchanged to ``DBData``
    """
    # Columns are grouped by theme for readability; the concatenated order
    # below is the order handed to DBData.
    key_columns = ["symbol", "bar", "window_size", "timestamp", "date_time"]
    candle_columns = ["open", "high", "low", "close", "volume", "volCcy", "volCCyQuote"]
    volume_columns = [
        "volume_ma",
        "volume_std",
        "volume_threshold",
        "huge_volume",
        "volume_ratio",
        "spike_intensity",
    ]
    band_80_20_columns = [
        "close_80_percentile",
        "close_20_percentile",
        "price_80_high",
        "price_20_low",
        "volume_80_20_price_spike",
    ]
    band_90_10_columns = [
        "close_90_percentile",
        "close_10_percentile",
        "price_90_high",
        "price_10_low",
        "volume_90_10_price_spike",
    ]

    self.db_url = db_url
    self.table_name = "crypto_huge_volume"
    self.columns = [
        *key_columns,
        *candle_columns,
        *volume_columns,
        *band_80_20_columns,
        *band_90_10_columns,
        "create_time",
    ]
    self.db_manager = DBData(db_url, self.table_name, self.columns)
def _process_time_parameter(self, time_param: Optional[Union[str, int]]) -> Optional[int]:
"""
处理时间参数,统一转换为时间戳
:param time_param: 时间参数(字符串或整数)
:return: 时间戳或None
"""
if time_param is None:
return None
if isinstance(time_param, int):
return time_param
if isinstance(time_param, str):
if time_param.isdigit():
return int(time_param)
else:
parsed_time = check_date_time_format(time_param)
if parsed_time is None:
logging.warning(f"日期时间格式错误: {time_param}")
return None
return datetime_to_timestamp(parsed_time)
return None
def _build_query_conditions(
self,
symbol: Optional[str] = None,
bar: Optional[str] = None,
window_size: Optional[int] = None,
start: Optional[Union[str, int]] = None,
end: Optional[Union[str, int]] = None,
additional_conditions: Optional[List[str]] = None
) -> tuple[List[str], Dict[str, Any]]:
"""
构建查询条件
:param symbol: 交易对
:param bar: K线周期
:param window_size: 窗口大小
:param start: 开始时间
:param end: 结束时间
:param additional_conditions: 额外的查询条件
:return: (条件列表, 参数字典)
"""
conditions = additional_conditions or []
condition_dict = {}
if symbol:
conditions.append("symbol = :symbol")
condition_dict["symbol"] = symbol
if bar:
conditions.append("bar = :bar")
condition_dict["bar"] = bar
if window_size:
conditions.append("window_size = :window_size")
condition_dict["window_size"] = window_size
# 处理时间参数
start_timestamp = self._process_time_parameter(start)
end_timestamp = self._process_time_parameter(end)
if start_timestamp is not None:
conditions.append("timestamp >= :start")
condition_dict["start"] = start_timestamp
if end_timestamp is not None:
conditions.append("timestamp <= :end")
condition_dict["end"] = end_timestamp
return conditions, condition_dict
def insert_data_to_mysql(self, df: pd.DataFrame) -> None:
"""
将巨量交易数据保存到MySQL的crypto_huge_volume表
速度:⭐⭐⭐⭐⭐ 最快
内存:⭐⭐⭐⭐ 中等
适用场景:中小数据量(<10万条
:param df: 巨量交易数据DataFrame
"""
if df is None or df.empty:
logging.warning("DataFrame为空无需写入数据库。")
return
self.db_manager.insert_data_to_mysql(df)
def insert_data_to_mysql_fast(self, df: pd.DataFrame) -> None:
"""
快速插入巨量交易数据方案2使用executemany批量插入
速度:⭐⭐⭐⭐ 很快
内存:⭐⭐⭐⭐⭐ 低
适用场景:中等数据量
:param df: 巨量交易数据DataFrame
"""
if df is None or df.empty:
logging.warning("DataFrame为空无需写入数据库。")
return
self.db_manager.insert_data_to_mysql_fast(df)
def insert_data_to_mysql_chunk(self, df: pd.DataFrame, chunk_size: int = 1000) -> None:
"""
分块插入巨量交易数据方案3适合大数据量
速度:⭐⭐⭐ 中等
内存:⭐⭐⭐⭐⭐ 最低
适用场景:大数据量(>10万条
:param df: 巨量交易数据DataFrame
:param chunk_size: 分块大小
"""
if df is None or df.empty:
logging.warning("DataFrame为空无需写入数据库。")
return
self.db_manager.insert_data_to_mysql_chunk(df, chunk_size)
def insert_data_to_mysql_simple(self, df: pd.DataFrame) -> None:
"""
简单插入巨量交易数据方案4直接使用to_sql忽略重复
速度:⭐⭐⭐⭐⭐ 最快
内存:⭐⭐⭐⭐ 中等
注意:会抛出重复键错误,需要额外处理
"""
if df is None or df.empty:
logging.warning("DataFrame为空无需写入数据库。")
return
self.db_manager.insert_data_to_mysql_simple(df)
def query_latest_data(self, symbol: str, bar: str, window_size: int) -> Optional[Dict[str, Any]]:
"""
查询最新巨量交易数据
:param symbol: 交易对
:param bar: K线周期
:param window_size: 窗口大小
:return: 最新数据记录或None
"""
sql = """
SELECT * FROM crypto_huge_volume
WHERE symbol = :symbol AND bar = :bar AND window_size = :window_size
ORDER BY timestamp DESC
LIMIT 1
"""
condition_dict = {"symbol": symbol, "bar": bar, "window_size": window_size}
return self.db_manager.query_data(sql, condition_dict, return_multi=False)
def query_data_by_symbol_bar_window_size_timestamp(self, symbol: str, bar: str, window_size: int, timestamp: int) -> Optional[Dict[str, Any]]:
"""
根据交易对、K线周期, 窗口大小和时间戳查询巨量交易数据
:param symbol: 交易对
:param bar: K线周期
:param window_size: 窗口大小
:param timestamp: 时间戳
:return: 数据记录或None
"""
sql = """
SELECT * FROM crypto_huge_volume
WHERE symbol = :symbol AND bar = :bar AND window_size = :window_size AND timestamp = :timestamp
"""
condition_dict = {"symbol": symbol, "bar": bar, "window_size": window_size, "timestamp": timestamp}
return self.db_manager.query_data(sql, condition_dict, return_multi=False)
def query_huge_volume_data_by_symbol_bar_window_size(
self,
symbol: str,
bar: str,
window_size: int,
start: Optional[Union[str, int]] = None,
end: Optional[Union[str, int]] = None
) -> Optional[List[Dict[str, Any]]]:
"""
根据交易对、K线周期和窗口大小查询巨量交易数据
:param symbol: 交易对
:param bar: K线周期
:param window_size: 窗口大小
:param start: 开始时间
:param end: 结束时间
:return: 数据记录列表或None
"""
conditions, condition_dict = self._build_query_conditions(symbol, bar, window_size, start, end)
where_clause = " AND ".join(conditions) if conditions else "1=1"
sql = f"""
SELECT * FROM crypto_huge_volume
WHERE {where_clause}
ORDER BY timestamp ASC
"""
return self.db_manager.query_data(sql, condition_dict, return_multi=True)
def query_huge_volume_records(
self,
symbol: Optional[str] = None,
bar: Optional[str] = None,
window_size: Optional[int] = None,
start: Optional[Union[str, int]] = None,
end: Optional[Union[str, int]] = None
) -> Optional[List[Dict[str, Any]]]:
"""
查询巨量交易记录只返回huge_volume=1的记录
:param symbol: 交易对
:param bar: K线周期
:param window_size: 窗口大小
:param start: 开始时间
:param end: 结束时间
:return: 巨量交易记录列表或None
"""
conditions, condition_dict = self._build_query_conditions(
symbol, bar, window_size, start, end, additional_conditions=["huge_volume = 1"]
)
where_clause = " AND ".join(conditions)
sql = f"""
SELECT * FROM crypto_huge_volume
WHERE {where_clause}
ORDER BY timestamp DESC
"""
return self.db_manager.query_data(sql, condition_dict, return_multi=True)
def query_volume_80_20_price_spike_records(
self,
symbol: Optional[str] = None,
bar: Optional[str] = None,
window_size: Optional[int] = None,
start: Optional[Union[str, int]] = None,
end: Optional[Union[str, int]] = None
) -> Optional[List[Dict[str, Any]]]:
"""
查询80/20量价尖峰记录只返回volume_80_20_price_spike=1的记录
:param symbol: 交易对
:param bar: K线周期
:param window_size: 窗口大小
:param start: 开始时间
:param end: 结束时间
:return: 80/20量价尖峰记录列表或None
"""
conditions, condition_dict = self._build_query_conditions(
symbol, bar, window_size, start, end, additional_conditions=["volume_80_20_price_spike = 1"]
)
where_clause = " AND ".join(conditions)
sql = f"""
SELECT * FROM crypto_huge_volume
WHERE {where_clause}
ORDER BY timestamp DESC
"""
return self.db_manager.query_data(sql, condition_dict, return_multi=True)
def query_volume_90_10_price_spike_records(
self,
symbol: Optional[str] = None,
bar: Optional[str] = None,
window_size: Optional[int] = None,
start: Optional[Union[str, int]] = None,
end: Optional[Union[str, int]] = None
) -> Optional[List[Dict[str, Any]]]:
"""
查询90/10量价尖峰记录只返回volume_90_10_price_spike=1的记录
:param symbol: 交易对
:param bar: K线周期
:param window_size: 窗口大小
:param start: 开始时间
:param end: 结束时间
:return: 90/10量价尖峰记录列表或None
"""
conditions, condition_dict = self._build_query_conditions(
symbol, bar, window_size, start, end, additional_conditions=["volume_90_10_price_spike = 1"]
)
where_clause = " AND ".join(conditions)
sql = f"""
SELECT * FROM crypto_huge_volume
WHERE {where_clause}
ORDER BY timestamp DESC
"""
return self.db_manager.query_data(sql, condition_dict, return_multi=True)
def query_price_80_high_records(
self,
symbol: Optional[str] = None,
bar: Optional[str] = None,
window_size: Optional[int] = None,
start: Optional[Union[str, int]] = None,
end: Optional[Union[str, int]] = None
) -> Optional[List[Dict[str, Any]]]:
"""
查询价格达到80%分位数高点的记录只返回price_80_high=1的记录
:param symbol: 交易对
:param bar: K线周期
:param window_size: 窗口大小
:param start: 开始时间
:param end: 结束时间
:return: 价格80%分位数高点记录列表或None
"""
conditions, condition_dict = self._build_query_conditions(
symbol, bar, window_size, start, end, additional_conditions=["price_80_high = 1"]
)
where_clause = " AND ".join(conditions)
sql = f"""
SELECT * FROM crypto_huge_volume
WHERE {where_clause}
ORDER BY timestamp DESC
"""
return self.db_manager.query_data(sql, condition_dict, return_multi=True)
def query_price_20_low_records(
self,
symbol: Optional[str] = None,
bar: Optional[str] = None,
window_size: Optional[int] = None,
start: Optional[Union[str, int]] = None,
end: Optional[Union[str, int]] = None
) -> Optional[List[Dict[str, Any]]]:
"""
查询价格达到20%分位数低点的记录只返回price_20_low=1的记录
:param symbol: 交易对
:param bar: K线周期
:param start: 开始时间
:param end: 结束时间
:return: 价格20%分位数低点记录列表或None
"""
conditions, condition_dict = self._build_query_conditions(
symbol, bar, window_size, start, end, additional_conditions=["price_20_low = 1"]
)
where_clause = " AND ".join(conditions)
sql = f"""
SELECT * FROM crypto_huge_volume
WHERE {where_clause}
ORDER BY timestamp DESC
"""
return self.db_manager.query_data(sql, condition_dict, return_multi=True)
def query_price_90_high_records(
self,
symbol: Optional[str] = None,
bar: Optional[str] = None,
window_size: Optional[int] = None,
start: Optional[Union[str, int]] = None,
end: Optional[Union[str, int]] = None
) -> Optional[List[Dict[str, Any]]]:
"""
查询价格达到90%分位数高点的记录只返回price_90_high=1的记录
:param symbol: 交易对
:param bar: K线周期
:param start: 开始时间
:param end: 结束时间
:return: 价格90%分位数高点记录列表或None
"""
conditions, condition_dict = self._build_query_conditions(
symbol, bar, window_size, start, end, additional_conditions=["price_90_high = 1"]
)
where_clause = " AND ".join(conditions)
sql = f"""
SELECT * FROM crypto_huge_volume
WHERE {where_clause}
ORDER BY timestamp DESC
"""
return self.db_manager.query_data(sql, condition_dict, return_multi=True)
def query_price_10_low_records(
self,
symbol: Optional[str] = None,
bar: Optional[str] = None,
window_size: Optional[int] = None,
start: Optional[Union[str, int]] = None,
end: Optional[Union[str, int]] = None
) -> Optional[List[Dict[str, Any]]]:
"""
查询价格达到10%分位数低点的记录只返回price_10_low=1的记录
:param symbol: 交易对
:param bar: K线周期
:param start: 开始时间
:param end: 结束时间
:return: 价格10%分位数低点记录列表或None
"""
conditions, condition_dict = self._build_query_conditions(
symbol, bar, window_size, start, end, additional_conditions=["price_10_low = 1"]
)
where_clause = " AND ".join(conditions)
sql = f"""
SELECT * FROM crypto_huge_volume
WHERE {where_clause}
ORDER BY timestamp DESC
"""
return self.db_manager.query_data(sql, condition_dict, return_multi=True)
def get_statistics_summary(
self,
symbol: Optional[str] = None,
bar: Optional[str] = None,
window_size: Optional[int] = None,
start: Optional[Union[str, int]] = None,
end: Optional[Union[str, int]] = None
) -> Optional[Dict[str, Any]]:
"""
获取巨量交易统计摘要
:param symbol: 交易对
:param bar: K线周期
:param window_size: 窗口大小
:param start: 开始时间
:param end: 结束时间
:return: 统计摘要或None
"""
conditions, condition_dict = self._build_query_conditions(symbol, bar, window_size, start, end)
where_clause = " AND ".join(conditions) if conditions else "1=1"
sql = f"""
SELECT
COUNT(*) as total_records,
SUM(huge_volume) as huge_volume_count,
SUM(volume_80_20_price_spike) as volume_80_20_price_spike_count,
SUM(volume_90_10_price_spike) as volume_90_10_price_spike_count,
SUM(price_80_high) as price_80_high_count,
SUM(price_20_low) as price_20_low_count,
SUM(price_90_high) as price_90_high_count,
SUM(price_10_low) as price_10_low_count,
AVG(volume_ratio) as avg_volume_ratio,
MAX(volume_ratio) as max_volume_ratio,
AVG(spike_intensity) as avg_spike_intensity,
MAX(spike_intensity) as max_spike_intensity
FROM crypto_huge_volume
WHERE {where_clause}
"""
return self.db_manager.query_data(sql, condition_dict, return_multi=False)
def get_top_volume_spikes(
self,
symbol: Optional[str] = None,
bar: Optional[str] = None,
window_size: Optional[int] = None,
limit: int = 10
) -> Optional[List[Dict[str, Any]]]:
"""
获取成交量尖峰最高的记录
:param symbol: 交易对
:param bar: K线周期
:param window_size: 窗口大小
:param limit: 返回记录数量
:return: 成交量尖峰记录列表或None
"""
conditions, condition_dict = self._build_query_conditions(
symbol, bar, window_size, additional_conditions=["huge_volume = 1"]
)
where_clause = " AND ".join(conditions)
sql = f"""
SELECT * FROM crypto_huge_volume
WHERE {where_clause}
ORDER BY volume_ratio DESC
LIMIT :limit
"""
condition_dict["limit"] = limit
return self.db_manager.query_data(sql, condition_dict, return_multi=True)
def get_percentile_statistics(
self,
symbol: Optional[str] = None,
bar: Optional[str] = None,
window_size: Optional[int] = None,
start: Optional[Union[str, int]] = None,
end: Optional[Union[str, int]] = None
) -> Optional[Dict[str, Any]]:
"""
获取分位数统计信息
:param symbol: 交易对
:param bar: K线周期
:param window_size: 窗口大小
:param start: 开始时间
:param end: 结束时间
:return: 分位数统计信息或None
"""
conditions, condition_dict = self._build_query_conditions(symbol, bar, window_size, start, end)
where_clause = " AND ".join(conditions) if conditions else "1=1"
sql = f"""
SELECT
AVG(close_80_percentile) as avg_close_80_percentile,
AVG(close_20_percentile) as avg_close_20_percentile,
AVG(close_90_percentile) as avg_close_90_percentile,
AVG(close_10_percentile) as avg_close_10_percentile,
MAX(close_80_percentile) as max_close_80_percentile,
MIN(close_20_percentile) as min_close_20_percentile,
MAX(close_90_percentile) as max_close_90_percentile,
MIN(close_10_percentile) as min_close_10_percentile
FROM crypto_huge_volume
WHERE {where_clause}
"""
return self.db_manager.query_data(sql, condition_dict, return_multi=False)