521 lines
19 KiB
Python
521 lines
19 KiB
Python
import pandas as pd
|
||
import logging
|
||
from typing import Optional, List, Dict, Any, Union
|
||
from core.db_manager import DBData
|
||
from core.utils import check_date_time_format, datetime_to_timestamp
|
||
|
||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s")
|
||
|
||
|
||
class DBHugeVolumeData:
|
||
def __init__(
|
||
self,
|
||
db_url: str
|
||
):
|
||
self.db_url = db_url
|
||
self.table_name = "crypto_huge_volume"
|
||
self.columns = [
|
||
"symbol",
|
||
"bar",
|
||
"timestamp",
|
||
"date_time",
|
||
"open",
|
||
"high",
|
||
"low",
|
||
"close",
|
||
"volume",
|
||
"volCcy",
|
||
"volCCyQuote",
|
||
"volume_ma",
|
||
"volume_std",
|
||
"volume_threshold",
|
||
"huge_volume",
|
||
"volume_ratio",
|
||
"spike_intensity",
|
||
"close_80_percentile",
|
||
"close_20_percentile",
|
||
"price_80_high",
|
||
"price_20_low",
|
||
"volume_80_20_price_spike",
|
||
"close_90_percentile",
|
||
"close_10_percentile",
|
||
"price_90_high",
|
||
"price_10_low",
|
||
"volume_90_10_price_spike",
|
||
"create_time",
|
||
]
|
||
self.db_manager = DBData(db_url, self.table_name, self.columns)
|
||
|
||
def _process_time_parameter(self, time_param: Optional[Union[str, int]]) -> Optional[int]:
|
||
"""
|
||
处理时间参数,统一转换为时间戳
|
||
:param time_param: 时间参数(字符串或整数)
|
||
:return: 时间戳或None
|
||
"""
|
||
if time_param is None:
|
||
return None
|
||
|
||
if isinstance(time_param, int):
|
||
return time_param
|
||
|
||
if isinstance(time_param, str):
|
||
if time_param.isdigit():
|
||
return int(time_param)
|
||
else:
|
||
parsed_time = check_date_time_format(time_param)
|
||
if parsed_time is None:
|
||
logging.warning(f"日期时间格式错误: {time_param}")
|
||
return None
|
||
return datetime_to_timestamp(parsed_time)
|
||
|
||
return None
|
||
|
||
def _build_query_conditions(
|
||
self,
|
||
symbol: Optional[str] = None,
|
||
bar: Optional[str] = None,
|
||
start: Optional[Union[str, int]] = None,
|
||
end: Optional[Union[str, int]] = None,
|
||
additional_conditions: Optional[List[str]] = None
|
||
) -> tuple[List[str], Dict[str, Any]]:
|
||
"""
|
||
构建查询条件
|
||
:param symbol: 交易对
|
||
:param bar: K线周期
|
||
:param start: 开始时间
|
||
:param end: 结束时间
|
||
:param additional_conditions: 额外的查询条件
|
||
:return: (条件列表, 参数字典)
|
||
"""
|
||
conditions = additional_conditions or []
|
||
condition_dict = {}
|
||
|
||
if symbol:
|
||
conditions.append("symbol = :symbol")
|
||
condition_dict["symbol"] = symbol
|
||
if bar:
|
||
conditions.append("bar = :bar")
|
||
condition_dict["bar"] = bar
|
||
|
||
# 处理时间参数
|
||
start_timestamp = self._process_time_parameter(start)
|
||
end_timestamp = self._process_time_parameter(end)
|
||
|
||
if start_timestamp is not None:
|
||
conditions.append("timestamp >= :start")
|
||
condition_dict["start"] = start_timestamp
|
||
if end_timestamp is not None:
|
||
conditions.append("timestamp <= :end")
|
||
condition_dict["end"] = end_timestamp
|
||
|
||
return conditions, condition_dict
|
||
|
||
def insert_data_to_mysql(self, df: pd.DataFrame) -> None:
|
||
"""
|
||
将巨量交易数据保存到MySQL的crypto_huge_volume表
|
||
速度:⭐⭐⭐⭐⭐ 最快
|
||
内存:⭐⭐⭐⭐ 中等
|
||
适用场景:中小数据量(<10万条)
|
||
:param df: 巨量交易数据DataFrame
|
||
"""
|
||
if df is None or df.empty:
|
||
logging.warning("DataFrame为空,无需写入数据库。")
|
||
return
|
||
|
||
self.db_manager.insert_data_to_mysql(df)
|
||
|
||
def insert_data_to_mysql_fast(self, df: pd.DataFrame) -> None:
|
||
"""
|
||
快速插入巨量交易数据(方案2:使用executemany批量插入)
|
||
速度:⭐⭐⭐⭐ 很快
|
||
内存:⭐⭐⭐⭐⭐ 低
|
||
适用场景:中等数据量
|
||
:param df: 巨量交易数据DataFrame
|
||
"""
|
||
if df is None or df.empty:
|
||
logging.warning("DataFrame为空,无需写入数据库。")
|
||
return
|
||
|
||
self.db_manager.insert_data_to_mysql_fast(df)
|
||
|
||
def insert_data_to_mysql_chunk(self, df: pd.DataFrame, chunk_size: int = 1000) -> None:
|
||
"""
|
||
分块插入巨量交易数据(方案3:适合大数据量)
|
||
速度:⭐⭐⭐ 中等
|
||
内存:⭐⭐⭐⭐⭐ 最低
|
||
适用场景:大数据量(>10万条)
|
||
:param df: 巨量交易数据DataFrame
|
||
:param chunk_size: 分块大小
|
||
"""
|
||
if df is None or df.empty:
|
||
logging.warning("DataFrame为空,无需写入数据库。")
|
||
return
|
||
|
||
self.db_manager.insert_data_to_mysql_chunk(df, chunk_size)
|
||
|
||
def insert_data_to_mysql_simple(self, df: pd.DataFrame) -> None:
|
||
"""
|
||
简单插入巨量交易数据(方案4:直接使用to_sql,忽略重复)
|
||
速度:⭐⭐⭐⭐⭐ 最快
|
||
内存:⭐⭐⭐⭐ 中等
|
||
注意:会抛出重复键错误,需要额外处理
|
||
"""
|
||
if df is None or df.empty:
|
||
logging.warning("DataFrame为空,无需写入数据库。")
|
||
return
|
||
|
||
self.db_manager.insert_data_to_mysql_simple(df)
|
||
|
||
def query_latest_data(self, symbol: str, bar: str) -> Optional[Dict[str, Any]]:
|
||
"""
|
||
查询最新巨量交易数据
|
||
:param symbol: 交易对
|
||
:param bar: K线周期
|
||
:return: 最新数据记录或None
|
||
"""
|
||
sql = """
|
||
SELECT * FROM crypto_huge_volume
|
||
WHERE symbol = :symbol AND bar = :bar
|
||
ORDER BY timestamp DESC
|
||
LIMIT 1
|
||
"""
|
||
condition_dict = {"symbol": symbol, "bar": bar}
|
||
return self.db_manager.query_data(sql, condition_dict, return_multi=False)
|
||
|
||
def query_data_by_symbol_bar_timestamp(self, symbol: str, bar: str, timestamp: int) -> Optional[Dict[str, Any]]:
|
||
"""
|
||
根据交易对、K线周期和时间戳查询巨量交易数据
|
||
:param symbol: 交易对
|
||
:param bar: K线周期
|
||
:param timestamp: 时间戳
|
||
:return: 数据记录或None
|
||
"""
|
||
sql = """
|
||
SELECT * FROM crypto_huge_volume
|
||
WHERE symbol = :symbol AND bar = :bar AND timestamp = :timestamp
|
||
"""
|
||
condition_dict = {"symbol": symbol, "bar": bar, "timestamp": timestamp}
|
||
return self.db_manager.query_data(sql, condition_dict, return_multi=False)
|
||
|
||
def query_huge_volume_data_by_symbol_bar(
|
||
self,
|
||
symbol: str,
|
||
bar: str,
|
||
start: Optional[Union[str, int]] = None,
|
||
end: Optional[Union[str, int]] = None
|
||
) -> Optional[List[Dict[str, Any]]]:
|
||
"""
|
||
根据交易对和K线周期查询巨量交易数据
|
||
:param symbol: 交易对
|
||
:param bar: K线周期
|
||
:param start: 开始时间
|
||
:param end: 结束时间
|
||
:return: 数据记录列表或None
|
||
"""
|
||
conditions, condition_dict = self._build_query_conditions(symbol, bar, start, end)
|
||
|
||
where_clause = " AND ".join(conditions) if conditions else "1=1"
|
||
sql = f"""
|
||
SELECT * FROM crypto_huge_volume
|
||
WHERE {where_clause}
|
||
ORDER BY timestamp ASC
|
||
"""
|
||
|
||
return self.db_manager.query_data(sql, condition_dict, return_multi=True)
|
||
|
||
def query_huge_volume_records(
|
||
self,
|
||
symbol: Optional[str] = None,
|
||
bar: Optional[str] = None,
|
||
start: Optional[Union[str, int]] = None,
|
||
end: Optional[Union[str, int]] = None
|
||
) -> Optional[List[Dict[str, Any]]]:
|
||
"""
|
||
查询巨量交易记录(只返回huge_volume=1的记录)
|
||
:param symbol: 交易对
|
||
:param bar: K线周期
|
||
:param start: 开始时间
|
||
:param end: 结束时间
|
||
:return: 巨量交易记录列表或None
|
||
"""
|
||
conditions, condition_dict = self._build_query_conditions(
|
||
symbol, bar, start, end, additional_conditions=["huge_volume = 1"]
|
||
)
|
||
|
||
where_clause = " AND ".join(conditions)
|
||
sql = f"""
|
||
SELECT * FROM crypto_huge_volume
|
||
WHERE {where_clause}
|
||
ORDER BY timestamp DESC
|
||
"""
|
||
|
||
return self.db_manager.query_data(sql, condition_dict, return_multi=True)
|
||
|
||
def query_volume_80_20_price_spike_records(
|
||
self,
|
||
symbol: Optional[str] = None,
|
||
bar: Optional[str] = None,
|
||
start: Optional[Union[str, int]] = None,
|
||
end: Optional[Union[str, int]] = None
|
||
) -> Optional[List[Dict[str, Any]]]:
|
||
"""
|
||
查询80/20量价尖峰记录(只返回volume_80_20_price_spike=1的记录)
|
||
:param symbol: 交易对
|
||
:param bar: K线周期
|
||
:param start: 开始时间
|
||
:param end: 结束时间
|
||
:return: 80/20量价尖峰记录列表或None
|
||
"""
|
||
conditions, condition_dict = self._build_query_conditions(
|
||
symbol, bar, start, end, additional_conditions=["volume_80_20_price_spike = 1"]
|
||
)
|
||
|
||
where_clause = " AND ".join(conditions)
|
||
sql = f"""
|
||
SELECT * FROM crypto_huge_volume
|
||
WHERE {where_clause}
|
||
ORDER BY timestamp DESC
|
||
"""
|
||
|
||
return self.db_manager.query_data(sql, condition_dict, return_multi=True)
|
||
|
||
def query_volume_90_10_price_spike_records(
|
||
self,
|
||
symbol: Optional[str] = None,
|
||
bar: Optional[str] = None,
|
||
start: Optional[Union[str, int]] = None,
|
||
end: Optional[Union[str, int]] = None
|
||
) -> Optional[List[Dict[str, Any]]]:
|
||
"""
|
||
查询90/10量价尖峰记录(只返回volume_90_10_price_spike=1的记录)
|
||
:param symbol: 交易对
|
||
:param bar: K线周期
|
||
:param start: 开始时间
|
||
:param end: 结束时间
|
||
:return: 90/10量价尖峰记录列表或None
|
||
"""
|
||
conditions, condition_dict = self._build_query_conditions(
|
||
symbol, bar, start, end, additional_conditions=["volume_90_10_price_spike = 1"]
|
||
)
|
||
|
||
where_clause = " AND ".join(conditions)
|
||
sql = f"""
|
||
SELECT * FROM crypto_huge_volume
|
||
WHERE {where_clause}
|
||
ORDER BY timestamp DESC
|
||
"""
|
||
|
||
return self.db_manager.query_data(sql, condition_dict, return_multi=True)
|
||
|
||
def query_price_80_high_records(
|
||
self,
|
||
symbol: Optional[str] = None,
|
||
bar: Optional[str] = None,
|
||
start: Optional[Union[str, int]] = None,
|
||
end: Optional[Union[str, int]] = None
|
||
) -> Optional[List[Dict[str, Any]]]:
|
||
"""
|
||
查询价格达到80%分位数高点的记录(只返回price_80_high=1的记录)
|
||
:param symbol: 交易对
|
||
:param bar: K线周期
|
||
:param start: 开始时间
|
||
:param end: 结束时间
|
||
:return: 价格80%分位数高点记录列表或None
|
||
"""
|
||
conditions, condition_dict = self._build_query_conditions(
|
||
symbol, bar, start, end, additional_conditions=["price_80_high = 1"]
|
||
)
|
||
|
||
where_clause = " AND ".join(conditions)
|
||
sql = f"""
|
||
SELECT * FROM crypto_huge_volume
|
||
WHERE {where_clause}
|
||
ORDER BY timestamp DESC
|
||
"""
|
||
|
||
return self.db_manager.query_data(sql, condition_dict, return_multi=True)
|
||
|
||
def query_price_20_low_records(
|
||
self,
|
||
symbol: Optional[str] = None,
|
||
bar: Optional[str] = None,
|
||
start: Optional[Union[str, int]] = None,
|
||
end: Optional[Union[str, int]] = None
|
||
) -> Optional[List[Dict[str, Any]]]:
|
||
"""
|
||
查询价格达到20%分位数低点的记录(只返回price_20_low=1的记录)
|
||
:param symbol: 交易对
|
||
:param bar: K线周期
|
||
:param start: 开始时间
|
||
:param end: 结束时间
|
||
:return: 价格20%分位数低点记录列表或None
|
||
"""
|
||
conditions, condition_dict = self._build_query_conditions(
|
||
symbol, bar, start, end, additional_conditions=["price_20_low = 1"]
|
||
)
|
||
|
||
where_clause = " AND ".join(conditions)
|
||
sql = f"""
|
||
SELECT * FROM crypto_huge_volume
|
||
WHERE {where_clause}
|
||
ORDER BY timestamp DESC
|
||
"""
|
||
|
||
return self.db_manager.query_data(sql, condition_dict, return_multi=True)
|
||
|
||
def query_price_90_high_records(
|
||
self,
|
||
symbol: Optional[str] = None,
|
||
bar: Optional[str] = None,
|
||
start: Optional[Union[str, int]] = None,
|
||
end: Optional[Union[str, int]] = None
|
||
) -> Optional[List[Dict[str, Any]]]:
|
||
"""
|
||
查询价格达到90%分位数高点的记录(只返回price_90_high=1的记录)
|
||
:param symbol: 交易对
|
||
:param bar: K线周期
|
||
:param start: 开始时间
|
||
:param end: 结束时间
|
||
:return: 价格90%分位数高点记录列表或None
|
||
"""
|
||
conditions, condition_dict = self._build_query_conditions(
|
||
symbol, bar, start, end, additional_conditions=["price_90_high = 1"]
|
||
)
|
||
|
||
where_clause = " AND ".join(conditions)
|
||
sql = f"""
|
||
SELECT * FROM crypto_huge_volume
|
||
WHERE {where_clause}
|
||
ORDER BY timestamp DESC
|
||
"""
|
||
|
||
return self.db_manager.query_data(sql, condition_dict, return_multi=True)
|
||
|
||
def query_price_10_low_records(
|
||
self,
|
||
symbol: Optional[str] = None,
|
||
bar: Optional[str] = None,
|
||
start: Optional[Union[str, int]] = None,
|
||
end: Optional[Union[str, int]] = None
|
||
) -> Optional[List[Dict[str, Any]]]:
|
||
"""
|
||
查询价格达到10%分位数低点的记录(只返回price_10_low=1的记录)
|
||
:param symbol: 交易对
|
||
:param bar: K线周期
|
||
:param start: 开始时间
|
||
:param end: 结束时间
|
||
:return: 价格10%分位数低点记录列表或None
|
||
"""
|
||
conditions, condition_dict = self._build_query_conditions(
|
||
symbol, bar, start, end, additional_conditions=["price_10_low = 1"]
|
||
)
|
||
|
||
where_clause = " AND ".join(conditions)
|
||
sql = f"""
|
||
SELECT * FROM crypto_huge_volume
|
||
WHERE {where_clause}
|
||
ORDER BY timestamp DESC
|
||
"""
|
||
|
||
return self.db_manager.query_data(sql, condition_dict, return_multi=True)
|
||
|
||
def get_statistics_summary(
|
||
self,
|
||
symbol: Optional[str] = None,
|
||
bar: Optional[str] = None,
|
||
start: Optional[Union[str, int]] = None,
|
||
end: Optional[Union[str, int]] = None
|
||
) -> Optional[Dict[str, Any]]:
|
||
"""
|
||
获取巨量交易统计摘要
|
||
:param symbol: 交易对
|
||
:param bar: K线周期
|
||
:param start: 开始时间
|
||
:param end: 结束时间
|
||
:return: 统计摘要或None
|
||
"""
|
||
conditions, condition_dict = self._build_query_conditions(symbol, bar, start, end)
|
||
|
||
where_clause = " AND ".join(conditions) if conditions else "1=1"
|
||
sql = f"""
|
||
SELECT
|
||
COUNT(*) as total_records,
|
||
SUM(huge_volume) as huge_volume_count,
|
||
SUM(volume_80_20_price_spike) as volume_80_20_price_spike_count,
|
||
SUM(volume_90_10_price_spike) as volume_90_10_price_spike_count,
|
||
SUM(price_80_high) as price_80_high_count,
|
||
SUM(price_20_low) as price_20_low_count,
|
||
SUM(price_90_high) as price_90_high_count,
|
||
SUM(price_10_low) as price_10_low_count,
|
||
AVG(volume_ratio) as avg_volume_ratio,
|
||
MAX(volume_ratio) as max_volume_ratio,
|
||
AVG(spike_intensity) as avg_spike_intensity,
|
||
MAX(spike_intensity) as max_spike_intensity
|
||
FROM crypto_huge_volume
|
||
WHERE {where_clause}
|
||
"""
|
||
|
||
return self.db_manager.query_data(sql, condition_dict, return_multi=False)
|
||
|
||
def get_top_volume_spikes(
|
||
self,
|
||
symbol: Optional[str] = None,
|
||
bar: Optional[str] = None,
|
||
limit: int = 10
|
||
) -> Optional[List[Dict[str, Any]]]:
|
||
"""
|
||
获取成交量尖峰最高的记录
|
||
:param symbol: 交易对
|
||
:param bar: K线周期
|
||
:param limit: 返回记录数量
|
||
:return: 成交量尖峰记录列表或None
|
||
"""
|
||
conditions, condition_dict = self._build_query_conditions(
|
||
symbol, bar, additional_conditions=["huge_volume = 1"]
|
||
)
|
||
|
||
where_clause = " AND ".join(conditions)
|
||
sql = f"""
|
||
SELECT * FROM crypto_huge_volume
|
||
WHERE {where_clause}
|
||
ORDER BY volume_ratio DESC
|
||
LIMIT :limit
|
||
"""
|
||
condition_dict["limit"] = limit
|
||
|
||
return self.db_manager.query_data(sql, condition_dict, return_multi=True)
|
||
|
||
def get_percentile_statistics(
|
||
self,
|
||
symbol: Optional[str] = None,
|
||
bar: Optional[str] = None,
|
||
start: Optional[Union[str, int]] = None,
|
||
end: Optional[Union[str, int]] = None
|
||
) -> Optional[Dict[str, Any]]:
|
||
"""
|
||
获取分位数统计信息
|
||
:param symbol: 交易对
|
||
:param bar: K线周期
|
||
:param start: 开始时间
|
||
:param end: 结束时间
|
||
:return: 分位数统计信息或None
|
||
"""
|
||
conditions, condition_dict = self._build_query_conditions(symbol, bar, start, end)
|
||
|
||
where_clause = " AND ".join(conditions) if conditions else "1=1"
|
||
sql = f"""
|
||
SELECT
|
||
AVG(close_80_percentile) as avg_close_80_percentile,
|
||
AVG(close_20_percentile) as avg_close_20_percentile,
|
||
AVG(close_90_percentile) as avg_close_90_percentile,
|
||
AVG(close_10_percentile) as avg_close_10_percentile,
|
||
MAX(close_80_percentile) as max_close_80_percentile,
|
||
MIN(close_20_percentile) as min_close_20_percentile,
|
||
MAX(close_90_percentile) as max_close_90_percentile,
|
||
MIN(close_10_percentile) as min_close_10_percentile
|
||
FROM crypto_huge_volume
|
||
WHERE {where_clause}
|
||
"""
|
||
|
||
return self.db_manager.query_data(sql, condition_dict, return_multi=False)
|