import pandas as pd import logging from typing import Optional, List, Dict, Any, Union from core.db_manager import DBData from core.utils import check_date_time_format, datetime_to_timestamp logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s") class DBHugeVolumeData: def __init__( self, db_url: str ): self.db_url = db_url self.table_name = "crypto_huge_volume" self.columns = [ "symbol", "bar", "window_size", "timestamp", "date_time", "open", "high", "low", "close", "volume", "volCcy", "volCCyQuote", "volume_ma", "volume_std", "volume_threshold", "huge_volume", "volume_ratio", "spike_intensity", "close_80_percentile", "close_20_percentile", "price_80_high", "price_20_low", "volume_80_20_price_spike", "close_90_percentile", "close_10_percentile", "price_90_high", "price_10_low", "volume_90_10_price_spike", "create_time", ] self.db_manager = DBData(db_url, self.table_name, self.columns) def _process_time_parameter(self, time_param: Optional[Union[str, int]]) -> Optional[int]: """ 处理时间参数,统一转换为时间戳 :param time_param: 时间参数(字符串或整数) :return: 时间戳或None """ if time_param is None: return None if isinstance(time_param, int): return time_param if isinstance(time_param, str): if time_param.isdigit(): return int(time_param) else: parsed_time = check_date_time_format(time_param) if parsed_time is None: logging.warning(f"日期时间格式错误: {time_param}") return None return datetime_to_timestamp(parsed_time) return None def _build_query_conditions( self, symbol: Optional[str] = None, bar: Optional[str] = None, window_size: Optional[int] = None, start: Optional[Union[str, int]] = None, end: Optional[Union[str, int]] = None, additional_conditions: Optional[List[str]] = None ) -> tuple[List[str], Dict[str, Any]]: """ 构建查询条件 :param symbol: 交易对 :param bar: K线周期 :param window_size: 窗口大小 :param start: 开始时间 :param end: 结束时间 :param additional_conditions: 额外的查询条件 :return: (条件列表, 参数字典) """ conditions = additional_conditions or [] condition_dict = {} if symbol: conditions.append("symbol = :symbol") condition_dict["symbol"] = symbol if bar: conditions.append("bar = :bar") condition_dict["bar"] = bar if window_size: conditions.append("window_size = :window_size") condition_dict["window_size"] = window_size # 处理时间参数 start_timestamp = self._process_time_parameter(start) end_timestamp = self._process_time_parameter(end) if start_timestamp is not None: conditions.append("timestamp >= :start") condition_dict["start"] = start_timestamp if end_timestamp is not None: conditions.append("timestamp <= :end") condition_dict["end"] = end_timestamp return conditions, condition_dict def insert_data_to_mysql(self, df: pd.DataFrame) -> None: """ 将巨量交易数据保存到MySQL的crypto_huge_volume表 速度:⭐⭐⭐⭐⭐ 最快 内存:⭐⭐⭐⭐ 中等 适用场景:中小数据量(<10万条) :param df: 巨量交易数据DataFrame """ if df is None or df.empty: logging.warning("DataFrame为空,无需写入数据库。") return self.db_manager.insert_data_to_mysql(df) def insert_data_to_mysql_fast(self, df: pd.DataFrame) -> None: """ 快速插入巨量交易数据(方案2:使用executemany批量插入) 速度:⭐⭐⭐⭐ 很快 内存:⭐⭐⭐⭐⭐ 低 适用场景:中等数据量 :param df: 巨量交易数据DataFrame """ if df is None or df.empty: logging.warning("DataFrame为空,无需写入数据库。") return self.db_manager.insert_data_to_mysql_fast(df) def insert_data_to_mysql_chunk(self, df: pd.DataFrame, chunk_size: int = 1000) -> None: """ 分块插入巨量交易数据(方案3:适合大数据量) 速度:⭐⭐⭐ 中等 内存:⭐⭐⭐⭐⭐ 最低 适用场景:大数据量(>10万条) :param df: 巨量交易数据DataFrame :param chunk_size: 分块大小 """ if df is None or df.empty: logging.warning("DataFrame为空,无需写入数据库。") return self.db_manager.insert_data_to_mysql_chunk(df, chunk_size) def insert_data_to_mysql_simple(self, df: pd.DataFrame) -> None: """ 简单插入巨量交易数据(方案4:直接使用to_sql,忽略重复) 速度:⭐⭐⭐⭐⭐ 最快 内存:⭐⭐⭐⭐ 中等 注意:会抛出重复键错误,需要额外处理 """ if df is None or df.empty: logging.warning("DataFrame为空,无需写入数据库。") return self.db_manager.insert_data_to_mysql_simple(df) def query_latest_data(self, symbol: str, bar: str, window_size: int) -> Optional[Dict[str, Any]]: """ 查询最新巨量交易数据 :param symbol: 交易对 :param bar: K线周期 :param window_size: 窗口大小 :return: 最新数据记录或None """ sql = """ SELECT * FROM crypto_huge_volume WHERE symbol = :symbol AND bar = :bar AND window_size = :window_size ORDER BY timestamp DESC LIMIT 1 """ condition_dict = {"symbol": symbol, "bar": bar, "window_size": window_size} return self.db_manager.query_data(sql, condition_dict, return_multi=False) def query_data_by_symbol_bar_window_size_timestamp(self, symbol: str, bar: str, window_size: int, timestamp: int) -> Optional[Dict[str, Any]]: """ 根据交易对、K线周期, 窗口大小和时间戳查询巨量交易数据 :param symbol: 交易对 :param bar: K线周期 :param window_size: 窗口大小 :param timestamp: 时间戳 :return: 数据记录或None """ sql = """ SELECT * FROM crypto_huge_volume WHERE symbol = :symbol AND bar = :bar AND window_size = :window_size AND timestamp = :timestamp """ condition_dict = {"symbol": symbol, "bar": bar, "window_size": window_size, "timestamp": timestamp} return self.db_manager.query_data(sql, condition_dict, return_multi=False) def query_huge_volume_data_by_symbol_bar_window_size( self, symbol: str, bar: str, window_size: int, start: Optional[Union[str, int]] = None, end: Optional[Union[str, int]] = None ) -> Optional[List[Dict[str, Any]]]: """ 根据交易对、K线周期和窗口大小查询巨量交易数据 :param symbol: 交易对 :param bar: K线周期 :param window_size: 窗口大小 :param start: 开始时间 :param end: 结束时间 :return: 数据记录列表或None """ conditions, condition_dict = self._build_query_conditions(symbol, bar, window_size, start, end) where_clause = " AND ".join(conditions) if conditions else "1=1" sql = f""" SELECT * FROM crypto_huge_volume WHERE {where_clause} ORDER BY timestamp ASC """ return self.db_manager.query_data(sql, condition_dict, return_multi=True) def query_huge_volume_records( self, symbol: Optional[str] = None, bar: Optional[str] = None, window_size: Optional[int] = None, start: Optional[Union[str, int]] = None, end: Optional[Union[str, int]] = None ) -> Optional[List[Dict[str, Any]]]: """ 查询巨量交易记录(只返回huge_volume=1的记录) :param symbol: 交易对 :param bar: K线周期 :param window_size: 窗口大小 :param start: 开始时间 :param end: 结束时间 :return: 巨量交易记录列表或None """ conditions, condition_dict = self._build_query_conditions( symbol, bar, window_size, start, end, additional_conditions=["huge_volume = 1"] ) where_clause = " AND ".join(conditions) sql = f""" SELECT * FROM crypto_huge_volume WHERE {where_clause} ORDER BY timestamp DESC """ return self.db_manager.query_data(sql, condition_dict, return_multi=True) def query_volume_80_20_price_spike_records( self, symbol: Optional[str] = None, bar: Optional[str] = None, window_size: Optional[int] = None, start: Optional[Union[str, int]] = None, end: Optional[Union[str, int]] = None ) -> Optional[List[Dict[str, Any]]]: """ 查询80/20量价尖峰记录(只返回volume_80_20_price_spike=1的记录) :param symbol: 交易对 :param bar: K线周期 :param window_size: 窗口大小 :param start: 开始时间 :param end: 结束时间 :return: 80/20量价尖峰记录列表或None """ conditions, condition_dict = self._build_query_conditions( symbol, bar, window_size, start, end, additional_conditions=["volume_80_20_price_spike = 1"] ) where_clause = " AND ".join(conditions) sql = f""" SELECT * FROM crypto_huge_volume WHERE {where_clause} ORDER BY timestamp DESC """ return self.db_manager.query_data(sql, condition_dict, return_multi=True) def query_volume_90_10_price_spike_records( self, symbol: Optional[str] = None, bar: Optional[str] = None, window_size: Optional[int] = None, start: Optional[Union[str, int]] = None, end: Optional[Union[str, int]] = None ) -> Optional[List[Dict[str, Any]]]: """ 查询90/10量价尖峰记录(只返回volume_90_10_price_spike=1的记录) :param symbol: 交易对 :param bar: K线周期 :param window_size: 窗口大小 :param start: 开始时间 :param end: 结束时间 :return: 90/10量价尖峰记录列表或None """ conditions, condition_dict = self._build_query_conditions( symbol, bar, window_size, start, end, additional_conditions=["volume_90_10_price_spike = 1"] ) where_clause = " AND ".join(conditions) sql = f""" SELECT * FROM crypto_huge_volume WHERE {where_clause} ORDER BY timestamp DESC """ return self.db_manager.query_data(sql, condition_dict, return_multi=True) def query_price_80_high_records( self, symbol: Optional[str] = None, bar: Optional[str] = None, window_size: Optional[int] = None, start: Optional[Union[str, int]] = None, end: Optional[Union[str, int]] = None ) -> Optional[List[Dict[str, Any]]]: """ 查询价格达到80%分位数高点的记录(只返回price_80_high=1的记录) :param symbol: 交易对 :param bar: K线周期 :param window_size: 窗口大小 :param start: 开始时间 :param end: 结束时间 :return: 价格80%分位数高点记录列表或None """ conditions, condition_dict = self._build_query_conditions( symbol, bar, window_size, start, end, additional_conditions=["price_80_high = 1"] ) where_clause = " AND ".join(conditions) sql = f""" SELECT * FROM crypto_huge_volume WHERE {where_clause} ORDER BY timestamp DESC """ return self.db_manager.query_data(sql, condition_dict, return_multi=True) def query_price_20_low_records( self, symbol: Optional[str] = None, bar: Optional[str] = None, window_size: Optional[int] = None, start: Optional[Union[str, int]] = None, end: Optional[Union[str, int]] = None ) -> Optional[List[Dict[str, Any]]]: """ 查询价格达到20%分位数低点的记录(只返回price_20_low=1的记录) :param symbol: 交易对 :param bar: K线周期 :param start: 开始时间 :param end: 结束时间 :return: 价格20%分位数低点记录列表或None """ conditions, condition_dict = self._build_query_conditions( symbol, bar, window_size, start, end, additional_conditions=["price_20_low = 1"] ) where_clause = " AND ".join(conditions) sql = f""" SELECT * FROM crypto_huge_volume WHERE {where_clause} ORDER BY timestamp DESC """ return self.db_manager.query_data(sql, condition_dict, return_multi=True) def query_price_90_high_records( self, symbol: Optional[str] = None, bar: Optional[str] = None, window_size: Optional[int] = None, start: Optional[Union[str, int]] = None, end: Optional[Union[str, int]] = None ) -> Optional[List[Dict[str, Any]]]: """ 查询价格达到90%分位数高点的记录(只返回price_90_high=1的记录) :param symbol: 交易对 :param bar: K线周期 :param start: 开始时间 :param end: 结束时间 :return: 价格90%分位数高点记录列表或None """ conditions, condition_dict = self._build_query_conditions( symbol, bar, window_size, start, end, additional_conditions=["price_90_high = 1"] ) where_clause = " AND ".join(conditions) sql = f""" SELECT * FROM crypto_huge_volume WHERE {where_clause} ORDER BY timestamp DESC """ return self.db_manager.query_data(sql, condition_dict, return_multi=True) def query_price_10_low_records( self, symbol: Optional[str] = None, bar: Optional[str] = None, window_size: Optional[int] = None, start: Optional[Union[str, int]] = None, end: Optional[Union[str, int]] = None ) -> Optional[List[Dict[str, Any]]]: """ 查询价格达到10%分位数低点的记录(只返回price_10_low=1的记录) :param symbol: 交易对 :param bar: K线周期 :param start: 开始时间 :param end: 结束时间 :return: 价格10%分位数低点记录列表或None """ conditions, condition_dict = self._build_query_conditions( symbol, bar, window_size, start, end, additional_conditions=["price_10_low = 1"] ) where_clause = " AND ".join(conditions) sql = f""" SELECT * FROM crypto_huge_volume WHERE {where_clause} ORDER BY timestamp DESC """ return self.db_manager.query_data(sql, condition_dict, return_multi=True) def get_statistics_summary( self, symbol: Optional[str] = None, bar: Optional[str] = None, window_size: Optional[int] = None, start: Optional[Union[str, int]] = None, end: Optional[Union[str, int]] = None ) -> Optional[Dict[str, Any]]: """ 获取巨量交易统计摘要 :param symbol: 交易对 :param bar: K线周期 :param window_size: 窗口大小 :param start: 开始时间 :param end: 结束时间 :return: 统计摘要或None """ conditions, condition_dict = self._build_query_conditions(symbol, bar, window_size, start, end) where_clause = " AND ".join(conditions) if conditions else "1=1" sql = f""" SELECT COUNT(*) as total_records, SUM(huge_volume) as huge_volume_count, SUM(volume_80_20_price_spike) as volume_80_20_price_spike_count, SUM(volume_90_10_price_spike) as volume_90_10_price_spike_count, SUM(price_80_high) as price_80_high_count, SUM(price_20_low) as price_20_low_count, SUM(price_90_high) as price_90_high_count, SUM(price_10_low) as price_10_low_count, AVG(volume_ratio) as avg_volume_ratio, MAX(volume_ratio) as max_volume_ratio, AVG(spike_intensity) as avg_spike_intensity, MAX(spike_intensity) as max_spike_intensity FROM crypto_huge_volume WHERE {where_clause} """ return self.db_manager.query_data(sql, condition_dict, return_multi=False) def get_top_volume_spikes( self, symbol: Optional[str] = None, bar: Optional[str] = None, window_size: Optional[int] = None, limit: int = 10 ) -> Optional[List[Dict[str, Any]]]: """ 获取成交量尖峰最高的记录 :param symbol: 交易对 :param bar: K线周期 :param window_size: 窗口大小 :param limit: 返回记录数量 :return: 成交量尖峰记录列表或None """ conditions, condition_dict = self._build_query_conditions( symbol, bar, window_size, additional_conditions=["huge_volume = 1"] ) where_clause = " AND ".join(conditions) sql = f""" SELECT * FROM crypto_huge_volume WHERE {where_clause} ORDER BY volume_ratio DESC LIMIT :limit """ condition_dict["limit"] = limit return self.db_manager.query_data(sql, condition_dict, return_multi=True) def get_percentile_statistics( self, symbol: Optional[str] = None, bar: Optional[str] = None, window_size: Optional[int] = None, start: Optional[Union[str, int]] = None, end: Optional[Union[str, int]] = None ) -> Optional[Dict[str, Any]]: """ 获取分位数统计信息 :param symbol: 交易对 :param bar: K线周期 :param window_size: 窗口大小 :param start: 开始时间 :param end: 结束时间 :return: 分位数统计信息或None """ conditions, condition_dict = self._build_query_conditions(symbol, bar, window_size, start, end) where_clause = " AND ".join(conditions) if conditions else "1=1" sql = f""" SELECT AVG(close_80_percentile) as avg_close_80_percentile, AVG(close_20_percentile) as avg_close_20_percentile, AVG(close_90_percentile) as avg_close_90_percentile, AVG(close_10_percentile) as avg_close_10_percentile, MAX(close_80_percentile) as max_close_80_percentile, MIN(close_20_percentile) as min_close_20_percentile, MAX(close_90_percentile) as max_close_90_percentile, MIN(close_10_percentile) as min_close_10_percentile FROM crypto_huge_volume WHERE {where_clause} """ return self.db_manager.query_data(sql, condition_dict, return_multi=False)