import logging

import pandas as pd

from core.db_manager import DBData
from core.utils import check_date_time_format, datetime_to_timestamp

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s")


class DBHugeVolumeData:
    """Data-access wrapper for the ``crypto_huge_volume`` table.

    All reads and writes are delegated to a ``DBData`` manager created in
    ``__init__``; this class only prepares SQL text and bind parameters.
    """

    def __init__(self, db_url: str):
        """Create the underlying ``DBData`` manager.

        :param db_url: database connection URL passed through to ``DBData``
        """
        self.db_url = db_url
        self.table_name = "crypto_huge_volume"
        self.columns = [
            "symbol",
            "bar",
            "timestamp",
            "date_time",
            "open",
            "high",
            "low",
            "close",
            "volume",
            "volCcy",
            "volCCyQuote",
            "volume_ma",
            "volume_std",
            "volume_threshold",
            "huge_volume",
            "volume_ratio",
            "spike_intensity",
            "close_80_percentile",
            "close_20_percentile",
            "price_high",
            "price_low",
            "volume_price_spike",
            "create_time",
        ]
        self.db_manager = DBData(db_url, self.table_name, self.columns)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _parse_time_bound(value):
        """Normalise one time bound to a timestamp.

        Non-string values are passed through untouched. All-digit strings are
        converted with ``int``. Any other string is validated with
        ``check_date_time_format`` and then converted with
        ``datetime_to_timestamp``.

        :param value: raw bound supplied by the caller
        :return: ``(ok, timestamp)``; ``ok`` is ``False`` only when a string
            is rejected by ``check_date_time_format`` (a warning is logged)
        """
        if not isinstance(value, str):
            return True, value
        if value.isdigit():
            return True, int(value)
        parsed = check_date_time_format(value)
        if parsed is None:
            # Log the caller's original text, not the (None) parse result.
            logging.warning(f"日期时间格式错误: {value}")
            return False, None
        return True, datetime_to_timestamp(parsed)

    def _build_filters(self, base_conditions, symbol, bar, start, end):
        """Build the WHERE fragments and bind parameters shared by the
        record/summary queries.

        A filter is only added for an argument that is truthy, which matches
        the behaviour of the public methods that use this helper.

        :param base_conditions: conditions that always apply (copied)
        :param symbol: trading pair, or a falsy value for "any"
        :param bar: candlestick period, or a falsy value for "any"
        :param start: lower time bound, or a falsy value for "none"
        :param end: upper time bound, or a falsy value for "none"
        :return: ``(conditions, params)``, or ``None`` when ``start`` or
            ``end`` is a string with an invalid date-time format
        """
        conditions = list(base_conditions)
        params = {}
        if symbol:
            conditions.append("symbol = :symbol")
            params["symbol"] = symbol
        if bar:
            conditions.append("bar = :bar")
            params["bar"] = bar
        if start:
            ok, start = self._parse_time_bound(start)
            if not ok:
                return None
            conditions.append("timestamp >= :start")
            params["start"] = start
        if end:
            ok, end = self._parse_time_bound(end)
            if not ok:
                return None
            conditions.append("timestamp <= :end")
            params["end"] = end
        return conditions, params

    # ------------------------------------------------------------------
    # Writes
    # ------------------------------------------------------------------
    def insert_data_to_mysql(self, df: pd.DataFrame):
        """Save huge-volume data via ``DBData.insert_data_to_mysql``.

        Author's note: fastest option, medium memory use, intended for
        small-to-medium data sets (< 100k rows).

        :param df: huge-volume DataFrame; ``None``/empty is skipped with a
            warning
        """
        if df is None or df.empty:
            logging.warning("DataFrame为空,无需写入数据库。")
            return
        return self.db_manager.insert_data_to_mysql(df)

    def insert_data_to_mysql_fast(self, df: pd.DataFrame):
        """Save huge-volume data via ``DBData.insert_data_to_mysql_fast``.

        Author's note: scheme 2, executemany batch insert; very fast, low
        memory use, intended for medium data sets.

        :param df: huge-volume DataFrame; ``None``/empty is skipped with a
            warning
        """
        if df is None or df.empty:
            logging.warning("DataFrame为空,无需写入数据库。")
            return
        return self.db_manager.insert_data_to_mysql_fast(df)

    def insert_data_to_mysql_chunk(self, df: pd.DataFrame, chunk_size: int = 1000):
        """Save huge-volume data via ``DBData.insert_data_to_mysql_chunk``.

        Author's note: scheme 3, chunked insert; medium speed, lowest memory
        use, intended for large data sets (> 100k rows).

        :param df: huge-volume DataFrame; ``None``/empty is skipped with a
            warning
        :param chunk_size: chunk size forwarded to the manager
        """
        if df is None or df.empty:
            logging.warning("DataFrame为空,无需写入数据库。")
            return
        return self.db_manager.insert_data_to_mysql_chunk(df, chunk_size)

    def insert_data_to_mysql_simple(self, df: pd.DataFrame):
        """Save huge-volume data via ``DBData.insert_data_to_mysql_simple``.

        Author's note: scheme 4, plain ``to_sql``; fastest, medium memory
        use. Duplicate-key errors are raised and must be handled by the
        caller.

        :param df: huge-volume DataFrame; ``None``/empty is skipped with a
            warning
        """
        if df is None or df.empty:
            logging.warning("DataFrame为空,无需写入数据库。")
            return
        return self.db_manager.insert_data_to_mysql_simple(df)

    # ------------------------------------------------------------------
    # Reads
    # ------------------------------------------------------------------
    def query_latest_data(self, symbol: str, bar: str):
        """Query the most recent row (highest ``timestamp``) for a pair/period.

        :param symbol: trading pair
        :param bar: candlestick period
        :return: result of ``DBData.query_data`` with ``return_multi=False``
        """
        sql = """
            SELECT * FROM crypto_huge_volume
            WHERE symbol = :symbol AND bar = :bar
            ORDER BY timestamp DESC
            LIMIT 1
        """
        condition_dict = {"symbol": symbol, "bar": bar}
        return self.db_manager.query_data(sql, condition_dict, return_multi=False)

    def query_data_by_symbol_bar_timestamp(self, symbol: str, bar: str, timestamp: int):
        """Query the row matching a pair, period and exact timestamp.

        :param symbol: trading pair
        :param bar: candlestick period
        :param timestamp: timestamp to match exactly
        :return: result of ``DBData.query_data`` with ``return_multi=False``
        """
        sql = """
            SELECT * FROM crypto_huge_volume
            WHERE symbol = :symbol AND bar = :bar AND timestamp = :timestamp
        """
        condition_dict = {"symbol": symbol, "bar": bar, "timestamp": timestamp}
        return self.db_manager.query_data(sql, condition_dict, return_multi=False)

    def query_huge_volume_data_by_symbol_bar(
        self, symbol: str, bar: str, start: str = None, end: str = None
    ):
        """Query rows for a pair/period, optionally limited to a time range.

        Either bound may be omitted: only ``start`` gives ``timestamp >=
        start``, only ``end`` gives ``timestamp <= end``, both give an
        inclusive ``BETWEEN`` (bounds are swapped if reversed), neither gives
        every row. Results are ordered by ``timestamp`` ascending.

        :param symbol: trading pair
        :param bar: candlestick period
        :param start: start time; digit string, date-time string, or a
            non-string value used as-is
        :param end: end time; same accepted forms as ``start``
        :return: result of ``DBData.query_data`` with ``return_multi=True``,
            or ``None`` when a bound has an invalid date-time format
        """
        if start is not None:
            ok, start = self._parse_time_bound(start)
            if not ok:
                return None
        if end is not None:
            ok, end = self._parse_time_bound(end)
            if not ok:
                return None

        condition_dict = {"symbol": symbol, "bar": bar}
        if start is not None and end is not None:
            if start > end:
                start, end = end, start
            sql = """
                SELECT * FROM crypto_huge_volume
                WHERE symbol = :symbol AND bar = :bar
                AND timestamp BETWEEN :start AND :end
                ORDER BY timestamp ASC
            """
            condition_dict["start"] = start
            condition_dict["end"] = end
        elif start is not None:
            sql = """
                SELECT * FROM crypto_huge_volume
                WHERE symbol = :symbol AND bar = :bar
                AND timestamp >= :start
                ORDER BY timestamp ASC
            """
            condition_dict["start"] = start
        elif end is not None:
            sql = """
                SELECT * FROM crypto_huge_volume
                WHERE symbol = :symbol AND bar = :bar
                AND timestamp <= :end
                ORDER BY timestamp ASC
            """
            condition_dict["end"] = end
        else:
            sql = """
                SELECT * FROM crypto_huge_volume
                WHERE symbol = :symbol AND bar = :bar
                ORDER BY timestamp ASC
            """
        return self.db_manager.query_data(sql, condition_dict, return_multi=True)

    def query_huge_volume_records(
        self, symbol: str = None, bar: str = None, start: str = None, end: str = None
    ):
        """Query rows flagged ``huge_volume = 1``, newest first.

        :param symbol: trading pair (optional)
        :param bar: candlestick period (optional)
        :param start: start time (optional)
        :param end: end time (optional)
        :return: result of ``DBData.query_data`` with ``return_multi=True``,
            or ``None`` when a bound has an invalid date-time format
        """
        built = self._build_filters(["huge_volume = 1"], symbol, bar, start, end)
        if built is None:
            return None
        conditions, condition_dict = built
        where_clause = " AND ".join(conditions)
        sql = f"""
            SELECT * FROM crypto_huge_volume
            WHERE {where_clause}
            ORDER BY timestamp DESC
        """
        return self.db_manager.query_data(sql, condition_dict, return_multi=True)

    def query_volume_price_spike_records(
        self, symbol: str = None, bar: str = None, start: str = None, end: str = None
    ):
        """Query rows flagged ``volume_price_spike = 1``, newest first.

        :param symbol: trading pair (optional)
        :param bar: candlestick period (optional)
        :param start: start time (optional)
        :param end: end time (optional)
        :return: result of ``DBData.query_data`` with ``return_multi=True``,
            or ``None`` when a bound has an invalid date-time format
        """
        built = self._build_filters(["volume_price_spike = 1"], symbol, bar, start, end)
        if built is None:
            return None
        conditions, condition_dict = built
        where_clause = " AND ".join(conditions)
        sql = f"""
            SELECT * FROM crypto_huge_volume
            WHERE {where_clause}
            ORDER BY timestamp DESC
        """
        return self.db_manager.query_data(sql, condition_dict, return_multi=True)

    def get_statistics_summary(
        self, symbol: str = None, bar: str = None, start: str = None, end: str = None
    ):
        """Return aggregate statistics over the matching rows.

        The query selects the row count, the sums of the ``huge_volume``,
        ``volume_price_spike``, ``price_high`` and ``price_low`` flags, and
        the average/maximum of ``volume_ratio`` and ``spike_intensity``.

        :param symbol: trading pair (optional)
        :param bar: candlestick period (optional)
        :param start: start time (optional)
        :param end: end time (optional)
        :return: result of ``DBData.query_data`` with ``return_multi=False``,
            or ``None`` when a bound has an invalid date-time format
        """
        built = self._build_filters([], symbol, bar, start, end)
        if built is None:
            return None
        conditions, condition_dict = built
        # "1=1" keeps the WHERE clause valid when no filter was supplied.
        where_clause = " AND ".join(conditions) if conditions else "1=1"
        sql = f"""
            SELECT
                COUNT(*) as total_records,
                SUM(huge_volume) as huge_volume_count,
                SUM(volume_price_spike) as volume_price_spike_count,
                SUM(price_high) as price_high_count,
                SUM(price_low) as price_low_count,
                AVG(volume_ratio) as avg_volume_ratio,
                MAX(volume_ratio) as max_volume_ratio,
                AVG(spike_intensity) as avg_spike_intensity,
                MAX(spike_intensity) as max_spike_intensity
            FROM crypto_huge_volume
            WHERE {where_clause}
        """
        return self.db_manager.query_data(sql, condition_dict, return_multi=False)

    def get_top_volume_spikes(self, symbol: str = None, bar: str = None, limit: int = 10):
        """Return the ``huge_volume = 1`` rows with the highest ``volume_ratio``.

        :param symbol: trading pair (optional)
        :param bar: candlestick period (optional)
        :param limit: maximum number of rows to return
        :return: result of ``DBData.query_data`` with ``return_multi=True``
        """
        # No time bounds here, so _build_filters cannot return None.
        conditions, condition_dict = self._build_filters(
            ["huge_volume = 1"], symbol, bar, None, None
        )
        where_clause = " AND ".join(conditions)
        sql = f"""
            SELECT * FROM crypto_huge_volume
            WHERE {where_clause}
            ORDER BY volume_ratio DESC
            LIMIT :limit
        """
        condition_dict["limit"] = limit
        return self.db_manager.query_data(sql, condition_dict, return_multi=True)