crypto_quant/core/db_huge_volume_data.py

379 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import logging
from core.db_manager import DBData
from core.utils import check_date_time_format, datetime_to_timestamp
# Configure the root logger when this module is imported: INFO level, with
# each record rendered as "<time> <level>: <message>".
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s")
class DBHugeVolumeData:
    """Data-access wrapper for the ``crypto_huge_volume`` table.

    Every database operation is delegated to the ``DBData`` manager created
    in ``__init__``. This class contributes the table name, the column list,
    the SQL text and the normalisation of ``start``/``end`` time bounds.

    Time bounds accepted by the query methods may be:

    * a non-string value (e.g. an ``int``), used as the timestamp as-is;
    * a digit-only string, converted with ``int()``;
    * any other string, validated with ``check_date_time_format`` and then
      converted with ``datetime_to_timestamp``.

    When ``check_date_time_format`` rejects a string, a warning containing
    the offending input is logged and the query method returns ``None``
    without touching the database.
    """

    def __init__(
        self,
        db_url: str
    ):
        """Store the connection URL and build the underlying ``DBData`` manager.

        :param db_url: database URL handed to ``DBData``
        """
        self.db_url = db_url
        self.table_name = "crypto_huge_volume"
        self.columns = [
            "symbol",
            "bar",
            "timestamp",
            "date_time",
            "open",
            "high",
            "low",
            "close",
            "volume",
            "volCcy",
            # NOTE(review): capitalisation differs from "volCcy" above;
            # confirm it matches the actual table column name.
            "volCCyQuote",
            "volume_ma",
            "volume_std",
            "volume_threshold",
            "huge_volume",
            "volume_ratio",
            "spike_intensity",
            "close_80_percentile",
            "close_20_percentile",
            "price_high",
            "price_low",
            "volume_price_spike",
            "create_time",
        ]
        self.db_manager = DBData(db_url, self.table_name, self.columns)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _has_rows(df) -> bool:
        """Return True if *df* holds data; otherwise log a warning and return False."""
        if df is None or df.empty:
            logging.warning("DataFrame为空无需写入数据库。")
            return False
        return True

    @staticmethod
    def _parse_time_bound(raw):
        """Normalise one time bound into a timestamp.

        :param raw: non-string value, digit-only string, or date-time string
        :return: ``(True, timestamp)`` on success, ``(False, None)`` when
            ``check_date_time_format`` rejects the string (a warning that
            includes the rejected input is logged in that case)
        """
        if not isinstance(raw, str):
            return True, raw
        if raw.isdigit():
            return True, int(raw)
        checked = check_date_time_format(raw)
        if checked is None:
            # Log the caller's input, not the None produced by the format check.
            logging.warning(f"日期时间格式错误: {raw}")
            return False, None
        return True, datetime_to_timestamp(checked)

    def _build_filters(self, conditions, symbol, bar, start=None, end=None):
        """Extend *conditions* with the optional symbol/bar/time filters.

        Falsy arguments (``None``, ``""``, ``0``) are treated as "no filter".

        :param conditions: list of SQL conditions to start from (mutated)
        :return: ``(where_clause, params)`` where ``where_clause`` is the
            conditions joined with ``AND`` (empty string if there are none),
            or ``None`` if a time bound has an invalid format
        """
        params = {}
        if symbol:
            conditions.append("symbol = :symbol")
            params["symbol"] = symbol
        if bar:
            conditions.append("bar = :bar")
            params["bar"] = bar
        for name, operator, raw in (("start", ">=", start), ("end", "<=", end)):
            if not raw:
                continue
            ok, value = self._parse_time_bound(raw)
            if not ok:
                return None
            conditions.append(f"timestamp {operator} :{name}")
            params[name] = value
        return " AND ".join(conditions), params

    # ------------------------------------------------------------------
    # Inserts
    # ------------------------------------------------------------------

    def insert_data_to_mysql(self, df: pd.DataFrame):
        """Save huge-volume data to the ``crypto_huge_volume`` table.

        Author's notes carried over from the original docstring: fastest
        speed, medium memory use, intended for small-to-medium data sets
        (fewer than 100k rows). The actual strategy lives in ``DBData``.

        :param df: huge-volume DataFrame; ``None`` or empty is skipped with
            a warning
        """
        if self._has_rows(df):
            self.db_manager.insert_data_to_mysql(df)

    def insert_data_to_mysql_fast(self, df: pd.DataFrame):
        """Insert huge-volume data via the manager's "fast" path (scheme 2).

        Author's notes carried over from the original docstring: batch
        insert using ``executemany``, very fast, low memory use, intended
        for medium data sets. The actual strategy lives in ``DBData``.

        :param df: huge-volume DataFrame; ``None`` or empty is skipped with
            a warning
        """
        if self._has_rows(df):
            self.db_manager.insert_data_to_mysql_fast(df)

    def insert_data_to_mysql_chunk(self, df: pd.DataFrame, chunk_size: int = 1000):
        """Insert huge-volume data in chunks (scheme 3).

        Author's notes carried over from the original docstring: medium
        speed, lowest memory use, intended for large data sets (more than
        100k rows). The actual strategy lives in ``DBData``.

        :param df: huge-volume DataFrame; ``None`` or empty is skipped with
            a warning
        :param chunk_size: chunk size forwarded to the manager
        """
        if self._has_rows(df):
            self.db_manager.insert_data_to_mysql_chunk(df, chunk_size)

    def insert_data_to_mysql_simple(self, df: pd.DataFrame):
        """Insert huge-volume data via the manager's "simple" path (scheme 4).

        Author's notes carried over from the original docstring: uses
        ``to_sql`` directly; fastest speed, medium memory use; duplicate-key
        errors are raised and need extra handling by the caller. The actual
        strategy lives in ``DBData``.

        :param df: huge-volume DataFrame; ``None`` or empty is skipped with
            a warning
        """
        if self._has_rows(df):
            self.db_manager.insert_data_to_mysql_simple(df)

    # ------------------------------------------------------------------
    # Queries
    # ------------------------------------------------------------------

    def query_latest_data(self, symbol: str, bar: str):
        """Query the row with the greatest timestamp for a symbol and bar.

        :param symbol: trading pair
        :param bar: candlestick period
        :return: result of ``DBData.query_data`` with ``return_multi=False``
        """
        sql = """
            SELECT * FROM crypto_huge_volume
            WHERE symbol = :symbol AND bar = :bar
            ORDER BY timestamp DESC
            LIMIT 1
        """
        params = {"symbol": symbol, "bar": bar}
        return self.db_manager.query_data(sql, params, return_multi=False)

    def query_data_by_symbol_bar_timestamp(self, symbol: str, bar: str, timestamp: int):
        """Query the row matching a symbol, bar and exact timestamp.

        :param symbol: trading pair
        :param bar: candlestick period
        :param timestamp: timestamp to match exactly
        :return: result of ``DBData.query_data`` with ``return_multi=False``
        """
        sql = """
            SELECT * FROM crypto_huge_volume
            WHERE symbol = :symbol AND bar = :bar AND timestamp = :timestamp
        """
        params = {"symbol": symbol, "bar": bar, "timestamp": timestamp}
        return self.db_manager.query_data(sql, params, return_multi=False)

    def query_huge_volume_data_by_symbol_bar(self, symbol: str, bar: str, start: str = None, end: str = None):
        """Query rows for a symbol and bar, optionally limited to a time range.

        Rows are ordered by ascending timestamp. With both bounds the range
        is inclusive (``BETWEEN``) and the bounds are swapped if given in
        reverse order. With a single bound only that side is constrained.
        With no bound all rows for the symbol/bar are returned.

        :param symbol: trading pair
        :param bar: candlestick period
        :param start: start time (timestamp, digit string or date-time string)
        :param end: end time (timestamp, digit string or date-time string)
        :return: result of ``DBData.query_data`` with ``return_multi=True``,
            or ``None`` if a bound has an invalid date-time format
        """
        params = {"symbol": symbol, "bar": bar}

        if start is not None:
            ok, start = self._parse_time_bound(start)
            if not ok:
                return None
        if end is not None:
            ok, end = self._parse_time_bound(end)
            if not ok:
                return None

        # Only fixed SQL fragments are interpolated; values stay bound parameters.
        time_filter = ""
        if start is not None and end is not None:
            if start > end:
                start, end = end, start
            time_filter = " AND timestamp BETWEEN :start AND :end"
            params["start"] = start
            params["end"] = end
        elif start is not None:
            time_filter = " AND timestamp >= :start"
            params["start"] = start
        elif end is not None:
            time_filter = " AND timestamp <= :end"
            params["end"] = end

        sql = f"""
            SELECT * FROM crypto_huge_volume
            WHERE symbol = :symbol AND bar = :bar{time_filter}
            ORDER BY timestamp ASC
        """
        return self.db_manager.query_data(sql, params, return_multi=True)

    def query_huge_volume_records(self, symbol: str = None, bar: str = None, start: str = None, end: str = None):
        """Query rows flagged ``huge_volume = 1``, newest first.

        :param symbol: trading pair (optional filter)
        :param bar: candlestick period (optional filter)
        :param start: start time (optional lower bound, inclusive)
        :param end: end time (optional upper bound, inclusive)
        :return: result of ``DBData.query_data`` with ``return_multi=True``,
            or ``None`` if a bound has an invalid date-time format
        """
        built = self._build_filters(["huge_volume = 1"], symbol, bar, start, end)
        if built is None:
            return None
        where_clause, params = built
        sql = f"""
            SELECT * FROM crypto_huge_volume
            WHERE {where_clause}
            ORDER BY timestamp DESC
        """
        return self.db_manager.query_data(sql, params, return_multi=True)

    def query_volume_price_spike_records(self, symbol: str = None, bar: str = None, start: str = None, end: str = None):
        """Query rows flagged ``volume_price_spike = 1``, newest first.

        :param symbol: trading pair (optional filter)
        :param bar: candlestick period (optional filter)
        :param start: start time (optional lower bound, inclusive)
        :param end: end time (optional upper bound, inclusive)
        :return: result of ``DBData.query_data`` with ``return_multi=True``,
            or ``None`` if a bound has an invalid date-time format
        """
        built = self._build_filters(["volume_price_spike = 1"], symbol, bar, start, end)
        if built is None:
            return None
        where_clause, params = built
        sql = f"""
            SELECT * FROM crypto_huge_volume
            WHERE {where_clause}
            ORDER BY timestamp DESC
        """
        return self.db_manager.query_data(sql, params, return_multi=True)

    def get_statistics_summary(self, symbol: str = None, bar: str = None, start: str = None, end: str = None):
        """Aggregate counts and volume-ratio / spike-intensity statistics.

        The query returns the total row count, the sums of the
        ``huge_volume``, ``volume_price_spike``, ``price_high`` and
        ``price_low`` columns, and the average and maximum of
        ``volume_ratio`` and ``spike_intensity``.

        :param symbol: trading pair (optional filter)
        :param bar: candlestick period (optional filter)
        :param start: start time (optional lower bound, inclusive)
        :param end: end time (optional upper bound, inclusive)
        :return: result of ``DBData.query_data`` with ``return_multi=False``,
            or ``None`` if a bound has an invalid date-time format
        """
        built = self._build_filters([], symbol, bar, start, end)
        if built is None:
            return None
        where_clause, params = built
        # With no filters at all, fall back to an always-true predicate.
        where_clause = where_clause or "1=1"
        sql = f"""
            SELECT
                COUNT(*) as total_records,
                SUM(huge_volume) as huge_volume_count,
                SUM(volume_price_spike) as volume_price_spike_count,
                SUM(price_high) as price_high_count,
                SUM(price_low) as price_low_count,
                AVG(volume_ratio) as avg_volume_ratio,
                MAX(volume_ratio) as max_volume_ratio,
                AVG(spike_intensity) as avg_spike_intensity,
                MAX(spike_intensity) as max_spike_intensity
            FROM crypto_huge_volume
            WHERE {where_clause}
        """
        return self.db_manager.query_data(sql, params, return_multi=False)

    def get_top_volume_spikes(self, symbol: str = None, bar: str = None, limit: int = 10):
        """Return the ``huge_volume = 1`` rows with the highest ``volume_ratio``.

        :param symbol: trading pair (optional filter)
        :param bar: candlestick period (optional filter)
        :param limit: maximum number of rows, bound as the ``LIMIT`` parameter
        :return: result of ``DBData.query_data`` with ``return_multi=True``
        """
        where_clause, params = self._build_filters(["huge_volume = 1"], symbol, bar)
        sql = f"""
            SELECT * FROM crypto_huge_volume
            WHERE {where_clause}
            ORDER BY volume_ratio DESC
            LIMIT :limit
        """
        params["limit"] = limit
        return self.db_manager.query_data(sql, params, return_multi=True)