crypto_quant/core/db/db_binance_data.py

525 lines
18 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import core.logger as logging
from core.db.db_manager import DBData
from core.utils import transform_date_time_to_timestamp
# Module-wide logger: the `logger` object exposed by the project's `core.logger`
# module (imported above under the alias `logging`, which shadows the stdlib name here).
logger = logging.logger
class DBBinanceData:
    """Data-access wrapper around the ``crypto_binance_data`` table.

    Every read and write is delegated to a ``DBData`` manager created in
    ``__init__``; this class only supplies the table name, the column list,
    and the SQL text / bind parameters for each query.
    """

    def __init__(
        self,
        db_url: str
    ):
        """Create the wrapper and its underlying ``DBData`` manager.

        :param db_url: database connection URL, passed through to ``DBData``
        """
        self.db_url = db_url
        self.table_name = "crypto_binance_data"
        self.columns = [
            "symbol",
            "bar",
            "timestamp",
            "date_time",
            "date_time_us",
            "open",
            "high",
            "low",
            "close",
            "pre_close",
            "close_change",
            "pct_chg",
            "volume",
            "volCcy",
            "volCCyQuote",
            "buy_sz",
            "sell_sz",
            # Technical-indicator fields
            "ma1",
            "ma2",
            "dif",
            "dea",
            "macd",
            "macd_signal",
            "macd_divergence",
            "kdj_k",
            "kdj_d",
            "kdj_j",
            "kdj_signal",
            "kdj_pattern",
            "sar",
            "sar_signal",
            "ma5",
            "ma10",
            "ma20",
            "ma30",
            "ma_cross",
            "ma5_close_diff",
            "ma10_close_diff",
            "ma20_close_diff",
            "ma30_close_diff",
            "ma_close_avg",
            "ma_long_short",
            "ma_divergence",
            "rsi_14",
            "rsi_signal",
            "boll_upper",
            "boll_middle",
            "boll_lower",
            "boll_signal",
            "boll_pattern",
            "k_length",
            "k_shape",
            "k_up_down",
            "create_time",
        ]
        self.db_manager = DBData(db_url, self.table_name, self.columns)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _frame_is_empty(df) -> bool:
        """Return True (after logging a warning) when there is nothing to write."""
        if df is None or df.empty:
            logger.warning("DataFrame为空无需写入数据库。")
            return True
        return False

    @staticmethod
    def _build_filter(symbol, bar, equals=(), start=None, end=None):
        """Build the WHERE clause and bind parameters shared by the signal queries.

        :param symbol: trading pair, bound as ``:symbol``
        :param bar: K-line period, bound as ``:bar``
        :param equals: iterable of ``(column, bind_name, value)``; a triple adds
            ``column = :bind_name`` only when ``value`` is truthy. Column names
            are hard-coded by the callers in this class, never user input.
        :param start: optional lower time bound (inclusive)
        :param end: optional upper time bound (inclusive)
        :return: ``(where_clause, condition_dict)``
        """
        conditions = ["symbol = :symbol", "bar = :bar"]
        condition_dict = {"symbol": symbol, "bar": bar}

        for column, bind_name, value in equals:
            if value:
                conditions.append(f"{column} = :{bind_name}")
                condition_dict[bind_name] = value

        # Lenient time-range handling: a bound is applied only when it is given
        # AND converts to a truthy timestamp; otherwise it is silently skipped.
        for raw, bind_name, operator in ((start, "start", ">="), (end, "end", "<=")):
            if not raw:
                continue
            converted = transform_date_time_to_timestamp(raw)
            if converted:
                conditions.append(f"timestamp {operator} :{bind_name}")
                condition_dict[bind_name] = converted

        return " AND ".join(conditions), condition_dict

    def _select_filtered(self, symbol, bar, equals=(), start=None, end=None):
        """Run ``SELECT *`` with the filter from ``_build_filter``, newest rows first."""
        where_clause, condition_dict = self._build_filter(symbol, bar, equals, start, end)
        sql = f"""
            SELECT * FROM crypto_binance_data
            WHERE {where_clause}
            ORDER BY timestamp DESC
        """
        return self.db_manager.query_data(sql, condition_dict, return_multi=True)

    # ------------------------------------------------------------------
    # Writes
    # ------------------------------------------------------------------
    def insert_data_to_mysql(self, df: pd.DataFrame):
        """Save K-line market data into the ``crypto_binance_data`` table.

        Author's notes: fastest option, medium memory use, intended for small to
        medium volumes (under roughly 100k rows). The work is done by
        ``DBData.insert_data_to_mysql``.

        :param df: K-line DataFrame; ``None`` or empty is logged and ignored
        """
        if self._frame_is_empty(df):
            return
        self.db_manager.insert_data_to_mysql(df)

    def insert_data_to_mysql_fast(self, df: pd.DataFrame):
        """Insert K-line market data, scheme 2 (batch insert via ``executemany``).

        Author's notes: very fast, low memory use, intended for medium volumes.
        The work is done by ``DBData.insert_data_to_mysql_fast``.

        :param df: K-line DataFrame; ``None`` or empty is logged and ignored
        """
        if self._frame_is_empty(df):
            return
        self.db_manager.insert_data_to_mysql_fast(df)

    def insert_data_to_mysql_chunk(self, df: pd.DataFrame, chunk_size: int = 1000):
        """Insert K-line market data in chunks, scheme 3 (for large volumes).

        Author's notes: medium speed, lowest memory use, intended for large
        volumes (over roughly 100k rows). The work is done by
        ``DBData.insert_data_to_mysql_chunk``.

        :param df: K-line DataFrame; ``None`` or empty is logged and ignored
        :param chunk_size: chunk size forwarded to the manager
        """
        if self._frame_is_empty(df):
            return
        self.db_manager.insert_data_to_mysql_chunk(df, chunk_size)

    def insert_data_to_mysql_simple(self, df: pd.DataFrame):
        """Insert K-line market data, scheme 4 (plain ``to_sql``).

        Author's notes: fastest option, medium memory use; duplicate-key errors
        are raised and must be handled by the caller. The work is done by
        ``DBData.insert_data_to_mysql_simple``.

        :param df: K-line DataFrame; ``None`` or empty is logged and ignored
        """
        if self._frame_is_empty(df):
            return
        self.db_manager.insert_data_to_mysql_simple(df)

    # ------------------------------------------------------------------
    # Reads
    # ------------------------------------------------------------------
    def query_latest_data(self, symbol: str, bar: str):
        """Query the row with the greatest timestamp for a symbol / bar pair.

        :param symbol: trading pair
        :param bar: K-line period
        :return: whatever ``DBData.query_data`` returns for ``return_multi=False``
        """
        sql = """
            SELECT * FROM crypto_binance_data
            WHERE symbol = :symbol AND bar = :bar
            ORDER BY timestamp DESC
            LIMIT 1
        """
        condition_dict = {"symbol": symbol, "bar": bar}
        return self.db_manager.query_data(sql, condition_dict, return_multi=False)

    def query_data_before_timestamp(self, symbol: str, bar: str, timestamp: int, limit: int = 100):
        """Query rows strictly older than ``timestamp``, newest first.

        :param symbol: trading pair
        :param bar: K-line period
        :param timestamp: exclusive upper bound on the ``timestamp`` column
        :param limit: maximum number of rows
        :return: whatever ``DBData.query_data`` returns for ``return_multi=True``
        """
        sql = """
            SELECT * FROM crypto_binance_data
            WHERE symbol = :symbol AND bar = :bar AND timestamp < :timestamp
            ORDER BY timestamp DESC
            LIMIT :limit
        """
        condition_dict = {"symbol": symbol, "bar": bar, "timestamp": timestamp, "limit": limit}
        return self.db_manager.query_data(sql, condition_dict, return_multi=True)

    def query_data_by_technical_indicators(
        self,
        symbol: str,
        bar: str,
        start: str = None,
        end: str = None,
        macd_signal: str = None,
        kdj_signal: str = None,
        rsi_signal: str = None,
        boll_signal: str = None,
        ma_cross: str = None
    ):
        """Query rows by any combination of technical-indicator signals.

        Each signal argument adds an equality filter only when it is truthy.
        An unparsable ``start`` / ``end`` is skipped rather than rejected.

        :param symbol: trading pair
        :param bar: K-line period
        :param start: start time (inclusive)
        :param end: end time (inclusive)
        :param macd_signal: MACD signal
        :param kdj_signal: KDJ signal
        :param rsi_signal: RSI signal
        :param boll_signal: Bollinger-band signal
        :param ma_cross: moving-average cross signal
        :return: matching rows, newest first
        """
        equals = (
            ("macd_signal", "macd_signal", macd_signal),
            ("kdj_signal", "kdj_signal", kdj_signal),
            ("rsi_signal", "rsi_signal", rsi_signal),
            ("boll_signal", "boll_signal", boll_signal),
            ("ma_cross", "ma_cross", ma_cross),
        )
        return self._select_filtered(symbol, bar, equals, start, end)

    def query_macd_signals(
        self,
        symbol: str,
        bar: str,
        signal: str = None,
        start: str = None,
        end: str = None
    ):
        """Query rows filtered by MACD signal.

        :param symbol: trading pair
        :param bar: K-line period
        :param signal: MACD signal type (filter on ``macd_signal``)
        :param start: start time (inclusive)
        :param end: end time (inclusive)
        :return: matching rows, newest first
        """
        equals = (("macd_signal", "signal", signal),)
        return self._select_filtered(symbol, bar, equals, start, end)

    def query_kdj_signals(
        self,
        symbol: str,
        bar: str,
        signal: str = None,
        pattern: str = None,
        start: str = None,
        end: str = None
    ):
        """Query rows filtered by KDJ signal and / or pattern.

        :param symbol: trading pair
        :param bar: K-line period
        :param signal: KDJ signal type (filter on ``kdj_signal``)
        :param pattern: KDJ pattern (filter on ``kdj_pattern``)
        :param start: start time (inclusive)
        :param end: end time (inclusive)
        :return: matching rows, newest first
        """
        equals = (
            ("kdj_signal", "signal", signal),
            ("kdj_pattern", "pattern", pattern),
        )
        return self._select_filtered(symbol, bar, equals, start, end)

    def query_ma_signals(
        self,
        symbol: str,
        bar: str,
        cross: str = None,
        long_short: str = None,
        divergence: str = None,
        start: str = None,
        end: str = None
    ):
        """Query rows filtered by moving-average signals.

        :param symbol: trading pair
        :param bar: K-line period
        :param cross: moving-average cross signal (filter on ``ma_cross``)
        :param long_short: long / short arrangement (filter on ``ma_long_short``)
        :param divergence: moving-average divergence (filter on ``ma_divergence``)
        :param start: start time (inclusive)
        :param end: end time (inclusive)
        :return: matching rows, newest first
        """
        equals = (
            ("ma_cross", "cross", cross),
            ("ma_long_short", "long_short", long_short),
            ("ma_divergence", "divergence", divergence),
        )
        return self._select_filtered(symbol, bar, equals, start, end)

    def query_bollinger_signals(
        self,
        symbol: str,
        bar: str,
        signal: str = None,
        pattern: str = None,
        start: str = None,
        end: str = None
    ):
        """Query rows filtered by Bollinger-band signal and / or pattern.

        :param symbol: trading pair
        :param bar: K-line period
        :param signal: Bollinger-band signal (filter on ``boll_signal``)
        :param pattern: Bollinger-band pattern (filter on ``boll_pattern``)
        :param start: start time (inclusive)
        :param end: end time (inclusive)
        :return: matching rows, newest first
        """
        equals = (
            ("boll_signal", "signal", signal),
            ("boll_pattern", "pattern", pattern),
        )
        return self._select_filtered(symbol, bar, equals, start, end)

    def get_technical_statistics(
        self,
        symbol: str,
        bar: str,
        start: str = None,
        end: str = None
    ):
        """Aggregate technical-indicator statistics for a symbol / bar pair.

        Returns the total row count, the count of non-NULL values for each
        signal column, and averages of the MA-diff, RSI and Bollinger columns.
        An unparsable ``start`` / ``end`` is skipped rather than rejected.

        :param symbol: trading pair
        :param bar: K-line period
        :param start: start time (inclusive)
        :param end: end time (inclusive)
        :return: whatever ``DBData.query_data`` returns for ``return_multi=False``
        """
        where_clause, condition_dict = self._build_filter(symbol, bar, (), start, end)
        sql = f"""
            SELECT
            COUNT(*) as total_records,
            COUNT(CASE WHEN macd_signal IS NOT NULL THEN 1 END) as macd_signal_count,
            COUNT(CASE WHEN kdj_signal IS NOT NULL THEN 1 END) as kdj_signal_count,
            COUNT(CASE WHEN rsi_signal IS NOT NULL THEN 1 END) as rsi_signal_count,
            COUNT(CASE WHEN boll_signal IS NOT NULL THEN 1 END) as boll_signal_count,
            COUNT(CASE WHEN ma_cross IS NOT NULL THEN 1 END) as ma_cross_count,
            AVG(ma5_close_diff) as avg_ma5_close_diff,
            AVG(ma10_close_diff) as avg_ma10_close_diff,
            AVG(ma20_close_diff) as avg_ma20_close_diff,
            AVG(ma30_close_diff) as avg_ma30_close_diff,
            AVG(ma_close_avg) as avg_ma_close_avg,
            AVG(rsi_14) as avg_rsi_14,
            AVG(boll_upper) as avg_boll_upper,
            AVG(boll_middle) as avg_boll_middle,
            AVG(boll_lower) as avg_boll_lower
            FROM crypto_binance_data
            WHERE {where_clause}
        """
        return self.db_manager.query_data(sql, condition_dict, return_multi=False)

    def query_market_data_by_symbol_bar(self, symbol: str, bar: str, start: str = None, end: str = None):
        """Query rows for a symbol / bar pair in ascending time order.

        Unlike the signal queries, this method is strict about dates: if a
        supplied ``start`` or ``end`` cannot be converted, a warning naming the
        offending input is logged and ``None`` is returned. When both bounds
        are given in the wrong order they are swapped.

        :param symbol: trading pair
        :param bar: K-line period
        :param start: start time (inclusive)
        :param end: end time (inclusive)
        :return: matching rows, oldest first, or ``None`` on a date format error
        """
        start_ts = None
        end_ts = None

        if start is not None:
            start_ts = transform_date_time_to_timestamp(start)
            if start_ts is None:
                # Log the caller's raw input; the converted value is always None here.
                logger.warning(f"开始时间格式错误: {start}")
                return None
        if end is not None:
            end_ts = transform_date_time_to_timestamp(end)
            if end_ts is None:
                logger.warning(f"结束时间格式错误: {end}")
                return None

        conditions = ["symbol = :symbol AND bar = :bar"]
        condition_dict = {"symbol": symbol, "bar": bar}

        if start_ts is not None and end_ts is not None:
            if start_ts > end_ts:
                start_ts, end_ts = end_ts, start_ts
            conditions.append("timestamp BETWEEN :start AND :end")
            condition_dict["start"] = start_ts
            condition_dict["end"] = end_ts
        elif start_ts is not None:
            conditions.append("timestamp >= :start")
            condition_dict["start"] = start_ts
        elif end_ts is not None:
            conditions.append("timestamp <= :end")
            condition_dict["end"] = end_ts

        where_clause = " AND ".join(conditions)
        sql = f"""
            SELECT * FROM crypto_binance_data
            WHERE {where_clause}
            ORDER BY timestamp ASC
        """
        return self.db_manager.query_data(sql, condition_dict, return_multi=True)