crypto_quant/huge_volume_main.py

364 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from core.huge_volume import HugeVolume
from core.db_market_data import DBMarketData
from core.db_huge_volume_data import DBHugeVolumeData
from core.utils import timestamp_to_datetime
from market_data_main import MarketDataMain
import logging
from config import MONITOR_CONFIG, MYSQL_CONFIG
from datetime import datetime
import pandas as pd
import os
import re
# Configure the root logger once at import time: INFO level, timestamped lines.
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
class HugeVolumeMain:
    """Pipeline driver for huge-volume ("volume spike") detection.

    Wires together the market-data fetcher (``MarketDataMain``), the spike
    detector (``HugeVolume``) and the MySQL persistence layers, and exposes
    batch entry points for the initial backfill, incremental updates, and
    post-spike rise/fall statistics.
    """

    # Candle duration in seconds for each supported bar code.
    # NOTE(review): months ("1M"/"3M") are approximated as 30/90 days —
    # confirm this matches how the exchange aligns monthly candles.
    _BAR_SECONDS = {
        "1s": 1,
        "1m": 60,
        "3m": 180,
        "5m": 300,
        "15m": 900,
        "30m": 1800,
        "1H": 3600,
        "2H": 7200,
        "4H": 14400,
        "6H": 21600,
        "12H": 43200,
        "1D": 86400,
        "2D": 172800,
        "3D": 259200,
        "1W": 604800,
        "1M": 2592000,
        "3M": 7776000,
    }

    def __init__(self, window_size: int = 50, threshold: float = 2.0):
        """Build the DB URL from config and construct all collaborators.

        :param window_size: rolling window (number of candles) used as the
            volume baseline when detecting spikes
        :param threshold: multiple of the baseline volume above which a
            candle counts as a huge-volume spike
        :raises ValueError: if no MySQL password is configured
        """
        mysql_user = MYSQL_CONFIG.get("user", "xch")
        mysql_password = MYSQL_CONFIG.get("password", "")
        if not mysql_password:
            raise ValueError("MySQL password is not set")
        mysql_host = MYSQL_CONFIG.get("host", "localhost")
        mysql_port = MYSQL_CONFIG.get("port", 3306)
        mysql_database = MYSQL_CONFIG.get("database", "okx")
        self.db_url = f"mysql+pymysql://{mysql_user}:{mysql_password}@{mysql_host}:{mysql_port}/{mysql_database}"
        self.huge_volume = HugeVolume()
        self.db_market_data = DBMarketData(self.db_url)
        self.db_huge_volume_data = DBHugeVolumeData(self.db_url)
        self.monitor_main = MarketDataMain()
        self.window_size = window_size
        self.threshold = threshold
        self.output_folder = "./output/huge_volume_statistics/"
        os.makedirs(self.output_folder, exist_ok=True)

    def batch_initial_detect_volume_spike(self, start: str = None):
        """Backfill huge-volume rows for every configured symbol/bar pair.

        :param start: inclusive start datetime string; falls back to the
            ``volume_monitor.initial_date`` config value when ``None``
        """
        # Resolve the default once, instead of re-checking inside the loop
        # as the original did (same value on every iteration anyway).
        if start is None:
            start = MONITOR_CONFIG.get("volume_monitor", {}).get(
                "initial_date", "2025-05-01 00:00:00"
            )
        for symbol in self.monitor_main.symbols:
            for bar in self.monitor_main.bars:
                data = self.detect_volume_spike(
                    symbol, bar, start, only_output_huge_volume=True, is_update=False
                )
                if data is not None and len(data) > 0:
                    logging.info(f"此次初始化巨量交易数据: {len(data)}")
                else:
                    logging.info("此次初始化巨量交易数据为空")

    def detect_volume_spike(
        self,
        symbol: str = "XCH-USDT",
        bar: str = "5m",
        start: str = "2025-05-01 00:00:00",
        end: str = None,
        only_output_huge_volume: bool = False,
        is_update: bool = False,
    ):
        """Detect huge-volume candles for one symbol/bar and persist them.

        :param symbol: instrument id, e.g. ``XCH-USDT``
        :param bar: candle period code, e.g. ``5m``
        :param start: inclusive start datetime string (config default if None)
        :param end: inclusive end datetime string (now if None)
        :param only_output_huge_volume: keep only spike rows in the result
        :param is_update: drop rows whose timestamp already exists in MySQL
            before inserting (used by incremental updates)
        :return: the detected DataFrame, or ``None`` when no market data /
            no detection result is available
        """
        if start is None:
            start = MONITOR_CONFIG.get("volume_monitor", {}).get(
                "initial_date", "2025-05-01 00:00:00"
            )
        if end is None:
            end = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        logging.info(f"开始处理巨量交易数据: {symbol} {bar} {start} {end}")
        data = self.db_market_data.query_market_data_by_symbol_bar(
            symbol, bar, start, end
        )
        # Guard clauses replace the original deeply nested if/else pyramid.
        if data is None:
            logging.warning(f"获取行情数据失败: {symbol} {bar} {start} {end}")
            return None
        if len(data) == 0:
            logging.warning(f"获取行情数据为空: {symbol} {bar} {start} {end}")
            return None
        if isinstance(data, list):
            data = pd.DataFrame(data)
        elif isinstance(data, dict):
            data = pd.DataFrame([data])
        data = self.huge_volume.detect_huge_volume(
            data=data,
            window_size=self.window_size,
            threshold=self.threshold,
            check_price=True,
            only_output_huge_volume=only_output_huge_volume,
            output_excel=True,
        )
        if data is None:
            return None
        if is_update:
            # iterrows() snapshots the pre-filter frame, so reassigning
            # `data` inside the loop is safe (matches original behavior).
            for _, row in data.iterrows():
                exist_huge_volume_data = (
                    self.db_huge_volume_data.query_data_by_symbol_bar_timestamp(
                        symbol, bar, row["timestamp"]
                    )
                )
                if exist_huge_volume_data is not None:
                    # Drop rows already stored so the insert below does not
                    # duplicate them.
                    data = data[
                        data["timestamp"] != exist_huge_volume_data["timestamp"]
                    ]
        if data is not None and len(data) > 0:
            self.db_huge_volume_data.insert_data_to_mysql(data)
        else:
            logging.warning(
                f"此次处理巨量交易数据为空: {symbol} {bar} {start} {end}"
            )
        return data

    def batch_update_volume_spike(self):
        """Incrementally update huge-volume data for all symbol/bar pairs."""
        for symbol in self.monitor_main.symbols:
            for bar in self.monitor_main.bars:
                self.update_volume_spike(symbol, bar)

    def update_volume_spike(self, symbol: str, bar: str):
        """Refresh market data, then detect spikes since the last stored row.

        Re-detects from ``window_size - 1`` candles before the latest stored
        spike so the rolling baseline is fully populated. Falls back to a
        full detection when no spike rows exist yet. Errors are logged and
        swallowed so one pair cannot abort the whole batch.

        :param symbol: instrument id
        :param bar: candle period code
        """
        try:
            self.monitor_main.update_data(symbol, bar)
            latest_huge_volume_data = self.db_huge_volume_data.query_latest_data(
                symbol, bar
            )
            if latest_huge_volume_data is None or len(latest_huge_volume_data) == 0:
                self.detect_volume_spike(symbol, bar, only_output_huge_volume=True)
                return
            # Rewind window_size - 1 candles (timestamps are in ms) so the
            # rolling window has full history at the first re-checked row.
            seconds = self.get_seconds_by_bar(bar)
            earliest_timestamp = latest_huge_volume_data["timestamp"] - (
                (self.window_size - 1) * seconds * 1000
            )
            earliest_date_time = timestamp_to_datetime(earliest_timestamp)
            data = self.detect_volume_spike(
                symbol=symbol,
                bar=bar,
                start=earliest_date_time,
                only_output_huge_volume=True,
                is_update=True,
            )
            logging.info(
                f"更新巨量交易数据: {symbol} {bar} from {earliest_date_time}"
            )
            if data is not None and len(data) > 0:
                logging.info(f"此次更新巨量交易数据: {len(data)}")
            else:
                logging.info("此次更新巨量交易数据为空")
        except Exception as e:
            logging.error(f"更新巨量交易数据失败: {symbol} {bar} {e}")

    def get_seconds_by_bar(self, bar: str):
        """Return the candle duration of *bar* in seconds.

        bar: 1s/1m/3m/5m/15m/30m/1H/2H/4H/6H/12H/1D/2D/3D/1W/1M/3M

        :param bar: candle period code
        :return: duration in seconds
        :raises ValueError: for an unsupported bar code
        """
        # Table lookup replaces the original 17-branch if/elif chain.
        try:
            return self._BAR_SECONDS[bar]
        except KeyError:
            raise ValueError(f"不支持的bar: {bar}") from None

    def next_periods_rise_or_fall(
        self,
        symbol: str,
        bar: str,
        start: str = None,
        end: str = None,
        periods: list = None,
        output_excel: bool = False,
    ):
        """Compute rise/fall ratios for the periods following each spike.

        Joins stored spike rows back onto the full candle series and
        delegates the statistics to ``HugeVolume.next_periods_rise_or_fall``.

        :param symbol: instrument id
        :param bar: candle period code
        :param start: inclusive start datetime string (config default if None)
        :param end: inclusive end datetime string (now if None)
        :param periods: look-ahead period counts; defaults to ``[3, 5]``
        :param output_excel: forwarded to the detector for Excel export
        :return: ``(detail_df, statistics_df)`` or ``None`` when either the
            spike table or the market data is empty for the range
        """
        # `periods=None` default avoids the original mutable-default-list bug.
        if periods is None:
            periods = [3, 5]
        if start is None:
            start = MONITOR_CONFIG.get("volume_monitor", {}).get(
                "initial_date", "2025-05-01 00:00:00"
            )
        if end is None:
            end = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        logging.info(f"开始计算巨量出现后之后3或5个周期上涨或下跌的比例: {symbol} {bar} {start} {end}")
        huge_volume_data = (
            self.db_huge_volume_data.query_huge_volume_data_by_symbol_bar(
                symbol, bar, start, end
            )
        )
        if huge_volume_data is None or len(huge_volume_data) == 0:
            logging.warning(f"获取巨量交易数据为空: {symbol} {bar} {start} {end}")
            return None
        if isinstance(huge_volume_data, list):
            huge_volume_data = pd.DataFrame(huge_volume_data)
        elif isinstance(huge_volume_data, dict):
            huge_volume_data = pd.DataFrame([huge_volume_data])
        market_data = self.db_market_data.query_market_data_by_symbol_bar(
            symbol, bar, start, end
        )
        if market_data is None or len(market_data) == 0:
            logging.warning(f"获取行情数据为空: {symbol} {bar} {start} {end}")
            return None
        if isinstance(market_data, list):
            market_data = pd.DataFrame(market_data)
        elif isinstance(market_data, dict):
            market_data = pd.DataFrame([market_data])
        # Drop the surrogate ids so they cannot collide in the merge.
        market_data = market_data.drop(columns=["id"])
        huge_volume_data = huge_volume_data.drop(columns=["id"])
        data = pd.merge(market_data, huge_volume_data, on="timestamp", how="left")
        # Both tables carry the same descriptive columns; keep the
        # market-data side (_x suffix) and drop the duplicated _y columns.
        data = data.rename(
            columns={
                "symbol_x": "symbol",
                "bar_x": "bar",
                "date_time_x": "date_time",
                "open_x": "open",
                "high_x": "high",
                "low_x": "low",
                "close_x": "close",
                "volume_x": "volume",
                "volCcy_x": "volCcy",
                "volCCyQuote_x": "volCCyQuote",
                "create_time_x": "create_time",
            }
        )
        data = data.drop(
            columns=[
                "symbol_y",
                "bar_y",
                "date_time_y",
                "open_y",
                "high_y",
                "low_y",
                "close_y",
                "volume_y",
                "volCcy_y",
                "volCCyQuote_y",
                "create_time_y",
            ]
        )
        data = data.sort_values(by="timestamp", ascending=True)
        data = data[
            [
                "symbol",
                "bar",
                "timestamp",
                "date_time",
                "open",
                "high",
                "low",
                "close",
                "volume",
                "huge_volume",
                "volume_ratio",
                "volume_price_spike",
                "price_high",
                "price_low",
            ]
        ]
        # dropna() keeps only candles that actually had a spike row joined on.
        data = data.dropna()
        data = data.reset_index(drop=True)
        data, result_data = self.huge_volume.next_periods_rise_or_fall(
            data=data, periods=periods, output_excel=output_excel
        )
        return data, result_data

    def batch_next_periods_rise_or_fall(
        self,
        start: str = None,
        end: str = None,
        periods: list = None,
        output_excel: bool = False,
    ):
        """Run :meth:`next_periods_rise_or_fall` over all symbol/bar pairs.

        Concatenates the per-pair results and optionally exports one Excel
        workbook with a ``details`` and a ``next_periods_statistics`` sheet.

        :param start: inclusive start datetime string (config default if None)
        :param end: inclusive end datetime string (now if None)
        :param periods: look-ahead period counts; defaults to ``[3, 5]``
        :param output_excel: also write the combined workbook to
            ``self.output_folder``
        :return: ``(detail_df, statistics_df)``, or ``(None, None)`` when no
            pair produced any data
        """
        # `periods=None` default avoids the original mutable-default-list bug.
        if periods is None:
            periods = [3, 5]
        if start is None:
            start = MONITOR_CONFIG.get("volume_monitor", {}).get(
                "initial_date", "2025-05-01 00:00:00"
            )
        if end is None:
            end = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        data_list = []
        result_data_list = []
        for symbol in self.monitor_main.symbols:
            for bar in self.monitor_main.bars:
                result = self.next_periods_rise_or_fall(
                    symbol, bar, start, end, periods, output_excel
                )
                # Bug fix: the callee returns None for empty ranges; the
                # original crashed here on tuple unpacking.
                if result is None:
                    continue
                data, result_data = result
                data_list.append(data)
                result_data_list.append(result_data)
        # Bug fix: pd.concat([]) raises ValueError; report instead.
        if not data_list:
            logging.warning("next_periods_rise_or_fall produced no data for any pair")
            return None, None
        data = pd.concat(data_list)
        result_data = pd.concat(result_data_list)
        if output_excel:
            data = data.reset_index(drop=True)
            result_data = result_data.reset_index(drop=True)
            current_date = datetime.now().strftime("%Y%m%d%H%M%S")
            file_name = (
                f"next_periods_rise_or_fall_{current_date}.xlsx"
            )
            try:
                with pd.ExcelWriter(
                    os.path.join(self.output_folder, file_name)
                ) as writer:
                    data.to_excel(writer, sheet_name="details", index=False)
                    result_data.to_excel(
                        writer, sheet_name="next_periods_statistics", index=False
                    )
            except Exception as e:
                logging.error(f"导出Excel文件失败: {e}")
        return data, result_data
if __name__ == "__main__":
    app = HugeVolumeMain()
    # One-off backfill — run manually when bootstrapping the spike table:
    # app.batch_initial_detect_volume_spike(start="2025-05-01 00:00:00")
    app.batch_update_volume_spike()
    # Post-spike statistics export — run manually when needed:
    # app.batch_next_periods_rise_or_fall(periods=[3, 5], output_excel=True)