support statistics price-volume relationship

This commit is contained in:
blade 2025-07-31 20:31:22 +08:00
parent 5bc9b2b8c7
commit 4ee8658d89
29 changed files with 294 additions and 328 deletions

Binary file not shown.

Binary file not shown.

View File

@ -4,6 +4,7 @@ import os
import re import re
import pandas as pd import pandas as pd
from datetime import datetime from datetime import datetime
from copy import deepcopy
from typing import Optional, List, Dict, Any, Tuple from typing import Optional, List, Dict, Any, Tuple
logging.basicConfig( logging.basicConfig(
@ -175,8 +176,7 @@ class HugeVolume:
self, self,
data: pd.DataFrame, data: pd.DataFrame,
window_size: int = 50, window_size: int = 50,
periods: List[int] = [3, 5], periods: List[int] = [1, 2, 3, 5, 10],
output_excel: bool = False
) -> Tuple[pd.DataFrame, pd.DataFrame]: ) -> Tuple[pd.DataFrame, pd.DataFrame]:
""" """
1. 根据period_count计算每个timestamp的下一个periods的rise_or_fall 1. 根据period_count计算每个timestamp的下一个periods的rise_or_fall
@ -195,9 +195,8 @@ class HugeVolume:
1000000000 100 1 103 rise 98 fall 1000000000 100 1 103 rise 98 fall
因为之后第3个periods的close是103所以next_3_result为rise 因为之后第3个periods的close是103所以next_3_result为rise
因为之后第5个periods的close是98所以next_3_result为fall 因为之后第5个periods的close是98所以next_3_result为fall
2. 根据volume_ratio_percentile_10将data分成10份然后计算每一份的上涨与下跌次数以及平均收益率
2. 如果output_excel为True则输出到excel 3. 新建一个列表: result计算之后n个周期close上涨或下跌的比例
3. 新建一个列表: result计算huge_volume为1时之后3或5个周期close上涨或下跌的比例
a. 计算huge_volume为1时且price_80_high为1时的数量如100 并且计算next_3_result为fall的次数如50 然后计算fall_ratio 如50/100=0.5 a. 计算huge_volume为1时且price_80_high为1时的数量如100 并且计算next_3_result为fall的次数如50 然后计算fall_ratio 如50/100=0.5
b. 计算huge_volume为1时且price_80_high为1时的数量如100 并且计算next_5_result为fall的次数如30 然后计算fall_ratio 如30/100=0.3 b. 计算huge_volume为1时且price_80_high为1时的数量如100 并且计算next_5_result为fall的次数如30 然后计算fall_ratio 如30/100=0.3
c. 计算huge_volume为1时且price_20_low为1时的数量如100 并且计算next_3_result为rise的次数如50 然后计算rise_ratio 如50/100=0.5 c. 计算huge_volume为1时且price_20_low为1时的数量如100 并且计算next_3_result为rise的次数如50 然后计算rise_ratio 如50/100=0.5
@ -206,21 +205,32 @@ class HugeVolume:
Args: Args:
data: 包含巨量交易数据的DataFrame data: 包含巨量交易数据的DataFrame
periods: 计算周期列表默认[3, 5] periods: 计算周期列表默认[1, 2, 3, 5, 10]
output_excel: 是否输出到Excel文件默认False output_excel: 是否输出到Excel文件默认False
Returns: Returns:
Tuple[pd.DataFrame, pd.DataFrame]: (处理后的数据, 统计结果) Tuple[pd.DataFrame, pd.DataFrame]: (处理后的数据, 统计结果)
""" """
# 将huge_volume, volume_80_20_price_spike, volume_90_10_price_spike,
# price_80_high, price_20_low, price_90_high, price_10_low设置为整型
data["huge_volume"] = data["huge_volume"].astype(int)
data["volume_80_20_price_spike"] = data["volume_80_20_price_spike"].astype(int)
data["volume_90_10_price_spike"] = data["volume_90_10_price_spike"].astype(int)
data["price_80_high"] = data["price_80_high"].astype(int)
data["price_20_low"] = data["price_20_low"].astype(int)
data["price_90_high"] = data["price_90_high"].astype(int)
data["price_10_low"] = data["price_10_low"].astype(int)
data = data.sort_values(by="timestamp", ascending=True) data = data.sort_values(by="timestamp", ascending=True)
data = data.reset_index(drop=True) data = data.reset_index(drop=True)
# 计算未来价格变化 # 计算未来价格变化
for period in periods: for period in periods:
data[f"next_{period}_close"] = data["close"].shift(-period) data[f"next_{period}_close"] = data["close"].shift(-period)
data[f"next_{period}_result"] = ( data[f"next_{period}_change"] = (
data[f"next_{period}_close"] / data["close"] - 1 (data[f"next_{period}_close"] / data["close"] - 1) * 100
) )
data[f"next_{period}_result"] = data[f"next_{period}_result"].apply( # 添加一列next_{period}_result如果next_{period}_change大于0则值为rise如果next_{period}_change小于0则值为fall如果next_{period}_change等于0则值为draw
data[f"next_{period}_result"] = data[f"next_{period}_change"].apply(
lambda x: ( lambda x: (
"rise" "rise"
if pd.notna(x) and x > 0 if pd.notna(x) and x > 0
@ -232,67 +242,125 @@ class HugeVolume:
) )
) )
# 将volume_ratio按照百分位分成十份
huge_volume_data = deepcopy(data[data["huge_volume"] == 1])
huge_volume_data = huge_volume_data.sort_values(by="timestamp", ascending=True)
huge_volume_data = huge_volume_data.reset_index(drop=True)
huge_volume_data["volume_ratio_percentile"] = huge_volume_data["volume_ratio"].rank(pct=True)
huge_volume_data["volume_ratio_percentile_10"] = huge_volume_data["volume_ratio_percentile"].apply(
lambda x: 10 if x <= 0.1 else 20
if x <= 0.2 else 30 if x <= 0.3 else 40
if x <= 0.4 else 50 if x <= 0.5 else 60
if x <= 0.6 else 70 if x <= 0.7 else 80
if x <= 0.8 else 90 if x <= 0.9 else 100
)
huge_volume_ratio_percentile_10_mean = huge_volume_data.groupby("volume_ratio_percentile_10")["volume_ratio"].mean()
percentile_10_mean = round(float(huge_volume_data["volume_ratio"].mean()), 4)
# insert one row to huge_volume_ratio_percentile_10_mean, key is -1, value is percentile_10_mean
huge_volume_ratio_percentile_10_mean.loc["-1"] = percentile_10_mean
# transform huge_volume_ratio_percentile_10_mean index to int
huge_volume_ratio_percentile_10_mean.index = huge_volume_ratio_percentile_10_mean.index.astype(int)
# sort huge_volume_ratio_percentile_10_mean by index
huge_volume_ratio_percentile_10_mean = huge_volume_ratio_percentile_10_mean.sort_index()
results = []
# iterate huge_volume_ratio_percentile_10_mean
for index, value in huge_volume_ratio_percentile_10_mean.items():
if index == -1:
data_temp = deepcopy(huge_volume_data)
volume_ratio_percentile_10 = "all"
current_percentile_10_mean = percentile_10_mean
else:
data_temp = deepcopy(huge_volume_data[huge_volume_data["volume_ratio_percentile_10"] == index])
volume_ratio_percentile_10 = str(index)
current_percentile_10_mean = round(value, 4)
data_temp = data_temp.reset_index(drop=True)
data_temp = data_temp.sort_values(by="timestamp", ascending=True)
data_temp = data_temp.reset_index(drop=True)
# 过滤data, 只获取huge_volume为1且价格处于分位数位置的行 # 过滤data, 只获取huge_volume为1且价格处于分位数位置的行
price_conditions = [] price_conditions = []
if "price_80_high" in data.columns: if "price_80_high" in data_temp.columns:
price_conditions.append(data["price_80_high"] == 1) price_conditions.append(data_temp["price_80_high"] == 1)
if "price_20_low" in data.columns: if "price_20_low" in data_temp.columns:
price_conditions.append(data["price_20_low"] == 1) price_conditions.append(data_temp["price_20_low"] == 1)
if "price_90_high" in data.columns: if "price_90_high" in data_temp.columns:
price_conditions.append(data["price_90_high"] == 1) price_conditions.append(data_temp["price_90_high"] == 1)
if "price_10_low" in data.columns: if "price_10_low" in data_temp.columns:
price_conditions.append(data["price_10_low"] == 1) price_conditions.append(data_temp["price_10_low"] == 1)
if price_conditions: if price_conditions:
combined_condition = data["huge_volume"] == 1 combined_condition = data_temp["huge_volume"] == 1
for condition in price_conditions: for condition in price_conditions:
combined_condition = combined_condition | condition combined_condition = combined_condition | condition
data = data[combined_condition] data_temp = data_temp[combined_condition]
data = data.reset_index(drop=True) data_temp = data_temp.reset_index(drop=True)
# 统计各种分位数情况的数量 # 统计各种分位数情况的数量
price_stats = {} price_stats = {}
for price_type in ["price_80_high", "price_20_low", "price_90_high", "price_10_low"]: for price_type in ["price_80_high", "price_20_low", "price_90_high", "price_10_low"]:
if price_type in data.columns: if price_type in data.columns:
price_stats[price_type] = len(data[(data["huge_volume"] == 1) & (data[price_type] == 1)]) price_stats[price_type] = len(data_temp[(data_temp["huge_volume"] == 1) & (data_temp[price_type] == 1)])
results = []
for period in periods: for period in periods:
for price_type, count in price_stats.items(): for price_type, count in price_stats.items():
if count > 0: if count > 0:
# 计算下跌次数 # 计算下跌次数
fall_count = len( fall_count = len(
data[ data_temp[
(data["huge_volume"] == 1) & (data_temp["huge_volume"] == 1) &
(data[price_type] == 1) & (data_temp[price_type] == 1) &
(data[f"next_{period}_result"] == "fall") (data_temp[f"next_{period}_result"] == "fall")
] ]
) )
# 计算上涨次数 # 计算上涨次数
rise_count = len( rise_count = len(
data[ data_temp[
(data["huge_volume"] == 1) & (data_temp["huge_volume"] == 1) &
(data[price_type] == 1) & (data_temp[price_type] == 1) &
(data[f"next_{period}_result"] == "rise") (data_temp[f"next_{period}_result"] == "rise")
] ]
) )
draw_count = len(
data_temp[
(data_temp["huge_volume"] == 1) &
(data_temp[price_type] == 1) &
(data_temp[f"next_{period}_result"] == "draw")
]
)
# 根据data[f"next_{period}_result"]获得平均收益率
average_return = float(data_temp[(data_temp["huge_volume"] == 1) & (data_temp[price_type] == 1)]
[f"next_{period}_change"].mean())
max_return = float(data_temp[(data_temp["huge_volume"] == 1) & (data_temp[price_type] == 1)]
[f"next_{period}_change"].max())
min_return = float(data_temp[(data_temp["huge_volume"] == 1) & (data_temp[price_type] == 1)]
[f"next_{period}_change"].min())
results.append( results.append(
{ {
"symbol": data["symbol"].iloc[0] if len(data) > 0 else "", "symbol": data_temp["symbol"].iloc[0] if len(data_temp) > 0 else "",
"bar": data["bar"].iloc[0] if len(data) > 0 else "", "bar": data_temp["bar"].iloc[0] if len(data_temp) > 0 else "",
"window_size": window_size, "window_size": window_size,
"huge_volume": 1, "huge_volume": 1,
"volume_ratio_percentile_10": volume_ratio_percentile_10,
"volume_ratio_percentile_10_mean": current_percentile_10_mean,
"price_type": price_type, "price_type": price_type,
"next_period": period, "next_period": period,
"fall_count": fall_count, "average_return": average_return,
"max_return": max_return,
"min_return": min_return,
"rise_count": rise_count, "rise_count": rise_count,
"fall_ratio": fall_count / count, "rise_ratio": round((rise_count / count) * 100, 4),
"rise_ratio": rise_count / count, "fall_count": fall_count,
"fall_ratio": round((fall_count / count) * 100, 4),
"draw_count": draw_count,
"draw_ratio": round((draw_count / count) * 100, 4),
"total_count": count, "total_count": count,
} }
) )
result_data = pd.DataFrame(results) result_data = pd.DataFrame(results)
return data, result_data return huge_volume_data, result_data

View File

@ -5,7 +5,7 @@ from typing import Optional
import pandas as pd import pandas as pd
import okx.MarketData as Market import okx.MarketData as Market
import okx.TradingData as TradingData import okx.TradingData as TradingData
from core.utils import datetime_to_timestamp from core.utils import transform_date_time_to_timestamp
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s') logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s')
class MarketDataMonitor: class MarketDataMonitor:
@ -46,18 +46,9 @@ class MarketDataMonitor:
two_months_ago = datetime.now() - timedelta(days=60) two_months_ago = datetime.now() - timedelta(days=60)
start_time = int(two_months_ago.timestamp() * 1000) start_time = int(two_months_ago.timestamp() * 1000)
else: else:
try: start_time = transform_date_time_to_timestamp(start)
# 判断是否就是timestamp整型数据 if start_time is None:
if isinstance(start, int): logging.error(f"start参数解析失败: {start}")
start_time = start
# 判断是否为纯数字UTC毫秒级timestamp
elif start.isdigit():
start_time = int(start)
else:
# 按北京时间字符串处理转换为毫秒级timestamp
start_time = datetime_to_timestamp(start)
except Exception as e:
logging.error(f"start参数解析失败: {e}")
return None return None
columns = ["timestamp", "open", "high", "low", "close", "volume", "volCcy", "volCCyQuote", "confirm"] columns = ["timestamp", "open", "high", "low", "close", "volume", "volCcy", "volCCyQuote", "confirm"]
all_data = [] all_data = []

View File

@ -3,7 +3,7 @@ from datetime import datetime
import logging import logging
from typing import Optional from typing import Optional
import pandas as pd import pandas as pd
from core.base import QuantTrader from core.biz.quant_trader import QuantTrader
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s') logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s')

View File

@ -4,8 +4,8 @@ import logging
from typing import Optional from typing import Optional
import pandas as pd import pandas as pd
import okx.MarketData as Market import okx.MarketData as Market
from core.utils import datetime_to_timestamp, timestamp_to_datetime from core.utils import timestamp_to_datetime
from core.db_trade_data import DBTradeData from core.db.db_trade_data import DBTradeData
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s') logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s')

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,8 +1,8 @@
import pandas as pd import pandas as pd
import logging import logging
from typing import Optional, List, Dict, Any, Union from typing import Optional, List, Dict, Any, Union
from core.db_manager import DBData from core.db.db_manager import DBData
from core.utils import check_date_time_format, datetime_to_timestamp from core.utils import transform_date_time_to_timestamp
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s") logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s")
@ -55,22 +55,11 @@ class DBHugeVolumeData:
""" """
if time_param is None: if time_param is None:
return None return None
time_param = transform_date_time_to_timestamp(time_param)
if isinstance(time_param, int): if time_param is None:
return None
return time_param return time_param
if isinstance(time_param, str):
if time_param.isdigit():
return int(time_param)
else:
parsed_time = check_date_time_format(time_param)
if parsed_time is None:
logging.warning(f"日期时间格式错误: {time_param}")
return None
return datetime_to_timestamp(parsed_time)
return None
def _build_query_conditions( def _build_query_conditions(
self, self,
symbol: Optional[str] = None, symbol: Optional[str] = None,

View File

@ -2,11 +2,7 @@ import pandas as pd
from sqlalchemy import create_engine, exc, text from sqlalchemy import create_engine, exc, text
import re, datetime import re, datetime
import logging import logging
from core.utils import ( from core.utils import transform_data_type
transform_data_type,
datetime_to_timestamp,
check_date_time_format,
)
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s") logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s")

View File

@ -1,7 +1,7 @@
import pandas as pd import pandas as pd
import logging import logging
from core.db_manager import DBData from core.db.db_manager import DBData
from core.utils import check_date_time_format, datetime_to_timestamp from core.utils import transform_date_time_to_timestamp
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s") logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s")
@ -119,34 +119,13 @@ class DBMarketData:
condition_dict = {"symbol": symbol, "bar": bar} condition_dict = {"symbol": symbol, "bar": bar}
else: else:
if start is not None: if start is not None:
if isinstance(start, str): start = transform_date_time_to_timestamp(start)
if start.isdigit():
start = int(start)
else:
start = check_date_time_format(start)
# 判断是否是日期时间格式
if start is None: if start is None:
logging.warning(f"日期时间格式错误: {start}")
return None
start = datetime_to_timestamp(start)
elif isinstance(start, int):
start = int(start)
else:
logging.warning(f"开始时间格式错误: {start}") logging.warning(f"开始时间格式错误: {start}")
return None return None
if end is not None: if end is not None:
if isinstance(end, str): end = transform_date_time_to_timestamp(end)
if end.isdigit():
end = int(end)
else:
end = check_date_time_format(end)
if end is None: if end is None:
logging.warning(f"日期时间格式错误: {end}")
return None
end = datetime_to_timestamp(end)
elif isinstance(end, int):
end = int(end)
else:
logging.warning(f"结束时间格式错误: {end}") logging.warning(f"结束时间格式错误: {end}")
return None return None
if start is not None and end is not None: if start is not None and end is not None:

View File

@ -1,8 +1,8 @@
import pandas as pd import pandas as pd
import logging import logging
from typing import Optional, List, Dict, Any, Union from typing import Optional, List, Dict, Any, Union
from core.db_manager import DBData from core.db.db_manager import DBData
from core.utils import check_date_time_format, datetime_to_timestamp from core.utils import transform_date_time_to_timestamp
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s") logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s")
@ -34,22 +34,11 @@ class DBTradeData:
""" """
if time_param is None: if time_param is None:
return None return None
time_param = transform_date_time_to_timestamp(time_param)
if isinstance(time_param, int): if time_param is None:
return None
return time_param return time_param
if isinstance(time_param, str):
if time_param.isdigit():
return int(time_param)
else:
parsed_time = check_date_time_format(time_param)
if parsed_time is None:
logging.warning(f"日期时间格式错误: {time_param}")
return None
return datetime_to_timestamp(parsed_time)
return None
def _build_query_conditions( def _build_query_conditions(
self, self,
symbol: Optional[str] = None, symbol: Optional[str] = None,

View File

@ -1,6 +1,9 @@
from datetime import datetime, timezone, timedelta from datetime import datetime, timezone, timedelta
from decimal import Decimal from decimal import Decimal
import re import re
import logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s")
def datetime_to_timestamp(date_str: str) -> int: def datetime_to_timestamp(date_str: str) -> int:
""" """
@ -31,7 +34,7 @@ def transform_data_type(data: dict):
return data return data
def check_date_time_format(date_time: str) -> bool: def check_date_time_format(date_time: str) -> str | None:
""" """
检查日期时间格式是否正确 检查日期时间格式是否正确
""" """
@ -43,3 +46,26 @@ def check_date_time_format(date_time: str) -> bool:
return f"{date_time[0:4]}-{date_time[4:6]}-{date_time[6:8]} 00:00:00" return f"{date_time[0:4]}-{date_time[4:6]}-{date_time[6:8]} 00:00:00"
else: else:
return None return None
def transform_date_time_to_timestamp(date_time: int | str):
"""
将日期时间转换为毫秒级timestamp
"""
try:
# 判断是否就是timestamp整型数据
if isinstance(date_time, int):
date_time = date_time
# 判断是否为纯数字UTC毫秒级timestamp
elif date_time.isdigit():
date_time = int(date_time)
else:
date_time = check_date_time_format(date_time)
if date_time is None:
logging.error(f"日期时间格式错误: {date_time}")
return None
# 按北京时间字符串处理转换为毫秒级timestamp
date_time = datetime_to_timestamp(date_time)
return date_time
except Exception as e:
logging.error(f"start参数解析失败: {e}")
return None

View File

@ -1,7 +1,7 @@
from core.huge_volume import HugeVolume from core.biz.huge_volume import HugeVolume
from core.db_market_data import DBMarketData from core.db.db_market_data import DBMarketData
from core.db_huge_volume_data import DBHugeVolumeData from core.db.db_huge_volume_data import DBHugeVolumeData
from core.utils import timestamp_to_datetime from core.utils import timestamp_to_datetime, transform_date_time_to_timestamp
from market_data_main import MarketDataMain from market_data_main import MarketDataMain
import logging import logging
from config import MONITOR_CONFIG, MYSQL_CONFIG, WINDOW_SIZE from config import MONITOR_CONFIG, MYSQL_CONFIG, WINDOW_SIZE
@ -43,7 +43,7 @@ class HugeVolumeMain:
"initial_date", "2025-05-01 00:00:00" "initial_date", "2025-05-01 00:00:00"
) )
data = self.detect_volume_spike( data = self.detect_volume_spike(
symbol, bar, window_size, start, only_output_huge_volume=True, is_update=False symbol, bar, window_size, start, only_output_huge_volume=False, is_update=False
) )
if data is not None and len(data) > 0: if data is not None and len(data) > 0:
logging.info(f"此次初始化巨量交易数据: {len(data)}") logging.info(f"此次初始化巨量交易数据: {len(data)}")
@ -88,7 +88,7 @@ class HugeVolumeMain:
threshold=self.threshold, threshold=self.threshold,
check_price=True, check_price=True,
only_output_huge_volume=only_output_huge_volume, only_output_huge_volume=only_output_huge_volume,
output_excel=True, output_excel=False,
) )
if data is not None: if data is not None:
if is_update: if is_update:
@ -125,7 +125,7 @@ class HugeVolumeMain:
symbol, bar, window_size symbol, bar, window_size
) )
if latest_huge_volume_data is None or len(latest_huge_volume_data) == 0: if latest_huge_volume_data is None or len(latest_huge_volume_data) == 0:
self.detect_volume_spike(symbol, bar, only_output_huge_volume=True) self.detect_volume_spike(symbol, bar, only_output_huge_volume=False)
return return
else: else:
earliest_date_time = latest_huge_volume_data["date_time"] earliest_date_time = latest_huge_volume_data["date_time"]
@ -141,7 +141,7 @@ class HugeVolumeMain:
bar=bar, bar=bar,
window_size=window_size, window_size=window_size,
start=earliest_date_time, start=earliest_date_time,
only_output_huge_volume=True, only_output_huge_volume=False,
is_update=True, is_update=True,
) )
logging.info( logging.info(
@ -206,7 +206,6 @@ class HugeVolumeMain:
start: str = None, start: str = None,
end: str = None, end: str = None,
periods: list = [3, 5], periods: list = [3, 5],
output_excel: bool = False,
): ):
if start is None: if start is None:
start = MONITOR_CONFIG.get("volume_monitor", {}).get( start = MONITOR_CONFIG.get("volume_monitor", {}).get(
@ -214,78 +213,29 @@ class HugeVolumeMain:
) )
if end is None: if end is None:
end = datetime.now().strftime("%Y-%m-%d %H:%M:%S") end = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
logging.info(f"开始计算巨量出现后之后3或5个周期上涨或下跌的比例: {symbol} {bar} 窗口大小: {window_size}{start}{end}") periods_text = ", ".join([str(period) for period in periods])
huge_volume_data = ( logging.info(f"开始计算巨量出现后,之后{periods_text}个周期,上涨或下跌的比例: {symbol} {bar} 窗口大小: {window_size}{start}{end}")
volume_statistics_data = (
self.db_huge_volume_data.query_huge_volume_data_by_symbol_bar_window_size( self.db_huge_volume_data.query_huge_volume_data_by_symbol_bar_window_size(
symbol, bar, window_size, start, end symbol, bar, window_size, start, end
) )
) )
if huge_volume_data is None or len(huge_volume_data) == 0: if volume_statistics_data is None or len(volume_statistics_data) == 0:
logging.warning(f"获取巨量交易数据为空: {symbol} {bar} 窗口大小: {window_size}{start}{end}") logging.warning(f"获取巨量交易数据为空: {symbol} {bar} 窗口大小: {window_size}{start}{end}")
return None return None
else: else:
if isinstance(huge_volume_data, list): if isinstance(volume_statistics_data, list):
huge_volume_data = pd.DataFrame(huge_volume_data) volume_statistics_data = pd.DataFrame(volume_statistics_data)
elif isinstance(huge_volume_data, dict): elif isinstance(volume_statistics_data, dict):
huge_volume_data = pd.DataFrame([huge_volume_data]) volume_statistics_data = pd.DataFrame([volume_statistics_data])
market_data = self.db_market_data.query_market_data_by_symbol_bar(
symbol, bar, start, end
)
if market_data is None or len(market_data) == 0:
logging.warning(f"获取行情数据为空: {symbol} {bar} 窗口大小: {window_size}{start}{end}")
return None
else:
if isinstance(market_data, list):
market_data = pd.DataFrame(market_data)
elif isinstance(market_data, dict):
market_data = pd.DataFrame([market_data])
if ( if (
huge_volume_data is not None volume_statistics_data is not None
and len(huge_volume_data) > 0 and len(volume_statistics_data) > 0
and market_data is not None
and len(market_data) > 0
): ):
# 将huge_volume_data和market_data合并
# market_data移除id列
market_data = market_data.drop(columns=["id"])
# huge_volume_data移除id列
huge_volume_data = huge_volume_data.drop(columns=["id"])
data = pd.merge(market_data, huge_volume_data, on="timestamp", how="left")
# 同名的列只是后缀为_x和_y需要合并
data = data.rename(
columns={
"symbol_x": "symbol",
"bar_x": "bar",
"date_time_x": "date_time",
"open_x": "open",
"high_x": "high",
"low_x": "low",
"close_x": "close",
"volume_x": "volume",
"volCcy_x": "volCcy",
"volCCyQuote_x": "volCCyQuote",
"create_time_x": "create_time",
}
)
data = data.drop(
columns=[
"symbol_y",
"bar_y",
"date_time_y",
"open_y",
"high_y",
"low_y",
"close_y",
"volume_y",
"volCcy_y",
"volCCyQuote_y",
"create_time_y",
]
)
# 根据timestamp排序 # 根据timestamp排序
data = data.sort_values(by="timestamp", ascending=True) volume_statistics_data = volume_statistics_data.sort_values(by="timestamp", ascending=True)
data["window_size"] = window_size volume_statistics_data["window_size"] = window_size
data = data[ volume_statistics_data = volume_statistics_data[
[ [
"symbol", "symbol",
"bar", "bar",
@ -299,24 +249,25 @@ class HugeVolumeMain:
"volume", "volume",
"huge_volume", "huge_volume",
"volume_ratio", "volume_ratio",
"volume_price_spike", "volume_80_20_price_spike",
"price_high", "price_80_high",
"price_low", "price_20_low",
"volume_90_10_price_spike",
"price_90_high",
"price_10_low",
] ]
] ]
data = data.dropna() volume_statistics_data = volume_statistics_data.reset_index(drop=True)
data = data.reset_index(drop=True) huge_volume_data, result_data = self.huge_volume.next_periods_rise_or_fall(
data, result_data = self.huge_volume.next_periods_rise_or_fall( data=volume_statistics_data, window_size=window_size, periods=periods
data=data, window_size=window_size, periods=periods, output_excel=output_excel
) )
return data, result_data return huge_volume_data, result_data
def batch_next_periods_rise_or_fall( def batch_next_periods_rise_or_fall(
self, self,
window_size: int = 50,
start: str = None, start: str = None,
end: str = None, end: str = None,
periods: list = [3, 5], next_periods: list = [1, 2, 3, 5, 10],
output_excel: bool = False, output_excel: bool = False,
): ):
if start is None: if start is None:
@ -325,20 +276,25 @@ class HugeVolumeMain:
) )
if end is None: if end is None:
end = datetime.now().strftime("%Y-%m-%d %H:%M:%S") end = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
data_list = [] huge_volume_data_list = []
result_data_list = [] result_data_list = []
window_size_list = WINDOW_SIZE.get("window_sizes", None)
if window_size_list is None or not isinstance(window_size_list, list) or len(window_size_list) == 0:
window_size_list = [50, 80, 100, 120]
for symbol in self.market_data_main.symbols: for symbol in self.market_data_main.symbols:
for bar in self.market_data_main.bars: for bar in self.market_data_main.bars:
data, result_data = self.next_periods_rise_or_fall( for window_size in window_size_list:
symbol, bar, window_size, start, end, periods, output_excel huge_volume_data, result_data = self.next_periods_rise_or_fall(
symbol, bar, window_size, start, end, next_periods
) )
data_list.append(data) huge_volume_data_list.append(huge_volume_data)
result_data_list.append(result_data) result_data_list.append(result_data)
data = pd.concat(data_list) total_huge_volume_data = pd.concat(huge_volume_data_list)
result_data = pd.concat(result_data_list) total_result_data = pd.concat(result_data_list)
if output_excel: if output_excel:
data = data.reset_index(drop=True) total_huge_volume_data = total_huge_volume_data.reset_index(drop=True)
result_data = result_data.reset_index(drop=True) total_result_data = total_result_data.reset_index(drop=True)
current_date = datetime.now().strftime("%Y%m%d%H%M%S") current_date = datetime.now().strftime("%Y%m%d%H%M%S")
file_name = ( file_name = (
f"next_periods_rise_or_fall_{current_date}.xlsx" f"next_periods_rise_or_fall_{current_date}.xlsx"
@ -347,13 +303,13 @@ class HugeVolumeMain:
with pd.ExcelWriter( with pd.ExcelWriter(
os.path.join(self.output_folder, file_name) os.path.join(self.output_folder, file_name)
) as writer: ) as writer:
data.to_excel(writer, sheet_name="details", index=False) total_huge_volume_data.to_excel(writer, sheet_name="details", index=False)
result_data.to_excel( total_result_data.to_excel(
writer, sheet_name="next_periods_statistics", index=False writer, sheet_name="next_periods_statistics", index=False
) )
except Exception as e: except Exception as e:
logging.error(f"导出Excel文件失败: {e}") logging.error(f"导出Excel文件失败: {e}")
return data, result_data return total_huge_volume_data, total_result_data
def batch_initial_detect_volume_spike(threshold: float = 2.0): def batch_initial_detect_volume_spike(threshold: float = 2.0):
@ -379,4 +335,6 @@ def batch_update_volume_spike(threshold: float = 2.0):
if __name__ == "__main__": if __name__ == "__main__":
# batch_initial_detect_volume_spike(threshold=2.0) # batch_initial_detect_volume_spike(threshold=2.0)
batch_update_volume_spike(threshold=2.0) # batch_update_volume_spike(threshold=2.0)
huge_volume_main = HugeVolumeMain(threshold=2.0)
huge_volume_main.batch_next_periods_rise_or_fall(output_excel=True)

View File

@ -1,9 +1,9 @@
import logging import logging
from datetime import datetime from datetime import datetime
from time import sleep from time import sleep
from core.market_data_monitor import MarketDataMonitor from core.biz.market_data_monitor import MarketDataMonitor
from core.db_market_data import DBMarketData from core.db.db_market_data import DBMarketData
from core.utils import datetime_to_timestamp, timestamp_to_datetime from core.utils import datetime_to_timestamp, timestamp_to_datetime, transform_date_time_to_timestamp
from trade_data_main import TradeDataMain from trade_data_main import TradeDataMain
from config import ( from config import (
API_KEY, API_KEY,
@ -72,16 +72,15 @@ class MarketDataMain:
获取保存数据 获取保存数据
""" """
end_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") end_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
end_time_ts = datetime_to_timestamp(end_time) end_time_ts = transform_date_time_to_timestamp(end_time)
if isinstance(start, str): if end_time_ts is None:
if start.isdigit(): logging.error(f"结束时间格式错误: {end_time}")
start_time_ts = int(start) return None
else:
start_time_ts = datetime_to_timestamp(start) start_time_ts = transform_date_time_to_timestamp(start)
elif isinstance(start, int): if start_time_ts is None:
start_time_ts = start logging.error(f"开始时间格式错误: {start}")
else: return None
raise ValueError(f"开始时间格式错误: {start}")
# 如果bar为5m, 15m: # 如果bar为5m, 15m:
# end_time_ts与start_time_ts相差超过1天则按照1天为单位 # end_time_ts与start_time_ts相差超过1天则按照1天为单位
@ -91,39 +90,26 @@ class MarketDataMain:
# 获取数据直到end_time_ts # 获取数据直到end_time_ts
threshold = None threshold = None
if bar in ["5m", "15m"]: if bar in ["5m", "15m"]:
threshold = 86400000 - 1 threshold = 86400000
elif bar in ["1H", "4H"]: elif bar in ["1H", "4H"]:
threshold = 432000000 - 1 threshold = 432000000
elif bar == "1D": elif bar == "1D":
threshold = 864000000 - 1 threshold = 864000000
while start_time_ts < end_time_ts: while start_time_ts < end_time_ts:
current_end_time_ts = start_time_ts + threshold current_start_time_ts = end_time_ts - threshold
if current_end_time_ts >= end_time_ts: if current_start_time_ts < start_time_ts:
current_end_time_ts = end_time_ts current_start_time_ts = start_time_ts
start_date_time = timestamp_to_datetime(start_time_ts) start_date_time = timestamp_to_datetime(current_start_time_ts)
end_date_time = timestamp_to_datetime(current_end_time_ts) end_date_time = timestamp_to_datetime(end_time_ts)
logging.info( logging.info(
f"获取行情数据: {symbol} {bar}{start_date_time}{end_date_time}" f"获取行情数据: {symbol} {bar}{start_date_time}{end_date_time}"
) )
# 首先判断是否存在 > current_end_time_ts + 1 的数据
# 如果存在,则跳过此次循环
data = self.db_market_data.query_market_data_by_symbol_bar(
symbol=symbol,
bar=bar,
start=start_time_ts,
end=current_end_time_ts,
)
if data is not None and len(data) > 0:
logging.info(f"已存在{symbol}, {bar}{start_date_time}{end_date_time} 的数据,跳过此次循环")
start_time_ts = current_end_time_ts
continue
# current_end_time_ts + 1, 目的是为了避免缺少最后一条数据
data = self.market_data_monitor.get_historical_kline_data( data = self.market_data_monitor.get_historical_kline_data(
symbol=symbol, symbol=symbol,
start=start_time_ts, start=current_start_time_ts,
bar=bar, bar=bar,
end_time=current_end_time_ts + 1, end_time=end_time_ts,
) )
if data is not None and len(data) > 0: if data is not None and len(data) > 0:
data["buy_sz"] = -1 data["buy_sz"] = -1
@ -133,9 +119,8 @@ class MarketDataMain:
# 比特币的数据获取过慢,暂时不获取交易数据 # 比特币的数据获取过慢,暂时不获取交易数据
# if not symbol.endswith("-SWAP"): # if not symbol.endswith("-SWAP"):
# # trade_data的end_time需要比market_data的end_time大一个周期 # # trade_data的end_time需要比market_data的end_time大一个周期
# trade_end_time_ts = current_end_time_ts + BAR_THRESHOLD[bar] + 1
# trade_data = self.trade_data_main.get_trade_data( # trade_data = self.trade_data_main.get_trade_data(
# symbol=symbol, start_time=start_time_ts, end_time=trade_end_time_ts # symbol=symbol, start_time=current_start_time_ts, end_time=end_time_ts
# ) # )
# for index, row in data.iterrows(): # for index, row in data.iterrows():
# try: # try:
@ -180,8 +165,9 @@ class MarketDataMain:
] ]
] ]
self.db_market_data.insert_data_to_mysql(data) self.db_market_data.insert_data_to_mysql(data)
if current_start_time_ts == start_time_ts:
start_time_ts = current_end_time_ts break
end_time_ts = current_start_time_ts
return data return data
def batch_update_data(self): def batch_update_data(self):
@ -203,11 +189,14 @@ class MarketDataMain:
logging.info(f"开始更新行情数据: {symbol} {bar}") logging.info(f"开始更新行情数据: {symbol} {bar}")
latest_data = self.db_market_data.query_latest_data(symbol, bar) latest_data = self.db_market_data.query_latest_data(symbol, bar)
if not latest_data: if not latest_data:
logging.info(f"{symbol}, {bar} 无数据,开始从{self.initial_date}初始化数据")
data = self.fetch_save_data(symbol, bar, self.initial_date) data = self.fetch_save_data(symbol, bar, self.initial_date)
else: else:
latest_timestamp = latest_data.get("timestamp") latest_timestamp = latest_data.get("timestamp")
if latest_timestamp: if latest_timestamp:
latest_timestamp = int(latest_timestamp) latest_timestamp = int(latest_timestamp)
latest_date_time = timestamp_to_datetime(latest_timestamp)
logging.info(f"{symbol}, {bar} 上次获取的最新数据时间: {latest_date_time}")
else: else:
logging.warning(f"获取{symbol}, {bar} 最新数据失败") logging.warning(f"获取{symbol}, {bar} 最新数据失败")
return return
@ -217,5 +206,5 @@ class MarketDataMain:
if __name__ == "__main__": if __name__ == "__main__":
market_data_main = MarketDataMain() market_data_main = MarketDataMain()
# market_data_main.batch_update_data() market_data_main.batch_update_data()
market_data_main.initial_data() # market_data_main.initial_data()

View File

@ -1,6 +1,6 @@
select * from crypto_market_data select * from crypto_market_data
WHERE symbol='DOGE-USDT-SWAP' and bar='1D' #and date_time > '2025-07-01' WHERE symbol='XCH-USDT-SWAP' and bar='5m' #and date_time > '2025-07-01'
order by timestamp; order by timestamp desc;
delete FROM crypto_market_data where symbol != 'XCH-USDT'; delete FROM crypto_market_data where symbol != 'XCH-USDT';

View File

@ -12,12 +12,12 @@ CREATE TABLE IF NOT EXISTS crypto_huge_volume (
volume DECIMAL(30,8) NOT NULL COMMENT '交易量', volume DECIMAL(30,8) NOT NULL COMMENT '交易量',
volCcy DECIMAL(30,8) NOT NULL COMMENT '交易量(基础货币)', volCcy DECIMAL(30,8) NOT NULL COMMENT '交易量(基础货币)',
volCCyQuote DECIMAL(30,8) NOT NULL COMMENT '交易量(计价货币)', volCCyQuote DECIMAL(30,8) NOT NULL COMMENT '交易量(计价货币)',
volume_ma DECIMAL(30,8) NOT NULL COMMENT '交易量移动平均', volume_ma DECIMAL(30,8) NULL COMMENT '交易量移动平均',
volume_std DECIMAL(30,8) NOT NULL COMMENT '交易量标准差', volume_std DECIMAL(30,8) NULL COMMENT '交易量标准差',
volume_threshold DECIMAL(30,8) NOT NULL COMMENT '交易量阈值', volume_threshold DECIMAL(30,8) NULL COMMENT '交易量阈值',
huge_volume TINYINT NOT NULL DEFAULT 0 COMMENT '是否为巨量(0:否,1:是)', huge_volume TINYINT NOT NULL DEFAULT 0 COMMENT '是否为巨量(0:否,1:是)',
volume_ratio DECIMAL(20,8) NOT NULL COMMENT '交易量比率', volume_ratio DECIMAL(20,8) NULL COMMENT '交易量比率',
spike_intensity DECIMAL(20,8) NOT NULL COMMENT '尖峰强度', spike_intensity DECIMAL(20,8) NULL COMMENT '尖峰强度',
close_80_percentile DECIMAL(20,5) NOT NULL COMMENT '收盘价80%分位数', close_80_percentile DECIMAL(20,5) NOT NULL COMMENT '收盘价80%分位数',
close_20_percentile DECIMAL(20,5) NOT NULL COMMENT '收盘价20%分位数', close_20_percentile DECIMAL(20,5) NOT NULL COMMENT '收盘价20%分位数',
price_80_high TINYINT NOT NULL DEFAULT 0 COMMENT '价格是否达到80%分位数高点(0:否,1:是)', price_80_high TINYINT NOT NULL DEFAULT 0 COMMENT '价格是否达到80%分位数高点(0:否,1:是)',

View File

@ -9,7 +9,7 @@ import sys
import os import os
sys.path.append(os.path.dirname(os.path.abspath(__file__))) sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from core.db_huge_volume_data import DBHugeVolumeData from core.db.db_huge_volume_data import DBHugeVolumeData
import logging import logging
# 配置日志 # 配置日志

View File

@ -9,7 +9,7 @@ import sys
import os import os
sys.path.append(os.path.dirname(os.path.abspath(__file__))) sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from core.db_trade_data import DBTradeData from core.db.db_trade_data import DBTradeData
import logging import logging
# 配置日志 # 配置日志

View File

@ -12,7 +12,7 @@ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from datetime import datetime, timedelta from datetime import datetime, timedelta
from core.huge_volume import HugeVolume from core.biz.huge_volume import HugeVolume
import logging import logging
# 配置日志 # 配置日志

View File

@ -2,8 +2,8 @@ import logging
import time import time
from datetime import datetime, timedelta from datetime import datetime, timedelta
import pandas as pd import pandas as pd
from core.utils import datetime_to_timestamp, timestamp_to_datetime from core.utils import datetime_to_timestamp, timestamp_to_datetime, transform_date_time_to_timestamp
from core.trade_data import TradeData from core.biz.trade_data import TradeData
from config import ( from config import (
API_KEY, API_KEY,
SECRET_KEY, SECRET_KEY,
@ -44,14 +44,14 @@ class TradeDataMain:
if end_time is None: if end_time is None:
end_time = int(time.time() * 1000) end_time = int(time.time() * 1000)
else: else:
end_time = self.transform_date_time(end_time) end_time = transform_date_time_to_timestamp(end_time)
# 处理start参数 # 处理start参数
if start_time is None: if start_time is None:
# 默认两个月前 # 默认两个月前
start_time_str = MONITOR_CONFIG.get("volume_monitor", {}).get("initial_date", "2025-05-01 00:00:00") start_time_str = MONITOR_CONFIG.get("volume_monitor", {}).get("initial_date", "2025-05-01 00:00:00")
start_time = self.transform_date_time(start_time_str) start_time = transform_date_time_to_timestamp(start_time_str)
else: else:
start_time = self.transform_date_time(start_time) start_time = transform_date_time_to_timestamp(start_time)
# 从数据库获取最早数据 # 从数据库获取最早数据
earliest_data = self.trade_data.db_trade_data.query_earliest_data(symbol) earliest_data = self.trade_data.db_trade_data.query_earliest_data(symbol)
db_earliest_time = None db_earliest_time = None
@ -87,25 +87,6 @@ class TradeDataMain:
logging.info(f"获取交易数据失败: {symbol}, {start_date_time}, {end_date_time}") logging.info(f"获取交易数据失败: {symbol}, {start_date_time}, {end_date_time}")
return None return None
def transform_date_time(self, date_time: str):
"""
将日期时间转换为毫秒级timestamp
"""
try:
# 判断是否就是timestamp整型数据
if isinstance(date_time, int):
date_time = date_time
# 判断是否为纯数字UTC毫秒级timestamp
elif date_time.isdigit():
date_time = int(date_time)
else:
# 按北京时间字符串处理转换为毫秒级timestamp
date_time = datetime_to_timestamp(date_time)
return date_time
except Exception as e:
logging.error(f"start参数解析失败: {e}")
return None
if __name__ == "__main__": if __name__ == "__main__":
trade_data_main = TradeDataMain() trade_data_main = TradeDataMain()

View File

@ -1,7 +1,7 @@
import logging import logging
from time import sleep from time import sleep
from core.base import QuantTrader from core.biz.quant_trader import QuantTrader
from core.strategy import QuantStrategy from core.biz.strategy import QuantStrategy
from config import API_KEY, SECRET_KEY, PASSPHRASE, SANDBOX, TRADING_CONFIG, TIME_CONFIG from config import API_KEY, SECRET_KEY, PASSPHRASE, SANDBOX, TRADING_CONFIG, TIME_CONFIG
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s') logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s')