From 89d06d9fbd889d4e64013bae7afabef552275adc Mon Sep 17 00:00:00 2001 From: blade <8019068@qq.com> Date: Mon, 4 Aug 2025 21:07:44 +0800 Subject: [PATCH] 1. update database and relevant code 2. support calculate technical metrics. --- .gitignore | 3 + config.py | 29 +- .../market_data_monitor.cpython-312.pyc | Bin 9642 -> 9654 bytes core/biz/market_data_monitor.py | 2 + core/biz/metrics_calculation.py | 846 ++++++++++++++++++ .../db_market_data.cpython-312.pyc | Bin 6473 -> 17012 bytes core/db/db_huge_volume_data.py | 331 ++++++- core/db/db_market_data.py | 368 ++++++++ core/wechat.py | 66 +- huge_volume_main.py | 122 ++- market_data_main.py | 152 +++- sql/query/sql_playground.sql | 6 +- sql/table/crypto_huge_volume.sql | 63 +- sql/table/crypto_market_data.sql | 57 +- sql/table/crypto_trade_data.sql | 9 +- test_k_shape.py | 74 ++ 16 files changed, 2035 insertions(+), 93 deletions(-) create mode 100644 core/biz/metrics_calculation.py create mode 100644 test_k_shape.py diff --git a/.gitignore b/.gitignore index c407bbe..c603121 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ /core/__pycache__ /__pycache__/*.pyc /output +core/db/__pycache__/db_market_data.cpython-312.pyc +core/biz/__pycache__/metrics_calculation.cpython-312.pyc +core/biz/__pycache__/market_data_monitor.cpython-312.pyc diff --git a/config.py b/config.py index 265790b..4cf6311 100644 --- a/config.py +++ b/config.py @@ -1,16 +1,16 @@ # OKX API 配置 # 请将以下信息替换为你的实际API密钥 # API密钥配置 -API_KEY = "7286d434-225b-401f-b3af-fd595e15d23f" -SECRET_KEY = "80B95C5757F9208F70282A85C9DDBC86" -PASSPHRASE = "Bengbu_2001" -SANDBOX = False +# API_KEY = "7286d434-225b-401f-b3af-fd595e15d23f" +# SECRET_KEY = "80B95C5757F9208F70282A85C9DDBC86" +# PASSPHRASE = "Bengbu_2001" +# SANDBOX = False # 实盘读取API密钥配置 -# API_KEY = "a73f9096-8e76-49ff-947c-a4f4edf657ec" -# SECRET_KEY = "F7AD69272FBF7C44E69CC110D2EDDB7A" -# PASSPHRASE = "Bengbu!2001" -# SANDBOX = False +API_KEY = "a73f9096-8e76-49ff-947c-a4f4edf657ec" +SECRET_KEY = "F7AD69272FBF7C44E69CC110D2EDDB7A" +PASSPHRASE = "Bengbu!2001" +SANDBOX = False # 模拟盘API密钥配置 # API_KEY = "f309e789-3497-4ed3-896f-d18bdc4d9817" @@ -50,10 +50,11 @@ TIME_CONFIG = { MONITOR_CONFIG = { "volume_monitor":{ - "symbols": ["XCH-USDT", "BTC-USDT", "ETH-USDT", "DOGE-USDT", - "XCH-USDT-SWAP", "BTC-USDT-SWAP", "ETH-USDT-SWAP", "DOGE-USDT-SWAP"], - "bars": ["5m", "15m", "1H", "1D"], - "initial_date": "2025-05-01 00:00:00" + "symbols": ["XCH-USDT", "BONK-USDT", "PENGU-USDT", + "CFX-USDT", "PUMP-USDT", "SOL-USDT", + "BTC-USDT", "ETH-USDT", "DOGE-USDT"], + "bars": ["5m", "15m", "30m", "1H"], + "initial_date": "2025-05-15 00:00:00" }, # "volume_monitor":{ # "symbols": ["BTC-USDT"], @@ -86,4 +87,8 @@ MYSQL_CONFIG = { "user": "xch", "password": "xch_okx_2025", "database": "okx" +} + +WECHAT_CONFIG = { + "key": "11e6f7ac-efa9-418a-904c-9325a9f5d324" } \ No newline at end of file diff --git a/core/biz/__pycache__/market_data_monitor.cpython-312.pyc b/core/biz/__pycache__/market_data_monitor.cpython-312.pyc index 98427ee0e5943719f2c6170d1db46ab2ce428690..889092262f3b6a26860d873ec9e978ecb780becd 100644 GIT binary patch delta 252 zcmZ4Gz0I5VG%qg~0}xb&Psn((k@uD;dN z1V&%BkDgU_K`v%;rNj!28{$!)7#Mk-7$-2_k&>COJ5zTB(+cC2>^nHFNI6bmnqWMM z{R1;lezLRVJk}3v3_>E4pGfXzOx(OcY9}*WJJ8bNr<3F564__#eqvzd)SrAvu8bK- zZ?=|~V`7}OIZE*@BjftX9?JK1vKUz;SG%qg~0}!kd=+5Zd$a_na@zLfNqQy)c4^6Bu%f)PTpPVQm!nDDCa)rbS z=G)>?lQkt}m>DFZCwoZFW4<9MJo&ZcZpPTn%cXWQv$X=vDSj|HO)imXp6=vxa%Id6 z(i)rH<>iY#AtW5iNY8>YYU+WE$?+;0 hY#@anLMHdBoMF5*IZD-+Kb?tD{1XF+ERqFU3IL?hK+^yK diff --git a/core/biz/market_data_monitor.py b/core/biz/market_data_monitor.py index 069ef29..2ddcdea 100644 --- a/core/biz/market_data_monitor.py +++ b/core/biz/market_data_monitor.py @@ -117,6 +117,8 @@ class MarketDataMonitor: # 添加bar列,内容为bar df['bar'] = bar df['create_time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + + # 获取df中date_time的最早时间与最新时间 并保存到df中 df = df[['symbol', 'bar', 'timestamp', 'date_time', 'open', 'high', 'low', 'close', 'volume', 'volCcy', 'volCCyQuote', 'create_time']] df.sort_values('timestamp', inplace=True) df.reset_index(drop=True, inplace=True) diff --git a/core/biz/metrics_calculation.py b/core/biz/metrics_calculation.py new file mode 100644 index 0000000..ca5d825 --- /dev/null +++ b/core/biz/metrics_calculation.py @@ -0,0 +1,846 @@ +import pandas as pd +import logging +import numpy as np +import talib as tb +from talib import MA_Type + +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) + + +class MetricsCalculation: + def __init__(self): + pass + + def pre_close(self, df: pd.DataFrame): + # 计算前一日收盘价、涨跌幅、涨跌幅百分比 + df["pre_close"] = df["close"].shift(1) + df["close_change"] = df["close"] - df["pre_close"] + df["pct_chg"] = df["close_change"] / df["pre_close"] * 100 + # 设置k_up_down,亦即阳线或阴线 + df["k_up_down"] = "" + df.loc[df["close"] >= df["open"], "k_up_down"] = "阳线" + df.loc[df["close"] < df["open"], "k_up_down"] = "阴线" + return df + + def macd(self, df: pd.DataFrame): + logging.info("计算MACD指标") + data = np.array(df.close) + ndata = len(data) + m, n, T = 12, 26, 9 + EMA1 = np.copy(data) + EMA2 = np.copy(data) + f1 = (m - 1) / (m + 1) + f2 = (n - 1) / (n + 1) + f3 = (T - 1) / (T + 1) + for i in range(1, ndata): + EMA1[i] = EMA1[i - 1] * f1 + EMA1[i] * (1 - f1) + EMA2[i] = EMA2[i - 1] * f2 + EMA2[i] * (1 - f2) + df["ma1"] = EMA1 + df["ma2"] = EMA2 + DIF = EMA1 - EMA2 + df["dif"] = DIF + DEA = np.copy(DIF) + for i in range(1, ndata): + DEA[i] = DEA[i - 1] * f3 + DEA[i] * (1 - f3) + df["dea"] = DEA + df["macd"] = 2 * (DIF - DEA) + + # DIFF, macdsignal, macdhist = tb.MACD(data, fastperiod=12, slowperiod=26, signalperiod=9) + df["macd_signal"] = "" + macd_position = df["dif"] > df["dea"] + df.loc[ + macd_position[ + (macd_position == True) & (macd_position.shift() == False) + ].index, + "macd_signal", + ] = "金叉" + df.loc[ + macd_position[ + (macd_position == False) & (macd_position.shift() == True) + ].index, + "macd_signal", + ] = "死叉" + return df + + def kdj(self, df: pd.DataFrame): + logging.info("计算KDJ指标") + low_list = df["low"].rolling(window=9).min() + low_list.fillna(value=df["low"].expanding().min(), inplace=True) + high_list = df["high"].rolling(window=9).max() + high_list.fillna(value=df["high"].expanding().max(), inplace=True) + + rsv = (df["close"] - low_list) / (high_list - low_list) * 100 + df["kdj_k"] = rsv.ewm(com=2).mean() + df["kdj_d"] = df["kdj_k"].ewm(com=2).mean() + df["kdj_j"] = 3 * df["kdj_k"] - 2 * df["kdj_d"] + + df["kdj_signal"] = "" + kdj_position = df["kdj_k"] > df["kdj_d"] + df.loc[ + kdj_position[ + (kdj_position == True) & (kdj_position.shift() == False) + ].index, + "kdj_signal", + ] = "金叉" + df.loc[ + kdj_position[ + (kdj_position == False) & (kdj_position.shift() == True) + ].index, + "kdj_signal", + ] = "死叉" + return df + + def set_kdj_pattern(self, df: pd.DataFrame): + """ + 设置每一根K线数据对应的KDJ形态超买超卖情况 + + KDJ_K > 80, KDJ_D > 80, KDJ_J > 90: 超超买 + KDJ_K > 70, KDJ_D > 70, KDJ_J > 80: 超买 + KDJ_K < 20, KDJ_D < 20, KDJ_J < 10: 超超卖 + KDJ_K < 30, KDJ_D < 30, KDJ_J < 20: 超卖 + 否则为"徘徊" + """ + # 初始化kdj_pattern列 + df["kdj_pattern"] = "徘徊" + + # 超超买条件:KDJ_K > 80, KDJ_D > 80, KDJ_J > 90 + kdj_super_buy = (df["kdj_k"] > 80) & (df["kdj_d"] > 80) & (df["kdj_j"] > 90) + df.loc[kdj_super_buy, "kdj_pattern"] = "超超买" + + # 超买条件:KDJ_K > 70, KDJ_D > 70, KDJ_J > 80 + kdj_buy = (df["kdj_k"] > 70) & (df["kdj_d"] > 70) & (df["kdj_j"] > 80) + df.loc[kdj_buy, "kdj_pattern"] = "超买" + + # 超超卖条件:KDJ_K < 20, KDJ_D < 20, KDJ_J < 10 + kdj_super_sell = (df["kdj_k"] < 20) & (df["kdj_d"] < 20) & (df["kdj_j"] < 10) + df.loc[kdj_super_sell, "kdj_pattern"] = "超超卖" + + # 超卖条件:KDJ_K < 30, KDJ_D < 30, KDJ_J < 20 + kdj_sell = (df["kdj_k"] < 30) & (df["kdj_d"] < 30) & (df["kdj_j"] < 20) + df.loc[kdj_sell, "kdj_pattern"] = "超卖" + + return df + + def calculate_ma_price_percent(self, data: pd.DataFrame): + data["ma5_close_diff"] = (data["close"] - data["ma5"]) / (data["close"]) * 100 + data["ma10_close_diff"] = (data["close"] - data["ma10"]) / (data["close"]) * 100 + data["ma20_close_diff"] = (data["close"] - data["ma20"]) / (data["close"]) * 100 + data["ma30_close_diff"] = (data["close"] - data["ma30"]) / (data["close"]) * 100 + data["ma_close_avg"] = ( + data["ma5_close_diff"] + + data["ma10_close_diff"] + + data["ma20_close_diff"] + + data["ma30_close_diff"] + ) / 4 + return data + + def set_ma_long_short_divergence(self, data: pd.DataFrame): + """ + 根据ma5_close_diff, ma10_close_diff, ma20_close_diff, ma30_close_diff, ma_close_avg + 设置均线多空列: ma_long_short (多,空,震荡) + 设置均线发散列: ma_divergence (超发散,发散,适中,粘合,未知) + + 均线发散度使用相对统计方法分类: + - 超发散:标准差Z-score > 1.5 且 均值Z-score绝对值 > 1.2 + - 发散:标准差Z-score > 0.8 或 均值Z-score绝对值 > 0.8 + - 适中:标准差Z-score在0.3-0.8之间,且均值Z-score绝对值 < 0.5 + - 粘合:标准差Z-score < 0.3,均线高度粘合 + + 使用20个周期的滚动窗口计算相对统计特征,避免绝对阈值过于严格的问题 + """ + data["ma_long_short"] = "震荡" + data["ma_divergence"] = "未知" + + # 检查数据完整性 + # if (pd.isnull(data['ma5_close_diff']).any() or + # pd.isnull(data['ma10_close_diff']).any() or + # pd.isnull(data['ma20_close_diff']).any() or + # pd.isnull(data['ma30_close_diff']).any() or + # pd.isnull(data['ma_close_avg']).any()): + # data['ma_long_short'] = '数据不全' + # return data + + # 设置均线多空逻辑 + # 多:所有均线都在价格下方,且平均偏离度为正 + long_condition = ( + (data["ma5_close_diff"] > 0) + & (data["ma10_close_diff"] > 0) + & (data["ma20_close_diff"] > 0) + & (data["ma30_close_diff"] > 0) + & (data["ma_close_avg"] > 0) + ) + data.loc[long_condition, "ma_long_short"] = "多" + + # 空:所有均线都在价格上方,且平均偏离度为负 + short_condition = ( + (data["ma5_close_diff"] < 0) + & (data["ma10_close_diff"] < 0) + & (data["ma20_close_diff"] < 0) + & (data["ma30_close_diff"] < 0) + & (data["ma_close_avg"] < 0) + ) + data.loc[short_condition, "ma_long_short"] = "空" + + # 计算各均线偏离度的标准差和均值 + data["ma_divergence"] = "未知" + ma_diffs = data[ + ["ma5_close_diff", "ma10_close_diff", "ma20_close_diff", "ma30_close_diff"] + ] + ma_std = ma_diffs.std(axis=1) # 标准差 + ma_mean = ma_diffs.mean(axis=1) # 均值 + abs_ma_mean = abs(ma_mean) # 均值的绝对值 + + # 计算标准差和均值绝对值的百分位数(基于历史数据分布) + # 这里使用 25%、50%、75% 分位数作为阈值,可根据实际需求调整 + std_25, std_50, std_75 = ma_std.quantile([0.25, 0.50, 0.75]) + mean_25, mean_50, mean_75 = abs_ma_mean.quantile([0.25, 0.50, 0.75]) + + # 超发散:标准差和均值绝对值均处于高百分位(>75%) + super_divergence = (ma_std > std_75) & (abs_ma_mean > mean_75) + data.loc[super_divergence, "ma_divergence"] = "超发散" + + # 发散:标准差或均值绝对值处于中等偏高百分位(50%-75%) + divergence = ((ma_std > std_50) & (ma_std <= std_75)) | ( + (abs_ma_mean > mean_50) & (abs_ma_mean <= mean_75) + ) + data.loc[divergence & (data["ma_divergence"] == "未知"), "ma_divergence"] = ( + "发散" + ) + + # 适中:标准差和均值绝对值处于中等偏低百分位(25%-50%) + moderate = (ma_std > std_25) & (ma_std <= std_50) & (abs_ma_mean <= mean_50) + data.loc[moderate & (data["ma_divergence"] == "未知"), "ma_divergence"] = "适中" + + # 粘合:标准差处于低百分位(<25%) + convergence = ma_std <= std_25 + data.loc[convergence & (data["ma_divergence"] == "未知"), "ma_divergence"] = ( + "粘合" + ) + + return data + + def update_macd_divergence_column(self, df: pd.DataFrame): + """ + 更新整个DataFrame的macd_divergence列 + 计算每个时间点的MACD背离情况(顶背离或底背离) + + :param df: 包含timestamp, close, dif, macd, kdj_j列的DataFrame + :return: 更新了macd_divergence列的DataFrame + """ + if df is None or df.empty: + return df + + # 确保必要的列存在 + required_columns = ["timestamp", "close", "dif", "macd", "kdj_j"] + missing_columns = [col for col in required_columns if col not in df.columns] + if missing_columns: + print(f"缺少必要的列: {missing_columns}") + return df + + # 按时间戳排序(升序) + df = df.sort_values("timestamp").reset_index(drop=True) + + # 初始化macd_divergence列 + df["macd_divergence"] = "未知" + + # 遍历DataFrame,计算每个时间点的背离情况 + for i in range(1, len(df)): + current_row = df.iloc[i] + previous_row = df.iloc[i - 1] + + current_close = current_row["close"] + current_dif = current_row["dif"] + current_macd = current_row["macd"] + current_kdj_j = current_row["kdj_j"] + + previous_close = previous_row["close"] + previous_dif = previous_row["dif"] + previous_macd = previous_row["macd"] + previous_kdj_j = previous_row["kdj_j"] + + # 检查是否为顶背离 + # 条件:价格创新高,但MACD指标没有创新高,且KDJ超买 + if ( + current_close > previous_close + and current_kdj_j > 70 + and current_dif <= previous_dif + and current_macd <= previous_macd + ): + df.at[i, "macd_divergence"] = "顶背离" + + # 检查是否为底背离 + # 条件:价格创新低,但MACD指标没有创新低,且KDJ超卖 + elif ( + current_close < previous_close + and current_kdj_j < 20 + and current_dif >= previous_dif + and current_macd >= previous_macd + ): + df.at[i, "macd_divergence"] = "底背离" + + # 检查更严格的背离条件(与历史高点/低点比较) + else: + # 获取当前时间点之前的数据 + historical_data = df.iloc[: i + 1] + + # 检查顶背离:价格接近历史高点,但MACD指标明显低于历史高点 + if current_kdj_j > 70: + price_high = historical_data["close"].max() + dif_high = historical_data["dif"].max() + macd_high = historical_data["macd"].max() + + # 价格接近历史高点(差距小于5%),但MACD指标明显低于历史高点 + if ( + current_close >= price_high * 0.95 + and current_dif <= dif_high * 0.8 + and current_macd <= macd_high * 0.8 + ): + df.at[i, "macd_divergence"] = "顶背离" + + # 检查底背离:价格接近历史低点,但MACD指标明显高于历史低点 + elif current_kdj_j < 20: + price_low = historical_data["close"].min() + dif_low = historical_data["dif"].min() + macd_low = historical_data["macd"].min() + + # 价格接近历史低点(差距小于5%),但MACD指标明显高于历史低点 + if ( + current_close <= price_low * 1.05 + and current_dif >= dif_low * 1.2 + and current_macd >= macd_low * 1.2 + ): + df.at[i, "macd_divergence"] = "底背离" + + return df + + def update_macd_divergence_column_simple( + self, df: pd.DataFrame, window_size: int = 20 + ): + """ + 简化版本的MACD背离检测函数 + 使用滑动窗口来检测背离,提高计算效率 + + :param df: 包含timestamp, close, dif, macd, kdj_j列的DataFrame + :param window_size: 滑动窗口大小,用于检测背离 + :return: 更新了macd_divergence列的DataFrame + """ + if df is None or df.empty: + return df + + # 确保必要的列存在 + required_columns = ["timestamp", "close", "dif", "macd", "kdj_j"] + missing_columns = [col for col in required_columns if col not in df.columns] + if missing_columns: + print(f"缺少必要的列: {missing_columns}") + return df + + # 按时间戳排序(升序) + df = df.sort_values("timestamp").reset_index(drop=True) + + # 初始化macd_divergence列 + df["macd_divergence"] = "未知" + + # 使用滑动窗口检测背离 + for i in range(window_size, len(df)): + window_data = df.iloc[i - window_size : i + 1] + + current_row = df.iloc[i] + current_close = current_row["close"] + current_dif = current_row["dif"] + current_macd = current_row["macd"] + current_kdj_j = current_row["kdj_j"] + + # 计算窗口内的极值 + window_price_high = window_data["close"].max() + window_price_low = window_data["close"].min() + window_dif_high = window_data["dif"].max() + window_dif_low = window_data["dif"].min() + window_macd_high = window_data["macd"].max() + window_macd_low = window_data["macd"].min() + + # 检测顶背离 + if ( + current_kdj_j > 70 + and current_close >= window_price_high * 0.98 # 价格接近窗口内最高点 + and current_dif <= window_dif_high * 0.85 # DIF明显低于窗口内最高点 + and current_macd <= window_macd_high * 0.85 + ): # MACD明显低于窗口内最高点 + df.at[i, "macd_divergence"] = "顶背离" + + # 检测底背离 + elif ( + current_kdj_j < 20 + and current_close <= window_price_low * 1.02 # 价格接近窗口内最低点 + and current_dif >= window_dif_low * 1.15 # DIF明显高于窗口内最低点 + and current_macd >= window_macd_low * 1.15 + ): # MACD明显高于窗口内最低点 + df.at[i, "macd_divergence"] = "底背离" + + return df + + def ma5102030(self, df: pd.DataFrame): + logging.info("计算均线指标") + df["ma5"] = df["close"].rolling(window=5).mean().dropna() + df["ma10"] = df["close"].rolling(window=10).mean().dropna() + df["ma20"] = df["close"].rolling(window=20).mean().dropna() + df["ma30"] = df["close"].rolling(window=30).mean().dropna() + + df["ma_cross"] = "" + ma_position = df["ma5"] > df["ma10"] + df.loc[ + ma_position[(ma_position == True) & (ma_position.shift() == False)].index, + "ma_cross", + ] = "5穿10" + ma_position = df["ma5"] > df["ma20"] + df.loc[ + ma_position[(ma_position == True) & (ma_position.shift() == False)].index, + "ma_cross", + ] = "5穿20" + ma_position = df["ma5"] > df["ma30"] + df.loc[ + ma_position[(ma_position == True) & (ma_position.shift() == False)].index, + "ma_cross", + ] = "5穿30" + ma_position = df["ma10"] > df["ma30"] + df.loc[ + ma_position[(ma_position == True) & (ma_position.shift() == False)].index, + "ma_cross", + ] = "10穿30" + + ma_position = df["ma5"] < df["ma10"] + df.loc[ + ma_position[(ma_position == True) & (ma_position.shift() == False)].index, + "ma_cross", + ] = "10穿5" + ma_position = df["ma5"] < df["ma20"] + df.loc[ + ma_position[(ma_position == True) & (ma_position.shift() == False)].index, + "ma_cross", + ] = "20穿5" + ma_position = df["ma5"] < df["ma30"] + df.loc[ + ma_position[(ma_position == True) & (ma_position.shift() == False)].index, + "ma_cross", + ] = "30穿5" + ma_position = df["ma10"] < df["ma30"] + df.loc[ + ma_position[(ma_position == True) & (ma_position.shift() == False)].index, + "ma_cross", + ] = "30穿10" + return df + + def rsi(self, df: pd.DataFrame): + logging.info("计算RSI指标") + df["rsi_14"] = tb.RSI(df["close"].values, timeperiod=14) + df["rsi_signal"] = "" + rsi_high = df["rsi_14"] > 70 + rsi_low = df["rsi_14"] < 30 + df.loc[ + rsi_high[(rsi_high == True) & (rsi_high.shift() == False)].index, + "rsi_signal", + ] = "超买" + df.loc[ + rsi_low[(rsi_low == True) & (rsi_low.shift() == False)].index, "rsi_signal" + ] = "超卖" + return df + + def boll(self, df: pd.DataFrame): + logging.info("计算BOLL指标") + df["boll_upper"], df["boll_middle"], df["boll_lower"] = tb.BBANDS( + df["close"].values, timeperiod=20, matype=MA_Type.SMA + ) + return df + + def set_boll_pattern(self, df: pd.DataFrame): + """ + 设置BOLL形态 + 根据价格与布林带的位置关系判断超买超卖状态 + + 超超买:价格接近或突破上轨,且KDJ超买 + 超买:价格接近上轨,且KDJ超买 + 超超卖:价格接近或突破下轨,且KDJ超卖 + 超卖:价格接近下轨,且KDJ超卖 + 震荡:其他情况 + """ + # 初始化boll_pattern列 + df["boll_pattern"] = "震荡" + + # 检查必要的列是否存在 + required_columns = ["close", "boll_upper", "boll_lower", "kdj_j"] + missing_columns = [col for col in required_columns if col not in df.columns] + if missing_columns: + print(f"缺少必要的列: {missing_columns}") + return df + + # 计算价格与布林带的距离百分比 + df["upper_distance"] = abs(df["close"] - df["boll_upper"]) / df["close"] * 100 + df["lower_distance"] = abs(df["close"] - df["boll_lower"]) / df["close"] * 100 + + # 超超买:价格突破上轨,且KDJ超买 + super_buy_condition = (df["close"] >= df["boll_upper"]) & (df["kdj_j"] > 80) + df.loc[super_buy_condition, "boll_pattern"] = "超超买" + + # 超买:价格接近上轨(距离小于2%),且KDJ超买 + buy_condition = ( + (df["upper_distance"] <= 2) + & (df["kdj_j"] > 80) + & (df["boll_pattern"] == "震荡") + ) + df.loc[buy_condition, "boll_pattern"] = "超买" + + # 超超卖:价格突破下轨,且KDJ超卖 + super_sell_condition = (df["close"] <= df["boll_lower"]) & (df["kdj_j"] < 20) + df.loc[super_sell_condition, "boll_pattern"] = "超超卖" + + # 超卖:价格接近下轨(距离小于2%),且KDJ超卖 + sell_condition = ( + (df["lower_distance"] <= 2) + & (df["kdj_j"] < 20) + & (df["boll_pattern"] == "震荡") + ) + df.loc[sell_condition, "boll_pattern"] = "超卖" + + # 设置boll_signal列(保持与原有逻辑兼容) + df["boll_signal"] = "" + + # 突破下轨信号 + close_gt_low = df["close"] > df["boll_lower"] + pre_close_less_low = df["pre_close"] < df["boll_lower"].shift() + low_break = close_gt_low & pre_close_less_low + df.loc[ + low_break[(low_break == True) & (low_break.shift() == False)].index, + "boll_signal", + ] = "突破下轨" + + # 击穿上轨信号 + close_less_high = df["close"] < df["boll_upper"] + pre_close_gt_high = df["pre_close"] > df["boll_upper"].shift() + high_down = close_less_high & pre_close_gt_high + df.loc[ + high_down[(high_down == True) & (high_down.shift() == False)].index, + "boll_signal", + ] = "击穿上轨" + + # 删除临时列 + df.drop(columns=["upper_distance", "lower_distance"], inplace=True) + return df + + def set_k_length(self, df: pd.DataFrame): + """ + 设置K线长度:k_length + 根据close, open, high, low计算K线长度 + 使用统计方法(标准差、均值)来分类K线长度 + + K线长度分类: + - 短:K线实体和影线都较短 + - 中:K线长度适中 + - 长:K线实体或影线较长 + - 超长:K线实体和影线都很长 + """ + # 检查必要的列是否存在 + required_columns = ["close", "open", "high", "low"] + missing_columns = [col for col in required_columns if col not in df.columns] + if missing_columns: + print(f"缺少必要的列: {missing_columns}") + return df + + # 计算K线的基本特征 + df["k_body"] = abs(df["close"] - df["open"]) # K线实体长度 + df["k_upper_shadow"] = df["high"] - df[["open", "close"]].max( + axis=1 + ) # 上影线长度 + df["k_lower_shadow"] = ( + df[["open", "close"]].min(axis=1) - df["low"] + ) # 下影线长度 + df["k_total_range"] = df["high"] - df["low"] # K线总长度 + + # 计算K线实体占总长度的比例 + df["k_body_ratio"] = df["k_body"] / df["k_total_range"] + + # 使用滚动窗口计算统计特征(使用20个周期的滚动窗口) + window_size = min(20, len(df)) + + # 计算K线总长度的统计特征 + df["k_range_mean"] = ( + df["k_total_range"].rolling(window=window_size, min_periods=1).mean() + ) + df["k_range_std"] = ( + df["k_total_range"].rolling(window=window_size, min_periods=1).std() + ) + + # 计算K线实体的统计特征 + df["k_body_mean"] = ( + df["k_body"].rolling(window=window_size, min_periods=1).mean() + ) + df["k_body_std"] = df["k_body"].rolling(window=window_size, min_periods=1).std() + + # 初始化k_length列 + df["k_length"] = "中" + + # 计算Z-score(标准化分数) + df["k_range_zscore"] = (df["k_total_range"] - df["k_range_mean"]) / df[ + "k_range_std" + ] + df["k_body_zscore"] = (df["k_body"] - df["k_body_mean"]) / df["k_body_std"] + + # 处理无穷大和NaN值 + df["k_range_zscore"] = df["k_range_zscore"].replace([np.inf, -np.inf], 0) + df["k_body_zscore"] = df["k_body_zscore"].replace([np.inf, -np.inf], 0) + df["k_range_zscore"] = df["k_range_zscore"].fillna(0) + df["k_body_zscore"] = df["k_body_zscore"].fillna(0) + + # 分类逻辑 + # 超长:K线总长度Z-score > 1.5 且 实体Z-score > 1.0 + super_long_condition = (df["k_range_zscore"] > 1.5) & ( + df["k_body_zscore"] > 1.0 + ) + df.loc[super_long_condition, "k_length"] = "超长" + + # 长:K线总长度Z-score > 0.8 或 实体Z-score > 0.8 + long_condition = ( + (df["k_range_zscore"] > 0.8) | (df["k_body_zscore"] > 0.8) + ) & (df["k_length"] == "中") + df.loc[long_condition, "k_length"] = "长" + + # 短:K线总长度Z-score < -0.8 且 实体Z-score < -0.5 + short_condition = (df["k_range_zscore"] < -0.8) & (df["k_body_zscore"] < -0.5) + df.loc[short_condition, "k_length"] = "短" + + # 清理临时列 + temp_columns = [ + "k_body", + "k_upper_shadow", + "k_lower_shadow", + "k_total_range", + "k_body_ratio", + "k_range_mean", + "k_range_std", + "k_body_mean", + "k_body_std", + "k_range_zscore", + "k_body_zscore", + ] + df.drop(columns=temp_columns, inplace=True) + + return df + + def set_k_shape(self, df: pd.DataFrame): + """ + 设置K线形状:k_shape + 根据close, open, high, low计算K线形状 + 使用统计方法(标准差、均值)来分类K线形状 + + K线形态分类: + - 一字:open, high, low, close几乎完全一样(价格波动极小) + - 长吊锤线:实体占比≤30%,上影线<25%,实体占比<10% + - 吊锤线:实体占比≤30%,上影线<25%,实体占比≥10% + - 长倒T线:实体占比≤30%,下影线<25%,实体占比<10% + - 倒T线:实体占比≤30%,下影线<25%,实体占比≥10% + - 长十字星:实体占比≤30%,上下影线都≥25%,实体占比<10% + - 十字星:实体占比≤30%,上下影线都≥25%,实体占比≥10% + - 小实体:实体占比30%-55% + - 大实体:实体占比55%-70% + - 超大实体:实体占比70%-90% + - 光头光脚:实体占比>90%(非一字情况) + """ + # 检查必要的列是否存在 + required_columns = ["close", "open", "high", "low"] + missing_columns = [col for col in required_columns if col not in df.columns] + if missing_columns: + print(f"缺少必要的列: {missing_columns}") + return df + + # 计算K线的基本特征 + df["high_low_diff"] = df["high"] - df["low"] # 最高价与最低价差值 + df["open_close_diff"] = abs( + df["close"] - df["open"] + ) # 开盘价与收盘价差值绝对值 + df["high_close_diff"] = df["high"] - df[["open", "close"]].max( + axis=1 + ) # 上影线长度 + df["low_close_diff"] = ( + df[["open", "close"]].min(axis=1) - df["low"] + ) # 下影线长度 + + # 计算实体占比 + df["open_close_fill"] = df["open_close_diff"] / df["high_low_diff"].replace(0, np.nan) + df["open_close_fill"] = df["open_close_fill"].fillna(1.0) # 处理除零情况 + + # 计算影线占比 + df["upper_shadow_ratio"] = df["high_close_diff"] / df["high_low_diff"].replace(0, np.nan) + df["lower_shadow_ratio"] = df["low_close_diff"] / df["high_low_diff"].replace(0, np.nan) + df["upper_shadow_ratio"] = df["upper_shadow_ratio"].fillna(0) # 无波动时影线占比为 0 + df["lower_shadow_ratio"] = df["lower_shadow_ratio"].fillna(0) + + # 初始化k_shape列 + df["k_shape"] = "未知" + + # 首先识别"一字"形态:open, high, low, close几乎完全一样 + # 计算价格波动范围相对于价格的百分比 + df["price_range_ratio"] = df["high_low_diff"] / df["close"] * 100 + + # 使用滚动窗口计算价格波动范围的平均值,用于动态判断"一字"阈值 + window_size = min(20, len(df)) + df["avg_price_range"] = ( + df["price_range_ratio"].rolling(window=window_size, min_periods=1).mean() + ) + df["std_price_range"] = ( + df["price_range_ratio"].rolling(window=window_size, min_periods=1).std() + ) + + # 计算价格波动范围的Z-score + df["price_range_zscore"] = ( + df["price_range_ratio"] - df["avg_price_range"] + ) / df["std_price_range"] + df["price_range_zscore"] = ( + df["price_range_zscore"].replace([np.inf, -np.inf], 0).fillna(0) + ) + + # 计算滚动窗口内 price_range_ratio 和 price_range_zscore 的分位数 + df["price_range_ratio_p75"] = df["price_range_ratio"].rolling(window=window_size, min_periods=1).quantile(0.75) + df["price_range_zscore_p75"] = df["price_range_zscore"].rolling(window=window_size, min_periods=1).quantile(0.75) + + # 识别“一字”形态:波动极小(Z 分数 < -1.0 或 price_range_ratio < 0.05%)且无影线 + one_line_condition = ( + ((df["price_range_zscore"] < -1.0) | (df["price_range_ratio"] < 0.05)) & + (df["upper_shadow_ratio"] <= 0.01) & # 上影线极小或无 + (df["lower_shadow_ratio"] <= 0.01) & # 下影线极小或无 + (df["open_close_diff"] / df["close"] < 0.0005) # 开收盘价差小于0.05% + ) + df.loc[one_line_condition, "k_shape"] = "一字" + + # 使用滚动窗口计算统计特征(使用20个周期的滚动窗口) + window_size = min(20, len(df)) + + # 计算实体占比的统计特征 + df["fill_mean"] = ( + df["open_close_fill"].rolling(window=window_size, min_periods=1).mean() + ) + df["fill_std"] = ( + df["open_close_fill"].rolling(window=window_size, min_periods=1).std() + ) + + # 计算Z-score(标准化分数) + df["fill_zscore"] = (df["open_close_fill"] - df["fill_mean"]) / df["fill_std"] + + # 处理无穷大和NaN值 + df["fill_zscore"] = df["fill_zscore"].replace([np.inf, -np.inf], 0) + df["fill_zscore"] = df["fill_zscore"].fillna(0) + + # 分类逻辑(只在非"一字"的情况下进行分类) + # 实体占比≤30%的情况 + small_body_condition = (df["open_close_fill"] <= 0.3) & ( + df["k_shape"] != "一字" + ) + + # 长吊锤线:实体占比≤30%,上影线<25%,实体占比<10% + long_hammer_condition = ( + small_body_condition + & (df["upper_shadow_ratio"] < 0.25) + & (df["open_close_fill"] < 0.1) + ) + df.loc[long_hammer_condition, "k_shape"] = "长吊锤线" + + # 吊锤线:实体占比≤30%,上影线<25%,实体占比≥10% + hammer_condition = ( + small_body_condition + & (df["upper_shadow_ratio"] < 0.25) + & (df["open_close_fill"] >= 0.1) + & (df["k_shape"] == "未知") + ) + df.loc[hammer_condition, "k_shape"] = "吊锤线" + + # 长倒T线:实体占比≤30%,下影线<25%,实体占比<10% + long_inverted_t_condition = ( + small_body_condition + & (df["lower_shadow_ratio"] < 0.25) + & (df["open_close_fill"] < 0.1) + & (df["k_shape"] == "未知") + ) + df.loc[long_inverted_t_condition, "k_shape"] = "长倒T线" + + # 倒T线:实体占比≤30%,下影线<25%,实体占比≥10% + inverted_t_condition = ( + small_body_condition + & (df["lower_shadow_ratio"] < 0.25) + & (df["open_close_fill"] >= 0.1) + & (df["k_shape"] == "未知") + ) + df.loc[inverted_t_condition, "k_shape"] = "倒T线" + + # 长十字星:实体占比≤30%,上下影线都≥25%,实体占比<10% + long_doji_condition = ( + small_body_condition + & (df["upper_shadow_ratio"] >= 0.25) + & (df["lower_shadow_ratio"] >= 0.25) + & (df["open_close_fill"] < 0.1) + & (df["k_shape"] == "未知") + ) + df.loc[long_doji_condition, "k_shape"] = "长十字星" + + # 十字星:实体占比≤30%,上下影线都≥25%,实体占比≥10% + doji_condition = ( + small_body_condition + & (df["upper_shadow_ratio"] >= 0.25) + & (df["lower_shadow_ratio"] >= 0.25) + & (df["open_close_fill"] >= 0.1) + & (df["k_shape"] == "未知") + ) + df.loc[doji_condition, "k_shape"] = "十字星" + + # 小实体:实体占比30%-55% + small_body_condition_2 = ( + (df["open_close_fill"] > 0.3) + & (df["open_close_fill"] <= 0.55) + & (df["k_shape"] != "一字") + ) + df.loc[small_body_condition_2 + & (df["upper_shadow_ratio"] >= 0.25) & (df["k_shape"] == "未知"), "k_shape"] = "长上影线纺锤体" + df.loc[small_body_condition_2 + & (df["lower_shadow_ratio"] >= 0.25) & (df["k_shape"] == "未知"), "k_shape"] = "长下影线纺锤体" + df.loc[small_body_condition_2 & (df["k_shape"] == "未知"), "k_shape"] = "小实体" + + # 大实体:实体占比55%-90% + large_body_condition = ( + (df["open_close_fill"] > 0.55) + & (df["open_close_fill"] <= 0.9) + & (df["k_shape"] != "一字") + ) + df.loc[large_body_condition & (df["k_shape"] == "未知"), "k_shape"] = "大实体" + + # 识别“超大实体”形态:实体占比 75%-90%,价格波动显著,且非“一字”或“大实体” + super_large_body_condition = ( + (df["open_close_fill"] > 0.75) & + (df["open_close_fill"] <= 1) & + (df["price_range_ratio"] >= df["price_range_ratio_p75"]) & # 价格波动范围超过75th分位数 + (df["k_shape"] != "一字") + ) + df.loc[super_large_body_condition, "k_shape"] = "超大实体" + + # 光头光脚:实体占比>90%(非一字情况) + bald_body_condition = (df["open_close_fill"] > 0.9) & (df["k_shape"] != "一字") + df.loc[bald_body_condition & (df["k_shape"] == "超大实体"), "k_shape"] = "超大实体+光头光脚" + df.loc[bald_body_condition & (df["k_shape"] == "未知"), "k_shape"] = "光头光脚" + + # 清理临时列 + temp_columns = [ + "high_low_diff", + "open_close_diff", + "high_close_diff", + "low_close_diff", + "open_close_fill", + "upper_shadow_ratio", + "lower_shadow_ratio", + "fill_mean", + "fill_std", + "fill_zscore", + "price_range_ratio", + "avg_price_range", + "std_price_range", + "price_range_zscore", + ] + df.drop(columns=temp_columns, inplace=True) + + return df diff --git a/core/db/__pycache__/db_market_data.cpython-312.pyc b/core/db/__pycache__/db_market_data.cpython-312.pyc index 0fb8c836083a01b012d3f18df65048278960076d..0f9a006f0f0ed5390d3dd09401638e03996c8c82 100644 GIT binary patch literal 17012 zcmeHOYj6`+mTtXYZOOK5W8NY>MM%InJYq<8z?eV+W@9tSgs`KL)omk?Wv5$8##(t| zG9;J;g7btB7$8d$Q;^`TN$mh31An$QwYxv;!W5>5sbXi4fZZQa#+lkGe(gE8yRTYu z`;`otOw|f}`##P+_ujtee&^hKPya_jfs=vX)%LD!|Gkc3{)h?nXI3JENr)U^IEFJs zm@NkKG;T4%(-<)|o41$^SZ0b?nyp)`2FA$n#_evEN%jf3h1rxEpsSC z)?Ex|Q}ebs+6;dC9VDpr8zk$R)oX&WAjEtzAs7`KTZHBS7mV?NShyL27V;Eh!RFS) zoZ0h&VhHp7qHp%R2>(1Ei3TxM1<7V!6oXBW@EauyDU+e3 zF8~H_;7yH2&bSMP&zX43c2-+SjzhT_%55W*TcF&|%^Z$`v+|BcGiTdn+2S0I23oNL z7Z>N?O`H>YW#(LvX1Q5v4#)Bit^hdMIX7@Ba4_0`+-yo$2po#wuNeNk@K*wVrSMk< zT#LC0P}eQjXz0lKO1H%&wD z=B5LEkxD-zXCF`&BPHjjJk=iBxfxLE)ycmP-dRbT$3I_U*R0+c6n5}26g0`oH3Z^9 zB&3KanOO^jCiolt1q26}q#?-!NC5^Mra)HYUlS~n4={(;kJ;2Rthb%!tGNsVx$4r( zW-_O+Mfd?PTH40=O|?>SNN8&X$!{jT#?=t?S4vi~t+}BkBAFV3g5%PsUqfkl7G#3y2BHCi0O8Moi7Y3JexYCNA6v0Uwmi&A|{SxiAcf;ihOX zB6&!H3qQ{bO?))OOO_qn_P`GL!O4&9k`ptP8ZL~t24gW^h++dvkZr{x3>TBIa*cxx*!NG=j@4s%?Dm*fJN16rWeU^aMq%tfTAQhL9&~JQNTa} zAkYkx)kvy{D3U(k-G|pyZEbCfZEJ}JT7{PFd?+SvRV3}XcrY5<8fp>vty}~A*NWbQ zt!#HA2)Fc~{DVQQy&uWMk^gYfYD$+)E42@&?2h=G|r z<5*dD$(iXVr=Oj5alyF-mnS{eY2N1S3d@0T=a>-kObKLdf&r~FbquRL+<{KebH_ojdO za{8T~^uAATUObZe#TSGrahZ_r>yH|*AKA8H|*?w&mPZJ`+`3Eg6_I2_C-Ps)cS!7A1)F)As`+gJ?Z^# z!i&$BLYV9HyXVr!I~SuId~p%Pg8w-mipO|Bhqm;db080LO|2y83ZpOe+s;uX5k%K_ z^NZJM--b#e$SIXXN;X!}6L4Aq{no=ncpQT#Fd$+yQBMp4jX^OcYy_elA=(5Pi8-SR z5vlzYRvwQeRrJ{_dhWYoUqU2Fa)!3WqX6jP1RtW%1UymE6G+xz^ZZf=e6Ls4xriqX8$#YnC%q8ur> z<{pb2Rrc8{dsbhuFC$VU;HFq8fFK9e==KTqG@L6K+D)((g3a_{W4js5Ey&IPZP%ir zQkXO)Ve5U`tZt0z0rlf3`SiV|kA8Uj%m;uF>34f%+@VFvYL@p%Rlb|&PoxjNl{#}S zp8z&bvZ<4LqdR^0C@oHO(P(|mhMMZlzWKhjb(=Q&hMI7c@ac6mbu~V@N#7&BDkZk6 zc8w33!5nx{>P>ZPYU+HepYhSHn{Q1`eKjrLuzutE&Atkn25X4E?zJZk5-aerxDX9A z$0M;Y6cPpKd@jxlZSoe;BI6CMB$;4ENbXQelnckgVD)ohF!^z^hy_bvNGg&UL|~gC z%H~*NT8=agDenLVstmCf58G+Vu*~fHdn)@TF6(p;c)a^x-1Fj9kH633@1EKHi!W<> zJ^ss{x?X#oEJJvb1P3(X=5Q>8ol$>OiV!P_6j@M;h*TI4o7=79kR(geIz*Db0Jiky zrjMNidp&*NU+x_EEZzBQS+H)Ndo8tRADHcbVcAk-lg@jUk2Xv@q|lJp0_C!xztaPz z8YM~e(CA-QM13f+w0FLhzABoaEsjlkt4RN8CBHc z3C|+YRtQ8qU?C!_=uT;JR_X%{JRI`Lr-kw*O(7On|V_14tL+=3k0zz*RW@b0=Ns~&`&L-JmIk{CTcoQse@(@nu98UCY zwHG7{hUHYK;Zy*eijWgmyj_b;%0wjTc-!5aH|c(#;YyB}+CAeqQt&_vrKig@I&MO| z9VXSO2)Iu{rdK=ChtH(Jw;={HlB#S5>dMdGUB^|8Mg7WRLkk0k3&npD25@R^_X!U$0L@PSE7zH0b)- zU#5R`Aa(X5eOmRf5D1U#h7%4j3{v~|@LiX}VDuDJhN%nij zrCY>I@OLlhEnU{>9GEp`$=)G@Qf8p|h`44xw55CY>IQN>%eSONxv-Ov!tm@o&%`-(qxTL>u>A<`t zKp5{UoPA@$v;)aElE)suJYiucn=u-_-W%TOT@Pj$;|xc}Vpc56!je3DGEW1;y$HE& z2x$*>sg7L@FPQ3BE7h^KqzUwabyOW|O4<}1JDg^*Cs8}z_YQ{pvN^jlYBr6L5nm@E8!7FV)~WBCP_!xN#O5F(6>ngfT=? zVH*Zv3~-yPn6^70>Syy|jPM}UNzEu$+KNF72CW!8hXDa2+{FzAqGB0{G<_5k({+GH zOK<}?iT@4(0F6%{Xo@roe@Q>PbYSiSu<)1nv9o^23*V+(qJ`)MEDVyad_lYNZFJ?^ z$;!8lVddND$}dam)68Fbi~v%1A-Yy?YB3v>APzP{f5; zpXu)7sSDYy^}RrnlB@-o2PBCyEK<6IWlB_d9$ww}Q08QTF3J!_V}%+#&(lJUd_b+9 z3}r_l>YfZ~%2`-)Vj8#s(##qiX^4qIk>;AKSc5`K``Kj!^A-XMJ>196{vo4~MfOE- zYWxPAS*L-J9bSj}quBI;7Mtu8n;Zn2>|?+thX$MU=_y(eY;sU+Dog6qYZ1tjbc_js z^f~1skV82Wu;pNx26>V$z%qSK+S39T^og7naBf{Jqy2AZIZu)WEGq;oD;Nis6+jC` zr;9aM=8eyViORz=WgAJhxc3Imh(L|FUsd2LD{?Az>ckGr>oe&?U^ z>ByD|NWRr!K)xYWq}{KQAqfds2#Lu#J~oJc*4Wm^nD90A-xwBvXqYL6|f`05CaEA0`VmP*~B=E*_Y3KS1HK zK6YmA?rudtyK=alUvAkC7()NwegnZ$eq1hk{x$43MhEk9&)Dt_?5W$mQ4aK|6-Vj5 z2V1T*?{rSHD@;t#I`n-vYfkt%h=DkaJvKpx8Eg&QsCYqzlf*0?v>8GrBIu0WHp|@w z=Gtb<`q|}w6SkTD^_j%eNI|~%?_N%VG(E7HFh+IMjRX)fwAKRW5+oTW5)?CxK6VgW2Yeg z8H}8o1h-%RO=|zUY0Wjnt1b5#UcKqZwVUV7_xpmPFV+%+gI<9L=U7f$Fbrw+s`?r{60h}XPF{TL z>wUGGHv4LSykUcHGZt3(YHHUIt{SpHs3jhajm|l{(He=u6pXRdAaW6WM04v9DJX(Ugbk4My{IUX|Ac8lRsSL9Itdu z<(&NCD(85xYbxjD4_7(s?XIbr&D5`klV9-=tsp1$Ny&5)e|Sos%^IPK>^7fUN6s6e z5^=B3twXYMR_SP7Jh;cZOrGJ*F21}hLbh|O0;h8l4ZaBQezv{C5-bSW4edu=L+l~uW zoP5uzs>jD6C;L%`tR9Cr_R-lM$+2&`dX$ErYZ9+kbf1u&a{}OzlUR_W4M(BA1`0&H z3!MX{2;T!0bv|;i`rs28quE>1S)Fk*{)Iha-_#X*YHxT-dOg1D<;%~OUM~N;F3W(o zyz7zcC6f`ihc+0w0Xh^-6 zEWNYV_Ln?HNRjQa`eOlLc(C%#6$3M79pevHcGW;1CX{!XpbrmJo~`_{wD;*PeKVeY z!#z+`)?0L6erex~O;}XYH9fcFlGr!ni7p+N+dLNuzae}Z{H^_lI~=?WzpYmO!KbFR zwSF@u{1%esH$t=&zxKn=3j+bk83;7DaB=wFFd(sk0DLA$KDdUI2|lqAK&`=Nd##-0 zpr3}3Z?y#c)U`Z@1O-EnWTZWR2&mNP99GVG7!y7VJ%C$SD)E%as#H9 zem$kYQf=t2JG1rV)(nI3OX47TW~R9;%ex!SL{3Hq8Hi>k6#_sNM#s&o~@RjUncrMvB+5eTgQQaCX9i?LIe8YUoG_wrB2k$Wi8`6$ z(u|8lS!7y3qHbhL>Sy3q3to-MJ*4u*h{Bgd<(ERDP8_p~L|Kd$kf0qt5aqI8!;_+Em2#VoQ zbxX7{+$5RT*RI_pubh3zs|EvHO?T}pOzKhhFyi-~O#L#w6eLBXP;6_mx@(qJ3Y+2qiI zUJO*Q735t!>A9Dp>_N~&Zx!(a6hv7s9z90eiyqYXQmho|!2D+3$Nu-to7t?gmxJmn zMe#Xsj0Epa-wQoaTbbWOHja&T2PD}g7KBB1e!tg{bsx?~oQfe&d33evoK)rmi6&@~ z*O>~ooP#0fA$aRrWhuDn4toQ8eRrHojhozh=ypl&2#VURFsHP3tVp6AEfGVQ5`uFP zB}X@6AqFvjjK$%XC*$PkfHn6c`0V`=Ie+g;Qi5Bz{qI?L7fqN^;_>YoMIUL(2>!RyBpM6dSZUmSZMsst zo`+9L?q9i2)o;5>uLWY^|B5_JdOf7qU>0UeGsbD)!3^Wp<=`?4C#0=o`sONzn7^uP zWepqA4!@g{EDFiyG@AsyITPflV!O<;Cd#&K6m?i_4q30$&m=ZrEe>}w{}3jKVfxn# zxSzcRsq6(d1WVbE7s+`}3?fn4NohMJlfU9F{S=N!e_fn vT2a?bF+#phcr!ZQc7nE?K=QUHH+w^`%ourb3X;duO=l%G!M;1{OFiumAjq}a diff --git a/core/db/db_huge_volume_data.py b/core/db/db_huge_volume_data.py index abaa2da..0479544 100644 --- a/core/db/db_huge_volume_data.py +++ b/core/db/db_huge_volume_data.py @@ -24,6 +24,9 @@ class DBHugeVolumeData: "high", "low", "close", + "pre_close", + "close_change", + "pct_chg", "volume", "volCcy", "volCCyQuote", @@ -35,14 +38,28 @@ class DBHugeVolumeData: "spike_intensity", "close_80_percentile", "close_20_percentile", - "price_80_high", - "price_20_low", - "volume_80_20_price_spike", + "close_80_high", + "close_20_low", "close_90_percentile", "close_10_percentile", - "price_90_high", - "price_10_low", - "volume_90_10_price_spike", + "close_90_high", + "close_10_low", + "high_80_percentile", + "high_20_percentile", + "high_80_high", + "high_20_low", + "high_90_percentile", + "high_10_percentile", + "high_90_high", + "high_10_low", + "low_80_percentile", + "low_20_percentile", + "low_80_high", + "low_20_low", + "low_90_percentile", + "low_10_percentile", + "low_90_high", + "low_10_low", "create_time", ] self.db_manager = DBData(db_url, self.table_name, self.columns) @@ -260,7 +277,7 @@ class DBHugeVolumeData: end: Optional[Union[str, int]] = None ) -> Optional[List[Dict[str, Any]]]: """ - 查询80/20量价尖峰记录(只返回volume_80_20_price_spike=1的记录) + 查询80/20量价尖峰记录(只返回close_80_high=1或close_20_low=1的记录) :param symbol: 交易对 :param bar: K线周期 :param window_size: 窗口大小 @@ -269,7 +286,7 @@ class DBHugeVolumeData: :return: 80/20量价尖峰记录列表或None """ conditions, condition_dict = self._build_query_conditions( - symbol, bar, window_size, start, end, additional_conditions=["volume_80_20_price_spike = 1"] + symbol, bar, window_size, start, end, additional_conditions=["(close_80_high = 1 OR close_20_low = 1)"] ) where_clause = " AND ".join(conditions) @@ -290,7 +307,7 @@ class DBHugeVolumeData: end: Optional[Union[str, int]] = None ) -> Optional[List[Dict[str, Any]]]: """ - 查询90/10量价尖峰记录(只返回volume_90_10_price_spike=1的记录) + 查询90/10量价尖峰记录(只返回close_90_high=1或close_10_low=1的记录) :param symbol: 交易对 :param bar: K线周期 :param window_size: 窗口大小 @@ -299,7 +316,7 @@ class DBHugeVolumeData: :return: 90/10量价尖峰记录列表或None """ conditions, condition_dict = self._build_query_conditions( - symbol, bar, window_size, start, end, additional_conditions=["volume_90_10_price_spike = 1"] + symbol, bar, window_size, start, end, additional_conditions=["(close_90_high = 1 OR close_10_low = 1)"] ) where_clause = " AND ".join(conditions) @@ -311,7 +328,7 @@ class DBHugeVolumeData: return self.db_manager.query_data(sql, condition_dict, return_multi=True) - def query_price_80_high_records( + def query_close_80_high_records( self, symbol: Optional[str] = None, bar: Optional[str] = None, @@ -320,7 +337,7 @@ class DBHugeVolumeData: end: Optional[Union[str, int]] = None ) -> Optional[List[Dict[str, Any]]]: """ - 查询价格达到80%分位数高点的记录(只返回price_80_high=1的记录) + 查询价格达到80%分位数高点的记录(只返回close_80_high=1的记录) :param symbol: 交易对 :param bar: K线周期 :param window_size: 窗口大小 @@ -329,7 +346,7 @@ class DBHugeVolumeData: :return: 价格80%分位数高点记录列表或None """ conditions, condition_dict = self._build_query_conditions( - symbol, bar, window_size, start, end, additional_conditions=["price_80_high = 1"] + symbol, bar, window_size, start, end, additional_conditions=["close_80_high = 1"] ) where_clause = " AND ".join(conditions) @@ -341,7 +358,7 @@ class DBHugeVolumeData: return self.db_manager.query_data(sql, condition_dict, return_multi=True) - def query_price_20_low_records( + def query_close_20_low_records( self, symbol: Optional[str] = None, bar: Optional[str] = None, @@ -350,7 +367,7 @@ class DBHugeVolumeData: end: Optional[Union[str, int]] = None ) -> Optional[List[Dict[str, Any]]]: """ - 查询价格达到20%分位数低点的记录(只返回price_20_low=1的记录) + 查询价格达到20%分位数低点的记录(只返回close_20_low=1的记录) :param symbol: 交易对 :param bar: K线周期 :param start: 开始时间 @@ -358,7 +375,7 @@ class DBHugeVolumeData: :return: 价格20%分位数低点记录列表或None """ conditions, condition_dict = self._build_query_conditions( - symbol, bar, window_size, start, end, additional_conditions=["price_20_low = 1"] + symbol, bar, window_size, start, end, additional_conditions=["close_20_low = 1"] ) where_clause = " AND ".join(conditions) @@ -370,7 +387,7 @@ class DBHugeVolumeData: return self.db_manager.query_data(sql, condition_dict, return_multi=True) - def query_price_90_high_records( + def query_close_90_high_records( self, symbol: Optional[str] = None, bar: Optional[str] = None, @@ -379,7 +396,7 @@ class DBHugeVolumeData: end: Optional[Union[str, int]] = None ) -> Optional[List[Dict[str, Any]]]: """ - 查询价格达到90%分位数高点的记录(只返回price_90_high=1的记录) + 查询价格达到90%分位数高点的记录(只返回close_90_high=1的记录) :param symbol: 交易对 :param bar: K线周期 :param start: 开始时间 @@ -387,7 +404,7 @@ class DBHugeVolumeData: :return: 价格90%分位数高点记录列表或None """ conditions, condition_dict = self._build_query_conditions( - symbol, bar, window_size, start, end, additional_conditions=["price_90_high = 1"] + symbol, bar, window_size, start, end, additional_conditions=["close_90_high = 1"] ) where_clause = " AND ".join(conditions) @@ -399,7 +416,7 @@ class DBHugeVolumeData: return self.db_manager.query_data(sql, condition_dict, return_multi=True) - def query_price_10_low_records( + def query_close_10_low_records( self, symbol: Optional[str] = None, bar: Optional[str] = None, @@ -408,7 +425,7 @@ class DBHugeVolumeData: end: Optional[Union[str, int]] = None ) -> Optional[List[Dict[str, Any]]]: """ - 查询价格达到10%分位数低点的记录(只返回price_10_low=1的记录) + 查询价格达到10%分位数低点的记录(只返回close_10_low=1的记录) :param symbol: 交易对 :param bar: K线周期 :param start: 开始时间 @@ -416,7 +433,241 @@ class DBHugeVolumeData: :return: 价格10%分位数低点记录列表或None """ conditions, condition_dict = self._build_query_conditions( - symbol, bar, window_size, start, end, additional_conditions=["price_10_low = 1"] + symbol, bar, window_size, start, end, additional_conditions=["close_10_low = 1"] + ) + + where_clause = " AND ".join(conditions) + sql = f""" + SELECT * FROM crypto_huge_volume + WHERE {where_clause} + ORDER BY timestamp DESC + """ + + return self.db_manager.query_data(sql, condition_dict, return_multi=True) + + def query_high_80_high_records( + self, + symbol: Optional[str] = None, + bar: Optional[str] = None, + window_size: Optional[int] = None, + start: Optional[Union[str, int]] = None, + end: Optional[Union[str, int]] = None + ) -> Optional[List[Dict[str, Any]]]: + """ + 查询最高价达到80%分位数高点的记录(只返回high_80_high=1的记录) + :param symbol: 交易对 + :param bar: K线周期 + :param window_size: 窗口大小 + :param start: 开始时间 + :param end: 结束时间 + :return: 最高价80%分位数高点记录列表或None + """ + conditions, condition_dict = self._build_query_conditions( + symbol, bar, window_size, start, end, additional_conditions=["high_80_high = 1"] + ) + + where_clause = " AND ".join(conditions) + sql = f""" + SELECT * FROM crypto_huge_volume + WHERE {where_clause} + ORDER BY timestamp DESC + """ + + return self.db_manager.query_data(sql, condition_dict, return_multi=True) + + def query_high_20_low_records( + self, + symbol: Optional[str] = None, + bar: Optional[str] = None, + window_size: Optional[int] = None, + start: Optional[Union[str, int]] = None, + end: Optional[Union[str, int]] = None + ) -> Optional[List[Dict[str, Any]]]: + """ + 查询最高价达到20%分位数低点的记录(只返回high_20_low=1的记录) + :param symbol: 交易对 + :param bar: K线周期 + :param start: 开始时间 + :param end: 结束时间 + :return: 最高价20%分位数低点记录列表或None + """ + conditions, condition_dict = self._build_query_conditions( + symbol, bar, window_size, start, end, additional_conditions=["high_20_low = 1"] + ) + + where_clause = " AND ".join(conditions) + sql = f""" + SELECT * FROM crypto_huge_volume + WHERE {where_clause} + ORDER BY timestamp DESC + """ + + return self.db_manager.query_data(sql, condition_dict, return_multi=True) + + def query_high_90_high_records( + self, + symbol: Optional[str] = None, + bar: Optional[str] = None, + window_size: Optional[int] = None, + start: Optional[Union[str, int]] = None, + end: Optional[Union[str, int]] = None + ) -> Optional[List[Dict[str, Any]]]: + """ + 查询最高价达到90%分位数高点的记录(只返回high_90_high=1的记录) + :param symbol: 交易对 + :param bar: K线周期 + :param start: 开始时间 + :param end: 结束时间 + :return: 最高价90%分位数高点记录列表或None + """ + conditions, condition_dict = self._build_query_conditions( + symbol, bar, window_size, start, end, additional_conditions=["high_90_high = 1"] + ) + + where_clause = " AND ".join(conditions) + sql = f""" + SELECT * FROM crypto_huge_volume + WHERE {where_clause} + ORDER BY timestamp DESC + """ + + return self.db_manager.query_data(sql, condition_dict, return_multi=True) + + def query_high_10_low_records( + self, + symbol: Optional[str] = None, + bar: Optional[str] = None, + window_size: Optional[int] = None, + start: Optional[Union[str, int]] = None, + end: Optional[Union[str, int]] = None + ) -> Optional[List[Dict[str, Any]]]: + """ + 查询最高价达到10%分位数低点的记录(只返回high_10_low=1的记录) + :param symbol: 交易对 + :param bar: K线周期 + :param start: 开始时间 + :param end: 结束时间 + :return: 最高价10%分位数低点记录列表或None + """ + conditions, condition_dict = self._build_query_conditions( + symbol, bar, window_size, start, end, additional_conditions=["high_10_low = 1"] + ) + + where_clause = " AND ".join(conditions) + sql = f""" + SELECT * FROM crypto_huge_volume + WHERE {where_clause} + ORDER BY timestamp DESC + """ + + return self.db_manager.query_data(sql, condition_dict, return_multi=True) + + def query_low_80_high_records( + self, + symbol: Optional[str] = None, + bar: Optional[str] = None, + window_size: Optional[int] = None, + start: Optional[Union[str, int]] = None, + end: Optional[Union[str, int]] = None + ) -> Optional[List[Dict[str, Any]]]: + """ + 查询最低价达到80%分位数高点的记录(只返回low_80_high=1的记录) + :param symbol: 交易对 + :param bar: K线周期 + :param window_size: 窗口大小 + :param start: 开始时间 + :param end: 结束时间 + :return: 最低价80%分位数高点记录列表或None + """ + conditions, condition_dict = self._build_query_conditions( + symbol, bar, window_size, start, end, additional_conditions=["low_80_high = 1"] + ) + + where_clause = " AND ".join(conditions) + sql = f""" + SELECT * FROM crypto_huge_volume + WHERE {where_clause} + ORDER BY timestamp DESC + """ + + return self.db_manager.query_data(sql, condition_dict, return_multi=True) + + def query_low_20_low_records( + self, + symbol: Optional[str] = None, + bar: Optional[str] = None, + window_size: Optional[int] = None, + start: Optional[Union[str, int]] = None, + end: Optional[Union[str, int]] = None + ) -> Optional[List[Dict[str, Any]]]: + """ + 查询最低价达到20%分位数低点的记录(只返回low_20_low=1的记录) + :param symbol: 交易对 + :param bar: K线周期 + :param start: 开始时间 + :param end: 结束时间 + :return: 最低价20%分位数低点记录列表或None + """ + conditions, condition_dict = self._build_query_conditions( + symbol, bar, window_size, start, end, additional_conditions=["low_20_low = 1"] + ) + + where_clause = " AND ".join(conditions) + sql = f""" + SELECT * FROM crypto_huge_volume + WHERE {where_clause} + ORDER BY timestamp DESC + """ + + return self.db_manager.query_data(sql, condition_dict, return_multi=True) + + def query_low_90_high_records( + self, + symbol: Optional[str] = None, + bar: Optional[str] = None, + window_size: Optional[int] = None, + start: Optional[Union[str, int]] = None, + end: Optional[Union[str, int]] = None + ) -> Optional[List[Dict[str, Any]]]: + """ + 查询最低价达到90%分位数高点的记录(只返回low_90_high=1的记录) + :param symbol: 交易对 + :param bar: K线周期 + :param start: 开始时间 + :param end: 结束时间 + :return: 最低价90%分位数高点记录列表或None + """ + conditions, condition_dict = self._build_query_conditions( + symbol, bar, window_size, start, end, additional_conditions=["low_90_high = 1"] + ) + + where_clause = " AND ".join(conditions) + sql = f""" + SELECT * FROM crypto_huge_volume + WHERE {where_clause} + ORDER BY timestamp DESC + """ + + return self.db_manager.query_data(sql, condition_dict, return_multi=True) + + def query_low_10_low_records( + self, + symbol: Optional[str] = None, + bar: Optional[str] = None, + window_size: Optional[int] = None, + start: Optional[Union[str, int]] = None, + end: Optional[Union[str, int]] = None + ) -> Optional[List[Dict[str, Any]]]: + """ + 查询最低价达到10%分位数低点的记录(只返回low_10_low=1的记录) + :param symbol: 交易对 + :param bar: K线周期 + :param start: 开始时间 + :param end: 结束时间 + :return: 最低价10%分位数低点记录列表或None + """ + conditions, condition_dict = self._build_query_conditions( + symbol, bar, window_size, start, end, additional_conditions=["low_10_low = 1"] ) where_clause = " AND ".join(conditions) @@ -452,12 +703,18 @@ class DBHugeVolumeData: SELECT COUNT(*) as total_records, SUM(huge_volume) as huge_volume_count, - SUM(volume_80_20_price_spike) as volume_80_20_price_spike_count, - SUM(volume_90_10_price_spike) as volume_90_10_price_spike_count, - SUM(price_80_high) as price_80_high_count, - SUM(price_20_low) as price_20_low_count, - SUM(price_90_high) as price_90_high_count, - SUM(price_10_low) as price_10_low_count, + SUM(close_80_high) as close_80_high_count, + SUM(close_20_low) as close_20_low_count, + SUM(close_90_high) as close_90_high_count, + SUM(close_10_low) as close_10_low_count, + SUM(high_80_high) as high_80_high_count, + SUM(high_20_low) as high_20_low_count, + SUM(high_90_high) as high_90_high_count, + SUM(high_10_low) as high_10_low_count, + SUM(low_80_high) as low_80_high_count, + SUM(low_20_low) as low_20_low_count, + SUM(low_90_high) as low_90_high_count, + SUM(low_10_low) as low_10_low_count, AVG(volume_ratio) as avg_volume_ratio, MAX(volume_ratio) as max_volume_ratio, AVG(spike_intensity) as avg_spike_intensity, @@ -527,7 +784,23 @@ class DBHugeVolumeData: MAX(close_80_percentile) as max_close_80_percentile, MIN(close_20_percentile) as min_close_20_percentile, MAX(close_90_percentile) as max_close_90_percentile, - MIN(close_10_percentile) as min_close_10_percentile + MIN(close_10_percentile) as min_close_10_percentile, + AVG(high_80_percentile) as avg_high_80_percentile, + AVG(high_20_percentile) as avg_high_20_percentile, + AVG(high_90_percentile) as avg_high_90_percentile, + AVG(high_10_percentile) as avg_high_10_percentile, + MAX(high_80_percentile) as max_high_80_percentile, + MIN(high_20_percentile) as min_high_20_percentile, + MAX(high_90_percentile) as max_high_90_percentile, + MIN(high_10_percentile) as min_high_10_percentile, + AVG(low_80_percentile) as avg_low_80_percentile, + AVG(low_20_percentile) as avg_low_20_percentile, + AVG(low_90_percentile) as avg_low_90_percentile, + AVG(low_10_percentile) as avg_low_10_percentile, + MAX(low_80_percentile) as max_low_80_percentile, + MIN(low_20_percentile) as min_low_20_percentile, + MAX(low_90_percentile) as max_low_90_percentile, + MIN(low_10_percentile) as min_low_10_percentile FROM crypto_huge_volume WHERE {where_clause} """ diff --git a/core/db/db_market_data.py b/core/db/db_market_data.py index ac0b364..fd9d031 100644 --- a/core/db/db_market_data.py +++ b/core/db/db_market_data.py @@ -22,11 +22,49 @@ class DBMarketData: "high", "low", "close", + "pre_close", + "close_change", + "pct_chg", "volume", "volCcy", "volCCyQuote", "buy_sz", "sell_sz", + # 技术指标字段 + "ma1", + "ma2", + "dif", + "dea", + "macd", + "macd_signal", + "macd_divergence", + "kdj_k", + "kdj_d", + "kdj_j", + "kdj_signal", + "kdj_pattern", + "ma5", + "ma10", + "ma20", + "ma30", + "ma_cross", + "ma5_close_diff", + "ma10_close_diff", + "ma20_close_diff", + "ma30_close_diff", + "ma_close_avg", + "ma_long_short", + "ma_divergence", + "rsi_14", + "rsi_signal", + "boll_upper", + "boll_middle", + "boll_lower", + "boll_signal", + "boll_pattern", + "k_length", + "k_shape", + "k_up_down", "create_time", ] self.db_manager = DBData(db_url, self.table_name, self.columns) @@ -102,6 +140,336 @@ class DBMarketData: condition_dict = {"symbol": symbol, "bar": bar} return self.db_manager.query_data(sql, condition_dict, return_multi=False) + def query_data_before_timestamp(self, symbol: str, bar: str, timestamp: int, limit: int = 100): + """ + 根据时间戳查询之前的数据 + :param symbol: 交易对 + :param bar: K线周期 + :param timestamp: 时间戳 + :param limit: 查询数量 + """ + sql = """ + SELECT * FROM crypto_market_data + WHERE symbol = :symbol AND bar = :bar AND timestamp < :timestamp + ORDER BY timestamp DESC + LIMIT :limit + """ + condition_dict = {"symbol": symbol, "bar": bar, "timestamp": timestamp, "limit": limit} + return self.db_manager.query_data(sql, condition_dict, return_multi=True) + + def query_data_by_technical_indicators( + self, + symbol: str, + bar: str, + start: str = None, + end: str = None, + macd_signal: str = None, + kdj_signal: str = None, + rsi_signal: str = None, + boll_signal: str = None, + ma_cross: str = None + ): + """ + 根据技术指标查询数据 + :param symbol: 交易对 + :param bar: K线周期 + :param start: 开始时间 + :param end: 结束时间 + :param macd_signal: MACD信号 + :param kdj_signal: KDJ信号 + :param rsi_signal: RSI信号 + :param boll_signal: 布林带信号 + :param ma_cross: 均线交叉信号 + """ + conditions = ["symbol = :symbol", "bar = :bar"] + condition_dict = {"symbol": symbol, "bar": bar} + + if macd_signal: + conditions.append("macd_signal = :macd_signal") + condition_dict["macd_signal"] = macd_signal + if kdj_signal: + conditions.append("kdj_signal = :kdj_signal") + condition_dict["kdj_signal"] = kdj_signal + if rsi_signal: + conditions.append("rsi_signal = :rsi_signal") + condition_dict["rsi_signal"] = rsi_signal + if boll_signal: + conditions.append("boll_signal = :boll_signal") + condition_dict["boll_signal"] = boll_signal + if ma_cross: + conditions.append("ma_cross = :ma_cross") + condition_dict["ma_cross"] = ma_cross + + # 处理时间范围 + if start: + start_timestamp = transform_date_time_to_timestamp(start) + if start_timestamp: + conditions.append("timestamp >= :start") + condition_dict["start"] = start_timestamp + if end: + end_timestamp = transform_date_time_to_timestamp(end) + if end_timestamp: + conditions.append("timestamp <= :end") + condition_dict["end"] = end_timestamp + + where_clause = " AND ".join(conditions) + sql = f""" + SELECT * FROM crypto_market_data + WHERE {where_clause} + ORDER BY timestamp DESC + """ + + return self.db_manager.query_data(sql, condition_dict, return_multi=True) + + def query_macd_signals( + self, + symbol: str, + bar: str, + signal: str = None, + start: str = None, + end: str = None + ): + """ + 查询MACD信号数据 + :param symbol: 交易对 + :param bar: K线周期 + :param signal: MACD信号类型 + :param start: 开始时间 + :param end: 结束时间 + """ + conditions = ["symbol = :symbol", "bar = :bar"] + condition_dict = {"symbol": symbol, "bar": bar} + + if signal: + conditions.append("macd_signal = :signal") + condition_dict["signal"] = signal + + # 处理时间范围 + if start: + start_timestamp = transform_date_time_to_timestamp(start) + if start_timestamp: + conditions.append("timestamp >= :start") + condition_dict["start"] = start_timestamp + if end: + end_timestamp = transform_date_time_to_timestamp(end) + if end_timestamp: + conditions.append("timestamp <= :end") + condition_dict["end"] = end_timestamp + + where_clause = " AND ".join(conditions) + sql = f""" + SELECT * FROM crypto_market_data + WHERE {where_clause} + ORDER BY timestamp DESC + """ + + return self.db_manager.query_data(sql, condition_dict, return_multi=True) + + def query_kdj_signals( + self, + symbol: str, + bar: str, + signal: str = None, + pattern: str = None, + start: str = None, + end: str = None + ): + """ + 查询KDJ信号数据 + :param symbol: 交易对 + :param bar: K线周期 + :param signal: KDJ信号类型 + :param pattern: KDJ模式 + :param start: 开始时间 + :param end: 结束时间 + """ + conditions = ["symbol = :symbol", "bar = :bar"] + condition_dict = {"symbol": symbol, "bar": bar} + + if signal: + conditions.append("kdj_signal = :signal") + condition_dict["signal"] = signal + if pattern: + conditions.append("kdj_pattern = :pattern") + condition_dict["pattern"] = pattern + + # 处理时间范围 + if start: + start_timestamp = transform_date_time_to_timestamp(start) + if start_timestamp: + conditions.append("timestamp >= :start") + condition_dict["start"] = start_timestamp + if end: + end_timestamp = transform_date_time_to_timestamp(end) + if end_timestamp: + conditions.append("timestamp <= :end") + condition_dict["end"] = end_timestamp + + where_clause = " AND ".join(conditions) + sql = f""" + SELECT * FROM crypto_market_data + WHERE {where_clause} + ORDER BY timestamp DESC + """ + + return self.db_manager.query_data(sql, condition_dict, return_multi=True) + + def query_ma_signals( + self, + symbol: str, + bar: str, + cross: str = None, + long_short: str = None, + divergence: str = None, + start: str = None, + end: str = None + ): + """ + 查询均线信号数据 + :param symbol: 交易对 + :param bar: K线周期 + :param cross: 均线交叉信号 + :param long_short: 均线多空 + :param divergence: 均线发散 + :param start: 开始时间 + :param end: 结束时间 + """ + conditions = ["symbol = :symbol", "bar = :bar"] + condition_dict = {"symbol": symbol, "bar": bar} + + if cross: + conditions.append("ma_cross = :cross") + condition_dict["cross"] = cross + if long_short: + conditions.append("ma_long_short = :long_short") + condition_dict["long_short"] = long_short + if divergence: + conditions.append("ma_divergence = :divergence") + condition_dict["divergence"] = divergence + + # 处理时间范围 + if start: + start_timestamp = transform_date_time_to_timestamp(start) + if start_timestamp: + conditions.append("timestamp >= :start") + condition_dict["start"] = start_timestamp + if end: + end_timestamp = transform_date_time_to_timestamp(end) + if end_timestamp: + conditions.append("timestamp <= :end") + condition_dict["end"] = end_timestamp + + where_clause = " AND ".join(conditions) + sql = f""" + SELECT * FROM crypto_market_data + WHERE {where_clause} + ORDER BY timestamp DESC + """ + + return self.db_manager.query_data(sql, condition_dict, return_multi=True) + + def query_bollinger_signals( + self, + symbol: str, + bar: str, + signal: str = None, + pattern: str = None, + start: str = None, + end: str = None + ): + """ + 查询布林带信号数据 + :param symbol: 交易对 + :param bar: K线周期 + :param signal: 布林带信号 + :param pattern: 布林带模式 + :param start: 开始时间 + :param end: 结束时间 + """ + conditions = ["symbol = :symbol", "bar = :bar"] + condition_dict = {"symbol": symbol, "bar": bar} + + if signal: + conditions.append("boll_signal = :signal") + condition_dict["signal"] = signal + if pattern: + conditions.append("boll_pattern = :pattern") + condition_dict["pattern"] = pattern + + # 处理时间范围 + if start: + start_timestamp = transform_date_time_to_timestamp(start) + if start_timestamp: + conditions.append("timestamp >= :start") + condition_dict["start"] = start_timestamp + if end: + end_timestamp = transform_date_time_to_timestamp(end) + if end_timestamp: + conditions.append("timestamp <= :end") + condition_dict["end"] = end_timestamp + + where_clause = " AND ".join(conditions) + sql = f""" + SELECT * FROM crypto_market_data + WHERE {where_clause} + ORDER BY timestamp DESC + """ + + return self.db_manager.query_data(sql, condition_dict, return_multi=True) + + def get_technical_statistics( + self, + symbol: str, + bar: str, + start: str = None, + end: str = None + ): + """ + 获取技术指标统计信息 + :param symbol: 交易对 + :param bar: K线周期 + :param start: 开始时间 + :param end: 结束时间 + """ + conditions = ["symbol = :symbol", "bar = :bar"] + condition_dict = {"symbol": symbol, "bar": bar} + + # 处理时间范围 + if start: + start_timestamp = transform_date_time_to_timestamp(start) + if start_timestamp: + conditions.append("timestamp >= :start") + condition_dict["start"] = start_timestamp + if end: + end_timestamp = transform_date_time_to_timestamp(end) + if end_timestamp: + conditions.append("timestamp <= :end") + condition_dict["end"] = end_timestamp + + where_clause = " AND ".join(conditions) + sql = f""" + SELECT + COUNT(*) as total_records, + COUNT(CASE WHEN macd_signal IS NOT NULL THEN 1 END) as macd_signal_count, + COUNT(CASE WHEN kdj_signal IS NOT NULL THEN 1 END) as kdj_signal_count, + COUNT(CASE WHEN rsi_signal IS NOT NULL THEN 1 END) as rsi_signal_count, + COUNT(CASE WHEN boll_signal IS NOT NULL THEN 1 END) as boll_signal_count, + COUNT(CASE WHEN ma_cross IS NOT NULL THEN 1 END) as ma_cross_count, + AVG(ma5_close_diff) as avg_ma5_close_diff, + AVG(ma10_close_diff) as avg_ma10_close_diff, + AVG(ma20_close_diff) as avg_ma20_close_diff, + AVG(ma30_close_diff) as avg_ma30_close_diff, + AVG(ma_close_avg) as avg_ma_close_avg, + AVG(rsi_14) as avg_rsi_14, + AVG(boll_upper) as avg_boll_upper, + AVG(boll_middle) as avg_boll_middle, + AVG(boll_lower) as avg_boll_lower + FROM crypto_market_data + WHERE {where_clause} + """ + + return self.db_manager.query_data(sql, condition_dict, return_multi=False) + def query_market_data_by_symbol_bar(self, symbol: str, bar: str, start: str, end: str): """ 根据交易对和K线周期查询数据 diff --git a/core/wechat.py b/core/wechat.py index 7a78d80..a47c0ae 100644 --- a/core/wechat.py +++ b/core/wechat.py @@ -4,7 +4,71 @@ 但需要管理员提供企业id以及secret信息 通过wechatpy库实现 """ -import wechatpy import logging +import requests +from config import WECHAT_CONFIG logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s') + +class Wechat: + def __init__(self): + # 虽然config在根目录,但是取决于调用代码在哪 + # 只要启动代码文件在根目录,config就能找到 + self.key = WECHAT_CONFIG["key"] + self.url = f"https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key={self.key}" + + def send_text(self, text: str): + """ + 发送文本消息 + """ + data = { + "msgtype": "text", + "text": {"content": text} + } + response = requests.post(self.url, json=data) + return response.json() + + def send_markdown(self, text: str): + """ + 发送markdown消息 + """ + data = { + "msgtype": "markdown_v2", + "markdown_v2": {"content": text} + } + response = requests.post(self.url, json=data) + return response.json() + + def send_image(self, image_url: str): + """ + 发送图片消息 + """ + data = { + "msgtype": "image", + "image": {"url": image_url} + } + response = requests.post(self.url, json=data) + return response.json() + + def send_file(self, file_url: str): + """ + 发送文件消息 + """ + data = { + "msgtype": "file", + "file": {"url": file_url} + } + response = requests.post(self.url, json=data) + return response.json() + + def send_news(self, news: list): + """ + 发送图文消息 + """ + data = { + "msgtype": "news", + "news": {"articles": news} + } + response = requests.post(self.url, json=data) + return response.json() + \ No newline at end of file diff --git a/huge_volume_main.py b/huge_volume_main.py index cefa1ec..548d8d5 100644 --- a/huge_volume_main.py +++ b/huge_volume_main.py @@ -4,9 +4,10 @@ from core.db.db_market_data import DBMarketData from core.db.db_huge_volume_data import DBHugeVolumeData from core.utils import timestamp_to_datetime, transform_date_time_to_timestamp from market_data_main import MarketDataMain +from core.wechat import Wechat import logging from config import MONITOR_CONFIG, MYSQL_CONFIG, WINDOW_SIZE -from datetime import datetime +from datetime import datetime, timedelta import pandas as pd import os import re @@ -279,6 +280,95 @@ class HugeVolumeMain: data=volume_statistics_data, window_size=window_size, periods=periods ) return huge_volume_data, result_data + + def send_huge_volume_data_to_wechat(self, start: str = None, end: str = None): + if start is None: + start = MONITOR_CONFIG.get("volume_monitor", {}).get( + "initial_date", "2025-05-01 00:00:00" + ) + if end is None: + end = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + start_timestamp = transform_date_time_to_timestamp(start) + end_timestamp = transform_date_time_to_timestamp(end) + + start_date_time = timestamp_to_datetime(start_timestamp) + end_date_time = timestamp_to_datetime(end_timestamp) + logging.info(f"开始获取巨量交易数据: {start} 到 {end}") + huge_volume_data = self.db_huge_volume_data.query_huge_volume_records( + start=start_timestamp, + end=end_timestamp + ) + if huge_volume_data is None or len(huge_volume_data) == 0: + logging.warning(f"获取巨量交易数据为空: {start} 到 {end}") + return + else: + if isinstance(huge_volume_data, list): + huge_volume_data = pd.DataFrame(huge_volume_data) + else: + huge_volume_data = pd.DataFrame([huge_volume_data]) + # 过滤huge_volume_data,要求huge_volume为1,且(price_80_high == 1 or price_90_high == 1 or price_20_low == 1 or price_10_low == 1) + huge_volume_data = huge_volume_data[huge_volume_data["huge_volume"] == 1] + # 过滤huge_volume_data,要求(price_80_high == 1 or price_90_high == 1 or price_20_low == 1 or price_10_low == 1) + huge_volume_data = huge_volume_data[ + (huge_volume_data["price_90_high"] == 1) | + (huge_volume_data["price_10_low"] == 1) + ] + # 过滤huge_volume_data,要求volume_ratio > 10 + huge_volume_data = huge_volume_data[huge_volume_data["volume_ratio"] > 10] + # 根据symbol, bar, window_size, timestamp排序 + huge_volume_data = huge_volume_data.sort_values(by=["symbol", "bar", "window_size", "timestamp"], ascending=True) + huge_volume_data = huge_volume_data.reset_index(drop=True) + logging.info(f"获取巨量交易数据: {len(huge_volume_data)}条") + contents = [] + contents.append(f"# 放量交易数据: {start_date_time} 到 {end_date_time}") + symbol_list = huge_volume_data["symbol"].unique() + # 根据symbol_list排序 + symbol_list.sort() + for symbol in symbol_list: + contents = [] + contents.append(f"# 放量交易数据: {start_date_time} 到 {end_date_time}") + contents.append(f"## 币种: {symbol}") + symbol_data = huge_volume_data[huge_volume_data["symbol"] == symbol] + symbol_data = symbol_data.sort_values(by=["bar", "window_size", "timestamp"], ascending=True) + symbol_data = symbol_data.reset_index(drop=True) + for index, row in symbol_data.iterrows(): + if row['huge_volume'] == 1 and (row['price_80_high'] == 1 or row['price_90_high'] == 1 or row['price_20_low'] == 1 or row['price_10_low'] == 1): + if row['price_90_high'] == 1: + price_position_text = "90%分位数高点" + elif row['price_80_high'] == 1: + price_position_text = "80%分位数高点" + else: + price_position_text = "" + + if price_position_text == "": + if row['price_10_low'] == 1: + price_position_text = "10%分位数低点" + elif row['price_20_low'] == 1: + price_position_text = "20%分位数低点" + else: + price_position_text = "" + open_price = str(round(row['open'], 6)) + high = str(round(row['high'], 6)) + low = str(round(row['low'], 6)) + close = str(round(row['close'], 6)) + volume = str(round(row['volume'], 6)) + volCCyQuote = str(round(row['volCCyQuote'], 6)) + volume_ratio = str(round(row['volume_ratio'], 6)) + contents.append(f"交易周期: {row['bar']}, 滑动窗口: {row['window_size']} , 发生时间: {row['date_time']}") + contents.append(f"开盘价: {open_price}, 最高价: {high}, 最低价: {low}, 收盘价: {close}") + contents.append(f"成交量: {volume}, 成交量USDT: {volCCyQuote}, 交易量比率: {volume_ratio}") + contents.append(f"价格分位: {price_position_text}") + contents.append(f"--------------------------------") + text = "\n\n".join(contents) + # 获得text的字节数 + text_length = len(text.encode('utf-8')) + + logging.info(f"发送巨量交易数据到微信,字节数: {text_length}") + # with open(os.path.join(self.output_folder, "huge_volume_data.md"), "w", encoding="utf-8") as f: + # f.write(text) + wechat = Wechat() + wechat.send_markdown(text) def batch_next_periods_rise_or_fall( self, @@ -377,16 +467,28 @@ def batch_update_volume_spike(threshold: float = 2.0): huge_volume_main.batch_update_volume_spike(window_size=window_size) +def test_send_huge_volume_data_to_wechat(): + huge_volume_main = HugeVolumeMain(threshold=2.0) + # 获得昨天日期 + yesterday = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d") + logging.info(f"昨天日期: {yesterday}") + # 获得今天日期 + today = datetime.now().strftime("%Y-%m-%d") + logging.info(f"今天日期: {today}") + huge_volume_main.send_huge_volume_data_to_wechat(start=yesterday, end=today) + + if __name__ == "__main__": + test_send_huge_volume_data_to_wechat() # batch_initial_detect_volume_spike(threshold=2.0) # batch_update_volume_spike(threshold=2.0) - huge_volume_main = HugeVolumeMain(threshold=2.0) + # huge_volume_main = HugeVolumeMain(threshold=2.0) # huge_volume_main.batch_next_periods_rise_or_fall(output_excel=True) - data_file_path = "./output/huge_volume_statistics/next_periods_rise_or_fall_stat_20250731200304.xlsx" - sheet_name = "next_periods_statistics" - output_folder = "./output/huge_volume_statistics/" - huge_volume_main.plot_huge_volume_data( - data_file_path=data_file_path, - sheet_name=sheet_name, - output_folder=output_folder, - ) + # data_file_path = "./output/huge_volume_statistics/next_periods_rise_or_fall_stat_20250731200304.xlsx" + # sheet_name = "next_periods_statistics" + # output_folder = "./output/huge_volume_statistics/" + # huge_volume_main.plot_huge_volume_data( + # data_file_path=data_file_path, + # sheet_name=sheet_name, + # output_folder=output_folder, + # ) diff --git a/market_data_main.py b/market_data_main.py index 904acf2..8ed0fa4 100644 --- a/market_data_main.py +++ b/market_data_main.py @@ -1,9 +1,15 @@ import logging from datetime import datetime from time import sleep +import pandas as pd from core.biz.market_data_monitor import MarketDataMonitor from core.db.db_market_data import DBMarketData -from core.utils import datetime_to_timestamp, timestamp_to_datetime, transform_date_time_to_timestamp +from core.biz.metrics_calculation import MetricsCalculation +from core.utils import ( + datetime_to_timestamp, + timestamp_to_datetime, + transform_date_time_to_timestamp, +) from trade_data_main import TradeDataMain from config import ( API_KEY, @@ -96,6 +102,7 @@ class MarketDataMain: elif bar == "1D": threshold = 864000000 + min_start_time_ts = start_time_ts while start_time_ts < end_time_ts: current_start_time_ts = end_time_ts - threshold if current_start_time_ts < start_time_ts: @@ -164,10 +171,145 @@ class MarketDataMain: "create_time", ] ] + data["pre_close"] = None + data["close_change"] = None + data["pct_chg"] = None + data["ma1"] = None + data["ma2"] = None + data["dif"] = None + data["dea"] = None + data["macd"] = None + data["macd_signal"] = None + data["macd_divergence"] = None + data["kdj_k"] = None + data["kdj_d"] = None + data["kdj_j"] = None + data["kdj_signal"] = None + data["kdj_pattern"] = None + data["ma5"] = None + data["ma10"] = None + data["ma20"] = None + data["ma30"] = None + data["ma_cross"] = None + data["ma5_close_diff"] = None + data["ma10_close_diff"] = None + data["ma20_close_diff"] = None + data["ma30_close_diff"] = None + data["ma_close_avg"] = None + data["ma_long_short"] = None + data["ma_divergence"] = None + data["rsi_14"] = None + data["rsi_signal"] = None + data["boll_upper"] = None + data["boll_middle"] = None + data["boll_lower"] = None + data["boll_signal"] = None + data["boll_pattern"] = None + data["k_length"] = None + data["k_shape"] = None + data["k_up_down"] = None self.db_market_data.insert_data_to_mysql(data) + current_min_start_time_ts = data["timestamp"].min() + if current_min_start_time_ts < min_start_time_ts: + min_start_time_ts = current_min_start_time_ts if current_start_time_ts == start_time_ts: break end_time_ts = current_start_time_ts + if min_start_time_ts is not None: + # 补充技术指标数据 + # 获得min_start_time_ts之前30条数据 + logging.info(f"开始补充技术指标数据: {symbol} {bar}") + before_data = self.db_market_data.query_data_before_timestamp( + symbol, bar, min_start_time_ts, 30 + ) + if before_data is not None and len(before_data) > 0: + earliest_timestamp = before_data[0]["timestamp"] + else: + earliest_timestamp = min_start_time_ts + handle_data = self.db_market_data.query_market_data_by_symbol_bar( + symbol=symbol, bar=bar, start=earliest_timestamp, end=None + ) + if handle_data is not None and len(handle_data) > 0: + if isinstance(handle_data, list): + handle_data = pd.DataFrame(handle_data) + elif isinstance(handle_data, dict): + handle_data = pd.DataFrame([handle_data]) + elif isinstance(handle_data, pd.DataFrame): + pass + else: + logging.error(f"handle_data类型错误: {type(handle_data)}") + return None + + handle_data = self.calculate_metrics(handle_data) + logging.info(f"开始保存技术指标数据: {symbol} {bar}") + self.db_market_data.insert_data_to_mysql(handle_data) + return data + + def calculate_metrics(self, data: pd.DataFrame): + """ + 计算技术指标 + 1. 计算前一日收盘价、涨跌幅、涨跌幅百分比 + 2. 计算MACD指标 + 3. 计算KDJ指标 + 4. 计算BOLL指标 + 5. 计算K线长度 + 6. 计算K线形状 + 7. 计算K线方向 + pre_close DECIMAL(20,10) NULL, + close_change DECIMAL(20,10) NULL, + pct_chg DECIMAL(20,10) NULL, + ma1 DOUBLE DEFAULT NULL COMMENT '移动平均线1', + ma2 DOUBLE DEFAULT NULL COMMENT '移动平均线2', + dif DOUBLE DEFAULT NULL COMMENT 'MACD指标DIF线', + dea DOUBLE DEFAULT NULL COMMENT 'MACD指标DEA线', + macd DOUBLE DEFAULT NULL COMMENT 'MACD指标值', + macd_signal VARCHAR(15) DEFAULT NULL COMMENT 'MACD金叉死叉信号', + macd_divergence varchar(25) DEFAULT NULL COMMENT 'MACD背离,顶背离或底背离', + kdj_k DOUBLE DEFAULT NULL COMMENT 'KDJ指标K值', + kdj_d DOUBLE DEFAULT NULL COMMENT 'KDJ指标D值', + kdj_j DOUBLE DEFAULT NULL COMMENT 'KDJ指标J值', + kdj_signal VARCHAR(15) DEFAULT NULL COMMENT 'KDJ金叉死叉信号', + kdj_pattern varchar(25) DEFAULT NULL COMMENT 'KDJ超买,超卖,徘徊', + ma5 DOUBLE DEFAULT NULL COMMENT '5移动平均线', + ma10 DOUBLE DEFAULT NULL COMMENT '10移动平均线', + ma20 DOUBLE DEFAULT NULL COMMENT '20移动平均线', + ma30 DOUBLE DEFAULT NULL COMMENT '30移动平均线', + ma_cross VARCHAR(15) DEFAULT NULL COMMENT '均线交叉信号', + ma5_close_diff double DEFAULT NULL COMMENT '5移动平均线与收盘价差值', + ma10_close_diff double DEFAULT NULL COMMENT '10移动平均线与收盘价差值', + ma20_close_diff double DEFAULT NULL COMMENT '20移动平均线与收盘价差值', + ma30_close_diff double DEFAULT NULL COMMENT '30移动平均线与收盘价差值', + ma_close_avg double DEFAULT NULL COMMENT '收盘价移动平均值', + ma_long_short varchar(25) DEFAULT NULL COMMENT '均线多空', + ma_divergence varchar(25) DEFAULT NULL COMMENT '均线发散,均线粘合,均线适中,均线发散,均线超发散' + rsi_14 DOUBLE DEFAULT NULL COMMENT '14RSI指标', + rsi_signal VARCHAR(15) DEFAULT NULL COMMENT 'RSI强弱信号', + boll_upper DOUBLE DEFAULT NULL COMMENT '布林带上轨', + boll_middle DOUBLE DEFAULT NULL COMMENT '布林带中轨', + boll_lower DOUBLE DEFAULT NULL COMMENT '布林带下轨', + boll_signal VARCHAR(15) DEFAULT NULL COMMENT '布林带强弱信号', + boll_pattern varchar(25) DEFAULT NULL COMMENT 'BOLL超买,超卖,徘徊', + k_length varchar(25) DEFAULT NULL COMMENT 'K线长度', + k_shape varchar(25) DEFAULT NULL COMMENT 'K线形状', + k_up_down varchar(25) DEFAULT NULL COMMENT 'K线方向', + """ + data = data.sort_values(by="timestamp") + data = data.reset_index(drop=True) + + metrics_calculation = MetricsCalculation() + data = metrics_calculation.pre_close(data) + data = metrics_calculation.macd(data) + data = metrics_calculation.kdj(data) + data = metrics_calculation.set_kdj_pattern(data) + data = metrics_calculation.update_macd_divergence_column_simple(data) + data = metrics_calculation.ma5102030(data) + data = metrics_calculation.calculate_ma_price_percent(data) + data = metrics_calculation.set_ma_long_short_divergence(data) + data = metrics_calculation.rsi(data) + data = metrics_calculation.boll(data) + data = metrics_calculation.set_boll_pattern(data) + data = metrics_calculation.set_k_length(data) + data = metrics_calculation.set_k_shape(data) return data def batch_update_data(self): @@ -196,7 +338,9 @@ class MarketDataMain: if latest_timestamp: latest_timestamp = int(latest_timestamp) latest_date_time = timestamp_to_datetime(latest_timestamp) - logging.info(f"{symbol}, {bar} 上次获取的最新数据时间: {latest_date_time}") + logging.info( + f"{symbol}, {bar} 上次获取的最新数据时间: {latest_date_time}" + ) else: logging.warning(f"获取{symbol}, {bar} 最新数据失败") return @@ -206,5 +350,5 @@ class MarketDataMain: if __name__ == "__main__": market_data_main = MarketDataMain() - market_data_main.batch_update_data() - # market_data_main.initial_data() + # market_data_main.batch_update_data() + market_data_main.initial_data() diff --git a/sql/query/sql_playground.sql b/sql/query/sql_playground.sql index f57bcc7..edf506b 100644 --- a/sql/query/sql_playground.sql +++ b/sql/query/sql_playground.sql @@ -1,6 +1,6 @@ -select * from crypto_market_data -WHERE symbol='XCH-USDT' and bar='5m' #and date_time > '2025-07-01' -order by timestamp desc; +select date_time, open, high, low, close, k_shape from crypto_market_data +WHERE symbol='XCH-USDT' and bar='5m' and date_time > '2025-08-04 15:00:00' +order by timestamp ; delete FROM crypto_market_data where symbol != 'XCH-USDT'; diff --git a/sql/table/crypto_huge_volume.sql b/sql/table/crypto_huge_volume.sql index 63db711..ff477e5 100644 --- a/sql/table/crypto_huge_volume.sql +++ b/sql/table/crypto_huge_volume.sql @@ -5,36 +5,51 @@ CREATE TABLE IF NOT EXISTS crypto_huge_volume ( window_size INT NOT NULL COMMENT '窗口大小, 50, 80, 100, 120', timestamp BIGINT NOT NULL COMMENT '时间戳', date_time VARCHAR(50) NOT NULL COMMENT '日期时间', - open DECIMAL(20,5) NOT NULL COMMENT '开盘价', - high DECIMAL(20,5) NOT NULL COMMENT '最高价', - low DECIMAL(20,5) NOT NULL COMMENT '最低价', - close DECIMAL(20,5) NOT NULL COMMENT '收盘价', - volume DECIMAL(30,8) NOT NULL COMMENT '交易量', - volCcy DECIMAL(30,8) NOT NULL COMMENT '交易量(基础货币)', - volCCyQuote DECIMAL(30,8) NOT NULL COMMENT '交易量(计价货币)', - volume_ma DECIMAL(30,8) NULL COMMENT '交易量移动平均', - volume_std DECIMAL(30,8) NULL COMMENT '交易量标准差', - volume_threshold DECIMAL(30,8) NULL COMMENT '交易量阈值', + open DECIMAL(20,10) NOT NULL COMMENT '开盘价', + high DECIMAL(20,10) NOT NULL COMMENT '最高价', + low DECIMAL(20,10) NOT NULL COMMENT '最低价', + close DECIMAL(20,10) NOT NULL COMMENT '收盘价', + pre_close DECIMAL(20,10) NOT NULL COMMENT '前收盘价', + close_change DECIMAL(20,10) NOT NULL COMMENT '涨跌额', + pct_chg DECIMAL(20,10) NOT NULL COMMENT '涨跌幅', + volume DECIMAL(30,10) NOT NULL COMMENT '交易量', + volCcy DECIMAL(30,10) NOT NULL COMMENT '交易量(基础货币)', + volCCyQuote DECIMAL(30,10) NOT NULL COMMENT '交易量(计价货币)', + volume_ma DECIMAL(30,10) NULL COMMENT '交易量移动平均', + volume_std DECIMAL(30,10) NULL COMMENT '交易量标准差', + volume_threshold DECIMAL(30,10) NULL COMMENT '交易量阈值', huge_volume TINYINT NOT NULL DEFAULT 0 COMMENT '是否为巨量(0:否,1:是)', - volume_ratio DECIMAL(20,8) NULL COMMENT '交易量比率', - spike_intensity DECIMAL(20,8) NULL COMMENT '尖峰强度', - close_80_percentile DECIMAL(20,5) NOT NULL COMMENT '收盘价80%分位数', - close_20_percentile DECIMAL(20,5) NOT NULL COMMENT '收盘价20%分位数', - price_80_high TINYINT NOT NULL DEFAULT 0 COMMENT '价格是否达到80%分位数高点(0:否,1:是)', - price_20_low TINYINT NOT NULL DEFAULT 0 COMMENT '价格是否达到20%分位数低点(0:否,1:是)', - volume_80_20_price_spike TINYINT NOT NULL DEFAULT 0 COMMENT '是否出现80/20量价尖峰(0:否,1:是)', - close_90_percentile DECIMAL(20,5) NOT NULL COMMENT '收盘价90%分位数', - close_10_percentile DECIMAL(20,5) NOT NULL COMMENT '收盘价10%分位数', - price_90_high TINYINT NOT NULL DEFAULT 0 COMMENT '价格是否达到90%分位数高点(0:否,1:是)', - price_10_low TINYINT NOT NULL DEFAULT 0 COMMENT '价格是否达到10%分位数低点(0:否,1:是)', - volume_90_10_price_spike TINYINT NOT NULL DEFAULT 0 COMMENT '是否出现90/10量价尖峰(0:否,1:是)', + volume_ratio DECIMAL(20,10) NULL COMMENT '交易量比率', + spike_intensity DECIMAL(20,10) NULL COMMENT '尖峰强度', + close_80_percentile DECIMAL(20,10) NOT NULL COMMENT '收盘价80%分位数', + close_20_percentile DECIMAL(20,10) NOT NULL COMMENT '收盘价20%分位数', + close_80_high TINYINT NOT NULL DEFAULT 0 COMMENT '收盘价是否达到80%分位数高点(0:否,1:是)', + close_20_low TINYINT NOT NULL DEFAULT 0 COMMENT '收盘价是否达到20%分位数低点(0:否,1:是)', + close_90_percentile DECIMAL(20,10) NOT NULL COMMENT '收盘价90%分位数', + close_10_percentile DECIMAL(20,10) NOT NULL COMMENT '收盘价10%分位数', + close_90_high TINYINT NOT NULL DEFAULT 0 COMMENT '收盘价是否达到90%分位数高点(0:否,1:是)', + close_10_low TINYINT NOT NULL DEFAULT 0 COMMENT '收盘价是否达到10%分位数低点(0:否,1:是)', + high_80_percentile DECIMAL(20,10) NOT NULL COMMENT '最高价80%分位数', + high_20_percentile DECIMAL(20,10) NOT NULL COMMENT '最高价20%分位数', + high_80_high TINYINT NOT NULL DEFAULT 0 COMMENT '最高价是否达到80%分位数高点(0:否,1:是)', + high_20_low TINYINT NOT NULL DEFAULT 0 COMMENT '最高价是否达到20%分位数低点(0:否,1:是)', + high_90_percentile DECIMAL(20,10) NOT NULL COMMENT '最高价90%分位数', + high_10_percentile DECIMAL(20,10) NOT NULL COMMENT '最高价10%分位数', + high_90_high TINYINT NOT NULL DEFAULT 0 COMMENT '最高价是否达到90%分位数高点(0:否,1:是)', + high_10_low TINYINT NOT NULL DEFAULT 0 COMMENT '最高价是否达到10%分位数低点(0:否,1:是)', + low_80_percentile DECIMAL(20,10) NOT NULL COMMENT '最低价80%分位数', + low_20_percentile DECIMAL(20,10) NOT NULL COMMENT '最低价20%分位数', + low_80_high TINYINT NOT NULL DEFAULT 0 COMMENT '最低价是否达到80%分位数高点(0:否,1:是)', + low_20_low TINYINT NOT NULL DEFAULT 0 COMMENT '最低价是否达到20%分位数低点(0:否,1:是)', + low_90_percentile DECIMAL(20,10) NOT NULL COMMENT '最低价90%分位数', + low_10_percentile DECIMAL(20,10) NOT NULL COMMENT '最低价10%分位数', + low_90_high TINYINT NOT NULL DEFAULT 0 COMMENT '最低价是否达到90%分位数高点(0:否,1:是)', + low_10_low TINYINT NOT NULL DEFAULT 0 COMMENT '最低价是否达到10%分位数低点(0:否,1:是)', create_time VARCHAR(50) NOT NULL COMMENT '创建时间', UNIQUE KEY uniq_symbol_bar_window_size_timestamp (symbol, bar, window_size, timestamp), INDEX idx_symbol_bar_window_size (symbol, bar, window_size), INDEX idx_timestamp (timestamp), INDEX idx_huge_volume (huge_volume), - INDEX idx_volume_80_20_price_spike (volume_80_20_price_spike), - INDEX idx_volume_90_10_price_spike (volume_90_10_price_spike), INDEX idx_date_time (date_time) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='加密货币巨量交易数据表'; diff --git a/sql/table/crypto_market_data.sql b/sql/table/crypto_market_data.sql index 6a0c9b0..e9cbdd9 100644 --- a/sql/table/crypto_market_data.sql +++ b/sql/table/crypto_market_data.sql @@ -7,15 +7,54 @@ CREATE TABLE IF NOT EXISTS crypto_market_data ( bar VARCHAR(20) NOT NULL, timestamp BIGINT NOT NULL, date_time VARCHAR(50) NOT NULL, - open DECIMAL(20,5) NOT NULL, - high DECIMAL(20,5) NOT NULL, - low DECIMAL(20,5) NOT NULL, - close DECIMAL(20,5) NOT NULL, - volume DECIMAL(30,8) NOT NULL, - volCcy DECIMAL(30,8) NOT NULL, - volCCyQuote DECIMAL(30,8) NOT NULL, - buy_sz DECIMAL(20, 6) NOT NULL, - sell_sz DECIMAL(20, 6) NOT NULL, + open DECIMAL(20,10) NOT NULL, + high DECIMAL(20,10) NOT NULL, + low DECIMAL(20,10) NOT NULL, + close DECIMAL(20,10) NOT NULL, + pre_close DECIMAL(20,10) NULL, + close_change DECIMAL(20,10) NULL, + pct_chg DECIMAL(20,10) NULL, + volume DECIMAL(30,10) NOT NULL, + volCcy DECIMAL(30,10) NOT NULL, + volCCyQuote DECIMAL(30,10) NOT NULL, + buy_sz DECIMAL(20, 10) NOT NULL, + sell_sz DECIMAL(20, 10) NOT NULL, + -- 技术指标字段 + ma1 DOUBLE DEFAULT NULL COMMENT '移动平均线1', + ma2 DOUBLE DEFAULT NULL COMMENT '移动平均线2', + dif DOUBLE DEFAULT NULL COMMENT 'MACD指标DIF线', + dea DOUBLE DEFAULT NULL COMMENT 'MACD指标DEA线', + macd DOUBLE DEFAULT NULL COMMENT 'MACD指标值', + macd_signal VARCHAR(15) DEFAULT NULL COMMENT 'MACD金叉死叉信号', + macd_divergence varchar(25) DEFAULT NULL COMMENT 'MACD背离,顶背离或底背离', + kdj_k DOUBLE DEFAULT NULL COMMENT 'KDJ指标K值', + kdj_d DOUBLE DEFAULT NULL COMMENT 'KDJ指标D值', + kdj_j DOUBLE DEFAULT NULL COMMENT 'KDJ指标J值', + kdj_signal VARCHAR(15) DEFAULT NULL COMMENT 'KDJ金叉死叉信号', + kdj_pattern varchar(25) DEFAULT NULL COMMENT 'KDJ超买,超卖,徘徊', + ma5 DOUBLE DEFAULT NULL COMMENT '5移动平均线', + ma10 DOUBLE DEFAULT NULL COMMENT '10移动平均线', + ma20 DOUBLE DEFAULT NULL COMMENT '20移动平均线', + ma30 DOUBLE DEFAULT NULL COMMENT '30移动平均线', + ma_cross VARCHAR(15) DEFAULT NULL COMMENT '均线交叉信号', + ma5_close_diff double DEFAULT NULL COMMENT '5移动平均线与收盘价差值', + ma10_close_diff double DEFAULT NULL COMMENT '10移动平均线与收盘价差值', + ma20_close_diff double DEFAULT NULL COMMENT '20移动平均线与收盘价差值', + ma30_close_diff double DEFAULT NULL COMMENT '30移动平均线与收盘价差值', + ma_close_avg double DEFAULT NULL COMMENT '收盘价移动平均值', + ma_long_short varchar(25) DEFAULT NULL COMMENT '均线多空', + ma_divergence varchar(25) DEFAULT NULL COMMENT '均线发散,均线粘合,均线适中,均线发散,均线超发散', + rsi_14 DOUBLE DEFAULT NULL COMMENT '14RSI指标', + rsi_signal VARCHAR(15) DEFAULT NULL COMMENT 'RSI强弱信号', + boll_upper DOUBLE DEFAULT NULL COMMENT '布林带上轨', + boll_middle DOUBLE DEFAULT NULL COMMENT '布林带中轨', + boll_lower DOUBLE DEFAULT NULL COMMENT '布林带下轨', + boll_signal VARCHAR(15) DEFAULT NULL COMMENT '布林带强弱信号', + boll_pattern varchar(25) DEFAULT NULL COMMENT 'BOLL超买,超卖,震荡', + k_length varchar(25) DEFAULT NULL COMMENT 'K线长度', + k_shape varchar(25) DEFAULT NULL COMMENT 'K线形状', + k_up_down varchar(25) DEFAULT NULL COMMENT 'K线方向', create_time VARCHAR(50) NOT NULL, UNIQUE KEY uniq_symbol_bar_timestamp (symbol, bar, timestamp) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; + diff --git a/sql/table/crypto_trade_data.sql b/sql/table/crypto_trade_data.sql index 99736ce..d028a6c 100644 --- a/sql/table/crypto_trade_data.sql +++ b/sql/table/crypto_trade_data.sql @@ -32,4 +32,11 @@ CREATE TABLE IF NOT EXISTS crypto_trade_data ( -- idx_ts: 时间戳索引,支持按时间查询 -- idx_date_time: 日期时间索引,支持按日期查询 -- idx_symbol_ts: 交易对+时间戳复合索引,支持按交易对和时间范围查询 --- idx_side_ts: 交易方向+时间戳复合索引,支持按方向和时间范围查询 \ No newline at end of file +-- idx_side_ts: 交易方向+时间戳复合索引,支持按方向和时间范围查询 + +-- 修改字段精度 +-- 将sz字段的小数点精度从8位修改为10位 +ALTER TABLE crypto_trade_data MODIFY COLUMN sz DECIMAL(30,10) NOT NULL COMMENT '交易数量'; + +-- 将px字段的小数点精度从5位修改为10位 +ALTER TABLE crypto_trade_data MODIFY COLUMN px DECIMAL(20,10) NOT NULL COMMENT '交易价格'; \ No newline at end of file diff --git a/test_k_shape.py b/test_k_shape.py new file mode 100644 index 0000000..1bf73a6 --- /dev/null +++ b/test_k_shape.py @@ -0,0 +1,74 @@ +import pandas as pd +import numpy as np +import sys +import os + +from core.biz.metrics_calculation import MetricsCalculation + +def test_k_shape(): + # 创建测试数据 + test_data = pd.DataFrame({ + 'open': [9.3030000000], + 'high': [9.3030000000], + 'low': [9.3020000000], + 'close': [9.3020000000] + }) + + print("测试数据:") + print(test_data) + print() + + # 计算基本特征 + test_data['high_low_diff'] = test_data['high'] - test_data['low'] + test_data['open_close_diff'] = abs(test_data['close'] - test_data['open']) + test_data['open_close_fill'] = test_data['open_close_diff'] / test_data['high_low_diff'] + test_data['price_range_ratio'] = test_data['high_low_diff'] / test_data['close'] * 100 + + print("计算的特征:") + print(f"high_low_diff: {test_data['high_low_diff'].iloc[0]}") + print(f"open_close_diff: {test_data['open_close_diff'].iloc[0]}") + print(f"open_close_fill: {test_data['open_close_fill'].iloc[0]}") + print(f"price_range_ratio: {test_data['price_range_ratio'].iloc[0]}%") + print() + + # 检查"一字"条件 + price_range_ratio = test_data['price_range_ratio'].iloc[0] + open_close_fill = test_data['open_close_fill'].iloc[0] + + print("条件检查:") + print(f"price_range_ratio < 0.01: {price_range_ratio < 0.01}") + print(f"open_close_fill > 0.9: {open_close_fill > 0.9}") + print() + + # 使用MetricsCalculation类 + mc = MetricsCalculation() + + # 为了测试,我们需要创建一个有足够数据的DataFrame + # 复制测试数据多次以创建滚动窗口 + extended_data = pd.concat([test_data] * 25, ignore_index=True) + + # 运行set_k_shape函数 + result = mc.set_k_shape(extended_data.copy()) + + print("分类结果:") + print(f"k_shape: {result['k_shape'].iloc[0]}") + print() + + # 详细分析为什么没有被分类为"一字" + print("详细分析:") + print(f"价格范围比例: {price_range_ratio:.6f}%") + print(f"实体占比: {open_close_fill:.6f}") + print() + + if price_range_ratio < 0.01: + print("✓ 满足价格范围比例 < 0.01% 的条件") + else: + print(f"✗ 不满足价格范围比例 < 0.01% 的条件 (实际: {price_range_ratio:.6f}%)") + + if open_close_fill > 0.9: + print("✓ 满足实体占比 > 0.9 的条件") + else: + print(f"✗ 不满足实体占比 > 0.9 的条件 (实际: {open_close_fill:.6f})") + +if __name__ == "__main__": + test_k_shape() \ No newline at end of file