1220 lines
47 KiB
Python
1220 lines
47 KiB
Python
"""
|
||
均线多空判定模块
|
||
|
||
本模块提供了多种科学的均线多空判定方法,解决了传统方法过于严格的问题。
|
||
|
||
传统方法的问题:
|
||
1. 要求所有均线都严格满足条件(MA5、MA10、MA20、MA30都>0或<0)
|
||
2. 缺乏权重考虑,短期和长期均线影响权重相同
|
||
3. 没有考虑趋势强度,只是简单的正负判断
|
||
4. 缺乏历史对比,使用固定阈值
|
||
|
||
改进方法:
|
||
1. 加权投票机制:短期均线权重更高(MA5:40%, MA10:30%, MA20:20%, MA30:10%)
|
||
2. 趋势强度评估:考虑偏离幅度而非简单正负
|
||
3. 历史分位数对比:动态阈值调整
|
||
4. 趋势一致性:考虑均线排列顺序
|
||
5. 多种判定策略:可根据不同市场环境选择最适合的方法
|
||
|
||
使用示例:
|
||
```python
|
||
# 基本使用(改进后的方法)
|
||
metrics = MetricsCalculation()
|
||
data = metrics.set_ma_long_short_divergence(data)
|
||
|
||
# 高级使用(多种策略)
|
||
# 1. 加权投票机制(推荐)
|
||
data = metrics.set_ma_long_short_advanced(data, method="weighted_voting")
|
||
|
||
# 2. 趋势强度评估
|
||
data = metrics.set_ma_long_short_advanced(data, method="trend_strength")
|
||
|
||
# 3. 均线排列分析
|
||
data = metrics.set_ma_long_short_advanced(data, method="ma_alignment")
|
||
|
||
# 4. 统计分布方法
|
||
data = metrics.set_ma_long_short_advanced(data, method="statistical")
|
||
|
||
# 5. 混合方法
|
||
data = metrics.set_ma_long_short_advanced(data, method="hybrid")
|
||
```
|
||
|
||
判定结果说明:
|
||
- "多":多头趋势,建议做多
|
||
- "空":空头趋势,建议做空
|
||
- "震荡":震荡市场,建议观望或区间交易
|
||
"""
|
||
|
||
import core.logger as logging
|
||
import pandas as pd
|
||
import numpy as np
|
||
import talib as tb
|
||
from talib import MA_Type
|
||
|
||
logger = logging.logger
|
||
|
||
|
||
class MetricsCalculation:
|
||
def __init__(self):
|
||
pass
|
||
|
||
def pre_close(self, df: pd.DataFrame):
    """Add previous-close, change and candle-colour columns to ``df``.

    Columns written (in this order):
        pre_close:    ``close`` shifted down by one row.
        close_change: ``close`` minus ``pre_close``.
        pct_chg:      ``close_change`` as a percentage of ``pre_close``.
        k_up_down:    "阳线" when close >= open, "阴线" when close < open,
                      and "" when neither comparison holds (e.g. NaN).

    The frame is modified in place and also returned.
    """
    previous = df["close"].shift(1)
    delta = df["close"] - previous

    df["pre_close"] = previous
    df["close_change"] = delta
    df["pct_chg"] = delta / previous * 100

    # Candle colour: the two masks are mutually exclusive, the default
    # covers rows where open/close cannot be compared.
    df["k_up_down"] = np.select(
        [df["close"] >= df["open"], df["close"] < df["open"]],
        ["阳线", "阴线"],
        default="",
    )
    return df
|
||
|
||
def macd(self, df: pd.DataFrame):
    """Compute MACD (12, 26, 9) columns and cross signals on ``df``.

    Columns written (in this order):
        ma1, ma2:    fast (12) and slow (26) recursive EMAs of ``close``.
        dif:         ``ma1 - ma2``.
        dea:         recursive EMA (9) of ``dif``.
        macd:        ``2 * (dif - dea)``.
        macd_signal: "金叉" on the row where ``dif`` moves above ``dea``,
                     "死叉" on the row where it stops being above, "" otherwise.

    Each EMA is seeded with the first sample and then updated as
    ``prev * (p - 1) / (p + 1) + value * 2 / (p + 1)``.

    The frame is modified in place and also returned.
    """
    logger.info("计算MACD指标")
    fast_period, slow_period, signal_period = 12, 26, 9

    def _recursive_ema(values, period):
        # Always smooth in float: with an integer ``close`` column an
        # integer buffer would truncate every recursive store.
        decay = (period - 1) / (period + 1)
        smoothed = np.array(values, dtype=float)
        for i in range(1, len(smoothed)):
            smoothed[i] = smoothed[i - 1] * decay + smoothed[i] * (1 - decay)
        return smoothed

    ema_fast = _recursive_ema(df["close"], fast_period)
    ema_slow = _recursive_ema(df["close"], slow_period)
    dif = ema_fast - ema_slow
    dea = _recursive_ema(dif, signal_period)

    df["ma1"] = ema_fast
    df["ma2"] = ema_slow
    df["dif"] = dif
    df["dea"] = dea
    df["macd"] = 2 * (dif - dea)

    # Edge-triggered signals: only the row on which the relation flips is
    # labelled. The first row has no predecessor and is never labelled.
    df["macd_signal"] = ""
    dif_above = df["dif"] > df["dea"]
    was_above = dif_above.shift()
    df.loc[dif_above & was_above.eq(False), "macd_signal"] = "金叉"
    df.loc[~dif_above & was_above.eq(True), "macd_signal"] = "死叉"
    return df
|
||
|
||
def kdj(self, df: pd.DataFrame):
    """Compute KDJ columns and K/D cross signals on ``df``.

    RSV uses the 9-row rolling low/high; rows that do not yet have nine
    observations fall back to the expanding low/high. K is the ``com=2``
    exponentially weighted mean of RSV, D the same smoothing applied to K,
    and J is ``3 * K - 2 * D``.

    ``kdj_signal`` is "金叉" on the row where K moves above D, "死叉" on the
    row where it stops being above, and "" elsewhere.

    The frame is modified in place and also returned.
    """
    logger.info("计算KDJ指标")

    lowest = df["low"].rolling(window=9).min()
    lowest = lowest.fillna(df["low"].expanding().min())
    highest = df["high"].rolling(window=9).max()
    highest = highest.fillna(df["high"].expanding().max())

    rsv = (df["close"] - lowest) / (highest - lowest) * 100
    df["kdj_k"] = rsv.ewm(com=2).mean()
    df["kdj_d"] = df["kdj_k"].ewm(com=2).mean()
    df["kdj_j"] = 3 * df["kdj_k"] - 2 * df["kdj_d"]

    df["kdj_signal"] = ""
    k_above_d = df["kdj_k"] > df["kdj_d"]
    previously_above = k_above_d.shift()

    # Label only the rows where the K/D relation flips.
    turned_up = k_above_d & (previously_above == False)  # noqa: E712
    turned_down = ~k_above_d & (previously_above == True)  # noqa: E712
    df.loc[turned_up[turned_up].index, "kdj_signal"] = "金叉"
    df.loc[turned_down[turned_down].index, "kdj_signal"] = "死叉"
    return df
|
||
|
||
def sar(self, df: pd.DataFrame, acceleration=0.02, maximum=0.2):
    """Compute the Parabolic SAR and a per-row position label.

    Args:
        df: frame with ``high``, ``low`` and ``close`` columns.
        acceleration: acceleration factor passed to ``tb.SAR`` (default 0.02).
        maximum: acceleration ceiling passed to ``tb.SAR`` (default 0.2).

    Columns written:
        sar:        the TA-Lib SAR value, as float.
        sar_signal: "SAR多头" when SAR < close, "SAR空头" when SAR > close,
                    "SAR观望" when SAR is NaN or equal to close.

    The frame is modified in place and also returned.
    """
    logger.info(f"计算SAR指标,acceleration={acceleration}, maximum={maximum}")

    # Placeholders so both columns exist before the indicator call.
    df["sar"] = np.nan
    df["sar_signal"] = ""

    df["sar"] = tb.SAR(
        df["high"], df["low"], acceleration=acceleration, maximum=maximum
    )

    # First matching rule wins; NaN SAR is checked first so it is always
    # reported as "wait".
    rules = [
        (df["sar"].isna(), "SAR观望"),
        (df["sar"] < df["close"], "SAR多头"),
        (df["sar"] > df["close"], "SAR空头"),
    ]
    df["sar_signal"] = np.select(
        [mask for mask, _ in rules],
        [label for _, label in rules],
        default="SAR观望",
    )

    df["sar"] = df["sar"].astype(float)
    df["sar_signal"] = df["sar_signal"].astype(str)
    return df
|
||
|
||
def set_kdj_pattern(self, df: pd.DataFrame):
    """Label each row's KDJ overbought/oversold state in ``kdj_pattern``.

    Rules, evaluated in priority order (first match wins):
        "超超买": K > 80, D > 80 and J > 90
        "超买":   K > 70, D > 70 and J > 80
        "超超卖": K < 20, D < 20 and J < 10
        "超卖":   K < 30, D < 30 and J < 20
        "徘徊":   everything else

    The extreme tiers are strict subsets of the ordinary tiers, so they must
    be matched first; writing the broader tier afterwards would erase them.

    Requires ``kdj_k``, ``kdj_d`` and ``kdj_j`` columns. The frame is
    modified in place and also returned.
    """
    logger.info("设置KDJ形态")

    k, d, j = df["kdj_k"], df["kdj_d"], df["kdj_j"]
    tiers = [
        ((k > 80) & (d > 80) & (j > 90), "超超买"),
        ((k > 70) & (d > 70) & (j > 80), "超买"),
        ((k < 20) & (d < 20) & (j < 10), "超超卖"),
        ((k < 30) & (d < 30) & (j < 20), "超卖"),
    ]
    df["kdj_pattern"] = np.select(
        [mask for mask, _ in tiers],
        [label for _, label in tiers],
        default="徘徊",
    )
    return df
|
||
|
||
def calculate_ma_price_percent(self, data: pd.DataFrame):
    """Add the percentage gap between ``close`` and each moving average.

    For each of ``ma5``, ``ma10``, ``ma20`` and ``ma30`` this writes
    ``<ma>_close_diff = (close - ma) / close * 100`` and finally
    ``ma_close_avg``, the sum of the four gaps divided by four (so a NaN in
    any gap makes the average NaN).

    The frame is modified in place and also returned.
    """
    column_pairs = (
        ("ma5", "ma5_close_diff"),
        ("ma10", "ma10_close_diff"),
        ("ma20", "ma20_close_diff"),
        ("ma30", "ma30_close_diff"),
    )

    running_total = None
    for ma_column, diff_column in column_pairs:
        data[diff_column] = (data["close"] - data[ma_column]) / (data["close"]) * 100
        if running_total is None:
            running_total = data[diff_column]
        else:
            running_total = running_total + data[diff_column]

    data["ma_close_avg"] = running_total / 4
    return data
|
||
|
||
def set_ma_long_short_divergence(self, data: pd.DataFrame):
    """Set ``ma_long_short`` and ``ma_divergence`` on ``data``.

    ``ma_long_short`` is reset to "震荡" and then updated by
    ``self._trend_strength_method``.

    ``ma_divergence`` is derived from the four ``ma*_close_diff`` columns.
    For each row the standard deviation (``std``) and the absolute mean
    (``mean``) of the four gaps are compared against the 25th/50th/75th
    percentiles of those statistics over the whole frame:

        "超发散": std > p75 and mean > p75
        "发散":   std in (p50, p75] or mean in (p50, p75]
        "适中":   std in (p25, p50] and mean <= p50
        "粘合":   std <= p25
        "发散":   any row still unlabelled whose std > p50 or mean > p50
        "未知":   rows where the statistics cannot be compared (e.g. NaN)

    Rules are applied in that order and never overwrite an earlier label.
    The second "发散" rule closes a gap: a row that is extreme on only one
    axis (for example std above p75 but mean below p50) matches none of the
    first four rules and would otherwise be reported as "未知".

    The frame is modified in place and also returned.
    """
    logger.info("设置均线多空和发散")

    # Long/short: start from "震荡" and let the trend-strength rule promote
    # rows to "多" / "空".
    data["ma_long_short"] = "震荡"
    data = self._trend_strength_method(data)

    data["ma_divergence"] = "未知"
    ma_diffs = data[
        ["ma5_close_diff", "ma10_close_diff", "ma20_close_diff", "ma30_close_diff"]
    ]
    ma_std = ma_diffs.std(axis=1)
    abs_ma_mean = ma_diffs.mean(axis=1).abs()

    # Thresholds come from the distribution over the whole frame.
    std_25, std_50, std_75 = ma_std.quantile([0.25, 0.50, 0.75])
    _, mean_50, mean_75 = abs_ma_mean.quantile([0.25, 0.50, 0.75])

    def _label_unassigned(mask, label):
        # Only touch rows that no earlier rule has claimed.
        data.loc[mask & (data["ma_divergence"] == "未知"), "ma_divergence"] = label

    _label_unassigned((ma_std > std_75) & (abs_ma_mean > mean_75), "超发散")
    _label_unassigned(
        ((ma_std > std_50) & (ma_std <= std_75))
        | ((abs_ma_mean > mean_50) & (abs_ma_mean <= mean_75)),
        "发散",
    )
    _label_unassigned(
        (ma_std > std_25) & (ma_std <= std_50) & (abs_ma_mean <= mean_50),
        "适中",
    )
    _label_unassigned(ma_std <= std_25, "粘合")

    # Rows above the median on either axis that fell through every rule
    # above (extreme on one axis only) are still divergent.
    _label_unassigned((ma_std > std_50) | (abs_ma_mean > mean_50), "发散")

    return data
|
||
|
||
def update_macd_divergence_column(self, df: pd.DataFrame):
    """Label MACD top/bottom divergence for every row.

    The frame is sorted by ``timestamp`` (ascending) and re-indexed from 0;
    the sorted copy is returned with a ``macd_divergence`` column holding
    "顶背离", "底背离" or "未知". The first row has no predecessor and is
    always "未知".

    A row is a top divergence when ``kdj_j > 70`` and either
      * close rose versus the previous row while ``dif`` and ``macd`` did
        not, or
      * close is within 5% of its running high while ``dif`` and ``macd``
        are at least 20% below their running highs.

    A row is a bottom divergence when ``kdj_j < 20`` and either
      * close fell versus the previous row while ``dif`` and ``macd`` did
        not, or
      * close is within 5% of its running low while ``dif`` and ``macd``
        are at least 20% above their running lows.

    "20% below/above" is measured on the magnitude of the extreme, so it
    stays meaningful when the running high/low is negative (multiplying a
    negative low by 1.2 would move the threshold further down and make the
    check always pass).

    Running extremes are computed with cumulative max/min instead of
    rescanning the history for every row.

    :param df: frame with timestamp, close, dif, macd and kdj_j columns
    :return: ``df`` unchanged when it is None, empty or lacks a required
        column; otherwise the sorted frame with ``macd_divergence`` set
    """
    if df is None or df.empty:
        return df

    required_columns = ["timestamp", "close", "dif", "macd", "kdj_j"]
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        logger.info(f"缺少必要的列: {missing_columns}")
        return df

    df = df.sort_values("timestamp").reset_index(drop=True)
    df["macd_divergence"] = "未知"

    close, dif, macd, kdj_j = df["close"], df["dif"], df["macd"], df["kdj_j"]

    def _scale(extreme, if_non_negative, if_negative):
        # Pick the factor by sign so the threshold always moves in the
        # intended direction relative to the extreme.
        return extreme * np.where(extreme >= 0, if_non_negative, if_negative)

    # Row-over-row divergence.
    step_top = (
        (close > close.shift(1))
        & (dif <= dif.shift(1))
        & (macd <= macd.shift(1))
    )
    step_bottom = (
        (close < close.shift(1))
        & (dif >= dif.shift(1))
        & (macd >= macd.shift(1))
    )

    # Divergence against the running extremes (current row included).
    history_top = (
        (close >= close.cummax() * 0.95)
        & (dif <= _scale(dif.cummax(), 0.8, 1.2))
        & (macd <= _scale(macd.cummax(), 0.8, 1.2))
    )
    history_bottom = (
        (close <= close.cummin() * 1.05)
        & (dif >= _scale(dif.cummin(), 1.2, 0.8))
        & (macd >= _scale(macd.cummin(), 1.2, 0.8))
    )

    has_previous = np.arange(len(df)) >= 1
    is_top = (kdj_j > 70) & (step_top | history_top) & has_previous
    is_bottom = (kdj_j < 20) & (step_bottom | history_bottom) & has_previous

    # The KDJ gates are mutually exclusive, so the two masks never overlap.
    df.loc[is_top, "macd_divergence"] = "顶背离"
    df.loc[is_bottom, "macd_divergence"] = "底背离"
    return df
|
||
|
||
def update_macd_divergence_column_simple(
    self, df: pd.DataFrame, window_size: int = 20
):
    """Label MACD divergence using a trailing window of extremes.

    The frame is sorted by ``timestamp`` (ascending) and re-indexed from 0;
    the sorted copy is returned with a ``macd_divergence`` column holding
    "顶背离", "底背离" or "未知". Each row from position ``window_size``
    onwards is compared against the extremes of itself plus the
    ``window_size`` preceding rows; earlier rows stay "未知".

    Top divergence: ``kdj_j > 70``, close within 2% of the window high, and
    both ``dif`` and ``macd`` at least 15% below their window highs.

    Bottom divergence: ``kdj_j < 20``, close within 2% of the window low,
    and both ``dif`` and ``macd`` at least 15% above their window lows.

    "15% below/above" is measured on the magnitude of the extreme, so the
    check stays meaningful when the window high/low is negative.

    :param df: frame with timestamp, close, dif, macd and kdj_j columns
    :param window_size: number of preceding rows in the comparison window
    :return: ``df`` unchanged when it is None, empty or lacks a required
        column; otherwise the sorted frame with ``macd_divergence`` set
    :raises ValueError: if ``window_size`` is negative
    """
    if df is None or df.empty:
        return df

    required_columns = ["timestamp", "close", "dif", "macd", "kdj_j"]
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        logger.info(f"缺少必要的列: {missing_columns}")
        return df

    if window_size < 0:
        # A negative window would index from the end of the frame and
        # append bogus rows; refuse it explicitly.
        raise ValueError("window_size must be non-negative")

    df = df.sort_values("timestamp").reset_index(drop=True)
    df["macd_divergence"] = "未知"

    close, dif, macd, kdj_j = df["close"], df["dif"], df["macd"], df["kdj_j"]

    def _window(series):
        # Current row plus the ``window_size`` rows before it; min_periods=1
        # keeps NaN-skipping extremes, short windows are masked out below.
        return series.rolling(window=window_size + 1, min_periods=1)

    def _scale(extreme, if_non_negative, if_negative):
        # Choose the factor by sign so the threshold moves the intended way.
        return extreme * np.where(extreme >= 0, if_non_negative, if_negative)

    is_top = (
        (kdj_j > 70)
        & (close >= _window(close).max() * 0.98)
        & (dif <= _scale(_window(dif).max(), 0.85, 1.15))
        & (macd <= _scale(_window(macd).max(), 0.85, 1.15))
    )
    is_bottom = (
        (kdj_j < 20)
        & (close <= _window(close).min() * 1.02)
        & (dif >= _scale(_window(dif).min(), 1.15, 0.85))
        & (macd >= _scale(_window(macd).min(), 1.15, 0.85))
    )

    # Only rows with a full window behind them are evaluated.
    has_full_window = np.arange(len(df)) >= window_size
    df.loc[is_top & has_full_window, "macd_divergence"] = "顶背离"
    df.loc[is_bottom & has_full_window, "macd_divergence"] = "底背离"
    return df
|
||
|
||
def ma5102030(self, df: pd.DataFrame):
    """Compute MA5/10/20/30 of ``close`` and label moving-average crosses.

    Columns written: ``ma5``, ``ma10``, ``ma20``, ``ma30`` (simple rolling
    means, NaN until the window is full) and ``ma_cross``.

    For every pair (short, long) an up-cross is the row on which
    ``short > long`` becomes true and a down-cross the row on which it stops
    being true; the first row is never a cross. Up-crosses are named
    "<short>上穿<long>", down-crosses "<long>下穿<short>".

    ``ma_cross`` per row:
      * several up-crosses      -> their names, sorted, joined by ","
      * else several down-crosses -> their names, sorted, joined by ","
      * else one or more crosses  -> the first name in detection order
        (up-crosses are listed before down-crosses)
      * no cross                  -> ""

    Rows are addressed by position, so the result does not depend on the
    index labels being unique. The frame is modified in place and returned.
    """
    logger.info("计算均线指标")
    for period in (5, 10, 20, 30):
        df[f"ma{period}"] = df["close"].rolling(window=period).mean()

    row_count = len(df)

    def _detect_cross(short_ma, long_ma):
        """Return (up, down) boolean arrays for one pair of averages."""
        above = (df[short_ma] > df[long_ma]).to_numpy()
        up = np.zeros(row_count, dtype=bool)
        down = np.zeros(row_count, dtype=bool)
        # Row 0 has no predecessor, so it can never be a cross.
        up[1:] = above[1:] & ~above[:-1]
        down[1:] = ~above[1:] & above[:-1]
        return up, down

    ma5_ma10_up, ma5_ma10_down = _detect_cross("ma5", "ma10")
    ma5_ma20_up, ma5_ma20_down = _detect_cross("ma5", "ma20")
    ma5_ma30_up, ma5_ma30_down = _detect_cross("ma5", "ma30")
    ma10_ma20_up, ma10_ma20_down = _detect_cross("ma10", "ma20")
    ma10_ma30_up, ma10_ma30_down = _detect_cross("ma10", "ma30")
    ma20_ma30_up, ma20_ma30_down = _detect_cross("ma20", "ma30")

    # Detection order matters: it decides which name is reported when a row
    # has exactly one up-cross and one down-cross.
    crosses = {
        "5上穿10": ma5_ma10_up,
        "5上穿20": ma5_ma20_up,
        "5上穿30": ma5_ma30_up,
        "10上穿20": ma10_ma20_up,
        "10上穿30": ma10_ma30_up,
        "20上穿30": ma20_ma30_up,
        "10下穿5": ma5_ma10_down,
        "20下穿10": ma10_ma20_down,
        "20下穿5": ma5_ma20_down,
        "30下穿20": ma20_ma30_down,
        "30下穿10": ma10_ma30_down,
        "30下穿5": ma5_ma30_down,
    }
    cross_names = list(crosses)
    cross_matrix = np.column_stack([crosses[name] for name in cross_names])

    labels = [""] * row_count
    # Only rows that actually contain a cross need per-row work.
    for row in np.flatnonzero(cross_matrix.any(axis=1)):
        current_crosses = [
            cross_names[col] for col in np.flatnonzero(cross_matrix[row])
        ]
        up_crosses = [c for c in current_crosses if "上穿" in c]
        down_crosses = [c for c in current_crosses if "下穿" in c]

        if len(up_crosses) > 1:
            labels[row] = ",".join(sorted(up_crosses))
        elif len(down_crosses) > 1:
            labels[row] = ",".join(sorted(down_crosses))
        else:
            labels[row] = current_crosses[0]

    df["ma_cross"] = labels
    return df
|
||
|
||
def rsi(self, df: pd.DataFrame):
    """Compute the 14-period RSI and threshold-entry signals.

    Columns written:
        rsi_14:     ``tb.RSI`` of ``close`` with ``timeperiod=14``.
        rsi_signal: "超买" on the row where RSI moves above 70, "超卖" on the
                    row where it moves below 30, "" otherwise.

    The frame is modified in place and also returned.
    """
    logger.info("计算RSI指标")
    df["rsi_14"] = tb.RSI(df["close"].values, timeperiod=14)
    df["rsi_signal"] = ""

    zones = (
        (df["rsi_14"] > 70, "超买"),
        (df["rsi_14"] < 30, "超卖"),
    )
    for in_zone, label in zones:
        # Edge trigger: in the zone now, explicitly outside it one row ago.
        just_entered = in_zone & (in_zone.shift() == False)  # noqa: E712
        df.loc[just_entered[just_entered].index, "rsi_signal"] = label
    return df
|
||
|
||
def boll(self, df: pd.DataFrame):
    """Add 20-period SMA Bollinger Bands of ``close`` to ``df``.

    Writes ``boll_upper``, ``boll_middle`` and ``boll_lower`` from
    ``tb.BBANDS`` and returns the (in-place modified) frame.
    """
    logger.info("计算BOLL指标")
    upper_band, middle_band, lower_band = tb.BBANDS(
        df["close"].values, timeperiod=20, matype=MA_Type.SMA
    )
    df["boll_upper"] = upper_band
    df["boll_middle"] = middle_band
    df["boll_lower"] = lower_band
    return df
|
||
|
||
def set_boll_pattern(self, df: pd.DataFrame):
    """Set ``boll_pattern`` and ``boll_signal`` from price vs. Bollinger Bands.

    ``boll_pattern`` (first match wins, default "震荡"):
        "超超买": close >= upper band and kdj_j > 80
        "超买":   close within 2% of the upper band and kdj_j > 80
        "超超卖": close <= lower band and kdj_j < 20
        "超卖":   close within 2% of the lower band and kdj_j < 20

    ``boll_signal`` (edge-triggered, default ""):
        "突破下轨": close is above the lower band while the previous close
                    was below the previous lower band
        "击穿上轨": close is below the upper band while the previous close
                    was above the previous upper band

    Requires ``close``, ``boll_upper``, ``boll_lower`` and ``kdj_j``. If any
    is missing, ``boll_pattern`` is left at "震荡" and the frame is returned
    without ``boll_signal``. The previous close is read from ``pre_close``
    when that column exists and otherwise taken as ``close`` shifted by one
    row, so a missing ``pre_close`` no longer raises ``KeyError``.

    Intermediate distances are kept in local variables; no scratch columns
    are written to (or dropped from) the caller's frame. The frame is
    modified in place and also returned.
    """
    logger.info("设置BOLL形态")
    df["boll_pattern"] = "震荡"

    required_columns = ["close", "boll_upper", "boll_lower", "kdj_j"]
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        logger.info(f"缺少必要的列: {missing_columns}")
        return df

    close = df["close"]
    upper_band = df["boll_upper"]
    lower_band = df["boll_lower"]
    overbought = df["kdj_j"] > 80
    oversold = df["kdj_j"] < 20

    # Distance to each band as a percentage of the close.
    upper_distance = (close - upper_band).abs() / close * 100
    lower_distance = (close - lower_band).abs() / close * 100

    tiers = [
        ((close >= upper_band) & overbought, "超超买"),
        ((upper_distance <= 2) & overbought, "超买"),
        ((close <= lower_band) & oversold, "超超卖"),
        ((lower_distance <= 2) & oversold, "超卖"),
    ]
    df["boll_pattern"] = np.select(
        [mask for mask, _ in tiers],
        [label for _, label in tiers],
        default="震荡",
    )

    df["boll_signal"] = ""
    previous_close = (
        df["pre_close"] if "pre_close" in df.columns else close.shift(1)
    )

    def _rising_edge(condition):
        # True only on the row where ``condition`` switches from False to True.
        return condition & condition.shift().eq(False)

    back_above_lower = (close > lower_band) & (previous_close < lower_band.shift())
    back_below_upper = (close < upper_band) & (previous_close > upper_band.shift())

    df.loc[_rising_edge(back_above_lower), "boll_signal"] = "突破下轨"
    df.loc[_rising_edge(back_below_upper), "boll_signal"] = "击穿上轨"
    return df
|
||
|
||
def set_k_length(self, df: pd.DataFrame):
    """Classify each candle's length into ``k_length``.

    Two quantities are standardised against a trailing window of up to 20
    rows (rolling mean/std, ``min_periods=1``): the full range
    ``high - low`` and the body ``|close - open|``. Undefined or infinite
    Z-scores (e.g. zero or missing std) are treated as 0.

    Labels (first match wins):
        "超长": range Z > 1.5 and body Z > 1.0
        "长":   range Z > 0.8 or body Z > 0.8
        "短":   range Z < -0.8 and body Z < -0.5
        "中":   everything else

    Requires ``close``, ``open``, ``high`` and ``low``; if any is missing the
    frame is returned unchanged. All intermediates are local, so no scratch
    columns are added to or removed from the caller's frame. The frame is
    modified in place and also returned.
    """
    logger.info("设置K线长度")

    required_columns = ["close", "open", "high", "low"]
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        logger.info(f"缺少必要的列: {missing_columns}")
        return df

    body = (df["close"] - df["open"]).abs()
    total_range = df["high"] - df["low"]

    # At least 1 so that an empty frame does not request a zero-width window.
    window_size = max(1, min(20, len(df)))

    def _rolling_zscore(series):
        stats = series.rolling(window=window_size, min_periods=1)
        zscore = (series - stats.mean()) / stats.std()
        return zscore.replace([np.inf, -np.inf], 0).fillna(0)

    range_zscore = _rolling_zscore(total_range)
    body_zscore = _rolling_zscore(body)

    tiers = [
        ((range_zscore > 1.5) & (body_zscore > 1.0), "超长"),
        ((range_zscore > 0.8) | (body_zscore > 0.8), "长"),
        ((range_zscore < -0.8) & (body_zscore < -0.5), "短"),
    ]
    df["k_length"] = np.select(
        [mask for mask, _ in tiers],
        [label for _, label in tiers],
        default="中",
    )
    return df
|
||
|
||
def set_k_shape(self, df: pd.DataFrame):
    """Classify each candle's shape into ``k_shape``.

    Definitions (all relative to the candle's ``high - low`` range):
        fill  = |close - open| / range        (1.0 when the range is 0)
        upper = (high - max(open, close)) / range   (0 when the range is 0)
        lower = (min(open, close) - low) / range    (0 when the range is 0)
        range% = range / close * 100, standardised and ranked against a
                 trailing window of up to 20 rows

    Labels, first match wins:
        "一字"             range% Z < -1.0 or range% < 0.05, upper <= 0.01,
                           lower <= 0.01 and |close - open| / close < 0.0005
        "超大实体+光头光脚" fill > 0.9 (and <= 1) with range% >= its rolling
                           75th percentile
        "超大实体"          0.75 < fill <= 1 with range% >= its rolling 75th
                           percentile
        "长吊锤线"          fill <= 0.3, upper < 0.25, fill < 0.1
        "吊锤线"            fill <= 0.3, upper < 0.25, fill >= 0.1
        "长倒T线"           fill <= 0.3, lower < 0.25, fill < 0.1
        "倒T线"             fill <= 0.3, lower < 0.25, fill >= 0.1
        "长十字星"          fill <= 0.3, upper >= 0.25, lower >= 0.25, fill < 0.1
        "十字星"            fill <= 0.3, upper >= 0.25, lower >= 0.25, fill >= 0.1
        "长上影线纺锤体"    0.3 < fill <= 0.55, upper >= 0.25
        "长下影线纺锤体"    0.3 < fill <= 0.55, lower >= 0.25
        "小实体"            0.3 < fill <= 0.55
        "大实体"            0.55 < fill <= 0.9
        "光头光脚"          fill > 0.9
        "未知"              none of the above

    Requires ``close``, ``open``, ``high`` and ``low``; if any is missing the
    frame is returned unchanged. All intermediates are local, so no scratch
    columns are added to or removed from the caller's frame. The frame is
    modified in place and also returned.
    """
    logger.info("设置K线形状")

    required_columns = ["close", "open", "high", "low"]
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        logger.info(f"缺少必要的列: {missing_columns}")
        return df

    # --- basic geometry -------------------------------------------------
    full_range = df["high"] - df["low"]
    body = (df["close"] - df["open"]).abs()
    upper_shadow = df["high"] - df[["open", "close"]].max(axis=1)
    lower_shadow = df[["open", "close"]].min(axis=1) - df["low"]

    # A zero range would divide by zero; treat such candles as all body
    # with no shadows.
    safe_range = full_range.replace(0, np.nan)
    fill = (body / safe_range).fillna(1.0)
    upper = (upper_shadow / safe_range).fillna(0)
    lower = (lower_shadow / safe_range).fillna(0)

    # --- range relative to price, against a trailing window --------------
    range_pct = full_range / df["close"] * 100
    # At least 1 so that an empty frame does not request a zero-width window.
    window_size = max(1, min(20, len(df)))
    trailing = range_pct.rolling(window=window_size, min_periods=1)
    range_zscore = (
        ((range_pct - trailing.mean()) / trailing.std())
        .replace([np.inf, -np.inf], 0)
        .fillna(0)
    )
    range_pct_p75 = trailing.quantile(0.75)

    # --- classification masks --------------------------------------------
    one_line = (
        ((range_zscore < -1.0) | (range_pct < 0.05))
        & (upper <= 0.01)
        & (lower <= 0.01)
        & (body / df["close"] < 0.0005)
    )
    regular = ~one_line  # every other rule excludes "一字" candles

    thin_body = regular & (fill <= 0.3)
    spindle = regular & (fill > 0.3) & (fill <= 0.55)
    large_body = regular & (fill > 0.55) & (fill <= 0.9)
    bald = regular & (fill > 0.9)
    super_large = (
        regular & (fill > 0.75) & (fill <= 1) & (range_pct >= range_pct_p75)
    )

    tiny = fill < 0.1
    not_tiny = fill >= 0.1
    short_upper = upper < 0.25
    short_lower = lower < 0.25
    long_upper = upper >= 0.25
    long_lower = lower >= 0.25

    # Order encodes priority: an earlier rule shadows every later one.
    rules = [
        (one_line, "一字"),
        (super_large & bald, "超大实体+光头光脚"),
        (super_large, "超大实体"),
        (thin_body & short_upper & tiny, "长吊锤线"),
        (thin_body & short_upper & not_tiny, "吊锤线"),
        (thin_body & short_lower & tiny, "长倒T线"),
        (thin_body & short_lower & not_tiny, "倒T线"),
        (thin_body & long_upper & long_lower & tiny, "长十字星"),
        (thin_body & long_upper & long_lower & not_tiny, "十字星"),
        (spindle & long_upper, "长上影线纺锤体"),
        (spindle & long_lower, "长下影线纺锤体"),
        (spindle, "小实体"),
        (large_body, "大实体"),
        (bald, "光头光脚"),
    ]
    df["k_shape"] = np.select(
        [mask for mask, _ in rules],
        [label for _, label in rules],
        default="未知",
    )
    return df
|
||
|
||
def set_ma_long_short_advanced(self, data: pd.DataFrame, method="weighted_voting"):
    """Set ``ma_long_short`` using one of several strategies.

    Args:
        data: frame holding the moving-average deviation columns the chosen
            strategy reads.
        method: strategy name
            - "weighted_voting": weighted vote (default)
            - "trend_strength": trend strength
            - "ma_alignment": moving-average alignment
            - "statistical": statistical distribution
            - "hybrid": hybrid
            Any other value logs a warning and falls back to
            "weighted_voting".

    Returns:
        Whatever the selected strategy method returns.

    The strategies only write "多" / "空" on the rows they match. When the
    frame has no ``ma_long_short`` column yet it is created as "震荡" first,
    so unmatched rows carry the neutral label instead of NaN. An existing
    column is left as it is.
    """
    logger.info(f"使用{method}方法设置均线多空")

    # Strategy name -> method name; resolved lazily so only the selected
    # strategy has to exist on the instance.
    strategies = {
        "weighted_voting": "_weighted_voting_method",
        "trend_strength": "_trend_strength_method",
        "ma_alignment": "_ma_alignment_method",
        "statistical": "_statistical_method",
        "hybrid": "_hybrid_method",
    }
    handler_name = strategies.get(method) if isinstance(method, str) else None
    if handler_name is None:
        logger.warning(f"未知的方法: {method},使用默认加权投票方法")
        handler_name = strategies["weighted_voting"]

    if "ma_long_short" not in data.columns:
        data["ma_long_short"] = "震荡"

    return getattr(self, handler_name)(data)
|
||
|
||
def _weighted_voting_method(self, data: pd.DataFrame):
|
||
"""加权投票机制:短期均线权重更高"""
|
||
# 权重设置:短期均线权重更高
|
||
weights = {
|
||
"ma5_close_diff": 0.4, # 40%权重
|
||
"ma10_close_diff": 0.3, # 30%权重
|
||
"ma20_close_diff": 0.2, # 20%权重
|
||
"ma30_close_diff": 0.1, # 10%权重
|
||
}
|
||
|
||
# 计算加权得分
|
||
weighted_score = sum(data[col] * weight for col, weight in weights.items())
|
||
|
||
# 动态阈值:基于历史分布
|
||
window_size = min(50, len(data) // 4)
|
||
if window_size > 10:
|
||
threshold_25 = weighted_score.rolling(window=window_size).quantile(0.25)
|
||
threshold_75 = weighted_score.rolling(window=window_size).quantile(0.75)
|
||
long_threshold = threshold_25 * 0.3
|
||
short_threshold = threshold_75 * 0.3
|
||
else:
|
||
long_threshold = 0.3
|
||
short_threshold = -0.3
|
||
|
||
# 判定逻辑
|
||
data.loc[weighted_score > long_threshold, "ma_long_short"] = "多"
|
||
data.loc[weighted_score < short_threshold, "ma_long_short"] = "空"
|
||
|
||
return data
|
||
|
||
def _trend_strength_method(self, data: pd.DataFrame):
    """
    Trend-strength strategy: deviation magnitude combined with persistence.

    A row is labelled "多" when ``ma_close_avg`` is above 0.5 and the
    persistence counter returned by ``_calculate_trend_persistence`` is at
    least 3; it is labelled "空" when ``ma_close_avg`` is below -0.5 with the
    same persistence requirement. Other rows are left untouched.

    Args:
        data: DataFrame containing the ``ma_close_avg`` column. It is
            modified in place (column ``ma_long_short``).

    Returns:
        The same DataFrame object that was passed in.
    """
    min_strength = 0.5
    min_persistence = 3  # minimum value of the persistence counter

    # Strength is the average deviation itself; persistence is the streak
    # counter computed by the helper.
    avg_deviation = data["ma_close_avg"]
    streak = self._calculate_trend_persistence(data)

    # Build both masks before writing anything back.
    is_long = (avg_deviation > min_strength) & (streak >= min_persistence)
    is_short = (avg_deviation < -min_strength) & (streak >= min_persistence)

    data.loc[is_long, "ma_long_short"] = "多"
    data.loc[is_short, "ma_long_short"] = "空"
    return data
|
||
|
||
def _ma_alignment_method(self, data: pd.DataFrame):
    """
    Alignment strategy: ordering of the MA deviation columns plus their spacing.

    A row is labelled "多" when
    ``ma5_close_diff > ma10_close_diff > ma20_close_diff > ma30_close_diff``
    and labelled "空" when the ordering is strictly reversed; in both cases
    the average spacing returned by ``_calculate_ma_spacing`` must exceed
    0.2. Other rows are left untouched.

    Args:
        data: DataFrame containing the four ``ma*_close_diff`` columns.
            It is modified in place (column ``ma_long_short``).

    Returns:
        The same DataFrame object that was passed in.
    """
    diff_5 = data["ma5_close_diff"]
    diff_10 = data["ma10_close_diff"]
    diff_20 = data["ma20_close_diff"]
    diff_30 = data["ma30_close_diff"]

    # NOTE(review): the ordering is checked on the *_close_diff deviation
    # columns, not on the raw MA values. Whether it corresponds to
    # "MA5 > MA10 > MA20 > MA30" depends on how the deviation is defined
    # elsewhere in the file -- confirm.
    bullish_alignment = (diff_5 > diff_10) & (diff_10 > diff_20) & (diff_20 > diff_30)
    bearish_alignment = (diff_5 < diff_10) & (diff_10 < diff_20) & (diff_20 < diff_30)

    # Require the lines to be meaningfully spread apart, so that a
    # near-flat bundle of averages is not treated as a trend.
    ma_spacing = self._calculate_ma_spacing(data)
    long_condition = bullish_alignment & (ma_spacing > 0.2)
    short_condition = bearish_alignment & (ma_spacing > 0.2)

    data.loc[long_condition, "ma_long_short"] = "多"
    data.loc[short_condition, "ma_long_short"] = "空"

    return data
|
||
|
||
def _statistical_method(self, data: pd.DataFrame):
    """
    Statistical strategy: rolling Z-score of the MA deviation columns.

    With a window of ``min(30, len(data) // 4)`` rows, each of the four
    ``ma*_close_diff`` columns is converted to a rolling Z-score
    (value minus rolling mean, divided by rolling standard deviation). The
    row-wise mean of those Z-scores labels a row "多" when above 0.5 and
    "空" when below -0.5. When the window would be 10 rows or fewer, nothing
    is labelled.

    Args:
        data: DataFrame containing the four ``ma*_close_diff`` columns.
            It is modified in place (column ``ma_long_short``).

    Returns:
        The same DataFrame object that was passed in.
    """
    deviation_columns = [
        "ma5_close_diff",
        "ma10_close_diff",
        "ma20_close_diff",
        "ma30_close_diff",
    ]

    lookback = min(30, len(data) // 4)
    if lookback <= 10:
        # Not enough history for a rolling window: leave data as it is.
        return data

    # Rolling Z-score of all four columns at once (computed column by column).
    deviations = data[deviation_columns]
    windowed = deviations.rolling(window=lookback)
    per_column_z = (deviations - windowed.mean()) / windowed.std()

    # Combine the four Z-scores into one value per row.
    combined_z = per_column_z.mean(axis=1)

    is_long = combined_z > 0.5
    is_short = combined_z < -0.5
    data.loc[is_long, "ma_long_short"] = "多"
    data.loc[is_short, "ma_long_short"] = "空"
    return data
|
||
|
||
def _hybrid_method(self, data: pd.DataFrame):
    """
    Hybrid strategy: blend of weighted vote, alignment and strength scores.

    The composite score of a row is
    ``0.4 * weighted_score + 0.3 * alignment_score + 0.3 * strength_score``:

    * ``weighted_score``: 0.4/0.3/0.2/0.1 weighted sum of the
      ``ma5/ma10/ma20/ma30_close_diff`` columns.
    * ``alignment_score``: 0.25 for each of ``ma5 >= ma10``,
      ``ma10 >= ma20``, ``ma20 >= ma30`` (on the deviation columns) and
      ``ma_close_avg > 0`` that holds, so it lies in [0, 1].
    * ``strength_score``: absolute value of ``ma_close_avg``.

    Rows scoring above the long threshold are labelled "多", rows scoring
    below the short threshold are labelled "空", other rows are left
    untouched.

    Thresholds:
        * When ``min(50, len(data) // 4)`` exceeds 10, that value is used as
          a rolling window: the long threshold is 0.4 times the rolling 75th
          percentile of the composite score and the short threshold is 0.4
          times the rolling 25th percentile. Until the window is full the
          quantiles are NaN, so those rows match neither condition.
        * Otherwise fixed thresholds of 0.4 (long) and -0.4 (short) apply.

    Args:
        data: DataFrame containing the four ``ma*_close_diff`` columns and
            ``ma_close_avg``. It is modified in place (column
            ``ma_long_short``).

    Returns:
        The same DataFrame object that was passed in.
    """
    # 1. Weighted vote score (shorter averages weigh more).
    weights = {
        "ma5_close_diff": 0.4,
        "ma10_close_diff": 0.3,
        "ma20_close_diff": 0.2,
        "ma30_close_diff": 0.1,
    }
    weighted_score = sum(data[col] * weight for col, weight in weights.items())

    # 2. Alignment score: a quarter point per satisfied ordering check.
    alignment_score = (
        (data["ma5_close_diff"] >= data["ma10_close_diff"]) * 0.25
        + (data["ma10_close_diff"] >= data["ma20_close_diff"]) * 0.25
        + (data["ma20_close_diff"] >= data["ma30_close_diff"]) * 0.25
        + (data["ma_close_avg"] > 0) * 0.25
    )

    # 3. Strength score: magnitude of the average deviation.
    strength_score = data["ma_close_avg"].abs()

    # 4. Composite score.
    composite_score = (
        weighted_score * 0.4 + alignment_score * 0.3 + strength_score * 0.3
    )

    # Dynamic thresholds derived from the recent composite distribution.
    window_size = min(50, len(data) // 4)
    if window_size > 10:
        rolling_score = composite_score.rolling(window=window_size)
        # The upper quartile gates "多" and the lower quartile gates "空".
        # This keeps long_threshold >= short_threshold, so a row can never
        # satisfy both conditions and have its "多" label overwritten by the
        # later "空" assignment.
        long_threshold = rolling_score.quantile(0.75) * 0.4
        short_threshold = rolling_score.quantile(0.25) * 0.4
    else:
        long_threshold = 0.4
        short_threshold = -0.4

    long_condition = composite_score > long_threshold
    short_condition = composite_score < short_threshold

    data.loc[long_condition, "ma_long_short"] = "多"
    data.loc[short_condition, "ma_long_short"] = "空"

    return data
|
||
|
||
def _calculate_trend_persistence(self, data: pd.DataFrame):
    """
    Count how long ``ma_close_avg`` has kept the same sign, row by row.

    A row "continues" the trend when its ``ma_close_avg`` and the previous
    row's are both strictly positive or both strictly negative. The counter
    increases by one on every continuing row and resets to 0 otherwise, so
    the first row of a run is 0, the second is 1, and so on. Zero and NaN
    values never continue a run. The first row is always 0.

    Args:
        data: DataFrame containing the ``ma_close_avg`` column.

    Returns:
        An int64 Series indexed like ``data`` holding the counter.
    """
    avg = data["ma_close_avg"]
    is_positive = (avg > 0).to_numpy(dtype=bool)
    is_negative = (avg < 0).to_numpy(dtype=bool)

    # continues[i] is True when row i has the same strict sign as row i - 1.
    continues = np.zeros(len(avg), dtype=bool)
    continues[1:] = (is_positive[1:] & is_positive[:-1]) | (
        is_negative[1:] & is_negative[:-1]
    )

    # Streak length without a Python loop: take the running total of
    # continuing rows and subtract the total recorded at the most recent
    # reset. The running total never decreases, so a running maximum over
    # the values captured at reset rows yields "total at the last reset".
    running_total = np.cumsum(continues)
    total_at_last_reset = np.maximum.accumulate(
        np.where(continues, 0, running_total)
    )
    streak = running_total - total_at_last_reset

    return pd.Series(streak.astype("int64"), index=data.index)
|
||
|
||
def _calculate_ma_spacing(self, data: pd.DataFrame):
    """
    Average absolute gap between neighbouring MA deviation columns.

    The gaps are taken between ``ma5``/``ma10``, ``ma10``/``ma20`` and
    ``ma20``/``ma30`` ``*_close_diff`` columns; the result is their sum
    divided by 3.

    Args:
        data: DataFrame containing the four ``ma*_close_diff`` columns.

    Returns:
        A Series with one average spacing value per row of ``data``.
    """
    ordered_columns = (
        "ma5_close_diff",
        "ma10_close_diff",
        "ma20_close_diff",
        "ma30_close_diff",
    )

    # One absolute gap per pair of adjacent periods (5-10, 10-20, 20-30).
    gap_5_10, gap_10_20, gap_20_30 = (
        (data[nearer] - data[farther]).abs()
        for nearer, farther in zip(ordered_columns, ordered_columns[1:])
    )

    return (gap_5_10 + gap_10_20 + gap_20_30) / 3
|