1. Support importing Binance data.

2. Optimize the huge-volume data update algorithm.
blade 2025-09-06 14:10:07 +08:00
parent aee344b0db
commit 019b894c9a
3 changed files with 186 additions and 78 deletions

View File

@@ -84,16 +84,16 @@ BINANCE_MONITOR_CONFIG = {
 US_STOCK_MONITOR_CONFIG = {
     "volume_monitor": {
         "symbols": [
-            "QQQ",
-            "TQQQ",
-            "MSFT",
-            "AAPL",
-            "GOOG",
-            "NVDA",
-            "META",
-            "AMZN",
-            "AVGO",
-            "TSLA",
+            # "QQQ",
+            # "TQQQ",
+            # "MSFT",
+            # "AAPL",
+            # "GOOG",
+            # "NVDA",
+            # "META",
+            # "AMZN",
+            # "AVGO",
+            # "TSLA",
             "PLTR",
             "COIN",
             "MSTR",

View File

@@ -139,8 +139,8 @@ class HugeVolumeMain:
             only_output_huge_volume=only_output_huge_volume,
             output_excel=False,
         )
-        if data is not None:
-            if is_update:
+        if data is not None and len(data) > 0:
+            if is_update and len(data) > window_size:
                 min_timestamp = int(data["timestamp"].min())
                 max_timestamp = int(data["timestamp"].max())
                 exist_data = self.db_huge_volume_data.query_huge_volume_data_by_symbol_bar_window_size(
@@ -197,11 +197,26 @@ class HugeVolumeMain:
         logger.info(f"Found {len(folders)} folders in total")
         symbols = self.market_data_main.symbols
         bars = self.market_data_main.bars
+        output_folder = r"./data/binance/"
+        os.makedirs(output_folder, exist_ok=True)
+        success_folder_file = os.path.join(output_folder, "success_folder.txt")
+        success_folder_list = []
+        # Guard against the first run, when the success record does not exist yet.
+        if os.path.exists(success_folder_file):
+            with open(success_folder_file, "r", encoding="utf-8") as f:
+                for line in f:
+                    if line.strip() == "":
+                        continue
+                    success_folder_list.append(line.strip())
         for folder in folders:
             if not os.path.isdir(os.path.join(root_path, folder)):
                 continue
             logger.info(f"Processing folder: {folder}")
+            if folder in success_folder_list:
+                logger.info(f"Folder already processed: {folder}")
+                continue
             files = os.listdir(os.path.join(root_path, folder))
+            all_success = True
             for file in files:
                 if not os.path.isfile(os.path.join(root_path, folder, file)):
                     continue
@@ -215,10 +230,18 @@ class HugeVolumeMain:
                     bar = "1H"
                 if symbol not in symbols or bar not in bars:
                     continue
-                logger.info(f"Processing file: {file} {symbol} {bar}")
-                self.import_binance_data_by_csv(
+                success = self.import_binance_data_by_csv(
                     full_file_path, symbol, bar, window_sizes
                 )
+                if not success:
+                    all_success = False
+            if all_success:
+                try:
+                    with open(success_folder_file, "a", encoding="utf-8") as f:
+                        f.write(folder + "\n")
+                except Exception as e:
+                    logger.error(f"Failed to write success record: {folder} {e}")
 
     def import_binance_data_by_csv(
         self, full_file_path: str, symbol: str, bar: str, window_sizes: list
@@ -229,72 +252,104 @@ class HugeVolumeMain:
         :param symbol: cryptocurrency symbol
         :param bar: bar interval
         """
-        if full_file_path is None or symbol is None or bar is None:
-            logger.error("Incomplete arguments")
-            return
-        if not os.path.exists(full_file_path):
-            logger.error(f"File does not exist: {full_file_path}")
-            return
-        df = pd.read_csv(full_file_path, encoding="GBK")
-        if df is None or len(df) == 0:
-            logger.error(f"File is empty: {full_file_path}")
-            return
-        columns = list(df)
-        if "邢不行" in columns[0] or "Unnamed" in columns[1]:
-            # Use the first row as the column names
-            df.columns = df.iloc[0]
-            df = df.iloc[1:]
-        df.reset_index(drop=True, inplace=True)
-        df = self.market_data_main.adjust_binance_csv_data(symbol, bar, df)
-        df = self.market_data_main.post_save_data(df)
-        min_start_time_ts = int(df["timestamp"].min())
-        max_start_time_ts = int(df["timestamp"].max())
-        df = self.market_data_main.post_calculate_metrics(
-            symbol, bar, min_start_time_ts, max_start_time_ts
-        )
-        df = df.sort_values(by="timestamp", ascending=True)
-        df = df.reset_index(drop=True)
-        for window_size in window_sizes:
-            self.update_volume_spike(symbol, bar, window_size)
+        try:
+            logger.info(f"Processing file: {full_file_path} {symbol} {bar}")
+            if full_file_path is None or symbol is None or bar is None:
+                logger.error("Incomplete arguments")
+                return False
+            if not os.path.exists(full_file_path):
+                logger.error(f"File does not exist: {full_file_path}")
+                return False
+            df = pd.read_csv(full_file_path, encoding="GBK")
+            if df is None or len(df) == 0:
+                raise Exception(f"File is empty: {full_file_path}")
+            columns = list(df)
+            if len(columns) == 0:
+                raise Exception(f"File is empty: {full_file_path}")
+            elif len(columns) == 1 and "邢不行" in columns[0]:
+                df.reset_index(inplace=True)
+                df.columns = df.iloc[0]
+                df = df.iloc[1:]
+            elif "邢不行" in columns[0] or "Unnamed" in columns[1]:
+                # Use the first row as the column names
+                df.columns = df.iloc[0]
+                df = df.iloc[1:]
+            else:
+                pass
+            df.reset_index(drop=True, inplace=True)
+            df = self.market_data_main.adjust_binance_csv_data(symbol, bar, df)
+            df = self.market_data_main.post_save_data(df)
+            min_start_time_ts = int(df["timestamp"].min())
+            max_start_time_ts = int(df["timestamp"].max())
+            df = self.market_data_main.post_calculate_metrics(
+                symbol, bar, min_start_time_ts, max_start_time_ts
+            )
+            df = df.sort_values(by="timestamp", ascending=True)
+            df = df.reset_index(drop=True)
+            earliest_date_time = str(df.iloc[0]["date_time"])
+            earliest_timestamp = int(df.iloc[0]["timestamp"])
+            end_date_time = str(df.iloc[-1]["date_time"])
+            for window_size in window_sizes:
+                self.update_volume_spike(symbol, bar, window_size, earliest_date_time, earliest_timestamp, end_date_time)
+            return True
+        except Exception as e:
+            logger.error(f"Failed to import Binance data: {e}")
+            output_folder = r"./data/binance/"
+            os.makedirs(output_folder, exist_ok=True)
+            error_record_file = os.path.join(output_folder, "error_record.txt")
+            with open(error_record_file, "a", encoding="utf-8") as f:
+                f.write(full_file_path + "\n")
+            return False
 
-    def update_volume_spike(self, symbol: str, bar: str, window_size: int = 50):
+    def update_volume_spike(
+        self,
+        symbol: str,
+        bar: str,
+        window_size: int = 50,
+        earliest_date_time: str = None,
+        earliest_timestamp: int = None,
+        end_date_time: str = None,
+    ):
         try:
-            latest_huge_volume_data = self.db_huge_volume_data.query_latest_data(
-                symbol, bar, window_size
-            )
-            if latest_huge_volume_data is None or len(latest_huge_volume_data) == 0:
-                self.detect_volume_spike(
-                    symbol=symbol,
-                    bar=bar,
-                    window_size=window_size,
-                    only_output_huge_volume=False,
-                )
-                return
-            else:
-                earliest_date_time = latest_huge_volume_data["date_time"]
-                earliest_timestamp = latest_huge_volume_data["timestamp"]
-                seconds = self.get_seconds_by_bar(bar)
-                earliest_timestamp = earliest_timestamp - (
-                    (window_size - 1) * seconds * 1000
-                )
-                earliest_date_time = timestamp_to_datetime(earliest_timestamp)
+            if earliest_date_time is None or earliest_timestamp is None:
+                latest_huge_volume_data = self.db_huge_volume_data.query_latest_data(
+                    symbol, bar, window_size
+                )
+                if latest_huge_volume_data is None or len(latest_huge_volume_data) == 0:
+                    self.detect_volume_spike(
+                        symbol=symbol,
+                        bar=bar,
+                        window_size=window_size,
+                        only_output_huge_volume=False,
+                    )
+                    return
+                else:
+                    earliest_date_time = latest_huge_volume_data["date_time"]
+                    earliest_timestamp = latest_huge_volume_data["timestamp"]
+                    seconds = self.get_seconds_by_bar(bar)
+                    # Step back (window_size - 1) bars so the rolling window has history.
+                    earliest_timestamp = earliest_timestamp - (
+                        (window_size - 1) * seconds * 1000
+                    )
+                    earliest_date_time = timestamp_to_datetime(earliest_timestamp)
             data = self.detect_volume_spike(
                 symbol=symbol,
                 bar=bar,
                 window_size=window_size,
                 start=earliest_date_time,
+                end=end_date_time,
                 only_output_huge_volume=False,
                 is_update=True,
             )
             logger.info(
                 f"Updating huge volume data: {symbol} {bar} window size: {window_size}, {earliest_date_time}, {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
             )
             if data is not None and len(data) > 0:
                 logger.info(f"Huge volume records updated this time: {len(data)}")
             else:
                 logger.info("No huge volume data updated this time")
         except Exception as e:
             logger.error(
                 f"Failed to update huge volume data: {symbol} {bar} window size: {window_size}, {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}: {e}"
@@ -610,7 +665,7 @@ def batch_update_volume_spike(threshold: float = 2.0, is_us_stock: bool = False)
         or len(window_sizes) == 0
     ):
         window_sizes = [50, 80, 100, 120]
-    huge_volume_main = HugeVolumeMain(threshold, is_us_stock)
+    huge_volume_main = HugeVolumeMain(threshold, is_us_stock, is_binance=False)
     for window_size in window_sizes:
         huge_volume_main.batch_update_volume_spike(window_size=window_size)
@@ -621,6 +676,14 @@ def batch_import_binance_data_by_csv():
     huge_volume_main.batch_import_binance_data_by_csv(root_path)
 
+
+def test_import_binance_data_by_csv():
+    huge_volume_main = HugeVolumeMain(threshold=2.0, is_us_stock=False, is_binance=True)
+    file_path = "./data/binance/spot/2020-08-11/SOL-USDT_1h.csv"
+    huge_volume_main.import_binance_data_by_csv(
+        file_path, "SOL-USDT", "1H", [50, 80, 100, 120]
+    )
+
 def test_send_huge_volume_data_to_wechat():
     huge_volume_main = HugeVolumeMain(threshold=2.0)
     # Get yesterday's date
@@ -633,8 +696,9 @@ def test_send_huge_volume_data_to_wechat():
 if __name__ == "__main__":
-    batch_import_binance_data_by_csv()
-    # batch_update_volume_spike(threshold=2.0, is_us_stock=False)
+    test_import_binance_data_by_csv()
+    # batch_import_binance_data_by_csv()
+    # batch_update_volume_spike(threshold=2.0, is_us_stock=True)
     # test_send_huge_volume_data_to_wechat()
     # batch_initial_detect_volume_spike(threshold=2.0)
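
Note: the batch import above resumes across runs through a plain-text checkpoint file (success_folder.txt). Below is a minimal, self-contained sketch of that pattern; the names CHECKPOINT, load_done, mark_done, run, and process_folder are illustrative assumptions, not identifiers from this repo.

import os

CHECKPOINT = "./data/binance/success_folder.txt"

def load_done() -> set:
    # Tolerate a missing checkpoint file on the first run.
    if not os.path.exists(CHECKPOINT):
        return set()
    with open(CHECKPOINT, "r", encoding="utf-8") as f:
        return {line.strip() for line in f if line.strip()}

def mark_done(folder: str) -> None:
    # Append-only log: one completed folder per line.
    with open(CHECKPOINT, "a", encoding="utf-8") as f:
        f.write(folder + "\n")

def run(folders, process_folder):
    done = load_done()
    for folder in folders:
        if folder in done:
            continue  # already imported in a previous run
        # Checkpoint a folder only when every file in it succeeded,
        # so partial failures are retried on the next run.
        if process_folder(folder):
            mark_done(folder)

On the lookback in update_volume_spike: assuming get_seconds_by_bar("1H") returns 3600, a window_size of 50 moves the start timestamp back 49 * 3600 * 1000 ms (49 hours), so the rolling window has enough prior bars before the first new row.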

View File

@@ -1,5 +1,5 @@
 import core.logger as logging
-from datetime import datetime
+from datetime import datetime, timedelta, timezone
 from time import sleep
 import pandas as pd
 from core.biz.market_data import MarketData
@@ -212,6 +212,8 @@ class MarketDataMain:
         data["volCCyQuote"] = None
         data["create_time"] = None
+
+        data = self.check_date_time(data, bar)
         for index, row in data.iterrows():
             candle_begin_time = row["candle_begin_time"]
             timestamp = datetime_to_timestamp(candle_begin_time, is_utc=True)
@@ -256,6 +258,48 @@ class MarketDataMain:
         data = data.reset_index(drop=True)
         return data
 
+    def check_date_time(self, data: pd.DataFrame, bar: str):
+        """
+        Check the date-time column and rebuild it if it cannot be parsed
+        """
+        sample_date_time = data["candle_begin_time"].iloc[0]
+        is_ok = True
+        try:
+            timestamp = datetime_to_timestamp(sample_date_time, is_utc=True)
+        except Exception:
+            is_ok = False
+        if not is_ok:
+            date_part = sample_date_time.split(" ")[0]
+            first_date_time = f"{date_part} 00:00:00"
+            first_date_time_utc = datetime.strptime(
+                first_date_time, "%Y-%m-%d %H:%M:%S"
+            ).replace(tzinfo=timezone.utc)
+            if bar == "1H":
+                # Rebuild candle_begin_time from first_date_time, adding one hour per row
+                for index, row in data.iterrows():
+                    if index == 0:
+                        candle_begin_time = first_date_time_utc
+                    else:
+                        candle_begin_time = first_date_time_utc + timedelta(hours=1 * index)
+                    data.loc[index, "candle_begin_time"] = candle_begin_time.strftime("%Y-%m-%d %H:%M:%S")
+            elif bar == "5m":
+                # Rebuild candle_begin_time from first_date_time, adding five minutes per row
+                for index, row in data.iterrows():
+                    if index == 0:
+                        candle_begin_time = first_date_time_utc
+                    else:
+                        candle_begin_time = first_date_time_utc + timedelta(minutes=5 * index)
+                    data.loc[index, "candle_begin_time"] = candle_begin_time.strftime("%Y-%m-%d %H:%M:%S")
+            elif bar == "30m":
+                # Rebuild candle_begin_time from first_date_time, adding thirty minutes per row
+                for index, row in data.iterrows():
+                    if index == 0:
+                        candle_begin_time = first_date_time_utc
+                    else:
+                        candle_begin_time = first_date_time_utc + timedelta(minutes=30 * index)
+                    data.loc[index, "candle_begin_time"] = candle_begin_time.strftime("%Y-%m-%d %H:%M:%S")
+            else:
+                pass
+        return data
+
     def post_save_data(self, data: pd.DataFrame):
         if data is not None and len(data) > 0:
             data["buy_sz"] = -1