1. Support importing Binance data.
2. Optimize the huge-volume data update algorithm.
This commit is contained in:
parent aee344b0db
commit 019b894c9a
config.py | 20
@@ -84,16 +84,16 @@ BINANCE_MONITOR_CONFIG = {
 US_STOCK_MONITOR_CONFIG = {
     "volume_monitor": {
         "symbols": [
-            "QQQ",
-            "TQQQ",
-            "MSFT",
-            "AAPL",
-            "GOOG",
-            "NVDA",
-            "META",
-            "AMZN",
-            "AVGO",
-            "TSLA",
+            # "QQQ",
+            # "TQQQ",
+            # "MSFT",
+            # "AAPL",
+            # "GOOG",
+            # "NVDA",
+            # "META",
+            # "AMZN",
+            # "AVGO",
+            # "TSLA",
             "PLTR",
             "COIN",
             "MSTR",
@@ -139,8 +139,8 @@ class HugeVolumeMain:
             only_output_huge_volume=only_output_huge_volume,
             output_excel=False,
         )
-        if data is not None:
-            if is_update:
+        if data is not None and len(data) > 0:
+            if is_update and len(data) > window_size:
                 min_timestamp = int(data["timestamp"].min())
                 max_timestamp = int(data["timestamp"].max())
                 exist_data = self.db_huge_volume_data.query_huge_volume_data_by_symbol_bar_window_size(
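Note on the hunk above: the update path now skips the incremental merge when the freshly detected batch is no longer than the rolling window, since such a batch contains only re-detected context rather than new history. A minimal sketch of the overlap step this guard feeds into, assuming exist_data carries the same timestamp column as data (the filter itself is not shown in this diff):

    # Hypothetical sketch, not part of the commit: drop rows whose
    # timestamps already exist between min_timestamp and max_timestamp.
    if exist_data is not None and len(exist_data) > 0:
        seen = set(exist_data["timestamp"].tolist())
        data = data[~data["timestamp"].isin(seen)]  # keep only unseen candles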
@@ -197,11 +197,26 @@ class HugeVolumeMain:
         logger.info(f"共有{len(folders)}个文件夹")
         symbols = self.market_data_main.symbols
         bars = self.market_data_main.bars

+        output_folder = r"./data/binance/"
+        os.makedirs(output_folder, exist_ok=True)
+        success_folder_file = os.path.join(output_folder, "success_folder.txt")
+
+        success_folder_list = []
+        with open(success_folder_file, "r", encoding="utf-8") as f:
+            for line in f:
+                if line.strip() == "":
+                    continue
+                success_folder_list.append(line.strip())
         for folder in folders:
             if not os.path.isdir(os.path.join(root_path, folder)):
                 continue
             logger.info(f"开始处理文件夹: {folder}")
+            if folder in success_folder_list:
+                logger.info(f"文件夹: {folder} 已处理")
+                continue
             files = os.listdir(os.path.join(root_path, folder))
+            all_success = True
             for file in files:
                 if not os.path.isfile(os.path.join(root_path, folder, file)):
                     continue
@@ -215,10 +230,18 @@ class HugeVolumeMain:
                 bar = "1H"
                 if symbol not in symbols or bar not in bars:
                     continue
-                logger.info(f"开始处理文件: {file} {symbol} {bar}")
-                self.import_binance_data_by_csv(
+                success = self.import_binance_data_by_csv(
                     full_file_path, symbol, bar, window_sizes
                 )
+                if not success:
+                    all_success = False
+            if all_success:
+                try:
+                    with open(success_folder_file, "a", encoding="utf-8") as f:
+                        f.write(folder + "\n")
+                except Exception as e:
+                    logger.error(f"写入记录失败: {folder} {e}")

     def import_binance_data_by_csv(
         self, full_file_path: str, symbol: str, bar: str, window_sizes: list
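The two hunks above add checkpointing to the batch importer: a folder name is appended to success_folder.txt only when every file in it imports cleanly, and already-recorded folders are skipped on the next run. The read side, however, opens the checkpoint file unconditionally, so a first run with no file present would raise. A guarded variant (a sketch under the same file layout, not the commit's code):

    # Hedged sketch: tolerate a missing checkpoint file on the first run.
    success_folder_list = []
    if os.path.exists(success_folder_file):
        with open(success_folder_file, "r", encoding="utf-8") as f:
            success_folder_list = [line.strip() for line in f if line.strip()]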
@@ -229,72 +252,104 @@ class HugeVolumeMain:
         :param symbol: 虚拟货币名称
         :param bar: 时间周期
         """
-        if full_file_path is None or symbol is None or bar is None:
-            logger.error("信息不完整")
-            return
-        if not os.path.exists(full_file_path):
-            logger.error(f"文件不存在: {full_file_path}")
-            return
-        df = pd.read_csv(full_file_path, encoding="GBK")
-        if df is None or len(df) == 0:
-            logger.error(f"文件为空: {full_file_path}")
-            return
-        columns = list(df)
-        if "邢不行" in columns[0] or "Unnamed" in columns[1]:
-            # 将第一行作为列名
-            df.columns = df.iloc[0]
-            df = df.iloc[1:]
-            df.reset_index(drop=True, inplace=True)
-
-        df = self.market_data_main.adjust_binance_csv_data(symbol, bar, df)
-        df = self.market_data_main.post_save_data(df)
-        min_start_time_ts = int(df["timestamp"].min())
-        max_start_time_ts = int(df["timestamp"].max())
-        df = self.market_data_main.post_calculate_metrics(
-            symbol, bar, min_start_time_ts, max_start_time_ts
-        )
-        df = df.sort_values(by="timestamp", ascending=True)
-        df = df.reset_index(drop=True)
-        for window_size in window_sizes:
-            self.update_volume_spike(symbol, bar, window_size)
-
-    def update_volume_spike(self, symbol: str, bar: str, window_size: int = 50):
-        try:
-            latest_huge_volume_data = self.db_huge_volume_data.query_latest_data(
-                symbol, bar, window_size
-            )
-            if latest_huge_volume_data is None or len(latest_huge_volume_data) == 0:
-                self.detect_volume_spike(
-                    symbol=symbol,
-                    bar=bar,
-                    window_size=window_size,
-                    only_output_huge_volume=False,
-                )
-                return
-            else:
-                earliest_date_time = latest_huge_volume_data["date_time"]
-                earliest_timestamp = latest_huge_volume_data["timestamp"]
-                seconds = self.get_seconds_by_bar(bar)
-                earliest_timestamp = earliest_timestamp - (
-                    (window_size - 1) * seconds * 1000
-                )
-                earliest_date_time = timestamp_to_datetime(earliest_timestamp)
-
-            data = self.detect_volume_spike(
-                symbol=symbol,
-                bar=bar,
-                window_size=window_size,
-                start=earliest_date_time,
-                only_output_huge_volume=False,
-                is_update=True,
-            )
-            logger.info(
-                f"更新巨量交易数据: {symbol} {bar} 窗口大小: {window_size} 从 {earliest_date_time} 到 {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
-            )
-            if data is not None and len(data) > 0:
-                logger.info(f"此次更新巨量交易数据: {len(data)}条")
-            else:
-                logger.info(f"此次更新巨量交易数据为空")
+        try:
+            logger.info(f"开始处理文件: {full_file_path} {symbol} {bar}")
+            if full_file_path is None or symbol is None or bar is None:
+                logger.error("信息不完整")
+                return
+            if not os.path.exists(full_file_path):
+                logger.error(f"文件不存在: {full_file_path}")
+                return
+            df = pd.read_csv(full_file_path, encoding="GBK")
+            if df is None or len(df) == 0:
+                raise Exception(f"文件为空: {full_file_path}")
+            columns = list(df)
+            if len(columns) == 0:
+                raise Exception(f"文件为空: {full_file_path}")
+            elif len(columns) == 1 and "邢不行" in columns[0]:
+                df.reset_index(inplace=True)
+                df.columns = df.iloc[0]
+                df = df.iloc[1:]
+            elif "邢不行" in columns[0] or "Unnamed" in columns[1]:
+                # 将第一行作为列名
+                df.columns = df.iloc[0]
+                df = df.iloc[1:]
+            else:
+                pass
+            df.reset_index(drop=True, inplace=True)
+
+            df = self.market_data_main.adjust_binance_csv_data(symbol, bar, df)
+            df = self.market_data_main.post_save_data(df)
+            min_start_time_ts = int(df["timestamp"].min())
+            max_start_time_ts = int(df["timestamp"].max())
+            df = self.market_data_main.post_calculate_metrics(
+                symbol, bar, min_start_time_ts, max_start_time_ts
+            )
+            df = df.sort_values(by="timestamp", ascending=True)
+            df = df.reset_index(drop=True)
+            earliest_date_time = str(df.iloc[0]["date_time"])
+            earliest_timestamp = int(df.iloc[0]["timestamp"])
+            end_date_time = str(df.iloc[-1]["date_time"])
+            for window_size in window_sizes:
+                self.update_volume_spike(symbol, bar, window_size, earliest_date_time, earliest_timestamp, end_date_time)
+            return True
+        except Exception as e:
+            logger.error(f"导入binance数据失败: {e}")
+            output_folder = r"./data/binance/"
+            os.makedirs(output_folder, exist_ok=True)
+            error_record_file = os.path.join(output_folder, "error_record.txt")
+            with open(error_record_file, "a", encoding="utf-8") as f:
+                f.write(full_file_path + "\n")
+            return False
+
+    def update_volume_spike(
+        self,
+        symbol: str,
+        bar: str,
+        window_size: int = 50,
+        earliest_date_time: str = None,
+        earliest_timestamp: int = None,
+        end_date_time: str = None,
+    ):
+        try:
+            if earliest_date_time is None or earliest_timestamp is None:
+                latest_huge_volume_data = self.db_huge_volume_data.query_latest_data(
+                    symbol, bar, window_size
+                )
+                if latest_huge_volume_data is None or len(latest_huge_volume_data) == 0:
+                    self.detect_volume_spike(
+                        symbol=symbol,
+                        bar=bar,
+                        window_size=window_size,
+                        only_output_huge_volume=False,
+                    )
+                    return
+                else:
+                    earliest_date_time = latest_huge_volume_data["date_time"]
+                    earliest_timestamp = latest_huge_volume_data["timestamp"]
+
+            seconds = self.get_seconds_by_bar(bar)
+            earliest_timestamp = earliest_timestamp - (
+                (window_size - 1) * seconds * 1000
+            )
+            earliest_date_time = timestamp_to_datetime(earliest_timestamp)
+
+            data = self.detect_volume_spike(
+                symbol=symbol,
+                bar=bar,
+                window_size=window_size,
+                start=earliest_date_time,
+                end=end_date_time,
+                only_output_huge_volume=False,
+                is_update=True,
+            )
+            logger.info(
+                f"更新巨量交易数据: {symbol} {bar} 窗口大小: {window_size} 从 {earliest_date_time} 到 {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
+            )
+            if data is not None and len(data) > 0:
+                logger.info(f"此次更新巨量交易数据: {len(data)}条")
+            else:
+                logger.info(f"此次更新巨量交易数据为空")
         except Exception as e:
             logger.error(
                 f"更新巨量交易数据失败: {symbol} {bar} 窗口大小: {window_size} 到 {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}: {e}"
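After this refactor, import_binance_data_by_csv derives the detection window once from the sorted CSV (first/last date_time and timestamp) and forwards it, while update_volume_spike keeps a DB-driven fallback for callers that pass no bounds. A usage sketch of the two call styles (values mirror the test function added further down):

    # Illustrative only; constructor arguments and the file path are taken
    # from the test function in this commit.
    main = HugeVolumeMain(threshold=2.0, is_us_stock=False, is_binance=True)
    # CSV-driven: time bounds are computed from the file and forwarded.
    main.import_binance_data_by_csv(
        "./data/binance/spot/2020-08-11/SOL-USDT_1h.csv", "SOL-USDT", "1H", [50, 80, 100, 120]
    )
    # DB-driven fallback: with no bounds, the latest stored row seeds the window.
    main.update_volume_spike("SOL-USDT", "1H", window_size=50)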
|
|
@ -610,7 +665,7 @@ def batch_update_volume_spike(threshold: float = 2.0, is_us_stock: bool = False)
|
||||||
or len(window_sizes) == 0
|
or len(window_sizes) == 0
|
||||||
):
|
):
|
||||||
window_sizes = [50, 80, 100, 120]
|
window_sizes = [50, 80, 100, 120]
|
||||||
huge_volume_main = HugeVolumeMain(threshold, is_us_stock)
|
huge_volume_main = HugeVolumeMain(threshold, is_us_stock, is_binance=False)
|
||||||
for window_size in window_sizes:
|
for window_size in window_sizes:
|
||||||
huge_volume_main.batch_update_volume_spike(window_size=window_size)
|
huge_volume_main.batch_update_volume_spike(window_size=window_size)
|
||||||
|
|
||||||
|
|
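HugeVolumeMain is now constructed with an explicit is_binance flag here and in the test below; the constructor itself is outside this diff, so the shape is assumed:

    # Assumed signature (not shown in this commit):
    # def __init__(self, threshold: float = 2.0, is_us_stock: bool = False, is_binance: bool = False): ...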
@@ -621,6 +676,14 @@ def batch_import_binance_data_by_csv():
     huge_volume_main.batch_import_binance_data_by_csv(root_path)


+def test_import_binance_data_by_csv():
+    huge_volume_main = HugeVolumeMain(threshold=2.0, is_us_stock=False, is_binance=True)
+    file_path = "./data/binance/spot/2020-08-11/SOL-USDT_1h.csv"
+    huge_volume_main.import_binance_data_by_csv(
+        file_path, "SOL-USDT", "1H", [50, 80, 100, 120]
+    )
+
+
 def test_send_huge_volume_data_to_wechat():
     huge_volume_main = HugeVolumeMain(threshold=2.0)
     # 获得昨天日期
@@ -633,8 +696,9 @@ def test_send_huge_volume_data_to_wechat():


 if __name__ == "__main__":
-    batch_import_binance_data_by_csv()
-    # batch_update_volume_spike(threshold=2.0, is_us_stock=False)
+    test_import_binance_data_by_csv()
+    # batch_import_binance_data_by_csv()
+    # batch_update_volume_spike(threshold=2.0, is_us_stock=True)
     # test_send_huge_volume_data_to_wechat()
     # batch_initial_detect_volume_spike(threshold=2.0)

@@ -1,5 +1,5 @@
 import core.logger as logging
-from datetime import datetime
+from datetime import datetime, timedelta, timezone
 from time import sleep
 import pandas as pd
 from core.biz.market_data import MarketData
@@ -212,6 +212,8 @@ class MarketDataMain:
         data["volCCyQuote"] = None
         data["create_time"] = None

+        data = self.check_date_time(data, bar)
+
         for index, row in data.iterrows():
             candle_begin_time = row["candle_begin_time"]
             timestamp = datetime_to_timestamp(candle_begin_time, is_utc=True)
@@ -256,6 +258,48 @@ class MarketDataMain:
         data = data.reset_index(drop=True)
         return data

+    def check_date_time(self, data: pd.DataFrame, bar: str):
+        """
+        检查日期时间
+        """
+        sample_date_time = data["candle_begin_time"].iloc[0]
+        is_ok = True
+        try:
+            timestamp = datetime_to_timestamp(sample_date_time, is_utc=True)
+        except Exception as e:
+            is_ok = False
+        if not is_ok:
+            date_part = sample_date_time.split(" ")[0]
+            first_date_time = f"{date_part} 00:00:00"
+            first_date_time_utc = datetime.strptime(first_date_time, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
+            if bar == "1H":
+                # 将candle_begin_time以first_date_time为起点,每条记录增加一小时
+                for index, row in data.iterrows():
+                    if index == 0:
+                        candle_begin_time = first_date_time_utc
+                    else:
+                        candle_begin_time = first_date_time_utc + timedelta(hours=1 * index)
+                    data.loc[index, "candle_begin_time"] = candle_begin_time.strftime("%Y-%m-%d %H:%M:%S")
+            elif bar == "5m":
+                # 将candle_begin_time以first_date_time为起点,每条记录增加五分钟
+                for index, row in data.iterrows():
+                    if index == 0:
+                        candle_begin_time = first_date_time_utc
+                    else:
+                        candle_begin_time = first_date_time_utc + timedelta(minutes=5 * index)
+                    data.loc[index, "candle_begin_time"] = candle_begin_time.strftime("%Y-%m-%d %H:%M:%S")
+            elif bar == "30m":
+                # 将candle_begin_time以first_date_time为起点,每条记录增加三十分钟
+                for index, row in data.iterrows():
+                    if index == 0:
+                        candle_begin_time = first_date_time_utc
+                    else:
+                        candle_begin_time = first_date_time_utc + timedelta(minutes=30 * index)
+                    data.loc[index, "candle_begin_time"] = candle_begin_time.strftime("%Y-%m-%d %H:%M:%S")
+            else:
+                pass
+        return data
+
     def post_save_data(self, data: pd.DataFrame):
         if data is not None and len(data) > 0:
             data["buy_sz"] = -1
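check_date_time repairs a candle_begin_time column that fails to parse by rebuilding it from the date part of the first cell, stepping one bar per row. The three branches differ only in the step size, so an equivalent vectorized form (a sketch reusing date_part and bar from the function above, not the commit's code) collapses them:

    # Hedged sketch: same repair in one code path via pd.date_range.
    step = {"1H": "1h", "5m": "5min", "30m": "30min"}.get(bar)
    if step is not None:
        start = datetime.strptime(f"{date_part} 00:00:00", "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
        data["candle_begin_time"] = pd.date_range(
            start, periods=len(data), freq=step
        ).strftime("%Y-%m-%d %H:%M:%S")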