refresh Trump articles on Truth Social every 5 minutes
This commit is contained in:
parent
b1d6e62ca9
commit
8e2313ed76
|
|
@ -26,8 +26,9 @@ def run_script():
|
||||||
end_time = time.time()
|
end_time = time.time()
|
||||||
logger.info(f"Script execution time: {end_time - start_time} seconds")
|
logger.info(f"Script execution time: {end_time - start_time} seconds")
|
||||||
# 设置每天上午09:00:00 运行一次
|
# 设置每天上午09:00:00 运行一次
|
||||||
schedule.every().day.at("09:00:00").do(run_script)
|
# schedule.every().day.at("09:00:00").do(run_script)
|
||||||
# schedule.every(60).seconds.do(run_script)
|
# 设置每5分钟运行一次
|
||||||
|
schedule.every(5).minutes.do(run_script)
|
||||||
|
|
||||||
# 保持程序运行并检查调度
|
# 保持程序运行并检查调度
|
||||||
logger.info("Scheduler started. Press Ctrl+C to stop.")
|
logger.info("Scheduler started. Press Ctrl+C to stop.")
|
||||||
|
|
|
||||||
|
|
@ -126,20 +126,22 @@ class TruthSocialRetriever:
|
||||||
print("获取帖子失败,请检查 API 密钥或网络。")
|
print("获取帖子失败,请检查 API 密钥或网络。")
|
||||||
|
|
||||||
if len(results) > 0:
|
if len(results) > 0:
|
||||||
user_path = os.path.join(self.save_path, user_name)
|
# user_path = os.path.join(self.save_path, user_name)
|
||||||
os.makedirs(user_path, exist_ok=True)
|
# os.makedirs(user_path, exist_ok=True)
|
||||||
now_date_time = datetime.now().strftime("%Y%m%d%H%M%S")
|
# now_date_time = datetime.now().strftime("%Y%m%d%H%M%S")
|
||||||
json_file_name = os.path.join(user_path, f"{user_name}_{now_date_time}.json")
|
# json_file_name = os.path.join(user_path, f"{user_name}_{now_date_time}.json")
|
||||||
# 将results内容写入json_file_name文件中
|
# # 将results内容写入json_file_name文件中
|
||||||
with open(json_file_name, 'w', encoding='utf-8') as f:
|
# with open(json_file_name, 'w', encoding='utf-8') as f:
|
||||||
json.dump(results, f, ensure_ascii=False, indent=2)
|
# json.dump(results, f, ensure_ascii=False, indent=2)
|
||||||
logger.info(f"已将{len(results)}条数据保存到: {json_file_name}")
|
# logger.info(f"已将{len(results)}条数据保存到: {json_file_name}")
|
||||||
|
|
||||||
result_df = pd.DataFrame(results)
|
result_df = pd.DataFrame(results)
|
||||||
result_df = self.remove_duplicate_posts(result_df)
|
result_df = self.remove_duplicate_posts(result_df)
|
||||||
self.db_truth_social_content.insert_data_to_mysql(result_df)
|
if len(result_df) > 0:
|
||||||
logger.info(f"已将{len(result_df)}条数据插入到数据库")
|
self.db_truth_social_content.insert_data_to_mysql(result_df)
|
||||||
self.send_wechat_message(result_df)
|
logger.info(f"已将{len(result_df)}条数据插入到数据库")
|
||||||
|
self.send_wechat_message(result_df)
|
||||||
|
else:
|
||||||
|
logger.info(f"没有数据需要插入到数据库和发送企业微信消息")
|
||||||
except requests.exceptions.RequestException as e:
|
except requests.exceptions.RequestException as e:
|
||||||
print(f"请求错误: {e}")
|
print(f"请求错误: {e}")
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
|
|
@ -150,20 +152,27 @@ class TruthSocialRetriever:
|
||||||
results = json.load(f)
|
results = json.load(f)
|
||||||
result_df = pd.DataFrame(results)
|
result_df = pd.DataFrame(results)
|
||||||
result_df = self.remove_duplicate_posts(result_df)
|
result_df = self.remove_duplicate_posts(result_df)
|
||||||
self.send_wechat_message(result_df)
|
if len(result_df) > 0:
|
||||||
|
self.send_wechat_message(result_df)
|
||||||
|
else:
|
||||||
|
logger.info(f"没有数据需要发送企业微信消息")
|
||||||
|
|
||||||
def remove_duplicate_posts(self, result_df: pd.DataFrame):
|
def remove_duplicate_posts(self, result_df: pd.DataFrame):
|
||||||
duplicate_index_list = []
|
try:
|
||||||
for index, row in result_df.iterrows():
|
duplicate_index_list = []
|
||||||
article_id = row["article_id"]
|
for index, row in result_df.iterrows():
|
||||||
exist_data = self.db_truth_social_content.query_data_by_article_id(article_id)
|
article_id = row["article_id"]
|
||||||
if exist_data:
|
exist_data = self.db_truth_social_content.query_data_by_article_id(article_id)
|
||||||
duplicate_index_list.append(index)
|
if exist_data:
|
||||||
# 删除重复的行
|
duplicate_index_list.append(index)
|
||||||
result_df = result_df.drop(duplicate_index_list)
|
# 删除重复的行
|
||||||
result_df.sort_values(by="timestamp", ascending=True, inplace=True)
|
result_df = result_df.drop(duplicate_index_list)
|
||||||
result_df.reset_index(drop=True, inplace=True)
|
result_df.sort_values(by="timestamp", ascending=True, inplace=True)
|
||||||
logger.info(f"删除重复的行后,剩余{len(result_df)}条数据")
|
result_df.reset_index(drop=True, inplace=True)
|
||||||
|
logger.info(f"删除重复的行后,剩余{len(result_df)}条数据")
|
||||||
|
except Exception as e:
|
||||||
|
result_df = pd.DataFrame([])
|
||||||
|
logger.error(f"删除重复的行失败: {e}")
|
||||||
return result_df
|
return result_df
|
||||||
|
|
||||||
def send_wechat_message(self, result_df: pd.DataFrame):
|
def send_wechat_message(self, result_df: pd.DataFrame):
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue