diff --git a/auto_fetch_truth_social.py b/auto_fetch_truth_social.py
index 0a33541..b33b611 100644
--- a/auto_fetch_truth_social.py
+++ b/auto_fetch_truth_social.py
@@ -26,8 +26,9 @@ def run_script():
     end_time = time.time()
     logger.info(f"Script execution time: {end_time - start_time} seconds")
 # 设置每天上午09:00:00 运行一次
-schedule.every().day.at("09:00:00").do(run_script)
-# schedule.every(60).seconds.do(run_script)
+# schedule.every().day.at("09:00:00").do(run_script)
+# 设置每5分钟运行一次
+schedule.every(5).minutes.do(run_script)
 
 # 保持程序运行并检查调度
 logger.info("Scheduler started. Press Ctrl+C to stop.")
diff --git a/core/media/truth_social_retriever.py b/core/media/truth_social_retriever.py
index f37e601..d655989 100644
--- a/core/media/truth_social_retriever.py
+++ b/core/media/truth_social_retriever.py
@@ -126,20 +126,22 @@ class TruthSocialRetriever:
                 print("获取帖子失败,请检查 API 密钥或网络。")
 
             if len(results) > 0:
-                user_path = os.path.join(self.save_path, user_name)
-                os.makedirs(user_path, exist_ok=True)
-                now_date_time = datetime.now().strftime("%Y%m%d%H%M%S")
-                json_file_name = os.path.join(user_path, f"{user_name}_{now_date_time}.json")
-                # 将results内容写入json_file_name文件中
-                with open(json_file_name, 'w', encoding='utf-8') as f:
-                    json.dump(results, f, ensure_ascii=False, indent=2)
-                logger.info(f"已将{len(results)}条数据保存到: {json_file_name}")
-                
+                # user_path = os.path.join(self.save_path, user_name)
+                # os.makedirs(user_path, exist_ok=True)
+                # now_date_time = datetime.now().strftime("%Y%m%d%H%M%S")
+                # json_file_name = os.path.join(user_path, f"{user_name}_{now_date_time}.json")
+                # # 将results内容写入json_file_name文件中
+                # with open(json_file_name, 'w', encoding='utf-8') as f:
+                #     json.dump(results, f, ensure_ascii=False, indent=2)
+                # logger.info(f"已将{len(results)}条数据保存到: {json_file_name}")
                 result_df = pd.DataFrame(results)
                 result_df = self.remove_duplicate_posts(result_df)
-                self.db_truth_social_content.insert_data_to_mysql(result_df)
-                logger.info(f"已将{len(result_df)}条数据插入到数据库")
-                
-                self.send_wechat_message(result_df)
+                if len(result_df) > 0:
+                    self.db_truth_social_content.insert_data_to_mysql(result_df)
+                    logger.info(f"已将{len(result_df)}条数据插入到数据库")
+                    self.send_wechat_message(result_df)
+                else:
+                    logger.info(f"没有数据需要插入到数据库和发送企业微信消息")
         except requests.exceptions.RequestException as e:
             print(f"请求错误: {e}")
         except json.JSONDecodeError as e:
@@ -150,20 +152,27 @@ class TruthSocialRetriever:
             results = json.load(f)
         result_df = pd.DataFrame(results)
         result_df = self.remove_duplicate_posts(result_df)
-        self.send_wechat_message(result_df)
+        if len(result_df) > 0:
+            self.send_wechat_message(result_df)
+        else:
+            logger.info(f"没有数据需要发送企业微信消息")
 
     def remove_duplicate_posts(self, result_df: pd.DataFrame):
-        duplicate_index_list = []
-        for index, row in result_df.iterrows():
-            article_id = row["article_id"]
-            exist_data = self.db_truth_social_content.query_data_by_article_id(article_id)
-            if exist_data:
-                duplicate_index_list.append(index)
-        # 删除重复的行
-        result_df = result_df.drop(duplicate_index_list)
-        result_df.sort_values(by="timestamp", ascending=True, inplace=True)
-        result_df.reset_index(drop=True, inplace=True)
-        logger.info(f"删除重复的行后,剩余{len(result_df)}条数据")
+        try:
+            duplicate_index_list = []
+            for index, row in result_df.iterrows():
+                article_id = row["article_id"]
+                exist_data = self.db_truth_social_content.query_data_by_article_id(article_id)
+                if exist_data:
+                    duplicate_index_list.append(index)
+            # 删除重复的行
+            result_df = result_df.drop(duplicate_index_list)
+            result_df.sort_values(by="timestamp", ascending=True, inplace=True)
+            result_df.reset_index(drop=True, inplace=True)
+            logger.info(f"删除重复的行后,剩余{len(result_df)}条数据")
+        except Exception as e:
+            result_df = pd.DataFrame([])
+            logger.error(f"删除重复的行失败: {e}")
         return result_df
 
     def send_wechat_message(self, result_df: pd.DataFrame):