diff --git a/auto_fetch_truth_social.py b/auto_fetch_truth_social.py index b4be396..0a33541 100644 --- a/auto_fetch_truth_social.py +++ b/auto_fetch_truth_social.py @@ -25,8 +25,8 @@ def run_script(): subprocess.run([python_path, script_path]) end_time = time.time() logger.info(f"Script execution time: {end_time - start_time} seconds") -# 设置每天凌晨00:00 运行一次 -schedule.every().day.at("00:00:00").do(run_script) +# 设置每天上午09:00:00 运行一次 +schedule.every().day.at("09:00:00").do(run_script) # schedule.every(60).seconds.do(run_script) # 保持程序运行并检查调度 diff --git a/config.py b/config.py index ef10b04..4804750 100644 --- a/config.py +++ b/config.py @@ -210,6 +210,7 @@ A_MYSQL_CONFIG = { WECHAT_CONFIG = { "general_key": "11e6f7ac-efa9-418a-904c-9325a9f5d324", "btc_key": "529e135d-843b-43dc-8aca-677a860f4b4b", + "trump_key": "dabe1166-9faa-49b0-b41e-64c4507a2f5b", } ITICK_API_KEY = "dfd4bc0caed148d6bc03b960224754ffb5356349e389431f828702b3a27e8a2b" diff --git a/core/db/db_truth_social_content.py b/core/db/db_truth_social_content.py index 6e3b9eb..0cf2c88 100644 --- a/core/db/db_truth_social_content.py +++ b/core/db/db_truth_social_content.py @@ -117,6 +117,22 @@ class DBTruthSocialContent: condition_dict = {"user_id": user_id, "limit": limit} return self.db_manager.query_data(sql, condition_dict, return_multi=True) + def query_data_by_article_id(self, article_id: str): + """ + 根据文章ID查询数据 + :param article_id: 文章ID + """ + sql = """ + SELECT * FROM truth_social_content + WHERE article_id = :article_id + """ + condition_dict = {"article_id": article_id} + result = self.db_manager.query_data(sql, condition_dict, return_multi=False) + if result: + return result + else: + return None + def query_data_by_timestamp_range( self, start_timestamp: int = None, diff --git a/core/media/__pycache__/truth_social_retriever.cpython-312.pyc b/core/media/__pycache__/truth_social_retriever.cpython-312.pyc index 6953118..9cc9fa1 100644 Binary files a/core/media/__pycache__/truth_social_retriever.cpython-312.pyc and b/core/media/__pycache__/truth_social_retriever.cpython-312.pyc differ diff --git a/core/media/truth_social_retriever.py b/core/media/truth_social_retriever.py index a84661e..f37e601 100644 --- a/core/media/truth_social_retriever.py +++ b/core/media/truth_social_retriever.py @@ -1,6 +1,7 @@ import core.logger as logging from core.db.db_truth_social_content import DBTruthSocialContent -from config import TRUTH_SOCIAL_API, COIN_MYSQL_CONFIG +from config import TRUTH_SOCIAL_API, COIN_MYSQL_CONFIG, WECHAT_CONFIG +from core.wechat import Wechat import requests import json @@ -28,6 +29,12 @@ class TruthSocialRetriever: self.db_url = f"mysql+pymysql://{mysql_user}:{mysql_password}@{mysql_host}:{mysql_port}/{mysql_database}" self.db_truth_social_content = DBTruthSocialContent(self.db_url) + trump_key = WECHAT_CONFIG.get("trump_key", "") + if trump_key: + self.wechat = Wechat(trump_key) + else: + self.wechat = None + self.save_path = r"./output/media/truth_social/" os.makedirs(self.save_path, exist_ok=True) @@ -129,14 +136,59 @@ class TruthSocialRetriever: logger.info(f"已将{len(results)}条数据保存到: {json_file_name}") result_df = pd.DataFrame(results) - + result_df = self.remove_duplicate_posts(result_df) self.db_truth_social_content.insert_data_to_mysql(result_df) - + logger.info(f"已将{len(result_df)}条数据插入到数据库") + self.send_wechat_message(result_df) except requests.exceptions.RequestException as e: print(f"请求错误: {e}") except json.JSONDecodeError as e: print(f"JSON 解析错误: {e}") + def send_message_by_json_file(self, json_file_name: str): + with open(json_file_name, 'r', encoding='utf-8') as f: + results = json.load(f) + result_df = pd.DataFrame(results) + result_df = self.remove_duplicate_posts(result_df) + self.send_wechat_message(result_df) + + def remove_duplicate_posts(self, result_df: pd.DataFrame): + duplicate_index_list = [] + for index, row in result_df.iterrows(): + article_id = row["article_id"] + exist_data = self.db_truth_social_content.query_data_by_article_id(article_id) + if exist_data: + duplicate_index_list.append(index) + # 删除重复的行 + result_df = result_df.drop(duplicate_index_list) + result_df.sort_values(by="timestamp", ascending=True, inplace=True) + result_df.reset_index(drop=True, inplace=True) + logger.info(f"删除重复的行后,剩余{len(result_df)}条数据") + return result_df + + def send_wechat_message(self, result_df: pd.DataFrame): + if self.wechat is None: + logger.error("企业微信未初始化") + return + for index, row in result_df.iterrows(): + try: + date_time = row["date_time"] + text = row["text"] + media_thumbnail = row["media_thumbnail"] + if media_thumbnail and len(media_thumbnail) > 0: + self.wechat.send_image(media_thumbnail) + else: + contents = [] + contents.append(f"### 川普推文") + contents.append(text) + contents.append(f"### 推文时间") + contents.append(date_time) + mark_down_text = "\n\n".join(contents) + self.wechat.send_markdown(mark_down_text) + except Exception as e: + logger.error(f"发送企业微信消息失败: {e}") + continue + def transform_datetime(self, datetime_text: str): utc_time = datetime.strptime(datetime_text, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=pytz.UTC) diff --git a/core/wechat.py b/core/wechat.py index ac91a4f..14ce006 100644 --- a/core/wechat.py +++ b/core/wechat.py @@ -6,7 +6,9 @@ """ import core.logger as logging import requests - +import base64 +import hashlib +import json logger = logging.logger class Wechat: @@ -25,6 +27,7 @@ class Wechat: "text": {"content": text} } response = requests.post(self.url, json=data) + response.raise_for_status() return response.json() def send_markdown(self, text: str): @@ -36,18 +39,43 @@ class Wechat: "markdown_v2": {"content": text} } response = requests.post(self.url, json=data) + response.raise_for_status() return response.json() def send_image(self, image_url: str): """ 发送图片消息 """ + # data = { + # "msgtype": "image", + # "image": {"url": image_url} + # } + # response = requests.post(self.url, json=data) + # return response.json() + image_bytes = self.download_image(image_url) + base64_str, md5_str = self.get_base64_and_md5(image_bytes) data = { "msgtype": "image", - "image": {"url": image_url} + "image": { + "base64": base64_str, + "md5": md5_str, + } } response = requests.post(self.url, json=data) + response.raise_for_status() return response.json() + + def download_image(self, image_url): + """下载图片并返回 bytes""" + response = requests.get(image_url, timeout=10) + response.raise_for_status() # 抛出 HTTP 错误 + return response.content + + def get_base64_and_md5(self,image_bytes): + """计算 Base64(不带 data: 前缀)和 MD5""" + b64 = base64.b64encode(image_bytes).decode('utf-8') + md5 = hashlib.md5(image_bytes).hexdigest() + return b64, md5 def send_file(self, file_url: str): """ @@ -58,6 +86,7 @@ class Wechat: "file": {"url": file_url} } response = requests.post(self.url, json=data) + response.raise_for_status() return response.json() def send_news(self, news: list): @@ -69,5 +98,6 @@ class Wechat: "news": {"articles": news} } response = requests.post(self.url, json=data) + response.raise_for_status() return response.json() \ No newline at end of file diff --git a/truth_social_retriever_main.py b/truth_social_retriever_main.py index 568d5e2..46ed5f8 100644 --- a/truth_social_retriever_main.py +++ b/truth_social_retriever_main.py @@ -4,6 +4,8 @@ from core.media.truth_social_retriever import TruthSocialRetriever def main(): truth_social_retriever = TruthSocialRetriever() truth_social_retriever.get_user_posts() + # json_file_name = r"./output/media/truth_social/realDonaldTrump/realDonaldTrump_20251021172241.json" + # truth_social_retriever.send_message_by_json_file(json_file_name) if __name__ == "__main__":