support auto-fetch-push Trump articles on Truth Social at 9:00 am (Beijing)

2025-10-21 17:49:04 +08:00 · 2025-10-21 17:49:04 +08:00 · b1d6e62ca9
parent 2dc2d51171
commit b1d6e62ca9
7 changed files with 108 additions and 7 deletions
--- a/auto_fetch_truth_social.py
+++ b/auto_fetch_truth_social.py
@ -25,8 +25,8 @@ def run_script():
    subprocess.run([python_path, script_path])
    end_time = time.time()
    logger.info(f"Script execution time: {end_time - start_time} seconds")
-# 设置每天凌晨00:00 运行一次
-schedule.every().day.at("00:00:00").do(run_script)
+# 设置每天上午09:00:00 运行一次
+schedule.every().day.at("09:00:00").do(run_script)
 # schedule.every(60).seconds.do(run_script)

 # 保持程序运行并检查调度
--- a/config.py
+++ b/config.py
@ -210,6 +210,7 @@ A_MYSQL_CONFIG = {
 WECHAT_CONFIG = {
    "general_key": "11e6f7ac-efa9-418a-904c-9325a9f5d324",
    "btc_key": "529e135d-843b-43dc-8aca-677a860f4b4b",
+    "trump_key": "dabe1166-9faa-49b0-b41e-64c4507a2f5b",
 }

 ITICK_API_KEY = "dfd4bc0caed148d6bc03b960224754ffb5356349e389431f828702b3a27e8a2b"
--- a/core/db/db_truth_social_content.py
+++ b/core/db/db_truth_social_content.py
@ -117,6 +117,22 @@ class DBTruthSocialContent:
        condition_dict = {"user_id": user_id, "limit": limit}
        return self.db_manager.query_data(sql, condition_dict, return_multi=True)

+    def query_data_by_article_id(self, article_id: str):
+        """
+        根据文章ID查询数据
+        :param article_id: 文章ID
+        """
+        sql = """
+        SELECT * FROM truth_social_content
+        WHERE article_id = :article_id
+        """
+        condition_dict = {"article_id": article_id}
+        result = self.db_manager.query_data(sql, condition_dict, return_multi=False)
+        if result:
+            return result
+        else:
+            return None
+
    def query_data_by_timestamp_range(
        self, 
        start_timestamp: int = None, 
--- a/core/media/pycache/truth_social_retriever.cpython-312.pyc
+++ b/core/media/pycache/truth_social_retriever.cpython-312.pyc
--- a/core/media/truth_social_retriever.py
+++ b/core/media/truth_social_retriever.py
@ -1,6 +1,7 @@
 import core.logger as logging
 from core.db.db_truth_social_content import DBTruthSocialContent
-from config import TRUTH_SOCIAL_API, COIN_MYSQL_CONFIG
+from config import TRUTH_SOCIAL_API, COIN_MYSQL_CONFIG, WECHAT_CONFIG
+from core.wechat import Wechat

 import requests
 import json
@ -28,6 +29,12 @@ class TruthSocialRetriever:
        self.db_url = f"mysql+pymysql://{mysql_user}:{mysql_password}@{mysql_host}:{mysql_port}/{mysql_database}"
        self.db_truth_social_content = DBTruthSocialContent(self.db_url)

+        trump_key = WECHAT_CONFIG.get("trump_key", "")
+        if trump_key:
+            self.wechat = Wechat(trump_key)
+        else:
+            self.wechat = None
+
        self.save_path = r"./output/media/truth_social/"
        os.makedirs(self.save_path, exist_ok=True)

@ -129,14 +136,59 @@ class TruthSocialRetriever:
                    logger.info(f"已将{len(results)}条数据保存到: {json_file_name}")

                    result_df = pd.DataFrame(results)
-                    
+                    result_df = self.remove_duplicate_posts(result_df)
                    self.db_truth_social_content.insert_data_to_mysql(result_df)
-                    
+                    logger.info(f"已将{len(result_df)}条数据插入到数据库")
+                    self.send_wechat_message(result_df)
            except requests.exceptions.RequestException as e:
                print(f"请求错误: {e}")
            except json.JSONDecodeError as e:
                print(f"JSON 解析错误: {e}")
    
+    def send_message_by_json_file(self, json_file_name: str):
+        with open(json_file_name, 'r', encoding='utf-8') as f:
+            results = json.load(f)
+        result_df = pd.DataFrame(results)
+        result_df = self.remove_duplicate_posts(result_df)
+        self.send_wechat_message(result_df)
+    
+    def remove_duplicate_posts(self, result_df: pd.DataFrame):
+        duplicate_index_list = []
+        for index, row in result_df.iterrows():
+            article_id = row["article_id"]
+            exist_data = self.db_truth_social_content.query_data_by_article_id(article_id)
+            if exist_data:
+                duplicate_index_list.append(index)
+        # 删除重复的行
+        result_df = result_df.drop(duplicate_index_list)
+        result_df.sort_values(by="timestamp", ascending=True, inplace=True)
+        result_df.reset_index(drop=True, inplace=True)
+        logger.info(f"删除重复的行后，剩余{len(result_df)}条数据")
+        return result_df
+    
+    def send_wechat_message(self, result_df: pd.DataFrame):
+        if self.wechat is None:
+            logger.error("企业微信未初始化")
+            return
+        for index, row in result_df.iterrows():
+            try:
+                date_time = row["date_time"]
+                text = row["text"]
+                media_thumbnail = row["media_thumbnail"]
+                if media_thumbnail and len(media_thumbnail) > 0:
+                    self.wechat.send_image(media_thumbnail)
+                else:
+                    contents = []
+                    contents.append(f"### 川普推文")
+                    contents.append(text)
+                    contents.append(f"### 推文时间")
+                    contents.append(date_time)
+                    mark_down_text = "\n\n".join(contents)
+                    self.wechat.send_markdown(mark_down_text)
+            except Exception as e:
+                logger.error(f"发送企业微信消息失败: {e}")
+                continue
+    
    def transform_datetime(self, datetime_text: str):
        utc_time = datetime.strptime(datetime_text, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=pytz.UTC)

--- a/core/wechat.py
+++ b/core/wechat.py
@ -6,7 +6,9 @@
 """
 import core.logger as logging
 import requests
-
+import base64
+import hashlib
+import json
 logger = logging.logger

 class Wechat:
@ -25,6 +27,7 @@ class Wechat:
            "text": {"content": text}
        }
        response = requests.post(self.url, json=data)
+        response.raise_for_status()
        return response.json()
    
    def send_markdown(self, text: str):
@ -36,18 +39,43 @@ class Wechat:
            "markdown_v2": {"content": text}
        }
        response = requests.post(self.url, json=data)
+        response.raise_for_status()
        return response.json()
    
    def send_image(self, image_url: str):
        """
        发送图片消息
        """
+        # data = {
+        #     "msgtype": "image",
+        #     "image": {"url": image_url}
+        # }
+        # response = requests.post(self.url, json=data)
+        # return response.json()
+        image_bytes = self.download_image(image_url)
+        base64_str, md5_str = self.get_base64_and_md5(image_bytes)
        data = {
            "msgtype": "image",
-            "image": {"url": image_url}
+            "image": {
+                "base64": base64_str,
+                "md5": md5_str,
+            }
        }
        response = requests.post(self.url, json=data)
+        response.raise_for_status()
        return response.json()
+
+    def download_image(self, image_url):
+        """下载图片并返回 bytes"""
+        response = requests.get(image_url, timeout=10)
+        response.raise_for_status()  # 抛出 HTTP 错误
+        return response.content
+    
+    def get_base64_and_md5(self,image_bytes):
+        """计算 Base64（不带 data: 前缀）和 MD5"""
+        b64 = base64.b64encode(image_bytes).decode('utf-8')
+        md5 = hashlib.md5(image_bytes).hexdigest()
+        return b64, md5
    
    def send_file(self, file_url: str):
        """
@ -58,6 +86,7 @@ class Wechat:
            "file": {"url": file_url}
        }
        response = requests.post(self.url, json=data)
+        response.raise_for_status()
        return response.json()
    
    def send_news(self, news: list):
@ -69,5 +98,6 @@ class Wechat:
            "news": {"articles": news}
        }
        response = requests.post(self.url, json=data)
+        response.raise_for_status()
        return response.json()
    
--- a/truth_social_retriever_main.py
+++ b/truth_social_retriever_main.py
@ -4,6 +4,8 @@ from core.media.truth_social_retriever import TruthSocialRetriever
 def main():
    truth_social_retriever = TruthSocialRetriever()
    truth_social_retriever.get_user_posts()
+    # json_file_name = r"./output/media/truth_social/realDonaldTrump/realDonaldTrump_20251021172241.json"
+    # truth_social_retriever.send_message_by_json_file(json_file_name)


 if __name__ == "__main__":