From 9cac40a7a1e42de5a5f0f1212fb0ecb0a612c39a Mon Sep 17 00:00:00 2001
From: blade <8019068@qq.com>
Date: Wed, 22 Oct 2025 17:18:52 +0800
Subject: [PATCH] Support analyzing articles with Ali Qwen-Plus

---
 config.py                                     |   4 +-
 core/db/db_truth_social_content.py            |   2 +
 .../truth_social_retriever.cpython-312.pyc    | Bin 11681 -> 14759 bytes
 core/media/truth_social_retriever.py          | 178 +++++++++++++-----
 instructions/media_article_instructions.json  |   5 +
 requirements.txt                              |   3 +-
 sql/table/truth_social_content.sql            |   6 +
 7 files changed, 147 insertions(+), 51 deletions(-)
 create mode 100644 instructions/media_article_instructions.json

diff --git a/config.py b/config.py
index 4804750..53eb277 100644
--- a/config.py
+++ b/config.py
@@ -234,4 +234,6 @@ TWITTER_CONFIG = {
 }
 
 TRUTH_SOCIAL_API = {"api_key": "FRfhlDHnmYc1PCCrVHZdWtqDENr2",
-"user_id": {"realDonaldTrump": "107780257626128497"}}
\ No newline at end of file
+"user_id": {"realDonaldTrump": "107780257626128497"}}
+
+ALI_API_KEY = "sk-216039fdd9ee4bc48667418b23e648d0"
\ No newline at end of file
diff --git a/core/db/db_truth_social_content.py b/core/db/db_truth_social_content.py
index 0cf2c88..8aaf326 100644
--- a/core/db/db_truth_social_content.py
+++ b/core/db/db_truth_social_content.py
@@ -17,6 +17,8 @@ class DBTruthSocialContent:
             "timestamp",
             "date_time",
             "text",
+            "analysis_result",
+            "analysis_token",
             "media_url",
             "media_type",
             "media_thumbnail"
diff --git a/core/media/__pycache__/truth_social_retriever.cpython-312.pyc b/core/media/__pycache__/truth_social_retriever.cpython-312.pyc
index 9cc9fa1030595958df57865b7aa170991bd6132c..d25757a8c304a80a9526e0a32c3ced693724c4c7 100644
Binary files a/core/media/__pycache__/truth_social_retriever.cpython-312.pyc and b/core/media/__pycache__/truth_social_retriever.cpython-312.pyc differ
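The ALI_API_KEY added to config.py above is consumed through the DashScope SDK pinned in requirements.txt further down. A minimal sketch of that call path, mirroring the analyze_truth_social_content() method introduced in truth_social_retriever.py below; the model name, forced-search options, and result format come from this patch, and the response-field access follows the DashScope "message" result format:

from http import HTTPStatus

import dashscope

from config import ALI_API_KEY

# Mirrors the call added in truth_social_retriever.py: qwen-plus with web
# search forced on, returning an OpenAI-style message payload.
response = dashscope.Generation.call(
    api_key=ALI_API_KEY,
    model="qwen-plus",
    messages=[{"role": "user", "content": "分析这条推文: ..."}],
    enable_search=True,
    search_options={"forced_search": True},
    result_format="message",
)

if response.status_code == HTTPStatus.OK:
    content = response.output.choices[0].message.content  # analysis text
    total_tokens = response.usage.total_tokens            # billed tokens
else:
    print(f"DashScope error {response.code}: {response.message}")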
zggr!wOCMDX(=r#8x>6kxdeElkqS911{UMf!9EQy4q|fF6<{YaaXJeM2KP=)u33^g3 zm2}i=S+;tLSpaxRW^3K7WrDt{eNVdpw5%m*GqRTP z?N>T4b}j%+CTnw8(*$!L;3e&lxN(`}tgZ+Q)P^DlaHc{(I@_{g;32Os&~?x+K^4}6 zsbQ;&P)-ao^Ssyy$q~y6a(bz;3cf|&mA&{+D!-WCZfZLdHHg!IZWC1LdO- AB>(^b diff --git a/core/media/truth_social_retriever.py b/core/media/truth_social_retriever.py index d655989..e6f830f 100644 --- a/core/media/truth_social_retriever.py +++ b/core/media/truth_social_retriever.py @@ -1,6 +1,6 @@ import core.logger as logging from core.db.db_truth_social_content import DBTruthSocialContent -from config import TRUTH_SOCIAL_API, COIN_MYSQL_CONFIG, WECHAT_CONFIG +from config import TRUTH_SOCIAL_API, COIN_MYSQL_CONFIG, WECHAT_CONFIG, ALI_API_KEY from core.wechat import Wechat import requests @@ -11,9 +11,11 @@ import time from datetime import datetime import pytz import pandas as pd +import dashscope logger = logging.logger + class TruthSocialRetriever: def __init__(self) -> None: self.api_key = TRUTH_SOCIAL_API.get("api_key", "") @@ -38,25 +40,33 @@ class TruthSocialRetriever: self.save_path = r"./output/media/truth_social/" os.makedirs(self.save_path, exist_ok=True) - def get_user_id_from_page(self, handle='realDonaldTrump'): - url = f'https://truthsocial.com/@{handle}' - headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'} # 模拟浏览器 - + self.ali_api_key = ALI_API_KEY + instruction_file = r"./instructions/media_article_instructions.json" + with open(instruction_file, "r", encoding="utf-8") as f: + self.instruction = json.load(f) + + def get_user_id_from_page(self, handle="realDonaldTrump"): + url = f"https://truthsocial.com/@{handle}" + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" + } # 模拟浏览器 + response = requests.get(url, headers=headers) response.raise_for_status() - - soup = BeautifulSoup(response.text, 'html.parser') + + soup = BeautifulSoup(response.text, "html.parser") # 查找嵌入的 JSON(Truth Social 使用 data 属性或 script 标签) - scripts = soup.find_all('script') + scripts = soup.find_all("script") for script in scripts: - if script.string and 'id' in script.string and handle in script.string: + if script.string and "id" in script.string and handle in script.string: # 简单提取(实际可能需正则匹配 JSON) import re + match = re.search(r'"id"\s*:\s*"(\d+)"', script.string) if match: return match.group(1) return None - + def get_user_posts(self, limit: int = None): """ 获取用户在 Truth Social 的最新帖子。 @@ -66,65 +76,64 @@ class TruthSocialRetriever: 497美元:500,000次,如果5分钟跑一次,则可以跑1736天 参数: - limit: 最大帖子数(API 默认返回 20 条,可通过分页获取更多)。 - + 返回: - 帖子列表(JSON 格式)。 """ - headers = { - 'x-api-key': self.api_key, - 'Content-Type': 'application/json' - } - + headers = {"x-api-key": self.api_key, "Content-Type": "application/json"} + for user_name, user_id in self.user_info.items(): params = { - 'handle': user_name, # 用户名 - 'user_id': user_id, # 可选,用户 ID - 'next_max_id': None, # 分页时设置为上一次响应的 max_id - 'trim': 'false' # 保留完整内容 + "handle": user_name, # 用户名 + "user_id": user_id, # 可选,用户 ID + "next_max_id": None, # 分页时设置为上一次响应的 max_id + "trim": "false", # 保留完整内容 } - - url = 'https://api.scrapecreators.com/v1/truthsocial/user/posts' + + url = "https://api.scrapecreators.com/v1/truthsocial/user/posts" logger.info(f"Searching contents for user: {user_name}") try: response = requests.get(url, headers=headers, params=params) response.raise_for_status() # 检查 HTTP 错误 data = response.json() - + # 提取帖子列表(假设响应中 'posts' 是键,根据实际文档调整) if limit is not None and isinstance(limit, int): - posts = data.get('posts', 
[])[:limit] + posts = data.get("posts", [])[:limit] else: - posts = data.get('posts', []) + posts = data.get("posts", []) results = [] if posts: logger.info(f"获取{user_name}帖子: {len(posts)}条") for post in posts: result = {} - result["article_id"] = post.get('id') + result["article_id"] = post.get("id") result["user_id"] = user_id result["user_name"] = user_name - datetime_text = post.get('created_at') + datetime_text = post.get("created_at") datetime_dict = self.transform_datetime(datetime_text) timestamp_ms = datetime_dict["timestamp_ms"] result["timestamp"] = timestamp_ms beijing_time_str = datetime_dict["beijing_time_str"] result["date_time"] = beijing_time_str - result["text"] = post.get('text', '无内容') - media_attachments = post.get('media_attachments', []) + result["text"] = post.get("text", "无内容") + media_attachments = post.get("media_attachments", []) result["media_url"] = "" result["media_type"] = "" result["media_thumbnail"] = "" if media_attachments: for media_attachment in media_attachments: - result["media_url"] = media_attachment.get('url') - result["media_type"] = media_attachment.get('type') - result["media_thumbnail"] = media_attachment.get('preview_url') + result["media_url"] = media_attachment.get("url") + result["media_type"] = media_attachment.get("type") + result["media_thumbnail"] = media_attachment.get( + "preview_url" + ) break results.append(result) else: print("获取帖子失败,请检查 API 密钥或网络。") - + if len(results) > 0: # user_path = os.path.join(self.save_path, user_name) # os.makedirs(user_path, exist_ok=True) @@ -136,19 +145,37 @@ class TruthSocialRetriever: # logger.info(f"已将{len(results)}条数据保存到: {json_file_name}") result_df = pd.DataFrame(results) result_df = self.remove_duplicate_posts(result_df) + result_df["analysis_result"] = "" + result_df["analysis_token"] = 0 if len(result_df) > 0: + result_df = self.send_wechat_message(result_df) + result_df = result_df[ + [ + "article_id", + "user_id", + "user_name", + "timestamp", + "date_time", + "text", + "analysis_result", + "analysis_token", + "media_url", + "media_type", + "media_thumbnail", + ] + ] self.db_truth_social_content.insert_data_to_mysql(result_df) logger.info(f"已将{len(result_df)}条数据插入到数据库") - self.send_wechat_message(result_df) + else: logger.info(f"没有数据需要插入到数据库和发送企业微信消息") except requests.exceptions.RequestException as e: print(f"请求错误: {e}") except json.JSONDecodeError as e: print(f"JSON 解析错误: {e}") - + def send_message_by_json_file(self, json_file_name: str): - with open(json_file_name, 'r', encoding='utf-8') as f: + with open(json_file_name, "r", encoding="utf-8") as f: results = json.load(f) result_df = pd.DataFrame(results) result_df = self.remove_duplicate_posts(result_df) @@ -156,13 +183,15 @@ class TruthSocialRetriever: self.send_wechat_message(result_df) else: logger.info(f"没有数据需要发送企业微信消息") - + def remove_duplicate_posts(self, result_df: pd.DataFrame): try: duplicate_index_list = [] for index, row in result_df.iterrows(): article_id = row["article_id"] - exist_data = self.db_truth_social_content.query_data_by_article_id(article_id) + exist_data = self.db_truth_social_content.query_data_by_article_id( + article_id + ) if exist_data: duplicate_index_list.append(index) # 删除重复的行 @@ -174,7 +203,7 @@ class TruthSocialRetriever: result_df = pd.DataFrame([]) logger.error(f"删除重复的行失败: {e}") return result_df - + def send_wechat_message(self, result_df: pd.DataFrame): if self.wechat is None: logger.error("企业微信未初始化") @@ -188,18 +217,73 @@ class TruthSocialRetriever: self.wechat.send_image(media_thumbnail) else: 
                         contents = []
-                        contents.append(f"### 川普推文")
+                        contents.append(f"## 川普推文")
                         contents.append(text)
-                        contents.append(f"### 推文时间")
+                        contents.append(f"## 推文时间")
                         contents.append(date_time)
                         mark_down_text = "\n\n".join(contents)
-                        self.wechat.send_markdown(mark_down_text)
+                        analysis_result, analysis_token = self.analyze_truth_social_content(
+                            text
+                        )
+                        result_df.at[index, "analysis_result"] = analysis_result
+                        result_df.at[index, "analysis_token"] = analysis_token
+                        analysis_text = f"\n\n## 分析结果\n\n{analysis_result}"
+                        analysis_text += f"\n\n## 分析token\n\n{analysis_token}"
+                        if self.calculate_bytes(mark_down_text + analysis_text) > 4096:
+                            self.wechat.send_markdown(mark_down_text)
+                            if self.calculate_bytes(analysis_text) > 4096:
+                                half_analysis_text_length = len(analysis_text) // 2
+                                analysis_1st = analysis_text[:half_analysis_text_length].strip()
+                                analysis_2nd = analysis_text[half_analysis_text_length:].strip()
+                                self.wechat.send_markdown(
+                                    f"## 分析结果第一部分\n\n{analysis_1st}"
+                                )
+                                self.wechat.send_markdown(
+                                    f"## 分析结果第二部分\n\n{analysis_2nd}"
+                                )
+                            else:
+                                self.wechat.send_markdown(analysis_text.strip())
+                        else:
+                            self.wechat.send_markdown(mark_down_text + analysis_text)
             except Exception as e:
                 logger.error(f"发送企业微信消息失败: {e}")
                 continue
-    
+
+        return result_df
+
+    def calculate_bytes(self, text: str):
+        return len(text.encode("utf-8"))
+
+    def analyze_truth_social_content(self, text: str):
+        try:
+            context = text
+            instructions = self.instruction.get("Instructions", "")
+            output = self.instruction.get("Output", "")
+            prompt = f"# Context\n\n{context}\n\n# Instructions\n\n{instructions}\n\n# Output\n\n{output}"
+            response = dashscope.Generation.call(
+                api_key=self.ali_api_key,
+                model="qwen-plus",
+                messages=[{"role": "user", "content": prompt}],
+                enable_search=True,
+                search_options={"forced_search": True},  # 强制联网搜索
+                result_format="message",
+            )
+            response_contents = (
+                response.get("output", {})
+                .get("choices", [])[0]
+                .get("message", {})
+                .get("content", "")
+            )
+            # 获取response的token
+            token = response.get("usage", {}).get("total_tokens", 0)
+            return response_contents, token
+        except Exception as e:
+            logger.error(f"分析推文失败: {e}")
+            return "", 0
+
     def transform_datetime(self, datetime_text: str):
-        utc_time = datetime.strptime(datetime_text, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=pytz.UTC)
+        utc_time = datetime.strptime(datetime_text, "%Y-%m-%dT%H:%M:%S.%fZ").replace(
+            tzinfo=pytz.UTC
+        )
 
         # 1. 转换为时间戳(毫秒)
         timestamp_ms = int(utc_time.timestamp() * 1000)
@@ -209,9 +293,5 @@ class TruthSocialRetriever:
         beijing_time_str = beijing_time.strftime("%Y-%m-%dT%H:%M:%S%z")
         # 插入冒号到时区偏移(如 +0800 -> +08:00)
         beijing_time_str = beijing_time_str[:-2] + ":" + beijing_time_str[-2:]
-        result = {
-            "timestamp_ms": timestamp_ms,
-            "beijing_time_str": beijing_time_str
-        }
+        result = {"timestamp_ms": timestamp_ms, "beijing_time_str": beijing_time_str}
         return result
-
diff --git a/instructions/media_article_instructions.json b/instructions/media_article_instructions.json
new file mode 100644
index 0000000..49033bf
--- /dev/null
+++ b/instructions/media_article_instructions.json
@@ -0,0 +1,5 @@
+{
+    "Context": "{0}\n\n",
+    "Instructions": "你是一个专业的时政与金融分析师,你的任务是分析推文,结合推文时间(北京时间),联网搜索,并给出分析结果。\n要求:1. 翻译推文为中文,要求符合中文表达习惯;\n2. 分析推文内容,给出推文的核心观点;\n3. 人物分析:分析推文涉及人物以及人物简介;\n4. 区域分析:包括国家与地区;\n5. 行业以及影响分析;\n6. 
经济与金融分析:分析涉及经济与金融影响,包括美股、虚拟货币以及中国A股;\n\n",
+    "Output": "## 输出要求\n\n除了翻译之外,核心观点+人物分析+区域分析+行业及影响分析+经济与金融分析,不超过1000汉字。\n\n## 输出格式\n\n### 翻译\n\n### 核心观点\n\n### 人物分析\n\n### 区域分析\n\n### 行业及影响分析\n\n### 经济与金融分析\n\n"
+}
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 01c840d..c97f5b5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,4 +11,5 @@ xlsxwriter >= 3.2.5
 openpyxl >= 3.1.5
 cryptography >= 3.4.8
 mplfinance
-schedule
\ No newline at end of file
+schedule
+dashscope >= 1.24.7
\ No newline at end of file
diff --git a/sql/table/truth_social_content.sql b/sql/table/truth_social_content.sql
index aaaf4ba..c053a08 100644
--- a/sql/table/truth_social_content.sql
+++ b/sql/table/truth_social_content.sql
@@ -5,6 +5,8 @@ CREATE TABLE `truth_social_content` (
     `timestamp` BIGINT NOT NULL,
     `date_time` VARCHAR(50) NOT NULL,
     `text` TEXT NOT NULL,
+    `analysis_result` TEXT NULL,
+    `analysis_token` INT NULL,
     `media_url` TEXT NULL,
     `media_type` VARCHAR(50) NULL,
     `media_thumbnail` TEXT NULL,
@@ -14,6 +16,10 @@ CREATE TABLE `truth_social_content` (
 
 -- 对于 MySQL 8.0.29 之前的版本不支持 "ADD COLUMN IF NOT EXISTS"
 -- 如需在已有表上添加列,请分别执行以下语句(每条仅需执行一次)
+ALTER TABLE `truth_social_content`
+    ADD COLUMN `analysis_result` TEXT NULL DEFAULT NULL AFTER `text`;
+ALTER TABLE `truth_social_content`
+    ADD COLUMN `analysis_token` INT NULL DEFAULT NULL AFTER `analysis_result`;
 ALTER TABLE `truth_social_content`
     ADD COLUMN `media_url` TEXT NULL DEFAULT NULL AFTER `text`;
 ALTER TABLE `truth_social_content`
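One caveat on the 4096-byte limit handled in send_wechat_message() above: calculate_bytes() measures UTF-8 bytes, but an oversized analysis text is split at half of its character length, and for mostly-Chinese content (roughly three bytes per character) one half can still exceed the cap. A byte-aware splitter along the following lines would respect the limit; split_by_bytes is a hypothetical helper sketched here for illustration, not part of this patch:

def split_by_bytes(text: str, max_bytes: int = 4096) -> list[str]:
    """Split text into chunks whose UTF-8 encoding stays within max_bytes."""
    chunks: list[str] = []
    current = ""
    for ch in text:
        # Check the byte length before committing the next character, so no
        # chunk ever crosses the cap and no multi-byte character is cut apart.
        if len((current + ch).encode("utf-8")) > max_bytes:
            chunks.append(current)
            current = ch
        else:
            current += ch
    if current:
        chunks.append(current)
    return chunks


# Usage sketch: send each chunk as its own markdown message.
# for part in split_by_bytes(analysis_text.strip()):
#     self.wechat.send_markdown(part)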