From ce10e4fbf2847493cf821a792f2dc0fa6894afd5 Mon Sep 17 00:00:00 2001 From: blade <8019068@qq.com> Date: Tue, 28 Oct 2025 16:56:47 +0800 Subject: [PATCH] support text + image hybrid scenariors --- config.py | 21 +- .../truth_social_retriever.cpython-312.pyc | Bin 18034 -> 20876 bytes core/media/truth_social_retriever.py | 312 +++++++++++------- core/wechat.py | 1 + ...media_article_image_post_instructions.json | 4 + instructions/media_article_instructions.json | 2 +- .../media_image_post_instructions.json | 2 +- 7 files changed, 211 insertions(+), 131 deletions(-) create mode 100644 instructions/media_article_image_post_instructions.json diff --git a/config.py b/config.py index a541f04..4f4c160 100644 --- a/config.py +++ b/config.py @@ -150,7 +150,7 @@ A_STOCK_MONITOR_CONFIG = { "000333.SZ", "002230.SZ", "300308.SZ", - "002475.SZ" + "002475.SZ", ], "bars": ["1D", "1W", "1M"], "initial_date": "2015-01-01 00:00:00", @@ -236,7 +236,7 @@ ITICK_API_KEY = "dfd4bc0caed148d6bc03b960224754ffb5356349e389431f828702b3a27e8a2 TWITTER_CONFIG = { "keys": { "api_key": "FRfhlDHnmYc1PCCrVHZdWtqDENr2", - }, + }, "base_url": "https://api.scrapecreators.com/v1/twitter/user-tweets?handle={0}&trim=false", "monitor_accounts": [ {"name": "realDonaldTrump"}, @@ -247,7 +247,18 @@ TWITTER_CONFIG = { ], } -TRUTH_SOCIAL_API = {"api_key": "FRfhlDHnmYc1PCCrVHZdWtqDENr2", -"user_id": {"realDonaldTrump": "107780257626128497"}} +TRUTH_SOCIAL_API = { + "api_key": "FRfhlDHnmYc1PCCrVHZdWtqDENr2", + "media_config": [ + { + "media_name": "Truth Social", + "base_url": "https://api.scrapecreators.com/v1/truthsocial/user/posts", + "user_info": { + "WhiteHouse": {"id": "", "full_name": "白宫"}, + "realDonaldTrump": {"id": "107780257626128497", "full_name": "川普"}, + }, + } + ], +} -ALI_API_KEY = "sk-216039fdd9ee4bc48667418b23e648d0" \ No newline at end of file +ALI_API_KEY = "sk-216039fdd9ee4bc48667418b23e648d0" diff --git a/core/media/__pycache__/truth_social_retriever.cpython-312.pyc b/core/media/__pycache__/truth_social_retriever.cpython-312.pyc index 150311b5027bc8f1856c76540fbd493a3417f737..963507a7dfa1a32fcce9a34a95228b5e5413add3 100644 GIT binary patch delta 8390 zcmd5>eN-FQm7me~w-883LIO!hAOy&O`LID?3`fx%d~BT)x;6r48O zb++Dw^zBLHO&#NHw~-rnO=_PF=WMozG~JZ5+vdz5Qcbm-UAHGr+kdv$&Ngk@p56OK zNI?2^_MGmX9nhUS_uhBkeIN7Q@7;NLg}m}*Qui-ft%`uF@3X}43x{9Vjgda#8AOl% z4#-PHh&WDC#BmX2q(sk1j*BT1C4PoHE};t1SIPZdlqw`2lu&9=X($cwB1#Lq7W0%2 zetP)j!4KQ1C@juJjRV6Mqej1x zw2@|XLux?|SwY>GpA;V>E|6}bj);&KL?m&HAmQ%{^Lyln(QcW;R@|GG2hR`ppAAi< ziL~Zih#C&|4~&HehllQ5k{u*P7n{)rc?p`8+uasg0u`slL!l_G!<=EQdH?8eB#M3} zf7_9^MMFv#VZwQ=O3`?9~>SHMKn-_6CG7f z)*3_vafw(mdsF5r&Rmt)IYTGSEte%%G)u0!l&g_*HO_?RPA6SEFH05`CGX#nsy;_< zmJ!uwNLEp?q&2Yl8)vr7c^9?2mI+eXIv-rBslPpOb7I!;8gsd8QB(bq7{r3^BP)7N z?P&iuD0t=SzER^7#^Jk5@ilDebA3 z&o@bX+V8xlepz&&9(R)LOfXE1hSK73dX(waB(Fi&;VULVw~Y>yl##qlP_k>{*T{^VBr>*o`5Ig4icB#~q~ zqB*ZtJ3$a9!Ea0+Q!ol#lU})|v@#>Jsn*C;jGR$YYDzrXf+Zg z5!Iq1XK{g)5i#mp`qyy#;EHJ&4VtRNbt@zgum-)=VMO<;l%y8@Ca(-Agka7a$^}cQ z8M26o>B9NUZ|P#XAtI({bW% zQh~0S%=Ol2ads+E)r@uh96v@->X`K@D-`}lhPyyU^H7z^j&7Tzfwg7*f1vDAw(JEW zv%LE@-1(X5C*)*Wm06R+!E+&64{?m+W%T}=m+rs%g`Yh4#{K7C99M$i$AA6I{n;1q ze|;*gI30|H`eAKKE5{=t8kXw8F?t)Aq?Lo?qoe)UQEys07#xj+d=h#u*2puKR9YRz z3;ns^ll{X~S`wv)$#ZD4rIhSICoCIvX;qMp4iAimKue!Q)0P9~9k}?)wh@d*g9B&I zg~HJYvRQp(FY2~V)Ca+WK8eX`OhT9pVSiMU{z$ znfzo?!<3wN`BSbs&Q&*4HA^R5om0AH8R4kl9p${+%iG-#l+vR7DcOfQ!eD+${eqe; z3C!-C4Y2kdi+MfEq8`%Fxa9C&XRb1dp4p-K!9~a6sUF^FPZ|B3aniqNtl@3$>up!t zUT&Z20@s#e)?9Jx_^kBJ$fAE6@ASN?xS>dxXG)XK#+kvHan{*(ReEooI4-?c;+)!% zHrtmhCD+wg)zgECV~O1}PkviG*ZsC?mYJ_-J^L0d`+1M=Az`uB$GiA4Z+tsnKe_3R z&985s?YgrizJ0otv)A&qn}BQP?0$Y@0Js2Wui>j}Z};5nnK_xP?u_q+r&RSeu6o2m$JOj-?bZCo&aBj5cl)uMkIf9uo=*BZK-hG3 zvX`%Icti2Jf^FV2e>z!vfW;LzWDC?c0w;vExh1}xbvJPK#veFp`OP~Y5;9jM>uKOy zcBNW+xt8Af@np;4>FtRC=h?&ucBcaSxWGPk|B+{?yh(+}1;UMdKr} zsDmt@-ag4zfvW9+j3}#I)`H7r9Z~GL-g~t-(e$2uvUfz=ESg`r|KwEq# zTfT|2Z{}M9m~GC?YqzpnPp}my-?KmdzP%Ky*vjEW``KSU42tk1exSxL$}c_~6%iI^ z%3R5rD^unGXAV3hNJH(C(Hief)Fs?((T0?@x24JKqLFC@Qn zr=q-+bkR?PcmUS2+=WZx=STUl^}Ix&yy`;4c=!1wkGocYq&Ox~5{!fr3A-_XR7xyx zGT(S zDpgD!)5NrtDyC!9Py5zHtj^YxiI}#*VQ#wG8sRBliF%AG?0KiNh<}YfY{f2B`nGD(XV(1ip{(ot7WHBYcMWKfr zGKzpd@h#J9FcRzgC)t+VZ-ECPIeE8F6Avjgnw zb3L$PNv|2BRh{)BK2_yIKN3tnTCQ@54RHUDDi>*7@mwIGikJelrMe6zA>94>k|{FW znAH|icE+$CaDv*D4a@zBumP$6oRQNH*90R+iVE-l*<}8dsGtMRb1<2U|DR0ek~PEb zSU2n|FTOuFT+n->DajRBlCOB+yh9!&;3QzrOgDn?5VSjHLQcP<#>tr0j|m(Ch?waq z6O`=Da2LoNkawZ5-w{}ayl2594fBGyD0aJ@<1*krcrNk5i+?_j;XQ~lPanMW_dk8< z?#CjP3QhZsYT22I)ATR}+sh90O}{fwK+1S!UV+f)r+%C0gBOu(Luonqx_|leAG|UZ z9XlHet5g8<1+e|M*!-)Q;L?cNfaHLqaaIRC@b^fZSh(M1LB^Uqzp&R_#kPNiiLle* zttc%S85<6#wUJPmf`bBmmKwVdMw@Dyw(AFiqXXlk!Dy)e^h7iiNlPI|G<=?pLn&Y3 z%6^1>2~od{xi4X|zGsl8wn#+pL;qY;W)VWooeoQk66 z_@_`?+FX9CJ^{eFea@Av+Qz%f;tJm4ysp2hPq-6d*55XJgstdWv~*{sl0}O@%hxPg z0$EB_{&te5vz$2GjIjJ4I>9hj}WGd52yGy%A8OjQNAs=(~FWYyNWe6Fes z(5a&;@#zIeBVW@nt>Ig?q*_jJEhl(a4ezVxeT}@ghOcT~Ruh4f)e;`&4h z0784!Bx`GWvwn6m36Qey;6mUa-+d^)^ZK5vdlDwLwuAL`aQ05Vt8Z1dmGy01uy@?+ z-mxmn*>$hVupMXb;4AmekFk4Cu@(JsEpI7ftM<-^*}admzEhm#RD2R-YkS83eaqH+ zf6|U$?T<3v=g0qE|FSaE8Pb40*Bct5lRQA8Z`74ea+Wr}vSY4>?dW4G4z7gVF}H#3 zIKcY)I7=UhY>t$zhO^bo=;n&K+TNrM`tQhs?FesoV)UA_Hv*!c2`}2ad51gYsOKE@ zGkcSc&bdPij-5-b?Q_~>YcF&;*Lsk3HuLycag^P1l&yIzHCc0tt2vddc>+cg%a33c z$k&MBkfgq$PC1)7C|lk=H#&EU^&I+8EOM2?K?@F>zeD?TL91A&Wn1AS@%a2C+tUAi z=M!)&#j!iG!Oz9GrbF-95C6dAxmA$pPx{*C$OT^qDychb=Dm$Dr8Yf=r_`oG&R9RS zZ~D}-4qdBz!SJ53^@m3LFPA?f!Y|x%rx@||jn@5^14SaPNZBWoa&Ai>DP16?z{6rz z-f*fMuP$j>C_FGmg?ysSQh`^j2wo<$ul8;9wFcj0CkbNw+C%`Qs9L^{6ez8 zo77i8ZkRpansB|(uf8ADzYQKA?LKRL##pT?l`c}DcKE(+CU zE=EXfqMk;rNW!36i3t_w8TFKu66JDGikuwT#M(wBC1(@>RuzmAwhKv2MJXv2q%y@I z(_k2TO)^YUT1GWY4v8q;mqbG(WKutF+*zQ9Y-!%wZ0U#&!PcrY2-qsf!wa%hJkd86B?m zpmwzmst{u)0UK(sRIl@ZM@YDGbt$ojbtSs5Y=>=DJ7PsY%(IE*E3Go)mJC@aGFvhx z3zsmOk)o`})jRZ~6*RdmdYz^SdYsG*E+ZFWBpDJC#gY{Seh{!_37XmD)vg)B5f?_% zsJK!og98UPIKSD4O2jI!lCohW0eEb6NM$iq_?zHRB@7F8So+5tmi|$PgW#}Oa2N!K zo$R2l1*hrP zO}w+eZ{iu&PP{xuwst5d*9@gF46<`IV1+f~5qfSdpcz*ehTyynJNy z1*`WA?D{(BF4hUK>s*_CLpNTeZE#Y9^rz?uskHD$)8%enUvQb>&DJRiZ?#?C#p|tX zaqWV>_RgzqQgYI2T&x$7PhwU=sAj)R=zAbaR^(lLNM?M;=u z!4|);U?>-M^%l<2l5%X}99w3enx~VFzLet_=Qze5Ka+F}qx0AQAM!UrdU##wJ#sO1ZoTg2o!OeQeFdqpN~hCw=g8xuTl zXzcbTk=eNg5Rbob(3lj!FY;v|&k&D9UTN#2KGG;PK7h}_(#Gj+j|hBUJ|tF1(GR-p zlOa-SP4wM9dGq8lf%*CRN5Xx%S4>KM*dA`nvZJKbGE=Zj;QQRR%$+@Ye1$o4Hp46v z#8YI4=#gNyOpA2V!Wr?01l%%#;_!Tyw0dBS4s8&=Oo(avJUZPSB)ieKyWLG6fI(Vz y9=;U^Bbg3(7bHTn=tr14gb8l%$BNc-W7PO)Xbb&oRJ?t={+Ns`d`Ms_82vYATIe+Z delta 6283 zcmb6-3ve6Parf?UAV7Qw68|3o{t59fen3*7Xqlw=0ZIKll`Tu6VID-0A_4jUMWTEV zt;EU5jgnG+or?CaCPUWc^tJX{d2y}tKoQ!5-r|C>PLy7JrcG{-< z4g^GcoKE{l?CtID+qe7fz1@B9(x0Qpmyr6tN|j4NGn;>R^6l<-)IsEuk#}etKLBur z@>53;M;)O#El0m9Il^!{j(HUwk#Kta1GL|Bg6498tmG8_9Ke-;s|e1i;HQRP9?)f+ z2Iy)oUz8~TJZ}cM47i=Hkx3br@{J zsT|+*_{b^$Oo~e7oDTWEkDc8)dgt;bWu2~#kDTtdrKvSlEKWQVPl4zs|=$RXQp|rJ(5_^wwSn^Anl1w~G16E^Duw`M|;UR_1(A#r8JkeBfsL zsO1{f8?<&OF-dyT7vLuSDdsdk$#>$9^X$^6fX?^f|IV`@4>oJ|qE`HZ=0UC;$PD~8 zq}Y?+hKHZh;aAFXopbcDmniBeO;HghA`uut!ZDnLW8aiSSWX&|;u{4Hdyc@oL0zM7 zfq$eSN=}7|*J8s?wN58U1zKQl%Wjb&pha?o99&ed!wafhiI6jqkBbT{bVP=`v}KHJ zLWT1gtyw-s-GYHh;}`*1sE9mJAZ&7tTeuOxBe{Zn4)GddP)9B?h*N}(>E0A2$TP+P zU3rboydmIpQ^!69ixlHjL@6lJW4lxt2|>xJJ)lOA0G20W3}6}&+mu%1XQU%4+*xOI zXM0D~f|@HZL;pygkO%#=Vm~#f6KNVj12p|sT}A_E%yB)>l@;Ni`Erc&( z)+}Zd`{eW8LO%G`31b%s1!=$Z_Kc;OzFAJ;7LKSZ!!bLya75jR3Lpt3wtGU3E~3|d82BBI3|2BTY>=@HRx(UYZa>UjZN z)^f$7%SC{d;BOnOmaJ4nD`>|kuJq54IL%rCJ6SzF;Sxv8N+F6Y7IWEGjRVW19Di=8 zF;-;kLDU?5DJ-F8m=Y?Si*Ffi^h7oG8I2vfus!1xS1IVWZ-wVz?M8IxbKIMMEs3;S6<_d5$`RTsxL?Ous_D;S_tqHyQG~B)k`N^JIXOBH%w8 z9+~o;g_xC+g!%EQbFC-C;ptF&T^&TV+R!NPoA!_LeqT7qhiXTIQ*~c!s0;I_!zV+* z(Q)5o9SLZ4)4@>E8f#nkcB=~wV$M3#cm!TWYx(C1cAS702q4<{ zF#=8!@FD@D1aMem`(Cw447q#>P*Uu8*gq9YNhgD2V}3p*8HWhWcM;iQ0=jX^7P0c> zL{v8(0GGleoG z@Tc}R#W6x1A;5=Sg{4T3_w?#nr_;%M@Kj;z>@cuG)e!Vvq1G)_QQ2F{8_MYJ{jq`r z7jqsKm%mkeqc&085-VM#r6g%BPC81GCdVT=YfxX@yRN3R zL+E1HnzG=Jysvqq7WayL>E)=YYgO63PWK^I!_Mi_C=ff?iI(YhoWTQ`b>wq2EatKnp)C#!OAg7B@k6#%4{8$0twXzCwm>VLb=0ON~L8SI_+wEgk{C;g$*K1i`2 zcBsAi?EQQL;P)%k-d*hdT`b_s)5W(@KTmdHh=4Ew0Rm1FFhRg604_GY$6qCcHUeHI z-~xaU3jnsQxN-fZ!{{CSSEX|j!osf<8ap07^=j^C|Iy@c>@L9W3te@wDyi1n0;sPG1EO!x|sa*fr@lZM_@m~i*h?K*k zHKIXelrX+pS&x3a{9$D=GLCLmMZln_9QZ+ip*CbtxLI+@j(rXEu{SqsHafz{DS_c= zF%Lrh#xWws0wxhLsUYPaJ|2`y5yfdIj6`0Uml26*1iVmhaIhQ&f6Q0SPsW2MUI|(Mw|J%7;+1}zS5TfB zHG=va%z9>6&LDoozsq<`3mV*9SJasGdsE=0EuZEI&v*`2V@emw1I4vudGO-e$L0kzI#E6dj_V zCbNl42|y{umqc_pw$p)I>hlYs0;4u7uzyTrXHd3&vSCM~qu~YUm998Af|2c&VkV}? zQmFP{e+MlknV9yEL7_3_8zVJV51wyy>PLy}lM;ngHm4o<-7fsbF@>){j{PaiH zetGq6{CuNDa*oc;#WhVBtD9@IV!*geG+ZKpge!cqxy7ss`2!pjZTu-NcqYJ;5CfwJ z#{>9Uv$<*xUR|4|3JFd;Cg6_=c!2=1Y7%`IdcCD==QTphT9{J#0=~(a(0FJh43%L@ zHX00sNl}(klkrB#fViWjLn_7~v6RWp2B6b;p`|A0r=T!2%;43QYUOUZ`L%(IJ#&rg zbO%`jZ#=(bikG#|4JTXL6D@~h zEr*l#qGXwi{Fk`CYD{YP5Sr`1GI)7#K_9Jbi_R9c#dPgS&%hI6YqY2}rfW-bq+$8^I$GFms!@53v{V$}n2qxT;7z<4levXPvbFf~B& zuvEKh+LN?864ttywQfljx9+~HdtmKZYu&Xn6mK24H|vYF?u*)*lca(<6m35gt$HR= zbtG1GBwlrNeo!PIB%JMJD95~4y$M@$47wKYxm$g=FzVd@8AIC(p`^1HeHHgVM6PTb zH(Jx`y*G5PGura}N4BF6x0`;qBerw@s_DSz>t_&rHB*5AY$(q zn!FzNzJ~=|TywZqWx$N5r<|4RU@Loz8S*QHIE#BH0xl{P%Id<3c(-?{SPC zBm_uT6fZ7TvZqE#GR`!T3dQvHCqTUdHwzq!uy6}OlcWL%{!)Rypbn#qxRAYt;E2i0NDh}HNa3(26LKK#NFp*$&cW?c`u;$m32hv4Na&v~Ajs+x zPAOX3El?8>Fsrn9AKd%=hE}rDt(1!GGiDYPFqUP@yh$UnK$k$_3$;49Inomi3~0!_ zm;E35ho$(R%5C`U(^X|wPAZhE%Q^{L-C*ZEuLS}Cx< zZ??sR&j#%O{~5o=M3mxugZ2}&n1(EwF9T_)IHG!zmL{@SA4CG%nVe=vIyGm$4x+?d zr8=*gAvp`4TQZe%Sj9I)XAVU+X^@qa_~NvHYX)N~k@czkG=du7ara%>4r1Lt?`#LrbASL*`8SA024{k!>p!|nrn^Nz5{@R%Y{6#5@-8CydD diff --git a/core/media/truth_social_retriever.py b/core/media/truth_social_retriever.py index 5b4a6d3..e797897 100644 --- a/core/media/truth_social_retriever.py +++ b/core/media/truth_social_retriever.py @@ -19,7 +19,8 @@ logger = logging.logger class TruthSocialRetriever: def __init__(self) -> None: self.api_key = TRUTH_SOCIAL_API.get("api_key", "") - self.user_info = TRUTH_SOCIAL_API.get("user_id", {}) + self.media_config_list = TRUTH_SOCIAL_API.get("media_config", []) + # self.user_info = TRUTH_SOCIAL_API.get("user_id", {}) mysql_user = COIN_MYSQL_CONFIG.get("user", "xch") mysql_password = COIN_MYSQL_CONFIG.get("password", "") if not mysql_password: @@ -52,6 +53,10 @@ class TruthSocialRetriever: image_post_instruction_file = r"./instructions/media_image_post_instructions.json" with open(image_post_instruction_file, "r", encoding="utf-8") as f: self.image_post_instruction = json.load(f) + + text_image_post_instruction_file = r"./instructions/media_article_image_post_instructions.json" + with open(text_image_post_instruction_file, "r", encoding="utf-8") as f: + self.text_image_post_instruction = json.load(f) def get_user_id_from_page(self, handle="realDonaldTrump"): url = f"https://truthsocial.com/@{handle}" @@ -90,89 +95,96 @@ class TruthSocialRetriever: """ headers = {"x-api-key": self.api_key, "Content-Type": "application/json"} - for user_name, user_id in self.user_info.items(): - params = { - "handle": user_name, # 用户名 - "user_id": user_id, # 可选,用户 ID - "next_max_id": None, # 分页时设置为上一次响应的 max_id - "trim": "false", # 保留完整内容 - } + for media_config in self.media_config_list: + media_name = media_config.get("media_name", "") + logger.info(f"开始获取{media_name}的帖子") + base_url = media_config.get("base_url", "") + user_info = media_config.get("user_info", {}) + for user_name, user_details in user_info.items(): + user_id = user_details.get("id", "") + user_full_name = user_details.get("full_name", "") - url = "https://api.scrapecreators.com/v1/truthsocial/user/posts" - logger.info(f"Searching contents for user: {user_name}") - try: - response = requests.get(url, headers=headers, params=params) - response.raise_for_status() # 检查 HTTP 错误 - data = response.json() + params = { + "handle": user_name, # 用户名 + "user_id": user_id, # 可选,用户 ID + "next_max_id": None, # 分页时设置为上一次响应的 max_id + "trim": "false", # 保留完整内容 + } - # 提取帖子列表(假设响应中 'posts' 是键,根据实际文档调整) - if limit is not None and isinstance(limit, int): - posts = data.get("posts", [])[:limit] - else: - posts = data.get("posts", []) + logger.info(f"Searching contents for user: {user_name}") + try: + response = requests.get(base_url, headers=headers, params=params) + response.raise_for_status() # 检查 HTTP 错误 + data = response.json() - results = [] - if posts: - logger.info(f"获取{user_name}帖子: {len(posts)}条") - for post in posts: - result = {} - result["article_id"] = post.get("id") - result["user_id"] = user_id - result["user_name"] = user_name - datetime_text = post.get("created_at") - datetime_dict = self.transform_datetime(datetime_text) - timestamp_ms = datetime_dict["timestamp_ms"] - result["timestamp"] = timestamp_ms - beijing_time_str = datetime_dict["beijing_time_str"] - result["date_time"] = beijing_time_str - result["text"] = post.get("text", "无内容") - media_attachments = post.get("media_attachments", []) - result["media_url"] = "" - result["media_type"] = "" - result["media_thumbnail"] = "" - if media_attachments: - for media_attachment in media_attachments: - result["media_url"] = media_attachment.get("url") - result["media_type"] = media_attachment.get("type") - result["media_thumbnail"] = media_attachment.get( - "preview_url" - ) - break - results.append(result) - else: - print("获取帖子失败,请检查 API 密钥或网络。") - - if len(results) > 0: - result_df = pd.DataFrame(results) - result_df = self.remove_duplicate_posts(result_df) - - if len(result_df) > 0: - result_df["analysis_result"] = "" - result_df["analysis_token"] = 0 - result_df = self.send_wechat_message(result_df) - result_df = result_df[ - [ - "article_id", - "user_id", - "user_name", - "timestamp", - "date_time", - "text", - "analysis_result", - "analysis_token", - "media_url", - "media_type", - "media_thumbnail", - ] - ] - self.db_truth_social_content.insert_data_to_mysql(result_df) - logger.info(f"已将{len(result_df)}条数据插入到数据库") + # 提取帖子列表(假设响应中 'posts' 是键,根据实际文档调整) + if limit is not None and isinstance(limit, int): + posts = data.get("posts", [])[:limit] else: - logger.info(f"没有数据需要插入到数据库和发送企业微信消息") - except requests.exceptions.RequestException as e: - print(f"请求错误: {e}") - except json.JSONDecodeError as e: - print(f"JSON 解析错误: {e}") + posts = data.get("posts", []) + + results = [] + if posts: + logger.info(f"获取{user_name}帖子: {len(posts)}条") + for post in posts: + result = {} + result["article_id"] = post.get("id") + result["user_id"] = user_id + result["user_name"] = user_name + datetime_text = post.get("created_at") + datetime_dict = self.transform_datetime(datetime_text) + timestamp_ms = datetime_dict["timestamp_ms"] + result["timestamp"] = timestamp_ms + beijing_time_str = datetime_dict["beijing_time_str"] + result["date_time"] = beijing_time_str + result["text"] = post.get("text", "") + media_attachments = post.get("media_attachments", []) + result["media_url"] = "" + result["media_type"] = "" + result["media_thumbnail"] = "" + if media_attachments: + for media_attachment in media_attachments: + result["media_url"] = media_attachment.get("url") + result["media_type"] = media_attachment.get("type") + result["media_thumbnail"] = media_attachment.get( + "preview_url" + ) + break + results.append(result) + else: + print("获取帖子失败,请检查 API 密钥或网络。") + + if len(results) > 0: + result_df = pd.DataFrame(results) + result_df = self.remove_duplicate_posts(result_df) + + if len(result_df) > 0: + result_df["analysis_result"] = "" + result_df["analysis_token"] = 0 + result_df = self.send_wechat_message(result_df, user_full_name) + result_df = result_df[ + [ + "article_id", + "user_id", + "user_name", + "timestamp", + "date_time", + "text", + "analysis_result", + "analysis_token", + "media_url", + "media_type", + "media_thumbnail", + ] + ] + self.db_truth_social_content.insert_data_to_mysql(result_df) + logger.info(f"已将{len(result_df)}条数据插入到数据库") + else: + logger.info(f"没有数据需要插入到数据库和发送企业微信消息") + except requests.exceptions.RequestException as e: + print(f"请求错误: {e}") + except json.JSONDecodeError as e: + print(f"JSON 解析错误: {e}") def send_message_by_json_file(self, json_file_name: str): with open(json_file_name, "r", encoding="utf-8") as f: @@ -204,7 +216,7 @@ class TruthSocialRetriever: logger.error(f"删除重复的行失败: {e}") return result_df - def send_wechat_message(self, result_df: pd.DataFrame): + def send_wechat_message(self, result_df: pd.DataFrame, user_full_name: str): if self.wechat is None: logger.error("企业微信未初始化") return @@ -213,7 +225,72 @@ class TruthSocialRetriever: date_time = row["date_time"] text = row["text"] media_thumbnail = row["media_thumbnail"] - if media_thumbnail and len(media_thumbnail) > 0: + if len(text) > 0: + if media_thumbnail and len(media_thumbnail) > 0: + contents = [] + contents.append(f"## {user_full_name}推文") + contents.append(text) + contents.append(f"## 推文时间") + contents.append(date_time) + mark_down_text = "\n\n".join(contents) + self.wechat.send_markdown(mark_down_text) + response, image_path, base64_str, md5_str = self.wechat.send_image(media_thumbnail) + image_format = "jpg" + if image_path is not None and len(image_path) > 0: + image_format = image_path.split(".")[-1] + if image_format == "jpeg": + image_format = "jpg" + analysis_result, analysis_token = self.analyze_truth_social_content( + text=mark_down_text, + image_stream=base64_str, + image_format=image_format, + media_type="hybrid", + user_full_name=user_full_name + ) + if analysis_result is not None and len(analysis_result) > 0: + result_df.at[index, "analysis_result"] = analysis_result + result_df.at[index, "analysis_token"] = analysis_token + else: + result_df.at[index, "analysis_result"] = "" + result_df.at[index, "analysis_token"] = 0 + analysis_text = f"\n\n## 上述图文分析结果\n\n{analysis_result}" + analysis_text += f"\n\n## 上述图文分析token\n\n{analysis_token}" + self.wechat.send_markdown(analysis_text) + else: + contents = [] + contents.append(f"## {user_full_name}推文") + contents.append(text) + contents.append(f"## 推文时间") + contents.append(date_time) + mark_down_text = "\n\n".join(contents) + analysis_result, analysis_token = self.analyze_truth_social_content( + text=text, + image_stream=None, + image_format=None, + media_type="text", + user_full_name=user_full_name + ) + result_df.at[index, "analysis_result"] = analysis_result + result_df.at[index, "analysis_token"] = analysis_token + analysis_text = f"\n\n## 分析结果\n\n{analysis_result}" + analysis_text += f"\n\n## 分析token\n\n{analysis_token}" + if self.calculate_bytes(mark_down_text + analysis_text) > 4096: + self.wechat.send_markdown(mark_down_text) + if self.calculate_bytes(analysis_text) > 4096: + half_analysis_text_length = len(analysis_text) // 2 + analysis_1st = analysis_text[:half_analysis_text_length].strip() + analysis_2nd = analysis_text[half_analysis_text_length:].strip() + self.wechat.send_markdown( + f"## 分析结果第一部分\n\n{analysis_1st}" + ) + self.wechat.send_markdown( + f"## 分析结果第二部分\n\n{analysis_2nd}" + ) + else: + self.wechat.send_markdown(f"## 分析结果\n\n{analysis_text}") + else: + self.wechat.send_markdown(mark_down_text + analysis_text) + elif media_thumbnail and len(media_thumbnail) > 0: response, image_path, base64_str, md5_str = self.wechat.send_image(media_thumbnail) image_format = "jpg" if image_path is not None and len(image_path) > 0: @@ -221,10 +298,11 @@ class TruthSocialRetriever: if image_format == "jpeg": image_format = "jpg" analysis_result, analysis_token = self.analyze_truth_social_content( - text=None, + text="", image_stream=base64_str, image_format=image_format, - media_type="image" + media_type="image", + user_full_name=user_full_name ) if analysis_result is not None and len(analysis_result) > 0: result_df.at[index, "analysis_result"] = analysis_result @@ -236,38 +314,7 @@ class TruthSocialRetriever: analysis_text += f"\n\n## 上述图片分析token\n\n{analysis_token}" self.wechat.send_markdown(analysis_text) else: - contents = [] - contents.append(f"## 川普推文") - contents.append(text) - contents.append(f"## 推文时间") - contents.append(date_time) - mark_down_text = "\n\n".join(contents) - analysis_result, analysis_token = self.analyze_truth_social_content( - text=text, - image_stream=None, - image_format=None, - media_type="text" - ) - result_df.at[index, "analysis_result"] = analysis_result - result_df.at[index, "analysis_token"] = analysis_token - analysis_text = f"\n\n## 分析结果\n\n{analysis_result}" - analysis_text += f"\n\n## 分析token\n\n{analysis_token}" - if self.calculate_bytes(mark_down_text + analysis_text) > 4096: - self.wechat.send_markdown(mark_down_text) - if self.calculate_bytes(analysis_text) > 4096: - half_analysis_text_length = len(analysis_text) // 2 - analysis_1st = analysis_text[:half_analysis_text_length].strip() - analysis_2nd = analysis_text[half_analysis_text_length:].strip() - self.wechat.send_markdown( - f"## 分析结果第一部分\n\n{analysis_1st}" - ) - self.wechat.send_markdown( - f"## 分析结果第二部分\n\n{analysis_2nd}" - ) - else: - self.wechat.send_markdown(f"## 分析结果\n\n{analysis_text}") - else: - self.wechat.send_markdown(mark_down_text + analysis_text) + continue except Exception as e: logger.error(f"发送企业微信消息失败: {e}") continue @@ -276,10 +323,13 @@ class TruthSocialRetriever: def calculate_bytes(self, text: str): return len(text.encode("utf-8")) - def analyze_truth_social_content(self, text: str, image_stream: str, image_format: str, media_type: str): + def analyze_truth_social_content(self, text: str, image_stream: str, image_format: str, media_type: str, user_full_name: str): try: token = 0 - if media_type == "image": + if text is None: + text = "" + image_text = "" + if media_type in ["image", "hybrid"]: if image_stream is None or len(image_stream) == 0: return "", 0 instructions = self.image_instruction.get("Instructions", "") @@ -300,28 +350,42 @@ class TruthSocialRetriever: messages=messages_local, ) if response.status_code == 200: - text = ( + image_text = ( response.get("output", {}) .get("choices", [])[0] .get("message", {}) .get("content", "") ) + temp_image_text = "" + if isinstance(image_text, list): + for item in image_text: + if isinstance(item, dict): + temp_image_text += item.get("text", "") + "\n\n" + elif isinstance(item, str): + temp_image_text += item + "\n\n" + else: + pass + image_text = temp_image_text.strip() token = response.get("usage", {}).get("total_tokens", 0) else: text = f"{response.code} {response.message} 无法分析图片" token = 0 - if text is None or len(text) == 0: - return "", 0 + text += image_text + context = text if media_type == "text": - instructions = self.text_instruction.get("Instructions", "") + instructions = self.text_instruction.get("Instructions", "").format(user_full_name) output = self.text_instruction.get("Output", "") prompt = f"# Context\n\n{context}\n\n# Instructions\n\n{instructions}\n\n# Output\n\n{output}" - else: - instructions = self.image_post_instruction.get("Instructions", "") + elif media_type == "image": + instructions = self.image_post_instruction.get("Instructions", "").format(user_full_name) output = self.image_post_instruction.get("Output", "") prompt = f"# Context\n\n{context}\n\n# Instructions\n\n{instructions}\n\n# Output\n\n{output}" + elif media_type == "hybrid": + instructions = self.text_image_post_instruction.get("Instructions", "").format(user_full_name) + output = self.text_image_post_instruction.get("Output", "").format(user_full_name) + prompt = f"# Context\n\n{context}\n\n# Instructions\n\n{instructions}\n\n# Output\n\n{output}" response = dashscope.Generation.call( api_key=self.ali_api_key, model="qwen-plus", diff --git a/core/wechat.py b/core/wechat.py index 1b4403d..293f896 100644 --- a/core/wechat.py +++ b/core/wechat.py @@ -67,6 +67,7 @@ class Wechat: image_path = os.path.join(self.image_path, image_name) with open(image_path, "wb") as f: f.write(image_bytes) + response = requests.post(self.url, json=data) response.raise_for_status() return response.json(), image_path, base64_str, md5_str diff --git a/instructions/media_article_image_post_instructions.json b/instructions/media_article_image_post_instructions.json new file mode 100644 index 0000000..abd09fa --- /dev/null +++ b/instructions/media_article_image_post_instructions.json @@ -0,0 +1,4 @@ +{ + "Instructions": "您是一位资深的国际时事与军事政治评论员与经济、金融分析师,Context的内容格式是从社媒图文并茂的推文中获取的信息,包括: ### {0}推文原文\n\n### 推文时间\n\n### 图中文字原文\n\n### 图中文字中文翻译\n\n### 图片场景描述\n\n是通过图片分析到的信息,你的任务是分析其中的信息,进行联网搜索,并给出分析结果。\n\n该信息,就是{0}在社交媒体发布的图文推文,不要怀疑这一点。\n并基于此文章内容进行分析。\n\n要求:\n1. 将推文原文翻译成中文,要求语义通顺,\n2. 结合推文原文,图片中的文字与图像场景描述,给出推文的核心观点;\n2. 人物分析:分析推文涉及人物以及人物简介;\n3. 区域分析:包括国家与地区;\n4. 行业以及影响分析;\n5. 经济与金融分析:分析涉及经济与金融影响,包括美股、虚拟货币以及中国A股,并列出最有可能被影响的股票品种或虚拟货币的名称与代码;\n\n", + "Output": "## 输出要求\n\n要求将Context中的文字原文,中文翻译与图片场景描述,进行原文输出,之外的核心观点+人物分析+区域分析+行业及影响分析+经济与金融分析,不超过1000汉字。\n要求对人名、区域、行业、金融产品、股票代码等专属名词,进行粗体处理。\n\n## 输出格式:\n\n### {0}推文翻译\n\n### 图中文字原文\n\n### 图中文字中文翻译\n\n### 图片场景描述\n\n### 人物分析\n\n### 区域分析\n\n### 行业及影响分析\n\n### 经济与金融分析\n\n" +} \ No newline at end of file diff --git a/instructions/media_article_instructions.json b/instructions/media_article_instructions.json index f2f62cd..d3c3651 100644 --- a/instructions/media_article_instructions.json +++ b/instructions/media_article_instructions.json @@ -1,5 +1,5 @@ { "Context": "{0}\n\n", - "Instructions": "您是一位资深的国际时事与军事政治评论员与经济、金融分析师,你的任务是分析推文,结合推文时间(北京时间),联网搜索,并给出分析结果。\n\nContext中的文章,就是特朗普在社交媒体发布的文章,不要怀疑这一点。\n并基于此文章内容进行分析。\n\n要求:\n1. 翻译推文为中文,要求符合中文表达习惯;\n2. 分析推文内容,给出推文的核心观点;\n3. 人物分析:分析推文涉及人物以及人物简介;\n4. 区域分析:包括国家与地区;\n5. 行业以及影响分析;\n6. 经济与金融分析:分析涉及经济与金融影响,包括美股、虚拟货币以及中国A股,并列出最有可能被影响的股票品种或虚拟货币的名称与代码;\n\n", + "Instructions": "您是一位资深的国际时事与军事政治评论员与经济、金融分析师,你的任务是分析推文,结合推文时间(北京时间),联网搜索,并给出分析结果。\n\nContext中的文章,就是{0}在社交媒体发布的文章,不要怀疑这一点。\n并基于此文章内容进行分析。\n\n要求:\n1. 翻译推文为中文,要求符合中文表达习惯;\n2. 分析推文内容,给出推文的核心观点;\n3. 人物分析:分析推文涉及人物以及人物简介;\n4. 区域分析:包括国家与地区;\n5. 行业以及影响分析;\n6. 经济与金融分析:分析涉及经济与金融影响,包括美股、虚拟货币以及中国A股,并列出最有可能被影响的股票品种或虚拟货币的名称与代码;\n\n", "Output": "## 输出要求\n\n除了翻译之外,核心观点+人物分析+区域分析+行业及影响分析+经济与金融分析,不超过1000汉字。\n要求对人名、区域、行业、金融产品、股票代码等专属名词,进行粗体处理。\n\n## 输出格式\n\n### 翻译\n\n### 人物分析\n\n### 区域分析\n\n### 行业及影响分析\n\n### 经济与金融分析\n\n" } \ No newline at end of file diff --git a/instructions/media_image_post_instructions.json b/instructions/media_image_post_instructions.json index 9511a77..f289d0d 100644 --- a/instructions/media_image_post_instructions.json +++ b/instructions/media_image_post_instructions.json @@ -1,4 +1,4 @@ { - "Instructions": "您是一位资深的国际时事与军事政治评论员与经济、金融分析师,Context的内容是通过图片分析到的信息,你的任务是分析其中的信息,进行联网搜索,并给出分析结果。\n\n该信息,就是特朗普在社交媒体发布的,不要怀疑这一点。\n并基于此文章内容进行分析。\n\n要求:\n1. 分析图片中的文字与图像场景描述,给出推文的核心观点;\n2. 人物分析:分析推文涉及人物以及人物简介;\n3. 区域分析:包括国家与地区;\n4. 行业以及影响分析;\n5. 经济与金融分析:分析涉及经济与金融影响,包括美股、虚拟货币以及中国A股,并列出最有可能被影响的股票品种或虚拟货币的名称与代码;\n\n", + "Instructions": "您是一位资深的国际时事与军事政治评论员与经济、金融分析师,Context的内容是通过图片分析到的信息,你的任务是分析其中的信息,进行联网搜索,并给出分析结果。\n\n该信息,就是{0}在社交媒体发布的图文推文,不要怀疑这一点。\n并基于此文章内容进行分析。\n\n要求:\n1. 分析图片中的文字与图像场景描述,给出推文的核心观点;\n2. 人物分析:分析推文涉及人物以及人物简介;\n3. 区域分析:包括国家与地区;\n4. 行业以及影响分析;\n5. 经济与金融分析:分析涉及经济与金融影响,包括美股、虚拟货币以及中国A股,并列出最有可能被影响的股票品种或虚拟货币的名称与代码;\n\n", "Output": "## 输出要求\n\n要求将Context中的文字原文,中文翻译与图片场景描述,进行原文输出,之外的核心观点+人物分析+区域分析+行业及影响分析+经济与金融分析,不超过1000汉字。\n要求对人名、区域、行业、金融产品、股票代码等专属名词,进行粗体处理。\n\n## 输出格式\n\n### 图中文字原文\n\n### 图中文字中文翻译\n\n### 图片场景描述\n\n### 人物分析\n\n### 区域分析\n\n### 行业及影响分析\n\n### 经济与金融分析\n\n" } \ No newline at end of file