support analyze image
This commit is contained in:
parent
3bca7eca5c
commit
8c5bb34e5a
Binary file not shown.
|
|
@ -41,9 +41,17 @@ class TruthSocialRetriever:
|
||||||
os.makedirs(self.save_path, exist_ok=True)
|
os.makedirs(self.save_path, exist_ok=True)
|
||||||
|
|
||||||
self.ali_api_key = ALI_API_KEY
|
self.ali_api_key = ALI_API_KEY
|
||||||
instruction_file = r"./instructions/media_article_instructions.json"
|
text_instruction_file = r"./instructions/media_article_instructions.json"
|
||||||
with open(instruction_file, "r", encoding="utf-8") as f:
|
with open(text_instruction_file, "r", encoding="utf-8") as f:
|
||||||
self.instruction = json.load(f)
|
self.text_instruction = json.load(f)
|
||||||
|
|
||||||
|
image_instruction_file = r"./instructions/media_image_instructions.json"
|
||||||
|
with open(image_instruction_file, "r", encoding="utf-8") as f:
|
||||||
|
self.image_instruction = json.load(f)
|
||||||
|
|
||||||
|
image_post_instruction_file = r"./instructions/media_image_post_instructions.json"
|
||||||
|
with open(image_post_instruction_file, "r", encoding="utf-8") as f:
|
||||||
|
self.image_post_instruction = json.load(f)
|
||||||
|
|
||||||
def get_user_id_from_page(self, handle="realDonaldTrump"):
|
def get_user_id_from_page(self, handle="realDonaldTrump"):
|
||||||
url = f"https://truthsocial.com/@{handle}"
|
url = f"https://truthsocial.com/@{handle}"
|
||||||
|
|
@ -135,14 +143,6 @@ class TruthSocialRetriever:
|
||||||
print("获取帖子失败,请检查 API 密钥或网络。")
|
print("获取帖子失败,请检查 API 密钥或网络。")
|
||||||
|
|
||||||
if len(results) > 0:
|
if len(results) > 0:
|
||||||
# user_path = os.path.join(self.save_path, user_name)
|
|
||||||
# os.makedirs(user_path, exist_ok=True)
|
|
||||||
# now_date_time = datetime.now().strftime("%Y%m%d%H%M%S")
|
|
||||||
# json_file_name = os.path.join(user_path, f"{user_name}_{now_date_time}.json")
|
|
||||||
# # 将results内容写入json_file_name文件中
|
|
||||||
# with open(json_file_name, 'w', encoding='utf-8') as f:
|
|
||||||
# json.dump(results, f, ensure_ascii=False, indent=2)
|
|
||||||
# logger.info(f"已将{len(results)}条数据保存到: {json_file_name}")
|
|
||||||
result_df = pd.DataFrame(results)
|
result_df = pd.DataFrame(results)
|
||||||
result_df = self.remove_duplicate_posts(result_df)
|
result_df = self.remove_duplicate_posts(result_df)
|
||||||
|
|
||||||
|
|
@ -214,7 +214,27 @@ class TruthSocialRetriever:
|
||||||
text = row["text"]
|
text = row["text"]
|
||||||
media_thumbnail = row["media_thumbnail"]
|
media_thumbnail = row["media_thumbnail"]
|
||||||
if media_thumbnail and len(media_thumbnail) > 0:
|
if media_thumbnail and len(media_thumbnail) > 0:
|
||||||
self.wechat.send_image(media_thumbnail)
|
response, image_path, base64_str, md5_str = self.wechat.send_image(media_thumbnail)
|
||||||
|
image_format = "jpg"
|
||||||
|
if image_path is not None and len(image_path) > 0:
|
||||||
|
image_format = image_path.split(".")[-1]
|
||||||
|
if image_format == "jpeg":
|
||||||
|
image_format = "jpg"
|
||||||
|
analysis_result, analysis_token = self.analyze_truth_social_content(
|
||||||
|
text=None,
|
||||||
|
image_stream=base64_str,
|
||||||
|
image_format=image_format,
|
||||||
|
media_type="image"
|
||||||
|
)
|
||||||
|
if analysis_result is not None and len(analysis_result) > 0:
|
||||||
|
result_df.at[index, "analysis_result"] = analysis_result
|
||||||
|
result_df.at[index, "analysis_token"] = analysis_token
|
||||||
|
else:
|
||||||
|
result_df.at[index, "analysis_result"] = ""
|
||||||
|
result_df.at[index, "analysis_token"] = 0
|
||||||
|
analysis_text = f"\n\n## 上述图片分析结果\n\n{analysis_result}"
|
||||||
|
analysis_text += f"\n\n## 上述图片分析token\n\n{analysis_token}"
|
||||||
|
self.wechat.send_markdown(analysis_text)
|
||||||
else:
|
else:
|
||||||
contents = []
|
contents = []
|
||||||
contents.append(f"## 川普推文")
|
contents.append(f"## 川普推文")
|
||||||
|
|
@ -223,7 +243,10 @@ class TruthSocialRetriever:
|
||||||
contents.append(date_time)
|
contents.append(date_time)
|
||||||
mark_down_text = "\n\n".join(contents)
|
mark_down_text = "\n\n".join(contents)
|
||||||
analysis_result, analysis_token = self.analyze_truth_social_content(
|
analysis_result, analysis_token = self.analyze_truth_social_content(
|
||||||
text
|
text=text,
|
||||||
|
image_stream=None,
|
||||||
|
image_format=None,
|
||||||
|
media_type="text"
|
||||||
)
|
)
|
||||||
result_df.at[index, "analysis_result"] = analysis_result
|
result_df.at[index, "analysis_result"] = analysis_result
|
||||||
result_df.at[index, "analysis_token"] = analysis_token
|
result_df.at[index, "analysis_token"] = analysis_token
|
||||||
|
|
@ -253,12 +276,52 @@ class TruthSocialRetriever:
|
||||||
def calculate_bytes(self, text: str):
|
def calculate_bytes(self, text: str):
|
||||||
return len(text.encode("utf-8"))
|
return len(text.encode("utf-8"))
|
||||||
|
|
||||||
def analyze_truth_social_content(self, text: str):
|
def analyze_truth_social_content(self, text: str, image_stream: str, image_format: str, media_type: str):
|
||||||
try:
|
try:
|
||||||
|
token = 0
|
||||||
|
if media_type == "image":
|
||||||
|
if image_stream is None or len(image_stream) == 0:
|
||||||
|
return "", 0
|
||||||
|
instructions = self.image_instruction.get("Instructions", "")
|
||||||
|
output = self.image_instruction.get("Output", "")
|
||||||
|
prompt = f"# Instructions\n\n{instructions}\n\n# Output\n\n{output}"
|
||||||
|
messages_local = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{"image": f"data:image/{image_format};base64,{image_stream}"}, # base64 字符串
|
||||||
|
{"text": prompt} # 你的 prompt
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
response = dashscope.MultiModalConversation.call(
|
||||||
|
api_key=self.ali_api_key,
|
||||||
|
model='qwen-vl-plus',
|
||||||
|
messages=messages_local,
|
||||||
|
)
|
||||||
|
if response.status_code == 200:
|
||||||
|
text = (
|
||||||
|
response.get("output", {})
|
||||||
|
.get("choices", [])[0]
|
||||||
|
.get("message", {})
|
||||||
|
.get("content", "")
|
||||||
|
)
|
||||||
|
token = response.get("usage", {}).get("total_tokens", 0)
|
||||||
|
else:
|
||||||
|
text = f"{response.code} {response.message} 无法分析图片"
|
||||||
|
token = 0
|
||||||
|
|
||||||
|
if text is None or len(text) == 0:
|
||||||
|
return "", 0
|
||||||
context = text
|
context = text
|
||||||
instructions = self.instruction.get("Instructions", "")
|
if media_type == "text":
|
||||||
output = self.instruction.get("Output", "")
|
instructions = self.text_instruction.get("Instructions", "")
|
||||||
prompt = f"# Context\n\n{context}\n\n# Instructions\n\n{instructions}\n\n# Output\n\n{output}"
|
output = self.text_instruction.get("Output", "")
|
||||||
|
prompt = f"# Context\n\n{context}\n\n# Instructions\n\n{instructions}\n\n# Output\n\n{output}"
|
||||||
|
else:
|
||||||
|
instructions = self.image_post_instruction.get("Instructions", "")
|
||||||
|
output = self.image_post_instruction.get("Output", "")
|
||||||
|
prompt = f"# Context\n\n{context}\n\n# Instructions\n\n{instructions}\n\n# Output\n\n{output}"
|
||||||
response = dashscope.Generation.call(
|
response = dashscope.Generation.call(
|
||||||
api_key=self.ali_api_key,
|
api_key=self.ali_api_key,
|
||||||
model="qwen-plus",
|
model="qwen-plus",
|
||||||
|
|
@ -276,7 +339,7 @@ class TruthSocialRetriever:
|
||||||
.get("message", {})
|
.get("message", {})
|
||||||
.get("content", "")
|
.get("content", "")
|
||||||
)
|
)
|
||||||
token = response.get("usage", {}).get("total_tokens", 0)
|
token += response.get("usage", {}).get("total_tokens", 0)
|
||||||
else:
|
else:
|
||||||
response_contents = f"{response.code} {response.message}"
|
response_contents = f"{response.code} {response.message}"
|
||||||
token = 0
|
token = 0
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,8 @@ import requests
|
||||||
import base64
|
import base64
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
|
import time
|
||||||
logger = logging.logger
|
logger = logging.logger
|
||||||
|
|
||||||
class Wechat:
|
class Wechat:
|
||||||
|
|
@ -17,6 +19,8 @@ class Wechat:
|
||||||
# 只要启动代码文件在根目录,config就能找到
|
# 只要启动代码文件在根目录,config就能找到
|
||||||
self.key = key
|
self.key = key
|
||||||
self.url = f"https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key={self.key}"
|
self.url = f"https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key={self.key}"
|
||||||
|
self.image_path = r"./output/wechat/image"
|
||||||
|
os.makedirs(self.image_path, exist_ok=True)
|
||||||
|
|
||||||
def send_text(self, text: str):
|
def send_text(self, text: str):
|
||||||
"""
|
"""
|
||||||
|
|
@ -46,12 +50,6 @@ class Wechat:
|
||||||
"""
|
"""
|
||||||
发送图片消息
|
发送图片消息
|
||||||
"""
|
"""
|
||||||
# data = {
|
|
||||||
# "msgtype": "image",
|
|
||||||
# "image": {"url": image_url}
|
|
||||||
# }
|
|
||||||
# response = requests.post(self.url, json=data)
|
|
||||||
# return response.json()
|
|
||||||
image_bytes = self.download_image(image_url)
|
image_bytes = self.download_image(image_url)
|
||||||
base64_str, md5_str = self.get_base64_and_md5(image_bytes)
|
base64_str, md5_str = self.get_base64_and_md5(image_bytes)
|
||||||
data = {
|
data = {
|
||||||
|
|
@ -61,9 +59,17 @@ class Wechat:
|
||||||
"md5": md5_str,
|
"md5": md5_str,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
# 获取url中图片的名称
|
||||||
|
image_name = image_url.split("/")[-1].split(".")[0]
|
||||||
|
# 获取当前时间,格式为YYYYMMDDHHMMSS
|
||||||
|
now_time = time.strftime("%Y%m%d%H%M%S", time.localtime())
|
||||||
|
image_name = f"{image_name}_{now_time}.jpg"
|
||||||
|
image_path = os.path.join(self.image_path, image_name)
|
||||||
|
with open(image_path, "wb") as f:
|
||||||
|
f.write(image_bytes)
|
||||||
response = requests.post(self.url, json=data)
|
response = requests.post(self.url, json=data)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
return response.json()
|
return response.json(), image_path, base64_str, md5_str
|
||||||
|
|
||||||
def download_image(self, image_url):
|
def download_image(self, image_url):
|
||||||
"""下载图片并返回 bytes"""
|
"""下载图片并返回 bytes"""
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
{
|
{
|
||||||
"Context": "{0}\n\n",
|
"Context": "{0}\n\n",
|
||||||
"Instructions": "你是一个专业的时政与金融分析师,你的任务是分析推文,结合推文时间(北京时间),联网搜索,并给出分析结果。\n\nContext中的文章,就是特朗普在社交媒体发布的文章,不要怀疑这一点。\n并基于此文章内容进行分析。\n\n要求:\n1. 翻译推文为中文,要求符合中文表达习惯;\n2. 分析推文内容,给出推文的核心观点;\n3. 人物分析:分析推文涉及人物以及人物简介;\n4. 区域分析:包括国家与地区;\n5. 行业以及影响分析;\n6. 经济与金融分析:分析涉及经济与金融影响,包括美股、虚拟货币以及中国A股,并列出最有可能被影响的股票品种或虚拟货币的名称与代码;\n\n",
|
"Instructions": "您是一位资深的国际时事与军事政治评论员与经济、金融分析师,你的任务是分析推文,结合推文时间(北京时间),联网搜索,并给出分析结果。\n\nContext中的文章,就是特朗普在社交媒体发布的文章,不要怀疑这一点。\n并基于此文章内容进行分析。\n\n要求:\n1. 翻译推文为中文,要求符合中文表达习惯;\n2. 分析推文内容,给出推文的核心观点;\n3. 人物分析:分析推文涉及人物以及人物简介;\n4. 区域分析:包括国家与地区;\n5. 行业以及影响分析;\n6. 经济与金融分析:分析涉及经济与金融影响,包括美股、虚拟货币以及中国A股,并列出最有可能被影响的股票品种或虚拟货币的名称与代码;\n\n",
|
||||||
"Output": "## 输出要求\n\n除了翻译之外,核心观点+人物分析+区域分析+行业及影响分析+经济与金融分析,不超过1000汉字。\n要求对人名、区域、行业、金融产品、股票代码等专属名词,进行粗体处理。\n\n## 输出格式\n\n### 翻译\n\n### 人物分析\n\n### 区域分析\n\n### 行业及影响分析\n\n### 经济与金融分析\n\n"
|
"Output": "## 输出要求\n\n除了翻译之外,核心观点+人物分析+区域分析+行业及影响分析+经济与金融分析,不超过1000汉字。\n要求对人名、区域、行业、金融产品、股票代码等专属名词,进行粗体处理。\n\n## 输出格式\n\n### 翻译\n\n### 人物分析\n\n### 区域分析\n\n### 行业及影响分析\n\n### 经济与金融分析\n\n"
|
||||||
}
|
}
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
{
|
||||||
|
"Instructions": "您是一位资深的国际时事与军事政治评论员与经济、金融分析师,请阅读这张图片,并给出分析结果,分析内容包括:图片文字翻译为中文,将非文字部分的图像,做出图像场景描述。\n\n要求:\n1. 如果图片只有图像信息,请根据图像做出场景描述。\n2. 如果图片既有文字又有图像内容,请提取其中的文字,并将文字翻译为中文,并对非文字部分的图像,做出图像场景描述。\n3. 如果图片中只有文字,请提取其中的文字,并将文字翻译为中文。",
|
||||||
|
"Output": "## 输出要求\n\n图片中的文字原文、文字中文翻译、图片场景描述。\n\n## 输出格式如下:\n\n### 图中文字原文\n\n### 图中文字中文翻译\n\n### 图片场景描述"
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
{
|
||||||
|
"Instructions": "您是一位资深的国际时事与军事政治评论员与经济、金融分析师,Context的内容是通过图片分析到的信息,你的任务是分析其中的信息,进行联网搜索,并给出分析结果。\n\n该信息,就是特朗普在社交媒体发布的,不要怀疑这一点。\n并基于此文章内容进行分析。\n\n要求:\n1. 分析图片中的文字与图像场景描述,给出推文的核心观点;\n2. 人物分析:分析推文涉及人物以及人物简介;\n3. 区域分析:包括国家与地区;\n4. 行业以及影响分析;\n5. 经济与金融分析:分析涉及经济与金融影响,包括美股、虚拟货币以及中国A股,并列出最有可能被影响的股票品种或虚拟货币的名称与代码;\n\n",
|
||||||
|
"Output": "## 输出要求\n\n要求将Context中的文字原文,中文翻译与图片场景描述,进行原文输出,之外的核心观点+人物分析+区域分析+行业及影响分析+经济与金融分析,不超过1000汉字。\n要求对人名、区域、行业、金融产品、股票代码等专属名词,进行粗体处理。\n\n## 输出格式\n\n### 图中文字原文\n\n### 图中文字中文翻译\n\n### 图片场景描述\n\n### 人物分析\n\n### 区域分析\n\n### 行业及影响分析\n\n### 经济与金融分析\n\n"
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue