Compare commits
No commits in common. "c431f5103fab5705c8cf8c068f03e5a0d6b1782f" and "16ce41545e27648c45e9ff925af9222ff433126e" have entirely different histories.
c431f5103f
...
16ce41545e
|
|
@ -225,9 +225,9 @@ TWITTER_CONFIG = {
|
||||||
"user_search_url": "https://api.twitter.com/2/users/by/username/{0}",
|
"user_search_url": "https://api.twitter.com/2/users/by/username/{0}",
|
||||||
"contents_search_url": "https://api.twitter.com/2/users/{0}/tweets?max_results=100&tweet.fields=text,created_at&exclude=replies,retweets",
|
"contents_search_url": "https://api.twitter.com/2/users/{0}/tweets?max_results=100&tweet.fields=text,created_at&exclude=replies,retweets",
|
||||||
"monitor_accounts": [
|
"monitor_accounts": [
|
||||||
{"name": "FoxNews", "id": ""},
|
"FoxNews",
|
||||||
{"name": "WhiteHouse", "id": "1879644163769335808"},
|
"WhiteHouse",
|
||||||
{"name": "sama", "id": ""},
|
"sama",
|
||||||
{"name": "PressSec", "id": ""},
|
"PressSec",
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Binary file not shown.
|
|
@ -12,11 +12,6 @@ import pandas as pd
|
||||||
logger = logging.logger
|
logger = logging.logger
|
||||||
|
|
||||||
class TwitterRetriever:
|
class TwitterRetriever:
|
||||||
"""
|
|
||||||
免费版本的账号,每个月只能获取100条推文,
|
|
||||||
需要使用付费版本的账号,基础版每个月可以获取15000条推文,200美元/月
|
|
||||||
高级版每个月可以获取1000000条推文,5000美元/月
|
|
||||||
"""
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.keys = TWITTER_CONFIG["keys"]
|
self.keys = TWITTER_CONFIG["keys"]
|
||||||
self.headers = {
|
self.headers = {
|
||||||
|
|
@ -58,19 +53,17 @@ class TwitterRetriever:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def monitor_accounts(self):
|
def monitor_accounts(self):
|
||||||
for account_dict in self.monitor_account_list:
|
for account in self.monitor_account_list:
|
||||||
user_name = account_dict["name"]
|
logger.info(f"Monitoring account: {account}")
|
||||||
user_id = account_dict["id"]
|
|
||||||
logger.info(f"Monitoring account: {user_name}")
|
|
||||||
logger.info(f"Sleeping for {self.sleep_time} seconds")
|
logger.info(f"Sleeping for {self.sleep_time} seconds")
|
||||||
# time.sleep(self.sleep_time)
|
time.sleep(self.sleep_time)
|
||||||
result_list = []
|
result_list = []
|
||||||
if user_id is None or user_id == "":
|
user = self.search_user(account)
|
||||||
user = self.search_user(user_name)
|
if user is None:
|
||||||
if user is None:
|
continue
|
||||||
continue
|
username = user["data"]["username"]
|
||||||
user_id = str(user["data"]["id"])
|
user_id = str(user["data"]["id"])
|
||||||
contents = self.search_contents(user_name, user_id)
|
contents = self.search_contents(username, user_id)
|
||||||
if contents is None:
|
if contents is None:
|
||||||
continue
|
continue
|
||||||
twitter_contents = contents["data"]
|
twitter_contents = contents["data"]
|
||||||
|
|
@ -82,7 +75,7 @@ class TwitterRetriever:
|
||||||
text = content["text"]
|
text = content["text"]
|
||||||
result = {
|
result = {
|
||||||
"user_id": user_id,
|
"user_id": user_id,
|
||||||
"user_name": user_name,
|
"user_name": username,
|
||||||
"timestamp": timestamp_ms,
|
"timestamp": timestamp_ms,
|
||||||
"date_time": beijing_time_str,
|
"date_time": beijing_time_str,
|
||||||
"text": text
|
"text": text
|
||||||
|
|
@ -93,7 +86,7 @@ class TwitterRetriever:
|
||||||
self.db_twitter_content.insert_data_to_mysql(result_df)
|
self.db_twitter_content.insert_data_to_mysql(result_df)
|
||||||
logger.info(f"Inserted {len(result_df)} rows into twitter_content")
|
logger.info(f"Inserted {len(result_df)} rows into twitter_content")
|
||||||
else:
|
else:
|
||||||
logger.warning(f"No data inserted for account: {user_name}")
|
logger.warning(f"No data inserted for account: {account}")
|
||||||
|
|
||||||
def transform_datetime(self, datetime_text: str):
|
def transform_datetime(self, datetime_text: str):
|
||||||
utc_time = datetime.strptime(datetime_text, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=pytz.UTC)
|
utc_time = datetime.strptime(datetime_text, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=pytz.UTC)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue