😎

Python × Twitter API ツイート種類別にデータ取得する

2021/05/03に公開

キーワードを含むツイート数

累計の場合

import tweepy
import datetime

# Twitter API credentials. They are left blank here; fill them in before running.
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

# Build the authenticated API client.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# Paging through several days of search results can exhaust the search rate
# limit. Wait for the limit to reset instead of aborting part-way through,
# which would lose every count gathered so far.
api = tweepy.API(auth, wait_on_rate_limit=True)

# Texts of every matching tweet collected so far.
tweets_data = []
# Running total of collected tweets, recorded once per searched day.
total_tweets_data = []
# Search keyword.
keyword = '五反田'
# Search period as YYYYMMDD strings; the end date is inclusive.
sinceForm = '20210428'
untilForm = '20210501'

# Parse both ends of the period. One day is added to the end date so that it
# becomes the exclusive upper bound of the whole period.
sinceDate = datetime.datetime.strptime(sinceForm, '%Y%m%d')
untilDate = datetime.datetime.strptime(untilForm, '%Y%m%d')
untilDate += datetime.timedelta(days=1)

# Number of whole days in the period.
diff = (untilDate - sinceDate).days

# One window per day: since[k] is the midnight that starts day k and
# until[k] is the midnight that ends it.
since = [sinceDate + datetime.timedelta(days=k) for k in range(diff)]
until = [untilDate - datetime.timedelta(days=k) for k in reversed(range(diff))]

# Run one search per daily window and record the running total after each.
# NOTE(review): since/until are passed as datetime objects; confirm the
# search endpoint accepts that form (its documented date format is YYYY-MM-DD).
for day_start, day_end in zip(since, until):
    results = tweepy.Cursor(
        api.search,
        tweet_mode='extended',
        q=keyword,
        lang='ja',
        since=day_start,
        until=day_end,
    ).items()
    # Store each tweet as a single line: inner newlines become '。'.
    tweets_data.extend(
        status.full_text.strip().replace('\n', '。') + '\n'
        for status in results
    )
    # tweets_data is never cleared, so its length is the cumulative count.
    total_tweets_data.append(len(tweets_data))

# Print the cumulative tweet count for each day, preceded by a separator line.
for running_total in total_tweets_data:
    print('-------------------------------------------', running_total, sep='\n')

実行結果

Image from Gyazo

当日の場合

import tweepy
import datetime

# Twitter API credentials. They are left blank here; fill them in before running.
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

# Build the authenticated API client.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# Paging through several days of search results can exhaust the search rate
# limit. Wait for the limit to reset instead of aborting part-way through,
# which would lose every count gathered so far.
api = tweepy.API(auth, wait_on_rate_limit=True)

# Texts of every matching tweet collected so far.
tweets_data = []
# Running total of collected tweets, recorded once per searched day.
total_tweets_data = []
# Per-day tweet counts derived from the running totals.
day_tweets_data = []
# Search keyword.
keyword = '五反田'
# Search period as YYYYMMDD strings; the end date is inclusive.
sinceForm = '20210428'
untilForm = '20210501'

# Parse both ends of the period. One day is added to the end date so that it
# becomes the exclusive upper bound of the whole period.
sinceDate = datetime.datetime.strptime(sinceForm, '%Y%m%d')
untilDate = datetime.datetime.strptime(untilForm, '%Y%m%d')
untilDate += datetime.timedelta(days=1)

# Number of whole days in the period.
diff = (untilDate - sinceDate).days

# One window per day: since[k] is the midnight that starts day k and
# until[k] is the midnight that ends it.
since = [sinceDate + datetime.timedelta(days=k) for k in range(diff)]
until = [untilDate - datetime.timedelta(days=k) for k in reversed(range(diff))]

# Run one search per daily window and record the running total after each.
# NOTE(review): since/until are passed as datetime objects; confirm the
# search endpoint accepts that form (its documented date format is YYYY-MM-DD).
for day_start, day_end in zip(since, until):
    results = tweepy.Cursor(
        api.search,
        tweet_mode='extended',
        q=keyword,
        lang='ja',
        since=day_start,
        until=day_end,
    ).items()
    # Store each tweet as a single line: inner newlines become '。'.
    tweets_data.extend(
        status.full_text.strip().replace('\n', '。') + '\n'
        for status in results
    )
    # tweets_data is never cleared, so its length is the cumulative count.
    total_tweets_data.append(len(tweets_data))

# Per-day tweet count: each running total minus the one before it. A leading
# zero stands in for the "previous" total of the first day.
day_tweets_data.extend(
    current - previous
    for previous, current in zip([0] + total_tweets_data, total_tweets_data)
)

# Print the tweet count of each day, preceded by a separator line.
for daily_count in day_tweets_data:
    print('-------------------------------------------', daily_count, sep='\n')

実行結果

Image from Gyazo

キーワードを含むRT数

累計の場合

import tweepy
import datetime

# Twitter API credentials. They are left blank here; fill them in before running.
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

# Build the authenticated API client.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# Paging through several days of search results can exhaust the search rate
# limit. Wait for the limit to reset instead of aborting part-way through,
# which would lose every count gathered so far.
api = tweepy.API(auth, wait_on_rate_limit=True)

# Texts of every tweet matching the keyword, and the running total recorded
# once per searched day.
tweets_data = []
total_tweets_data = []
# Naming here is the reverse of the pair above: total_exclude_RT_data holds
# the texts of matching non-retweets, and exclude_RT_data holds their running
# total per day.
exclude_RT_data = []
total_exclude_RT_data = []
# Search keyword, and the same query with retweets filtered out.
keyword = '五反田'
key = f'{keyword} exclude:retweets'
# Search period as YYYYMMDD strings; the end date is inclusive.
sinceForm = '20210428'
untilForm = '20210501'

# Parse both ends of the period. One day is added to the end date so that it
# becomes the exclusive upper bound of the whole period.
sinceDate = datetime.datetime.strptime(sinceForm, '%Y%m%d')
untilDate = datetime.datetime.strptime(untilForm, '%Y%m%d')
untilDate += datetime.timedelta(days=1)

# Number of whole days in the period.
diff = (untilDate - sinceDate).days

# One window per day: since[k] is the midnight that starts day k and
# until[k] is the midnight that ends it.
since = [sinceDate + datetime.timedelta(days=k) for k in range(diff)]
until = [untilDate - datetime.timedelta(days=k) for k in reversed(range(diff))]

# Pass 1: every tweet matching the keyword, one search per daily window.
# Nothing is printed here; only the running totals are recorded.
# NOTE(review): since/until are passed as datetime objects; confirm the
# search endpoint accepts that form (its documented date format is YYYY-MM-DD).
for day_start, day_end in zip(since, until):
    results = tweepy.Cursor(
        api.search,
        tweet_mode='extended',
        q=keyword,
        lang='ja',
        since=day_start,
        until=day_end,
    ).items()
    # Store each tweet as a single line: inner newlines become '。'.
    tweets_data.extend(
        status.full_text.strip().replace('\n', '。') + '\n'
        for status in results
    )
    total_tweets_data.append(len(tweets_data))

# Pass 2: the same windows, searched with the retweet-excluding query.
# total_exclude_RT_data collects the texts; exclude_RT_data records the
# running total after each day.
for day_start, day_end in zip(since, until):
    results = tweepy.Cursor(
        api.search,
        tweet_mode='extended',
        q=key,
        lang='ja',
        since=day_start,
        until=day_end,
    ).items()
    total_exclude_RT_data.extend(
        status.full_text.strip().replace('\n', '。') + '\n'
        for status in results
    )
    exclude_RT_data.append(len(total_exclude_RT_data))

# Cumulative retweet count per day:
# (all matching tweets) - (matching tweets excluding retweets).
total_RT_data = [
    with_rt - without_rt
    for with_rt, without_rt in zip(total_tweets_data, exclude_RT_data)
]

# Print the cumulative retweet count for each day, preceded by a separator line.
for running_total in total_RT_data:
    print('-------------------------------------------', running_total, sep='\n')

実行結果

Image from Gyazo

当日の場合

import tweepy
import datetime

# Twitter API credentials. They are left blank here; fill them in before running.
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

# Build the authenticated API client.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# Paging through several days of search results can exhaust the search rate
# limit. Wait for the limit to reset instead of aborting part-way through,
# which would lose every count gathered so far.
api = tweepy.API(auth, wait_on_rate_limit=True)

# Texts of every tweet matching the keyword, and the running total recorded
# once per searched day.
tweets_data = []
total_tweets_data = []
# Naming here is the reverse of the pair above: total_exclude_RT_data holds
# the texts of matching non-retweets, and exclude_RT_data holds their running
# total per day.
exclude_RT_data = []
total_exclude_RT_data = []
# Per-day retweet counts derived from the cumulative retweet counts.
day_RT_data = []
# Search keyword, and the same query with retweets filtered out.
keyword = '五反田'
key = f'{keyword} exclude:retweets'
# Search period as YYYYMMDD strings; the end date is inclusive.
sinceForm = '20210428'
untilForm = '20210501'

# Parse both ends of the period. One day is added to the end date so that it
# becomes the exclusive upper bound of the whole period.
sinceDate = datetime.datetime.strptime(sinceForm, '%Y%m%d')
untilDate = datetime.datetime.strptime(untilForm, '%Y%m%d')
untilDate += datetime.timedelta(days=1)

# Number of whole days in the period.
diff = (untilDate - sinceDate).days

# One window per day: since[k] is the midnight that starts day k and
# until[k] is the midnight that ends it.
since = [sinceDate + datetime.timedelta(days=k) for k in range(diff)]
until = [untilDate - datetime.timedelta(days=k) for k in reversed(range(diff))]

# Pass 1: every tweet matching the keyword, one search per daily window.
# Nothing is printed here; only the running totals are recorded.
# NOTE(review): since/until are passed as datetime objects; confirm the
# search endpoint accepts that form (its documented date format is YYYY-MM-DD).
for day_start, day_end in zip(since, until):
    results = tweepy.Cursor(
        api.search,
        tweet_mode='extended',
        q=keyword,
        lang='ja',
        since=day_start,
        until=day_end,
    ).items()
    # Store each tweet as a single line: inner newlines become '。'.
    tweets_data.extend(
        status.full_text.strip().replace('\n', '。') + '\n'
        for status in results
    )
    total_tweets_data.append(len(tweets_data))

# Pass 2: the same windows, searched with the retweet-excluding query.
# total_exclude_RT_data collects the texts; exclude_RT_data records the
# running total after each day.
for day_start, day_end in zip(since, until):
    results = tweepy.Cursor(
        api.search,
        tweet_mode='extended',
        q=key,
        lang='ja',
        since=day_start,
        until=day_end,
    ).items()
    total_exclude_RT_data.extend(
        status.full_text.strip().replace('\n', '。') + '\n'
        for status in results
    )
    exclude_RT_data.append(len(total_exclude_RT_data))

# Cumulative retweet count per day:
# (all matching tweets) - (matching tweets excluding retweets).
total_RT_data = [
    with_rt - without_rt
    for with_rt, without_rt in zip(total_tweets_data, exclude_RT_data)
]

# Per-day retweet count: each cumulative value minus the one before it.
# A leading zero stands in for the "previous" value of the first day.
day_RT_data.extend(
    current - previous
    for previous, current in zip([0] + total_RT_data, total_RT_data)
)

# Print the retweet count of each day, preceded by a separator line.
for daily_count in day_RT_data:
    print('-------------------------------------------', daily_count, sep='\n')

実行結果

Image from Gyazo

キーワードを含む引用数

累計の場合

import tweepy
import datetime

# Twitter API credentials. They are left blank here; fill them in before running.
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

# Build the authenticated API client.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# Paging through several days of search results can exhaust the search rate
# limit. Wait for the limit to reset instead of aborting part-way through,
# which would lose every count gathered so far.
api = tweepy.API(auth, wait_on_rate_limit=True)

# is_quote_status flag of every matching tweet collected so far.
quote_data = []
# Running total of quote tweets, recorded once per searched day.
total_quote_data = []
# Search keyword.
keyword = '五反田'
# Search period as YYYYMMDD strings; the end date is inclusive.
sinceForm = '20210428'
untilForm = '20210501'

# Parse both ends of the period. One day is added to the end date so that it
# becomes the exclusive upper bound of the whole period.
sinceDate = datetime.datetime.strptime(sinceForm, '%Y%m%d')
untilDate = datetime.datetime.strptime(untilForm, '%Y%m%d')
untilDate += datetime.timedelta(days=1)

# Number of whole days in the period.
diff = (untilDate - sinceDate).days

# One window per day: since[k] is the midnight that starts day k and
# until[k] is the midnight that ends it.
since = [sinceDate + datetime.timedelta(days=k) for k in range(diff)]
until = [untilDate - datetime.timedelta(days=k) for k in reversed(range(diff))]

# Run one search per daily window and record the running quote total after
# each. Nothing is printed here.
# NOTE(review): since/until are passed as datetime objects; confirm the
# search endpoint accepts that form (its documented date format is YYYY-MM-DD).
for day_start, day_end in zip(since, until):
    results = tweepy.Cursor(
        api.search,
        tweet_mode='extended',
        q=keyword,
        lang='ja',
        since=day_start,
        until=day_end,
    ).items()
    # Keep only the quote flag of each matching tweet.
    quote_data.extend(status.is_quote_status for status in results)
    # quote_data is never cleared, so this is the cumulative number of quotes.
    total_quote_data.append(quote_data.count(True))

# Print the cumulative quote count for each day, preceded by a separator line.
for running_total in total_quote_data:
    print('-------------------------------------------', running_total, sep='\n')

実行結果

Image from Gyazo

当日の場合

import tweepy
import datetime

# Twitter API credentials. They are left blank here; fill them in before running.
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

# Build the authenticated API client.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# Paging through several days of search results can exhaust the search rate
# limit. Wait for the limit to reset instead of aborting part-way through,
# which would lose every count gathered so far.
api = tweepy.API(auth, wait_on_rate_limit=True)

# is_quote_status flag of every matching tweet collected so far.
quote_data = []
# Running total of quote tweets, recorded once per searched day.
total_quote_data = []
# Per-day quote counts derived from the running totals.
day_quote_data = []
# Search keyword.
keyword = '五反田'
# Search period as YYYYMMDD strings; the end date is inclusive.
sinceForm = '20210428'
untilForm = '20210501'

# Parse both ends of the period. One day is added to the end date so that it
# becomes the exclusive upper bound of the whole period.
sinceDate = datetime.datetime.strptime(sinceForm, '%Y%m%d')
untilDate = datetime.datetime.strptime(untilForm, '%Y%m%d')
untilDate += datetime.timedelta(days=1)

# Number of whole days in the period.
diff = (untilDate - sinceDate).days

# One window per day: since[k] is the midnight that starts day k and
# until[k] is the midnight that ends it.
since = [sinceDate + datetime.timedelta(days=k) for k in range(diff)]
until = [untilDate - datetime.timedelta(days=k) for k in reversed(range(diff))]

# Run one search per daily window and record the running quote total after
# each. Nothing is printed here.
# NOTE(review): since/until are passed as datetime objects; confirm the
# search endpoint accepts that form (its documented date format is YYYY-MM-DD).
for day_start, day_end in zip(since, until):
    results = tweepy.Cursor(
        api.search,
        tweet_mode='extended',
        q=keyword,
        lang='ja',
        since=day_start,
        until=day_end,
    ).items()
    # Keep only the quote flag of each matching tweet.
    quote_data.extend(status.is_quote_status for status in results)
    # quote_data is never cleared, so this is the cumulative number of quotes.
    total_quote_data.append(quote_data.count(True))

# Per-day quote count: each running total minus the one before it. A leading
# zero stands in for the "previous" total of the first day.
day_quote_data.extend(
    current - previous
    for previous, current in zip([0] + total_quote_data, total_quote_data)
)

# Print the quote count of each day, preceded by a separator line.
for daily_count in day_quote_data:
    print('-------------------------------------------', daily_count, sep='\n')

実行結果

Image from Gyazo

参考文献

Discussion