使用Twitter API获取推特数据
最近由于实验室的研究需求,需要对 Twitter15 和 Twitter16 数据集进行扩展,具体包括:1. 根据 user_id 获取用户画像;2. 根据 tweet_id 获取推文下的评论。
连接TwitterAPI
首先,使用自己申请的 Twitter 开发者账号所提供的 Key 和 Secret 连接 Twitter API:
import tweepy
import time
import csv
import pandas as pd
import json
from collections import OrderedDict
import datetime
import re
# Fill in the developer key and secret issued by Twitter.
# The 'XXX' values are placeholders and must be replaced before running.
consumer_key = 'XXX'
consumer_secret = 'XXX'
access_token = 'XXX'
access_token_secret = 'XXX'
# Hand the consumer key/secret and the access token/secret to the OAuth handler
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# Build the API client object from the authenticated handler.
# NOTE(review): the proxy address 127.0.0.1:7890 is hard-coded; adjust or
# remove it to match the local network setup.
api = tweepy.API(auth, proxy='127.0.0.1:7890')
读取tweet_id
# Load every tweet ID: the first column of each row in uid.csv.
# NOTE(review): the file is named uid.csv but its values are used as tweet IDs
# by the rest of the script — confirm the file really holds tweet IDs.
# newline='' is what the csv module recommends for files passed to csv.reader.
with open('uid.csv', 'r', encoding='utf-8', newline='') as f1:
    reader = csv.reader(f1)
    # Skip blank lines so an empty row does not raise IndexError on row[0].
    tweets_id = [row[0] for row in reader if row]
# The with-statement has already closed the file; no explicit close() is needed.
print(tweets_id)
print(len(tweets_id))
根据tweet_id爬取推文评论
for tweet_id in tweets_id:
print(f"正在获取 tweet_id = %s 的推特的评论..." % tweet_id)
with open('comments_id/' + tweet_id + '.csv', 'r', encoding='utf-8') as f2:
reader = csv.reader(f2)
comments_id = [row[0] for row in reader]
with open('comments/' + tweet_id + '.csv', 'a', encoding='utf-8') as f3:
for comment_id in comments_id:
print(f"正在获取 comment_id = %s 的评论内容..." % comment_id)
flag = 0
while flag == 0:
try: