python读取unl文件_使用python从Twitter的推文中提取数据

I want to extract data such as the tweet ID, the Twitter username and Twitter ID of each user whose tweet contains an fb.me link, and also that user's Facebook ID and Facebook username.

I have to do this for 200 such tweets.

My code :

from twitter.oauth import OAuth

import json

import urllib2

from twitter import *

# Twitter OAuth credentials (left blank here; fill in your own values).
ckey = ''
csecret = ''
atoken = ''
asecret = ''

# Build the authenticated client, then run a search limited to one result.
auth = OAuth(atoken, asecret, ckey, csecret)
t_api = Twitter(auth=auth)
search = t_api.search.tweets(q='http://on.fb.me', count=1)

# Dump the raw response, followed by a marker for the fields still to extract.
print(search)
print('specific data')

# Disabled attempt at reading the URL entities of the first status:
# print(search['statuses'][0]['entities']['urls'])

Right now I am retrieving only 1 result, and I want to extract the data mentioned above from it.

Result I got :

{u'search_metadata': {u'count': 1, u'completed_in': 0.021, u'max_id_str': u'542227367834685440', u'since_id_str': u'0', u'next_results': u'?max_id=542227367834685439&q=http%3A%2F%2Fon.fb.me&count=1&include_entities=1', u'refresh_url': u'?since_id=542227367834685440&q=http%3A%2F%2Fon.fb.me&include_entities=1', u'since_id': 0, u'query': u'http%3A%2F%2Fon.fb.me', u'max_id': 542227367834685440L}, u'statuses': [{u'contributors': None, u'truncated': False, u'text': u'Check out Monday Morning Cooking Club Cooking Tip Day #1 -->http://t.co/j6mbg1OE6Z | http://t.co/c7qjunLQz2', u'in_reply_to_status_id': None, u'id': 542227367834685440L, u'favorite_count': 0, u'source': u'Hootsuite', u'retweeted': False, u'coordinates': None, u'entities': {u'symbols': [], u'user_mentions': [], u'hashtags': [], u'urls': [{u'url': u'http://t.co/j6mbg1OE6Z', u'indices': [63, 85], u'expanded_url': u'http://on.fb.me/', u'display_url': u'on.fb.me'}, {u'url': u'http://t.co/c7qjunLQz2', u'indices': [88, 110], u'expanded_url': u'http://bit.ly/12BbG16', u'display_url': u'bit.ly/12BbG16'}]}, u'in_reply_to_screen_name': None, u'in_reply_to_user_id': None, u'retweet_count': 0, u'id_str': u'542227367834685440', u'favorited': False, u'user': {u'follow_request_sent': False, u'profile_use_background_image': True, u'profile_text_color': u'333333', u'default_profile_image': False, u'id': 226140415, u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/704964581/bc37b358019be05efe1094a0d100ea53.jpeg', u'verified': False, u'profile_location': None, u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/469488950050955264/FOoWjIEZ_normal.jpeg', u'profile_sidebar_fill_color': u'DDEEF6', u'entities': {u'url': {u'urls': [{u'url': u'http://t.co/sida0E6eXy', u'indices': [0, 22], u'expanded_url': u'http://www.mondaymorningcookingclub.com.au', u'display_url': u'mondaymorningcookingclub.com.au'}]}, u'description': {u'urls': []}}, u'followers_count': 1574, 
u'profile_sidebar_border_color': u'000000', u'id_str': u'226140415', u'profile_background_color': u'EDCDC7', u'listed_count': 50, u'is_translation_enabled': False, u'utc_offset': 39600, u'statuses_count': 12594, u'description': u"Monday Morning Cooking Club. A bunch of Sydney gals sharing and preserving the wonderful recipes of Australia's culturally diverse Jewish community.", u'friends_count': 1904, u'location': u'Sydney, Australia', u'profile_link_color': u'C40A38', u'profile_image_url': u'http://pbs.twimg.com/profile_images/469488950050955264/FOoWjIEZ_normal.jpeg', u'following': False, u'geo_enabled': True, u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/226140415/1400769931', u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/704964581/bc37b358019be05efe1094a0d100ea53.jpeg', u'name': u'Lisa Goldberg', u'lang': u'en', u'profile_background_tile': False, u'favourites_count': 1309, u'screen_name': u'MondayMorningCC', u'notifications': False, u'url': u'http://t.co/sida0E6eXy', u'created_at': u'Mon Dec 13 12:22:13 +0000 2010', u'contributors_enabled': False, u'time_zone': u'Sydney', u'protected': False, u'default_profile': False, u'is_translator': False}, u'geo': None, u'in_reply_to_user_id_str': None, u'possibly_sensitive': False, u'lang': u'en', u'created_at': u'Tue Dec 09 08:00:53 +0000 2014', u'in_reply_to_status_id_str': None, u'place': None, u'metadata': {u'iso_language_code': u'en', u'result_type': u'recent'}}]}

Can you please help me work out how to retrieve this particular data?

解决方案

You could do something like this to issue a query and afterwards get the data you want by querying with the corresponding keys.

import json
import urllib2
import urlparse

import twitter

# OAuth credentials for your Twitter application.
ckey = 'Your consumer key'
csecret = 'your consumer secret'
atoken = 'your token'
asecret = 'your secret token'

auth = twitter.oauth.OAuth(atoken, asecret, ckey, csecret)
twitter_api = twitter.Twitter(auth=auth)

q = 'http://on.fb.me'
count = 100

search_results = twitter_api.search.tweets(q=q, count=count)
statuses = search_results['statuses']

# Iterate through 5 more batches of results by following the cursor
for _ in range(5):
    print("Length of statuses %d" % len(statuses))
    try:
        next_results = search_results['search_metadata']['next_results']
    except KeyError:  # No more results when next_results doesn't exist
        break

    # next_results is a query string of the following form:
    # ?max_id=313519052523986943&q=NCAA&include_entities=1
    # Its values are percent-encoded (e.g. q=http%3A%2F%2Fon.fb.me), so drop
    # the leading '?' and let parse_qsl split *and* decode the pairs before
    # they are passed back to the API as keyword arguments.
    kwargs = dict(urlparse.parse_qsl(next_results[1:]))
    search_results = twitter_api.search.tweets(**kwargs)
    statuses += search_results['statuses']

# Show one sample search result by slicing the list...
print(json.dumps(statuses[0], indent=1))

# get relevant data into lists
# NOTE: the first three lists describe the users *mentioned* in each tweet.
# The author of a tweet is available under status['user'] (with the same
# 'name', 'screen_name' and 'id_str' keys) if that is what you need.
mentions = [user_mention
            for status in statuses
            for user_mention in status['entities']['user_mentions']]

user_names = [user_mention['name'] for user_mention in mentions]
screen_names = [user_mention['screen_name'] for user_mention in mentions]
id_str = [user_mention['id_str'] for user_mention in mentions]
t_id = [status['id'] for status in statuses]

# print out first 5 results
print(json.dumps(screen_names[0:5], indent=1))
print(json.dumps(user_names[0:5], indent=1))
print(json.dumps(id_str[0:5], indent=1))
print(json.dumps(t_id[0:5], indent=1))

Result:

[

"DijalogNet",

"Kihot_ex_of",

"Kihot_ex_of",

"JAsunshine1011",

"RobertCornegyJr"

]

[

"Dijalog Net",

"Sa\u0161a Jankovi\u0107",

"Sa\u0161a Jankovi\u0107",

"Raycent Edwards",

"Robert E Cornegy, Jr"

]

[

"2380692464",

"563692937",

"563692937",

"15920807",

"460051837"

]

[

542309722385580032,

542227367834685440,

542202885514461185,

542201843448045568,

542188061598437376

]

Have a look at this site for more examples of how to use the API.

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值