# coding=utf-8
import linecache
import time
# Timestamp (seconds since the epoch) taken when the script starts.
now = time.time()

# Column names of one parsed record, listed in positional order.
data_keys = (
    'bid', 'uid', 'username', 'v_class', 'content', 'img', 'created_at',
    'source', 'rt_num', 'cm_num', 'rt_uid', 'rt_username', 'rt_v_class',
    'rt_content', 'rt_img', 'src_rt_num', 'src_cm_num', 'gender', 'rt_bid',
    'location', 'rt_mid', 'mid', 'lat', 'lon', 'lbs_type', 'lbs_title',
    'poiid', 'links', 'hashtags', 'ats', 'rt_links', 'rt_hashtags', 'rt_ats',
    'v_url', 'rt_v_url',
)

# Column name -> positional index, so fields can be looked up by name.
keys = {name: index for index, name in enumerate(data_keys)}

# Raw lines of the data file. linecache.getlines() yields an empty list
# (rather than raising) when the file cannot be read.
F = linecache.getlines('twitter.txt')

# Drop the first character and the last two characters of every raw line, then
# split on '","' to get the individual fields.
# NOTE(review): presumably the trimmed characters are the opening quote and
# the closing quote plus line terminator -- confirm against the file format.
lines = [x[1:-2].split('","') for x in F]

# Distinct user names found in the data.
users = {line[keys['username']] for line in lines}
users_total = len(users)  # total number of distinct users
assert isinstance(users_total, int)
users = list(users)  # the distinct user names as a list
assert isinstance(users, list)

# Records whose creation timestamp starts with '2012-11'. Built as a real list
# (not a lazy filter object) so len() works on both Python 2 and Python 3.
tweets_from_2012_11 = [
    line for line in lines
    if line[keys['created_at']].startswith('2012-11')
]
tweets_2012_11_count = len(tweets_from_2012_11)  # number of tweets from 2012-11
assert isinstance(tweets_2012_11_count, int)
dates = set(line[keys['created_at']].sp
分析python处理基本数据<四>
最新推荐文章于 2020-12-29 05:39:09 发布
![](https://img-home.csdnimg.cn/images/20240711042549.png)