# 跨行自学python,找个地方记下笔记。
#!-*-coding:utf-8-*-
import json
# Raw string so every backslash in the Windows path is kept literally; in a
# normal string literal "\u..." is an invalid escape under Python 3.
path = r"C:\Python learn\pydata-book-master\ch02\usagov_bitly_data2012-03-16-1331923249.txt"

# Every print below uses the single-argument parenthesised form, which runs
# unchanged under both Python 2 and Python 3.

# Peek at the first raw line of the file; `with` closes the handle afterwards.
with open(path) as data_file:
    first_line = data_file.readline()
print(first_line.rstrip("\n"))  # the line carries its own newline; avoid doubling it
print(1)

# Parse each line of the file with json.loads, closing the file when done.
with open(path) as data_file:
    records = [json.loads(line) for line in data_file]
print(records[0:2])  # records is a list of dicts [{dict1}, {dict2}, ...]; this slice is itself a list
print("")
print(records[0])  # a single record: type = dict
print("")
print(records[1])
print("")
print(type(records))  # type = list
print(2)

# Each element of records is a dict; for every one that contains the key
# 'tz', collect its value into a new list.
time_zones = [record['tz'] for record in records if 'tz' in record]
print(time_zones[:10])
print(3)
def get_counts(seq):
    """Count how many times each item occurs in ``seq``.

    Args:
        seq: An iterable of hashable items.

    Returns:
        A plain ``dict`` mapping each distinct item to the number of times
        it appeared; an empty dict when ``seq`` is empty.
    """
    counts = {}  # start from an empty dict
    for item in seq:
        # dict.get supplies 0 the first time an item is seen, so no separate
        # membership test is needed before incrementing.
        counts[item] = counts.get(item, 0) + 1
    return counts
# Tally the time zones: an item that appears n times in time_zones ends up
# as {item: n, ...} in counts.
counts = get_counts(time_zones)
# Single-argument parenthesised prints run under both Python 2 and Python 3.
print(counts)
print(4)
# Equivalent to get_counts(time_zones)['America/New_York']: look up the tally
# for that one key (raises KeyError if the key is not present).
print(counts['America/New_York'])
print(5)
print(len(time_zones))
print(6)
def top_counts(count_dict, n=10):
    """Return the ``n`` entries of ``count_dict`` with the largest counts.

    Args:
        count_dict: Mapping of key -> count.
        n: Maximum number of entries to return (default 10). Values of zero
            or below yield an empty list.

    Returns:
        A list of ``(count, key)`` tuples in ascending order, so the entry
        with the highest count comes last. Entries with equal counts are
        ordered by key, because tuples compare element by element.
    """
    if n <= 0:
        # Guard: a slice of [-0:] would return the whole list, not nothing.
        return []
    # count_dict.items() yields (key, value) pairs; flip each to (value, key)
    # so that the default tuple ordering sorts by count, smallest first.
    value_key_pairs = sorted((count, tz) for tz, count in count_dict.items())
    # Return the list itself; a trailing comma after the expression would
    # wrap it in a one-element tuple.
    return value_key_pairs[-n:]
# Show the result of top_counts for the tallied time zones; the parenthesised
# single-argument print runs under both Python 2 and Python 3.
print(top_counts(counts))