手机python编译器运行不了_vscode作为编辑器运行python,但是运行时读取不了文件,debug时却没有问题...

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

import json

from collections import defaultdict

from collections import Counter

from pylab import *

import matplotlib.pyplot as plt

from pandas import DataFrame, Series

import pandas as pd;

import numpy as np

# Input data file. Every non-blank line is decoded below as one JSON document.
# NOTE(review): this is a relative path, so it is resolved against the
# process's current working directory, not against this script's location.
# Launching the script from a different directory raises FileNotFoundError.
path = 'usagov_bitly_data2012-03-16-1331923249.txt'

# The context manager closes the file handle as soon as parsing finishes.
# The encoding is pinned to UTF-8 (the standard JSON encoding) so decoding
# does not depend on the platform's locale default.
with open(path, encoding='utf-8') as log_file:
    # Skip blank lines (e.g. a trailing newline) instead of crashing on them.
    records = [json.loads(line) for line in log_file if line.strip()]

# for i in range(len(records)):
#     for k, v in records[i].items():
#         print(k, v)

print(len(records))
print(records[0]['tz'])

# Not every record has a 'tz' key, so filter before indexing.
time_zones = [rec['tz'] for rec in records if 'tz' in rec]
print(time_zones[:10])

def get_counts(sequence):
    """Count how many times each item occurs in *sequence*.

    Args:
        sequence: Iterable of hashable items.

    Returns:
        A ``defaultdict(int)`` mapping each distinct item to its number of
        occurrences. Because it is a ``defaultdict``, looking up an item
        that never occurred yields 0 (and inserts that key).
    """
    counts = defaultdict(int)
    for x in sequence:
        counts[x] += 1
    return counts

# Tally how often each time zone value appears in time_zones.
counts = get_counts(time_zones)

# Number of entries whose time zone is 'America/New_York'.
print(counts['America/New_York'])

# Total number of time zone entries that were tallied.
print(len(time_zones))

def top_counts(count_dict, n=10):
    """Return the *n* entries of *count_dict* with the largest counts.

    Args:
        count_dict: Mapping of key -> count.
        n: Maximum number of entries to return (default 10).

    Returns:
        A list of ``(count, key)`` tuples sorted in ascending order, so the
        most frequent entry is last. Ties on count are ordered by key.
        Returns an empty list when ``n`` is zero or negative.
    """
    # Guard: a plain ``pairs[-n:]`` with n == 0 is ``pairs[0:]``, which would
    # return every entry instead of none.
    if n <= 0:
        return []
    value_key_pairs = sorted((count, tz) for tz, count in count_dict.items())
    return value_key_pairs[-n:]

# Up to ten most frequent time zones as (count, tz) pairs, smallest count first.
print(top_counts(counts))

# Top ten again via collections.Counter: (tz, count) pairs, largest count
# first (ties may be ordered differently from top_counts).
print(Counter(time_zones).most_common(10))

# Build a DataFrame from the decoded records and inspect the 'tz' column.
frame = DataFrame(records)
print(type(frame))
print(frame['tz'].head(10))

# Frequency of each time zone value exactly as it appears in the data.
tz_counts = frame['tz'].value_counts()
print(tz_counts.head(10))

# Label absent values as 'Missing' and empty strings as 'Unknown',
# then recount.
clean_tz = frame['tz'].fillna('Missing')
clean_tz.loc[clean_tz == ''] = 'Unknown'
tz_counts = clean_tz.value_counts()
print(tz_counts.head(10))

# Horizontal bar chart of the ten most frequent (cleaned) time zones.
tz_counts.head(10).plot(kind='barh', rot=0)
# plt.show()

# Peek at a few raw values from column 'a'.
print(frame['a'][1])
print(frame['a'][50])
print(frame['a'][51])

# First whitespace-separated token of every non-null 'a' value.
results = Series([agent.split()[0] for agent in frame['a'].dropna()])
print(results.head(5))
print(results.value_counts().head(8))

# Keep only rows where 'a' is present, then label each row by whether its
# 'a' string contains 'Windows'.
cframe = frame[frame['a'].notnull()]
operating_systems = np.where(
    cframe['a'].str.contains('Windows'), 'Windows', 'Not Windows'
)
print(operating_systems[:5])

# Count rows per (time zone, OS label); combinations that never occur get 0.
by_tz_os = cframe.groupby(['tz', operating_systems])
agg_counts = by_tz_os.size().unstack().fillna(0)
print(agg_counts.head(10))

# Positions that would sort the time zones by total row count, ascending.
indexer = agg_counts.sum(axis=1).argsort()
print(indexer.head(10))

# The ten time zones with the largest totals (last ten after sorting).
count_subset = agg_counts.take(indexer).tail(10)
print(count_subset)
count_subset.plot(kind='barh', stacked=True)

# Divide each row by its total so the stacked bars show proportions.
normed_subset = count_subset.div(count_subset.sum(axis=1), axis=0)
normed_subset.plot(kind='barh', stacked=True)

plt.show()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值