python全栈 人工智能_学习笔记之Python全栈开发/人工智能公开课_腾讯课堂

1 #coding: utf-8

2

3 #In[1]:

4

5

6 importpandas as pd7 importnumpy as np8

9

10 #In[2]:

11

12

13 data = pd.DataFrame(pd.read_excel('originalData.xlsx'))14

15 #date hour pressure wind_direction temperature

16 #0 2016-07-01 0.0 1000.4 225.0 26.4

17 #1 2016-07-01 NaN NaN NaN NaN

18 #2 2016-07-01 6.0 998.9 212.0 31.7

19 #3 2016-07-01 235.0 998.7 244.0 NaN

20 #4 2016-07-01 12.0 999.7 222.0 NaN

21 #5 2016-07-01 15.0 1000.0 102.0 NaN

22 #6 2016-07-01 NaN 998.8 202.0 26.0

23 #7 2016-07-01 NaN 1000.2 334.0 25.5

24 #8 2016-07-01 NaN 1000.2 334.0 25.5

25 #9 2016-07-02 3.0 1002.4 46.0 30.0

26 #10 2016-07-02 6.0 1001.3 37.0 29.3

27 #11 2016-07-02 9.0 1001.9 345.0 25.9

28 #12 2016-07-02 12.0 1003.6 113.0 25.1

29 #13 2016-07-02 12.0 1003.6 113.0 25.1

30 #14 2016-07-02 15.0 1002.4 138.0 25.3

31 #hour pressure wind_direction temperature

32 #count 11.000000 14.000000 14.000000 11.000000

33 #mean 29.545455 1000.864286 190.500000 26.890909

34 #std 68.313049 1.685963 102.932951 2.311473

35 #min 0.000000 998.700000 37.000000 25.100000

36 #25% 6.000000 999.775000 113.000000 25.400000

37 #50% 12.000000 1000.300000 207.000000 25.900000

38 #75% 13.500000 1002.275000 239.250000 27.850000

39 #max 235.000000 1003.600000 345.000000 31.700000

40 print(data)41 print(data.describe())42

43

44 #In[3]:

45

46 #RangeIndex(start=0, stop=15, step=1)

47 #Index(['date', 'hour', 'pressure', 'wind_direction', 'temperature'], dtype='object')

48 print(data.index)49 print(data.columns)50

51

52 #In[4]:

53

54 #date hour pressure wind_direction temperature

55 #0 2016-07-01 0.0 1000.4 225.0 26.4

56 #1 2016-07-01 NaN NaN NaN NaN

57 #2 2016-07-01 6.0 998.9 212.0 31.7

58 #3 2016-07-01 235.0 998.7 244.0 NaN

59 #4 2016-07-01 12.0 999.7 222.0 NaN

60 #5 2016-07-01 15.0 1000.0 102.0 NaN

61 #date hour pressure wind_direction temperature

62 #9 2016-07-02 3.0 1002.4 46.0 30.0

63 #10 2016-07-02 6.0 1001.3 37.0 29.3

64 #11 2016-07-02 9.0 1001.9 345.0 25.9

65 #12 2016-07-02 12.0 1003.6 113.0 25.1

66 #13 2016-07-02 12.0 1003.6 113.0 25.1

67 #14 2016-07-02 15.0 1002.4 138.0 25.3

68 print(data.head(6))69 print(data.tail(6))70

71

72 #In[5]:

73

74

75 #1. 删掉空白值超过3的行

76 data.dropna(axis=0, thresh=3, inplace=True)77 data.reset_index(drop=True, inplace=True)78

79 #date hour pressure wind_direction temperature

80 #0 2016-07-01 0.0 1000.4 225.0 26.4

81 #1 2016-07-01 6.0 998.9 212.0 31.7

82 #2 2016-07-01 235.0 998.7 244.0 NaN

83 #3 2016-07-01 12.0 999.7 222.0 NaN

84 #4 2016-07-01 15.0 1000.0 102.0 NaN

85 #5 2016-07-01 NaN 998.8 202.0 26.0

86 #6 2016-07-01 NaN 1000.2 334.0 25.5

87 #7 2016-07-01 NaN 1000.2 334.0 25.5

88 #8 2016-07-02 3.0 1002.4 46.0 30.0

89 #9 2016-07-02 6.0 1001.3 37.0 29.3

90 #10 2016-07-02 9.0 1001.9 345.0 25.9

91 #11 2016-07-02 12.0 1003.6 113.0 25.1

92 #12 2016-07-02 12.0 1003.6 113.0 25.1

93 #13 2016-07-02 15.0 1002.4 138.0 25.3

94 print(data)95

96

97 #In[6]:

98

99

100 #2. 填充空白,hour填充10,temperature填充25.5

101 data.fillna({'hour':10, 'temperature':25.5}, inplace=True)102

103 #date hour pressure wind_direction temperature

104 #0 2016-07-01 0.0 1000.4 225.0 26.4

105 #1 2016-07-01 6.0 998.9 212.0 31.7

106 #2 2016-07-01 235.0 998.7 244.0 25.5

107 #3 2016-07-01 12.0 999.7 222.0 25.5

108 #4 2016-07-01 15.0 1000.0 102.0 25.5

109 #5 2016-07-01 10.0 998.8 202.0 26.0

110 #6 2016-07-01 10.0 1000.2 334.0 25.5

111 #7 2016-07-01 10.0 1000.2 334.0 25.5

112 #8 2016-07-02 3.0 1002.4 46.0 30.0

113 #9 2016-07-02 6.0 1001.3 37.0 29.3

114 #10 2016-07-02 9.0 1001.9 345.0 25.9

115 #11 2016-07-02 12.0 1003.6 113.0 25.1

116 #12 2016-07-02 12.0 1003.6 113.0 25.1

117 #13 2016-07-02 15.0 1002.4 138.0 25.3

118 print(data)119

120

121 #In[7]:

122

123

124 #3. 删掉hour>24的行

125 num =data.index.max()126

127 for i inrange(num):128 if data.loc[i, 'hour'] > 24:129 data.drop([i], inplace=True)130 print('hour > 24, deleted')131

132 data.reset_index(drop=True, inplace=True)133

134 #hour > 24, deleted

135 #date hour pressure wind_direction temperature

136 #0 2016-07-01 0.0 1000.4 225.0 26.4

137 #1 2016-07-01 6.0 998.9 212.0 31.7

138 #2 2016-07-01 12.0 999.7 222.0 25.5

139 #3 2016-07-01 15.0 1000.0 102.0 25.5

140 #4 2016-07-01 10.0 998.8 202.0 26.0

141 #5 2016-07-01 10.0 1000.2 334.0 25.5

142 #6 2016-07-01 10.0 1000.2 334.0 25.5

143 #7 2016-07-02 3.0 1002.4 46.0 30.0

144 #8 2016-07-02 6.0 1001.3 37.0 29.3

145 #9 2016-07-02 9.0 1001.9 345.0 25.9

146 #10 2016-07-02 12.0 1003.6 113.0 25.1

147 #11 2016-07-02 12.0 1003.6 113.0 25.1

148 #12 2016-07-02 15.0 1002.4 138.0 25.3

149 print(data)150

151

152 #In[8]:

153

154

155 #4. 删掉重复的数据行,保留出现的第一行(全部删掉?保留最后一行?)

156 data.drop_duplicates(keep='first', inplace=True)157 data.reset_index(drop=True, inplace=True)158

159 #date hour pressure wind_direction temperature

160 #0 2016-07-01 0.0 1000.4 225.0 26.4

161 #1 2016-07-01 6.0 998.9 212.0 31.7

162 #2 2016-07-01 12.0 999.7 222.0 25.5

163 #3 2016-07-01 15.0 1000.0 102.0 25.5

164 #4 2016-07-01 10.0 998.8 202.0 26.0

165 #5 2016-07-01 10.0 1000.2 334.0 25.5

166 #6 2016-07-02 3.0 1002.4 46.0 30.0

167 #7 2016-07-02 6.0 1001.3 37.0 29.3

168 #8 2016-07-02 9.0 1001.9 345.0 25.9

169 #9 2016-07-02 12.0 1003.6 113.0 25.1

170 #10 2016-07-02 15.0 1002.4 138.0 25.3

171 print(data)172

173

174 #In[9]:

175

176

177 #5. 数据重排

178 randnum =np.random.permutation(data.index.size)179

180 #[ 4 0 10 3 1 5 8 9 7 2 6]

181 print(randnum)182

183

184 #In[10]:

185

186

187 data2 =data.take(randnum)188

189 #date hour pressure wind_direction temperature

190 #4 2016-07-01 10.0 998.8 202.0 26.0

191 #0 2016-07-01 0.0 1000.4 225.0 26.4

192 #10 2016-07-02 15.0 1002.4 138.0 25.3

193 #3 2016-07-01 15.0 1000.0 102.0 25.5

194 #1 2016-07-01 6.0 998.9 212.0 31.7

195 #5 2016-07-01 10.0 1000.2 334.0 25.5

196 #8 2016-07-02 9.0 1001.9 345.0 25.9

197 #9 2016-07-02 12.0 1003.6 113.0 25.1

198 #7 2016-07-02 6.0 1001.3 37.0 29.3

199 #2 2016-07-01 12.0 999.7 222.0 25.5

200 #6 2016-07-02 3.0 1002.4 46.0 30.0

201 print(data2)202

203

204 #In[11]:

205

206

207 #6. 随机采样

208 data3 = data.sample(8)209

210 #date hour pressure wind_direction temperature

211 #4 2016-07-01 10.0 998.8 202.0 26.0

212 #0 2016-07-01 0.0 1000.4 225.0 26.4

213 #2 2016-07-01 12.0 999.7 222.0 25.5

214 #1 2016-07-01 6.0 998.9 212.0 31.7

215 #5 2016-07-01 10.0 1000.2 334.0 25.5

216 #9 2016-07-02 12.0 1003.6 113.0 25.1

217 #7 2016-07-02 6.0 1001.3 37.0 29.3

218 #8 2016-07-02 9.0 1001.9 345.0 25.9

219 print(data3)220 data3.to_csv('data3.csv')

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值