Matplotlib-城市气候与海洋的关系

导入包

In [1]:

import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

导入数据各个海滨城市数据

In [2]:

milano1 = pd.read_csv('milano_250715.csv')
milano2 = pd.read_csv('milano_150715.csv')
milano3 = pd.read_csv('milano_270615.csv')
#数据导入成功之后,需要合并

In [3]:

display(milano1.head(),milano2.head(),milano3.head())
#合并之前,看看数据的列是否完全一样

 

Unnamed: 0

temp

humidity

pressure

description

dt

wind_speed

wind_deg

city

day

dist

0

0

29.50

48

1011

few clouds

1437730849

3.6

90

Milano

2015-07-24 11:40:49

250

1

1

30.81

45

1011

few clouds

1437734492

2.6

70

Milano

2015-07-24 12:41:32

250

2

2

31.91

45

1010

proximity shower rain

1437738045

3.1

80

Milano

2015-07-24 13:40:45

250

3

3

32.72

40

1009

proximity shower rain

1437741578

3.6

130

Milano

2015-07-24 14:39:38

250

4

4

33.44

38

1009

Sky is Clear

1437745188

3.6

130

Milano

2015-07-24 15:39:48

250

 

 

Unnamed: 0

temp

humidity

pressure

description

dt

wind_speed

wind_deg

city

day

dist

0

0

28.57

54

1016

Sky is Clear

1436863175

2.1

100

Milano

2015-07-14 10:39:35

250

1

1

29.74

48

1016

Sky is Clear

1436866758

2.6

0

Milano

2015-07-14 11:39:18

250

2

2

31.12

48

1016

Sky is Clear

1436870509

2.6

140

Milano

2015-07-14 12:41:49

250

3

3

32.16

45

1015

Sky is Clear

1436874098

2.1

0

Milano

2015-07-14 13:41:38

250

4

4

33.59

43

1015

Sky is Clear

1436877644

3.1

80

Milano

2015-07-14 14:40:44

250

 

 

Unnamed: 0

temp

humidity

pressure

description

dt

wind_speed

wind_deg

city

day

dist

0

0

24.69

60

1017

Sky is Clear

1435390925

2.6

140

Milano

2015-06-27 09:42:05

250

1

1

25.34

57

1017

Sky is Clear

1435394243

2.1

160

Milano

2015-06-27 10:37:23

250

2

2

27.70

51

1017

Sky is Clear

1435399015

1.5

210

Milano

2015-06-27 11:56:55

250

3

3

28.36

42

1017

Sky is Clear

1435402416

2.1

220

Milano

2015-06-27 12:53:36

250

4

4

29.45

42

1016

few clouds

1435406054

2.6

210

Milano

2015-06-27 13:54:14

250

In [4]:

milano = pd.concat([milano1,milano2,milano3],ignore_index=True)
#ignore_index=True,重新排列

In [5]:

#查看有多少数据
display(milano.shape,milano.tail())
(66, 11)

 

Unnamed: 0

temp

humidity

pressure

description

dt

wind_speed

wind_deg

city

day

dist

61

13

20.27

68

1017

Sky is Clear

1435453226

2.6

90

Milano

2015-06-28 03:00:26

250

62

14

19.62

72

1017

Sky is Clear

1435456482

2.6

50

Milano

2015-06-28 03:54:42

250

63

15

18.68

72

1017

Sky is Clear

1435460036

2.6

40

Milano

2015-06-28 04:53:56

250

64

16

18.28

72

1017

Sky is Clear

1435463874

2.6

40

Milano

2015-06-28 05:57:54

250

65

17

18.86

77

1017

Sky is Clear

1435467177

0.5

0

Milano

2015-06-28 06:52:57

250

In [6]:

asti1 = pd.read_csv('asti_250715.csv')
asti2 = pd.read_csv('asti_150715.csv')
asti3 = pd.read_csv('asti_270615.csv')
asti = pd.concat([asti1,asti2,asti3],ignore_index=True)
bologna1 = pd.read_csv('bologna_250715.csv')
bologna2 = pd.read_csv('bologna_150715.csv')
bologna3 = pd.read_csv('bologna_270615.csv')
bologna = pd.concat([bologna1,bologna2,bologna3],ignore_index=True)
cesena1 = pd.read_csv('cesena_250715.csv')
cesena2 = pd.read_csv('cesena_150715.csv')
cesena3 = pd.read_csv('cesena_270615.csv')
cesena = pd.concat([cesena1,cesena2,cesena3],ignore_index=True)
faenza1 = pd.read_csv('faenza_250715.csv')
faenza2 = pd.read_csv('faenza_150715.csv')
faenza3 = pd.read_csv('faenza_270615.csv')
faenza = pd.concat([faenza1,faenza2,faenza3],ignore_index=True)
ferrara1 = pd.read_csv('ferrara_250715.csv')
ferrara2 = pd.read_csv('ferrara_150715.csv')
ferrara3 = pd.read_csv('ferrara_270615.csv')
ferrara = pd.concat([ferrara1,ferrara2,ferrara3],ignore_index=True)
mantova1 = pd.read_csv('mantova_250715.csv')
mantova2 = pd.read_csv('mantova_150715.csv')
mantova3 = pd.read_csv('mantova_270615.csv')
mantova = pd.concat([mantova1,mantova2,mantova3],ignore_index=True)
piacenza1 = pd.read_csv('piacenza_250715.csv')
piacenza2 = pd.read_csv('piacenza_150715.csv')
piacenza3 = pd.read_csv('piacenza_270615.csv')
piacenza = pd.concat([piacenza1,piacenza2,piacenza3],ignore_index=True)
ravenna1 = pd.read_csv('ravenna_250715.csv')
ravenna2 = pd.read_csv('ravenna_150715.csv')
ravenna3 = pd.read_csv('ravenna_270615.csv')
ravenna = pd.concat([ravenna1,ravenna2,ravenna3],ignore_index=True)
torino1 = pd.read_csv('torino_250715.csv')
torino2 = pd.read_csv('torino_150715.csv')
torino3 = pd.read_csv('torino_270615.csv')
torino = pd.concat([torino1,torino2,torino3],ignore_index=True)

查看行数

In [7]:

print('asti',asti.shape)
print('bologna',bologna.shape)
print('cesena',cesena.shape)
print('faenza',faenza.shape)
print('ferrara',ferrara.shape)
print('mantova',mantova.shape)
print('milano',milano.shape)
print('piacenza',piacenza.shape)
print('ravenna',ravenna.shape)
print('torino',torino.shape)
asti (68, 11)
bologna (68, 11)
cesena (68, 11)
faenza (67, 11)
ferrara (68, 11)
mantova (68, 11)
milano (66, 11)
piacenza (68, 11)
ravenna (66, 11)
torino (68, 11)

去除没用的列

In [8]:

milano.columns

Out[8]:

Index(['Unnamed: 0', 'temp', 'humidity', 'pressure', 'description', 'dt',
       'wind_speed', 'wind_deg', 'city', 'day', 'dist'],
      dtype='object')

In [9]:

#如何批量删除“没用的列”呢?
#先把所有的城市组合起来
cities = [asti,bologna,cesena,faenza,ferrara,mantova,milano,piacenza,ravenna,torino]
#然后通过for循环,删除,注意‘Unnamed: 0’
for c in cities:
    c.drop('Unnamed: 0',axis = 1,inplace = True)

In [10]:

#查看删除结果
milano
# temp  该城市的 温度  
# humidity  湿度
# pressure  压强
# description  描述
# dt  时间
# wind_speed  风速
# wind_deg   风向
# city  城市名称
# day  采集日期
# dist  距离  (海里)

Out[10]:

 

temp

humidity

pressure

description

dt

wind_speed

wind_deg

city

day

dist

0

29.50

48

1011

few clouds

1437730849

3.6

90

Milano

2015-07-24 11:40:49

250

1

30.81

45

1011

few clouds

1437734492

2.6

70

Milano

2015-07-24 12:41:32

250

2

31.91

45

1010

proximity shower rain

1437738045

3.1

80

Milano

2015-07-24 13:40:45

250

3

32.72

40

1009

proximity shower rain

1437741578

3.6

130

Milano

2015-07-24 14:39:38

250

4

33.44

38

1009

Sky is Clear

1437745188

3.6

130

Milano

2015-07-24 15:39:48

250

...

...

...

...

...

...

...

...

...

...

...

61

20.27

68

1017

Sky is Clear

1435453226

2.6

90

Milano

2015-06-28 03:00:26

250

62

19.62

72

1017

Sky is Clear

1435456482

2.6

50

Milano

2015-06-28 03:54:42

250

63

18.68

72

1017

Sky is Clear

1435460036

2.6

40

Milano

2015-06-28 04:53:56

250

64

18.28

72

1017

Sky is Clear

1435463874

2.6

40

Milano

2015-06-28 05:57:54

250

65

18.86

77

1017

Sky is Clear

1435467177

0.5

0

Milano

2015-06-28 06:52:57

250

66 rows × 10 columns

各城市与海洋距离,最高温度,最低温度,最高湿度,最低湿度

In [11]:

#定义,各城市与海洋距离,最高温度,最低温度,最高湿度,最低湿度
dists = []
temp_max = []
temp_min = []
hum_max = []
hum_min = []

In [12]:

#通过for循环获需要的数据
for city in cities:
    dists.append(city['dist'][0])
    temp_max.append(city['temp'].max())
    temp_min.append(city['temp'].min())
    hum_max.append(city['humidity'].max())
    hum_min.append(city['humidity'].min())

In [13]:

#随意看看一个数据
temp_max

Out[13]:

[34.31,
 33.85000000000002,
 32.81,
 32.74000000000001,
 33.43000000000001,
 34.18000000000001,
 34.81,
 33.920000000000016,
 32.79000000000002,
 34.69]

In [14]:

display(len(temp_max),len(cities))
10
10

显示最高温度与离海远近的关系

In [15]:

#x轴-离海远近 dists,y轴-最高温度 temp_max
plt.plot(dists,temp_max)

Out[15]:

[<matplotlib.lines.Line2D at 0x7fa40a9e76d8>]

In [16]:

 

#使用点来显示,用到scatter
plt.scatter(dists,temp_max)

Out[16]:

<matplotlib.collections.PathCollection at 0x7fa4028c3ac8>

观察发现,离海近的可以形成一条直线,离海远的也能形成一条直线。

首先使用numpy:把列表转换为numpy数组,用于后续计算。

分别以100公里和50公里为分界点,划分为离海近和离海远的两组数据

In [17]:

#100公里之外的数据
#判断之前,需要将(dists,temp_max)列表型的数据,转换为numpy数组
x = np.array(dists)
y = np.array(temp_max)
#x1表示距离小于100公里的海滨城市
x1 = x[x<100]
#y1表示距离小于100公里城市的温度
y1 = y[x<100]
print('距离小于100公里,对应的温度')
print(x1,y1)
#x2表示距离大于50公里的海滨城市
x2 = x[x>50]
#y1表示距离大于50公里城市的温度
y2 = y[x>50]
print('距离大于50公里,对应的温度')
print(x2,y2)
距离小于100公里,对应的温度
[71 14 37 47  8] [33.85 32.81 32.74 33.43 32.79]
距离大于50公里,对应的温度
[315  71 121 250 200 357] [34.31 33.85 34.18 34.81 33.92 34.69]

In [18]:

#将数据转换为二维
x1 = x1.reshape(5,1)
y1 = y1.reshape(5,1)
x2 = x2.reshape(6,1)
y2 = y2.reshape(6,1)
display(x1,y1,x2,y2)
array([[71],
       [14],
       [37],
       [47],
       [ 8]])
array([[33.85],
       [32.81],
       [32.74],
       [33.43],
       [32.79]])
array([[315],
       [ 71],
       [121],
       [250],
       [200],
       [357]])
array([[34.31],
       [33.85],
       [34.18],
       [34.81],
       [33.92],
       [34.69]])

使用支持向量机计算回归参数

In [19]:

#机器学习的模型
from sklearn.svm import SVR

In [20]:

#创建算法
svr1 = SVR(kernel='linear')
svr2 = SVR(kernel='linear')

In [21]:

#将数据交给算法fit == feed
svr1.fit(x1,y1)
svr2.fit(x2,y2)
/home/ccoy/.local/lib/python3.7/site-packages/sklearn/utils/validation.py:63: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  return f(*args, **kwargs)
/home/ccoy/.local/lib/python3.7/site-packages/sklearn/utils/validation.py:63: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  return f(*args, **kwargs)

Out[21]:

SVR(kernel='linear')

In [22]:

#预测数据
x_test1 = np.linspace(0,100,20).reshape(20,1)
x_test2 = np.linspace(50,350,20).reshape(20,1)
display(x_test1,x_test2)
array([[  0.        ],
       [  5.26315789],
       [ 10.52631579],
       [ 15.78947368],
       [ 21.05263158],
       [ 26.31578947],
       [ 31.57894737],
       [ 36.84210526],
       [ 42.10526316],
       [ 47.36842105],
       [ 52.63157895],
       [ 57.89473684],
       [ 63.15789474],
       [ 68.42105263],
       [ 73.68421053],
       [ 78.94736842],
       [ 84.21052632],
       [ 89.47368421],
       [ 94.73684211],
       [100.        ]])
array([[ 50.        ],
       [ 65.78947368],
       [ 81.57894737],
       [ 97.36842105],
       [113.15789474],
       [128.94736842],
       [144.73684211],
       [160.52631579],
       [176.31578947],
       [192.10526316],
       [207.89473684],
       [223.68421053],
       [239.47368421],
       [255.26315789],
       [271.05263158],
       [286.84210526],
       [302.63157895],
       [318.42105263],
       [334.21052632],
       [350.        ]])

In [23]:

y1_ = svr1.predict(x_test1)
y2_ = svr2.predict(x_test2)
display(y1_,y2_)
array([32.55539683, 32.64395155, 32.73250627, 32.82106099, 32.90961571,
       32.99817043, 33.08672515, 33.17527987, 33.26383459, 33.35238931,
       33.44094403, 33.52949875, 33.61805347, 33.70660819, 33.79516291,
       33.88371763, 33.97227235, 34.06082707, 34.14938179, 34.23793651])
array([33.90300699, 33.93834008, 33.97367317, 34.00900626, 34.04433934,
       34.07967243, 34.11500552, 34.15033861, 34.1856717 , 34.22100478,
       34.25633787, 34.29167096, 34.32700405, 34.36233714, 34.39767022,
       34.43300331, 34.4683364 , 34.50366949, 34.53900258, 34.57433566])

In [24]:

#真实数据,点
plt.scatter(dists,temp_max)
#机器学习模拟,使用线
plt.plot(x_test1,y1_,'r')
plt.plot(x_test2,y2_,'b')

Out[24]:

[<matplotlib.lines.Line2D at 0x7fa3eb8e2160>]

查看最低温度与海洋距离的关系

In [25]:

plt.scatter(dists,temp_min)

Out[25]:

<matplotlib.collections.PathCollection at 0x7fa3eb844390>

最低湿度与海洋距离的关系

In [26]:

plt.scatter(dists,hum_min)

Out[26]:

<matplotlib.collections.PathCollection at 0x7fa3eb880f60>

最高湿度与海洋距离的关系

In [27]:

plt.scatter(dists,hum_max)

Out[27]:

<matplotlib.collections.PathCollection at 0x7fa3eb783978>

平均湿度与海洋距离的关系

In [28]:

temp_mean = []
for city in cities:
    temp_mean.append(city['temp'].mean())
temp_mean

Out[28]:

[26.216176470588252,
 27.242352941176495,
 26.82029411764708,
 27.038805970149276,
 27.390735294117665,
 27.643676470588254,
 26.705303030303053,
 27.018529411764725,
 26.948636363636385,
 26.50764705882355]

In [29]:

plt.scatter(dists,temp_mean)

Out[29]:

<matplotlib.collections.PathCollection at 0x7fa3eb6f2ba8>

思考:模仿最高温度,得到平均湿度与海洋距离的回归曲线

风向与风速的关系

In [30]:

#milano城市,'ro',r代表红色,o代表圆点
plt.plot(milano['wind_deg'],milano['wind_speed'],'ro')

Out[30]:

[<matplotlib.lines.Line2D at 0x7fa3eb66d080>]

在子图中,同时比较风向与湿度和风力的关系

In [31]:

axes1 = plt.subplot(121)
axes1.scatter(milano['wind_deg'],milano['humidity'])
axes2 = plt.subplot(122)
axes2.scatter(milano['wind_deg'],milano['wind_speed'])

Out[31]:

<matplotlib.collections.PathCollection at 0x7fa3eb6036d8>

可以看到散点图显示效果不好

由于风向是360度,我们可以考虑使用玫瑰图(极坐标条形图)

首先自定义一个画图函数

In [32]:

def show_rose(values,title):
    #玫瑰花瓣的个数8,(角度)45度
    n = 8
    angle = np.arange(0,2*np.pi,2*np.pi/n)
    
    #绘制的数据values
    radius = np.array(values)
    
    #axis:轴(x,y轴)
    #axes:整个画面
    plt.axes([0,0,2,2],polar = True)
    
    colors = np.random.random(size = 24).reshape(8,3)
    
    plt.bar(angle,radius,color = colors)
    
    plt.title(title,loc = 'left')

用numpy创建一个直方图,将360度划分为8个面元,将数据分类到这8个面元中

In [33]:

#milano城市
degree = milano['wind_deg']
d,b = np.histogram(degree,8,[0,360])

In [34]:

display(d,b)
array([21,  9,  9,  6, 14,  3,  2,  2])
array([  0.,  45.,  90., 135., 180., 225., 270., 315., 360.])

In [35]:

show_rose(d,'milano')

计算米兰各个方向上的风速

In [36]:

print(milano[milano['wind_deg']<45]['wind_speed'].mean())
print(milano[(milano['wind_deg']>44)&(milano['wind_deg']<90)]['wind_speed'].mean())
print(milano[(milano['wind_deg']>89)&(milano['wind_deg']<135)]['wind_speed'].mean())
print(milano[(milano['wind_deg']>134)&(milano['wind_deg']<180)]['wind_speed'].mean())
print(milano[(milano['wind_deg']>179)&(milano['wind_deg']<225)]['wind_speed'].mean())
print(milano[(milano['wind_deg']>224)&(milano['wind_deg']<270)]['wind_speed'].mean())
print(milano[(milano['wind_deg']>269)&(milano['wind_deg']<315)]['wind_speed'].mean())
print(milano[milano['wind_deg']>314]['wind_speed'].mean())
1.8142857142857143
2.2222222222222223
2.855555555555556
2.583333333333333
2.3285714285714287
2.266666666666667
2.05
2.1

将各个方向的风速保存在列表中

In [37]:

degs = np.arange(45,361,45)
tmp =  []
for deg in degs:
    tmp.append(milano[(milano['wind_deg']>(deg-46))&(milano['wind_deg']<deg)]['wind_speed'].mean())
speeds = np.array(tmp)
print('各个方向的风速:',speeds)
各个方向的风速: [1.81428571 2.22222222 2.85555556 2.58333333 2.32857143 2.26666667
 2.05       2.1       ]

画出各个方向的风速

In [38]:

show_rose(speeds,'milano')

将上面步骤写成函数

In [83]:

#定义某个城市,各个方向的风速函数,RoseWind_Speed
def RoseWind_Speed(city):
   degs = np.arange(45,361,45)
   tmp =  []
   for deg in degs:
      tmp.append(city[(city['wind_deg']>(deg-46))&(city['wind_deg']<deg)]['wind_speed'].mean())
   return np.array(tmp)
#定义定义一个画图函数,showRoseWind_Speed
def showRoseWind_Speed(speeds,city_name):
   N = 8
   theta = np.arange(0.,2 * np.pi, 2 * np.pi / N)
   radii = np.array(speeds)
   plt.axes([0,0,2,2], polar=True)
   colors = np.random.random(size = 24).reshape(8,3)
   bars = plt.bar(theta, radii, width=(2*np.pi/N), bottom=0.0, color=colors)
   plt.title(city_name,x=0.2, fontsize=20)

In [74]:

RoseWind_Speed(ravenna)
#ravenna返回值中含有空值nan,调用showRoseWind_Speed函数会报错

Out[74]:

array([3.2       , 4.0925    , 2.62583333, 1.52888889, 1.78909091,
       2.71142857,        nan, 2.11      ])

In [75]:

a = RoseWind_Speed(ravenna)
a.shape

Out[75]:

(8,)

In [84]:

showRoseWind_Speed(RoseWind_Speed(milano),'Milano') 

In [85]:

showRoseWind_Speed(RoseWind_Speed(bologna),'Bologna') 

 

仅供参考学习,严禁转载!

 

 

  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值