数据分析——从入门到精通(二十)

城市气候与海洋的关系研究

import numpy as np
import pandas as pd 
from pandas import Series,DataFrame

import matplotlib.pyplot as plt
%matplotlib inline

plt.rcParams['font.sans-serif'] = ['SimSun']
plt.rcParams['axes.unicode_minus'] = False
导入各个海滨城市的数据
  • os.listdir('data3')
  • filter() 过滤出所有的.csv文件,返回一个可迭代的Filter对象
  • df.append()以行的方式追加,返回一个新的DataFrame(该方法在 pandas 1.4 中已弃用、2.0 中已移除,新版本请改用 pd.concat())
import os
# Print the name of every CSV file found in the data3 directory.
csv_names = [filename for filename in os.listdir('data3') if filename.endswith('.csv')]
for csv_file in csv_names:
    print(csv_file)
asti_150715.csv
asti_250715.csv
asti_270615.csv
bologna_150715.csv
bologna_250715.csv
bologna_270615.csv
cesena_150715.csv
cesena_250715.csv
cesena_270615.csv
faenza_150715.csv
faenza_250715.csv
faenza_270615.csv
ferrara_150715.csv
ferrara_250715.csv
ferrara_270615.csv
mantova_150715.csv
mantova_250715.csv
mantova_270615.csv
milano_150715.csv
milano_250715.csv
milano_270615.csv
piacenza_150715.csv
piacenza_250715.csv
piacenza_270615.csv
ravenna_150715.csv
ravenna_250715.csv
ravenna_270615.csv
torino_150715.csv
torino_250715.csv
torino_270615.csv

在这里插入图片描述

# Preview the first rows of each CSV file in data3.
for csv_file in (name for name in os.listdir('data3') if name.endswith('.csv')):
    df = pd.read_csv(f'data3/{csv_file}')
    preview = df.head()
    display(preview)
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0028.05661014Sky is Clear14368631762.5742.501Asti2015-07-14 10:39:36315
1129.51641014Sky is Clear14368667591.54263.000Asti2015-07-14 11:39:19315
2230.39581017Sky is Clear14368705102.60100.000Asti2015-07-14 12:41:50315
3331.10541017Sky is Clear14368740982.1090.000Asti2015-07-14 13:41:38315
4433.23451016few clouds14368776452.10120.000Asti2015-07-14 14:40:45315
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0028.79541012Sky is Clear14377308503.1070Asti2015-07-24 11:40:50315
1130.02581012Sky is Clear14377344933.1070Asti2015-07-24 12:41:33315
2230.79491007light rain14377380452.0624Asti2015-07-24 13:40:45315
3332.02511011few clouds14377415794.1010Asti2015-07-24 14:39:39315
4430.93511011few clouds14377451882.60120Asti2015-07-24 15:39:48315
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0022.68601018Sky is Clear14353909252.1080.000Asti2015-06-27 09:42:05315
1124.05601018Sky is Clear14353942432.6050.000Asti2015-06-27 10:37:23315
2226.56571018Sky is Clear14353990172.10100.000Asti2015-06-27 11:56:57315
3327.20571017Sky is Clear14354024202.1070.000Asti2015-06-27 12:53:40315
4428.56291017Sky is Clear14354060562.06154.505Asti2015-06-27 13:54:16315
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0029.98571021.0Sky is Clear14368631010.5190.0Bologna2015-07-14 10:38:2171
1130.26511021.0moderate rain14368666911.03157.0Bologna2015-07-14 11:38:1171
2232.36461021.0sky is clear14368703922.0667.0Bologna2015-07-14 12:39:5271
3331.16471021.0moderate rain14368740002.0690.0Bologna2015-07-14 13:40:0071
4433.48441021.0sky is clear14368775492.06135.0Bologna2015-07-14 14:39:0971
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0030.61621017Sky is Clear14377307642.06315Bologna2015-07-24 11:39:2471
1132.47581009light rain14377344061.0369Bologna2015-07-24 12:40:0671
2233.64541008light rain14377379612.0628Bologna2015-07-24 13:39:2171
3333.82511008moderate rain14377414892.0674Bologna2015-07-24 14:38:0971
4431.87521008light rain14377451265.14201Bologna2015-07-24 15:38:4671
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0025.26491023Sky is Clear14353908011.54225.0Bologna2015-06-27 09:40:0171
1126.52571016Sky is Clear14353942040.51256.0Bologna2015-06-27 10:36:4471
2228.49531016Sky is Clear14353986530.51325.0Bologna2015-06-27 11:50:5371
3330.00421022light rain14354020841.5422.0Bologna2015-06-27 12:48:0471
4429.86441015light rain14354057222.0628.0Bologna2015-06-27 13:48:4271
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0029.15831015moderate rain14368631013.6294.001Cesena2015-07-14 10:38:2114
1129.37741015moderate rain14368666913.6020.000Cesena2015-07-14 11:38:1114
2229.51781015moderate rain14368703923.6070.000Cesena2015-07-14 12:39:5214
3329.88701016moderate rain14368740004.6060.000Cesena2015-07-14 13:40:0014
4430.12701016moderate rain14368775494.1070.000Cesena2015-07-14 14:39:0914
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0032.35621010moderate rain14377307641.060.0Cesena2015-07-24 11:39:2414
1132.40751009moderate rain14377344063.640.0Cesena2015-07-24 12:40:0614
2232.24751009moderate rain14377379613.670.0Cesena2015-07-24 13:39:2114
3332.26791009moderate rain14377414895.170.0Cesena2015-07-24 14:38:0914
4432.81701008moderate rain14377451253.660.0Cesena2015-07-24 15:38:4514
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0023.34821017very heavy rain14353876231.91175.511Cesena2015-06-27 08:47:0314
1124.95691018very heavy rain14353908012.01159.500Cesena2015-06-27 09:40:0114
2225.67731017very heavy rain14353942042.10100.000Cesena2015-06-27 10:36:4414
3326.17691017very heavy rain14353986523.10120.000Cesena2015-06-27 11:50:5214
4427.07611016very heavy rain14354020833.10110.000Cesena2015-06-27 12:48:0314
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0029.40831015moderate rain14368631773.6294.001Faenza2015-07-14 10:39:3737
1130.12781015moderate rain14368667593.1080.000Faenza2015-07-14 11:39:1937
2230.10781015moderate rain14368705103.6070.000Faenza2015-07-14 12:41:5037
3330.75741015moderate rain14368740994.6090.000Faenza2015-07-14 13:41:3937
4430.71661015moderate rain14368776465.10100.000Faenza2015-07-14 14:40:4637
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0032.08791010moderate rain14377308501.5310.0Faenza2015-07-24 11:40:5037
1132.40751009moderate rain14377344933.640.0Faenza2015-07-24 12:41:3337
2232.74751009moderate rain14377380463.670.0Faenza2015-07-24 13:40:4637
3332.70791009moderate rain14377415795.170.0Faenza2015-07-24 14:39:3937
4432.33701008moderate rain14377451883.660.0Faenza2015-07-24 15:39:4837
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0025.44691018very heavy rain14353909251.2914.5002Faenza2015-06-27 09:42:0537
1126.38731017very heavy rain14353942432.10100.0000Faenza2015-06-27 10:37:2337
2227.70691017very heavy rain14353990193.10120.0000Faenza2015-06-27 11:56:5937
3329.04611016very heavy rain14354024223.10110.0000Faenza2015-06-27 12:53:4237
4429.11691016very heavy rain14354060583.60110.0000Faenza2015-06-27 13:54:1837
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0030.44601011.0moderate rain14368630961.03180.0Ferrara2015-07-14 10:38:1647
1131.40581011.0moderate rain14368666851.54135.0Ferrara2015-07-14 11:38:0547
2231.95541011.0moderate rain14368703870.51113.0Ferrara2015-07-14 12:39:4747
3332.06501011.0moderate rain14368739892.0690.0Ferrara2015-07-14 13:39:4947
4432.63491010.0moderate rain14368775351.5468.0Ferrara2015-07-14 14:38:5547
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0031.33491006moderate rain14377307571.0323.000Ferrara2015-07-24 11:39:1747
1132.91451006moderate rain14377343991.5445.000Ferrara2015-07-24 12:39:5947
2233.43421005moderate rain14377379562.12123.003Ferrara2015-07-24 13:39:1647
3333.43421004moderate rain14377414822.12123.003Ferrara2015-07-24 14:38:0247
4432.66421004moderate rain14377451210.5123.000Ferrara2015-07-24 15:38:4147
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0023.13741013very heavy rain14353876071.54135.0000Ferrara2015-06-27 08:46:4747
1125.02661013very heavy rain14353907900.5145.0000Ferrara2015-06-27 09:39:5047
2226.79581013very heavy rain14353942015.14158.0000Ferrara2015-06-27 10:36:4147
3328.43481012very heavy rain14353986221.2914.5002Ferrara2015-06-27 11:50:2247
4429.44431012very heavy rain14354020501.54203.0000Ferrara2015-06-27 12:47:3047
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0028.66511016Sky is Clear14368631132.1140Mantova2015-07-14 10:38:33121
1130.10451016Sky is Clear14368667002.10Mantova2015-07-14 11:38:20121
2230.14421016Sky is Clear14368704062.1170Mantova2015-07-14 12:40:06121
3330.74451016Sky is Clear14368740121.50Mantova2015-07-14 13:40:12121
4431.22381015Sky is Clear14368775611.5180Mantova2015-07-14 14:39:21121
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0029.86451011few clouds14377307731.00Mantova2015-07-24 11:39:33121
1131.14481011few clouds14377344192.6130Mantova2015-07-24 12:40:19121
2232.21461010few clouds14377379692.6130Mantova2015-07-24 13:39:29121
3333.62411009few clouds14377415012.6120Mantova2015-07-24 14:38:21121
4434.18381009few clouds14377451333.6130Mantova2015-07-24 15:38:53121
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0023.40601018Sky is Clear14353876601.570Mantova2015-06-27 08:47:40121
1125.60541018Sky is Clear14353908151.5110Mantova2015-06-27 09:40:15121
2227.23391018Sky is Clear14353942081.5170Mantova2015-06-27 10:36:48121
3328.11321018Sky is Clear14353987051.5210Mantova2015-06-27 11:51:45121
4428.88351018Sky is Clear14354021301.5180Mantova2015-06-27 12:48:50121
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0028.57541016Sky is Clear14368631752.1100Milano2015-07-14 10:39:35250
1129.74481016Sky is Clear14368667582.60Milano2015-07-14 11:39:18250
2231.12481016Sky is Clear14368705092.6140Milano2015-07-14 12:41:49250
3332.16451015Sky is Clear14368740982.10Milano2015-07-14 13:41:38250
4433.59431015Sky is Clear14368776443.180Milano2015-07-14 14:40:44250
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0029.50481011few clouds14377308493.690Milano2015-07-24 11:40:49250
1130.81451011few clouds14377344922.670Milano2015-07-24 12:41:32250
2231.91451010proximity shower rain14377380453.180Milano2015-07-24 13:40:45250
3332.72401009proximity shower rain14377415783.6130Milano2015-07-24 14:39:38250
4433.44381009Sky is Clear14377451883.6130Milano2015-07-24 15:39:48250
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0024.69601017Sky is Clear14353909252.6140Milano2015-06-27 09:42:05250
1125.34571017Sky is Clear14353942432.1160Milano2015-06-27 10:37:23250
2227.70511017Sky is Clear14353990151.5210Milano2015-06-27 11:56:55250
3328.36421017Sky is Clear14354024162.1220Milano2015-06-27 12:53:36250
4429.45421016few clouds14354060542.6210Milano2015-06-27 13:54:14250
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0027.99541016Sky is Clear14368630962.10100.0Piacenza2015-07-14 10:38:16200
1129.13481016Sky is Clear14368666852.600.0Piacenza2015-07-14 11:38:05200
2230.21481016Sky is Clear14368703872.60140.0Piacenza2015-07-14 12:39:47200
3331.40451015Sky is Clear14368739902.100.0Piacenza2015-07-14 13:39:50200
4431.88481010Sky is Clear14368775350.5123.0Piacenza2015-07-14 14:38:55200
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0029.82481011few clouds14377307573.6090.0Piacenza2015-07-24 11:39:17200
1131.22521006Sky is Clear14377343993.0855.0Piacenza2015-07-24 12:39:59200
2232.56451010Sky is Clear14377379563.1080.0Piacenza2015-07-24 13:39:16200
3333.03401009few clouds14377414823.60130.0Piacenza2015-07-24 14:38:02200
4433.73381009Sky is Clear14377451213.60130.0Piacenza2015-07-24 15:38:41200
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0022.57671013Sky is Clear14353876071.03279.0Piacenza2015-06-27 08:46:47200
1124.61601017Sky is Clear14353907902.60140.0Piacenza2015-06-27 09:39:50200
2225.48571017Sky is Clear14353942012.10160.0Piacenza2015-06-27 10:36:41200
3326.52531012Sky is Clear14353986221.540.0Piacenza2015-06-27 11:50:22200
4428.00491012sky is clear14354020501.03230.0Piacenza2015-06-27 12:47:30200
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0029.10741015moderate rain14368631773.1010.0Ravenna2015-07-14 10:39:378
1129.51741015moderate rain14368667593.6020.0Ravenna2015-07-14 11:39:198
2229.63701016moderate rain14368705113.6040.0Ravenna2015-07-14 12:41:518
3330.17371015moderate rain14368741064.6390.0Ravenna2015-07-14 13:41:468
4430.45341015moderate rain14368776463.0887.0Ravenna2015-07-14 14:40:468
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0032.18541010moderate rain14377308512.11330.003Ravenna2015-07-24 11:40:518
1132.37621010moderate rain14377344942.6020.000Ravenna2015-07-24 12:41:348
2232.79751009moderate rain14377380463.6070.000Ravenna2015-07-24 13:40:468
3332.75791009moderate rain14377415805.1070.000Ravenna2015-07-24 14:39:408
4432.72701008moderate rain14377451883.1010.000Ravenna2015-07-24 15:39:488
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0024.86611017very heavy rain14353909252.01159.5Ravenna2015-06-27 09:42:058
1125.71731017very heavy rain14353942442.10100.0Ravenna2015-06-27 10:37:248
2226.41611017very heavy rain14353990214.6080.0Ravenna2015-06-27 11:57:018
3328.29541016very heavy rain14354024234.6090.0Ravenna2015-06-27 12:53:438
4428.47541016very heavy rain14354060606.2080.0Ravenna2015-06-27 13:54:208
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0028.34651017Sky is Clear14368631093.120Torino2015-07-14 10:38:29357
1129.25651017Sky is Clear14368666963.180Torino2015-07-14 11:38:16357
2230.40581017Sky is Clear14368703992.6100Torino2015-07-14 12:39:59357
3331.37541017Sky is Clear14368740052.190Torino2015-07-14 13:40:05357
4432.59451016few clouds14368775582.1120Torino2015-07-14 14:39:18357
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0030.29541012Sky is Clear14377307703.170Torino2015-07-24 11:39:30357
1130.68581012Sky is Clear14377344133.170Torino2015-07-24 12:40:13357
2230.96581012few clouds14377379663.670Torino2015-07-24 13:39:26357
3330.91511011few clouds14377414974.110Torino2015-07-24 14:38:17357
4432.20511011few clouds14377451302.6120Torino2015-07-24 15:38:50357
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0023.80641019Sky is Clear14353876511.00Torino2015-06-27 08:47:31357
1124.68601018Sky is Clear14353908042.180Torino2015-06-27 09:40:04357
2225.12601018Sky is Clear14353942062.650Torino2015-06-27 10:36:46357
3325.54601018Sky is Clear14353986812.650Torino2015-06-27 11:51:21357
4427.42571017Sky is Clear14354021122.170Torino2015-06-27 12:48:32357
# Read every CSV in data3 and stack them row-wise into one DataFrame.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and calling
# it in a loop re-copies the accumulated frame on every iteration; reading all
# frames first and concatenating once avoids both problems.
# The file names are sorted because os.listdir() returns them in arbitrary order.
frames = [
    pd.read_csv(f'data3/{csv_file}')
    for csv_file in sorted(os.listdir('data3'))
    if csv_file.endswith('.csv')
]
# No ignore_index: each file keeps its own 0..n-1 row labels, exactly as the
# append()-based version produced.
full_df = pd.concat(frames)
full_df.head()
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0028.05661014.0Sky is Clear14368631762.5742.501Asti2015-07-14 10:39:36315
1129.51641014.0Sky is Clear14368667591.54263.000Asti2015-07-14 11:39:19315
2230.39581017.0Sky is Clear14368705102.60100.000Asti2015-07-14 12:41:50315
3331.10541017.0Sky is Clear14368740982.1090.000Asti2015-07-14 13:41:38315
4433.23451016.0few clouds14368776452.10120.000Asti2015-07-14 14:40:45315
full_df.shape
(675, 11)
pd.read_csv('data3/asti_150715.csv')
Unnamed: 0temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
0028.05661014Sky is Clear14368631762.5742.501Asti2015-07-14 10:39:36315
1129.51641014Sky is Clear14368667591.54263.000Asti2015-07-14 11:39:19315
2230.39581017Sky is Clear14368705102.60100.000Asti2015-07-14 12:41:50315
3331.10541017Sky is Clear14368740982.1090.000Asti2015-07-14 13:41:38315
4433.23451016few clouds14368776452.10120.000Asti2015-07-14 14:40:45315
5532.95461016few clouds14368813292.10110.000Asti2015-07-14 15:42:09315
6634.31461015few clouds14368849292.10100.000Asti2015-07-14 16:42:09315
7732.47491015few clouds14368885132.10100.000Asti2015-07-14 17:41:53315
8834.28461015few clouds14368921321.50100.000Asti2015-07-14 18:42:12315
9931.41461014Sky is Clear14368957232.10240.000Asti2015-07-14 19:42:03315
101029.72581014Sky is Clear14368993362.1070.000Asti2015-07-14 20:42:16315
111127.68621015Sky is Clear14369029431.5040.000Asti2015-07-14 21:42:23315
121226.44691015Sky is Clear14369065321.5030.000Asti2015-07-14 22:42:12315
131326.17691016Sky is Clear14369101491.50330.000Asti2015-07-14 23:42:29315
141425.21781014Sky is Clear14369137390.510.000Asti2015-07-15 00:42:19315
151524.45731016Sky is Clear14369173112.60350.000Asti2015-07-15 01:41:51315
161624.32731016Sky is Clear14369209312.60340.000Asti2015-07-15 02:42:11315
171724.24731016Sky is Clear14369244262.10360.000Asti2015-07-15 03:40:26315
181824.00731016Sky is Clear14369280781.50310.000Asti2015-07-15 04:41:18315
191923.00831017broken clouds14369317180.500.000Asti2015-07-15 05:41:58315
202023.00781017scattered clouds14369352983.10350.000Asti2015-07-15 06:41:38315
212125.00731017few clouds14369388821.50330.000Asti2015-07-15 07:41:22315
222227.00651017few clouds14369425160.500.000Asti2015-07-15 08:41:56315
232327.00651017Sky is Clear14369459512.1050.000Asti2015-07-15 09:39:11315
pd.read_csv('data3/asti_150715.csv').shape
(24, 11)
  • 查看列名
full_df.columns
Index(['Unnamed: 0', 'temp', 'humidity', 'pressure', 'description', 'dt',
       'wind_speed', 'wind_deg', 'city', 'day', 'dist'],
      dtype='object')
  • 去除没用的列 Unnamed: 0
# Alternative 1: `del full_df['Unnamed: 0']` removes a single column.
# Alternative 2 (used here): drop the column in place with DataFrame.drop.
full_df.drop(columns='Unnamed: 0',inplace=True)
full_df.columns
Index(['temp', 'humidity', 'pressure', 'description', 'dt', 'wind_speed',
       'wind_deg', 'city', 'day', 'dist'],
      dtype='object')
显示最高温度与离海远近的关系
  • 按city分组,计算temp和dist的最大值
  • 按dist排序,df.sort_values()
full_df.head()
temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
028.05661014.0Sky is Clear14368631762.5742.501Asti2015-07-14 10:39:36315
129.51641014.0Sky is Clear14368667591.54263.000Asti2015-07-14 11:39:19315
230.39581017.0Sky is Clear14368705102.60100.000Asti2015-07-14 12:41:50315
331.10541017.0Sky is Clear14368740982.1090.000Asti2015-07-14 13:41:38315
433.23451016.0few clouds14368776452.10120.000Asti2015-07-14 14:40:45315
full_df.dtypes
temp           float64
humidity         int64
pressure       float64
description     object
dt               int64
wind_speed     float64
wind_deg       float64
city            object
day             object
dist             int64
dtype: object
# Per-city maximum temperature and distance from the sea, ordered by distance.
# The column selection must be a list: indexing a groupby with a tuple of keys
# (['temp', 'dist'] written without the inner brackets) was deprecated in
# pandas 1.0 and raises an error from pandas 2.0 onwards.
dist_temp = full_df.groupby('city')[['temp', 'dist']].max().sort_values('dist')
dist_temp
tempdist
city
Ravenna32.798
Cesena32.8114
Faenza32.7437
Ferrara33.4347
Bologna33.8571
Mantova34.18121
Piacenza33.92200
Milano34.81250
Asti34.31315
Torino34.69357
画图显示关系
  • scatter()显示距离与温度的散布图
  • plot()显示距离与温度的线性图
# Distance from the sea (x) against maximum temperature (y): one red marker
# per city, joined by a blue dotted line.
distances, max_temps = dist_temp['dist'], dist_temp['temp']
plt.scatter(distances, max_temps, s=50, c='r')
plt.plot(distances, max_temps, ls=':', c='b')
plt.show()

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-6nraV5HF-1649956930001)(output_20_0.png)]

读取Asti城市某一天的温度变化数据,并画出线型图和散点图,如2015-7-14这一天的天气变化
  • 处理day时间格式
  • 转成年、月、日的date日期
  • 或转成时、分、秒的字符串
  • 将绘制的画布保存成一张png图片
full_df.city.unique()
array(['Asti', 'Bologna', 'Cesena', 'Faenza', 'Ferrara', 'Mantova',
       'Milano', 'Piacenza', 'Ravenna', 'Torino'], dtype=object)
# Take an independent copy of the Asti rows. New columns are assigned to this
# frame further down, and assigning into a bare query() result makes pandas
# emit SettingWithCopyWarning because it may be a slice of full_df.
asti = full_df.query('city=="Asti"').copy()
asti.head()
temphumiditypressuredescriptiondtwind_speedwind_degcitydaydist
028.05661014.0Sky is Clear14368631762.5742.501Asti2015-07-14 10:39:36315
129.51641014.0Sky is Clear14368667591.54263.000Asti2015-07-14 11:39:19315
230.39581017.0Sky is Clear14368705102.60100.000Asti2015-07-14 12:41:50315
331.10541017.0Sky is Clear14368740982.1090.000Asti2015-07-14 13:41:38315
433.23451016.0few clouds14368776452.10120.000Asti2015-07-14 14:40:45315
def covert_date_hour(item):
    """Slice a timestamp string such as '2015-07-14 10:39:36' into two fields.

    Returns a Series whose 'date' entry is the first 10 characters of ``item``
    and whose 'hour' entry is characters 11-12.
    """
    date_part = item[:10]
    hour_part = item[11:13]
    return Series([date_part, hour_part], index=['date', 'hour'])
asti['day'].map(covert_date_hour)
0     date    2015-07-14
hour            10
dtype: o...
1     date    2015-07-14
hour            11
dtype: o...
2     date    2015-07-14
hour            12
dtype: o...
3     date    2015-07-14
hour            13
dtype: o...
4     date    2015-07-14
hour            14
dtype: o...
                            ...                        
15    date    2015-06-28
hour            04
dtype: o...
16    date    2015-06-28
hour            05
dtype: o...
17    date    2015-06-28
hour            06
dtype: o...
18    date    2015-06-28
hour            07
dtype: o...
19    date    2015-06-28
hour            08
dtype: o...
Name: day, Length: 68, dtype: object
def covert_date(item):
    """Return the first 10 characters of ``item`` (the date of a timestamp string)."""
    date_span = slice(None, 10)
    return item[date_span]
def covert_hour(item):
    """Return characters 11-12 of ``item`` (the hour of a timestamp string)."""
    hour_span = slice(11, 13)
    return item[hour_span]
asti['day'].map(covert_date)
0     2015-07-14
1     2015-07-14
2     2015-07-14
3     2015-07-14
4     2015-07-14
         ...    
15    2015-06-28
16    2015-06-28
17    2015-06-28
18    2015-06-28
19    2015-06-28
Name: day, Length: 68, dtype: object
# Add the date and hour columns derived from the day timestamp.
# assign() builds a new frame and the name is rebound to it, so nothing is
# written into a frame that pandas may regard as a slice of full_df; the
# item-assignment form (asti['date'] = ...) raised SettingWithCopyWarning.
asti = asti.assign(
    date=asti['day'].map(covert_date),
    hour=asti['day'].map(covert_hour),
)
asti.head()
D:\yingyong\Anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
D:\yingyong\Anaconda3\lib\site-packages\ipykernel_launcher.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
temphumiditypressuredescriptiondtwind_speedwind_degcitydaydistdatehour
028.05661014.0Sky is Clear14368631762.5742.501Asti2015-07-14 10:39:363152015-07-1410
129.51641014.0Sky is Clear14368667591.54263.000Asti2015-07-14 11:39:193152015-07-1411
230.39581017.0Sky is Clear14368705102.60100.000Asti2015-07-14 12:41:503152015-07-1412
331.10541017.0Sky is Clear14368740982.1090.000Asti2015-07-14 13:41:383152015-07-1413
433.23451016.0few clouds14368776452.10120.000Asti2015-07-14 14:40:453152015-07-1414
asti.query('date == "2015-07-14"')[['hour','temp']]
hourtemp
01028.05
11129.51
21230.39
31331.10
41433.23
51532.95
61634.31
71732.47
81834.28
91931.41
102029.72
112127.68
122226.44
132326.17
# Hour-by-hour temperature for Asti on 2015-07-14, drawn as a line.
is_target_day = asti['date'] == "2015-07-14"
asti1 = asti.loc[is_target_day, ['hour', 'temp']]
plt.plot(asti1['hour'], asti1['temp'])
[<matplotlib.lines.Line2D at 0x9541270>]

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-kjPDQ3Q1-1649956930004)(output_30_1.png)]

查看asti的一周的湿度变化的数据,并画出散点与线型图
  • 按天进行分组
  • 将分组后的date设置为列
asti.head()
temphumiditypressuredescriptiondtwind_speedwind_degcitydaydistdatehour
028.05661014.0Sky is Clear14368631762.5742.501Asti2015-07-14 10:39:363152015-07-1410
129.51641014.0Sky is Clear14368667591.54263.000Asti2015-07-14 11:39:193152015-07-1411
230.39581017.0Sky is Clear14368705102.60100.000Asti2015-07-14 12:41:503152015-07-1412
331.10541017.0Sky is Clear14368740982.1090.000Asti2015-07-14 13:41:383152015-07-1413
433.23451016.0few clouds14368776452.10120.000Asti2015-07-14 14:40:453152015-07-1414
asti.date.unique()
array(['2015-07-14', '2015-07-15', '2015-07-24', '2015-07-25',
       '2015-06-27', '2015-06-28'], dtype=object)
asti.columns
Index(['temp', 'humidity', 'pressure', 'description', 'dt', 'wind_speed',
       'wind_deg', 'city', 'day', 'dist', 'date', 'hour'],
      dtype='object')
asti.groupby('date')['humidity'].mean()
date
2015-06-27    54.083333
2015-06-28    75.250000
2015-07-14    55.571429
2015-07-15    73.400000
2015-07-24    55.692308
2015-07-25    73.272727
Name: humidity, dtype: float64
asti.groupby('date')['humidity'].mean().reset_index(drop=False)
datehumidity
02015-06-2754.083333
12015-06-2875.250000
22015-07-1455.571429
32015-07-1573.400000
42015-07-2455.692308
52015-07-2573.272727
asti2 = asti.groupby('date')['humidity'].mean().reset_index(drop=False)
asti2
datehumidity
02015-06-2754.083333
12015-06-2875.250000
22015-07-1455.571429
32015-07-1573.400000
42015-07-2455.692308
52015-07-2573.272727
plt.scatter(asti2.date,asti2.humidity)
<matplotlib.collections.PathCollection at 0xabab650>

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-XZ2Ksdmk-1649956930005)(output_38_1.png)]

plt.plot(asti2.date,asti2.humidity)
[<matplotlib.lines.Line2D at 0xabab730>]

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-eowQyvZw-1649956930008)(output_39_1.png)]

观察发现,离海近的可以形成一条直线,离海远的也能形成一条直线
分别以100公里和50公里为分界点,划分为离海近和离海远的两组数据
  • dist_temp['dist'] > 50 # 远距离
  • dist_temp['dist'] < 100 # 近距离
dist_temp
tempdist
city
Ravenna32.798
Cesena32.8114
Faenza32.7437
Ferrara33.4347
Bologna33.8571
Mantova34.18121
Piacenza33.92200
Milano34.81250
Asti34.31315
Torino34.69357
dist_temp.query('dist >50')
tempdist
city
Bologna33.8571
Mantova34.18121
Piacenza33.92200
Milano34.81250
Asti34.31315
Torino34.69357
dist_temp.query('dist < 100')
tempdist
city
Ravenna32.798
Cesena32.8114
Faenza32.7437
Ferrara33.4347
Bologna33.8571
# Split the cities into two groups by distance from the sea: farther than 50
# (dt1) and closer than 100 (dt2). The ranges overlap, so a city between the
# two thresholds appears in both groups.
dt1 = dist_temp.query('dist > 50')
dt2 = dist_temp.query('dist < 100')
display(dt1, dt2)
tempdist
city
Bologna33.8571
Mantova34.18121
Piacenza33.92200
Milano34.81250
Asti34.31315
Torino34.69357
tempdist
city
Ravenna32.798
Cesena32.8114
Faenza32.7437
Ferrara33.4347
Bologna33.8571
  • 画出远、近的距离与温度的散布图(散点图)
plt.scatter(dt1.dist,dt1.temp,c='r',s =50)
plt.scatter(dt2.dist,dt2.temp,c='b',s = 100)
plt.show()

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-la8ofRuI-1649956930009)(output_46_0.png)]

使用线性回归预测给定距离的温度
  • 机器学习的线性回归模型LinearRegression
  • scikit-learn 模块中线性模型
  • sklearn.linear_model
  • 创建回归模型对象
  • linear = LinearRegression()
  • 训练模型
  • fit(x,y)
  • x 是实际数据的自变量,必须是二维数组
  • y 实际数据的因变量
  • 预测模型
  • predict(x)根据预测函数,计算x的结果
  • x也是一个二维数组
from sklearn.linear_model import LinearRegression
# Linear regression is a supervised model: it learns from labelled targets.
# fit() needs a 2-D feature array; reshape(-1, 1) yields a single column with
# as many rows as there are values.
far_distances = dt1.dist.values.reshape(-1, 1)
linear = LinearRegression()
linear.fit(far_distances, dt1.temp)  # train on the dt1 group
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
linear.intercept_  # b
33.733483439733746
linear.coef_    # w
array([0.00255639])
  • 线性函数:y = w*x + b

  • 创建线性回归模型对象

训练模型
  • 一个被训练过的模型,就是一个预测函数表达式f(x)=wx+b
预测模型
# Evaluation grids: 50 evenly spaced distances for each group.
x1 = np.linspace(50, 400, num=50)  # range used for the far-from-sea line
x2 = np.linspace(0, 100, num=50)   # range used for the near-sea line

# predict() takes a 2-D array, hence the added column axis.
y1 = linear.predict(x1[:, np.newaxis])

# Rebind `linear` to a fresh model fitted on the dt2 group, then predict.
linear = LinearRegression()
linear.fit(dt2.dist.values.reshape(-1, 1), dt2.temp)
y2 = linear.predict(x2[:, np.newaxis])

# dt1 in red, dt2 in blue: observed points followed by the fitted line.
plt.scatter(dt1.dist, dt1.temp, s=50, c='r')
plt.plot(x1, y1, color='r')
plt.scatter(dt2.dist, dt2.temp, s=100, c='b')
plt.plot(x2, y2, color='b')
plt.show()

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-RlZvzJgo-1649956930012)(output_58_0.png)]

风向与风速的关系
  • 查看Asti城市的风向wind_deg与风速wind_speed
  • 按wind_deg风向排序
  • 重新创建索引 reset_index()
asti
temphumiditypressuredescriptiondtwind_speedwind_degcitydaydistdatehour
028.05661014.0Sky is Clear14368631762.5742.501Asti2015-07-14 10:39:363152015-07-1410
129.51641014.0Sky is Clear14368667591.54263.000Asti2015-07-14 11:39:193152015-07-1411
230.39581017.0Sky is Clear14368705102.60100.000Asti2015-07-14 12:41:503152015-07-1412
331.10541017.0Sky is Clear14368740982.1090.000Asti2015-07-14 13:41:383152015-07-1413
433.23451016.0few clouds14368776452.10120.000Asti2015-07-14 14:40:453152015-07-1414
.......................................
1518.44771018.0few clouds14354600392.6030.000Asti2015-06-28 04:53:593152015-06-2804
1618.01771018.0Sky is Clear14354638772.10360.000Asti2015-06-28 05:57:573152015-06-2805
1718.58841016.0Sky is Clear14354671790.88321.501Asti2015-06-28 06:52:593152015-06-2806
1820.08731018.0Sky is Clear14354708491.000.000Asti2015-06-28 07:54:093152015-06-2807
1920.98681018.0Sky is Clear14354744681.000.000Asti2015-06-28 08:54:283152015-06-2808

68 rows × 12 columns

# Wind direction and speed for Asti, ordered by direction.
# sort_values() is chained so it returns a new frame; sorting the column
# selection in place is what raised SettingWithCopyWarning.
asti_ds = asti[['wind_deg', 'wind_speed']].sort_values('wind_deg')
asti_ds
D:\yingyong\Anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
wind_degwind_speed
190.01.0
190.00.5
50.02.1
70.01.0
80.01.0
.........
15350.02.6
20350.03.1
17360.02.1
14360.01.5
16360.02.1

68 rows × 2 columns

asti_ds.reset_index(inplace=True)
asti_ds
indexwind_degwind_speed
000.01.0
110.00.5
220.02.1
330.01.0
440.01.0
............
6363350.02.6
6464350.03.1
6565360.02.1
6666360.01.5
6767360.02.1

68 rows × 3 columns

asti_ds.drop(columns='index',inplace=True)
asti_ds
wind_degwind_speed
00.01.0
10.00.5
20.02.1
30.01.0
40.01.0
.........
63350.02.6
64350.03.1
65360.02.1
66360.01.5
67360.02.1

68 rows × 2 columns

画出风向与风速的线性图
  • 按wind_deg分组,并计算wind_speed的平均风速,最后再plot()画出线形图
asti_ds_mean = asti_ds.groupby('wind_deg').mean()
asti_ds_mean
wind_speed
wind_deg
0.0001.018667
10.0003.600000
24.0002.060000
30.0002.050000
40.0001.500000
42.5012.570000
50.0002.075000
70.0002.600000
80.0002.100000
90.0001.800000
100.0002.083333
110.0002.100000
120.0002.350000
140.0001.500000
154.5052.060000
170.0005.700000
180.0001.000000
240.0002.100000
263.0001.540000
270.0002.600000
280.0002.100000
290.0001.500000
300.0003.600000
310.0001.500000
320.0002.600000
321.5010.880000
330.0001.500000
340.0002.600000
350.0002.233333
360.0001.900000
plt.plot(asti_ds_mean)
[<matplotlib.lines.Line2D at 0xc3b2170>]

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-AMsY0fwC-1649956930015)(output_66_1.png)]

在子图中,同时比较风向与湿度和风力的关系
  • 查看humidity列的数据类型
  • 通过pd.to_numeric()将字符类型转成数值型
asti_dsht = asti[['wind_deg','wind_speed','humidity','temp']]
asti_dsht
wind_degwind_speedhumiditytemp
042.5012.576628.05
1263.0001.546429.51
2100.0002.605830.39
390.0002.105431.10
4120.0002.104533.23
...............
1530.0002.607718.44
16360.0002.107718.01
17321.5010.888418.58
180.0001.007320.08
190.0001.006820.98

68 rows × 4 columns

asti_dsht = asti_dsht.sort_values('wind_deg').reset_index(drop=True)
asti_dsht
wind_degwind_speedhumiditytemp
00.01.06820.98
10.00.58323.00
20.02.14829.12
30.01.05429.04
40.01.06527.89
...............
63350.02.67324.45
64350.03.17823.00
65360.02.17324.24
66360.01.57319.81
67360.02.17718.01

68 rows × 4 columns

asti_dsht_mean = asti_dsht.pivot_table(index='wind_deg')
asti_dsht_mean
humiditytempwind_speed
wind_deg
0.00068.26666724.8820001.018667
10.00062.66666726.5000003.600000
24.00049.00000030.7900002.060000
30.00073.00000022.4400002.050000
40.00062.00000027.6800001.500000
42.50166.00000028.0500002.570000
50.00054.50000027.7600002.075000
70.00056.75000028.9325002.600000
80.00060.00000022.6800002.100000
90.00061.50000027.5800001.800000
100.00051.66666731.1383332.083333
110.00046.00000032.9500002.100000
120.00048.00000032.0800002.350000
140.00045.00000031.4400001.500000
154.50529.00000028.5600002.060000
170.00058.00000029.1000005.700000
180.00061.00000026.8700001.000000
240.00046.00000031.4100002.100000
263.00064.00000029.5100001.540000
270.00061.00000026.5300002.600000
280.00064.00000023.1600002.100000
290.00064.00000022.8400001.500000
300.00057.00000026.2900003.600000
310.00073.00000024.0000001.500000
320.00073.00000021.5900002.600000
321.50184.00000018.5800000.880000
330.00073.00000024.2166671.500000
340.00074.33333322.1066672.600000
350.00078.00000023.0366672.233333
360.00074.33333320.6866671.900000
# Three side-by-side panels showing wind direction against wind speed,
# humidity and temperature.
columns = ('wind_speed', 'humidity', 'temp')

plt.figure(figsize=(15, 6))
for i, column in enumerate(columns):
    line_colour = np.random.random(size=3)  # random RGB triple for this panel
    plt.subplot(1, len(columns), i + 1)  # subplot positions are 1-based
    plt.plot(asti_dsht_mean[column], c=line_colour)
    plt.title('风向  Vs ' + column, size=20)

plt.show()

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-HooUXJcF-1649956930017)(output_71_0.png)]

角度 转成 弧度
  • 角度/180 = 弧度/pi, 弧度 = 角度 * pi / 180
  • asti_wind[(asti_wind['wind_deg'] >= deg) & (asti_wind['wind_deg'] < 45+deg)]['wind_deg']
由于风向是360度,我们可以考虑使用玫瑰图(极坐标条形图)
首先自定义一个画图函数
用numpy创建一个直方图,将360度划分为8个面元,将数据分类到这8个面元中
x = np.arange(360,step=45)
x
array([  0,  45,  90, 135, 180, 225, 270, 315])
asti_ds_mean
wind_speed
wind_deg
0.0001.018667
10.0003.600000
24.0002.060000
30.0002.050000
40.0001.500000
42.5012.570000
50.0002.075000
70.0002.600000
80.0002.100000
90.0001.800000
100.0002.083333
110.0002.100000
120.0002.350000
140.0001.500000
154.5052.060000
170.0005.700000
180.0001.000000
240.0002.100000
263.0001.540000
270.0002.600000
280.0002.100000
290.0001.500000
300.0003.600000
310.0001.500000
320.0002.600000
321.5010.880000
330.0001.500000
340.0002.600000
350.0002.233333
360.0001.900000
ds = asti_ds_mean.reset_index()
ds
wind_degwind_speed
00.0001.018667
110.0003.600000
224.0002.060000
330.0002.050000
440.0001.500000
542.5012.570000
650.0002.075000
770.0002.600000
880.0002.100000
990.0001.800000
10100.0002.083333
11110.0002.100000
12120.0002.350000
13140.0001.500000
14154.5052.060000
15170.0005.700000
16180.0001.000000
17240.0002.100000
18263.0001.540000
19270.0002.600000
20280.0002.100000
21290.0001.500000
22300.0003.600000
23310.0001.500000
24320.0002.600000
25321.5010.880000
26330.0001.500000
27340.0002.600000
28350.0002.233333
29360.0001.900000
x = np.arange(360,step=45)
ds[(ds.wind_deg >=0) & (ds.wind_deg < 45)]
wind_degwind_speed
00.0001.018667
110.0003.600000
224.0002.060000
330.0002.050000
440.0001.500000
542.5012.570000
# Lower edges of the eight 45-degree sectors: 0, 45, ..., 315.
x = np.arange(360, step=45)
# Fold 360 onto 0 before binning. The sectors are half-open [deg, deg + 45),
# so a direction recorded as exactly 360 would otherwise match no sector and
# be silently left out of the averages.
wrapped_deg = ds.wind_deg % 360
# Mean wind speed of the rows falling in each sector.
speeds = [ds.loc[(wrapped_deg >= deg) & (wrapped_deg < deg + 45), 'wind_speed'].mean()
          for deg in x]
speeds
[2.133111111111111,
 2.2583333333333333,
 2.0833333333333335,
 3.0866666666666664,
 1.0,
 1.82,
 2.2600000000000002,
 1.9626666666666666]
x1 = x*np.pi /180
x1
array([0.        , 0.78539816, 1.57079633, 2.35619449, 3.14159265,
       3.92699082, 4.71238898, 5.49778714])
# Create polar axes and draw one bar per sector with a random RGB colour;
# align='edge' anchors each bar's edge at its angle instead of centring it.
sector_colours = np.random.random(size=(8, 3))
plt.subplot(1, 1, 1, polar=True)
plt.bar(x1, speeds, align='edge', color=sector_colours)
<BarContainer object of 8 artists>

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-1ClgJAlf-1649956930019)(output_82_1.png)]

# Same polar bar chart, now with an explicit bar width of 0.7.
sector_colours = np.random.random(size=(8, 3))
plt.subplot(1, 1, 1, polar=True)
plt.bar(x1, speeds, width=0.7, align='edge', color=sector_colours)
<BarContainer object of 8 artists>

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-0da82Ymf-1649956930020)(output_83_1.png)]

# Polar bar chart with the eight sector angles labelled by compass names.
compass_labels = ['东','东北','北','西北','西','西南','南','东南']
sector_colours = np.random.random(size=(8, 3))
plt.subplot(1, 1, 1, polar=True)
plt.bar(x1, speeds, width=0.7, align='edge', color=sector_colours)
# Replace the default angle ticks with the compass labels.
plt.xticks(x1, compass_labels)
([<matplotlib.projections.polar.ThetaTick at 0xaf8e930>,
  <matplotlib.projections.polar.ThetaTick at 0xaedf590>,
  <matplotlib.projections.polar.ThetaTick at 0xae54650>,
  <matplotlib.projections.polar.ThetaTick at 0xae544b0>,
  <matplotlib.projections.polar.ThetaTick at 0xade56d0>,
  <matplotlib.projections.polar.ThetaTick at 0xade5af0>,
  <matplotlib.projections.polar.ThetaTick at 0xade52f0>,
  <matplotlib.projections.polar.ThetaTick at 0xade5030>],
 <a list of 8 Text xticklabel objects>)

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-drpikEag-1649956930021)(output_84_1.png)]

练习:计算米兰各个方向上的风速
full_df.city.unique()
array(['Asti', 'Bologna', 'Cesena', 'Faenza', 'Ferrara', 'Mantova',
       'Milano', 'Piacenza', 'Ravenna', 'Torino'], dtype=object)
def draw_polar(full_df,city,compare_colname,title=None):
    """Draw a polar bar chart of a column's mean value per wind-direction sector.

    Rows of ``full_df`` whose ``city`` column equals ``city`` are binned into
    eight 45-degree sectors by ``wind_deg``; the mean of ``compare_colname``
    within each sector becomes that sector's bar height.

    Parameters
    ----------
    full_df : DataFrame with ``city``, ``wind_deg`` and ``compare_colname`` columns.
    city : value matched against the ``city`` column.
    compare_colname : name of the column averaged per sector.
    title : optional text; when given, ``city + '城市的' + title`` is used as
        the figure title.

    NOTE(review): the tick labels put East at angle 0 and run counter-clockwise,
    which is the default polar-axes layout. If ``wind_deg`` follows the
    meteorological convention (0 = North, increasing clockwise) the labels and
    axis orientation would need adjusting -- confirm against the data source.
    """
    # Boolean selection returns a new frame, so the caller's data is never
    # modified. No sorting is needed: sector means do not depend on row order.
    city_wind = full_df.loc[full_df['city'] == city, ['wind_deg', compare_colname]]

    # Fold 360 onto 0 before binning. The sectors are half-open [deg, deg + 45),
    # so a direction recorded as exactly 360 would otherwise match no sector
    # and be silently dropped from the chart.
    wind_deg = city_wind['wind_deg'] % 360
    values = city_wind[compare_colname]

    # Lower edges of the eight sectors: 0, 45, ..., 315.
    degs = np.arange(0, 360, step=45)

    # Mean of the compared column inside each sector.
    heights = [values[(wind_deg >= deg) & (wind_deg < deg + 45)].mean() for deg in degs]

    # Degrees to radians: rad = deg * pi / 180.
    x = degs*np.pi/180

    plt.figure(figsize=(6,6))
    plt.axes(polar = True, frameon = True)

    # One bar per sector, anchored at the sector's lower edge, lifted slightly
    # off the centre (bottom=0.1) and given a random RGB colour.
    plt.bar(x,height=heights,
            align='edge',
            bottom=0.1,
            width=0.75,
            color=np.random.random(size=(8,3)))

    plt.xticks(x,('东','东北','北','西北','西','西南','南','东南'),
               fontsize=14)

    if title:
        plt.title(city+'城市的'+ title,fontsize=20,color='r',
                 loc='center',position=(0.5,1.1))

    plt.show()
draw_polar(full_df,'Milano','wind_speed','风向与风速的关系')

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-vIASitam-1649956930023)(output_88_0.png)]

draw_polar(full_df,'Milano','temp','风向与温度的关系')

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-I6m0Cbz4-1649956930025)(output_89_0.png)]

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

今晚务必早点睡

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值