pandas数据分析

十套练习使用pandas数据分析.6

import pandas as pd
import numpy as np
# Load the whitespace-delimited wind data set. Columns 0, 1 and 2 are merged
# and parsed into a single date column.
# The separator is written as a raw string so that "\s" reaches the regex
# engine untouched instead of being treated as an invalid string escape.
# NOTE(review): the nested-list form of parse_dates is reported as deprecated
# in recent pandas releases - confirm against the installed version.
data = pd.read_table(
    "D:/东华研/数据分析/pandas_exercise/exercise_data/wind.data",
    sep=r'\s+',
    parse_dates=[[0, 1, 2]],
)
data.head()
     Yr_Mo_Dy    RPT    VAL    ROS    KIL    SHA    BIR    DUB    CLA    MUL    CLO    BEL    MAL
0  2061-01-01  15.04  14.96  13.17   9.29    NaN   9.87  13.67  10.25  10.83  12.58  18.50  15.04
1  2061-01-02  14.71    NaN  10.83   6.50  12.62   7.67  11.50  10.04   9.79   9.67  17.54  13.83
2  2061-01-03  18.50  16.88  12.33  10.13  11.17   6.17  11.25    NaN   8.50   7.67  12.75  12.71
3  2061-01-04  10.58   6.63  11.75   4.58   4.54   2.88   8.63   1.79   5.83   5.88   5.46  10.88
4  2061-01-05  13.33  13.25  11.42   6.17  10.71   8.21  11.92   6.54  10.92  10.34  12.92  11.83
import datetime
def fix_century(x):
    """Return *x* as a ``datetime.date``, moved back one century when needed.

    Any value whose year is greater than 1989 has 100 subtracted from its
    year; earlier years are kept as they are. Only the ``year``, ``month``
    and ``day`` attributes of *x* are read, so any time-of-day part is
    dropped in the returned date.
    """
    if x.year > 1989:
        corrected_year = x.year - 100
    else:
        corrected_year = x.year
    return datetime.date(corrected_year, x.month, x.day)

# Run fix_century on every value of the 'Yr_Mo_Dy' column and write the
# corrected dates back into the same column.
data['Yr_Mo_Dy'] = data['Yr_Mo_Dy'].apply(fix_century)

# data.info()
# Show the first rows to check the corrected years.
data.head()
     Yr_Mo_Dy    RPT    VAL    ROS    KIL    SHA    BIR    DUB    CLA    MUL    CLO    BEL    MAL
0  1961-01-01  15.04  14.96  13.17   9.29    NaN   9.87  13.67  10.25  10.83  12.58  18.50  15.04
1  1961-01-02  14.71    NaN  10.83   6.50  12.62   7.67  11.50  10.04   9.79   9.67  17.54  13.83
2  1961-01-03  18.50  16.88  12.33  10.13  11.17   6.17  11.25    NaN   8.50   7.67  12.75  12.71
3  1961-01-04  10.58   6.63  11.75   4.58   4.54   2.88   8.63   1.79   5.83   5.88   5.46  10.88
4  1961-01-05  13.33  13.25  11.42   6.17  10.71   8.21  11.92   6.54  10.92  10.34  12.92  11.83
# Convert the 'Yr_Mo_Dy' column to pandas datetimes with pd.to_datetime,
# then print the frame summary to check the resulting column dtypes.
data["Yr_Mo_Dy"] = pd.to_datetime(data["Yr_Mo_Dy"])
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6574 entries, 0 to 6573
Data columns (total 13 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   Yr_Mo_Dy  6574 non-null   datetime64[ns]
 1   RPT       6568 non-null   float64       
 2   VAL       6571 non-null   float64       
 3   ROS       6572 non-null   float64       
 4   KIL       6569 non-null   float64       
 5   SHA       6572 non-null   float64       
 6   BIR       6574 non-null   float64       
 7   DUB       6571 non-null   float64       
 8   CLA       6572 non-null   float64       
 9   MUL       6571 non-null   float64       
 10  CLO       6573 non-null   float64       
 11  BEL       6574 non-null   float64       
 12  MAL       6570 non-null   float64       
dtypes: datetime64[ns](1), float64(12)
memory usage: 667.8 KB
# Use the date column as the row index; set_index returns a new frame,
# which is bound back to the name `data`.
data=data.set_index('Yr_Mo_Dy')
data.head()
              RPT    VAL    ROS    KIL    SHA    BIR    DUB    CLA    MUL    CLO    BEL    MAL
Yr_Mo_Dy
1961-01-01  15.04  14.96  13.17   9.29    NaN   9.87  13.67  10.25  10.83  12.58  18.50  15.04
1961-01-02  14.71    NaN  10.83   6.50  12.62   7.67  11.50  10.04   9.79   9.67  17.54  13.83
1961-01-03  18.50  16.88  12.33  10.13  11.17   6.17  11.25    NaN   8.50   7.67  12.75  12.71
1961-01-04  10.58   6.63  11.75   4.58   4.54   2.88   8.63   1.79   5.83   5.88   5.46  10.88
1961-01-05  13.33  13.25  11.42   6.17  10.71   8.21  11.92   6.54  10.92  10.34  12.92  11.83
# Count the missing values in each column.
data.isnull().sum()
RPT    6
VAL    3
ROS    2
KIL    5
SHA    2
BIR    0
DUB    3
CLA    2
MUL    3
CLO    1
BEL    0
MAL    4
dtype: int64
# Count the non-missing values in each column: total row count minus the
# per-column number of missing values.
data.shape[0]-data.isnull().sum()
RPT    6568
VAL    6571
ROS    6572
KIL    6569
SHA    6572
BIR    6574
DUB    6571
CLA    6572
MUL    6571
CLO    6573
BEL    6574
MAL    6570
dtype: int64
# Average the per-column means into one overall figure.
# Note: this is the mean of the column means, not the mean over all
# individual values (columns have different numbers of missing entries).
data.mean().mean()
10.227982360836924
# Column-wise summary statistics: one row per column of `data`, holding its
# minimum, maximum, mean and variance.
lco_stars = pd.DataFrame({
    'min': data.min(),
    'max': data.max(),
    'mean': data.mean(),
    'var': data.var(),
})
lco_stars
      min    max       mean        var
RPT  0.67  35.80  12.362987  31.566564
VAL  0.21  33.37  10.644314  27.745044
ROS  1.50  33.84  11.660526  25.084571
KIL  0.00  28.46   6.306468  13.001874
SHA  0.13  37.54  10.455834  24.365332
BIR  0.00  26.16   7.092254  15.750446
DUB  0.00  30.37   9.797343  24.776050
CLA  0.00  31.08   8.495053  20.245042
MUL  0.00  25.88   8.493590  17.362826
CLO  0.04  28.21   8.707332  20.285606
BEL  0.13  42.38  13.121007  34.047657
MAL  0.67  42.54  15.599079  44.887233
# Row-wise summary statistics: for every row of `data`, the minimum,
# maximum, mean and variance taken across its columns (axis=1).
lco_stars_1 = pd.DataFrame({
    'min': data.min(axis=1),
    'max': data.max(axis=1),
    'mean': data.mean(axis=1),
    'var': data.var(axis=1),
})
lco_stars_1.head()
             min    max       mean        var
Yr_Mo_Dy
1961-01-01  9.29  18.50  13.018182   7.889776
1961-01-02  6.50  17.54  11.336364  10.169685
1961-01-03  6.17  18.50  11.641818  13.556476
1961-01-04  1.79  11.75   6.619167  10.228008
1961-01-05  6.17  13.33  10.630000   5.979764
# Copy the row index into an ordinary column named 'date' so it can be used
# like any other column.
data['date'] = data.index
data.head()
              RPT    VAL    ROS    KIL    SHA    BIR    DUB    CLA    MUL    CLO    BEL    MAL        date
Yr_Mo_Dy
1961-01-01  15.04  14.96  13.17   9.29    NaN   9.87  13.67  10.25  10.83  12.58  18.50  15.04  1961-01-01
1961-01-02  14.71    NaN  10.83   6.50  12.62   7.67  11.50  10.04   9.79   9.67  17.54  13.83  1961-01-02
1961-01-03  18.50  16.88  12.33  10.13  11.17   6.17  11.25    NaN   8.50   7.67  12.75  12.71  1961-01-03
1961-01-04  10.58   6.63  11.75   4.58   4.54   2.88   8.63   1.79   5.83   5.88   5.46  10.88  1961-01-04
1961-01-05  13.33  13.25  11.42   6.17  10.71   8.21  11.92   6.54  10.92  10.34  12.92  11.83  1961-01-05
# Mean of every numeric column over all rows whose index falls in January.
# numeric_only=True keeps the datetime 'date' column out of the average:
# without it pandas emits a FutureWarning, and versions that include
# datetime columns in mean() would return a different result.
data[data.index.month == 1].mean(numeric_only=True)
C:\WINDOWS\TEMP/ipykernel_5984/279758014.py:1: FutureWarning: DataFrame.mean and DataFrame.median with numeric_only=None will include datetime64 and datetime64tz columns in a future version.
  data[data.index.month==1].mean()





RPT    14.847325
VAL    12.914560
ROS    13.299624
KIL     7.199498
SHA    11.667734
BIR     8.054839
DUB    11.819355
CLA     9.512047
MUL     9.543208
CLO    10.053566
BEL    14.550520
MAL    18.028763
dtype: float64
# Split the 'date' column into month / year / day helper columns using the
# vectorized .dt accessors instead of calling a Python lambda per row.
data['month'] = data['date'].dt.month
data['year'] = data['date'].dt.year
data['day'] = data['date'].dt.day

# Keep only the January rows, then average the columns from 'RPT' through
# 'MAL' (label-based slice, both ends inclusive).
january_winds = data.query('month == 1')
january_winds.loc[:, 'RPT':'MAL'].mean()
RPT    14.847325
VAL    12.914560
ROS    13.299624
KIL     7.199498
SHA    11.667734
BIR     8.054839
DUB    11.819355
CLA     9.512047
MUL     9.543208
CLO    10.053566
BEL    14.550520
MAL    18.028763
dtype: float64
# Display the whole frame.
data
RPTVALROSKILSHABIRDUBCLAMULCLOBELMALdatemonthyearday
Yr_Mo_Dy
1961-01-0115.0414.9613.179.29NaN9.8713.6710.2510.8312.5818.5015.041961-01-01119611
1961-01-0214.71NaN10.836.5012.627.6711.5010.049.799.6717.5413.831961-01-02119612
1961-01-0318.5016.8812.3310.1311.176.1711.25NaN8.507.6712.7512.711961-01-03119613
1961-01-0410.586.6311.754.584.542.888.631.795.835.885.4610.881961-01-04119614
1961-01-0513.3313.2511.426.1710.718.2111.926.5410.9210.3412.9211.831961-01-05119615
...................................................
1978-12-2717.5816.9617.628.0813.2111.6714.4615.5914.0414.0017.2140.081978-12-2712197827
1978-12-2813.215.4613.465.008.129.4214.3316.2515.2518.0521.7941.461978-12-2812197828
1978-12-2914.0010.2914.428.719.7110.5419.1712.4614.5016.4218.8829.581978-12-2912197829
1978-12-3018.5014.0421.299.1312.759.7118.0812.8712.4612.1214.6728.791978-12-3012197830
1978-12-3120.3317.4127.299.5912.0810.1319.2511.6311.5811.3812.0822.081978-12-3112197831

6574 rows × 16 columns

# Select the rows where both 'month' and 'day' equal 1, i.e. every January 1st.
data.query('month==1 and day==1')
RPTVALROSKILSHABIRDUBCLAMULCLOBELMALdatemonthyearday
Yr_Mo_Dy
1961-01-0115.0414.9613.179.29NaN9.8713.6710.2510.8312.5818.5015.041961-01-01119611
1962-01-019.293.4211.543.502.211.9610.412.793.545.174.387.921962-01-01119621
1963-01-0115.5913.6219.798.3812.2510.0023.4515.7113.5914.3717.5834.131963-01-01119631
1964-01-0125.8022.1318.2113.2521.2914.7914.1219.5813.2516.7528.9621.001964-01-01119641
1965-01-019.5411.929.004.386.085.2110.256.085.718.6312.0417.411965-01-01119651
1966-01-0122.0421.5017.0812.7522.1715.5921.7918.1216.6617.8328.3323.791966-01-01119661
1967-01-016.464.466.503.216.673.7911.383.837.719.0810.6720.911967-01-01119671
1968-01-0130.0417.8816.2516.2521.7912.5418.1616.6218.7517.6222.2527.291968-01-01119681
1969-01-016.131.635.411.082.541.008.502.424.586.349.1716.711969-01-01119691
1970-01-019.592.9611.793.426.134.089.004.467.293.507.3313.001970-01-01119701
1971-01-013.710.794.710.171.421.044.630.751.541.084.219.541971-01-01119711
1972-01-019.293.6314.544.256.754.4213.005.3310.048.548.7119.171972-01-01119721
1973-01-0116.5015.9214.627.418.2911.2113.547.7910.4610.7913.379.711973-01-01119731
1974-01-0123.2116.5416.089.7515.8311.469.5413.5413.8316.6617.2125.291974-01-01119741
1975-01-0114.0413.5411.295.4612.585.588.128.969.295.177.7111.631975-01-01119751
1976-01-0118.3417.6714.838.0016.6210.1313.179.0413.135.7511.3814.961976-01-01119761
1977-01-0120.0411.9220.259.139.298.0410.755.889.009.0014.8825.701977-01-01119771
1978-01-018.337.127.713.548.507.5014.7110.0011.8310.0015.0920.461978-01-01119781
# Select the rows where 'day' equals 1, i.e. the first day of every month.
data.query('day==1')
RPTVALROSKILSHABIRDUBCLAMULCLOBELMALdatemonthyearday
Yr_Mo_Dy
1961-01-0115.0414.9613.179.29NaN9.8713.6710.2510.8312.5818.5015.041961-01-01119611
1961-02-0114.2515.129.045.8812.087.1710.173.636.505.509.178.001961-02-01219611
1961-03-0112.6713.1311.796.429.798.5410.2513.29NaN12.2120.62NaN1961-03-01319611
1961-04-018.386.348.336.759.339.5411.678.2111.216.4611.967.171961-04-01419611
1961-05-0115.8713.8815.379.7913.4610.179.9614.049.759.9218.6311.121961-05-01519611
...................................................
1978-08-0119.3315.0920.178.8312.6210.419.3312.339.509.9215.7518.001978-08-01819781
1978-09-018.426.139.875.253.215.717.253.507.336.507.6215.961978-09-01919781
1978-10-019.506.8310.503.886.134.584.216.506.386.5410.6314.091978-10-011019781
1978-11-0113.5916.7511.257.0811.048.338.1711.2910.7511.2523.1325.001978-11-011119781
1978-12-0121.2916.2924.0412.7918.2119.2921.5417.2116.7117.8317.7525.701978-12-011219781

216 rows × 16 columns


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值