03.pandas

03.pandas

笔记

import pandas as pd
import numpy as np
# Build a Series from a plain list; pandas assigns the default integer index.
values = [1, 2, 2, 2, 5, 4, 5, 5, 5, 5, 6, 8]
s1 = pd.Series(values)
print(s1)
# Same data again, this time labelled with one letter per element.
labels = list("abcdefghijkl")
s1 = pd.Series(values, index=labels)
print(s1)

# Build a Series from a dict: the keys become the index labels.
temp_dict = dict(name="xxx", age=25, tel=123456789)
s2 = pd.Series(temp_dict)
print(s2)

# DataFrame basics. Axis convention: index (rows) is axis 0, columns is axis 1.
grid = np.arange(12).reshape(3, 4)
s3 = pd.DataFrame(grid)
print(s3)
# Build a DataFrame from a dict of equal-length lists: one column per key.
# NOTE(review): "sge" is probably a typo for "age"; it is kept unchanged
# because it is the column name that appears in the printed result.
d1 = dict(
    name=["xiaoming", "xiaohong"],
    sge=[20, 30],
    tel=[12345215, 1544646],
)
s4 = pd.DataFrame(d1)
print(s4)

# Load the sample data set and take a first look at it.
df = pd.read_csv("../BJXS1_df_test_with_needsnow.csv")
first_rows = df.head(10)
print(first_rows)
summary = df.describe()
print(summary)
# df.info() writes its report itself and returns None, so wrapping it in
# print() also emits a trailing "None" line.
print(df.info())
# Sort the rows by the AAGA_max column, largest values first.
df = df.sort_values("AAGA_max", ascending=False)
print(df.head(10))
# First 20 rows of the sorted frame.
top20 = df.head(20)
print(top20)
# The "AAGA_max" column of those same 20 rows.
print(top20["AAGA_max"])

# Label-based selection with .loc / .at
s5 = pd.DataFrame(
    np.arange(12).reshape(3, 4),
    index=list("abc"),
    columns=list("wxyz"),
)
print(s5)
# A single cell: row "a", column "z".
print(s5.at["a", "z"])
# One whole row, returned as a Series.
print(s5.loc["a"])
# One whole column, returned as a Series.
print(s5["w"])
# Several rows and several columns at once.
row_labels = ["a", "c"]
col_labels = ["w", "z"]
print(s5.loc[row_labels, col_labels])

# Position-based selection with .iloc
picked_cols = [2, 1]
# All rows, columns at positions 2 and 1 (in that order).
print(s5.iloc[:, picked_cols])
# Rows at positions 0 and 2, same two columns.
print(s5.iloc[[0, 2], picked_cols])
# From the second row onwards, first two columns.
print(s5.iloc[1:, :2])

# Boolean indexing: keep the rows whose AAFB_mean lies strictly between 4 and 6.
# Each comparison is parenthesised because & binds tighter than < and >.
in_range = (df["AAFB_mean"] > 4) & (df["AAFB_mean"] < 6)
print(df[in_range])

# Working with missing values (NaN).
s6 = pd.DataFrame(np.arange(12).reshape(3, 4), index=list("abc"), columns=list("wxyz"))
# Integer columns cannot hold NaN. Cast the two target columns to float
# explicitly rather than relying on the implicit int -> float upcast that
# assigning None into an integer column triggers; recent pandas versions
# deprecate that silent upcast on assignment.
s6 = s6.astype({"x": float, "y": float})
s6.iloc[[1], [1, 2]] = np.nan
print(s6)
# Boolean frame: True where a cell is missing.
print(pd.isnull(s6))
# Boolean frame: True where a cell is present.
print(pd.notnull(s6))
# Keep only the rows whose "x" value is not NaN.
print(s6[pd.notnull(s6["x"])])
# Drop every row that contains at least one NaN.
print(s6.dropna(axis=0, how="any"))
# Drop a row only when all of its values are NaN.
print(s6.dropna(axis=0, how="all"))
# Note on inplace=True: s6.dropna(axis=0, how="any", inplace=True) would
# modify s6 itself and return None, so printing that call would show "None".
# Fill each NaN with the mean of its own column (returns a new frame).
a = s6.fillna(s6.mean())
print(a)
# Fill a single column only, writing the result back into s6.
s6["x"] = s6["x"].fillna(s6["x"].mean())
print(s6["x"])


输出

0     1
1     2
2     2
3     2
4     5
5     4
6     5
7     5
8     5
9     5
10    6
11    8
dtype: int64
a    1
b    2
c    2
d    2
e    5
f    4
g    5
h    5
i    5
j    5
k    6
l    8
dtype: int64
name          xxx
age            25
tel     123456789
dtype: object
   0  1   2   3
0  0  1   2   3
1  4  5   6   7
2  8  9  10  11
       name  sge       tel
0  xiaoming   20  12345215
1  xiaohong   30   1544646
     areaDate  AAAA_max  AAGA_max  ...  AAFB_mean  AAHA1D_sum  needsnow
0  2019-01-03       -11       793  ...          5          66         0
1  2019-01-04       -17       793  ...          5           0         0
2  2019-01-05        -7       793  ...          1           0         0
3  2019-01-06       -10       792  ...          3           0         0
4  2019-01-07       -16       792  ...          4           0         0
5  2019-01-08       -13       795  ...          3           0         0
6  2019-01-09        -8       795  ...          2           0         0
7  2019-01-10        -8       790  ...          4          66         0
8  2019-01-11        -6       790  ...          1          66         0
9  2019-01-12        -8       791  ...          2           0         0

[10 rows x 18 columns]
         AAAA_max    AAGA_max    AADA_max  ...   AAFB_mean  AAHA1D_sum    needsnow
count  140.000000  140.000000  140.000000  ...  140.000000  140.000000  140.000000
mean     1.757143  790.928571   62.535714  ...    2.685714    9.564286   -0.421429
std     10.529077    3.170554   19.070264  ...    1.330796   23.625133    0.495561
min    -20.000000  782.000000   24.000000  ...    1.000000    0.000000   -1.000000
25%     -7.000000  789.000000   48.000000  ...    2.000000    0.000000   -1.000000
50%      1.000000  791.000000   63.000000  ...    2.000000    0.000000    0.000000
75%     11.000000  792.250000   78.000000  ...    4.000000    0.000000    0.000000
max     23.000000  799.000000   99.000000  ...    6.000000   81.000000    0.000000

[8 rows x 17 columns]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 140 entries, 0 to 139
Data columns (total 18 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   areaDate    140 non-null    object
 1   AAAA_max    140 non-null    int64 
 2   AAGA_max    140 non-null    int64 
 3   AADA_max    140 non-null    int64 
 4   AAEB_max    140 non-null    int64 
 5   AAFB_max    140 non-null    int64 
 6   AAAA_min    140 non-null    int64 
 7   AAGA_min    140 non-null    int64 
 8   AADA_min    140 non-null    int64 
 9   AAEB_min    140 non-null    int64 
 10  AAFB_min    140 non-null    int64 
 11  AAAA_mean   140 non-null    int64 
 12  AAGA_mean   140 non-null    int64 
 13  AADA_mean   140 non-null    int64 
 14  AAEB_mean   140 non-null    int64 
 15  AAFB_mean   140 non-null    int64 
 16  AAHA1D_sum  140 non-null    int64 
 17  needsnow    140 non-null    int64 
dtypes: int64(17), object(1)
memory usage: 19.8+ KB
None
       areaDate  AAAA_max  AAGA_max  ...  AAFB_mean  AAHA1D_sum  needsnow
107  2019-05-05         4       799  ...          5           0        -1
108  2019-05-06         8       799  ...          4           0        -1
23   2019-01-26        -5       799  ...          2           0         0
22   2019-01-25       -11       799  ...          5           0         0
83   2019-04-03         6       797  ...          2           0        -1
82   2019-04-02         2       797  ...          2          77        -1
125  2019-05-27        10       796  ...          6           0        -1
42   2019-02-20        -5       796  ...          3           0         0
43   2019-02-21         2       796  ...          1           0         0
126  2019-05-28        17       796  ...          3           0        -1

[10 rows x 18 columns]
       areaDate  AAAA_max  AAGA_max  ...  AAFB_mean  AAHA1D_sum  needsnow
107  2019-05-05         4       799  ...          5           0        -1
108  2019-05-06         8       799  ...          4           0        -1
23   2019-01-26        -5       799  ...          2           0         0
22   2019-01-25       -11       799  ...          5           0         0
83   2019-04-03         6       797  ...          2           0        -1
82   2019-04-02         2       797  ...          2          77        -1
125  2019-05-27        10       796  ...          6           0        -1
42   2019-02-20        -5       796  ...          3           0         0
43   2019-02-21         2       796  ...          1           0         0
126  2019-05-28        17       796  ...          3           0        -1
44   2019-02-22         2       795  ...          1           0         0
45   2019-02-23         1       795  ...          1           0         0
5    2019-01-08       -13       795  ...          3           0         0
6    2019-01-09        -8       795  ...          2           0         0
104  2019-05-01        11       795  ...          2           0        -1
109  2019-05-07        13       795  ...          2           0        -1
56   2019-03-07         4       795  ...          2           0         0
55   2019-03-06        -8       795  ...          4           0         0
72   2019-03-23        -4       794  ...          3           0         0
21   2019-01-24        -4       794  ...          3           0         0

[20 rows x 18 columns]
107    799
108    799
23     799
22     799
83     797
82     797
125    796
42     796
43     796
126    796
44     795
45     795
5      795
6      795
104    795
109    795
56     795
55     795
72     794
21     794
Name: AAGA_max, dtype: int64
   w  x   y   z
a  0  1   2   3
b  4  5   6   7
c  8  9  10  11
3
w    0
x    1
y    2
z    3
Name: a, dtype: int32
a    0
b    4
c    8
Name: w, dtype: int32
   w   z
a  0   3
c  8  11
    y  x
a   2  1
b   6  5
c  10  9
    y  x
a   2  1
c  10  9
   w  x
b  4  5
c  8  9
       areaDate  AAAA_max  AAGA_max  ...  AAFB_mean  AAHA1D_sum  needsnow
107  2019-05-05         4       799  ...          5           0        -1
22   2019-01-25       -11       799  ...          5           0         0
1    2019-01-04       -17       793  ...          5           0         0
0    2019-01-03       -11       793  ...          5          66         0
12   2019-01-15       -19       793  ...          5           0         0
135  2019-06-09        15       791  ...          5           0        -1
24   2019-01-27        -8       790  ...          5           0         0
13   2019-01-16       -12       790  ...          5           0         0
19   2019-01-22        -7       789  ...          5           0         0
31   2019-02-03       -13       786  ...          5           0         0
61   2019-03-12        -9       786  ...          5           0         0
60   2019-03-11        -4       785  ...          5           0         0

[12 rows x 18 columns]
   w    x     y   z
a  0  1.0   2.0   3
b  4  NaN   NaN   7
c  8  9.0  10.0  11
       w      x      y      z
a  False  False  False  False
b  False   True   True  False
c  False  False  False  False
      w      x      y     z
a  True   True   True  True
b  True  False  False  True
c  True   True   True  True
   w    x     y   z
a  0  1.0   2.0   3
c  8  9.0  10.0  11
   w    x     y   z
a  0  1.0   2.0   3
c  8  9.0  10.0  11
   w    x     y   z
a  0  1.0   2.0   3
b  4  NaN   NaN   7
c  8  9.0  10.0  11
   w    x     y   z
a  0  1.0   2.0   3
b  4  5.0   6.0   7
c  8  9.0  10.0  11
a    1.0
b    5.0
c    9.0
Name: x, dtype: float64

Process finished with exit code 0

PPT
在这里插入图片描述
在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值