数据处理Pandas学习笔记(一)

import pandas as pd

pandas之Series创建

t = pd.Series([1, 2, 31, 12, 3, 4])
t
0     1
1     2
2    31
3    12
4     3
5     4
dtype: int64
type(t)
pandas.core.series.Series

series指定索引

t2 = pd.Series([1,23,3,2,3],index=list('abcde'))
t2
a     1
b    23
c     3
d     2
e     3
dtype: int64
import numpy as np
import string
t2 = pd.Series(np.arange(10),index=list(string.ascii_uppercase[:10]))
t2
A    0
B    1
C    2
D    3
E    4
F    5
G    6
H    7
I    8
J    9
dtype: int32

通过字典创建一个series

temp_dict = {'name':'xiaohong','age':18,'tel':10086}
temp_dict
{'name': 'xiaohong', 'age': 18, 'tel': 10086}
t3 = pd.Series(temp_dict)
t3
name    xiaohong
age           18
tel        10086
dtype: object
t3.dtype
dtype('O')

Pandas切片

t3['age']
18
t3.iloc[0]  # first element by position; plain t3[0] on a string-labelled Series relies on a deprecated positional fallback
'xiaohong'
t3.iloc[[1, 2]]  # second and third entries by position (.iloc makes the positional intent explicit)
age       18
tel    10086
dtype: object
t3[:3]  # 取出前三行
name    xiaohong
age           18
tel        10086
dtype: object
t3[['age','tel']]
age       18
tel    10086
dtype: object
t
0     1
1     2
2    31
3    12
4     3
5     4
dtype: int64
t[t>4]  # 把大于四的取出来
2    31
3    12
dtype: int64

pandas取出索引

t3.index
Index(['name', 'age', 'tel'], dtype='object')
for i in t3.index:
    print(i)
name
age
tel
type(t3.index)
pandas.core.indexes.base.Index
list(t3.index)[:2]
['name', 'age']
t3.values
array(['xiaohong', 18, 10086], dtype=object)
type(t3.values)
numpy.ndarray

读取文件

df = pd.read_csv('./can.csv')
df
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
1201.0040.090-0.125
01201.004-0.043-0.125
11200.9690.090-0.121
21200.973-0.012-0.137
31201.000-0.016-0.121
41200.9610.082-0.121
..................
15299431001.0510.090-0.262
15299531000.9180.039-0.129
15299631001.156-0.094-0.227
15299731000.9340.203-0.172
15299831001.199-0.1760.109

152999 rows × 5 columns

df.head(10)
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
1201.0040.090-0.125
01201.004-0.043-0.125
11200.9690.090-0.121
21200.973-0.012-0.137
31201.000-0.016-0.121
41200.9610.082-0.121
51200.973-0.055-0.109
61201.0000.012-0.133
71200.969-0.102-0.141
81200.973-0.059-0.125
91201.0120.043-0.133
import pandas as pd 
import numpy as np
pd.DataFrame(np.arange(12).reshape(3,4))
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
0123
00123
14567
2891011
pd.DataFrame(np.arange(12).reshape(3,4),index=list('abc'), columns=list('WXYZ'))
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
WXYZ
a0123
b4567
c891011
d1 = {'name':['xiaoming','xiaogang'],'age':[12,20]}
d1
{'name': ['xiaoming', 'xiaogang'], 'age': [12, 20]}
t1 = pd.DataFrame(d1)
t1
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
nameage
0xiaoming12
1xiaogang20
d2 = [{'name':'xioahong','age':20,'tel':10020},{'name':'xioaming','tel':123231},{'name':'xiaowang','age':18}]
d2
[{'name': 'xioahong', 'age': 20, 'tel': 10020},
 {'name': 'xioaming', 'tel': 123231},
 {'name': 'xiaowang', 'age': 18}]
t2 = pd.DataFrame(d2)
t2
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
nameagetel
0xioahong20.010020.0
1xioamingNaN123231.0
2xiaowang18.0NaN
# NOTE(review): the output below shows the first record of jd.csv being used as
# the column labels, so the file presumably has no header row — confirm, and
# consider pd.read_csv(..., header=None) if so.
df = pd.read_csv('./jd.csv')
print(df.head())  # head() shows the first five rows by default
  乐高(LEGO)积木 艺术系列ART 31202 米奇米妮 18岁+ 儿童玩具 马赛克像素画 男孩女孩成人情人节礼物     乐高京东自营旗舰店  \
0                              林家铺子水果罐头 什锦罐头 200g*2罐             林家铺子官方旗舰店   
1  羽生结弦:王者之路( 超人气花样滑冰冠军羽生结弦全新传记,全面展示羽生10年成长经历和心路历程!)                 中信出版社   
2            【话费慢充】全国电信话费充值手机特惠慢充话费200元 72小时内到账 200元          易士捷通讯充值拼购专营店   
3  豪皇 潮汕牛肉丸500g*2包 火锅食材牛丸 烧烤丸串生鲜潮汕年夜饭火锅丸子 汕头手打牛肉丸 年货             邻家小厨生鲜专营店   
4  伊利奶粉【全新升级】 金领冠系列 幼儿配方奶粉 3段1200克特惠三联装(1-3岁幼儿适用)...           伊利母婴京东自营旗舰店   

       764  1099.00    https://item.jd.com/100017067554.html  
0     8274     6.90  https://item.jd.com/10029836000540.html  
1    42023    49.00        https://item.jd.com/13598042.html  
2      664   191.99    https://item.jd.com/200151598576.html  
3       32    39.00     https://item.jd.com/65414277974.html  
4  1480578   146.00         https://item.jd.com/1100526.html  
# df.info() prints its summary itself and returns None, which is why the
# captured output ends with an extra "None" line from the outer print().
print(df.info())  # summarise every column: name, non-null count, dtype
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6054 entries, 0 to 6053
Data columns (total 5 columns):
 #   Column                                                      Non-Null Count  Dtype  
---  ------                                                      --------------  -----  
 0   乐高(LEGO)积木 艺术系列ART 31202 米奇米妮 18岁+ 儿童玩具 马赛克像素画 男孩女孩成人情人节礼物  6054 non-null   object 
 1   乐高京东自营旗舰店                                                   5894 non-null   object 
 2   764                                                         6054 non-null   int64  
 3   1099.00                                                     6054 non-null   float64
 4   https://item.jd.com/100017067554.html                       6054 non-null   object 
dtypes: float64(1), int64(1), object(3)
memory usage: 236.6+ KB
None
# df = df.sort_values(by='Count_AnimalName',ascending=False)  # 打印出现次数最多的
# df
print(df[:20])  # 取前二十行数据

   乐高(LEGO)积木 艺术系列ART 31202 米奇米妮 18岁+ 儿童玩具 马赛克像素画 男孩女孩成人情人节礼物  \
0                               林家铺子水果罐头 什锦罐头 200g*2罐           
1   羽生结弦:王者之路( 超人气花样滑冰冠军羽生结弦全新传记,全面展示羽生10年成长经历和心路历程!)           
2             【话费慢充】全国电信话费充值手机特惠慢充话费200元 72小时内到账 200元           
3   豪皇 潮汕牛肉丸500g*2包 火锅食材牛丸 烧烤丸串生鲜潮汕年夜饭火锅丸子 汕头手打牛肉丸 年货           
4   伊利奶粉【全新升级】 金领冠系列 幼儿配方奶粉 3段1200克特惠三联装(1-3岁幼儿适用)...           
5      【欧洲进口】法国原瓶进口 Roux家族黑舰经典混酿干红葡萄酒红酒送礼佳品750ml*6瓶整箱           
6             良品铺子 香酥脆灰枣 酥脆小枣即食无核脆枣红枣干蜜饯果干休闲零食量贩装400g           
7      小黄鸭(B.Duck)小学生书包男童女童一三年级男孩儿童减负护脊双肩包 sbd80008黄色           
8   稳健医用外科口罩一次性医用口罩成人儿童可选 稳健口罩 三层防护 透气薄款防细菌口罩医用 1盒...           
9                                 嗨吃家 酸辣粉清真宽粉112g*12袋           
10  日本进口 黛珂Cosme Decorte牛油果乳液150ml 补水保湿 软化肤质 改善粗糙 ...           
11                               嗨吃家正宗铁棍山药粉皮200g*5袋速食           
12                     小鹿蓝蓝_酸奶溶豆 宝宝零食益生菌享6个月食谱 4口味各1盒           
13                                      嗨吃家热干面176g*6袋           
14       【药房直售】康速达 痔立克痔疮膏冷敷凝胶内外混合痔疮肉球肛门瘙痒男女 (周期型)实发两盒           
15                             蒙牛  酸酸乳 原味250ml×24 礼盒装           
16  蒂佳婷Dr.Jart+ 绿丸面膜贴片 舒缓镇静 补水保湿 水动力舒缓补水绿丸面膜25g*5片...           
17                法国原瓶进口  杰朗克西里尔 赤霞珠 干红 葡萄酒 750ml 双支装           
18  土土优选丹麦风味曲奇饼干 皇冠品质早餐网红休闲办公室零食年货72g/盒 十盒*(丹麦风味曲奇...           
19            善存维生素C咀嚼片香橙口味补充维C120片 1盒 1盒*(15+15+90)片           

               乐高京东自营旗舰店      764  1099.00  \
0              林家铺子官方旗舰店     8274     6.90   
1                  中信出版社    42023    49.00   
2           易士捷通讯充值拼购专营店      664   191.99   
3              邻家小厨生鲜专营店       32    39.00   
4            伊利母婴京东自营旗舰店  1480578   146.00   
5                玫嘉官方旗舰店      540   298.00   
6            良品铺子京东自营旗舰店     5747    17.90   
7               尚喜屋母婴旗舰店        9    88.00   
8                稳健官方旗舰店    53390    16.90   
9                 燕之北旗舰店      107    39.90   
10         京东国际美妆自营跨境免税店    89640   289.00   
11                燕之北旗舰店       10    39.90   
12               小鹿蓝蓝旗舰店     1275    54.00   
13                燕之北旗舰店       32    26.90   
14             颐鹤堂大药房旗舰店     4553    69.00   
15             蒙牛京东自营旗舰店   799422    44.90   
16  蒂佳婷(Dr.Jart)海外京东自营专区   747604    98.00   
17             禧家拾粮酒类旗舰店       27    39.90   
18             土土优选官方旗舰店    14849    19.90   
19                益尔益旗舰店     2040    49.00   

      https://item.jd.com/100017067554.html  
0   https://item.jd.com/10029836000540.html  
1         https://item.jd.com/13598042.html  
2     https://item.jd.com/200151598576.html  
3      https://item.jd.com/65414277974.html  
4          https://item.jd.com/1100526.html  
5      https://item.jd.com/22453030555.html  
6     https://item.jd.com/100027854140.html  
7   https://item.jd.com/10030112475646.html  
8   https://item.jd.com/10021189665333.html  
9   https://item.jd.com/10035393346580.html  
10         https://item.jd.com/4972612.html  
11  https://item.jd.com/10035809618060.html  
12  https://item.jd.com/10038718351041.html  
13  https://item.jd.com/10035527980479.html  
14  https://item.jd.com/10033781790177.html  
15         https://item.jd.com/1411416.html  
16         https://item.jd.com/4858894.html  
17  https://item.jd.com/10028867267738.html  
18  https://item.jd.com/10026591565614.html  
19     https://item.jd.com/47384323647.html  

pandas取行、取列注意点

方括号里写数字切片(例如 df[:20]),表示取行,对行进行操作

方括号里写字符串(例如 df['764']),表示取列索引,对列进行操作

print(df['764'])  # 具体取某一列的值
0          8274
1         42023
2           664
3            32
4       1480578
         ...   
6049    1952366
6050        769
6051        137
6052      21686
6053        276
Name: 764, Length: 6054, dtype: int64
print(type(df['764']))
<class 'pandas.core.series.Series'>
t3 = pd.DataFrame(np.arange(12).reshape(3,4),index=list('abc'),columns=list('WXYZ'))
t3
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
WXYZ
a0123
b4567
c891011
t3.loc['a','Z']  # 指定取第几行第几列的数据
3
type(t3.loc['a','Z'])
numpy.int32
t3.loc['a']
W    0
X    1
Y    2
Z    3
Name: a, dtype: int32
t3.loc[:,'Y']
a     2
b     6
c    10
Name: Y, dtype: int32
t3.loc[['a','c'],:]
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
WXYZ
a0123
c891011
t3.loc[['a','c'],['W','Z']]

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
WZ
a03
c811
t3
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
WXYZ
a0123
b4567
c891011
t3.iloc[1] # 拿到第二行数据
W    4
X    5
Y    6
Z    7
Name: b, dtype: int32
t3.iloc[:,2]  # 取第三列
a     2
b     6
c    10
Name: Y, dtype: int32

t3.iloc[:,[2,1
          ]]  # 取不连续的两列
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
YX
a21
b65
c109
t3.iloc[1:,:2] = np.nan
t3
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
WXYZ
a0.01.023
bNaNNaN67
cNaNNaN1011
df
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
乐高(LEGO)积木 艺术系列ART 31202 米奇米妮 18岁+ 儿童玩具 马赛克像素画 男孩女孩成人情人节礼物乐高京东自营旗舰店7641099.00https://item.jd.com/100017067554.html
0林家铺子水果罐头 什锦罐头 200g*2罐林家铺子官方旗舰店82746.90https://item.jd.com/10029836000540.html
1羽生结弦:王者之路( 超人气花样滑冰冠军羽生结弦全新传记,全面展示羽生10年成长经历和心路历程!)中信出版社4202349.00https://item.jd.com/13598042.html
2【话费慢充】全国电信话费充值手机特惠慢充话费200元 72小时内到账 200元易士捷通讯充值拼购专营店664191.99https://item.jd.com/200151598576.html
3豪皇 潮汕牛肉丸500g*2包 火锅食材牛丸 烧烤丸串生鲜潮汕年夜饭火锅丸子 汕头手打牛肉丸 年货邻家小厨生鲜专营店3239.00https://item.jd.com/65414277974.html
4伊利奶粉【全新升级】 金领冠系列 幼儿配方奶粉 3段1200克特惠三联装(1-3岁幼儿适用)...伊利母婴京东自营旗舰店1480578146.00https://item.jd.com/1100526.html
..................
6049贝亲(Pigeon)宽口径玻璃奶瓶奶嘴套装 婴儿奶瓶240ml+自然实感婴儿奶嘴(L码+LL...贝亲(Pigeon)京东自营旗舰店1952366172.00https://item.jd.com/7639987.html
6050尤果(YOUGUO)衣架子带晾衣夹子折叠晾衣架晒袜子架内衣架神器32夹子 可折叠加厚【1个3...尤果生活日用拼购旗舰店76915.90https://item.jd.com/10031481561764.html
6051匹克态极闪现3代篮球鞋男2022春季新款耐磨缓震篮球运动鞋男鞋 大白-气泡配色 42匹克官方旗舰店137669.00https://item.jd.com/10039423932347.html
6052超能 洗衣凝珠 洗衣凝珠 100颗 防串色 浓缩 酵素 香水味 花香型 洗衣球 洗衣珠超能京东自营官方旗舰店21686119.00https://item.jd.com/100011740813.html
60538册专注力训练书找不同迷宫书3-6岁儿童注意力观察记忆力智力开发全脑开发思维训练书籍凤凰新华书店旗舰店27615.80https://item.jd.com/71219454726.html

6054 rows × 5 columns

df.index  # 获取行索引数据
RangeIndex(start=0, stop=6054, step=1)
df.columns  # 获取列索引数据
Index(['乐高(LEGO)积木 艺术系列ART 31202 米奇米妮 18岁+ 儿童玩具 马赛克像素画 男孩女孩成人情人节礼物',
       '乐高京东自营旗舰店', '764', '1099.00', 'https://item.jd.com/100017067554.html'],
      dtype='object')
df.dtypes  # 获取每一列的数据类型
乐高(LEGO)积木 艺术系列ART 31202 米奇米妮 18岁+ 儿童玩具 马赛克像素画 男孩女孩成人情人节礼物     object
乐高京东自营旗舰店                                                      object
764                                                             int64
1099.00                                                       float64
https://item.jd.com/100017067554.html                          object
dtype: object

df.values  # 获取值
array([['林家铺子水果罐头 什锦罐头 200g*2罐', '林家铺子官方旗舰店', 8274, 6.9,
        'https://item.jd.com/10029836000540.html'],
       ['羽生结弦:王者之路( 超人气花样滑冰冠军羽生结弦全新传记,全面展示羽生10年成长经历和心路历程!)', '中信出版社',
        42023, 49.0, 'https://item.jd.com/13598042.html'],
       ['【话费慢充】全国电信话费充值手机特惠慢充话费200元 72小时内到账 200元', '易士捷通讯充值拼购专营店', 664,
        191.99, 'https://item.jd.com/200151598576.html'],
       ...,
       ['匹克态极闪现3代篮球鞋男2022春季新款耐磨缓震篮球运动鞋男鞋 大白-气泡配色 42', '匹克官方旗舰店', 137,
        669.0, 'https://item.jd.com/10039423932347.html'],
       ['超能 洗衣凝珠 洗衣凝珠 100颗 防串色 浓缩 酵素 香水味 花香型 洗衣球 洗衣珠', '超能京东自营官方旗舰店',
        21686, 119.0, 'https://item.jd.com/100011740813.html'],
       ['8册专注力训练书找不同迷宫书3-6岁儿童注意力观察记忆力智力开发全脑开发思维训练书籍', '凤凰新华书店旗舰店', 276,
        15.8, 'https://item.jd.com/71219454726.html']], dtype=object)
df
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
乐高(LEGO)积木 艺术系列ART 31202 米奇米妮 18岁+ 儿童玩具 马赛克像素画 男孩女孩成人情人节礼物乐高京东自营旗舰店7641099.00https://item.jd.com/100017067554.html
0林家铺子水果罐头 什锦罐头 200g*2罐林家铺子官方旗舰店82746.90https://item.jd.com/10029836000540.html
1羽生结弦:王者之路( 超人气花样滑冰冠军羽生结弦全新传记,全面展示羽生10年成长经历和心路历程!)中信出版社4202349.00https://item.jd.com/13598042.html
2【话费慢充】全国电信话费充值手机特惠慢充话费200元 72小时内到账 200元易士捷通讯充值拼购专营店664191.99https://item.jd.com/200151598576.html
3豪皇 潮汕牛肉丸500g*2包 火锅食材牛丸 烧烤丸串生鲜潮汕年夜饭火锅丸子 汕头手打牛肉丸 年货邻家小厨生鲜专营店3239.00https://item.jd.com/65414277974.html
4伊利奶粉【全新升级】 金领冠系列 幼儿配方奶粉 3段1200克特惠三联装(1-3岁幼儿适用)...伊利母婴京东自营旗舰店1480578146.00https://item.jd.com/1100526.html
..................
6049贝亲(Pigeon)宽口径玻璃奶瓶奶嘴套装 婴儿奶瓶240ml+自然实感婴儿奶嘴(L码+LL...贝亲(Pigeon)京东自营旗舰店1952366172.00https://item.jd.com/7639987.html
6050尤果(YOUGUO)衣架子带晾衣夹子折叠晾衣架晒袜子架内衣架神器32夹子 可折叠加厚【1个3...尤果生活日用拼购旗舰店76915.90https://item.jd.com/10031481561764.html
6051匹克态极闪现3代篮球鞋男2022春季新款耐磨缓震篮球运动鞋男鞋 大白-气泡配色 42匹克官方旗舰店137669.00https://item.jd.com/10039423932347.html
6052超能 洗衣凝珠 洗衣凝珠 100颗 防串色 浓缩 酵素 香水味 花香型 洗衣球 洗衣珠超能京东自营官方旗舰店21686119.00https://item.jd.com/100011740813.html
60538册专注力训练书找不同迷宫书3-6岁儿童注意力观察记忆力智力开发全脑开发思维训练书籍凤凰新华书店旗舰店27615.80https://item.jd.com/71219454726.html

6054 rows × 5 columns

mean_data = df['1099.00']
mean_data
0         6.90
1        49.00
2       191.99
3        39.00
4       146.00
         ...  
6049    172.00
6050     15.90
6051    669.00
6052    119.00
6053     15.80
Name: 1099.00, Length: 6054, dtype: float64
print('商品均价',mean_data.mean())
商品均价 332.4171737693964
df[mean_data==mean_data.min()]  #取出最便宜的商品
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
乐高(LEGO)积木 艺术系列ART 31202 米奇米妮 18岁+ 儿童玩具 马赛克像素画 男孩女孩成人情人节礼物乐高京东自营旗舰店7641099.00https://item.jd.com/100017067554.html
286补运费专拍链接熊出没官方旗舰店01.0https://item.jd.com/10042346578090.html
1906【京选99新】苹果iPhone 12 ProMax 256GB 石墨色5G全网通 S12勇科手机21.0https://item.jd.com/10040790836846.html
2047Yottoy 瑜伽入门学习教程yottoy京东自营旗舰店111.0https://item.jd.com/100018075841.html
2791运费补运费专用链接(请勿单独拍) 补运费专用链接荷尔健康大药房旗舰店71.0https://item.jd.com/10023059152178.html
3854【准新机】【在保280天以上】iPhone13ProMax 5G全网通256G远峰蓝S18勇科手机11.0https://item.jd.com/10041957238447.html
4398定金 别克昂科拉 试驾享原厂精美试驾礼 【新车汽车买车SUV】 具体车型请与线下经销商协定上汽通用别克官方旗舰店01.0https://item.jd.com/68629491955.html
4491贵州茅台镇酱香型白酒整箱53度粮食窖藏老酒年货送礼酒水饮品江左盟大曲酱香酒 单瓶装遵巡酒类专营店24271.0https://item.jd.com/10028009269896.html
5888【准新机】【在保280天以上】iPhone13ProMax 5G全网通256G金色 S11勇科手机01.0https://item.jd.com/10041957046439.html
# Report how many entries column '764' holds, using one formatted string.
print(f"总共有{df['764'].count()}个商品")
总共有6054个商品
t3
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
WXYZ
a0.01.023
bNaNNaN67
cNaNNaN1011
t3[pd.notnull(t3['W'])]  # 删除W这一列有nan的行数据
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
WXYZ
a0.01.023
t3.dropna(axis=0)  # 删除所有含有nan的数值 不适用
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
WXYZ
a0.01.023
t3.dropna(axis=0,how='any',inplace=True)
t3
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
WXYZ
a0.01.023
t2
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
nameagetel
0xioahong20.010020.0
1xioamingNaN123231.0
2xiaowang18.0NaN
t2.fillna(0)  # 将nan替换为0或其他数值
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
nameagetel
0xioahong20.010020.0
1xioaming0.0123231.0
2xiaowang18.00.0
# Restrict the mean to numeric columns: the frame also holds the string column
# 'name', which cannot be averaged and makes a plain mean() fail on pandas >= 2.0.
t2.fillna(t2.mean(numeric_only=True))  # replace NaN with each column's mean
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
nameagetel
0xioahong20.010020.0
1xioaming19.0123231.0
2xiaowang18.066625.5
t2['age'].fillna(t2['age'].mean(0))  # 替换age这一列
0    20.0
1    19.0
2    18.0
Name: age, dtype: float64
t3 = pd.DataFrame(np.arange(12).reshape(3,4),index=list('abc'),columns=list('WXYZ'))
t3
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
WXYZ
a0123
b4567
c891011
t3[t3==0] = np.nan  # 0会参与计算,nan不会
t3
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
WXYZ
aNaN123
b4.0567
c8.091011
from matplotlib import pyplot as plt
import pandas as pd
file_path = './can.csv'
df = pd.read_csv(file_path)
df
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
1201.0040.090-0.125
01201.004-0.043-0.125
11200.9690.090-0.121
21200.973-0.012-0.137
31201.000-0.016-0.121
41200.9610.082-0.121
..................
15299431001.0510.090-0.262
15299531000.9180.039-0.129
15299631001.156-0.094-0.227
15299731000.9340.203-0.172
15299831001.199-0.1760.109

152999 rows × 5 columns

# NOTE(review): `df.info` is referenced without parentheses, so this prints the
# bound method's repr (which embeds the frame) rather than the info() summary.
print(df.info)
# distribution of rating / runtime
# chart type: histogram
# prepare the data
<bound method DataFrame.info of         1   20  1.004  0.090  -0.125
0       1   20  1.004 -0.043  -0.125
1       1   20  0.969  0.090  -0.121
2       1   20  0.973 -0.012  -0.137
3       1   20  1.000 -0.016  -0.121
4       1   20  0.961  0.082  -0.121
...    ..  ...    ...    ...     ...
152994  3  100  1.051  0.090  -0.262
152995  3  100  0.918  0.039  -0.129
152996  3  100  1.156 -0.094  -0.227
152997  3  100  0.934  0.203  -0.172
152998  3  100  1.199 -0.176   0.109

[152999 rows x 5 columns]>
# Look at where the values of column "20" cluster.
runtime_data = df["20"].values
print(runtime_data)
min_runtime, max_runtime = runtime_data.min(), runtime_data.max()
# Number of bins: the value range floor-divided by a bin width of 5.
num_bin = (max_runtime - min_runtime) // 5
print(num_bin)
# Set the figure size.
plt.figure(figsize=(20, 8), dpi=80)
plt.hist(runtime_data, bins=num_bin)
plt.show()
[ 20  20  20 ... 100 100 100]
16

(此处为上面代码绘制的直方图,原图未随文保留)

# Sample data: the same 32 values repeated three times (96 values in total).
runtime_data = np.array([
    8.1, 7.0, 7.3, 7.2, 6.2, 6.1, 8.3, 6.4,
    7.1, 7.5, 8.4, 9.9, 7.5, 7.9, 9.8, 6.5,
    7.8, 8.9, 6.8, 7.8, 9.8, 7.8, 6.7, 8.9,
    7.8, 7.8, 9.7, 6.5, 6.7, 6.4, 6.8, 9.8,
] * 3)
print(runtime_data)
min_runtime, max_runtime = runtime_data.min(), runtime_data.max()
# Build explicit bin edges instead of a bin count: start from [1.9, 3.5] and
# keep adding edges 0.5 apart until the last edge passes the largest value.
num_bin_list = [1.9, 3.5]
i = 3.5
while i <= max_runtime:
    i += 0.5
    num_bin_list.append(i)
print(num_bin_list)
# Set the figure size.
plt.figure(figsize=(20, 8), dpi=80)
plt.hist(runtime_data, bins=num_bin_list)

# Put an x tick on every bin edge.
plt.xticks(num_bin_list)
plt.show()
[8.1 7.  7.3 7.2 6.2 6.1 8.3 6.4 7.1 7.5 8.4 9.9 7.5 7.9 9.8 6.5 7.8 8.9
 6.8 7.8 9.8 7.8 6.7 8.9 7.8 7.8 9.7 6.5 6.7 6.4 6.8 9.8 8.1 7.  7.3 7.2
 6.2 6.1 8.3 6.4 7.1 7.5 8.4 9.9 7.5 7.9 9.8 6.5 7.8 8.9 6.8 7.8 9.8 7.8
 6.7 8.9 7.8 7.8 9.7 6.5 6.7 6.4 6.8 9.8 8.1 7.  7.3 7.2 6.2 6.1 8.3 6.4
 7.1 7.5 8.4 9.9 7.5 7.9 9.8 6.5 7.8 8.9 6.8 7.8 9.8 7.8 6.7 8.9 7.8 7.8
 9.7 6.5 6.7 6.4 6.8 9.8]
[1.9, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5, 9.0, 9.5, 10.0]

(此处为上面代码按自定义分组边界绘制的直方图,原图未随文保留)

import pandas as pd

df = pd.DataFrame({'key':['A','B','C','A','B','C','A','B','C'],
                  'data':[0,5,10,5,10,15,10,15,20]})
df
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
keydata
0A0
1B5
2C10
3A5
4B10
5C15
6A10
7B15
8C20
for key in ['A','B','C']:
    print(key,df[df['key'] == key].sum())
A key     AAA
data     15
dtype: object
B key     BBB
data     30
dtype: object
C key     CCC
data     45
dtype: object
df
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
keydata
0A0
1B5
2C10
3A5
4B10
5C15
6A10
7B15
8C20

groupby方法

df.groupby('key').sum()  # 同类取和
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
data
key
A15
B30
C45
import numpy as np
# Mean of the remaining columns within each 'key' group.
# The aggregation is named by string: handing the NumPy callable (np.mean) to
# groupby aggregation is deprecated in recent pandas releases and triggers a
# FutureWarning, while 'mean' yields the same result without it.
df.groupby('key').aggregate('mean')
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
     data
key
A       5
B      10
C      15
# Load the CSV from the working directory into a DataFrame and display it.
# NOTE(review): the resulting column labels (1, 20, 1.004, 0.090, -0.125) look
# like data values, so ./can.csv presumably has no header row and read_csv has
# consumed its first record as the header -- confirm; if so, pass header=None
# (optionally with names=[...]) and adjust the column lookups that follow.
df = pd.read_csv('./can.csv')
df
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
        1   20  1.004  0.090  -0.125
0       1   20  1.004 -0.043  -0.125
1       1   20  0.969  0.090  -0.121
2       1   20  0.973 -0.012  -0.137
3       1   20  1.000 -0.016  -0.121
4       1   20  0.961  0.082  -0.121
...    ..  ...    ...    ...     ...
152994  3  100  1.051  0.090  -0.262
152995  3  100  0.918  0.039  -0.129
152996  3  100  1.156 -0.094  -0.227
152997  3  100  0.934  0.203  -0.172
152998  3  100  1.199 -0.176   0.109

152999 rows × 5 columns

# Split the rows by the value in column '1', then average column '20' per group.
df.groupby('1')['20'].mean()
1
1    60.000784
2    60.000000
3    60.000000
Name: 20, dtype: float64
# .groups maps every distinct value of column '20' to the row labels in that group.
df.groupby(by = '20').groups  # group the rows by column '20'
{20: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, ...], 25: [2999, 3000, 3001, 3002, 3003, 3004, 3005, 3006, 3007, 3008, 3009, 3010, 3011, 3012, 3013, 3014, 3015, 3016, 3017, 3018, 3019, 3020, 3021, 3022, 3023, 3024, 3025, 3026, 3027, 3028, 3029, 3030, 3031, 3032, 3033, 3034, 3035, 3036, 3037, 3038, 3039, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3050, 3051, 3052, 3053, 3054, 3055, 3056, 3057, 3058, 3059, 3060, 3061, 3062, 3063, 3064, 3065, 3066, 3067, 3068, 3069, 3070, 3071, 3072, 3073, 3074, 3075, 3076, 3077, 3078, 3079, 3080, 3081, 3082, 3083, 3084, 3085, 3086, 3087, 3088, 3089, 3090, 3091, 3092, 3093, 3094, 3095, 3096, 3097, 3098, ...], 30: [5999, 6000, 6001, 6002, 6003, 6004, 6005, 6006, 6007, 6008, 6009, 6010, 6011, 6012, 6013, 6014, 6015, 6016, 6017, 6018, 6019, 6020, 6021, 6022, 6023, 6024, 6025, 6026, 6027, 6028, 6029, 6030, 6031, 6032, 6033, 6034, 6035, 6036, 6037, 6038, 6039, 6040, 6041, 6042, 6043, 6044, 6045, 6046, 6047, 6048, 6049, 6050, 6051, 6052, 6053, 6054, 6055, 6056, 6057, 6058, 6059, 6060, 6061, 6062, 6063, 6064, 6065, 6066, 6067, 6068, 6069, 6070, 6071, 6072, 6073, 6074, 6075, 6076, 6077, 6078, 6079, 6080, 6081, 6082, 6083, 6084, 6085, 6086, 6087, 6088, 6089, 6090, 6091, 6092, 6093, 6094, 6095, 6096, 6097, 6098, ...], 35: [8999, 9000, 9001, 9002, 9003, 9004, 9005, 9006, 9007, 9008, 9009, 9010, 9011, 9012, 9013, 9014, 9015, 9016, 9017, 9018, 9019, 9020, 9021, 9022, 9023, 9024, 9025, 9026, 9027, 9028, 9029, 9030, 9031, 9032, 9033, 9034, 9035, 9036, 9037, 9038, 9039, 9040, 9041, 9042, 9043, 9044, 9045, 9046, 9047, 9048, 9049, 9050, 9051, 9052, 9053, 9054, 9055, 9056, 9057, 9058, 9059, 
9060, 9061, 9062, 9063, 9064, 9065, 9066, 9067, 9068, 9069, 9070, 9071, 9072, 9073, 9074, 9075, 9076, 9077, 9078, 9079, 9080, 9081, 9082, 9083, 9084, 9085, 9086, 9087, 9088, 9089, 9090, 9091, 9092, 9093, 9094, 9095, 9096, 9097, 9098, ...], 40: [11999, 12000, 12001, 12002, 12003, 12004, 12005, 12006, 12007, 12008, 12009, 12010, 12011, 12012, 12013, 12014, 12015, 12016, 12017, 12018, 12019, 12020, 12021, 12022, 12023, 12024, 12025, 12026, 12027, 12028, 12029, 12030, 12031, 12032, 12033, 12034, 12035, 12036, 12037, 12038, 12039, 12040, 12041, 12042, 12043, 12044, 12045, 12046, 12047, 12048, 12049, 12050, 12051, 12052, 12053, 12054, 12055, 12056, 12057, 12058, 12059, 12060, 12061, 12062, 12063, 12064, 12065, 12066, 12067, 12068, 12069, 12070, 12071, 12072, 12073, 12074, 12075, 12076, 12077, 12078, 12079, 12080, 12081, 12082, 12083, 12084, 12085, 12086, 12087, 12088, 12089, 12090, 12091, 12092, 12093, 12094, 12095, 12096, 12097, 12098, ...], 45: [14999, 15000, 15001, 15002, 15003, 15004, 15005, 15006, 15007, 15008, 15009, 15010, 15011, 15012, 15013, 15014, 15015, 15016, 15017, 15018, 15019, 15020, 15021, 15022, 15023, 15024, 15025, 15026, 15027, 15028, 15029, 15030, 15031, 15032, 15033, 15034, 15035, 15036, 15037, 15038, 15039, 15040, 15041, 15042, 15043, 15044, 15045, 15046, 15047, 15048, 15049, 15050, 15051, 15052, 15053, 15054, 15055, 15056, 15057, 15058, 15059, 15060, 15061, 15062, 15063, 15064, 15065, 15066, 15067, 15068, 15069, 15070, 15071, 15072, 15073, 15074, 15075, 15076, 15077, 15078, 15079, 15080, 15081, 15082, 15083, 15084, 15085, 15086, 15087, 15088, 15089, 15090, 15091, 15092, 15093, 15094, 15095, 15096, 15097, 15098, ...], 50: [17999, 18000, 18001, 18002, 18003, 18004, 18005, 18006, 18007, 18008, 18009, 18010, 18011, 18012, 18013, 18014, 18015, 18016, 18017, 18018, 18019, 18020, 18021, 18022, 18023, 18024, 18025, 18026, 18027, 18028, 18029, 18030, 18031, 18032, 18033, 18034, 18035, 18036, 18037, 18038, 18039, 18040, 18041, 18042, 18043, 18044, 18045, 
18046, 18047, 18048, 18049, 18050, 18051, 18052, 18053, 18054, 18055, 18056, 18057, 18058, 18059, 18060, 18061, 18062, 18063, 18064, 18065, 18066, 18067, 18068, 18069, 18070, 18071, 18072, 18073, 18074, 18075, 18076, 18077, 18078, 18079, 18080, 18081, 18082, 18083, 18084, 18085, 18086, 18087, 18088, 18089, 18090, 18091, 18092, 18093, 18094, 18095, 18096, 18097, 18098, ...], 55: [20999, 21000, 21001, 21002, 21003, 21004, 21005, 21006, 21007, 21008, 21009, 21010, 21011, 21012, 21013, 21014, 21015, 21016, 21017, 21018, 21019, 21020, 21021, 21022, 21023, 21024, 21025, 21026, 21027, 21028, 21029, 21030, 21031, 21032, 21033, 21034, 21035, 21036, 21037, 21038, 21039, 21040, 21041, 21042, 21043, 21044, 21045, 21046, 21047, 21048, 21049, 21050, 21051, 21052, 21053, 21054, 21055, 21056, 21057, 21058, 21059, 21060, 21061, 21062, 21063, 21064, 21065, 21066, 21067, 21068, 21069, 21070, 21071, 21072, 21073, 21074, 21075, 21076, 21077, 21078, 21079, 21080, 21081, 21082, 21083, 21084, 21085, 21086, 21087, 21088, 21089, 21090, 21091, 21092, 21093, 21094, 21095, 21096, 21097, 21098, ...], 60: [23999, 24000, 24001, 24002, 24003, 24004, 24005, 24006, 24007, 24008, 24009, 24010, 24011, 24012, 24013, 24014, 24015, 24016, 24017, 24018, 24019, 24020, 24021, 24022, 24023, 24024, 24025, 24026, 24027, 24028, 24029, 24030, 24031, 24032, 24033, 24034, 24035, 24036, 24037, 24038, 24039, 24040, 24041, 24042, 24043, 24044, 24045, 24046, 24047, 24048, 24049, 24050, 24051, 24052, 24053, 24054, 24055, 24056, 24057, 24058, 24059, 24060, 24061, 24062, 24063, 24064, 24065, 24066, 24067, 24068, 24069, 24070, 24071, 24072, 24073, 24074, 24075, 24076, 24077, 24078, 24079, 24080, 24081, 24082, 24083, 24084, 24085, 24086, 24087, 24088, 24089, 24090, 24091, 24092, 24093, 24094, 24095, 24096, 24097, 24098, ...], 65: [26999, 27000, 27001, 27002, 27003, 27004, 27005, 27006, 27007, 27008, 27009, 27010, 27011, 27012, 27013, 27014, 27015, 27016, 27017, 27018, 27019, 27020, 27021, 27022, 27023, 27024, 27025, 27026, 
27027, 27028, 27029, 27030, 27031, 27032, 27033, 27034, 27035, 27036, 27037, 27038, 27039, 27040, 27041, 27042, 27043, 27044, 27045, 27046, 27047, 27048, 27049, 27050, 27051, 27052, 27053, 27054, 27055, 27056, 27057, 27058, 27059, 27060, 27061, 27062, 27063, 27064, 27065, 27066, 27067, 27068, 27069, 27070, 27071, 27072, 27073, 27074, 27075, 27076, 27077, 27078, 27079, 27080, 27081, 27082, 27083, 27084, 27085, 27086, 27087, 27088, 27089, 27090, 27091, 27092, 27093, 27094, 27095, 27096, 27097, 27098, ...], 70: [29999, 30000, 30001, 30002, 30003, 30004, 30005, 30006, 30007, 30008, 30009, 30010, 30011, 30012, 30013, 30014, 30015, 30016, 30017, 30018, 30019, 30020, 30021, 30022, 30023, 30024, 30025, 30026, 30027, 30028, 30029, 30030, 30031, 30032, 30033, 30034, 30035, 30036, 30037, 30038, 30039, 30040, 30041, 30042, 30043, 30044, 30045, 30046, 30047, 30048, 30049, 30050, 30051, 30052, 30053, 30054, 30055, 30056, 30057, 30058, 30059, 30060, 30061, 30062, 30063, 30064, 30065, 30066, 30067, 30068, 30069, 30070, 30071, 30072, 30073, 30074, 30075, 30076, 30077, 30078, 30079, 30080, 30081, 30082, 30083, 30084, 30085, 30086, 30087, 30088, 30089, 30090, 30091, 30092, 30093, 30094, 30095, 30096, 30097, 30098, ...], 75: [32999, 33000, 33001, 33002, 33003, 33004, 33005, 33006, 33007, 33008, 33009, 33010, 33011, 33012, 33013, 33014, 33015, 33016, 33017, 33018, 33019, 33020, 33021, 33022, 33023, 33024, 33025, 33026, 33027, 33028, 33029, 33030, 33031, 33032, 33033, 33034, 33035, 33036, 33037, 33038, 33039, 33040, 33041, 33042, 33043, 33044, 33045, 33046, 33047, 33048, 33049, 33050, 33051, 33052, 33053, 33054, 33055, 33056, 33057, 33058, 33059, 33060, 33061, 33062, 33063, 33064, 33065, 33066, 33067, 33068, 33069, 33070, 33071, 33072, 33073, 33074, 33075, 33076, 33077, 33078, 33079, 33080, 33081, 33082, 33083, 33084, 33085, 33086, 33087, 33088, 33089, 33090, 33091, 33092, 33093, 33094, 33095, 33096, 33097, 33098, ...], 80: [35999, 36000, 36001, 36002, 36003, 36004, 36005, 36006, 36007, 
36008, 36009, 36010, 36011, 36012, 36013, 36014, 36015, 36016, 36017, 36018, 36019, 36020, 36021, 36022, 36023, 36024, 36025, 36026, 36027, 36028, 36029, 36030, 36031, 36032, 36033, 36034, 36035, 36036, 36037, 36038, 36039, 36040, 36041, 36042, 36043, 36044, 36045, 36046, 36047, 36048, 36049, 36050, 36051, 36052, 36053, 36054, 36055, 36056, 36057, 36058, 36059, 36060, 36061, 36062, 36063, 36064, 36065, 36066, 36067, 36068, 36069, 36070, 36071, 36072, 36073, 36074, 36075, 36076, 36077, 36078, 36079, 36080, 36081, 36082, 36083, 36084, 36085, 36086, 36087, 36088, 36089, 36090, 36091, 36092, 36093, 36094, 36095, 36096, 36097, 36098, ...], 85: [38999, 39000, 39001, 39002, 39003, 39004, 39005, 39006, 39007, 39008, 39009, 39010, 39011, 39012, 39013, 39014, 39015, 39016, 39017, 39018, 39019, 39020, 39021, 39022, 39023, 39024, 39025, 39026, 39027, 39028, 39029, 39030, 39031, 39032, 39033, 39034, 39035, 39036, 39037, 39038, 39039, 39040, 39041, 39042, 39043, 39044, 39045, 39046, 39047, 39048, 39049, 39050, 39051, 39052, 39053, 39054, 39055, 39056, 39057, 39058, 39059, 39060, 39061, 39062, 39063, 39064, 39065, 39066, 39067, 39068, 39069, 39070, 39071, 39072, 39073, 39074, 39075, 39076, 39077, 39078, 39079, 39080, 39081, 39082, 39083, 39084, 39085, 39086, 39087, 39088, 39089, 39090, 39091, 39092, 39093, 39094, 39095, 39096, 39097, 39098, ...], 90: [41999, 42000, 42001, 42002, 42003, 42004, 42005, 42006, 42007, 42008, 42009, 42010, 42011, 42012, 42013, 42014, 42015, 42016, 42017, 42018, 42019, 42020, 42021, 42022, 42023, 42024, 42025, 42026, 42027, 42028, 42029, 42030, 42031, 42032, 42033, 42034, 42035, 42036, 42037, 42038, 42039, 42040, 42041, 42042, 42043, 42044, 42045, 42046, 42047, 42048, 42049, 42050, 42051, 42052, 42053, 42054, 42055, 42056, 42057, 42058, 42059, 42060, 42061, 42062, 42063, 42064, 42065, 42066, 42067, 42068, 42069, 42070, 42071, 42072, 42073, 42074, 42075, 42076, 42077, 42078, 42079, 42080, 42081, 42082, 42083, 42084, 42085, 42086, 42087, 42088, 42089, 
42090, 42091, 42092, 42093, 42094, 42095, 42096, 42097, 42098, ...], 95: [44999, 45000, 45001, 45002, 45003, 45004, 45005, 45006, 45007, 45008, 45009, 45010, 45011, 45012, 45013, 45014, 45015, 45016, 45017, 45018, 45019, 45020, 45021, 45022, 45023, 45024, 45025, 45026, 45027, 45028, 45029, 45030, 45031, 45032, 45033, 45034, 45035, 45036, 45037, 45038, 45039, 45040, 45041, 45042, 45043, 45044, 45045, 45046, 45047, 45048, 45049, 45050, 45051, 45052, 45053, 45054, 45055, 45056, 45057, 45058, 45059, 45060, 45061, 45062, 45063, 45064, 45065, 45066, 45067, 45068, 45069, 45070, 45071, 45072, 45073, 45074, 45075, 45076, 45077, 45078, 45079, 45080, 45081, 45082, 45083, 45084, 45085, 45086, 45087, 45088, 45089, 45090, 45091, 45092, 45093, 45094, 45095, 45096, 45097, 45098, ...], 100: [47999, 48000, 48001, 48002, 48003, 48004, 48005, 48006, 48007, 48008, 48009, 48010, 48011, 48012, 48013, 48014, 48015, 48016, 48017, 48018, 48019, 48020, 48021, 48022, 48023, 48024, 48025, 48026, 48027, 48028, 48029, 48030, 48031, 48032, 48033, 48034, 48035, 48036, 48037, 48038, 48039, 48040, 48041, 48042, 48043, 48044, 48045, 48046, 48047, 48048, 48049, 48050, 48051, 48052, 48053, 48054, 48055, 48056, 48057, 48058, 48059, 48060, 48061, 48062, 48063, 48064, 48065, 48066, 48067, 48068, 48069, 48070, 48071, 48072, 48073, 48074, 48075, 48076, 48077, 48078, 48079, 48080, 48081, 48082, 48083, 48084, 48085, 48086, 48087, 48088, 48089, 48090, 48091, 48092, 48093, 48094, 48095, 48096, 48097, 48098, ...]}
# Small 2x3 demo frame: rows labelled a/b, columns labelled A/B/C.
df = pd.DataFrame(
    {'A': [1, 4], 'B': [2, 5], 'C': [3, 6]},
    index=['a', 'b'],
)
df
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
   A  B  C
a  1  2  3
b  4  5  6
# axis=0 adds the values down the rows, giving one total per column (A, B, C).
df.sum(axis=0)
A    5
B    7
C    9
dtype: int64

# axis=1 adds the values across the columns, giving one total per row (a, b).
df.sum(axis=1)
a     6
b    15
dtype: int64
# axis='columns' is the named spelling of axis=1: one total per row.
df.sum(axis='columns')
a     6
b    15
dtype: int64
# Largest value found in each column.
df.max(axis=0)
A    4
B    5
C    6
dtype: int64
df.median(axis=0)  # median of each column
A    2.5
B    3.5
C    4.5
dtype: float64

二元统计

# Reload the CSV for the pairwise-statistics examples and display it.
# NOTE(review): the column labels (1, 20, 1.004, 0.090, -0.125) look like data
# values, so the file presumably lacks a header row and its first record was
# taken as the header -- confirm before relying on the statistics below.
df = pd.read_csv('./can.csv')
df
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
        1   20  1.004  0.090  -0.125
0       1   20  1.004 -0.043  -0.125
1       1   20  0.969  0.090  -0.121
2       1   20  0.973 -0.012  -0.137
3       1   20  1.000 -0.016  -0.121
4       1   20  0.961  0.082  -0.121
...    ..  ...    ...    ...     ...
152994  3  100  1.051  0.090  -0.262
152995  3  100  0.918  0.039  -0.129
152996  3  100  1.156 -0.094  -0.227
152997  3  100  0.934  0.203  -0.172
152998  3  100  1.199 -0.176   0.109

152999 rows × 5 columns

# Preview the top of the frame; with no argument head() returns the first five rows.
df.head()
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
   1  20  1.004  0.090  -0.125
0  1  20  1.004 -0.043  -0.125
1  1  20  0.969  0.090  -0.121
2  1  20  0.973 -0.012  -0.137
3  1  20  1.000 -0.016  -0.121
4  1  20  0.961  0.082  -0.121
df.cov()  # pairwise covariance between the columns
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
                1          20     1.004     0.090    -0.125
1        0.666669   -0.000261 -0.003833  0.003257  0.000941
20      -0.000261  599.997386  0.040354  0.052441  0.113526
1.004   -0.003833    0.040354  0.599015  0.012148 -0.036479
0.090    0.003257    0.052441  0.012148  0.551461 -0.010641
-0.125   0.000941    0.113526 -0.036479 -0.010641  0.267299
df.corr()  # pairwise correlation coefficients; values lie in [-1, 1]
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
                1        20     1.004     0.090    -0.125
1        1.000000 -0.000013 -0.006065  0.005372  0.002228
20      -0.000013  1.000000  0.002129  0.002883  0.008964
1.004   -0.006065  0.002129  1.000000  0.021137 -0.091164
0.090    0.005372  0.002883  0.021137  1.000000 -0.027716
-0.125   0.002228  0.008964 -0.091164 -0.027716  1.000000
df
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
        1   20  1.004  0.090  -0.125
0       1   20  1.004 -0.043  -0.125
1       1   20  0.969  0.090  -0.121
2       1   20  0.973 -0.012  -0.137
3       1   20  1.000 -0.016  -0.121
4       1   20  0.961  0.082  -0.121
...    ..  ...    ...    ...     ...
152994  3  100  1.051  0.090  -0.262
152995  3  100  0.918  0.039  -0.129
152996  3  100  1.156 -0.094  -0.227
152997  3  100  0.934  0.203  -0.172
152998  3  100  1.199 -0.176   0.109

152999 rows × 5 columns

df['1.004'].value_counts()  # how often each distinct value occurs, most frequent first
 0.980    4452
 0.977    4358
 0.996    4232
 1.000    4194
 0.984    4166
          ... 
-3.547       1
 5.844       1
 4.988       1
 6.816       1
-3.668       1
Name: 1.004, Length: 2733, dtype: int64
df['1.004'].value_counts(ascending=True)  # same counts, least frequent first
-3.668       1
 6.816       1
 4.988       1
 5.844       1
-3.547       1
          ... 
 0.984    4166
 1.000    4194
 0.996    4232
 0.977    4358
 0.980    4452
Name: 1.004, Length: 2733, dtype: int64
# bins=5 buckets the values into five intervals and counts the rows per interval.
df['1.004'].value_counts(ascending=True, bins=5)  # count per interval instead of per value
(-8.017, -4.801]       118
(4.797, 7.996]         635
(-4.801, -1.602]      1613
(1.598, 4.797]       10207
(-1.602, 1.598]     140426
Name: 1.004, dtype: int64
df['1'].value_counts(ascending=True)  # number of rows holding each of the values 1, 2 and 3
1    50999
2    51000
3    51000
Name: 1, dtype: int64

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

小刘私坊

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值