Pandas快速总结

最新推荐文章于 2022-11-22 21:54:54 发布

xingxiliang

最新推荐文章于 2022-11-22 21:54:54 发布

阅读量324

点赞数

分类专栏：数据分析

本文链接：https://blog.csdn.net/s_xing/article/details/106948658

版权

数据分析专栏收录该内容

2 篇文章 0 订阅

订阅专栏

Series数据结构

# 带标签的一列
import pandas as pd;

a = pd.Series( [1,2,3,4,5]);
a

0    1
1    2
2    3
3    4
4    5
dtype: int64

# 传入index
a = pd.Series([1,2,3,4,5], index=['a', 'b', 'c', 'd', 'e'], dtype=float);
a

a    1.0
b    2.0
c    3.0
d    4.0
e    5.0
dtype: float64

# 从numpy    ndarray导入 
import numpy as np;

a = np.arange(5);
b = pd.Series(a);
print(b)
print(type(a))

0    0
1    1
2    2
3    3
4    4
dtype: int32
<class 'numpy.ndarray'>

# 从dict产生
dic = {'name':'Lee', 'sex':'man', 'age':18}
a = pd.Series(dic)
print(a)

age      18
name    Lee
sex     man
dtype: object

# 手动传入一个索引的时候 以索引为准, 从字典中查找，找不到就直接NaN
my_dict = {'name':'xing', 'sex':'man', 'age':18};
a = pd.Series(my_dict, index = ['name', 'color'])
a

name     xing
color     NaN
dtype: object

# 传入的是单个标量时，会按索引的长度自动广播填充
a = pd.Series(5, [0, 1, 2])
a

0    5
1    5
2    5
dtype: int64

# A one-element list is not broadcast against a longer index (recent pandas
# raises ValueError on the length mismatch), so repeat the value explicitly.
a = pd.Series([3] * 3, [0, 1, 2])
a

0    3
1    3
2    3
dtype: int64

DataFrame数据结构

# 从numpy 导入
a = np.random.randint(0, 10, (2,3))
df = pd.DataFrame(a, index=['a', 'b'], columns = ['x', 'y', 'z']);
df

	x	y	z
a	4	3	2
b	5	6	9

# 把Series变为DF
# 一维的字典不能直接转
population = {'beijing':3434, 'shanghai':2343, 'guangzhou':11232};
s = pd.Series(population);
df = pd.DataFrame(s);
df

	0
beijing	3434
guangzhou	11232
shanghai	2343

type(df)

pandas.core.frame.DataFrame

# 还是从series导入，但是加上列名
df = pd.DataFrame(s, columns=['pop_num'])
df

	pop_num
beijing	3434
guangzhou	11232
shanghai	2343

# 字典的字典就可以构建df了
popu = {'bj':9898, 'sh':89887, 'gz':11232}
df = pd.DataFrame({'gdp':popu})  # 字典的字典
df

	gdp
bj	9898
gz	11232
sh	89887

gdp = {'bj':0.998, 'sh':0.889, 'gz':1.232}
df = pd.DataFrame({'gdp':gdp, 'popu':popu})
df

	gdp	popu
bj	0.998	9898
gz	1.232	11232
sh	0.889	89887

# 他会自动扩充
# 单一数值会被自动扩充
df = pd.DataFrame({ 'gdp': gdp, 'popu':popu, 'country':'China'})
df

	country	gdp	popu
bj	China	0.998	9898
gz	China	1.232	11232
sh	China	0.889	89887

pandas里面数据的属性

# values属性转为numpy的array数据
df = pd.DataFrame({'gdp':gdp, 'popu':popu});
df

	gdp	popu
bj	0.998	9898
gz	1.232	11232
sh	0.889	89887

df.values

array([[  9.98000000e-01,   9.89800000e+03],
       [  1.23200000e+00,   1.12320000e+04],
       [  8.89000000e-01,   8.98870000e+04]])

# values属性转为numpy的array数据
df = pd.DataFrame({'gdp':gdp, 'popu':popu, 'country':"China"});
df

	country	gdp	popu
bj	China	0.998	9898
gz	China	1.232	11232
sh	China	0.889	89887

df.values #计算速度更快

array([['China', 0.998, 9898],
       ['China', 1.232, 11232],
       ['China', 0.889, 89887]], dtype=object)

df.index

Index(['bj', 'gz', 'sh'], dtype='object')

df.columns

Index(['country', 'gdp', 'popu'], dtype='object')

df.shape

(3, 3)

df.dtypes

country     object
gdp        float64
popu         int64
dtype: object

df.size

索引查找数据

df = pd.DataFrame({'gdp':gdp, 'popu':popu});
df

	gdp	popu
bj	0.998	9898
gz	1.232	11232
sh	0.889	89887

# 取一列
df['gdp']

bj    0.998
gz    1.232
sh    0.889
Name: gdp, dtype: float64

df.gdp # 对于上面方法的简写

bj    0.998
gz    1.232
sh    0.889
Name: gdp, dtype: float64

# 取一行
df.loc['sh']

gdp         0.889
popu    89887.000
Name: sh, dtype: float64

df.loc[ ['sh', 'bj']] #取多行需要传入列表

	gdp	popu
sh	0.889	89887
bj	0.998	9898

df.loc[ 'bj':'gz'] # 切片可以取到左闭右闭的索引的 一个表格

	gdp	popu
bj	0.998	9898
gz	1.232	11232

# 用位置（整数下标）取数据
df.iloc[ 0]

gdp        0.998
popu    9898.000
Name: bj, dtype: float64

df.iloc[ [0, 2]]

	gdp	popu
bj	0.998	9898
sh	0.889	89887

df.loc['sh', 'gdp'] #精确到一个cell

0.88900000000000001

# iloc 取一个cell
df.iloc[ 0, 1]

# 转为 ndarray 之后再取数
df.values[0][1]

9898.0

# 1: 1到最后  ：表示所有的列
df.iloc[ 1:, :]

	gdp	popu
gz	1.232	11232
sh	0.889	89887

df.gdp > 0

bj    True
gz    True
sh    True
Name: gdp, dtype: bool

df.gdp > 0.9

bj     True
gz     True
sh    False
Name: gdp, dtype: bool

df.loc[ df.gdp>0.9] # 用bool变量来筛选

	gdp	popu
bj	0.998	9898
gz	1.232	11232

df[ df.gdp>0.9]

	gdp	popu
bj	0.998	9898
gz	1.232	11232

DF里面的赋值

df.iloc[ 0, 1] = 0 #修改cell
df

	gdp	popu
bj	0.998	0
gz	1.232	11232
sh	0.889	89887

new_column = pd.Series(['010','020','0755'], index=['bj', 'sh','gz']);
new_column

bj     010
sh     020
gz    0755
dtype: object

df['tel'] = new_column #增加一列
df

	gdp	popu	tel
bj	0.998	0	010
gz	1.232	11232	0755
sh	0.889	89887	020

查看数据的基本特征

dates = pd.date_range('2020-1-1', periods=6)
dates

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05', '2020-01-06'],
              dtype='datetime64[ns]', freq='D')

df = pd.DataFrame(np.random.randint(0, 10, (6,5)), index = dates, columns=list('ABCDE'))
df

	A	B	C	D	E
2020-01-01	4	9	8	8	8
2020-01-02	4	5	5	8	7
2020-01-03	1	0	3	2	0
2020-01-04	9	5	9	6	0
2020-01-05	7	8	3	8	0
2020-01-06	4	3	1	0	8

df.describe() # 对于每一列的描述，看整体结构

	A	B	C	D	E
count	6.000000	6.000000	6.000000	6.000000	6.000000
mean	4.833333	5.000000	4.833333	5.333333	3.833333
std	2.786874	3.286335	3.125167	3.502380	4.215052
min	1.000000	0.000000	1.000000	0.000000	0.000000
25%	4.000000	3.500000	3.000000	3.000000	0.000000
50%	4.000000	5.000000	4.000000	7.000000	3.500000
75%	6.250000	7.250000	7.250000	8.000000	7.750000
max	9.000000	9.000000	9.000000	8.000000	8.000000

df.info() # 描述信息

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 6 entries, 2020-01-01 to 2020-01-06
Freq: D
Data columns (total 5 columns):
A    6 non-null int32
B    6 non-null int32
C    6 non-null int32
D    6 non-null int32
E    6 non-null int32
dtypes: int32(5)
memory usage: 168.0 bytes

df.head(1) # 前几行

	A	B	C	D	E
2020-01-01	4	9	8	8	8

df.tail(2)

	A	B	C	D	E
2020-01-05	7	8	3	8	0
2020-01-06	4	3	1	0	8

df.T # 转置

	2020-01-01 00:00:00	2020-01-02 00:00:00	2020-01-03 00:00:00	2020-01-04 00:00:00	2020-01-05 00:00:00	2020-01-06 00:00:00
A	4	4	1	9	7	4
B	9	5	0	5	8	3
C	8	5	3	9	3	1
D	8	8	2	6	8	0
E	8	7	0	0	0	8

df.sort_index() #默认 按照行索引升序

	A	B	C	D	E
2020-01-01	4	9	8	8	8
2020-01-02	4	5	5	8	7
2020-01-03	1	0	3	2	0
2020-01-04	9	5	9	6	0
2020-01-05	7	8	3	8	0
2020-01-06	4	3	1	0	8

df.sort_index(ascending=False) # sort by the row index in descending order

	A	B	C	D	E
2020-01-06	4	3	1	0	8
2020-01-05	7	8	3	8	0
2020-01-04	9	5	9	6	0
2020-01-03	1	0	3	2	0
2020-01-02	4	5	5	8	7
2020-01-01	4	9	8	8	8

df.sort_index(axis=1, ascending=False) # 按照列索引 降序  按照索引排序

	E	D	C	B	A
2020-01-01	8	8	8	9	4
2020-01-02	7	8	5	5	4
2020-01-03	0	2	3	0	1
2020-01-04	0	6	9	5	9
2020-01-05	0	8	3	8	7
2020-01-06	8	0	1	3	4

# 按照值排序
df.sort_values('B') #默认是按照某一列的值 对各个行排序

	A	B	C	D	E
2020-01-03	1	0	3	2	0
2020-01-06	4	3	1	0	8
2020-01-02	4	5	5	8	7
2020-01-04	9	5	9	6	0
2020-01-05	7	8	3	8	0
2020-01-01	4	9	8	8	8

df.sort_values(dates[0], axis=1)

	A	C	D	E	B
2020-01-01	4	8	8	8	9
2020-01-02	4	5	8	7	5
2020-01-03	1	3	2	0	0
2020-01-04	9	9	6	0	5
2020-01-05	7	3	8	0	8
2020-01-06	4	1	0	8	3

数据计算

a = pd.DataFrame([1, 2, 3])
a

	0
0	1
1	2
2	3

a-2

	0
0	-1
1	0
2	1

b = pd.DataFrame([1,3,4])
a+b

	0
0	2
1	5
2	7

a*b

	0
0	1
1	6
2	12

b.T

	0	1	2
0	1	3	4

a.dot(b.T) #矩阵乘法

	0	1	2
0	1	3	4
1	2	6	8
2	3	9	12

a = pd.DataFrame(np.random.randint(0, 20, (2,2)), columns=['A', 'B'])
a

	A	B
0	17	1
1	4	11

b = pd.DataFrame(np.random.randint(0, 20, (3,3)), columns = ['A', 'B', 'C'])
b

	A	B	C
0	9	5	17
1	9	12	16
2	0	13	4

a+b # labels present in both frames are added; positions missing from either one become NaN

	A	B	C
0	26.0	6.0	NaN
1	13.0	23.0	NaN
2	NaN	NaN	NaN

a.add(b, fill_value=11111111) # 先填充到 shape相同再计算

	A	B	C
0	26.0	6.0	11111128.0
1	13.0	23.0	11111127.0
2	11111111.0	11111124.0	11111115.0

缺失值的处理

a = pd.DataFrame(np.arange(9).reshape(3,3))
a

	0	1	2
0	0	1	2
1	3	4	5
2	6	7	8

# NaN cannot be stored in an integer column, so convert column 2 to float
# explicitly instead of relying on an implicit upcast during assignment.
a[2] = a[2].astype(float)
a.iloc[:2, 2] = np.nan  # np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0
a

	0	1	2
0	0	1	NaN
1	3	4	NaN
2	6	7	8.0

# 丢掉缺失值：dropna() 返回一个新的 DataFrame，不会修改 a 本身（下面再看 a，内容没有变化）
a.dropna()
a

	0	1	2
0	0	1	NaN
1	3	4	NaN
2	6	7	8.0

a.dropna() #按照行丢弃

	0	1	2
2	6	7	8.0

a.dropna(axis=1) # 按照列丢弃

	0	1
0	0	1
1	3	4
2	6	7

a.dropna(axis=1, how='all') # 全部缺失才丢弃

	0	1	2
0	0	1	NaN
1	3	4	NaN
2	6	7	8.0

a.fillna(999) #帮我们修改

	0	1	2
0	0	1	999.0
1	3	4	999.0
2	6	7	8.0

合并和对齐

a = pd.DataFrame(np.zeros((3,4)), columns=['a', 'b', 'c', 'd'])
a

	a	b	c	d
0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0

b = pd.DataFrame(np.zeros( (3,4)), columns=list('abcd'))
b

	a	b	c	d
0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0

# 合并，拼接
pd.concat([a, b]) # 需要传递一个列表进去

	a	b	c	d
0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0
0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0

# 让pd帮我们重新索引
pd.concat( [a, b], ignore_index=True)

	a	b	c	d
0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0
3	0.0	0.0	0.0	0.0
4	0.0	0.0	0.0	0.0
5	0.0	0.0	0.0	0.0

# 水平合并
pd.concat( [a, b], axis=1)

	a	b	c	d	a	b	c	d
0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0

pd.concat( [a, b], axis=1, ignore_index=True) # ignoreindex 会帮助我们废弃原来的不好用的列名

	0	1	2	3	4	5	6	7
0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0

# shape不同的时候怎么办
a = pd.DataFrame(np.ones( (3,3)), index=[0, 1, 2], columns=list('abc'))
b = pd.DataFrame(np.ones( (3,3)), index=[2, 3, 4], columns = list('cde'))
pd.concat([a, b])

	a	b	c	d	e
0	1.0	1.0	1.0	NaN	NaN
1	1.0	1.0	1.0	NaN	NaN
2	1.0	1.0	1.0	NaN	NaN
2	NaN	NaN	1.0	1.0	1.0
3	NaN	NaN	1.0	1.0	1.0
4	NaN	NaN	1.0	1.0	1.0

pd.concat( [a, b], axis=1)

	a	b	c	c	d	e
0	1.0	1.0	1.0	NaN	NaN	NaN
1	1.0	1.0	1.0	NaN	NaN	NaN
2	1.0	1.0	1.0	1.0	1.0	1.0
3	NaN	NaN	NaN	1.0	1.0	1.0
4	NaN	NaN	NaN	1.0	1.0	1.0

# 增加一行的办法
a = pd.DataFrame( np.ones( (3,4)), index=[0, 1, 2], columns=['a', 'b','c','d'])
a

	a	b	c	d
0	1.0	1.0	1.0	1.0
1	1.0	1.0	1.0	1.0
2	1.0	1.0	1.0	1.0

b = pd.Series([100, 100, 100, 100], index=list('abcd'))
b

a    100
b    100
c    100
d    100
dtype: int64

# DataFrame.append was removed in pandas 2.0; turn the Series into a one-row
# frame (its index becomes the column labels) and concatenate instead.
pd.concat([a, b.to_frame().T], ignore_index=True)  # add one row

	a	b	c	d
0	1.0	1.0	1.0	1.0
1	1.0	1.0	1.0	1.0
2	1.0	1.0	1.0	1.0
3	100.0	100.0	100.0	100.0

# 对齐：用 merge 方法。默认按两个表共有的列（这里是 B）做连接，把该列取值相同的行合并到一起，原来的行索引会被丢弃
a = pd.DataFrame([[-1, 1], 
                 [-2,  0]], index=[1, 2], columns=["A", "B"]);
b = pd.DataFrame([[1, 11], 
                 [0,   10]], index=[1, 2], columns= ['B', 'C']);
print(a)
print(b)

pd.merge( a,b)

	A	B	C
0	-1	1	11
1	-2	0	10

b = pd.DataFrame([[0, 20], 
                  [1, 21]], index=[1,2], columns = ['B', 'C']);
b

	B	C
1	0	20
2	1	21

	A	B
1	-1	1
2	-2	0

pd.merge(a, b)

	A	B	C
0	-1	1	21
1	-2	0	20

分组

df = pd.DataFrame({
    'key':list('ABCCBA'),
    'data1':range(6), # range是python自带的 np.arange 是numpy里面的
    'data2':range(20,26)
})
df

	data1	data2	key
0	0	20	A
1	1	21	B
2	2	22	C
3	3	23	C
4	4	24	B
5	5	25	A

groups = df.groupby('key')
groups

<pandas.core.groupby.DataFrameGroupBy object at 0x000002A97C0EACC0>

groups.sum() # 每一组的sum

	data1	data2
key
A	5	45
B	5	45
C	5	45

groups.data1.sum()  # 针对其中一列 sum

key
A    5
B    5
C    5
Name: data1, dtype: int32

groups.median()

	data1	data2
key
A	2.5	22.5
B	2.5	22.5
C	2.5	22.5

groups['data1'].mean()# the return value is a Series

key
A    2.5
B    2.5
C    2.5
Name: data1, dtype: float64

groups.apply(lambda x:x['data1']/x['data1'].sum())

key   
A    0    0.0
     5    1.0
B    1    0.2
     4    0.8
C    2    0.4
     3    0.6
Name: data1, dtype: float64

def func(x):
    """Return a copy of *x* whose ``data1`` column is scaled to sum to 1.

    Used as a ``groupby(...).apply`` callback, so each call normalizes the
    rows of one group.

    Args:
        x: DataFrame containing a numeric ``data1`` column.

    Returns:
        A new DataFrame in which ``data1`` holds each value divided by the
        column total; every other column is left as it was.
    """
    # Work on a copy: mutating the frame handed in by ``apply`` is
    # discouraged by pandas and would also change the caller's data.
    normalized = x.copy()
    normalized['data1'] = normalized['data1'] / normalized['data1'].sum()
    return normalized

groups.apply(func)

	data1	data2
0	0.0	20
1	0.2	21
2	0.4	22
3	0.6	23
4	0.8	24
5	1.0	25

def func(x):
    """Normalize the ``data1`` column of *x* so that it sums to 1.

    Passed to ``df.groupby('key').apply`` so the normalization is done
    separately for each group.

    Args:
        x: DataFrame containing a numeric ``data1`` column.

    Returns:
        A new DataFrame with ``data1`` divided by its total; the input frame
        and its other columns are not modified.
    """
    # Copy first so the frame supplied by ``apply`` is never mutated, which
    # pandas warns against for user-defined functions.
    result = x.copy()
    total = result['data1'].sum()
    result['data1'] = result['data1'] / total
    return result

df.groupby('key').apply(func)  # per-group normalization; this one-liner is the simpler way to write it

	data1	data2	key
0	0.0	20	A
1	0.2	21	B
2	0.4	22	C
3	0.6	23	C
4	0.8	24	B
5	1.0	25	A

数据透视表

import seaborn as sns
titanic = sns.load_dataset('titanic')

titanic.head()

	survived	pclass	sex	age	sibsp	fare	embarked	class	who	adult_male	deck	embark_town	alive	alone
0	0	3	male	22.0	1	7.2500	S	Third	man	True	NaN	Southampton	no	False
1	1	1	female	38.0	1	71.2833	C	First	woman	False	C	Cherbourg	yes	False
2	1	3	female	26.0	0	7.9250	S	Third	woman	False	NaN	Southampton	yes	True
3	1	1	female	35.0	1	53.1000	S	First	woman	False	C	Southampton	yes	False
4	0	3	male	35.0	0	8.0500	S	Third	man	True	NaN	Southampton	no	True

titanic.pivot_table('survived', index='sex', columns='class')#透视表

class	First	Second	Third
sex
female	0.968085	0.921053	0.500000
male	0.368852	0.157407	0.135447

xingxiliang

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
Pandas快速总结

Series数据结构# 带标签的一列import pandas as pd;a = pd.Series( [1,2,3,4,5]);a0 11 22 33 44 5dtype: int64# 传入indexa = pd.Series([1,2,3,4,5], index=['a', 'b', 'c', 'd', 'e'], dtype=float);aa 1.0b 2.0c 3.0d 4.0e 5.0d
复制链接

扫一扫

专栏目录

	a	b	c	d
0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0
0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0

	a	b	c	d
0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0
3	0.0	0.0	0.0	0.0
4	0.0	0.0	0.0	0.0
5	0.0	0.0	0.0	0.0

	a	b	c	d	a	b	c	d
0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0

	0	1	2	3	4	5	6	7
0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0

	a	b	c	d	e
0	1.0	1.0	1.0	NaN	NaN
1	1.0	1.0	1.0	NaN	NaN
2	1.0	1.0	1.0	NaN	NaN
2	NaN	NaN	1.0	1.0	1.0
3	NaN	NaN	1.0	1.0	1.0
4	NaN	NaN	1.0	1.0	1.0

	a	b	c	c	d	e
0	1.0	1.0	1.0	NaN	NaN	NaN
1	1.0	1.0	1.0	NaN	NaN	NaN
2	1.0	1.0	1.0	1.0	1.0	1.0
3	NaN	NaN	NaN	1.0	1.0	1.0
4	NaN	NaN	NaN	1.0	1.0	1.0

	a	b	c	d
0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0
0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0

	a	b	c	d
0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0
3	0.0	0.0	0.0	0.0
4	0.0	0.0	0.0	0.0
5	0.0	0.0	0.0	0.0

	a	b	c	d	a	b	c	d
0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0

	0	1	2	3	4	5	6	7
0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0

	a	b	c	d	e
0	1.0	1.0	1.0	NaN	NaN
1	1.0	1.0	1.0	NaN	NaN
2	1.0	1.0	1.0	NaN	NaN
2	NaN	NaN	1.0	1.0	1.0
3	NaN	NaN	1.0	1.0	1.0
4	NaN	NaN	1.0	1.0	1.0

	a	b	c	c	d	e
0	1.0	1.0	1.0	NaN	NaN	NaN
1	1.0	1.0	1.0	NaN	NaN	NaN
2	1.0	1.0	1.0	1.0	1.0	1.0
3	NaN	NaN	NaN	1.0	1.0	1.0
4	NaN	NaN	NaN	1.0	1.0	1.0

	a	b	c	d
0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0
0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0

	a	b	c	d
0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0
3	0.0	0.0	0.0	0.0
4	0.0	0.0	0.0	0.0
5	0.0	0.0	0.0	0.0

	a	b	c	d	a	b	c	d
0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0

	0	1	2	3	4	5	6	7
0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0

	a	b	c	d	e
0	1.0	1.0	1.0	NaN	NaN
1	1.0	1.0	1.0	NaN	NaN
2	1.0	1.0	1.0	NaN	NaN
2	NaN	NaN	1.0	1.0	1.0
3	NaN	NaN	1.0	1.0	1.0
4	NaN	NaN	1.0	1.0	1.0

	a	b	c	c	d	e
0	1.0	1.0	1.0	NaN	NaN	NaN
1	1.0	1.0	1.0	NaN	NaN	NaN
2	1.0	1.0	1.0	1.0	1.0	1.0
3	NaN	NaN	NaN	1.0	1.0	1.0
4	NaN	NaN	NaN	1.0	1.0	1.0