pandas之DataFrame的创建&基本操作&索引

最新推荐文章于 2024-04-21 10:10:29 发布

simon5169

最新推荐文章于 2024-04-21 10:10:29 发布

阅读量1.3k

点赞数 1

分类专栏： python 文章标签： python 索引大数据 numpy

本文链接：https://blog.csdn.net/blances/article/details/105499678

版权

python 专栏收录该内容

21 篇文章 0 订阅

订阅专栏

import pandas as pd

import numpy as np
print("*"*25+"dataframe创建"+"*"*25)

*************************dataframe创建*************************

pd.DataFrame(np.arange(12).reshape(3,4)) #index 横向索引，0轴，axis=0；columns 纵向索引，1轴，axis=1

	0	1	2	3
0	0	1	2	3
1	4	5	6	7
2	8	9	10	11

pd.DataFrame(np.arange(12).reshape(3,4),index=list("abc"),columns=list("DEFG"))

	D	E	F	G
a	0	1	2	3
b	4	5	6	7
c	8	9	10	11

d1={"name":["n1","n2"],"age":[20,30],"tel":[111,222]}

t1=pd.DataFrame(d1) #字典转换为dataframe
type(t1)

pandas.core.frame.DataFrame

t1

	name	age	tel
0	n1	20	111
1	n2	30	222

d2=[{"name":"n1","age":20,"tel":111},{"name":"n2","tel":222}]

t2=pd.DataFrame(d2) #由字典组成的列表同样可以转换为DataFrame，缺失的值为NaN
type(t2)

pandas.core.frame.DataFrame

t2

	name	age	tel
0	n1	20.0	111
1	n2	NaN	222

print("*"*25+"dataframe操作"+"*"*25)

*************************dataframe操作*************************

t2.index #t2的行索引

RangeIndex(start=0, stop=2, step=1)

t2.columns #t2的列索引

Index(['name', 'age', 'tel'], dtype='object')

t2.values #t2的值

array([['n1', 20.0, 111],
       ['n2', nan, 222]], dtype=object)

t2.shape #t2的结构

(2, 3)

t2.dtypes #values的数据类型

name     object
age     float64
tel       int64
dtype: object

t2.ndim #t2的维度

t2.head(1) #显示前几行 默认前5行

	name	age	tel
0	n1	20.0	111

t2.tail(1) #显示后几行

	name	age	tel
1	n2	NaN	222

t2.info() #t2相关信息概览

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   name    2 non-null      object 
 1   age     1 non-null      float64
 2   tel     2 non-null      int64  
dtypes: float64(1), int64(1), object(1)
memory usage: 176.0+ bytes

t2.describe() #快速进行数字相关的统计计算

	age	tel
count	1.0	2.000000
mean	20.0	166.500000
std	NaN	78.488853
min	20.0	111.000000
25%	20.0	138.750000
50%	20.0	166.500000
75%	20.0	194.250000
max	20.0	222.000000

print("*"*25+"dataframe使用"+"*"*25)

*************************dataframe使用*************************

df = pd.read_csv("./dogNames2.csv") #读取数据

df.sort_values(by="Count_AnimalName",ascending=False) #dataframe排序方法，by=指定排序的列；默认升序，ascending=False表示降序
# print(df.head(5))

	Row_Labels	Count_AnimalName
1156	BELLA	1195
9140	MAX	1153
2660	CHARLIE	856
3251	COCO	852
12368	ROCKY	823
...	...	...
6884	J-LO	1
6888	JOANN	1
6890	JOAO	1
6891	JOAQUIN	1
16219	39743	1

16220 rows × 2 columns

print("*"*25+"dataframe索引"+"*"*25)

*************************dataframe索引*************************

df_sorted=df.sort_values(by="Count_AnimalName",ascending=False)
df_sorted[:5] #取前5行，默认按行操作

	Row_Labels	Count_AnimalName
1156	BELLA	1195
9140	MAX	1153
2660	CHARLIE	856
3251	COCO	852
12368	ROCKY	823

df_sorted[:5]["Row_Labels"] #取前5行，并单独只取某一列

1156       BELLA
9140         MAX
2660     CHARLIE
3251        COCO
12368      ROCKY
Name: Row_Labels, dtype: object

df_sorted[:1]

	Row_Labels	Count_AnimalName
1156	BELLA	1195

t3=pd.DataFrame(np.arange(12).reshape(3,4),index=list("abc"),columns=list("WXYZ"))

t3

	W	X	Y	Z
a	0	1	2	3
b	4	5	6	7
c	8	9	10	11

t3.loc["a","Z"]#loc通过标签索引行数据（范围是闭合的），iloc通过位置获取行数据

type(t3.loc["a","Z"])

numpy.int64

t3.loc["a"] #取a整行

W    0
X    1
Y    2
Z    3
Name: a, dtype: int64

t3.loc[:,"Z"]

a     3
b     7
c    11
Name: Z, dtype: int64

t3.loc[["a","c"]] #取多行

	W	X	Y	Z
a	0	1	2	3
c	8	9	10	11

t3.loc[:,["X","Z"]] #取多列

	X	Z
a	1	3
b	5	7
c	9	11

t3.iloc[1] #iloc按位置获取

W    4
X    5
Y    6
Z    7
Name: b, dtype: int64

t3.iloc[:,2] #iloc获取列

a     2
b     6
c    10
Name: Y, dtype: int64

t3.iloc[[0,2],[2,1]] #按位置获取第1、3行与第3、2列（结果按给定的顺序排列）

	Y	X
a	2	1
c	10	9

t3.iloc[1:,:2] #获取第2行到最后一行、第1列到第2列（位置从0开始，切片不包含结束位置）

	W	X
b	4	5
c	8	9

t3.iloc[1:,:2]=11 #赋值
t3

	W	X	Y	Z
a	0	1	2	3
b	11	11	6	7
c	11	11	10	11

字符串方法：
在这里插入图片描述

simon5169

关注

1
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
pandas之DataFrame的创建&基本操作&索引

import pandas as pdimport numpy as npprint("*"*25+"dataframe创建"+"*"*25)*************************dataframe创建*************************pd.DataFrame(np.arange(12).reshape(3,4)) #index 横向索引，0轴，axis=...
复制链接

扫一扫