Pandas入门系列（三）-- 深入理解Series和DataFrame

最新推荐文章于 2025-08-20 13:10:26 发布

原创最新推荐文章于 2025-08-20 13:10:26 发布 · 6.3k 阅读

18 ·

CC 4.0 BY-SA版权

文章标签：

#Pandas #Series #DataFrame #数据分析 #Python

python数据科学专栏收录该内容

38 篇文章

订阅专栏

本文详细介绍如何使用Python的Pandas库进行数据分析，包括DataFrame和Series的基本操作、构建方式及索引调整等内容。

##数据分析汇总学习 https://blog.csdn.net/weixin_39778570/article/details/81157884

DataFrame和Series

# 导入需要的模块

>>> import pandas as pd
>>> import numpy as np
>>> from pandas import Series, DataFrame

# 首先创建一个字典

>>> data = {'Student':['XiaoMing','XiaoHong','XiaoWang'],'Grade':[100,90,20],'Class':['RG1','RG2','RG3']}

# 创建一个Series对象

>>> s1 = pd.Series(data['Student'])
>>> s1
0    XiaoMing
1    XiaoHong
2    XiaoWang
dtype: object

# 查看一下这个对象的属性

>>> s1.values
array(['XiaoMing', 'XiaoHong', 'XiaoWang'], dtype=object)

# 当没有显式指定索引时，默认索引为从0开始的整数序列（RangeIndex）

>>> s1.index
RangeIndex(start=0, stop=3, step=1)

# 创建并修改默认索引

>>> s1 = pd.Series(data['Student'], index = ['first','second','three'])
>>> s1
first     XiaoMing
second    XiaoHong
three     XiaoWang
dtype: object

# 给Series起个名字（name）‘GDUF’，用Series构建DataFrame时该名字会作为列名

>>> s1 = pd.Series(data['Student'], index = ['first','second','three'], name = 'GDUF')
>>> s1
first     XiaoMing
second    XiaoHong
three     XiaoWang
Name: GDUF, dtype: object
# 注意：索引和列名是可以修改的，方式如下:
>>> s1.name = 'haha'
>>> s1.index = ['first','second','three']

# 使用字典创建DataFrame对象

>>> df1 = pd.DataFrame(data)
>>> df1
    Student  Grade Class
0  XiaoMing    100   RG1
1  XiaoHong     90   RG2
2  XiaoWang     20   RG3

# 查看DataFrame的属性

# 某一列
>>> cou = df1['Student']
>>> cou
0    XiaoMing
1    XiaoHong
2    XiaoWang
Name: Student, dtype: object   # Series是有名字的

# 列的类型为Series
>>> type(cou)
<class 'pandas.core.series.Series'>

# 默认索引
>>> df1.index
RangeIndex(start=0, stop=3, step=1)

# 查看某一列某一行
>>> c1 = df1['Student'][0]
>>> c1
'XiaoMing'
>>> type(c1)
<class 'str'>

# 查看某一行的属性
>>> for row in df1.iterrows():
	print(row), print(type(row)),print(type(row[0])), print(type(row[1]))
	break

(0, Student    XiaoMing
Grade           100
Class           RG1
Name: 0, dtype: object) # 每一行默认的名字为0
<class 'tuple'>       # 每一行为一个tuple对象
<class 'int'>	      # 默认索引为int类型
<class 'pandas.core.series.Series'> 	# 值为Series类型
(None, None, None, None)

# 通过Series构建DataFrame对象

>>> data
{'Student': ['XiaoMing', 'XiaoHong', 'XiaoWang'], 'Grade': [100, 90, 20], 'Class': ['RG1', 'RG2', 'RG3']}
>>> s1 = pd.Series(data['Student'])
>>> s2 = pd.Series(data['Grade'])
>>> s3 = pd.Series(data['Class'])
>>> df_new = pd.DataFrame([s1,s2,s3]) # 当以列表的形式构建的时候会按行来放，有时候可以优先选择按行放 
>>> df_new
          0         1         2
0  XiaoMing  XiaoHong  XiaoWang
1       100        90        20
2       RG1       RG2       RG3
>>> df_new.T			      # 进行行列转置
          0    1    2
0  XiaoMing  100  RG1
1  XiaoHong   90  RG2
2  XiaoWang   20  RG3

# 修改索引（通过字典直接构建，修改索引）

>>> df_new = DataFrame(data)
>>> df_new
    Student  Grade Class
0  XiaoMing    100   RG1
1  XiaoHong     90   RG2
2  XiaoWang     20   RG3
>>> df_new.index = ['first', 'second', 'thrid']
>>> df_new
         Student  Grade Class
first   XiaoMing    100   RG1
second  XiaoHong     90   RG2
thrid   XiaoWang     20   RG3

# 看一下这行代码
# 构建Series对象的时候，同时指定索引，名字

>>> s1 = pd.Series(data['Student'], name = 'Student', index = ['one','two','three'])

# 这样子DataFrame的结构就呼之欲出了，即列（columns），索引（index），值（values）

>>> df2 = pd.DataFrame(s1)
>>> df2
        Student
one    XiaoMing
two    XiaoHong
three  XiaoWang

>>> s1 = pd.Series(data['Student'], name = 'Student', index = ['one','two','three'])
>>> s2 = pd.Series(data['Grade'], name = 'Grade', index = ['one','two','three'])
>>> s3 = pd.Series(data['Class'], name = 'Class', index = ['one','two','three'])
>>> df2 = pd.DataFrame([s1,s2,s3])
>>> df2
>>> df2
              one       two     three
Student  XiaoMing  XiaoHong  XiaoWang
Grade         100        90        20
Class         RG1       RG2       RG3

# 用列表构建是按行放置的，需要转置一下

>>> df2.T
        Student Grade Class
one    XiaoMing   100   RG1
two    XiaoHong    90   RG2
three  XiaoWang    20   RG3

# 下面这种也是一种比较直观的构建方式。但是比较繁琐（字典中的值为Series）

>>> d = {'one': Series([1., 2., 3.], index=['a', 'b', 'c']), 'two': Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])}
>>> df = DataFrame(d)
>>> df
   one  two
a  1.0  1.0
b  2.0  2.0
c  3.0  3.0
d  NaN  4.0

官网：http://pandas.pydata.org/pandas-docs/version/0.14.1/