pandas 入门之index，代码均在python3.7环境调试过，也有注释，部分有运行结果，比较大的结果集未列出

handsome1234

已于 2022-07-17 09:03:11 修改

阅读量347

点赞数

文章标签： python pandas

于 2022-07-17 09:02:50 首次发布

本文链接：https://blog.csdn.net/handsome1234/article/details/125828208

版权

本文介绍了如何使用Pandas创建Series和DataFrame。通过一维数组、字典和二维数组等方式创建序列和数据框，并展示了如何操作索引和获取数据。同时，涉及了从DataFrame中提取序列的方法。

摘要由CSDN通过智能技术生成

import numpy as np
import pandas as pd
# --- Creating a Series -------------------------------------------------
# From a plain list: pandas supplies a default 0..n-1 RangeIndex.
S1 = pd.Series(list("abcd"))
# From a list of values plus explicit index labels.
S2 = pd.Series(data=[1, 2, 3, 4], index=list("abcd"))
# From a dict: the keys become the index labels.
S3 = pd.Series(dict(a=1, b=2, c=3, d=4))

# A Series exposes its labels through .index and its data through .values.
# Written as bare expression statements, the next two lines display nothing
# when this file runs as a script; both are printed explicitly further down.
S1.index
S1.values

print('S1：', S1)
# S1： 0    a
# 1    b
# 2    c
# 3    d
# dtype: object

print('S2', S2)
# S2 a    1
# b    2
# c    3
# d    4
# dtype: int64

print('S3:', S3)
# S3: a    1
# b    2
# c    3
# d    4
# dtype: int64

print('索引：S1.index:', S1.index)
# 索引：S1.index: RangeIndex(start=0, stop=4, step=1)
print('值：S1.values', S1.values)
# 值：S1.values ['a' 'b' 'c' 'd']

# --- Creating a DataFrame ----------------------------------------------
# A flat list becomes a single-column frame.
df1 = pd.DataFrame(list("abcd"))
# A nested list becomes one row per inner list.
df2 = pd.DataFrame([[ch, ch.upper()] for ch in "abcd"])
# Same rows, this time with explicit column names and row labels.
df3 = pd.DataFrame(
    data=[["a", "A"], ["b", "B"], ["c", "C"], ["d", "D"]],
    index=["一", "二", "三", "四"],
    columns=["小写", "大写"],
)
# A dict of equal-length lists: each key becomes a column.
data = {
    "小写": list("abcd"),
    "大写": list("ABCD"),
}
df4 = pd.DataFrame(data)
# Reading the labels back: df.index holds the row labels and df.columns
# holds the column labels.

print('--------------------------------')
# 1) Build a Series from a one-dimensional NumPy array.
print('1）通过一维数组创建序列')
arr1 = np.arange(0, 10)
print(arr1)
# [0 1 2 3 4 5 6 7 8 9]
print('type(arr1)：', type(arr1))
# type(arr1)： <class 'numpy.ndarray'>
s1 = pd.Series(data=arr1)
print('s1:', s1)
# s1: 0    0
# 1    1
# 2    2
# 3    3
# 4    4
# 5    5
# 6    6
# 7    7
# 8    8
# 9    9
# dtype: int32
# (int32 is what the author's run reported; the integer width comes from
# NumPy's platform default, so other machines may show int64.)
print('type(s1):', type(s1))
# type(s1): <class 'pandas.core.series.Series'>

# 2) Build a Series from a dict: the keys become the index labels.
print('2）通过字典的方式创建序列')
dic1 = dict(a=10, b=20, c=30, d=40, e=50)
print(dic1)
# {'a': 10, 'b': 20, 'c': 30, 'd': 40, 'e': 50}
print('type(dic1):', type(dic1))
# type(dic1): <class 'dict'>
print('befor:')
# Nothing appears between 'befor:' and 'after:': a bare expression is only
# echoed by an interactive session (REPL / notebook). In a script its value
# is computed and discarded, so it has to be passed to print() to be shown.
type(dic1)
print('after:')
s2 = pd.Series(data=dic1)
print(s2)
# a    10
# b    20
# c    30
# d    40
# e    50
# dtype: int64
print('type(s2):', type(s2))
# type(s2): <class 'pandas.core.series.Series'>
# Same as above: this bare expression produces no output in a script.
type(s2)
print('3）通过DataFrame中的某一行或某一列创建序列')
# Pulling a Series out of a DataFrame is shown at the end of this section,
# once the DataFrame constructors below have been introduced.

# 2. Creating a DataFrame -- three approaches follow.

# 2.1) From a two-dimensional NumPy array.
print('1）通过二维数组创建数据框')
arr2 = np.arange(12).reshape(4, 3)
print(arr2)
# [[ 0  1  2]
#  [ 3  4  5]
#  [ 6  7  8]
#  [ 9 10 11]]
print('type(arr2):', type(arr2))
# type(arr2): <class 'numpy.ndarray'>
df1 = pd.DataFrame(data=arr2)
print(df1)
#    0   1   2
# 0  0   1   2
# 1  3   4   5
# 2  6   7   8
# 3  9  10  11
print('type(df1):', type(df1))
# type(df1): <class 'pandas.core.frame.DataFrame'>

# 2.2) From dicts. Two shapes are shown: a dict of lists and a dict of dicts.
print('2）通过字典的方式创建数据框:')
# Dict of lists: each key is a column, each list holds that column's values.
dic2 = {
    'a': [1, 2, 3, 4],
    'b': [5, 6, 7, 8],
    'c': [9, 10, 11, 12],
    'd': [13, 14, 15, 16],
}
print(dic2)
# {'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8], 'c': [9, 10, 11, 12], 'd': [13, 14, 15, 16]}
print('type(dic2):', type(dic2))
# type(dic2): <class 'dict'>
df2 = pd.DataFrame(data=dic2)
print(df2)
#    a  b   c   d
# 0  1  5   9  13
# 1  2  6  10  14
# 2  3  7  11  15
# 3  4  8  12  16
print('type(df2):', type(df2))
# type(df2): <class 'pandas.core.frame.DataFrame'>

# Dict of dicts: outer keys are the columns, inner keys are the row labels.
dic3 = {
    'one': dict(a=1, b=2, c=3, d=4),
    'two': dict(a=5, b=6, c=7, d=8),
    'three': dict(a=9, b=10, c=11, d=12),
}
print(dic3)
# {'one': {'a': 1, 'b': 2, 'c': 3, 'd': 4}, 'two': {'a': 5, 'b': 6, 'c': 7, 'd': 8}, 'three': {'a': 9, 'b': 10, 'c': 11, 'd': 12}}
print('type(dic3:', type(dic3))
# type(dic3: <class 'dict'>
df3 = pd.DataFrame(data=dic3)
print(df3)
#    one  two  three
# a    1    5      9
# b    2    6     10
# c    3    7     11
# d    4    8     12
print('type(df3):', type(df3))
# type(df3): <class 'pandas.core.frame.DataFrame'>

# 2.3) From an existing DataFrame.
print('3）通过数据框的方式创建数据框:')
# Indexing with a list of names selects those COLUMNS ('one' and 'three'),
# not rows, and the result is again a DataFrame.
df4 = df3[['one', 'three']]
print(df4)
#    one  three
# a    1      9
# b    2     10
# c    3     11
# d    4     12
print('type(df4):', type(df4))
# type(df4): <class 'pandas.core.frame.DataFrame'>

# Indexing with a single column name yields that column as a Series.
s3 = df3['one']
print(s3)
# a    1
# b    2
# c    3
# d    4
# Name: one, dtype: int64
print('type(s3):', type(s3))
# type(s3): <class 'pandas.core.series.Series'>
# --- Index handling on a Series -----------------------------------------
# Printed output is unchanged from the original tutorial. Positional lookups
# now go through .iloc and label lookups through [] / .loc, so the code no
# longer relies on integer keys falling back to positions on a
# string-labelled Series (deprecated in pandas 2.x; newer releases treat
# such keys as labels and raise KeyError).
print('索引相关：')
s4 = pd.Series(np.array([1, 1, 2, 3, 5, 8]))
print('s4:', s4)
# s4: 0    1
# 1    1
# 2    2
# 3    3
# 4    5
# 5    8
# dtype: int32
# (int32 is what the author's run reported; the integer width comes from
# NumPy's platform default, so other machines may show int64.)
print('s4.index:', s4.index)
# s4.index: RangeIndex(start=0, stop=6, step=1)

# Replace the default integer index with custom string labels.
s4.index = ['a', 'b', 'c', 'd', 'e', 'f']
print('s4:', s4)
# s4: a    1
# b    1
# c    2
# d    3
# e    5
# f    8
# dtype: int32

# Single element by position (0-based): use .iloc, not s4[3].
print('s4[3]:', s4.iloc[3])
# s4[3]: 3

# Single element by label.
print(r"s4['e']:", s4['e'])
# s4['e']: 5

# Several elements by position, in the order listed.
print(r's4[[1,3,5]]:', s4.iloc[[1, 3, 5]])
# s4[[1,3,5]]: b    1
# d    3
# f    8
# dtype: int32

# Several elements by label, in the order listed.
print("s4[['a','b','d','f']]:", s4[['a', 'b', 'd', 'f']])
# s4[['a','b','d','f']]: a    1
# b    1
# d    3
# f    8
# dtype: int32

# Positional slice: positions 0..3; the stop position 4 is excluded.
print(r's4[:4]:', s4.iloc[:4])
# s4[:4]: a    1
# b    1
# c    2
# d    3
# dtype: int32

# Label slice from 'c' through the end of the Series.
print(r"s4['c':]:", s4.loc['c':])
# s4['c':]: c    2
# d    3
# e    5
# f    8
# dtype: int32

# Label slice from 'b' to 'e'. Unlike positional slices, label slices
# include BOTH endpoints, so 'e' is part of the result.
print("s4['b':'e']:", s4.loc['b':'e'])
# s4['b':'e']: b    1
# c    2
# d    3
# e    5
# dtype: int32