"""Introductory pandas walkthrough: creating Series/DataFrame objects and indexing.

The script is a flat tutorial. It builds Series and DataFrames from lists,
dicts, NumPy arrays and other DataFrames, then shows label-based and
position-based access on a Series. Expected output is recorded in comments;
integer dtypes shown as int64 may print as int32 on platforms whose default
NumPy integer is 32-bit (the original run showed int32 for array-backed data).
"""
import numpy as np
import pandas as pd

# ---------------------------------------------------------------------------
# Series construction
# ---------------------------------------------------------------------------
# From a list (a default RangeIndex is generated).
S1 = pd.Series(["a", "b", "c", "d"])
# From a list with an explicit index.
S2 = pd.Series([1, 2, 3, 4], index=["a", "b", "c", "d"])
# From a dict (keys become the index).
S3 = pd.Series({"a": 1, "b": 2, "c": 3, "d": 4})

# Index and values of a Series. As bare expressions these display nothing when
# run as a script; they are printed explicitly below.
S1.index   # index
S1.values  # values

print('S1:',S1)
# 0    a
# 1    b
# 2    c
# 3    d
# dtype: object
print('S2',S2)
# a    1
# b    2
# c    3
# d    4
# dtype: int64
print('S3:',S3)
# a    1
# b    2
# c    3
# d    4
# dtype: int64
print('索引:S1.index:',S1.index)
# RangeIndex(start=0, stop=4, step=1)
print('值:S1.values',S1.values)
# ['a' 'b' 'c' 'd']

# ---------------------------------------------------------------------------
# DataFrame construction from Python containers
# ---------------------------------------------------------------------------
# From a flat list (one column).
df1 = pd.DataFrame(["a", "b", "c", "d"])
# From a nested list (one inner list per row).
df2 = pd.DataFrame([["a", "A"], ["b", "B"], ["c", "C"], ["d", "D"]])
# With explicit column labels and row labels.
df3 = pd.DataFrame(
    [["a", "A"], ["b", "B"], ["c", "C"], ["d", "D"]],
    columns=["小写", "大写"],
    index=["一", "二", "三", "四"],
)
# From a dict of lists (keys become column labels).
data = {"小写": ["a", "b", "c", "d"], "大写": ["A", "B", "C", "D"]}
df4 = pd.DataFrame(data)

# Column labels and row labels of a DataFrame. The original referenced an
# undefined name `df` and had the two descriptions swapped.
df4.columns  # column index
df4.index    # row index

print('--------------------------------')

# 1) Create a Series from a one-dimensional array.
print('1)通过一维数组创建序列')
arr1 = np.arange(10)
print(arr1)
# [0 1 2 3 4 5 6 7 8 9]
print('type(arr1):', type(arr1))
# type(arr1): <class 'numpy.ndarray'>
s1 = pd.Series(arr1)
print('s1:', s1)
# s1: 0    0
# 1    1
# ...
# 9    9
# dtype: int64 (platform-dependent; int32 in the original run)
print('type(s1):', type(s1))
# type(s1): <class 'pandas.core.series.Series'>

# 2) Create a Series from a dict.
print('2)通过字典的方式创建序列')
dic1 = {'a': 10, 'b': 20, 'c': 30, 'd': 40, 'e': 50}
print(dic1)
# {'a': 10, 'b': 20, 'c': 30, 'd': 40, 'e': 50}
print('type(dic1):', type(dic1))
# type(dic1): <class 'dict'>
print('befor:')
# Nothing is shown for the next line: only an interactive session (REPL or
# notebook) echoes the value of a bare expression; a script discards it.
type(dic1)
print('after:')
s2 = pd.Series(dic1)
print(s2)
# a    10
# b    20
# c    30
# d    40
# e    50
# dtype: int64
print('type(s2):', type(s2))
# type(s2): <class 'pandas.core.series.Series'>
type(s2)  # no output for the same reason as above

# 3) Creating a Series from a DataFrame row or column is shown further down,
#    once DataFrame construction has been covered.
print('3)通过DataFrame中的某一行或某一列创建序列')

# ---------------------------------------------------------------------------
# DataFrame construction: three main approaches
# ---------------------------------------------------------------------------
# 1) From a two-dimensional array.
print('1)通过二维数组创建数据框')
arr2 = np.arange(12).reshape(4, 3)
print(arr2)
# [[ 0  1  2]
#  [ 3  4  5]
#  [ 6  7  8]
#  [ 9 10 11]]
print('type(arr2):',type(arr2))
# type(arr2): <class 'numpy.ndarray'>
df1 = pd.DataFrame(arr2)
print(df1)
#    0   1   2
# 0  0   1   2
# 1  3   4   5
# 2  6   7   8
# 3  9  10  11
print('type(df1):',type(df1))
# type(df1): <class 'pandas.core.frame.DataFrame'>

# 2) From dicts: first a dict of lists, then a nested dict.
print('2)通过字典的方式创建数据框:')
dic2 = {
    'a': [1, 2, 3, 4],
    'b': [5, 6, 7, 8],
    'c': [9, 10, 11, 12],
    'd': [13, 14, 15, 16],
}
print(dic2)
# {'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8], 'c': [9, 10, 11, 12], 'd': [13, 14, 15, 16]}
print('type(dic2):',type(dic2))
# type(dic2): <class 'dict'>
df2 = pd.DataFrame(dic2)
print(df2)
#    a  b   c   d
# 0  1  5   9  13
# 1  2  6  10  14
# 2  3  7  11  15
# 3  4  8  12  16
print('type(df2):',type(df2))
# type(df2): <class 'pandas.core.frame.DataFrame'>

# Nested dict: outer keys become columns, inner keys become row labels.
dic3 = {
    'one': {'a': 1, 'b': 2, 'c': 3, 'd': 4},
    'two': {'a': 5, 'b': 6, 'c': 7, 'd': 8},
    'three': {'a': 9, 'b': 10, 'c': 11, 'd': 12},
}
print(dic3)
# {'one': {'a': 1, 'b': 2, 'c': 3, 'd': 4}, 'two': {...}, 'three': {...}}
print('type(dic3:',type(dic3))
# type(dic3: <class 'dict'>
df3 = pd.DataFrame(dic3)
print(df3)
#    one  two  three
# a    1    5      9
# b    2    6     10
# c    3    7     11
# d    4    8     12
print('type(df3):',type(df3))
# type(df3): <class 'pandas.core.frame.DataFrame'>

# 3) From an existing DataFrame.
print('3)通过数据框的方式创建数据框:')
# A list of labels inside [] selects COLUMNS: this keeps the two columns
# 'one' and 'three' (not two rows).
df4 = df3[['one', 'three']]
print(df4)
#    one  three
# a    1      9
# b    2     10
# c    3     11
# d    4     12
print('type(df4):',type(df4))
# type(df4): <class 'pandas.core.frame.DataFrame'>

# A single column label returns a Series.
s3 = df3['one']
print(s3)
# a    1
# b    2
# c    3
# d    4
# Name: one, dtype: int64
print('type(s3):',type(s3))
# type(s3): <class 'pandas.core.series.Series'>

# ---------------------------------------------------------------------------
# Index handling on a Series
# ---------------------------------------------------------------------------
print('索引相关:')
s4 = pd.Series(np.array([1, 1, 2, 3, 5, 8]))
print('s4:',s4)
# s4: 0    1
# 1    1
# 2    2
# 3    3
# 4    5
# 5    8
# dtype: int64 (platform-dependent; int32 in the original run)
print('s4.index:',s4.index)
# s4.index: RangeIndex(start=0, stop=6, step=1)

# Replace the default integer index with string labels.
s4.index = ['a', 'b', 'c', 'd', 'e', 'f']
print('s4:',s4)
# s4: a    1
# b    1
# c    2
# d    3
# e    5
# f    8

# Positional access uses .iloc. Plain s4[3] on a string-labelled Series relied
# on a positional fallback that newer pandas releases deprecate and remove.
print('s4[3]:',s4.iloc[3])
# s4[3]: 3
# Label access with the new index.
print(r"s4['e']:",s4['e'])
# s4['e']: 5
# Several positions at once (again via .iloc).
print(r's4[[1,3,5]]:',s4.iloc[[1, 3, 5]])
# s4[[1,3,5]]: b    1
# d    3
# f    8
# Several labels at once.
print("s4[['a','b','d','f']]:",s4[['a', 'b', 'd', 'f']])
# s4[['a','b','d','f']]: a    1
# b    1
# d    3
# f    8
# Positional slice: positions 0..3, the stop position 4 is excluded.
print(r's4[:4]:',s4.iloc[:4])
# s4[:4]: a    1
# b    1
# c    2
# d    3
# Label slice from 'c' to the end.
print(r"s4['c':]:",s4['c':])
# s4['c':]: c    2
# d    3
# e    5
# f    8
# Label slice from 'b' to 'e'; unlike positional slices, BOTH ends are included.
print("s4['b':'e']:",s4['b':'e'])
# s4['b':'e']: b    1
# c    2
# d    3
# e    5
Pandas 入门之 index:代码均在 Python 3.7 环境下调试通过,并附有注释;部分示例给出了运行结果,较大的结果集未列出。
于 2022-07-17 09:02:50 首次发布
本文介绍了如何使用Pandas创建Series和DataFrame。通过一维数组、字典和二维数组等方式创建序列和数据框,并展示了如何操作索引和获取数据。同时,涉及了从DataFrame中提取序列的方法。
摘要由CSDN通过智能技术生成