# DataFrame data structure
# A DataFrame is a tabular data structure: a "labelled two-dimensional array".
# It carries both an index (row labels) and columns (column labels).
df = pd.DataFrame(
    data=np.random.rand(3, 4) * 100,
    index=['one', 'two', 'three'],
    columns=['a', 'b', 'c', 'd'],
)
print(df)
print(type(df))
print('-----')

# Inspect the parts of the frame (df itself is a DataFrame):
#   .index   -> row labels
#   .columns -> column labels
#   .values  -> the underlying values, as an ndarray
for part in (df.index, df.columns, df.values):
    print(part, '\n该数据类型为:', type(part))
a b c d
one 13.173838 78.250185 77.561848 57.013228
two 24.177624 81.762284 99.055599 8.224470
three 74.498542 27.668536 12.516576 18.877024
<class 'pandas.core.frame.DataFrame'>
-----
Index(['one', 'two', 'three'], dtype='object')
该数据类型为: <class 'pandas.core.indexes.base.Index'>
Index(['a', 'b', 'c', 'd'], dtype='object')
该数据类型为: <class 'pandas.core.indexes.base.Index'>
[[ 13.17383762 78.25018505 77.56184759 57.01322792]
[ 24.17762414 81.76228389 99.05559888 8.22447019]
[ 74.49854153 27.66853562 12.51657559 18.87702373]]
该数据类型为: <class 'numpy.ndarray'>
# Column selection: df[]
#   'a'        -> a single column name yields a Series
#   ['a']      -> a one-element list yields a DataFrame
#   ['a', 'c'] -> a list of several names yields a DataFrame
for selector in ('a', ['a'], ['a', 'c']):
    print(df[selector])
one 13.173838
two 24.177624
three 74.498542
Name: a, dtype: float64
a
one 13.173838
two 24.177624
three 74.498542
a c
one 13.173838 77.561848
two 24.177624 99.055599
three 74.498542 12.516576
# Row selection: df.loc[] - select rows by index label
df1 = pd.DataFrame(np.random.rand(16).reshape(4,4)*100,
                   index = ['one','two','three','four'],
                   columns = ['a','b','c','d'])
print(df1)
print('-----')

# A single label returns that row as a Series
data1 = df1.loc['one']
print(data1)
print('单标签索引\n-----')

# Several labels return a DataFrame, in the order requested.
# .loc raises KeyError when any label in the list is missing (pandas >= 1.0),
# so reindex() is used here: it yields an all-NaN row for the missing
# label 'five' instead of failing.
data2 = df1.reindex(['two','three','five'])
print(data2)
print('多标签索引\n-----')

# A label slice is also accepted; the end label is included
data3 = df1.loc['one':'three']
print(data3)
print('切片索引')
# Key note: df.loc[label] mainly selects rows by index; it works both with an
# explicitly specified index and with the default integer index
a b c d
one 55.377068 43.065698 39.271316 49.574745
two 68.080127 64.019842 25.429227 26.203999
three 34.199065 44.013401 24.290113 96.597853
four 8.543333 76.680568 42.562423 71.356456
-----
a 55.377068
b 43.065698
c 39.271316
d 49.574745
Name: one, dtype: float64
单标签索引
-----
a b c d
two 68.080127 64.019842 25.429227 26.203999
three 34.199065 44.013401 24.290113 96.597853
five NaN NaN NaN NaN
多标签索引
-----
a b c d
one 55.377068 43.065698 39.271316 49.574745
two 68.080127 64.019842 25.429227 26.203999
three 34.199065 44.013401 24.290113 96.597853
切片索引
# Row selection: df.iloc[] - select rows by integer position (0 to length-1 along the axis)
# Works like list indexing: positions follow the frame's row order, counted from 0
df = pd.DataFrame(
    data=np.random.rand(4, 4) * 100,
    index=['one', 'two', 'three', 'four'],
    columns=['a', 'b', 'c', 'd'],
)
print(df)
print('------')

# Single position: the first row, then the last row (position -1).
# Unlike loc indexing, an integer position beyond the number of rows cannot be
# indexed, e.g. df.iloc[4] on this 4-row frame.
for pos in (0, -1):
    print(df.iloc[pos])
print('单位置索引\n-----')

# Multiple positions: pass a list of positions; the order may vary
for positions in ([0, 2], [3, 2, 1]):
    print(df.iloc[positions])
print('多位置索引\n-----')

# Slice of positions: the end position is excluded
for rows in (slice(1, 3), slice(None, None, 2)):
    print(df.iloc[rows])
print('切片索引')
a b c d
one 50.438114 97.135347 9.243709 36.805417
two 83.481855 25.575370 86.127597 16.692816
three 90.065811 34.565951 24.451558 22.946117
four 8.319051 88.986334 9.407353 5.776574
------
a 50.438114
b 97.135347
c 9.243709
d 36.805417
Name: one, dtype: float64
a 8.319051
b 88.986334
c 9.407353
d 5.776574
Name: four, dtype: float64
单位置索引
-----
a b c d
one 50.438114 97.135347 9.243709 36.805417
three 90.065811 34.565951 24.451558 22.946117
a b c d
four 8.319051 88.986334 9.407353 5.776574
three 90.065811 34.565951 24.451558 22.946117
two 83.481855 25.575370 86.127597 16.692816
多位置索引
-----
a b c d
two 83.481855 25.575370 86.127597 16.692816
three 90.065811 34.565951 24.451558 22.946117
a b c d
one 50.438114 97.135347 9.243709 36.805417
three 90.065811 34.565951 24.451558 22.946117
切片索引