# Indexing:
import numpy as np
import pandas as pd
# Selecting rows and columns
def _make_frame(n_rows, row_labels=None):
    """Build an ``n_rows`` x 4 frame of random values in [0, 100) with columns a-d.

    When ``row_labels`` is None the rows get the default integer index.
    """
    values = np.random.rand(n_rows, 4) * 100
    return pd.DataFrame(values, index=row_labels, columns=list('abcd'))


df = _make_frame(3, ['one', 'two', 'three'])
df1 = _make_frame(3)  # no row labels -> integer index 0..2
print(df)
print("---------------------------")

# Indexing the frame with a column name selects columns:
# a single name gives a Series, a list of names gives a DataFrame.
data1 = df['a']
data2 = df[['b', 'c']]
print(data1, data2, sep='\n')
print("***************************")

# Rows are selected by label through .loc; df['one'] is not allowed here
# and would raise an error.
data3 = df.loc['one']
data4 = df.loc[['three', 'two']]
print(data3, data4, sep='\n')

# A plain slice on the frame indexes rows, so it can also be used to pick rows.
data3 = df[:2]
print(data3)
print("---------------------------")

# .loc also accepts row slices; the end label is included.
data5 = df.loc['one':'three']
data6 = df1.loc[1:3]
print(data5, data6, sep='\n')
print("***************************")

# df.iloc[] selects rows by integer position (0 to length-1 along the axis),
# much like indexing a list: positions follow the frame's row order, from 0.
df = _make_frame(4, ['one', 'two', 'three', 'four'])
print(df)
print("------")
positional_selectors = (
    0,                      # first row
    -1,                     # last row
    [0, 2],                 # several positions; the order may vary
    [3, 2, 1],
    slice(1, 3),            # slice by position, end excluded
    slice(None, None, 2),   # every second row
)
for selector in positional_selectors:
    print(df.iloc[selector])
print("---------------------------")
# Combined indexing (selecting rows and columns together):
import numpy as np
import pandas as pd
# Combined indexing: selecting on rows and columns at the same time.
# Columns are picked first and rows second -- i.e. for a dataset, filter the
# fields first and then choose which records to keep.
row_names = ['one', 'two', 'three', 'four']
col_names = ['a', 'b', 'c', 'd']
df = pd.DataFrame(np.random.rand(4, 4) * 100, index=row_names, columns=col_names)
print(df)
print("---")
column_a = df['a']
print(column_a.loc[['three', 'four']])
print("---")
# Keep columns b, c and d first, then take every second row by position.
bcd_columns = df[['b', 'c', 'd']]
print(bcd_columns.iloc[::2])
print("***************************")
del df['a']  # removes column 'a' from df itself
print(df)
# drop() removes columns when told to work on the column axis; with the default
# inplace=False it returns a new frame and leaves the original data unchanged.
print(df.drop(columns=['d']))
# Sorting:
import numpy as np
import pandas as pd
# Sorting 1 - sort by value with .sort_values (works for Series as well)
df1 = pd.DataFrame(np.random.rand(4, 4) * 100, columns=list('abcd'))
print(df1)
# Single-column sort; the ascending parameter picks the direction and
# defaults to ascending.
print(df1.sort_values(by='a'))                   # ascending
print(df1.sort_values(by='a', ascending=False))  # descending
separator = '-' * 50
print(separator)
df2 = pd.DataFrame({
    'a': [1] * 4 + [2] * 4,
    'b': list(range(8)),
    'c': list(range(8, 0, -1)),
})
print(df2)
print(separator)
# Multi-column sort: the columns are applied in the order they are listed.
print(df2.sort_values(by=['a', 'c']))
print("******************************************************************")

# Sorting 2 - sort by index with .sort_index
# Defaults: ascending=True, inplace=False.
df1 = pd.DataFrame(np.random.rand(4, 4) * 100,
                   index=[5, 4, 3, 2],
                   columns=list('abcd'))
df2 = pd.DataFrame(np.random.rand(4, 4) * 100,
                   index=list('hsxg'),
                   columns=list('abcd'))
for frame in (df1, df2):
    # Show each frame as created, then ordered by its index.
    print(frame, frame.sort_index(), sep='\n')