# Indexing (索引)
import numpy as np
import pandas as pd
# Demo: selecting columns with [] and rows with .loc.
# Build a 3x4 frame of random values in [0, 100) with labelled rows/columns.
df = pd.DataFrame(
    np.random.rand(12).reshape(3, 4) * 100,
    index=['one', 'two', 'three'],
    columns=['a', 'b', 'c', 'd'],
)
print(df)

# Column selection: one label gives a Series, a list of labels a DataFrame.
data1 = df['a']
data2 = df[['a', 'c']]
print('*****'*5)
print(data1, type(data1))
print(data2, type(data2))
print('*****'*5)

# Row selection: one label gives a Series, a list of labels a DataFrame.
data3 = df.loc['one']
data4 = df.loc[['one', 'two']]
# Print each object next to its own type so the output is not misleading.
print(data3, type(data3))
print(data4, type(data4))
# Selecting rows and columns (选择行与列)
import numpy as np
import pandas as pd
# Demo: selecting columns with [] and rows with .loc.
# Build a 3x4 frame of random values in [0, 100) with labelled rows/columns.
df = pd.DataFrame(
    np.random.rand(12).reshape(3, 4) * 100,
    index=['one', 'two', 'three'],
    columns=['a', 'b', 'c', 'd'],
)
print(df)

# df[label] selects a column (Series); df[[labels]] selects columns (DataFrame).
data1 = df['a']
data2 = df[['a', 'c']]
print(data1, type(data1))
print(data2, type(data2))
print('*****'*5)

# df.loc[label] selects a row (Series); df.loc[[labels]] selects rows (DataFrame).
data3 = df.loc['one']
data4 = df.loc[['one', 'two']]
# Print each object next to its own type so the output is not misleading.
print(data3, type(data3))
print(data4, type(data4))
# df.loc[] - select rows by index label (按index选择行)
# Demo: row selection by label with .loc (single label, label list, slice).
# df1 has string row labels; df2 keeps the default integer index 0..3.
df1 = pd.DataFrame(np.random.rand(16).reshape(4, 4) * 100,
                   index=['one', 'two', 'three', 'four'],
                   columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.random.rand(16).reshape(4, 4) * 100,
                   columns=['a', 'b', 'c', 'd'])
print(df1)
print(df2)
print('*****'*5)

# Single label -> that row as a Series.
data1 = df1.loc['one']
data2 = df2.loc[1]
print('单标签索引')
print(data1)
print(data2)
print('*****'*5)

# List of labels -> DataFrame with rows in the requested order.
# 'five' is not in df1's index: .loc raises KeyError for missing labels on
# pandas >= 1.0, so .reindex is used to get a NaN-filled row for it instead.
data3 = df1.reindex(['two', 'three', 'five'])
data4 = df2.loc[[3, 2, 1]]
print('多标签索引')
print(data3)
print(data4)
print('*****'*5)

# Label slices with .loc include both endpoints.
data5 = df1.loc['one':'three']
data6 = df2.loc[1:3]
# Caption goes before the data, matching the two sections above.
print('切片索引')
print(data5)
print(data6)
# Boolean indexing (布尔型索引)
import numpy as np
import pandas as pd
# Demo: boolean masks built from comparisons, then used to index the frame.
sep = '*****'*5
df = pd.DataFrame(
    data=np.random.rand(16).reshape(4, 4) * 100,
    index=['one', 'two', 'three', 'four'],
    columns=['a', 'b', 'c', 'd'],
)
print(df)
print(sep)

# Comparing the whole frame gives a boolean DataFrame of the same shape.
b1 = df.lt(20)
print(b1, type(b1))
print(df[b1])
print(sep)

# Comparing one column gives a boolean Series, which filters rows.
b2 = df['a'].gt(50)
print(b2, type(b2))
print(df[b2])
print(sep)

# Comparing a subset of columns gives a mask that only covers 'a' and 'b'.
b3 = df[['a', 'b']].gt(50)
print(b3, type(b3))
print(df[b3])
print(sep)

# Comparing a subset of rows gives a mask that only covers 'one' and 'three'.
b4 = df.loc[['one', 'three']].lt(50)
print(b4, type(b4))
print(df[b4])
# DataFrame: basic techniques (基本技巧)
# Demo: peeking at the first/last rows and transposing.
sep = '*****'*5
df = pd.DataFrame(np.random.rand(8, 2) * 100, columns=['a', 'b'])
print(df.head(n=2))   # first two rows
print(df.tail(n=5))   # last five rows (the default count)
print(sep)
print(df.transpose())  # rows become columns and vice versa
print(sep)
print('*****'*5)
# Demo: adding and overwriting columns and rows by assignment.
sep = '*****'*5
df = pd.DataFrame(
    np.random.rand(16).reshape(4, 4) * 100,
    columns=list('abcd'),
)
print(df)
print(sep)

# Assigning to a label that does not exist yet appends it:
# a new column 'e' filled with 10, then a new row labelled 4 filled with 20.
df['e'] = 10
df.loc[4] = 20
print(df)

# Assigning to existing labels overwrites the values in place.
df['e'] = 20
df[['a', 'c']] = 100
print(sep)
print(df)
# Demo: removing columns and rows.
sep = '*****'*5
df = pd.DataFrame(
    np.random.rand(16).reshape(4, 4) * 100,
    columns=['a', 'b', 'c', 'd'],
)
print(df)
print(sep)

# `del` removes the column from df itself.
del df['a']
print(sep)
print(df)
print(sep)

# drop() returns a new frame and leaves df unchanged, as the print of df
# after the row drops shows.
print(df.drop(index=0))
print(sep)
print(df.drop(index=[1, 2]))
print(sep)
print(df)
print(sep)
print(df.drop(columns=['d']))
print(df)
# Multiple (chained) indexing: e.g. selecting rows and columns at the same time (多重索引:比如同时索引行和列)
# Demo: combining row and column selection in one expression.
df = pd.DataFrame(
    np.random.rand(16).reshape(4, 4) * 100,
    index=['one', 'two', 'three', 'four'],
    columns=['a', 'b', 'c', 'd'],
)
print(df)
print('******'*5)

# Column 'a', restricted to rows 'one' and 'three'.
print(df.loc[['one', 'three'], 'a'])
print('*****'*5)

# Columns b/c/d, every second row by position.
print(df.iloc[::2].loc[:, ['b', 'c', 'd']])
print('*****'*5)

# Rows where 'a' < 50, then the first two of those.
print(df.loc[df['a'] < 50].head(2))
# Demo: arithmetic between frames aligns on both row and column labels.
df1 = pd.DataFrame(np.random.randn(10, 4), columns=['a', 'b', 'c', 'd'])
# df2 holds 7x3 values, so it takes exactly three column names; passing four
# names for three data columns raises ValueError.
df2 = pd.DataFrame(np.random.randn(7, 3), columns=['a', 'b', 'c'])
print('*****'*5)
# Labels present in only one frame (column 'd', rows 7-9) come out as NaN.
print(df1 + df2)
# Sorting 1 - sort by values with .sort_values (按值排序)
# Demo: sorting rows by the values of a single column.
sep = '*****'*5
df1 = pd.DataFrame(
    np.random.rand(16).reshape(4, 4) * 100,
    columns=['a', 'b', 'c', 'd'],
)
print(df1)
print(sep)
# Ascending order is the default for sort_values.
print(df1.sort_values(by=['a']))
print(sep)
# Descending order.
print(df1.sort_values(by=['a'], ascending=False))
# Demo: sorting by several columns; ties in 'a' are broken by 'c'.
# All three columns must have the same length (8). 'a' contains repeated
# values on purpose so that the secondary key 'c' has ties to resolve.
df2 = pd.DataFrame({'a': [2, 5, 6, 9, 5, 5, 8, 8],
                    'b': list(range(8)),
                    'c': list(range(8, 0, -1))})
print('*****'*5)
print(df2)
print(df2.sort_values(['a', 'c']))
# Sorting 2 - sort by index with .sort_index (索引排序)
# Demo: sorting rows by index label, for an integer and a string index.
labels = ['a', 'b', 'c', 'd']
df1 = pd.DataFrame(
    np.random.rand(16).reshape(4, 4) * 100,
    index=[5, 4, 3, 2],
    columns=labels,
)
df2 = pd.DataFrame(
    np.random.rand(16).reshape(4, 4) * 100,
    index=['f', 'i', 'j', 'k'],
    columns=labels,
)

# sort_index() returns a new frame ordered by row label (ascending here).
print(df1)
print(df1.sort_index(ascending=True))
print('*****'*5)
print(df2)
print(df2.sort_index(ascending=True))