Series(系列)
认识Series
"""
Series:
1、认识Series(系列)
可以看做是竖起来的列表(带标签索引的一维数组)
2、操作Series
"""
import pandas as pd

# With no explicit index, pandas labels the values with positions 0..n-1.
s_1 = pd.Series(data=[1, 2, 3, 4, 5])
print(s_1)

# The same values, this time labelled 'A' through 'E'.
s_2 = pd.Series(data=[1, 2, 3, 4, 5], index=['A', 'B', 'C', 'D', 'E'])
print(s_2)
# Label/value pairs shown by the print above:
# A 1
# B 2
# C 3
# D 4
# E 5

# A Series can hold strings as well as numbers.
s_3 = pd.Series(data=['Lily', 'Rose', 'Jack'])

# Attribute look-ups only: the results are discarded when run as a script
# and are visible only in an interactive session.
s_1.index
s_2.index
s_3.values
操作Series
"""
Series:
1、认识Series(系列)
可以看做是竖起来的列表(带标签索引的一维数组)
"""
import pandas as pd

s_1 = pd.Series([1, 2, 3, 4, 5])
print(s_1)

# A dict becomes a Series whose index is made of the dict's keys.
dic_1 = {'name': 'daxian', 'age': 18, 'job': 'teacher'}
s_4 = pd.Series(dic_1)

s_2 = pd.Series([1, 2, 3, 4, 5], index=list('ABCDE'))
print(s_2)
# Label/value pairs shown by the print above:
# A 1
# B 2
# C 3
# D 4
# E 5

s_3 = pd.Series(['Lily', 'Rose', 'Jack'])
s_1.index
s_2.index
s_3.values

# 2. Operating on a Series

# 2.1 Access by label
s_2['D']
s_2['B':'D']  # label slices include both end points
s_2[['A', 'D']]

# 2.2 Access by position
# .iloc is used explicitly: integer keys passed to [] on a string-labelled
# Series rely on a deprecated positional fallback.
s_2.iloc[0]
s_2.iloc[0:2]
s_2.iloc[[0, 2]]

# 2.3 Adding
# s_2.append('2') does not work: s_2 and '2' are not the same data type,
# so the string cannot be appended directly.
# Wrap the new value in a Series and concatenate instead
# (Series.append was removed in pandas 2.0; pd.concat replaces it).
s_3 = pd.Series([6], index=['F'])
s_2 = pd.concat([s_2, s_3])

# Deleting
s_2 = s_2.drop(['A'])
print('Jim' != s_3)

# Updating (by position, hence .iloc)
s_2.iloc[0] = 5
s_2.iloc[1:] = 4

# Resetting the index to 0..n-1
s_2.index = range(0, len(s_2))
DataFrame
认识DataFrame
"""
DataFrame(数据框)
就是Excel表(多个Series拼接)
"""
import pandas as pd

# Each dict key becomes a column; `index` supplies the row labels.
df_1 = pd.DataFrame(
    data={
        'name': ['tim', 'rose', 'jack'],
        'age': [1, 2, 3],
        'income': [100, 200, 300],
    },
    index=['person01', 'person02', 'person03'],
)
print(df_1)

# DataFrame attributes: row labels, column labels and the underlying values.
# Results are discarded when run as a script.
df_1.index
df_1.columns
df_1.values
操作DataFrame
(一)
"""
DataFrame操作
"""
import pandas as pd

df_1 = pd.DataFrame(
    data={
        'name': ['tim', 'rose', 'jack'],
        'age': [1, 2, 3],
        'income': [100, 200, 300],
    },
    index=['person01', 'person02', 'person03'],
)

# Replace the column labels with their positions 0..n-1.
print(df_1.columns)
df_1.columns = range(df_1.shape[1])
print(df_1.columns)

# NOTE: the columns are already 0..2 at this point, so neither 'name' nor
# 'age' matches any label and this rename leaves the frame unchanged.
df_1.rename(columns={'name': '名字', 'age': '年龄'}, inplace=True)

# Replace the row labels with their positions 0..n-1.
print(df_1.index)
df_1.index = range(df_1.shape[0])
print(df_1.index)

# Add a 'pay' column (appended last), then move it to the front.
df_1['pay'] = [10, 20, 30]
df_1.insert(loc=0, column='pay', value=df_1.pop('pay'))
(二)
'''
增加一行
'''
# Assigning through .loc to a row label that does not exist yet adds that row.
df_1.loc['person4', ['pay', 0, 1, 2]] = [40, 'kitty', 20, 400]

# Column selection: a single column, then several columns at once.
df_1['pay']
df_1[['pay', 1]]

# Remove column 1 from df_1 itself.
df_1.drop(columns=[1], inplace=True)

# Build a copy without the 'pay' column; df_1 keeps it.
data = df_1.drop(columns='pay')

# Remove the row labelled 1 from df_1 itself.
df_1.drop(index=1, inplace=True)
查询数据框的三种方法
"""
loc()
iloc()
ix()
"""
import numpy as np
import pandas as pd

# 5 x 6 frame holding 0..29, with daily dates as row labels and 'A'..'F'
# as column labels.
dates = pd.date_range('2020-01-01', periods=5)
df = pd.DataFrame(
    np.arange(30).reshape(5, 6),
    index=dates,
    columns=list('ABCDEF'),
)

# loc: label-based indexing, df.loc[rows, columns]
df.loc['2020-01-02', 'A']
df.loc[:, 'A']
df.loc['2020-01-03':'2020-01-05', 'A':'C']
df.loc['2020-01-03':, ['B', 'D']]
df.loc['2020-01-01', :]
df.loc['2020-01-01':'2020-01-03', :]
df.loc['2020-01-03':, :]

# iloc: position-based indexing, df.iloc[rows, columns]
df.iloc[1, 2]
df.iloc[:, 2]
df.iloc[:, [1, 3, 5]]
df.iloc[:, 2:4]
df.iloc[1, :]
df.iloc[[1, 3, 4], :]
df.iloc[1:3, :]

# ix: mixed label/position indexing. It was deprecated and then removed in
# pandas 1.0, so each former df.ix[...] query is written with loc/iloc.
# Label rows + positional columns: translate the positions to labels first.
df.loc['2020-01-01':'2020-01-03', df.columns[[1, 3]]]
# Purely positional queries map directly onto iloc.
df.iloc[:, 2]
df.iloc[:, [2, 4]]
df.iloc[:, 2:4]
df.iloc[1, :]
df.iloc[[1, 3], :]
DataFrame常见操作
"""
DataFrame常见操作
"""
import pandas as pd

dic = {
    'name': ['kiti', 'beta', 'peter', 'tom'],
    'age': [20, 18, 35, 21],
    'gender': ['f', 'f', 'm', 'm'],
}
df = pd.DataFrame(dic)

# Sort by the 'age' column, ascending and then descending.
# sort_values returns a new frame; the results are not stored, so df keeps
# its original row order.
df.sort_values(by=['age'])
df.sort_values(by=['age'], ascending=False)

# Value replacement: 'm' -> 'male', 'f' -> 'female'.
df['gender'] = df['gender'].replace(['m', 'f'], ['male', 'female'])

# Reorder the columns.
# df.ix was removed in pandas 1.0; label-based .loc performs the same
# column selection.
cols = ['gender', 'name', 'age']
df = df.loc[:, cols]