# pandas入门基础补充
# 创建表格
import pandas as pd
# Build a three-row table from a dict that maps column name -> column values.
df = pd.DataFrame({'ID': [1, 2, 3], 'Name': ['Tim', 'Victor', 'Nick']})
# Use the ID column as the row index. set_index(..., inplace=True) returns
# None, so its result must not be assigned back to df; take the returned
# frame instead.
df = df.set_index('ID')
# Save the table as an Excel workbook.
df.to_excel('c:/temp.xlsx')
print('Done!')
# 读取表格
import pandas as pd
# Load the workbook with the default settings.
pe = pd.read_excel('目录文件')
# Report the (rows, columns) shape, the column labels, and the first and last
# three rows, one item per print line.
print(pe.shape, pe.columns, pe.head(3), pe.tail(3), sep='\n')
# Reload, taking the row at 0-based position 1 as the header row.
pe = pd.read_excel('目录文件', header=1)
# Reload again, this time using the 'ID' column as the row index.
pe = pd.read_excel('目录文件', index_col='ID')
# 创建行、列单元格
import pandas as pd
# An empty Series; the dtype is given explicitly because there is no data to
# infer it from.
s1 = pd.Series(dtype='object')
# The index must be list-like and exactly as long as the data; a scalar such
# as 0 is rejected.
s1.index = []
s1.name = 'ID'

# A dict becomes a Series whose index labels are the dict keys.
d = {'x': 100, 'y': 200}
s1 = pd.Series(d)

# Three Series that share the index labels 1..3. Each needs its own variable
# so that all three can be combined below.
s1 = pd.Series([1, 2, 3], index=[1, 2, 3], name='A')
s2 = pd.Series([10, 20, 30], index=[1, 2, 3], name='B')
s3 = pd.Series([100, 200, 300], index=[1, 2, 3], name='C')

# Passed as dict values, each Series becomes a column labelled by its key.
df = pd.DataFrame({s1.name: s1, s2.name: s2, s3.name: s3})
# Passed as list items, each Series becomes a row labelled by its name.
df = pd.DataFrame([s1, s2, s3])
# 自动填充功能
import pandas as pd
from datetime import date, timedelta

# Skip the first three rows and read only worksheet columns C to F.
# The two usecols spellings below select the same columns.
df = pd.read_excel('目录文件', skiprows=3, usecols="C,D,E,F", index_col=None)
df = pd.read_excel('目录文件', skiprows=3, usecols="C:F", index_col=None)
# Number the rows 1, 2, 3, ... Writing through df.at[row, column] updates the
# frame itself; the chained form df['ID'].at[i] = ... may write to a temporary
# copy instead.
for i in df.index:
    df.at[i, 'ID'] = i + 1

# Read again with the ID column as str.
# NOTE(review): presumably so that empty ID cells are not loaded as float NaN;
# confirm against the workbook.
df = pd.read_excel('目录文件', skiprows=3, usecols="C,D,E,F", index_col=None, dtype={'ID': str})
# First date of the generated sequence; constant, so defined once before the loop.
start = date(2021, 7, 4)
for i in df.index:
    df.at[i, 'ID'] = i + 1
    # Alternate Yes / No down the rows.
    df.at[i, 'isStudent'] = 'Yes' if i % 2 == 0 else 'No'
    # Variant 1: advance one day per row.
    df.at[i, 'InStore'] = start + timedelta(days=i)
    # Variant 2: advance one year per row. Both variants write the same cell,
    # so this later assignment is the value that remains.
    df.at[i, 'InStore'] = date(start.year + i, start.month, start.day)
# 函数填充 计算列
import pandas as pd
books = pd.read_excel('目录文件', index_col='ID')

# Column-by-column arithmetic is applied element-wise, row by row.
books['Price'] = books['ListPrice'] * books['Discount']
# A scalar operand is applied to every row.
books['Price'] = books['ListPrice'] * 0.8

# The same product computed one cell at a time. Each iteration must write
# only the cell of row i; assigning to books['Price'] inside the loop would
# overwrite the whole column with a single value.
for i in books.index:
    books.at[i, 'Price'] = books.at[i, 'ListPrice'] * books.at[i, 'Discount']

# Cell-by-cell form restricted to the rows whose ID label is 5..9.
for i in range(5, 10):
    books.at[i, 'Price'] = books.at[i, 'ListPrice'] * books.at[i, 'Discount']

# Add 2 to every list price: once with column arithmetic, once with apply().
books['Price'] = books['ListPrice'] + 2
# apply is a method, so it is called with parentheses, not subscripted.
books['Price'] = books['ListPrice'].apply(lambda x: x + 2)
# 排序
import pandas as pd
df = pd.read_excel('目录文件', index_col='ID')
# Sort the rows in place by one column, ascending (the default).
df.sort_values(by='Worthy', inplace=True)
# Same column, descending.
df.sort_values(by='Worthy', inplace=True, ascending=False)
# Sort by 'Worthy' first, then by 'Price' within equal 'Worthy' values.
df.sort_values(by=['Worthy', 'Price'], inplace=True)
# One direction per sort key: 'Worthy' ascending, 'Price' descending.
# The keyword is spelled `ascending`; a misspelt keyword raises TypeError.
df.sort_values(by=['Worthy', 'Price'], inplace=True, ascending=[True, False])
# 过滤与数据筛选
import pandas as pd
def __age_18_to_22__(a):
    """Return True when *a* is at least 18 and strictly below 22."""
    return 18 <= a < 22
def __level_A__(s):
    """Return True when *s* lies between 80 and 100, both ends included."""
    return 80 <= s <= 100
df = pd.read_excel('目录文件', index_col='ID')
# Keep the rows whose age passes __age_18_to_22__ and whose score passes
# __level_A__. The masks are built from df itself: it is the only frame this
# section loads, and no `students` frame is defined.
df = df.loc[df['age'].apply(__age_18_to_22__)].loc[df['score'].apply(__level_A__)]
# Same filter, reading the columns through attribute access.
df = df.loc[df.age.apply(__age_18_to_22__)].loc[df.score.apply(__level_A__)]
# Same filter with inline lambdas. Parentheses let the chain span two lines
# without a backslash continuation.
df = (
    df.loc[df.age.apply(lambda a: 18 <= a < 22)]
    .loc[df.score.apply(lambda s: 80 <= s <= 100)]
)