**
pandas玩转Excel
放码过来
**
1 .创建表格
import pandas as pd #导入pandas模块
df = pd.DataFrame({'id':[1,2,34],'name':['G','E','F']}) #加入数据
df.set_index("id") #将“id”设置为索引
df.to_excel("C:/users/86198/表格.xlsx") #保存表格
2 .读取文件
import pandas as pd
people = pd.read_excel("c:/users/86198/表格.xlsx",header = None) # header =None 使column名字为1,2,3
people.columns = ["id"] # 设置column名字
people.set_index("id",inplace=True) #将id设为第一列
print(people.columns)
people.to_excel("c:/users/86198/表格1.xlsx") # 保存
ps
import pandas as pd
people = pd.read_excel("c:/users/86198/表格.xlsx",index_col="id)
#即将第一列设置为id
3.自动填充
import pandas as pd
books = pd.read_excel("c:\\users\\86198\\表格.xlsx",skiprows=3,usecols="C,E,F",dtype=["id":str])
#skiprows跳过前几行不读,usecol读哪几列,dtype即将那列转化为什么类型,这么做为了解决表格中“NaN”为浮点数转化为整数
books["id"].at[0] = 100#即id列0行写为100,或者books.at[0,"id"]
for i in books.index:
books["id"].at[i] = i+1 #自动填充
books["Y/N"].at[i] = "Y"if i % 2=0 else "N"
print(books)
books.to_excel("c:\\users\\86198\\表格.xlsx")
4.函数填充
import pandas as pd
books = pd.read_excel("c:\\users\\86198\\表格.xlsx",index_col="id")
#id 列设为第一列
books["c"]=books['a'] * books['b'] #即计算乘法
books['a']=books['a']+1 #即加法,也可books['a']=books['a'].apply(lambda x:x+1)
- 排序
import pandas as pd
books = pd.read_excel("c:\\users\\86198\\表格.xlsx",index_col='id')
books.sort_values(by=['id','name'],inplace=False,ascending=False)
'''
by='price'即按哪一列排序,by=['id','name']对两项进行排序,与之对应ascending=[True,False]
inplace=True不创建新的对象,直接对原始对象进行修改,即不改变原始序列,ascending=False从小到大排
'''
print(books)
6.筛选数据
import pandas as pd
def age_18_to_30(a):
return 18<=a<30
def level_a(s):
return 80<=s<100
books = pd.read_excel("c:\\users\\86198\\表格.xlsx",index_col='id')
books = books.loc[books.Age.apply(age_18_to_30)].loc[books.name.apply(level_a)]
#分别对“age”,“name”进行筛选
print(books)