read_excel()
df = pd.read_excel("xl/stores.xlsx",
sheet_name="2019", skiprows=1, usecols="B:F")
#usecols读取的列
#sheet_name 读取2019工作表
#skiprows 跳过第一行
df
将Flagship列的数据类型变成bool型
def fix_missing(x):
return False if x in ["", "MISSING"] else x
df = pd.read_excel("xl/stores.xlsx",
sheet_name="2019", skiprows=1, usecols="B:F",
converters={"Flagship": fix_missing})
df
导入所有工作表
sheet_name=None
df1=pd.read_excel("xl/stores.xlsx",sheet_name=None,skiprows=1,,usecols="B:F")
df1
添加列名
header=None, names=[ ]
df = pd.read_excel("xl/stores.xlsx", sheet_name=0,
skiprows=2, skipfooter=3,
usecols="B:C,F", header=None,
names=["Branch", "Employee_Count", "Is_Flagship"])
df
sheet_name接受切片
df1=pd.read_excel("xl/stores.xlsx",sheet_name=0,skiprows=1,usecols="B:F")
df1
df1=pd.read_excel("xl/stores.xlsx",sheet_name=1,skiprows=1,usecols="B:F")
df1
df1=pd.read_excel("xl/stores.xlsx",sheet_name=-1,skiprows=1,usecols="B:F")
df1
处理NaN
df = pd.read_excel("xl/stores.xlsx", sheet_name="2019",
skiprows=1, usecols="B,C,F", skipfooter=2,
na_values="MISSING", keep_default_na=False)
# na_values="MISSING" ,将MISSING 变成 NaN
# keep_default_na=False,其余部分保持不变
df
# 将空白单元格变成NaN
df = pd.read_excel("xl/stores.xlsx", sheet_name="2019",
skiprows=1, usecols="B,C,F", skipfooter=2,
na_values="", keep_default_na=False)
df
ExcelFile
读取文件
with pd.ExcelFile("xl/stores.xls") as f:
df1 = pd.read_excel(f, "2019", skiprows=1, usecols="B:F", nrows=2)
df2 = pd.read_excel(f, "2020", skiprows=1, usecols="B:F", nrows=2)
df1
使用ExcelFile访问所有工作表名
stores = pd.ExcelFile("xl/stores.xlsx")
stores.sheet_names
to_excel()
df.to_excel("written_with_pandas.xlsx", sheet_name="Output",
startrow=1, startcol=1, index=True, header=True,
na_rep="<NA>", inf_rep="<INF>")
#sheet_name 输出工作表名称
#startrow,startcol,跳过第一行,第一列,开始记录数据
#index=True,header=True,索引和列标签保持不变
#na_rep="<NA>", inf_rep="<INF>" na变成<NA>,inf变成 <INF>
将多个DataFrame 写入一个或多个工作表
with pd.ExcelWriter("written_with_pandas2.xlsx") as writer:
df.to_excel(writer, sheet_name="Sheet1", startrow=1, startcol=1)
df.to_excel(writer, sheet_name="Sheet1", startrow=10, startcol=1)
df.to_excel(writer, sheet_name="Sheet2")