目录
1. DataFrame一次增加多列
def add_subtract_series(a, b):
    """Return the sum and the difference of ``a`` and ``b`` as one Series.

    Args:
        a: Left operand.
        b: Right operand.

    Returns:
        A ``pd.Series`` holding ``a + b`` at position 0 and ``a - b`` at
        position 1.
    """
    total = a + b
    delta = a - b
    return pd.Series((total, delta))
# Add two columns in a single row-wise pass: the Series returned for each row
# supplies that row's values for 'sum' and 'difference'.
# The helper defined in this section is add_subtract_series; the previous
# call to add_subtract_list referenced a name that does not exist here.
df[['sum', 'difference']] = df.apply(
    lambda row: add_subtract_series(row['a'], row['b']), axis=1)
2. Python实现分层抽样
# Stratified sampling: group the rows by the 'area' column.
gbr = data.groupby("area")
# Bare expression: it only displays the groups when run interactively.
gbr.groups
# Fraction of rows to draw from each group, keyed by the group's 'area' value.
typicalFracDict = {1: 0.2, 2: 0.4, 3: 0.6}
def typicalSampling(group, typicalFracDict):
    """Randomly sample one group using the fraction configured for it.

    Args:
        group: The group to sample; its ``name`` attribute is used as the
            lookup key.
        typicalFracDict: Mapping from group name to the fraction of rows
            to draw from that group.

    Returns:
        The result of ``group.sample`` with the looked-up fraction.

    Raises:
        KeyError: If the group's name has no entry in ``typicalFracDict``.
    """
    return group.sample(frac=typicalFracDict[group.name])
# Apply typicalSampling to every 'area' group, passing the fraction table as
# its second argument; group_keys=False is forwarded to groupby unchanged.
result = (
    data
    .groupby('area', group_keys=False)
    .apply(typicalSampling, typicalFracDict)
)
3. 把几个DataFrame合并成一个DataFrame
参考:https://blog.csdn.net/qq_42707449/article/details/81116656 (常用做法:pd.concat([df1, df2, ...], ignore_index=True) 纵向合并)
4. 保存多个DataFrame到一个excel里
from pandas import ExcelWriter
def save_xls(list_dfs, xls_path):
    """Write every DataFrame in ``list_dfs`` to its own sheet of one workbook.

    Sheets are named ``sheet0``, ``sheet1``, ... in the order the frames
    appear in ``list_dfs``.

    Args:
        list_dfs: Iterable of DataFrames to write.
        xls_path: Path of the Excel workbook to create.
    """
    # Leaving the ``with`` block saves and closes the workbook, so no explicit
    # writer.save() is needed (that method was removed in pandas 2.0).
    with ExcelWriter(xls_path) as writer:
        for n, df in enumerate(list_dfs):
            # Pass the sheet name by keyword; positional use is deprecated.
            df.to_excel(writer, sheet_name=f'sheet{n}')
5. Excel的列宽自动调整
# Given a dict of dataframes, for example:
# dfs = {'gadgets': df_gadgets, 'widgets': df_widgets}
# Write each frame to its own sheet and widen every column to fit its content.
# The context manager saves and closes the workbook on exit;
# ExcelWriter.save() was removed in pandas 2.0.
with pd.ExcelWriter(filename, engine='xlsxwriter') as writer:
    for sheetname, df in dfs.items():  # loop through `dict` of dataframes
        df.to_excel(writer, sheet_name=sheetname)  # send df to writer
        worksheet = writer.sheets[sheetname]  # pull worksheet object
        # to_excel also writes the index (index=True by default), so the
        # data columns start after the index column(s) in the sheet.
        first_data_col = df.index.nlevels
        for idx, col in enumerate(df.columns):  # loop through all columns
            series = df[col]
            # An empty column has no longest item (max() would be NaN).
            longest_item = 0 if series.empty else series.astype(str).map(len).max()
            max_len = max(
                longest_item,           # len of largest item
                len(str(series.name)),  # len of column name/header
            ) + 1                       # adding a little extra space
            sheet_col = first_data_col + idx
            worksheet.set_column(sheet_col, sheet_col, max_len)  # set column width
6. 用Office自动打开excel文件
import os
import subprocess
import sys

# Open the workbook with the application registered for its file type
# (MS Excel on a machine where Office owns the extension).
# No shell is involved, so file names containing spaces or shell
# metacharacters are passed through unchanged.
if sys.platform == 'win32':
    # Equivalent to double-clicking the file in Explorer.
    os.startfile(filename)
elif sys.platform == 'darwin':
    subprocess.Popen(['open', filename])
else:
    subprocess.Popen(['xdg-open', filename])
7. 读取路径中带中文的文件(以csv为例)
# Open the file ourselves and hand pandas the file object instead of the path
# (presumably to sidestep path-encoding problems with non-ASCII paths).
with open('文件\\ab.csv', encoding = 'utf-8') as f:
    file = pd.read_csv(f).fillna('')
    if len(file.columns) == 1:
        # A single parsed column suggests the file is tab-separated rather
        # than comma-separated: rewind to the start and re-parse it.
        f.seek(0, 0)
        # fillna() needs an explicit value; use '' like the first attempt.
        file = pd.read_table(f).fillna('')
8. 空DataFrame一行行加数据
# Copy the first two columns of every row of `file` into `result` as
# 'col1' and 'col2'.
# DataFrame.irow and DataFrame.append no longer exist in current pandas, and
# appending row by row re-copies the whole frame each time, so the new rows
# are collected first and concatenated once.
new_rows = [
    {'col1': row[0], 'col2': row[1]}
    for row in file.itertuples(index=False, name=None)
]
if new_rows:  # nothing to add when `file` has no rows
    result = pd.concat([result, pd.DataFrame(new_rows)], ignore_index=True)
9. 写入Excel时让单元格内可以显示多行(自动换行)
def save_xls(list_df, list_sheet_name, xls_path):
    """Write DataFrames to named sheets with text wrapping on every column.

    Each frame in ``list_df`` is written without its index to the sheet
    named by the entry at the same position in ``list_sheet_name``.

    Args:
        list_df: Sequence of DataFrames to write.
        list_sheet_name: Sheet names, paired with ``list_df`` by position.
        xls_path: Path of the Excel workbook to create.
    """
    # Leaving the ``with`` block saves and closes the workbook, so no explicit
    # writer.save() is needed (that method was removed in pandas 2.0).
    with ExcelWriter(xls_path, engine = 'xlsxwriter') as writer:
        workbook = writer.book
        # Wrapped-text format: a cell can then break lines at '\r\n'.
        book_format = workbook.add_format({'text_wrap':True})
        for data, sheet_name in zip(list_df, list_sheet_name):
            # Pass the sheet name by keyword; positional use is deprecated.
            data.to_excel(writer, sheet_name=sheet_name, index = False)
            worksheet = writer.sheets[sheet_name]
            for idx, col in enumerate(data.columns):
                series = data[col]
                # Largest number of '\r\n'-separated lines in any cell of the
                # column; an empty column has none (max() would be NaN).
                # NOTE(review): this counts lines, not characters per line;
                # confirm that is the intended basis for the width.
                if series.empty:
                    most_lines = 0
                else:
                    most_lines = series.astype(str).apply(lambda x: x.split('\r\n')).map(len).max()
                max_len = max(most_lines + 15, len(str(series.name))*4 )
                worksheet.set_column(idx, idx, max_len, book_format)