01、读取Excel表格
with pd.ExcelFile('333.xlsx') as xls:
df1 = pd.read_excel(xls, 'Sheet1',index_col="1")
df2 = pd.read_excel(xls, 'Sheet2')
data = pd.read_excel(xls, ['Sheet1', 'Sheet2'],
index_col=None, na_values=['NA'])
02、读取txt文本及数据处理
'''
description: 返回文本内容及行数
param {*} filename-文件名
return {*}
'''
def txt_return(filename):
with open(f'{filename}','r',encoding='utf-8') as txt:
content = txt.readlines() #读全部行
txt.close()
lines = np.array(content)#转换成array 类型
num_of_instances = lines.size #整个txt的行数
print(f"文件:{filename}",f"文本总行数:{num_of_instances} ")
data = {
'lines':lines,
'num_of_instances':num_of_instances
}
return data
def data_df(filename):
data = txt_return(filename)
lines = data['lines']
num_of_instances = data['num_of_instances']
list = []
for i in range(0,num_of_instances):
name=lines[i].split()#split()默认用空格分割并会自动去除分割后的空串
list.append(name) #898604B8172220058006
excel_df = pd.DataFrame(list)
print(excel_df)
excel_df.to_excel('Xinna.xlsx',sheet_name='2023-Xia')
return list
03、写入txt文本
with open('Xinnanew.txt','a+') as f:
f.write(str(list))
04、格式
SN = "%05d" % i
txt_data = f'品牌商C|SSXX|Xina2023-{SN}|{data[0]}|{data[1]}\n'
05、pandas
# df.shape #返回df的行数和列数
# df.shape[0] #返回df的行数
# df.shape[1] #返回df的列数
# df.info()
# df.iloc[cont] 读取一行
# df = pd.read_excel(xls,usecols=[0,6]) 第0、6列
# usecols=[0,1,2] 显示哪些列
# nrows=1,共显示多少行
# index_col=0 第1列作为索引
# header=0,第N行作为索引
@use_time
def pandas_excel():
with pd.ExcelFile('4.xlsx') as xls:
df = pd.read_excel(xls,header=0,index_col=None, nrows=5)
# print(df)
# print(df.iloc[1])
for cont in range(0, df.shape[0]-1, 1):
ICCID = df.iloc[cont]['ICCID']
YD = df.iloc[cont]['运营商']
print(f'第{cont}行|ICCID-{ICCID}|运营商-{YD}')
pd.DataFrame()
# 将每一行写入另一个文件:
# with open('output.md', 'w') as f:
# for index, row in df.iterrows():
# print(f'---{df.iloc[index]}')
# # print(f'index:{index}--row:{row[0]}')
# f.write(row.to_markdown())
06、pandas查找某一列字符串大于多少
data = read_excel(f1)
df = data['df']
df_row = data['df_row']
df_column = data['df_column']
print(f'文件:{f1}\n共{df_row}行|{df_column}列\n{df}')
new_data = df[df['绑定码号'].str.len()>43] #-->查找
print(f'{new_data}')
new_data.to_excel('已下载码号.xlsx')
07、写入excel
##################################写入Excel文件################################################
def write_excel(filename,df_data,sheet_name):
print(f'==>filename:{filename}-df_data:{df_data}')
if not os.path.exists(filename):
df = pd.DataFrame()
df.to_excel(filename,sheet_name=sheet_name)
else:
print(f'文件:{filename}已存在')
df_old = pd.DataFrame(pd.read_excel(filename, sheet_name=sheet_name)) # 读取原数据文件和表
row_old = df_old.shape[0] # 获取原数据的行数
df_data = pd.DataFrame(df_data)
book = load_workbook(filename)
with pd.ExcelWriter(f'{filename}',mode='a',engine="openpyxl") as xls:
xls.book = book
xls.sheets = dict((ws.title, ws) for ws in book.worksheets)
df_data.to_excel(xls,sheet_name=sheet_name,startrow=row_old + 1, index=False, header=False)
xls.save()
xls.close()
08、读取指定行列
##################################读取Excel文件指定行################################################
def read_excel_row(filename,row):
with pd.ExcelFile(f'{filename}') as xls:
df = pd.read_excel(xls)
excel_info = df.iloc[row],#行
return excel_info
##################################读取全部Excel文件################################################
def read_excel_column(filename,column):
with pd.ExcelFile(f'{filename}') as xls:
df = pd.read_excel(xls,usecols=column)
excel_info ={
'df':df,#表格数据
'df_row':df.shape[0],#行
'df_column':df.shape[1],#列
}
return excel_info
##################################读取全部Excel文件################################################
def read_excel(filename):
with pd.ExcelFile(f'{filename}') as xls:
df = pd.read_excel(xls)
excel_info ={
'df':df,#表格数据
'df_row':df.shape[0],#行
'df_column':df.shape[1],#列
}
return excel_info
09、函数用时
# 函数运行时间装饰器
def use_time(func):
def wrapper(*args, **kwargs):
start = time()
result = func(*args, **kwargs)
use = round(time() - start, 3)
print('%s()用时: %s秒' % (func.__name__, use))
return result
return wrapper
10、创建多级目录
def makeDirs(dirpath):
'''
创建目录
支持多级目录,若目录已存在自动忽略
'''
dirpath = dirpath.strip().rstrip(os.path.sep)
if dirpath:
if not os.path.exists(dirpath):
os.makedirs(dirpath)