-
python实现excel数据清洗
- 导入需要的库
import pandas as pd
import numpy as np
import xlsxwriter as xw
import matplotlib.pyplot as plt
- 读取excel文件中的数据
excel = pd.read_excel("C:/Users/ypzhao/Desktop/SOC/SOC数据.xlsx")
data = pd.DataFrame(excel)
arr = np.array(data)
data = pd.DataFrame(arr) # 重新来回转变一次,将行名变成正常的序列
data = pd.DataFrame(excel)
data_l = data.values.tolist() # 转为列表
- 截取数据,数据密度较大,绘制时点太多,用python截取其中的数据进行画图
result = []
x = []
y = []
z = []
j=2
while j < len(data_l):
if j == 2:
acc1 = data_l[0][0]
acc_1 = data_l[0][1]
acc_1_1 = data_l[0][2]
x.append(acc1)
y.append(acc_1)
z.append(acc_1_1)
if j < len(data_l):
acc1 = data_l[j][0]
acc_1 = data_l[j][1]
acc_1_1 = data_l[j][2]
x.append(acc1)
y.append(acc_1)
z.append(acc_1_1)
if j > len(data_l):
acc1 = data_l[len(data_l)][0]
acc_1 = data_l[len(data_l)][1]
acc_1_1 = data_l[len(data_l)][2]
x.append(acc1)
y.append(acc_1)
z.append(acc_1_1)
j += 300
- 将不同列表合并为一个列表(多个列表合并为一个DataFrame),为的是最后写出数据
result = pd.DataFrame({'x_list':x,'y_list':y,'z_list':z})
# result = pd.DataFrame(result)
data = result.values.tolist() # 转为列表
- 将截取后的数据写到excel
# 将分段信息写入excel
print("开始将数据写入Excel文件...")
workbook = xw.Workbook("C:/Users/ypzhao/Desktop/SOC/截断后数据.xlsx") # 创建工作簿
worksheet1 = workbook.add_worksheet("sheet1") # 创建子表
worksheet1.activate() # 激活表
title = ["时间",'含控制策略SOC', '无控制策略SOC'] # 设置表头
worksheet1.write_row('A1', title) # 从A1单元格开始写入表头
for i in range(0,len(data)):# 从第二行开始写入数据
insertData = []
for j in range(3):
insertData.append(data[i][j])
row = 'A' + str(i+2)
worksheet1.write_row(row, insertData)
workbook.close() # 关闭表
print("文件写入完毕~~~~")
- 完整代码(含图片细节设置)
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 11 10:36:39 2023
@author: ypzhao
"""
import pandas as pd
import numpy as np
import xlsxwriter as xw
import matplotlib.pyplot as plt
excel = pd.read_excel("C:/Users/ypzhao/Desktop/SOC/SOC数据.xlsx")
data = pd.DataFrame(excel)
arr = np.array(data)
data = pd.DataFrame(arr) # 重新来回转变一次,将行名变成正常的序列
data = pd.DataFrame(excel)
data_l = data.values.tolist() # 转为列表
result = []
x = []
y = []
z = []
j=2
while j < len(data_l):
if j == 2:
acc1 = data_l[0][0]
acc_1 = data_l[0][1]
acc_1_1 = data_l[0][2]
x.append(acc1)
y.append(acc_1)
z.append(acc_1_1)
if j < len(data_l):
acc1 = data_l[j][0]
acc_1 = data_l[j][1]
acc_1_1 = data_l[j][2]
x.append(acc1)
y.append(acc_1)
z.append(acc_1_1)
if j > len(data_l):
acc1 = data_l[len(data_l)][0]
acc_1 = data_l[len(data_l)][1]
acc_1_1 = data_l[len(data_l)][2]
x.append(acc1)
y.append(acc_1)
z.append(acc_1_1)
j += 300
result = pd.DataFrame({'x_list':x,'y_list':y,'z_list':z})
# result = pd.DataFrame(result)
data = result.values.tolist() # 转为列表
# # result.to_csv('test.csv',index = None,encoding = 'utf8')
# x = pd.DataFrame(data)
# result.to_excel('data.xls',index=True)
#dpi为像素大小y
fig, ax = plt.subplots(dpi=3000)
plt.tick_params(labelsize=12)
labels = ax.get_xticklabels() + ax.get_yticklabels()
[label.set_fontname('Times New Roman') for label in labels]
font = {'family': 'Times New Roman','size': 10}
font_1 = {'family': 'Times New Roman','size': 14}
plt.rcParams["font.family"] = "Times New Roman" #全图字号新罗马字体
# 设置画布大小
# plt.figure(figsize=(13, 9))
# 设置字体,SimSun为宋体,14为字号
# font = {'family':'Times New Roman','weight': 'normal','size': 18}
# font1 = {'family':'SimSun','weight': 'normal','size': 18}#宋体
# # # 绘制折线图
# plt.plot(x,y,marker = "*",linewidth=1,color='#00FF00')
# plt.plot(x,z,marker = "",linewidth=1.5,color='#FF0001',)
'''
marker='o', # 像scatter一样设置散点样式
markersize=5, # 设置散点大小
markerfacecolor='blue', # 设置散点填充色
markeredgecolor='yellow', # 设置散点轮廓色
markeredgewidth=0.5 # 设置散点轮廓宽度
'''
plt.plot(x,y,marker = "",markersize=5,alpha=.8,
linewidth=1.5,label='Control',markeredgecolor='green',)
plt.plot(x,z,marker = "",markersize=5,alpha=.8,
linewidth=1.5,label='Unconrol')
# 设置x,y轴之间的范围
ax.axis([-0.1, 2.5, 0, 10]) #X、Y轴区间
'''设置边框不可见
* 左侧、右侧、上侧无框线
ax.spines['left'].set_color('none')
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
* Y轴的范围在-10到350
ax.set_ylim(-10, 350)
ax.set_xlim(0,2000)
'''
'''轴边框设置'''
# 设置右和上边框是否可见
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
# 设置轴偏移,'axes'表示便宜比例
ax.spines['top'].set_position(('data',500))
plt.xlabel('Times/s',font_1)
plt.ylabel('SOC/%',font_1)
# plt.grid(True) ##增加网格格点
ax.tick_params(axis='x', rotation=45);
# 绘折线图
# 图像保存路径
ax.axis([0, 2100, 78, 89]) #X、Y轴区间
# plt.legend(bbox_to_anchor=(0.1,0.98), loc=2, borderaxespad=0,prop=font,ncol=2,frameon=False)
# 设置图例显示行数
plt.legend(ncol=2,frameon=False,prop=font)
# 设置网格x则表示只定义x轴,both表示两者定义,which代表类型,both表示两种
# majot表述主要刻度,minor表示次要
# ax.grid(True,axis='x',which='minor',color='red')
# ax.grid(True,axis='both',which='major',color='green')
# 设置网格位于图形底层
# ax.set_axisbelow(True)
# plt.savefig('respiration.pdf')
plt.savefig('SOC.png',dpi=3000) #保存为图片png格式
plt.savefig('SOC.tif',dpi=3000) #保存为图片tif格式
plt.savefig('SOC.jpg',dpi=3000) #保存为图片jpg格式
plt.show()
#展示图片
print("绘图结束、图像输出完毕")
# 将分段信息写入excel
print("开始将数据写入Excel文件...")
workbook = xw.Workbook("C:/Users/ypzhao/Desktop/SOC/截断后数据.xlsx") # 创建工作簿
worksheet1 = workbook.add_worksheet("sheet1") # 创建子表
worksheet1.activate() # 激活表
title = ["时间",'含控制策略SOC', '无控制策略SOC'] # 设置表头
worksheet1.write_row('A1', title) # 从A1单元格开始写入表头
for i in range(0,len(data)):# 从第二行开始写入数据
insertData = []
for j in range(3):
insertData.append(data[i][j])
row = 'A' + str(i+2)
worksheet1.write_row(row, insertData)
workbook.close() # 关闭表
print("文件写入完毕~~~~")
- 运行结果
SOC随时间变化图
数据及代码自取