1. 从excel抓取数据并保存到excel中
import xlrd
import numpy as np
import pandas as pd
# 打开Excel文件
data = xlrd.open_workbook('3.xls')
# 获取所有的sheet名称
sheet_names = data.sheet_names()
print("Sheet Names:", sheet_names)
# 选择第一个sheet
sheet = data.sheet_by_index(0)
# 获取行数和列数
num_rows = sheet.nrows
num_cols = sheet.ncols
print("Number of rows:", num_rows)
print("Number of columns:", num_cols)
# 委托单位 名称 制造厂 出厂编号 上偏差 下偏差 均匀度 波动度
ans =[]
# 获取指定单元格的数据
a1 = sheet.cell_value(2,3)
print("委托单位:", a1)
a2 = sheet.cell_value(3,3)
print("名称:", a2)
a3 = sheet.cell_value(4,3)
print("制造厂:", a3)
a4 = sheet.cell_value(4,23)
print("出厂编号:", a4)
a5 = sheet.cell_value(54,8)
print("上偏差:", a5)
a6 = sheet.cell_value(54,25)
print("下偏差:", a6)
a7 = sheet.cell_value(55,8)
print("均匀度:", a7)
a8 = sheet.cell_value(55,25)
print("波动度:", a8)
print(type(a2))
ans.append(a1)
ans.append(a2)
ans.append(a3)
ans.append(a4)
ans.append(a5)
ans.append(a6)
ans.append(a7)
ans.append(a8)
file_path = '4.xls'
import xlwt
f = xlwt.Workbook()
sheet1 = f.add_sheet(u'sheet1',cell_overwrite_ok=True) #创建sheet
#将数据写入第 i 行,第 j 列
i = 0
for j in range(len(ans)):
sheet1.write(i,j,str(ans[j]))
f.save(file_path) #保存文件
print("保存excel文件成功,处理结束")
结果:
D:\software\anaconda\envs\pytorch37\python.exe C:\Users\Administrator\Desktop\test\1.py
Sheet Names: ['温度记录', '证书正本', '证书副本', '校准证书首页', '校准证书第2页', '校准证书第3页']
Number of rows: 69
Number of columns: 29
委托单位: 病理科
名称: 冰箱
制造厂: 海尔
出厂编号: 3413020
上偏差: +55
下偏差: 110
均匀度: -88
波动度: 89
<class 'str'>
保存excel文件成功,处理结束
2. 合并不同表头的表格
import books as books
import xlrd
import pandas as pd
import numpy as nd
from openpyxl import load_workbook
import numpy as np
import xlrd
from xlrd import xldate_as_tuple
from datetime import datetime
merge_list = []
def excel2matrix(path):
data = xlrd.open_workbook(path)
# 获取所有的sheet名称
sheet_names = data.sheet_names()
print()
# 选择第一个sheet
sheet = data.sheet_by_index(0)
# 获取行数和列数
num_rows = sheet.nrows
num_cols = sheet.ncols
print(path, "Sheet Names:", sheet_names, "rows=", num_rows, "columns=", num_cols)
datamatrix = np.zeros((num_rows, num_cols)) # 生成一个nrows行ncols列,且元素均为0的初始矩阵
datamatrix = datamatrix.astype(np.str)
for x in range(num_cols):
cols = np.matrix(sheet.col_values(x)) # 把list转换为矩阵进行矩阵操作
datamatrix[:, x] = cols # 按列把数据存进矩阵中
for j in range(num_cols):
if '时间' in datamatrix[0, j]:
for i in range(1, num_rows):
# print(datamatrix[i, j], type(datamatrix[i, j])) # 45168.0 <class 'numpy.str_'>
# print(datamatrix[i][j], len(datamatrix[i][j]))
if len(datamatrix[i][j]) < 7:
date = " "
else:
# print(datamatrix[i,j])
tmp = float(datamatrix[i, j])
date = datetime(*xldate_as_tuple(tmp, 0)).strftime('%Y/%m/%d')
datamatrix[i][j] = date
# merge_list += datamatrix[0 ].tolist()
return datamatrix
def crete_matrix(matrix1):
mergematrix1 = np.zeros((matrix1.shape[0], len(merge_list))) # 生成一个nrows行ncols列,且元素均为0的初始矩阵
mergematrix1 = mergematrix1.astype(np.str)
mergematrix1[0, :] = merge_list
for i in range(matrix1.shape[1]):
if matrix1[0,i] in merge_list:
idx = merge_list.index(matrix1[0,i])
mergematrix1[:, idx] = matrix1[:, i]
print("mergematrix2=", mergematrix1.shape, mergematrix1)
return mergematrix1
path1 = '2023年度计划(专项).xls'
matrix1 = excel2matrix(path1)
list1 = matrix1[0].tolist()
path2 = '2023年度计划(医疗).xls'
matrix2 = excel2matrix(path2)
list2 = matrix2[0 ].tolist()
path3 = '2023年度计划(应急).xls'
matrix3 = excel2matrix(path3)
list3 = matrix3[0 ].tolist()
path4 = '攻坚.xls'
matrix4 = excel2matrix(path4)
list4 = matrix4[0 ].tolist()
path5 = '2023年度计划(教学).xls'
matrix5 = excel2matrix(path5)
list5 = matrix5[0 ].tolist()
path6 = '2023年度计划(教管中心).xls'
matrix6 = excel2matrix(path6)
list6 = matrix6[0 ].tolist()
path7 = '2023年度计划(科研).xls'
matrix7 = excel2matrix(path7)
list7 = matrix7[0 ].tolist()
path8 = '2023年度计划(维修).xls'
matrix8 = excel2matrix(path8)
list8 = matrix8[0 ].tolist()
path9 = '2023年度计划(零配件).xls'
matrix9 = excel2matrix(path9)
list9 = matrix9[0 ].tolist()
print(len(list1), len(list2), len(list3), len(list4), len(list5), len(list6),
len(list7), len(list8), len(list9) )
# 47 47 43 29 47 49 47 32 48
merge_list = list1 + list2 + list3 + list4 + list4 + list5 + list6 + list7 + list8 + list9
merge_set = set(merge_list)
merge_list = sorted(merge_set, key=merge_list.index)
print(len(merge_list), merge_list)
ans1 = crete_matrix(matrix1)
ans2 = crete_matrix(matrix2)
ans3 = crete_matrix(matrix3)
ans4 = crete_matrix(matrix4)
ans5 = crete_matrix(matrix5)
ans6 = crete_matrix(matrix6)
ans7 = crete_matrix(matrix7)
ans8 = crete_matrix(matrix8)
ans9 = crete_matrix(matrix9)
ans = np.concatenate((ans1, ans2, ans3, ans4, ans5, ans6, ans7, ans8, ans9 ), axis=0)
print(ans.shape)
ans = np.unique(ans, axis=0)
print(ans.shape)
data_df = pd.DataFrame(ans) # 关键1,将ndarray格式转换为DataFrame
# 将文件写入excel表格中
writer = pd.ExcelWriter('final.xlsx') # 关键2,创建名称为hhh的excel表格
data_df.to_excel(writer, 'page_1', float_format='%.5f') # 关键3,float_format 控制精度,将data_df写到hhh表格的第一页中。若多个文件,可以在page_2中写入
writer._save() # 关键4"""