1from openpyxl import Workbook 2wb = Workbook() 3 4# grab the active worksheet 5ws = wb.active 6 7# Data can be assigned directly to cells 8ws['A1'] = 42 9 10# Rows can also be appended 11ws.append([1, 2, 3]) 12 13# Python types will automatically be converted 14import datetime 15ws['A2'] = datetime.datetime.now() 16 17# Save the file 18wb.save("sample.xlsx")
2. Python 操作 Excel 表格
读取表格
1import os 2import openpyxl 3from collections import OrderedDict 4 5 6def read_excel(fn, sn=None): 7 """ 8 windows 无法识别中文文件名,第一行是表头标题 9 最终格式是返回一个列表,元素是字典,key:表头 value:值10 :param fn: 文件名11 :param sn: excel 表名12 :return:13 """ 14 if not os.path.isfile(fn): 15 print("Excel File Not Found:{}".format(fn)) 16 return [] 17 wb = openpyxl.load_workbook(fn) 18 print("Excel Sheets List: {}".format(wb.sheetnames)) 19 if not sn: 20 sheet = wb.active 21 else: 22 if sn not in wb.sheetnames: 23 print("Error Sheet Name:{}".format(sn)) 24 return [] 25 sheet = wb[sn] 26 print("Sheet Name is 【{}】, Max Column Num: {}".format(sheet.title, sheet.max_column)) 27 # 删掉无用的空白列或者空白格,减少内存遍历(Ctrl + shift + →【箭头】 删掉) 28 print("Max Row Num: {}".format(sheet.max_row)) 29 col_num = 0 30 first = True 31 cols_val = OrderedDict() 32 all_row = [] # 返回所有的行 33 for row in sheet.iter_rows(): 34 i = 0 35 if first: # first row Title 36 for cell in row: 37 col_num += 1 38 if not cell.value: 39 print("Excel Table Title Empty(column num is {})".format(col_num)) 40 continue 41 cols_val[col_num] = cell.value.strip() 42 first = False 43 print("Excel Table Title: {}".format(cols_val.values())) 44 continue 45 rows_val = [] 46 for cell in row: 47 i += 1 48 if i not in cols_val: # 没有表头的则过滤掉该列 49 continue 50 rows_val.append(cell.value if cell.value is not None else "") 51 if rows_val: 52 all_row.append(OrderedDict(zip(cols_val.values(), rows_val))) 53 print("All Row Num:{}".format(len(all_row))) 54 return all_row
1def append_excel(fn, titles, rows, sn=None): 2 """ 3 该函数会直接修改 表格内容 4 向已存在的表格里面增加 titles 表头,内容是 rows内容. 按顺序匹配 5 """ 6 if not os.path.isfile(fn): 7 print("Excel File Not Found:{}".format(fn)) 8 return [] 9 if len(titles) <= 0: 10 print("Titles Empty") 11 return 12 wb = openpyxl.load_workbook(fn) 13 print("Excel Sheets List: {}".format(wb.sheetnames)) 14 if not sn: 15 sheet = wb.active 16 else: 17 if sn not in wb.sheetnames: 18 print("Error Sheet Name:{}".format(sn)) 19 return [] 20 sheet = wb[sn] 21 print("Sheet Name is 【{}】, Max Column Num: {}".format(sheet.title, sheet.max_column)) 22 # 删掉无用的空白列或者空白格,减少内存遍历(Ctrl + shift + →【箭头】 删掉) 23 print("Max Row Num: {}".format(sheet.max_row)) 24 col_num = sheet.max_column + 1 25 for i, title in enumerate(titles): 26 cell = sheet.cell(1, col_num + i) # 增加表头 27 cell.value = title 28 for rn, row in enumerate(rows): 29 row_index = rn + 2 30 for cn, val in enumerate(row): 31 cell = sheet.cell(row_index, cn + col_num) 32 cell.value = val 33 wb.save(fn) 34 return
`
3. Excel 操作案例
案例场景
将2个表格的某一列进行匹配,最后将表2的数据加到表1的后面
案例表1
案例表2
匹配后的结果
实现方法
1def match(t1_fn, t2_fn, attr, t2_titles, t1_sn=None, t2_sn=None): 2 """ 3 获取t1 t2的数据并根据 attr 来匹配2个表格该字段相同的数据。 4 最终将 表2的 t2_titles 列表对应的数据添加到 表1 后面 5 :param t1_fn: 6 :param t2_fn: 7 :param attr: 8 :param t2_titles: 表格2 添加到 t1 后面的字段列表(按照顺序) 9 :param t1_sn10 :param t2_sn11 :return:12 """ 13 result = [] 14 t1_data = read_excel(t1_fn, sn=t1_sn) 15 t2_data = read_excel(t2_fn, sn=t2_sn) 16 if len(t1_data) <= 0 or len(t2_data) <= 0: 17 print("Empty Sheet") 18 return result 19 if len(t2_titles) <= 0: 20 print("Please Input Titles with Table-2") 21 return result 22 t1_map = {} 23 for num, each in enumerate(t1_data): 24 if attr not in each: 25 print("Attribute {} not Found in the {} row of Table-1".format(attr, num + 1)) 26 return result 27 val = each[attr] 28 t1_map.setdefault(val, []) 29 t1_map[val].append(num + 1) # 以匹配的属性值为key,行数为 value,可能有多行 30 31 new_rows = {} # 存储t1行号需要增加的值 32 max_row_num = len(t1_data) # t1最大的行号 33 for num, each in enumerate(t2_data): 34 if attr not in each: 35 print("Attribute {} not Found in the {} row of Table-2".format(attr, num + 1)) 36 return result 37 val = each[attr] 38 if val not in t1_map: 39 print("The Match Value {} not Found in Table-1".format(val)) 40 continue 41 new_row_data = [each.get(t, '') for t in t2_titles] # 字段不存在使用空字符串 42 for row_num in t1_map[val]: 43 new_rows[row_num] = new_row_data # 根据行号存储,第1行就是1,第2行就是2 44 for row_num in range(1, max_row_num): 45 if row_num not in new_rows: 46 result.append([""] * len(t2_titles)) # 没匹配到的以空替换 47 else: 48 result.append(new_rows[row_num]) 49 return result 50 51def test_match(): 52 t1_file_path = "D:\\table-1.xlsx" 53 t2_file_path = "D:\\table-2.xlsx" 54 titles = ["净利润", "订单日期"] 55 rows = match(t1_file_path, t2_file_path, u"订单号", titles, "Sheet4", "Sheet") 56 append_excel(t1_file_path, titles, rows, "Sheet4")