利用 Python 读取数据合并对比同时写入Excel生成报告

前言

学习工作中经常需要对比数据,比如上一版本代码和目前代码跑的数据精度或者时间性能对比。如下程序就给出了两种方法,推荐第二种,同时方便生成数据报告,也可以方便利用 Python 将数据直接写进 Excel 中,生成报告。具体代码可以去我的GitHub下载。

1. 数据格式

id name height
0 ubhyms 207
1 HXZXey 176
2 WqSpPM 192
3 MKWzzI 188
4 kGGQXy 182
5 weFUul 156
6 vdORms 174

id name weight
0 CnVBzn 171
1 xGZqvG 121
2 HNYven 222
3 FLCyed 112
4 ObOfpY 148
5 NlvyTc 234
6 OrxPhQ 158
7 ORoavJ 212

2. 程序

1. 生成随机名字

def generate_name(number, max_length_of_name):
  names = []
  ascii_letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
  for i in range(number):
    name = ''
    for j in range(max_length_of_name):
      name = name + random.choice(ascii_letters)
    names.append(name)
  return names
  

2. 生成随机整数组


def generate_number(quantity, min_number, max_number):
  datas = []
  for i in range(quantity):
    datas.append(random.randint(min_number, max_number))
  return datas
  

3. 通过字典合并数据

def merge_data_by_map(data1, data2, head1, head2, data1_same_id, data2_same_id):
  datas = []
  dict_map = {}
  for i in range(len(data1)) :
    data = []
    for j in range(len(data1[i])):
      if j != data1_same_id :
        data.append(data1[i][j])
    dict_map[str(data1[i][data1_same_id])] = data

  for i in range(len(data2)):
    data = data2[i].copy()
    key = data[data2_same_id]
    if dict_map.get( key ) is not None:
      val = dict_map[key]
      for j in range(len(val)) :
        data.append(val[j])
      datas.append(data)
  return datas

4. 通过 Pandas 的 merge 合并数据

def merge_data_by_pandas(data1, data2, head1, head2, data1_same_id, data2_same_id):
  df1 = pd.DataFrame(data1, columns=head1)
  df2 = pd.DataFrame(data2, columns=head2)
  #print(df1)
  #print(df2)
  key = head1[data1_same_id]
  df3 = pd.merge(df1, df2, how='inner', on=key)
  return df3
  

5. 将 Pandas 的 DataFrame 数据写入 Excel 生成报告


def write_excel(file_name, datas):
  workbook = xlsxwriter.Workbook(file_name)
  style = workbook.add_format({
      "fg_color": "yellow",  # background cloro
      "bold": 1,  
      "align": "center",  # 
      "valign": "vcenter",  # 
      "font_color": "red"  #
    })
  style_cen = workbook.add_format({
      "align": "center",  # 
      "valign": "vcenter",  # 
  })
  #style_pre = workbook.add_format({'num_format': '0.000%'})
  
  sheetname = "data"
  worksheet = workbook.add_worksheet(sheetname)  # 
  
  row, col = len(datas), datas.shape[1]
      
  head = [column for column in datas]
  worksheet.write_row('A1', head, style)
  worksheet.freeze_panes(1, 1)
  
  for i in range(row):
    for j in range(col):
      worksheet.write(i+1, j, datas.iloc[[i], [j]].values[0][0], style_cen)
    
  worksheet.set_column(0, col, 16)
  workbook.close()
  

注:此处可以参考我之前的一篇博客:
Python利用xlsxwriter读写Excel文件(持续补充)

6. 随机生成数据并测试

def test_of_merge_data():
  Number = 1000
  names = generate_name(Number, 6)
  height = generate_number(Number, 150, 210)
  weight = generate_number(Number, 80, 250)

  Number_of_data1 = 100
  Number_of_data2 = 80
  data1 = []
  for i in range(Number_of_data1):
    id = random.randint(0, Number-1)
    data = [names[id], height[id]]
    data1.append(data)

  data2 = []
  for i in range(Number_of_data2):
    id = random.randint(0, Number-1)
    data = [names[id], weight[id]]
    data2.append(data)

  #print("data1")
  #print(data1)
  #print("data2")
  #print(data2)

  datas = merge_data_by_map(data1, data2, ["name", "height"], ["name", "weight"], 0, 0)
  print("datas")
  print(datas)

  df = merge_data_by_pandas(data1, data2, ["name", "height"], ["name", "weight"], 0, 0)
  print("df")
  print(df)

  write_excel("data.xlsx", df)
  

7. 完整代码

import os
import re
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xlrd
import xlwt
import xlsxwriter

from matplotlib.font_manager import FontProperties


def generate_name(number, max_length_of_name):
  names = []
  ascii_letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
  for i in range(number):
    name = ''
    for j in range(max_length_of_name):
      name = name + random.choice(ascii_letters)
    names.append(name)
  return names


def generate_number(quantity, min_number, max_number):
  datas = []
  for i in range(quantity):
    datas.append(random.randint(min_number, max_number))
  return datas


def merge_data_by_map(data1, data2, head1, head2, data1_same_id, data2_same_id):
  datas = []
  dict_map = {}
  for i in range(len(data1)) :
    data = []
    for j in range(len(data1[i])):
      if j != data1_same_id :
        data.append(data1[i][j])
    dict_map[str(data1[i][data1_same_id])] = data

  for i in range(len(data2)):
    data = data2[i].copy()
    key = data[data2_same_id]
    if dict_map.get( key ) is not None:
      val = dict_map[key]
      for j in range(len(val)) :
        data.append(val[j])
      datas.append(data)
  return datas


def merge_data_by_pandas(data1, data2, head1, head2, data1_same_id, data2_same_id):
  df1 = pd.DataFrame(data1, columns=head1)
  df2 = pd.DataFrame(data2, columns=head2)
  #print(df1)
  #print(df2)
  key = head1[data1_same_id]
  df3 = pd.merge(df1, df2, how='inner', on=key)
  return df3


def write_excel(file_name, datas):
  workbook = xlsxwriter.Workbook(file_name)
  style = workbook.add_format({
      "fg_color": "yellow",  # background cloro
      "bold": 1,  
      "align": "center",  # 
      "valign": "vcenter",  # 
      "font_color": "red"  #
    })
  style_cen = workbook.add_format({
      "align": "center",  # 
      "valign": "vcenter",  # 
  })
  #style_pre = workbook.add_format({'num_format': '0.000%'})
  
  sheetname = "data"
  worksheet = workbook.add_worksheet(sheetname)  # 
  
  row, col = len(datas), datas.shape[1]
      
  head = [column for column in datas]
  worksheet.write_row('A1', head, style)
  worksheet.freeze_panes(1, 1)
  
  for i in range(row):
    for j in range(col):
      worksheet.write(i+1, j, datas.iloc[[i], [j]].values[0][0], style_cen)
    
  worksheet.set_column(0, col, 16)
  workbook.close()

def test_of_merge_data():
  Number = 1000
  names = generate_name(Number, 6)
  height = generate_number(Number, 150, 210)
  weight = generate_number(Number, 80, 250)

  Number_of_data1 = 100
  Number_of_data2 = 80
  data1 = []
  for i in range(Number_of_data1):
    id = random.randint(0, Number-1)
    data = [names[id], height[id]]
    data1.append(data)

  data2 = []
  for i in range(Number_of_data2):
    id = random.randint(0, Number-1)
    data = [names[id], weight[id]]
    data2.append(data)

  #print("data1")
  #print(data1)
  #print("data2")
  #print(data2)

  datas = merge_data_by_map(data1, data2, ["name", "height"], ["name", "weight"], 0, 0)
  print("datas")
  print(datas)

  df = merge_data_by_pandas(data1, data2, ["name", "height"], ["name", "weight"], 0, 0)
  print("df")
  print(df)

  write_excel("data.xlsx", df)


if __name__ == "__main__":
  test_of_merge_data()

3. 程序结果

1. Terminal 输出

在这里插入图片描述

2. Excel 内容

在这里插入图片描述

4. 总结

最好是用第二种方法,其 merge 函数有更多选择方式,具体可以参考:
Pandas 官方文档

这样可以直接将数据传递给 write_excel 函数,将 DataFrame 数据直接写进 Excel 生成报告。

  • 0
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值