Python操作Excel表格(二)

Python操作Excel表格第二弹

本博客在前一篇博客基础上增加了若干函数,如按列寻找值,查重和增强型查重、数据写入等相关代码。实现了类似Sql的查询(注解中的SQL语句仅做参考,并非可执行SQL)由于工作较忙,更新较慢。后续还有更全面的第三弹,先附上代码:

# -*- coding: utf-8 -*-
#!/bin/usr/python3
# author:zhouzhuo
# create time: 2020/09/05
# update: 2020/11/22

import openpyxl
import pandas as pd 
from collections import Counter
 
#指定文件位置和sheet,返回sheet对象
def get_sheet(excel_path, sheet_name):
    work_book = openpyxl.load_workbook(excel_path)
    #work_sheet = work_book.get_sheet_by_name(sheet_name)
    work_sheet = work_book[sheet_name]
    return work_book,work_sheet
 
#获取sheet的行列规模
def get_sheet_scale(work_sheet):
    return work_sheet.max_row,work_sheet.max_column
 
#获取指定sheet页面的指定行列值
def get_cell_value(work_sheet,row,column):
    return work_sheet.cell(row=row,column=column).value

# 获取sheet的表头(列名)
def get_tile(work_sheet):
    return get_row_data(work_sheet,1) 


#获取指定sheet的指定行的数据,支持取值起始位置和跳跃步长取值
def get_row_data(work_sheet,row,begin=1,step=1):
    row_data = []
    max_column = work_sheet.max_column
    for col in range(begin,max_column+1,step):
        row_data.append(work_sheet.cell(row=row,column=col).value)
    return row_data
 
# 获取指定sheet的指定列的数据,支持取值起始位置和跳跃步长取值
# 类似Sql中select col_name from table;
def get_column_data(work_sheet,column,begin=2,step=1):
    column_data = []
    max_row = work_sheet.max_row
    for r in range(begin,max_row+1,step):
        column_data.append(work_sheet.cell(row=r,column=column).value)
    return column_data

# 获取指定sheet中多列数据,以列表内嵌套元组形式返回
# 类似Sql中select col1,col2,... from table;
def get_multi_column_data(work_sheet,columns,begin=2,step=1):
    columns_data = []
    for col in columns:
        column_data = get_column_data(work_sheet,col,begin=2,step=1)
        columns_data.append(column_data)
    return list(zip(*columns_data))


# 统计列数据出现次数,以字典形式返回
# 类似Sql中select count(*) from table group by colname;
def count_column_data(work_sheet,column,begin=2,step=1):
    column_data = get_column_data(work_sheet,column,begin=2,step=1)
    value_count = Counter(column_data)
    return  dict(value_count)

# 不区分大小写的统计列数据出现次数
def ig_group_column_data(work_sheet,column,begin=2,step=1):
    column_data = get_column_data(work_sheet,column,begin=2,step=1)
    for i, element in enumerate(column_data): column_data[i] = element.lower()
    value_count = Counter(column_data)
    return  dict(value_count)

# 搜索某列包含指定关键词的所有行
# 类似Sql中 select * from table where col in (key)
def search_rowsdata_by_column(work_sheet,column,key):
    column_data = get_column_data(work_sheet,column)
    #print(column_data)
    rowsdata = []
    count = column_data.count(key)
    if (count >= 1):
        for i in range(len(column_data)):
            if (column_data[i]==key):
                rowsdata.append(get_row_data(work_sheet, i + 2))
    return rowsdata

 # 通过另一列的关键词搜索本列对应的值
 # 类似Sql中select col1 from table where col2='key'
def search_columninfo_by_othercolumn(work_sheet,column,othercolumn,key):
    othercolumn_data = get_column_data(work_sheet,othercolumn)
    columninfo = []
    count = othercolumn_data.count(key)
    if (count >= 1):
        for i in range(len(othercolumn_data)):
            if (othercolumn_data[i] == key):
                columninfo.append(get_cell_value(work_sheet,i+2,column))
    return columninfo

#通过本列的值搜索另一列对应的值
def search_othercolumninfo_by_column(work_sheet,column,othercolumn,key):
    column_data = get_column_data(work_sheet,column)
    #othercolumn_data = get_column_data(work_sheet,othercolumn)
    columninfo = []
    count = column_data.count(key)
    if (count >= 1):
        for i in range(len(column_data)):
            if (column_data[i] == key):
               columninfo.append(get_cell_value(work_sheet,i+2,othercolumn))
    return columninfo


# 根据某列查询包含指定字符串的行
def find_similar_bycolumn(work_sheet,column,key):
    column_data = get_column_data(work_sheet,column)
    similar_row = []
    for i in range(len(column_data)):
        if(key in column_data[i]):
            similar_row.append(i)
    return similar_row


# 对某列查重,返回重复元素及重复次数
def double_check(work_sheet,column):
    column_data = get_column_data(work_sheet, column)
    unique_data = dict(Counter(column_data))
    double_key_count = {key: value for key, value in unique_data.items() if value > 1}
    return  double_key_count

# 不区分大小写对某列查重
def ig_double_check(work_sheet,column):
    column_data = get_column_data(work_sheet, column)
    for col in column_data :
        print(type(col))
    for i,element in enumerate(column_data): column_data[i] = element.lower()
    unique_data = dict(Counter(column_data))
    double_key_count = {key: value for key, value in unique_data.items() if value > 1}
    return  double_key_count


def formatcell(work_sheet):
   fontObj2 = openpyxl.styles.Font(name=u'Times New Roman', bold=False, italic=False, size=14)
   work_sheet['A1'].font = fontObj2
## ----------------------------- 写操作---------------------------
    
def create_excel(path,sheetname='Sheet1'):
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = sheetname
    wb.save(path)
    
 
# 在指定行插入一行数据,默认插入末尾
def insert_onerow_data(work_sheet,rowdata,index=-1):
    if(index == -1 or index == work_sheet.max_row+1):
        work_sheet.append(rowdata)
    else:
        work_sheet.insert_rows(index)
        for col in range(1,len(rowdata)+1):
            work_sheet.cell(row = index,column=col,value=rowdata[col-1])          
    

def insert_manyrow_data(path,work_book,work_sheet,rowsdata,index=-1):
    for rowdata in rowsdata:
       insert_onerow_data(work_sheet,rowdata,index)   
    formatcell(work_sheet)
    work_book.save(path)

if __name__ == '__main__':     
	pass
  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值