Python Openpyxl excel 筛选出无重复的数据项

从一堆数据中,筛选出共有多少条不重复的数据,并去除非关键的部分(时间戳之类的字符)
在这里插入图片描述

import json
import requests
import openpyxl
import os
import ssl
import sys
import time

import json
import requests
import openpyxl
import os
import ssl
import sys
import time
from openpyxl.styles import Font, colors, Alignment,PatternFill

class ExcelOp(object):
    def __init__(self, file):
        self.file = file
        self.wb = openpyxl.load_workbook(self.file)
        sheets = self.wb.sheetnames
        self.sheet = sheets[0]
        self.ws = self.wb[self.sheet]

    def set_sheets(self,num):
        sheets = self.wb.sheetnames
        if(num < len(sheets)):
            self.sheet = sheets[num]
            self.ws = self.wb[self.sheet]
        else:
            print("the sheet num is out range ,MAX:%d"%(len(sheets)))
    def get_sheet_name(self):
        return self.ws.title
        
    # 获取表格的总行数和总列数
    def get_row_clo_num(self):
        rows = self.ws.max_row
        columns = self.ws.max_column
        return rows, columns

    # 获取某个单元格的值
    def get_cell_value(self, row, column):
        cell_value = self.ws.cell(row=row, column=column).value
        return cell_value

    # 获取某列的所有值
    def get_col_value(self, column):
        rows = self.ws.max_row
        column_data = []
        for i in range(1, rows + 1):
            cell_value = self.ws.cell(row=i, column=column).value
            column_data.append(cell_value)
        return column_data

    # 获取某行所有值
    def get_row_value(self, row):
        columns = self.ws.max_column
        row_data = []
        for i in range(1, columns + 1):
            cell_value = self.ws.cell(row=row, column=i).value
            row_data.append(cell_value)
        return row_data

    # 设置某个单元格的值
    def set_cell_value(self, row, colunm, cellvalue):
        bold_itatic_24_font = Font(color=colors.RED)
        self.ws.cell(row, colunm).font = bold_itatic_24_font
        fill = PatternFill("solid", fgColor=colors.GREEN)
        self.ws.cell(row, colunm).fill = fill
        try:
            self.ws.cell(row=row, column=colunm).value = cellvalue
            self.wb.save(self.file)
        except:
            self.ws.cell(row=row, column=colunm).value = "writefail"
            self.wb.save(self.file)


    '''
    function :cut_str_3parts
    [itput]
    str_ori        : ori string
    sellect_word   : key words
    part_want      : 0 , return befor keywords
                     1 , return keywords
                     2 , return after keywords
    [putput]
    return[0](int) : 0 ,the keywords don't exist in str_ori
                     1 ,the keywords exist in str_ori
    return[1](str) : the  string of part_want ,! if cant find keywords ,will retuen "None"   
    '''
    def cut_str_3parts(self,str_ori,sellect_word,part_want):
        exist_orNot = 0
        str_return = "None"
        if(str_ori.find(sellect_word) > 0):
            exist_orNot = 1
            str_map = str_ori.partition(sellect_word)
            str_return = str_map[part_want]
        return exist_orNot,str_return
            
reason_list_collect = []


flag_exist = 0
if __name__ == "__main__":
    excel_ori= ExcelOp(file="ori_check.xlsx")
    max_row,max_column = excel_ori.get_row_clo_num()

    
    
    for j in range(max_row):
        str_reason = str(excel_ori.get_cell_value( j+1, 1))

        flag_exist,str_temp = excel_ori.cut_str_3parts(str_reason," Th[",0)
        if flag_exist:
            if str_temp not in reason_list_collect:
                reason_list_collect.append(str_temp)  
       
    print("reason_list_collect:",reason_list_collect)

    for i in range(len(reason_list_collect)):
        excel_ori.set_cell_value(max_row+4+i,1,reason_list_collect[i])

‘’’
ws.sheet_properties.tabColor = “1072BA”

根据列的数字返回字母

print(get_column_letter(2)) # B

根据字母返回列的数字

print(column_index_from_string(‘D’)) # 4

from openpyxl.styles import Font, colors, Alignment

bold_itatic_24_font = Font(name=‘等线’, size=24, italic=False, color=colors.RED, bold=True)

sheet[‘A1’].font = bold_itatic_24_font

sheet[‘B1’].alignment = Alignment(horizontal=‘center’, vertical=‘center’)

第2行行高

sheet.row_dimensions[2].height = 40

C列列宽

sheet.column_dimensions[‘C’].width = 30

合并单元格, 往左上角写入数据即可

sheet.merge_cells(‘B1:G1’) # 合并一行中的几个单元格
sheet.merge_cells(‘A1:C3’) # 合并一个矩形区域中的单元格

sheet.unmerge_cells(‘A1:C3’)

TIME = datetime.datetime.now().strftime("%H:%M:%S")
TITLE = str(time())

'''
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值