从一堆数据中,筛选出共有多少条不重复的数据,并去除非关键的部分(时间戳之类的字符)
import json
import requests
import openpyxl
import os
import ssl
import sys
import time
import json
import requests
import openpyxl
import os
import ssl
import sys
import time
from openpyxl.styles import Font, colors, Alignment,PatternFill
class ExcelOp(object):
def __init__(self, file):
self.file = file
self.wb = openpyxl.load_workbook(self.file)
sheets = self.wb.sheetnames
self.sheet = sheets[0]
self.ws = self.wb[self.sheet]
def set_sheets(self,num):
sheets = self.wb.sheetnames
if(num < len(sheets)):
self.sheet = sheets[num]
self.ws = self.wb[self.sheet]
else:
print("the sheet num is out range ,MAX:%d"%(len(sheets)))
def get_sheet_name(self):
return self.ws.title
# 获取表格的总行数和总列数
def get_row_clo_num(self):
rows = self.ws.max_row
columns = self.ws.max_column
return rows, columns
# 获取某个单元格的值
def get_cell_value(self, row, column):
cell_value = self.ws.cell(row=row, column=column).value
return cell_value
# 获取某列的所有值
def get_col_value(self, column):
rows = self.ws.max_row
column_data = []
for i in range(1, rows + 1):
cell_value = self.ws.cell(row=i, column=column).value
column_data.append(cell_value)
return column_data
# 获取某行所有值
def get_row_value(self, row):
columns = self.ws.max_column
row_data = []
for i in range(1, columns + 1):
cell_value = self.ws.cell(row=row, column=i).value
row_data.append(cell_value)
return row_data
# 设置某个单元格的值
def set_cell_value(self, row, colunm, cellvalue):
bold_itatic_24_font = Font(color=colors.RED)
self.ws.cell(row, colunm).font = bold_itatic_24_font
fill = PatternFill("solid", fgColor=colors.GREEN)
self.ws.cell(row, colunm).fill = fill
try:
self.ws.cell(row=row, column=colunm).value = cellvalue
self.wb.save(self.file)
except:
self.ws.cell(row=row, column=colunm).value = "writefail"
self.wb.save(self.file)
'''
function :cut_str_3parts
[itput]
str_ori : ori string
sellect_word : key words
part_want : 0 , return befor keywords
1 , return keywords
2 , return after keywords
[putput]
return[0](int) : 0 ,the keywords don't exist in str_ori
1 ,the keywords exist in str_ori
return[1](str) : the string of part_want ,! if cant find keywords ,will retuen "None"
'''
def cut_str_3parts(self,str_ori,sellect_word,part_want):
exist_orNot = 0
str_return = "None"
if(str_ori.find(sellect_word) > 0):
exist_orNot = 1
str_map = str_ori.partition(sellect_word)
str_return = str_map[part_want]
return exist_orNot,str_return
reason_list_collect = []
flag_exist = 0
if __name__ == "__main__":
excel_ori= ExcelOp(file="ori_check.xlsx")
max_row,max_column = excel_ori.get_row_clo_num()
for j in range(max_row):
str_reason = str(excel_ori.get_cell_value( j+1, 1))
flag_exist,str_temp = excel_ori.cut_str_3parts(str_reason," Th[",0)
if flag_exist:
if str_temp not in reason_list_collect:
reason_list_collect.append(str_temp)
print("reason_list_collect:",reason_list_collect)
for i in range(len(reason_list_collect)):
excel_ori.set_cell_value(max_row+4+i,1,reason_list_collect[i])
‘’’
ws.sheet_properties.tabColor = “1072BA”
根据列的数字返回字母
print(get_column_letter(2)) # B
根据字母返回列的数字
print(column_index_from_string(‘D’)) # 4
from openpyxl.styles import Font, colors, Alignment
bold_itatic_24_font = Font(name=‘等线’, size=24, italic=False, color=colors.RED, bold=True)
sheet[‘A1’].font = bold_itatic_24_font
sheet[‘B1’].alignment = Alignment(horizontal=‘center’, vertical=‘center’)
第2行行高
sheet.row_dimensions[2].height = 40
C列列宽
sheet.column_dimensions[‘C’].width = 30
合并单元格, 往左上角写入数据即可
sheet.merge_cells(‘B1:G1’) # 合并一行中的几个单元格
sheet.merge_cells(‘A1:C3’) # 合并一个矩形区域中的单元格
sheet.unmerge_cells(‘A1:C3’)
TIME = datetime.datetime.now().strftime("%H:%M:%S")
TITLE = str(time())
'''