在使用openpyxl时,感觉到这个包对列的封装很少,使用起来不太方便。为了方便以后使用,我自己封装了一个列的类,来获取列的一引起关键信息。
import openpyxl
from openpyxl.utils import get_column_letter,column_index_from_string
import pinyin
import datetime
import re
def getStrAllAplha(str):
return pinyin.get_initial(str, delimiter="").upper()
def is_Chinese(word):
for ch in word:
if '\u2E80' <= ch <= '\uFE4F':
return True
return False
def get_str_real_len(content):
real_str_len = len(content)
for ch in content:
if '\u2E80' <= ch <= '\uFE4F':
real_str_len += 1
return real_str_len
def get_real_len(content):
if isinstance(content,str):
real_len = get_str_real_len(content)
elif isinstance(content,int):
real_len = get_str_real_len(str(content))
elif isinstance(content,datetime.datetime):
real_len = 25 # 一个完整的日期加时间一般是18,为了适当冗余,我设成25位
else:
real_len = 0
return real_len
class ExcelColumn(object):
"""comment"""
def __init__(self, excel_path,sheet_index=-1,column_index=1):
self.wb = openpyxl.load_workbook(excel_path)
self.column_index = column_index
if sheet_index == -1:
self.ws = self.wb.active
else:
self.ws = self.wb.worksheets[sheet_index]
self.column = self.ws[get_column_letter(column_index)]
self.values= [row.value for row in self.column]
def get_column_name(self):
return self.column[0].value
def get_column_dbname(self):
column_name = self.get_column_name()
if column_name is None:
column_dbname = 'UntitledColumn'+str(self.column_index)
else:
if is_Chinese(column_name):
column_dbname = getStrAllAplha(column_name)
else:
column_dbname = column_name
return column_dbname
def get_column_max_len(self):
'''获取列的最大长度'''
col_data = (get_real_len(row.value) for row in self.column[1:])
return max(col_data)
def get_num_of_not_null_data(self):
col_not_null_data = [row.value for row in self.column[1:] if row.value is not None]
return len(col_not_null_data)
def is_num_col(self):
flag = True
for row in self.column[1:]:
v=row.value
if v is None:
pass
else:
if not re.match(r'^-?\d+\.?\d*e?-?\d*?$', str(v)):
flag = False
else:
pass
return flag
def is_equal_len(self):
col_data = (len(row.value) for row in self.column[1:] if row.value is not None)
count = len(set(col_data))
#print(count)
#print(col_data)
if count>1:
return False
else:
return True
if __name__ == '__main__':
path = r'e:\投标记录.xlsx'
col = ExcelColumn(path,column_index=6)
print(col.get_column_max_len())
print(col.get_column_dbname())
print(col.get_num_of_not_null_data())
print(col.is_num_col())
print(col.values)
print(col.is_equal_len())
修改记录:2021-02-03 ,程序中有一点小错误,修改了下,并且 加上了__name__=='main’的判断 ,以防调用时出错
2021-04-29, 解决了列名为空的情况