import pandas
import xlwt
class People:
    """A person record holding a name and a job number."""

    def __init__(self, name, job_number):
        """Store the person's name and job number as given."""
        self.name = name
        self.job_number = job_number

    def __str__(self):
        """Return ``"<name>,<job_number>"``.

        Both fields are passed through ``str()`` so that a non-string name
        (for example a numeric spreadsheet cell) does not raise ``TypeError``
        during concatenation.
        """
        return str(self.name) + "," + str(self.job_number)
class Company:
    """A company identified by the pair ``(name, pid)``.

    Instances define ``__eq__`` and ``__hash__`` over the same two fields so
    they can be used as dictionary keys and set members.
    """

    def __init__(self, name, pid):
        """Store the company name and its pid as given."""
        self.name = name
        self.pid = pid

    def __lt__(self, other):
        """Order companies by name only (pid is ignored)."""
        if not isinstance(other, Company):
            return NotImplemented
        return self.name < other.name

    def __hash__(self):
        """Return a hash built from the same fields that ``__eq__`` compares.

        Hashing the tuple of raw values (rather than their concatenated
        string forms) keeps the hash consistent with equality: values that
        compare equal, such as ``1`` and ``1.0``, also hash equal.
        """
        return hash((self.name, self.pid))

    def __eq__(self, other):
        """Return True when both name and pid are equal."""
        if not isinstance(other, Company):
            # Let Python fall back to its default handling instead of
            # raising AttributeError on unrelated types.
            return NotImplemented
        return self.name == other.name and self.pid == other.pid

    def __str__(self):
        """Return ``"<name>,<pid>"``; both fields are converted with ``str()``."""
        return str(self.name) + "," + str(self.pid)
class Laige:
    """Group spreadsheet rows by company and export one row per company."""

    def doWork(self, path, out_path):
        """Read ``path``, group its rows by company, and save to ``out_path``.

        Input columns are read by position: 0 = company name, 1 = person
        name, 2 = job number, 3 = company pid. With pandas' default header
        handling the first sheet row is taken as column labels, not data.

        Each distinct ``(company name, pid)`` pair produces one output row,
        in order of first appearance: company name, pid, then the job number
        of every person in that company. All cells are written as strings.

        NOTE: xlwt writes the legacy ``.xls`` format regardless of the
        extension used in ``out_path``.

        :param path: path of the Excel workbook to read (first sheet is used).
        :param out_path: path the resulting workbook is saved to.
        """
        # ``sheet_name`` is the current keyword; the old ``sheetname``
        # spelling is no longer accepted by pandas.
        frame = pandas.read_excel(path, sheet_name=0)

        groups = {}
        ordered_keys = []  # distinct companies in first-seen order
        for row in frame.values:
            comp, nam, jobnm, pid = row[0], row[1], row[2], row[3]
            key = Company(comp, pid)
            members = groups.get(key)
            if members is None:
                members = groups[key] = []
                ordered_keys.append(key)
            members.append(People(nam, jobnm))

        workbook = xlwt.Workbook()
        sheet_laige = workbook.add_sheet('来哥', cell_overwrite_ok=True)
        for row_idx, key in enumerate(ordered_keys):
            sheet_laige.write(row_idx, 0, str(key.name))
            sheet_laige.write(row_idx, 1, str(key.pid))
            # Only the job number is exported; the person's name is not.
            for col, person in enumerate(groups[key], start=2):
                sheet_laige.write(row_idx, col, str(person.job_number))
        workbook.save(out_path)
if __name__ == '__main__':
    laige = Laige()
    # The result is written with xlwt, which only produces the legacy binary
    # .xls format, so the output file carries the .xls extension; a .xlsx
    # name would not match the file's actual contents.
    laige.doWork(
        "C:/Users/liyang/Desktop/数据筛选.xlsx",
        "C:/Users/liyang/Desktop/数据筛选结果.xls",
    )
1)可哈希的(hashable)
只有可哈希的对象才能作为字典的键。一个可哈希的对象必须满足以下两个条件:
- 该对象在其生命周期内有一个不变的哈希值(需要实现 `__hash__()` 方法)
- 该对象是可比较的(需要实现 `__eq__()` 或 `__cmp__()` 方法;Python 3 已移除 `__cmp__()`,只需实现 `__eq__()`)
Python中可哈希的对象有:
- 数值、字符串,以及只含有数值或字符串的元组
- 用户自定义类的实例(默认是可哈希的,也可以通过实现 `__hash__()` 和 `__cmp__()`(Python 3 中为 `__eq__()`)来修改默认行为)
2)哈希等价键
假设有字典d的两个键:keyA和keyB,我们称keyA和keyB是 哈希等价键(自己杜撰的名词),如果keyA和keyB满足以下两个条件:
- hash(keyA) == hash(keyB)
- cmp(keyA, keyB) == 0(Python 3 中没有 `cmp()`,等价条件为 keyA == keyB)
如果keyA和keyB是哈希等价键,那么它们将被视为完全相同的两个键,于是d[keyA]和d[keyB]会指向同一个字典元素。