import collections


class headhandler():
    """Convert a flat sheet row of alternating label/value cells into a dict.

    The row is held in ``self.mylist``; the resulting mapping accumulates in
    ``self.mystorage``.  This base class is meant for header/summary rows
    where a label may have no value next to it (it is then paired with 0).
    """

    def __init__(self, mylist):
        self.mystorage = {}
        self.mylist = mylist

    def delempty(self):
        """Remove every empty-string cell from ``self.mylist`` in place."""
        # Slice assignment keeps the same list object (callers may share it)
        # while avoiding the quadratic ``while "" in ...: remove("")`` loop.
        self.mylist[:] = [cell for cell in self.mylist if cell != ""]

    def formatmydata(self, i):
        """Return *i* with every ':' removed when it is a string.

        Non-string cells (numbers etc.) are returned unchanged.
        """
        if isinstance(i, str):
            return i.replace(":", "")
        return i

    def fillempty(self):
        """Normalise the row into strict label/value pairs (header rows only).

        Empty cells are dropped first.  Then, walking the row pair by pair,
        a label whose next cell is another string, or which is the last cell,
        gets the value 0 inserted after it.  Finally ':' is stripped from all
        string cells.

        :return: the normalised list, also stored in ``self.mylist``
        """
        self.delempty()
        cells = self.mylist
        filled = []
        pos = 0
        # Walk by position instead of ``list.index`` so that repeated cells
        # are handled where they actually are, and build a new list instead
        # of inserting into the one being iterated.
        while pos < len(cells):
            label = cells[pos]
            has_value = (pos + 1 < len(cells)
                         and not isinstance(cells[pos + 1], str))
            if has_value:
                filled.append(label)
                filled.append(cells[pos + 1])
                pos += 2
            else:
                filled.append(label)
                filled.append(0)
                pos += 1
        self.mylist = [self.formatmydata(cell) for cell in filled]
        print(self.mylist)
        return self.mylist

    def turntodict(self):
        """Run ``fillempty`` and fold the label/value pairs into a dict.

        Cells at even positions are keys, the cell right after each is its
        value.  If a key occurs more than once, its first pair wins.

        :return: ``self.mystorage`` updated with the pairs of this row
        :raises IndexError: if the row ends with a label that has no value
            (possible for subclasses whose ``fillempty`` does not pad)
        """
        self.fillempty()
        cells = self.mylist
        pairs = {}
        for pos in range(0, len(cells), 2):
            key = cells[pos]
            value = cells[pos + 1]
            if key not in pairs:
                pairs[key] = value
        self.mystorage.update(pairs)
        return self.mystorage

    def finalchart(self):
        """Clean the row (drop empties, strip ':') and return it as a dict."""
        self.delempty()
        # Formatting is done here as well because subclasses may override
        # ``fillempty`` without stripping ':' themselves.
        self.mylist = [self.formatmydata(cell) for cell in self.mylist]
        return self.turntodict()


class rowhandler(headhandler):
    """Handler for detail rows in which the same label repeats per section.

    The 1st, 2nd and 3rd occurrence of a repeated label get the suffixes
    weight ('重量'), content ('含量') and price ('价格') so they become
    distinct dictionary keys.
    """

    _SUFFIXES = ('重量', '含量', '价格')

    def __init__(self, mylist):
        super().__init__(mylist)

    def fillempty(self):
        """Drop empty cells and disambiguate repeated string labels in place.

        Only string cells are considered labels, so repeated numeric values
        are left alone.  Labels occurring fewer than three times receive only
        as many suffixes as they have occurrences; occurrences beyond the
        third are left unchanged.

        :return: ``self.mylist`` (the same list object, modified in place)
        """
        self.delempty()
        positions = collections.defaultdict(list)
        for pos, cell in enumerate(self.mylist):
            if isinstance(cell, str):
                positions[cell].append(pos)
        for where in positions.values():
            if len(where) > 1:
                for pos, suffix in zip(where, self._SUFFIXES):
                    self.mylist[pos] = self.mylist[pos] + suffix
        return self.mylist

    # ``turntodict`` is inherited: it calls ``self.fillempty()`` and therefore
    # picks up the override above.


# Example header row as read from the sheet:
# mylist = ['采购日期:', '', 43495.0, '', '', '', '', '', '', '', '', '', '', '', '', '', '', '索赔金额:', '', '', '', '', '', 0.0, '', '']

# Demo: deleting from a dict while looping is safe when iterating over a
# snapshot of its keys.
mydict = {'a': [1, 2], 'b': [2, 3, 4]}
for i in list(mydict.keys()):
    print(mydict[i])
    if len(mydict[i]) > 2:
        del mydict[i]
print(mydict)
输出结果:
[1, 2] [2, 3, 4] {'a': [1, 2]}
import collections

from anewclass import *


class docgen:
    """Turn a sheet section (header row + data rows) into dictionaries.

    ``mylist[0]`` is the column-name row; the remaining rows are data.  Rows
    whose first cell is non-empty are regular rows and are zipped with the
    column names.  Rows whose first cell is the empty string are irregular
    label/value rows and are converted with ``rowhandler``.
    """

    # Suffixes given to the 1st/2nd/3rd occurrence of a repeated column name.
    _SUFFIXES = ("重量", "含量", "价格")

    def __init__(self, mylist):
        self.mxrows = mylist[1:]
        # Work on a copy so the caller's header row is not modified.
        self.columnline = list(mylist[0]) if mylist else []
        self.addlist = []  # dicts built from the irregular rows
        self.mxlist = []   # dicts built from the regular rows

    def addstring(self):
        """Make repeated column names unique by appending a suffix.

        The first three occurrences of a repeated, non-blank string name get
        the suffixes in ``_SUFFIXES`` in order; names occurring twice get only
        two suffixes, and occurrences beyond the third are left unchanged.
        Blank names are skipped: suffixing them would create a bare '重量'
        column that can clash with a real column of that name.

        :return: ``self.columnline`` after renaming
        """
        positions = collections.defaultdict(list)
        for pos, name in enumerate(self.columnline):
            if isinstance(name, str) and name != "":
                positions[name].append(pos)
        for where in positions.values():
            if len(where) > 1:
                for pos, suffix in zip(where, self._SUFFIXES):
                    self.columnline[pos] = self.columnline[pos] + suffix
        return self.columnline

    def genmx(self):
        """Rebuild ``self.mxlist`` and ``self.addlist`` from the data rows.

        Both lists are reset first, so calling this (or ``returnall``)
        repeatedly yields the same result instead of accumulating entries.
        """
        self.addstring()
        self.mxlist = []
        self.addlist = []
        for row in self.mxrows:
            if not row:
                continue
            if row[0] == "":
                # rowhandler edits its list in place; hand it a copy so the
                # stored row still starts with "" on a later call.
                self.addlist.append(rowhandler(list(row)).turntodict())
            else:
                # Columns with a blank header are skipped without shifting
                # the alignment between names and values.
                self.mxlist.append({
                    name: value
                    for name, value in zip(self.columnline, row)
                    if name != ""
                })

    def returnall(self):
        """Return ``{'mx': regular-row dicts, 'others': irregular-row dicts}``."""
        self.genmx()
        return {'mx': self.mxlist, 'others': self.addlist}


wuwa = [
    ['品名', '采购价', '每吨成本', '重量', '货品总成本', '铜重量', '铝重量', '片重量', '无限长', '锄头马', '铁重量', '铜含量', '铝含量', '片含量', '无限长',
     '锄头马', '铁含量', '铜价格', '铝价格', '片价格', '无限长', '锄头马', '铁价格', '产值', '每吨毛利', '货品赢利'],
    ['铜芯', 0.72, 11956.0, 19.617, 234540.852, 4.665, 0.068, 4.706, 0.506, 1.386, 1.63, 0.23780394555742468, 0.0034663811999796094,
     0.23989396951623593, 0.025793954223377682, 0.07065300504664321, 0.08309119641127592, 39200.0, 7000.0, 5050.0, 4500.0, 2750.0, 1800.0,
     11791.65009940358, -164.3499005964204, -3224.051999999979],
    ['', '', '', '', '', '', '23尖角', 1.157, '35尖角', 1.766, '', '', '23尖角', 0.058979456593770706, '35尖角', 0.09002395881123515, '', '',
     '23尖角', 5000.0, '35尖角', 3500.0, '', '', '', ''],
    ['', '', '', '', '', '', '35平角', 1.073, '', '', '', '', '35平角', 0.05469745628791354, '', '', '', '',
     '35平角', 3000.0, '', '', '', '', '', ''],
]

saiwa = docgen(wuwa)
result = saiwa.returnall()
print("===============mx===================")
for i in result['mx']:
    print(i)
print("===============others===================")
for i in result['others']:
    print(i)
输出结果:
[1, 2] [2, 3, 4] {'a': [1, 2]} ===============mx=================== {'品名': '铜芯', '采购价': 0.72, '每吨成本': 11956.0, '重量': 19.617, '货品总成本': 234540.852, '铜重量': 4.665, '铝重量': 0.068, '片重量': 4.706,
'无限长重量': 0.506, '锄头马重量': 1.386, '铁重量': 1.63, '铜含量': 0.23780394555742468, '铝含量': 0.0034663811999796094, '片含量':
0.23989396951623593, '无限长含量': 0.025793954223377682, '锄头马含量': 0.07065300504664321, '铁含量': 0.08309119641127592, '铜价格': 39200.0,
'铝价格': 7000.0, '片价格': 5050.0, '无限长价格': 4500.0, '锄头马价格': 2750.0, '铁价格': 1800.0, '产值': 11791.65009940358, '每吨毛利':
-164.3499005964204, '货品赢利': -3224.051999999979} ===============others=================== {'23尖角重量': 1.157, '35尖角重量': 1.766, '23尖角含量': 0.058979456593770706, '35尖角含量': 0.09002395881123515, '23尖角价格': 5000.0,
'35尖角价格': 3500.0} {'35平角重量': 1.073, '35平角含量': 0.05469745628791354, '35平角价格': 3000.0}
# NOTE(review): this snippet uses `pd`, `xlrd` and `collections` but no import
# for them is visible here -- presumably they are imported earlier in the
# file; confirm before running.


def readexcel(path):
    """Load the first sheet of *path* with pandas and print some diagnostics.

    Prints the row count, the type and attribute dict of the index, the
    first column label and the column index.  Returns nothing.
    """
    datablock = pd.read_excel(path, sheet_name=0)
    print(len(datablock))
    wenwa = datablock.head(2)
    print(type(wenwa.index))
    print(datablock.index.__dict__)
    print("columns", datablock.columns[0])
    print("columns", datablock.head(2).columns)


def loadexcel(path):
    """Read every row of the first sheet of workbook *path* via xlrd.

    Prints the number of rows and returns the rows as a list of value lists.
    """
    workbook = xlrd.open_workbook(path)
    sheet = workbook.sheet_by_index(0)
    # Rows 2 and 3 used to be fetched into unused locals here, which could
    # only fail on short sheets; the result never depended on them.
    allrets = [sheet.row_values(rownum) for rownum in range(sheet.nrows)]
    print(sheet.nrows)
    return allrets


def mergerows(mylist):
    """Split the sheet rows into the detail part and the summary part.

    Every row is printed.  The split row is the one containing the cell
    '每吨人工:' (if several rows match, the last match is used, located via
    ``list.index``).  ``doc['mx']`` is rows 2 up to, but not including, the
    row just before the split row; ``doc['header']`` is the split row and
    everything after it.  If no row matches, the split index stays 0.

    :return: dict with the keys ``'mx'`` and ``'header'``
    """
    k = '每吨人工:'
    splitline = 0
    doc = {}
    for i in mylist:
        print(i)
        if k in i:
            splitline = mylist.index(i)
            print('in: ', splitline)
    doc["mx"] = mylist[2:splitline-1]
    doc["header"] = mylist[splitline:]
    return doc


duwa = loadexcel('火烧片 2. MSCU3272441 铜芯.csv')
doc = mergerows(duwa)
for i in doc['header']:
    print(i)
print("==================mx=============================")
for i in doc['mx']:
    print(i)


def dealmx(mylist):
    """Placeholder: inspects the first cell of *mylist* and does nothing."""
    if mylist[0]=='':
        pass


mylist1 = ['品名', '采购价', '每吨成本', '重量', '货品总成本', '铜重量', '铝重量', '片重量', '无限长', '锄头马', '铁重量', '铜含量', '铝含量', '片含量',
           '无限长', '锄头马', '铁含量', '铜价格', '铝价格', '片价格', '无限长', '锄头马', '铁价格', '产值', '每吨毛利', '货品赢利']
mylist2 = ['铜芯', 0.72, 11956.0, 19.617, 234540.852, 4.665, 0.068, 4.706, 0.506, 1.386, 1.63, 0.23780394555742468, 0.0034663811999796094,
           0.23989396951623593, 0.025793954223377682, 0.07065300504664321, 0.08309119641127592, 39200.0, 7000.0, 5050.0, 4500.0, 2750.0, 1800.0,
           11791.65009940358, -164.3499005964204, -3224.051999999979]
# Zipping the raw header loses data: repeated names overwrite each other.
print(dict(zip(mylist1, mylist2)))
print(collections.Counter(mylist1))
print(mylist1.index('无限长'))


def addstring(mylist):
    """Make repeated names in *mylist* unique, in place, and return the list.

    The first three occurrences of a repeated, non-blank string get the
    suffixes '重量', '含量' and '价格' in that order.  A name that occurs only
    twice gets the first two suffixes; occurrences beyond the third are left
    unchanged.  Non-string and blank cells are never modified.
    """
    suffixes = ("重量", "含量", "价格")
    positions = {}
    for pos, name in enumerate(mylist):
        if isinstance(name, str) and name != "":
            positions.setdefault(name, []).append(pos)
    for where in positions.values():
        if len(where) > 1:
            for pos, suffix in zip(where, suffixes):
                mylist[pos] = mylist[pos] + suffix
    return mylist


print(addstring(mylist1))
mycounter = collections.Counter(mylist1)
print(dict(mycounter))
输出结果:
12 ['火烧片', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''] ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''] ['品名', '采购价', '每吨成本', '重量', '货品总成本', '铜重量', '铝重量', '片重量', '无限长', '锄头马', '铁重量', '铜含量', '铝含量', '片含量', '无限长',
'锄头马', '铁含量', '铜价格', '铝价格', '片价格', '无限长', '锄头马', '铁价格', '产值', '每吨毛利', '货品赢利'] ['铜芯', 0.72, 11956.0, 19.617, 234540.852, 4.665, 0.068, 4.706, 0.506, 1.386, 1.63, 0.23780394555742468, 0.0034663811999796094,
0.23989396951623593, 0.025793954223377682, 0.07065300504664321, 0.08309119641127592, 39200.0, 7000.0, 5050.0, 4500.0, 2750.0, 1800.0,
11791.65009940358, -164.3499005964204, -3224.051999999979] ['', '', '', '', '', '', '23尖角', 1.157, '35尖角', 1.766, '', '', '23尖角', 0.058979456593770706, '35尖角', 0.09002395881123515, '', '',
'23尖角', 5000.0, '35尖角', 3500.0, '', '', '', ''] ['', '', '', '', '', '', '35平角', 1.073, '', '', '', '', '35平角', 0.05469745628791354, '', '', '', '', '35平角', 3000.0, '', '', '', '', '',
''] ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''] ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '每吨人工:', '', '', '', '', '总人工', 0.0, '', ''] in: 7 ['采购日期:', '', 43495.0, '', '', '', '', '', '', '', '', '', '', '', '', '', '', '索赔金额:', '', '', '', '', '', 0.0, '', ''] ['计算日期:', '', 43594.0, '', '', '', '', '', '', '', '', '', '', '', '', '', '', '总成本:', '', '', '', '', '', 234540.852, '', ''] ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '总利润:', '', '', '', '', '', -3224.051999999979, '', ''] ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '利润百分比:', '', '', '', '', '', -0.013746227885281063, '', ''] ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '每吨人工:', '', '', '', '', '总人工', 0.0, '', ''] ['采购日期:', '', 43495.0, '', '', '', '', '', '', '', '', '', '', '', '', '', '', '索赔金额:', '', '', '', '', '', 0.0, '', ''] ['计算日期:', '', 43594.0, '', '', '', '', '', '', '', '', '', '', '', '', '', '', '总成本:', '', '', '', '', '', 234540.852, '', ''] ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '总利润:', '', '', '', '', '', -3224.051999999979, '', ''] ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '利润百分比:', '', '', '', '', '', -0.013746227885281063, '', ''] ==================mx============================= ['品名', '采购价', '每吨成本', '重量', '货品总成本', '铜重量', '铝重量', '片重量', '无限长', '锄头马', '铁重量', '铜含量', '铝含量', '片含量', '无限长',
'锄头马', '铁含量', '铜价格', '铝价格', '片价格', '无限长', '锄头马', '铁价格', '产值', '每吨毛利', '货品赢利'] ['铜芯', 0.72, 11956.0, 19.617, 234540.852, 4.665, 0.068, 4.706, 0.506, 1.386, 1.63, 0.23780394555742468, 0.0034663811999796094,
0.23989396951623593, 0.025793954223377682, 0.07065300504664321, 0.08309119641127592, 39200.0, 7000.0, 5050.0, 4500.0, 2750.0, 1800.0,
11791.65009940358, -164.3499005964204, -3224.051999999979]
['', '', '', '', '', '', '23尖角', 1.157, '35尖角', 1.766, '', '', '23尖角', 0.058979456593770706, '35尖角', 0.09002395881123515, '', '',
'23尖角', 5000.0, '35尖角', 3500.0, '', '', '', '']
['', '', '', '', '', '', '35平角', 1.073, '', '', '', '', '35平角', 0.05469745628791354, '', '', '', '', '35平角', 3000.0, '', '', '', '', '',
'']
{'品名': '铜芯', '采购价': 0.72, '每吨成本': 11956.0, '重量': 19.617, '货品总成本': 234540.852, '铜重量': 4.665, '铝重量': 0.068, '片重量': 4.706,
'无限长': 4500.0, '锄头马': 2750.0, '铁重量': 1.63, '铜含量': 0.23780394555742468, '铝含量': 0.0034663811999796094, '片含量': 0.23989396951623593,
'铁含量': 0.08309119641127592,
'铜价格': 39200.0, '铝价格': 7000.0, '片价格': 5050.0, '铁价格': 1800.0, '产值': 11791.65009940358, '每吨毛利': -164.3499005964204, '货品赢利':
-3224.051999999979}
Counter({'无限长': 3, '锄头马': 3, '品名': 1, '采购价': 1, '每吨成本': 1, '重量': 1, '货品总成本': 1, '铜重量': 1, '铝重量': 1, '片重量': 1,
'铁重量': 1, '铜含量': 1, '铝含量': 1, '片含量': 1, '铁含量': 1, '铜价格': 1, '铝价格': 1, '片价格': 1, '铁价格': 1, '产值': 1, '每吨毛利': 1,
'货品赢利': 1})
8 ['品名', '采购价', '每吨成本', '重量', '货品总成本', '铜重量', '铝重量', '片重量', '无限长重量', '锄头马重量', '铁重量', '铜含量', '铝含量', '片含量',
'无限长含量', '锄头马含量', '铁含量', '铜价格', '铝价格', '片价格', '无限长价格', '锄头马价格', '铁价格', '产值', '每吨毛利', '货品赢利'] {'品名': 1, '采购价': 1, '每吨成本': 1, '重量': 1, '货品总成本': 1, '铜重量': 1, '铝重量': 1, '片重量': 1, '无限长重量': 1, '锄头马重量': 1,
'铁重量': 1, '铜含量': 1, '铝含量': 1, '片含量': 1, '无限长含量': 1, '锄头马含量': 1, '铁含量': 1, '铜价格': 1, '铝价格': 1, '片价格': 1,
'无限长价格': 1, '锄头马价格': 1, '铁价格': 1, '产值': 1, '每吨毛利': 1, '货品赢利': 1}