# Python pandas 按特征对物料循环分类
# 最终版
import pandas as pd
import numpy as np
# Path of the Excel workbook holding the raw data; the script reads a
# '物料' (material) column and a '特征' (feature) column from it.
file_name='/Users/tianqing/Desktop/python/按特征归类.xlsx'


def classify_materials(data, item_col='物料', feature_col='特征'):
    """Group materials that share at least one feature into classes.

    Materials are visited in order of first appearance in ``data``. Each
    material joins the first existing class (in creation order) whose
    accumulated feature list contains any of the material's features; if
    no class qualifies, a new class is opened for it.

    Args:
        data: DataFrame with one row per (material, feature) pair.
        item_col: Name of the column holding the material identifier.
        feature_col: Name of the column holding the feature value.

    Returns:
        A dict mapping ``'class_1'``, ``'class_2'``, ... to
        ``{'name': [materials], 'ch': [features]}``. ``'ch'`` is the
        concatenation of every member's feature list, so a feature shared
        by several members appears more than once. Empty input yields an
        empty dict.
    """
    classes = {}
    for item in data[item_col].drop_duplicates().tolist():
        # Compute the material's feature list once and reuse it for both the
        # overlap test and the class update.
        features = data.loc[data[item_col] == item, feature_col].tolist()

        # Pick the first class that already holds one of these features.
        # NOTE(review): a material overlapping several classes only joins the
        # first one and the classes are not merged -- confirm this is intended.
        target = next(
            (
                members
                for members in classes.values()
                if any(feature in features for feature in members['ch'])
            ),
            None,
        )
        if target is None:
            # No shared feature anywhere: open the next numbered class.
            target = {'name': [], 'ch': []}
            classes['class_' + str(len(classes) + 1)] = target

        target['name'].append(item)
        target['ch'].extend(features)
    return classes


if __name__ == '__main__':
    # Load the raw data.
    data = pd.read_excel(file_name)
    data_name = data['物料'].drop_duplicates().tolist()

    # Big dictionary of all classes, keyed by class name.
    class_total = classify_materials(data)

    # Build the summary table once, after every material has been assigned.
    class_df = pd.DataFrame.from_dict(class_total, orient='index')
    print(class_df)