import pandas as pd
import csv
import jieba
from collections import Counter, OrderedDict
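# Data format: the label column is read from the CSV as 'label' and the text
# column as 'text' (this note originally named them 'key' and 'title').
# The list below holds the preset labels; fill in the label names for your
# own classification task.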
# Preset department labels (placeholders). Replace them with the label names
# used by your own classification task.
bumen = [
    'xxxx处',
    'xx灯',
    '建xx处',
    '绿xxx办',
    '燃xx',
    '城建xxx',
    '公用xxxx',
    '养xxxx',
    '城建xxxx',
    '城xxx',
    '质xxx',
    '人xxx',
    '监xxxx队',
    '村xxx',
    '招xxx',
    'xxxx办',
    'xxxxx室',
]

# Load the input table and open the result file in append mode.
# NOTE(review): `fd` is not closed anywhere in this section -- confirm that it
# is closed after the results have been written.
data = pd.read_csv('data1.csv')
fd = open('result.txt', 'a')

# Label column.
data_key = data['label']
# Text column.
data_title = data['text']

# Word buckets: fenci[0] is headed by '总词汇' (overall vocabulary) and is
# followed by one bucket per entry of `bumen`, in the same order. Element 0 of
# every bucket is the bucket's own name; later code appends words after it.
fenci = [['总词汇']] + [[name] for name in bumen]

# Module-level accumulators, all initially empty.
zongcipin = []
Zongcipin = []
bumencipin = []
Bumencipin = []
bumencihui = []
for i in range(len(data_key)):
seg_list = jieba.cut(data_title[i], cut_all=False)
for xx in seg_list:
fenci[0].append(xx)
for j in range (len(fenci)):
if fenci[j][0] in data_key[i]:
fenci[j].append(x