# Python environment: 3.6.8
import csv
import jieba
def lst_strip(lst):
    """Return the stripped, non-empty strings of ``lst``.

    ``str.strip`` works on a single string, so this applies it to every
    element and drops the ones that end up empty.

    Example: ``[' 123', 'ttt', 'vv ', '', ' ']`` -> ``['123', 'ttt', 'vv']``.

    Args:
        lst: Iterable of strings.

    Returns:
        A new list with surrounding whitespace removed from each element
        and empty / whitespace-only elements left out.

    Raises:
        TypeError: If an element is not a ``str``.
    """
    stripped = (str.strip(item) for item in lst)
    return [item for item in stripped if item]
def P_y(y_all_temp, y_jieguoGeshu):
    """Write the relative frequency of each label into ``y_jieguoGeshu``.

    For every position ``k`` of ``y_all_temp`` the share of entries equal to
    ``y_all_temp[k]`` is stored in ``y_jieguoGeshu[k]``.

    Args:
        y_all_temp: Sequence of labels, one per sample.
        y_jieguoGeshu: Output list, modified in place. It must already have
            at least ``len(y_all_temp)`` slots.

    Returns:
        The same ``y_jieguoGeshu`` list that was passed in.

    Raises:
        IndexError: If ``y_jieguoGeshu`` is shorter than ``y_all_temp``.
    """
    from collections import Counter

    total = len(y_all_temp)
    try:
        # Count every distinct label once instead of rescanning the whole
        # sequence for each element.
        occurrences = Counter(y_all_temp)
    except TypeError:
        # Unhashable labels cannot be counted with a dict; fall back to
        # per-element counting.
        occurrences = None

    for position, label in enumerate(y_all_temp):
        if occurrences is None:
            count = y_all_temp.count(label)
        else:
            count = occurrences[label]
        y_jieguoGeshu[position] = count / total
    return y_jieguoGeshu
def word_num(input_fenci1, i, y_all_num):
    """Score every data row by how often one token occurs in its description.

    ``y_all_num`` is a table (list of lists) used as shared scratch space:

    * row 0: product descriptions, tokens separated by single spaces
    * row 1: the label belonging to each description
    * row 3: receives the number of tokens equal to ``input_fenci1``
    * row 4: receives the total number of tokens in the description
    * row ``i``: receives ``row3 / row4`` rounded to two decimals

    Rows 3, 4 and ``i`` must already have one slot per entry of row 1; only
    these rows are written.

    Args:
        input_fenci1: The token to look for.
        i: Index of the row of ``y_all_num`` that receives the ratios.
        y_all_num: The working table described above.

    Returns:
        The label (from row 1) of the first row with the highest ratio.

    Raises:
        ValueError: If row ``i`` is empty, i.e. there is nothing to score.
    """
    descriptions = y_all_num[0]
    labels = y_all_num[1]
    match_counts = y_all_num[3]
    token_totals = y_all_num[4]
    ratios = y_all_num[i]

    for index in range(len(labels)):
        pieces = (piece.strip() for piece in descriptions[index].split(' '))
        tokens = [piece for piece in pieces if piece]  # drop empty fragments
        matches = tokens.count(input_fenci1)
        # Counts are stored as floats, matching what earlier versions wrote.
        match_counts[index] = float(matches)
        token_totals[index] = float(len(tokens))
        # A description without any token cannot contain the word: score it
        # 0.0 rather than dividing by zero.
        ratios[index] = round(matches / len(tokens), 2) if tokens else 0.0

    # list.index returns the first maximum, so ties go to the earliest row.
    return labels[ratios.index(max(ratios))]
def _load_labelled_rows(data_file):
    """Read (description, label) pairs from a tab-separated file.

    Column index 1 is taken as the description and column index 3 as the
    label. Rows with fewer than four columns (blank lines included) and rows
    whose description or label is blank are skipped, so the two returned
    lists always line up and every description has at least one token.
    """
    descriptions = []
    labels = []
    # NOTE(review): the file is read with the platform default encoding;
    # pass an explicit encoding once the data file's encoding is confirmed.
    with open(data_file, 'r', newline='') as f:
        for row in csv.reader(f, delimiter='\t'):
            if len(row) < 4:
                continue
            description, label = row[1], row[3]
            if not description.strip() or not label.strip():
                continue
            descriptions.append(description)
            labels.append(label)
    return descriptions, labels


def sort_goods(input_in, data_file='test1.txt'):
    """Suggest categories for a free-text product description.

    The text is segmented with ``jieba``. For each resulting token,
    ``word_num`` picks the data row in which that token makes up the largest
    share of the description, and that row's label becomes a candidate.

    Args:
        input_in: The description to classify.
        data_file: Path of the tab-separated data file; descriptions are
            read from column index 1 and labels from column index 3.

    Returns:
        The distinct candidate labels in the order they were first found.
        Empty if the input has no tokens, the file has no usable rows, or
        no token occurs in any description.
    """
    # Whitespace-only segments can never match a stored token; drop them.
    tokens = []
    for piece in jieba.cut(input_in.strip(), cut_all=False):
        piece = piece.strip()
        if piece:
            tokens.append(piece)

    descriptions, labels = _load_labelled_rows(data_file)
    if not tokens or not labels:
        return []

    # Table layout expected by word_num: row 0 descriptions, row 1 labels,
    # rows 2-4 scratch space, then one ratio row per input token.
    first_token_row = 5
    row_count = len(labels)
    table = [descriptions, labels]
    table.extend([''] * row_count for _ in range(3 + len(tokens)))

    categories = []
    seen = set()
    for offset, token in enumerate(tokens):
        ratio_row = first_token_row + offset
        label = word_num(token, ratio_row, table)
        # word_num leaves the per-row ratios in table[ratio_row]. If the
        # best ratio is 0 the token matched nothing and the returned label
        # is just the first row's, so it is not a real candidate.
        if max(table[ratio_row]) > 0 and label not in seen:
            seen.add(label)
            categories.append(label)
    return categories
if __name__ == '__main__':
    # Demo run with a fixed sample description. Guarded so that importing
    # this module does not read the data file or print anything.
    print('请输入描述:')
    sample_description = '旺仔牛奶'
    print('分类为:', sort_goods(sample_description))