def loadDataSet():
    """Return the hard-coded sample corpus and its class labels.

    Returns:
        A ``(dataset, classVec)`` tuple. ``dataset`` is a list of six
        tokenized posts (each a list of word strings) and ``classVec`` is a
        list of six integer labels, one per post, in the same order.
    """
    # Posts already split into word tokens.
    dataset = [
        ['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'],
        ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'],
        ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'],
        ['stop', 'posting', 'stupid', 'worthless', 'garbage'],
        ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],
        ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid'],
    ]
    # Class label vector, aligned with ``dataset`` by index.
    # NOTE(review): the original comment said "1 = good, 0 = not good", but
    # the posts labelled 1 are the ones containing 'stupid' / 'worthless',
    # so that mapping looks inverted -- confirm the intended meaning.
    classVec = [0, 1, 0, 1, 0, 1]
    return dataset, classVec
def feature_select(list_words):
    """Count how often each word occurs across all documents.

    While counting, the function prints a trace: each document's word list
    once, then for every word the word itself followed by the running total
    of words counted so far.

    Args:
        list_words: Iterable of documents, each an iterable of hashable
            word tokens.

    Returns:
        A ``defaultdict(int)`` mapping each word to its total number of
        occurrences over all documents (every occurrence is counted, not
        just one per document). Empty input yields an empty mapping.
    """
    # Imported locally so the function works even when the module has no
    # top-level ``from collections import defaultdict``.
    from collections import defaultdict

    # Total term counts over the whole corpus.
    doc_frequency = defaultdict(int)
    # Equals sum(doc_frequency.values()) at every step, without re-summing
    # the whole mapping for each word.
    running_total = 0
    for word_list in list_words:
        print(word_list)
        for word in word_list:
            print(word)
            doc_frequency[word] += 1
            running_total += 1
            print(running_total)
    # Return the counts so callers receive a usable result instead of None.
    return doc_frequency
# Script entry point: only runs when the file is executed directly.
if __name__ == '__main__':
    # Load the sample documents and their class labels.
    data_list, label_list = loadDataSet()
    # Walk every document; feature_select prints its trace as it counts.
    features = feature_select(data_list)
输出结果(仅列出第一条词条列表对应的输出,其余五条的输出格式相同,计数继续累加):
['my', 'dog', 'has', 'flea', 'problems', 'help', 'please']
my
1
dog
2
has
3
flea
4
problems
5
help
6
please
7
二维数组的循环输出
最新推荐文章于 2021-10-04 19:59:28 发布