def loadDataSet():
postingList = [['my', 'dog', 'has', 'flea', 'problem', 'help', 'please'],
['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'],
['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'],
['stop', 'posting', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],
['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],
['quit', 'buying', 'worthless', 'dog', 'food', 'stupid']]
classVec = [0, 1, 0, 1, 0, 1]
return postingList, classVec
def createVocabList(dataSet):
vocabSet = set([])
for document in dataSet:
vocabSet = vocabSet | set(document)
return list(vocabSet)
def setOfWords2Vec(vocabList, inputSet):
returnVec = [0] * len(vocabList)
for word in inputSet:
if word in vocabList:
returnVec[vocabList.index(word)] = 1
else:
print("the word:%s is not in my vocabulary" % word)
listOPosts = loadDataSet()
myVocabList=createVocabList(listOPosts)
print(myVocabList)
报错:unhashable type: ‘list’
http://blog.csdn.net/pyufftj/article/details/72366631
http://blog.csdn.net/you_are_my_dream/article/details/61616646