# Learning the sklearn library: the TF-IDF algorithm
# coding:utf-8
import jieba
import jieba.posseg as pseg
import os
import sys
from sklearn import feature_extraction
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
# Read each corpus file into a list of its lines (iterating a text file
# yields one string per line, line endings included).
#
# Every file is opened in a ``with`` block so its handle is closed as soon as
# the lines have been read, even if reading raises.  The earlier version
# closed only the first three handles and leaked four.txt, five.txt and
# six.txt.  The handle names (one .. six) remain bound afterwards, now
# referring to closed file objects.
with open(r'one.txt', encoding="utf-8") as one:
    onee = list(one)

with open(r'two.txt', encoding="utf-8") as two:
    twoo = list(two)

with open(r'three.txt', encoding="utf-8") as three:
    threee = list(three)

with open(r'four.txt', encoding="utf-8") as four:
    fourr = list(four)

with open(r'five.txt', encoding="utf-8") as five:
    fivee = list(five)

with open(r'six.txt', encoding="utf-8") as six:
    sixx = list(six)
if __name__ == "__main__":
corpus= onee + twoo + threee + fourr + fivee