#!/usr/bin/python
"""Emit a ``word<TAB>1`` record once per distinct word of each document.

The article library is read line by line.  Every line is expected to hold a
document index and the document text separated by the first tab character;
the text itself is a sequence of words separated by single spaces.  Lines
that do not contain both fields are skipped.
"""
import sys

# Path of the tab-separated article library read when run as a script.
file1 = 'd:/share/articlelib'


def unique_words(line: str) -> set:
    """Return the distinct words of one article-library line.

    Args:
        line: One raw input line (trailing newline allowed).

    Returns:
        The set of words found in the text field, or an empty set when the
        line does not split into exactly two tab-separated fields.
    """
    fields = line.strip().split('\t', 1)
    if len(fields) != 2:
        return set()
    doc_context = fields[1].strip()
    # Split on a single space on purpose: consecutive spaces yield an empty
    # token, which is kept so the emitted records match the original script.
    return set(doc_context.split(' '))


def emit_word_records(lines, out=None):
    """Write ``word<TAB>1`` for every distinct word of every line.

    Args:
        lines: Iterable of raw article-library lines.
        out: Text stream to write to; defaults to ``sys.stdout``.
    """
    if out is None:
        out = sys.stdout
    for line in lines:
        # The order of words within one document follows set iteration order.
        for word in unique_words(line):
            print('\t'.join([word, "1"]), file=out)


def main():
    """Read the article library at ``file1`` and print its word records."""
    # The context manager closes the file even if processing fails.
    with open(file1, 'r', encoding='UTF-8') as article_lib:
        emit_word_records(article_lib)


if __name__ == "__main__":
    main()
TF-IDF(1)
最新推荐文章于 2024-06-15 19:03:40 发布