题目
问题
答案
代码
#!/usr/bin/python3
#-*-coding:GBK -*-
from nltk.probability import FreqDist
# Tiny hand-written training corpus: review snippets grouped by sentiment.
# Some snippets keep a trailing space, so the joined documents below contain
# double spaces; downstream tokenization relies on str.split() to absorb them.
neg_str = [
    'just plain boring ',
    'entirely predictable and lacks energy ',
    'no surprises and very few laughs',
]
pos_str = [
    'very powerful ',
    'the most fun film of the summer',
]

# One space-joined document per class.
connect_neg_str, connect_pos_str = (
    ' '.join(snippets) for snippets in (neg_str, pos_str)
)
def count_freq(words, top_n=50):
    """Return the most frequent whitespace-separated tokens of *words*.

    Args:
        words: Text to tokenize with ``str.split()``.
        top_n: Maximum number of ``(token, count)`` pairs to return.
            Defaults to 50, the cut-off this function has always used;
            pass ``None`` to get every distinct token.

    Returns:
        A list of ``(token, count)`` tuples ordered from most to least
        frequent, e.g. ``[('the', 2), ('very', 1), ...]``.
    """
    fdist = FreqDist(words.split())
    # NOTE: with the default cut-off the result never has more than 50
    # entries, so its length is not a reliable vocabulary size for larger
    # texts -- use top_n=None when every distinct token is needed.
    return fdist.most_common(top_n)
def count_len(words):
    """Return the number of whitespace-separated tokens in *words*.

    Repeated tokens are counted each time they occur; an empty or
    whitespace-only string yields 0.
    """
    return len(words.split())
def get_V_n_P():
V = len(count_freq(connect_neg_str + ' ' + connect_pos_str)) # 当前训练文本的词汇量
print('|V| = %d'%V, end = ", ")
n_neg = count_len(connect_neg_str) # neg_str的单词数(算重复)