import jieba
import numpy as np
import xlrd
import re
import json
# 数据加载
# jieba词库设置
#读取文本,读取其中1个
#统计包含a的单词和不包含a的单词
#统计主题
def _count_words(dataset):
    """Return a dict mapping each word to how many times it occurs in *dataset*.

    *dataset* is an iterable of documents, each document being an iterable
    of words.
    """
    counts = {}
    for doc in dataset:
        for word in doc:
            # First sighting counts as 1 (not 0), so totals are exact.
            counts[word] = counts.get(word, 0) + 1
    return counts


def chisquare(datasetTrue, datasetFlase):
    """Build the per-class word-count tables used for chi-square scoring.

    Args:
        datasetTrue: iterable of documents (each an iterable of words)
            belonging to the target class.
        datasetFlase: iterable of documents (each an iterable of words)
            not belonging to the target class.

    Returns:
        A tuple ``(dicta, dictb, keyset)`` where ``dicta`` maps each word to
        its occurrence count in ``datasetTrue``, ``dictb`` does the same for
        ``datasetFlase``, and ``keyset`` is the set of all words seen in
        either dataset.

    NOTE(review): the visible source stops right after the vocabulary is
    built; the chi-square statistic itself is not present in this block.
    Until that step is restored, the intermediate tables are returned so
    the work done here is not discarded -- confirm against the callers.
    """
    dicta = _count_words(datasetTrue)
    # Count the second dataset by its own words; the earlier code reused the
    # loop variable left over from the first dataset.
    dictb = _count_words(datasetFlase)
    # The vocabulary must cover both classes, not only the words of dicta.
    keyset = set(dicta) | set(dictb)
    return dicta, dictb, keyset
import numpy as np
import xlrd
import re
import json
# 数据加载
# jieba词库设置
#读取文本,读取其中1个
#统计包含a的单词和不包含a的单词
#统计主题
def chisquare(datasetTrue, datasetFlase):
dicta ={}
dictb ={}
for i in datasetTrue:
for word in i:
if word not in dicta:
dicta[word] = 0
else:
dicta[word] = dicta[word] + 1
for i in datasetFlase:
if word in i:
if word not in dictb:
dictb[word] = 0
else:
dictb[word] = dictb[word] + 1
keyset = set()
for i in dicta:
keyset.add(i)
for i in dictb:
keyset