from __future__ import division
import re
from numpy import ones, array
from numpy.lib.scimath import log
from nltk import *
def loadDataSet(obj_path="obj_train_data.txt", sbj_path="sbj_train_data.txt",
                num_docs=1000):
    """Load the two training corpora as interleaved, labelled bigram documents.

    Reads ``num_docs`` lines from each file, one line per document.  Every
    line is split on non-word characters, lower-cased, and passed to
    ``bigramGenerate``.  Documents are appended alternately: first the line
    from ``obj_path``, then the line from ``sbj_path``.

    Args:
        obj_path: Path of the first corpus (label 0).
        sbj_path: Path of the second corpus (label 1).
        num_docs: Number of lines to read from *each* file.

    Returns:
        A ``(lst_all, classVec)`` tuple.  ``lst_all`` holds ``2 * num_docs``
        results of ``bigramGenerate``; ``classVec`` holds the matching labels
        ``[0, 1, 0, 1, ...]`` (even index -> ``obj_path`` document, odd index
        -> ``sbj_path`` document).

    Note:
        If a file has fewer than ``num_docs`` lines, ``readline`` returns an
        empty string and the document is built from an empty token list.
        NOTE(review): ``bigramGenerate`` is defined outside this block; it is
        assumed to accept a list of tokens -- confirm against its definition.
    """
    # Compiled once, outside the loop.  ``\W+`` (not ``\W*``) is required:
    # a pattern that can match the empty string makes ``re.split`` cut
    # between every character on Python 3.7+.
    splitter = re.compile(r'\W+')

    def _tokenize(line):
        # Leading/trailing separators yield empty strings; drop them.
        return [tok.lower() for tok in splitter.split(line) if len(tok) > 0]

    lst_all = []
    # ``with`` guarantees both files are closed even if parsing raises.
    with open(obj_path, 'r') as obj, open(sbj_path, 'r') as sbj:
        for _ in range(num_docs):
            lst_all.append(bigramGenerate(_tokenize(obj.readline())))
            lst_all.append(bigramGenerate(_tokenize(sbj.readline())))

    # Labels follow the interleaving order used above.
    classVec = [i % 2 for i in range(2 * num_docs)]
    return lst_all, classVec
# Python贝叶斯算法进行文本主客观分析(采用文本双词模型)
# 最新推荐文章于 2022-09-17 17:39:14 发布