Listing 4-1: Converting a word list to a vector
from numpy import *

def loadDataSet():
    postingList = [['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'],
                   ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'],
                   ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'],
                   ['stop', 'posting', 'stupid', 'worthless', 'garbage'],
                   ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],
                   ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid']]
    classVec = [0, 1, 0, 1, 0, 1]  # 1 = abusive, 0 = normal
    return postingList, classVec
# Creates a small data set of six documents, each with its own class label
# (this example has only two classes, 0 and 1).
def createVocabList(dataSet):
    vocabSet = set([])  # create an empty set
    for document in dataSet:
        # For each document in the data set, extract its words;
        # the set removes duplicates, and | takes the union.
        #print(set(document))
        vocabSet = vocabSet | set(document)  # union of the two sets
        #print(vocabSet)
    return list(vocabSet)
# This function merges the document collection into a vocabulary containing
# every word that appears anywhere in the collection.
# Naive Bayes document classification converts each document into a (feature)
# vector against this vocabulary, with 0/1 values marking absence or presence.
def setOfWords2Vec(vocabList, inputSet):
    returnVec = [0] * len(vocabList)
    # Create a vector of all zeros, one slot per vocabulary word.
    for word in inputSet:
        if word in vocabList:
            returnVec[vocabList.index(word)] = 1
        else:
            print("the word: %s is not in my Vocabulary!" % word)
    return returnVec
# The function first creates a vector as long as the vocabulary;
# the output marks whether each vocabulary word occurs in the document,
# converting the document into a word vector.
listOPosts,listClasses = loadDataSet()
#print(listOPosts)
#print(listClasses)
myVocabList = createVocabList(listOPosts)
print(myVocabList)
print(listOPosts[0])
['park', 'cute', 'so', 'has', 'dog', 'ate', 'please', 'is', 'worthless', 'steak', 'to', 'problems', 'buying', 'stupid', 'food', 'my', 'mr', 'flea', 'I', 'maybe', 'stop', 'take', 'licks', 'posting', 'love', 'how', 'not', 'quit', 'dalmation', 'help', 'garbage', 'him']
['my', 'dog', 'has', 'flea', 'problems', 'help', 'please']
setOfWords2Vec(myVocabList,listOPosts[0])
[0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]
print(listOPosts[3])
setOfWords2Vec(myVocabList,listOPosts[3])
['stop', 'posting', 'stupid', 'worthless', 'garbage']
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0]
Pseudocode for training the naive Bayes classifier:

Count the number of documents in each class
For every training document:
    For each class:
        If a token appears in the document -> increment the count for that token
        Increment the total token count
For each class:
    For each token:
        Divide the token's count by the total token count to get its conditional probability
Return the conditional probabilities for each class
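For reference, the score being compared comes straight from Bayes' rule: p(ci | w) = p(w | ci) p(ci) / p(w). The denominator p(w) is the same for every class and can be dropped, and the naive independence assumption factors p(w | ci) into a product over the individual words, so in log form each class's score is log p(ci) + sum_j log p(wj | ci). That is exactly the quantity trainNB0() below estimates and classifyNB() later evaluates.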
Listing 4-2: Naive Bayes classifier training function
def trainNB0(trainMatrix, trainCategory):
    numTrainDocs = len(trainMatrix)
    # Number of training documents: 6
    #print(numTrainDocs)
    numWords = len(trainMatrix[0])  # vocabulary length from the first row: 32 words
    #print(numWords)
    #print(sum(trainCategory))
    #print(float(numTrainDocs))
    pAbusive = sum(trainCategory)/float(numTrainDocs)
    # Probability of the abusive class; this example is limited to classes 0 and 1.
    #print(pAbusive)
    '''
    p0Num = zeros(numWords)  # initialize the probabilities
    p1Num = zeros(numWords)
    # pXNum is a vector as long as the vocabulary, counting occurrences of each word
    p0Denom = 0.0
    p1Denom = 0.0
    # pXDenom is the total word count within class X
    '''
    p0Num = ones(numWords)  # initialize the probabilities
    p1Num = ones(numWords)
    p0Denom = 2
    p1Denom = 2
    # Practical adjustment no. 1: initialization.
    # Bayesian document classification multiplies many probabilities together to
    # get the probability that a document belongs to a class; that is, within each
    # class, the probabilities of the document's individual words are multiplied.
    # If any single probability is 0, the whole product becomes 0, so the book
    # initializes every word count to 1 and both denominators to 2.
    for i in range(numTrainDocs):
        #print(trainCategory[i])
        if trainCategory[i] == 1:
            p1Num += trainMatrix[i]  # vector addition
            p1Denom += sum(trainMatrix[i])
            #print(p1Num)
            #print(p1Denom)
        else:
            p0Num += trainMatrix[i]
            p0Denom += sum(trainMatrix[i])
            #print(p0Num)
            #print(p0Denom)
    print(p1Num)
    print(p1Denom)
    #p1Vect = p1Num/p1Denom
    # Practical adjustment no. 2: underflow.
    # Multiplying many tiny numbers easily underflows and rounds to 0.
    # The fix is to take the logarithm of the product: ln(a*b) = ln(a) + ln(b).
    p1Vect = log(p1Num/p1Denom)
    print(p0Num)
    print(p0Denom)
    #p0Vect = p0Num/p0Denom
    # Frequency of each vocabulary word within each class.
    p0Vect = log(p0Num/p0Denom)
    return p0Vect, p1Vect, pAbusive
# Meaning of the first parameter, tying the earlier functions together:
# postingList is the document collection, one document per row, so the row count
# is the document count. classVec has one value per document, giving each
# document's class. createVocabList() merges the documents into a duplicate-free
# vocabulary, and setOfWords2Vec() maps one document's words onto that
# vocabulary, producing a vector.
# In trainNB0(trainMatrix, trainCategory), the first argument holds each
# document's vocabulary vector, an n*m matrix where n is the document count and
# m the vocabulary length; trainCategory is the vector of per-document class labels.
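A tiny numeric illustration of both adjustments, using a hypothetical 3-word vocabulary (not from the book):

import numpy as np
counts = np.array([2, 0, 1])                  # hypothetical word counts within one class
raw = counts / counts.sum()                   # [0.667 0. 0.333]; the 0 zeroes out any product it enters
smoothed = (counts + 1) / (counts.sum() + 2)  # counts start at 1, denominator at 2, as in trainNB0
print(np.log(smoothed))                       # finite log-probabilities that can safely be summed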
from numpy import *
listOPosts, listClasses = loadDataSet()
print(listClasses)
myVocabList = createVocabList(listOPosts)
print(myVocabList)
trainMat = []
for postinDoc in listOPosts:
    print(postinDoc)
    trainMat.append(setOfWords2Vec(myVocabList, postinDoc))
print(trainMat)
# trainNB0 returns the probability of each vocabulary word given each class
# (the class-conditional probabilities) and the probability of each class
# (the class prior). Here pAb comes out to 0.5, i.e. classes 0 and 1 are
# equally likely.
[0, 1, 0, 1, 0, 1]
['park', 'cute', 'so', 'has', 'dog', 'ate', 'please', 'is', 'worthless', 'steak', 'to', 'problems', 'buying', 'stupid', 'food', 'my', 'mr', 'flea', 'I', 'maybe', 'stop', 'take', 'licks', 'posting', 'love', 'how', 'not', 'quit', 'dalmation', 'help', 'garbage', 'him']
['my', 'dog', 'has', 'flea', 'problems', 'help', 'please']
['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid']
['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him']
['stop', 'posting', 'stupid', 'worthless', 'garbage']
['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him']
['quit', 'buying', 'worthless', 'dog', 'food', 'stupid']
[[0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1], [0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]]
p0V,p1V,pAb = trainNB0(trainMat,listClasses)
[ 2. 1. 1. 1. 3. 1. 1. 1. 3. 1. 2. 1. 2. 4. 2. 1. 1. 1.
1. 2. 2. 2. 1. 2. 1. 1. 2. 2. 1. 1. 2. 2.]
21
[ 1. 2. 2. 2. 2. 2. 2. 2. 1. 2. 2. 2. 1. 1. 1. 4. 2. 2.
2. 1. 2. 1. 2. 1. 2. 2. 1. 1. 2. 2. 1. 3.]
26
print(pAb)
0.5
print(p0V)
[-3.25809654 -2.56494936 -2.56494936 -2.56494936 -2.56494936 -2.56494936
-2.56494936 -2.56494936 -3.25809654 -2.56494936 -2.56494936 -2.56494936
-3.25809654 -3.25809654 -3.25809654 -1.87180218 -2.56494936 -2.56494936
-2.56494936 -3.25809654 -2.56494936 -3.25809654 -2.56494936 -3.25809654
-2.56494936 -2.56494936 -3.25809654 -3.25809654 -2.56494936 -2.56494936
-3.25809654 -2.15948425]
print(p1V)
[-2.35137526 -3.04452244 -3.04452244 -3.04452244 -1.94591015 -3.04452244
-3.04452244 -3.04452244 -1.94591015 -3.04452244 -2.35137526 -3.04452244
-2.35137526 -1.65822808 -2.35137526 -3.04452244 -3.04452244 -3.04452244
-3.04452244 -2.35137526 -2.35137526 -2.35137526 -3.04452244 -2.35137526
-3.04452244 -3.04452244 -2.35137526 -2.35137526 -3.04452244 -3.04452244
-2.35137526 -2.35137526]
Listing 4-3: Naive Bayes classification function
def classifyNB(vec2Classify, p0Vec, p1Vec, pClass1):
    p1 = sum(vec2Classify*p1Vec) + log(pClass1)
    print(sum(vec2Classify*p1Vec))
    print(log(pClass1))
    print(p1)
    p0 = sum(vec2Classify*p0Vec) + log(1.0-pClass1)
    print(sum(vec2Classify*p0Vec))
    print(log(1.0-pClass1))
    print(p0)
    if p1 > p0:
        return 1
    else:
        return 0

def testingNB():
    listOPosts, listClasses = loadDataSet()
    myVocabList = createVocabList(listOPosts)
    trainMat = []
    for postinDoc in listOPosts:
        trainMat.append(setOfWords2Vec(myVocabList, postinDoc))
    p0V, p1V, pAb = trainNB0(array(trainMat), array(listClasses))
    print(p0V)
    print(p1V)
    print(pAb)
    testEntry = ['love', 'my', 'dalmation']
    thisDoc = array(setOfWords2Vec(myVocabList, testEntry))
    print(thisDoc)
    print(testEntry)
    print("classified as")
    print(classifyNB(thisDoc, p0V, p1V, pAb))
    testEntry = ['stupid', 'garbage']
    thisDoc = array(setOfWords2Vec(myVocabList, testEntry))
    print(testEntry)
    print("classified as")
    print(classifyNB(thisDoc, p0V, p1V, pAb))
testingNB()
[ 2. 1. 1. 1. 3. 1. 1. 1. 3. 1. 2. 1. 2. 4. 2. 1. 1. 1.
1. 2. 2. 2. 1. 2. 1. 1. 2. 2. 1. 1. 2. 2.]
21
[ 1. 2. 2. 2. 2. 2. 2. 2. 1. 2. 2. 2. 1. 1. 1. 4. 2. 2.
2. 1. 2. 1. 2. 1. 2. 2. 1. 1. 2. 2. 1. 3.]
26
[-3.25809654 -2.56494936 -2.56494936 -2.56494936 -2.56494936 -2.56494936
-2.56494936 -2.56494936 -3.25809654 -2.56494936 -2.56494936 -2.56494936
-3.25809654 -3.25809654 -3.25809654 -1.87180218 -2.56494936 -2.56494936
-2.56494936 -3.25809654 -2.56494936 -3.25809654 -2.56494936 -3.25809654
-2.56494936 -2.56494936 -3.25809654 -3.25809654 -2.56494936 -2.56494936
-3.25809654 -2.15948425]
[-2.35137526 -3.04452244 -3.04452244 -3.04452244 -1.94591015 -3.04452244
-3.04452244 -3.04452244 -1.94591015 -3.04452244 -2.35137526 -3.04452244
-2.35137526 -1.65822808 -2.35137526 -3.04452244 -3.04452244 -3.04452244
-3.04452244 -2.35137526 -2.35137526 -2.35137526 -3.04452244 -2.35137526
-3.04452244 -3.04452244 -2.35137526 -2.35137526 -3.04452244 -3.04452244
-2.35137526 -2.35137526]
0.5
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0]
['love', 'my', 'dalmation']
classified as
-9.13356731317
-0.69314718056
-9.82671449373
-7.00170089182
-0.69314718056
-7.69484807238
0
['stupid', 'garbage']
classified as
-4.00960333377
-0.69314718056
-4.70275051433
-6.51619307604
-0.69314718056
-7.2093402566
1
import numpy as np
# Manual check: reproduce sum(vec2Classify*p0Vec) for ['love','my','dalmation'],
# with the p0V values and the document vector copied from the output above.
a = np.array([-3.25809654, -2.56494936, -2.56494936, -2.56494936, -2.56494936, -2.56494936,
              -2.56494936, -2.56494936, -3.25809654, -2.56494936, -2.56494936, -2.56494936,
              -3.25809654, -3.25809654, -3.25809654, -1.87180218, -2.56494936, -2.56494936,
              -2.56494936, -3.25809654, -2.56494936, -3.25809654, -2.56494936, -3.25809654,
              -2.56494936, -2.56494936, -3.25809654, -3.25809654, -2.56494936, -2.56494936,
              -3.25809654, -2.15948425])
b = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0])
print(sum(a*b))
-7.0017009
Listing 4-4: Naive Bayes bag-of-words model
def bagOfWords2Vec(vocabList, inputSet):
    returnVec = [0]*len(vocabList)
    for word in inputSet:
        if word in vocabList:
            returnVec[vocabList.index(word)] += 1  # count occurrences instead of flagging presence
    return returnVec
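A quick contrast between the two models, reusing myVocabList from above (the test document is made up): a repeated word contributes 1 in the set-of-words vector but its full count in the bag-of-words vector.

testDoc = ['dog', 'dog', 'stupid']           # 'dog' occurs twice (made-up document)
print(setOfWords2Vec(myVocabList, testDoc))  # the 'dog' slot holds 1
print(bagOfWords2Vec(myVocabList, testDoc))  # the 'dog' slot holds 2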
Example: classifying email with naive Bayes
(1) Collect: text files are provided.
(2) Prepare: parse the text files into token vectors.
(3) Analyze: inspect the tokens to make sure parsing is correct.
(4) Train: use the trainNB0() function built earlier.
(5) Test: use classifyNB(), and build a new test function that computes the error rate over a document set.
(6) Use: build a complete program that classifies a set of documents and prints the misclassified ones to the screen.
mySent = 'this book is the best book on Python or M.L. I have ever laid eyes upon'
mySent.split()
['this', 'book', 'is', 'the', 'best', 'book', 'on', 'Python', 'or', 'M.L.', 'I', 'have', 'ever', 'laid', 'eyes', 'upon']
import re
regEx = re.compile('\\W*')  # the delimiter is any run of characters other than letters and digits
listOfTokens = regEx.split(mySent)
listOfTokens
E:\Anaconda3\lib\site-packages\ipykernel_launcher.py:3: FutureWarning: split() requires a non-empty pattern match.
  This is separate from the ipykernel package so we can avoid doing imports until
['this', 'book', 'is', 'the', 'best', 'book', 'on', 'Python', 'or', 'M', 'L', 'I', 'have', 'ever', 'laid', 'eyes', 'upon']
[tok for tok in listOfTokens if len(tok)>0]
['this', 'book', 'is', 'the', 'best', 'book', 'on', 'Python', 'or', 'M', 'L', 'I', 'have', 'ever', 'laid', 'eyes', 'upon']
[tok.lower() for tok in listOfTokens if len(tok)>0]
['this', 'book', 'is', 'the', 'best', 'book', 'on', 'python', 'or', 'm', 'l', 'i', 'have', 'ever', 'laid', 'eyes', 'upon']
emailText = open('email/ham/6.txt').read()
listOfTokens = regEx.split(emailText)
E:\Anaconda3\lib\site-packages\ipykernel_launcher.py:2: FutureWarning: split() requires a non-empty pattern match.
listOfTokens
['Hello', 'Since', 'you', 'are', 'an', 'owner', 'of', 'at', 'least', 'one', 'Google', 'Groups', 'group', 'that',
 'uses', 'the', 'customized', 'welcome', 'message', 'pages', 'or', 'files', 'we', 'are', 'writing', 'to', 'inform',
 'you', 'that', 'we', 'will', 'no', 'longer', 'be', 'supporting', 'these', 'features', 'starting', 'February', '2011',
 'We', 'made', 'this', 'decision', 'so', 'that', 'we', 'can', 'focus', 'on', 'improving', 'the', 'core',
 'functionalities', 'of', 'Google', 'Groups', 'mailing', 'lists', 'and', 'forum', 'discussions', 'Instead', 'of',
 'these', 'features', 'we', 'encourage', 'you', 'to', 'use', 'products', 'that', 'are', 'designed', 'specifically',
 'for', 'file', 'storage', 'and', 'page', 'creation', 'such', 'as', 'Google', 'Docs', 'and', 'Google', 'Sites',
 'For', 'example', 'you', 'can', 'easily', 'create', 'your', 'pages', 'on', 'Google', 'Sites', 'and', 'share',
 'the', 'site', 'http', 'www', 'google', 'com', 'support', 'sites', 'bin', 'answer', 'py', 'hl', 'en', 'answer',
 '174623', 'with', 'the', 'members', 'of', 'your', 'group', 'You', 'can', 'also', 'store', 'your', 'files', 'on',
 'the', 'site', 'by', 'attaching', 'files', 'to', 'pages', 'http', 'www', 'google', 'com', 'support', 'sites',
 'bin', 'answer', 'py', 'hl', 'en', 'answer', '90563', 'on', 'the', 'site', 'If', "you're", 'just', 'looking',
 'for', 'a', 'place', 'to', 'upload', 'your', 'files', 'so', 'that', 'your', 'group', 'members', 'can', 'download',
 'them', 'we', 'suggest', 'you', 'try', 'Google', 'Docs', 'You', 'can', 'upload', 'files', 'http', 'docs', 'google',
 'com', 'support', 'bin', 'answer', 'py', 'hl', 'en', 'answer', '50092', 'and', 'share', 'access', 'with', 'either',
 'a', 'group', 'http', 'docs', 'google', 'com', 'support', 'bin', 'answer', 'py', 'hl', 'en', 'answer', '66343',
 'or', 'an', 'individual', 'http', 'docs', 'google', 'com', 'support', 'bin', 'answer', 'py', 'hl', 'en', 'answer',
 '86152', 'assigning', 'either', 'edit', 'or', 'download', 'only', 'access', 'to', 'the', 'files', 'you', 'have',
 'received', 'this', 'mandatory', 'email', 'service', 'announcement', 'to', 'update', 'you', 'about', 'important',
 'changes', 'to', 'Google', 'Groups', '']
def textParse(bigString):
    import re
    listOfTokens = re.split(r'\W*', bigString)
    # Use a regular expression to tokenize; keep only tokens longer than
    # two characters, lowercased.
    return [token.lower() for token in listOfTokens if len(token) > 2]
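The FutureWarning that appears throughout the outputs below comes from this \W* pattern, which can match an empty string; on Python 3.7+ re.split additionally starts honoring empty matches, which would break this tokenizer. A variant with \W+ (a hypothetical textParse2, not from the book) behaves identically for this purpose and avoids both issues:

def textParse2(bigString):
    # Hypothetical variant: \W+ requires at least one non-word character,
    # so it never produces an empty match and never triggers the warning.
    import re
    listOfTokens = re.split(r'\W+', bigString)
    return [token.lower() for token in listOfTokens if len(token) > 2]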
def spamTest():
    docList = []
    classList = []
    fullText = []
    # Load and parse the text files.
    for i in range(1, 26):  # file names 1-25
        wordList = textParse(open('email/spam/%d.txt' % i).read())
        #wordList = textParse(open('email/spam/%d.txt' % i, 'r', encoding='utf-8').read())
        docList.append(wordList)
        fullText.extend(wordList)
        classList.append(1)
        wordList = textParse(open('email/ham/%d.txt' % i).read())
        #wordList = textParse(open('email/ham/%d.txt' % i, 'r', encoding='utf-8').read())
        docList.append(wordList)
        fullText.extend(wordList)
        classList.append(0)
    vocabList = createVocabList(docList)
    # Under Python 3.x, trainingSet = range(50) fails with
    # "'range' object doesn't support item deletion", because range returns
    # a range object rather than a list; the fix is to wrap it in list():
    trainingSet = list(range(50))
    testSet = []
    # Randomly build the test set.
    for i in range(10):
        randIndex = int(random.uniform(0, len(trainingSet)))
        # The random module generates random numbers;
        # random.uniform(a, b) returns a random float in the given range.
        testSet.append(trainingSet[randIndex])
        del(trainingSet[randIndex])
    # Ten documents are chosen at random as the test set; the rest form the
    # training set. Randomly holding out part of the data for testing while
    # training on the remainder is called hold-out cross validation.
    trainMat = []; trainClasses = []
    for docIndex in trainingSet:
        trainMat.append(setOfWords2Vec(vocabList, docList[docIndex]))
        trainClasses.append(classList[docIndex])
    # Assemble the selected training documents one by one.
    p0V, p1V, pSpam = trainNB0(array(trainMat), array(trainClasses))
    print(p0V)
    print(p1V)
    print(pSpam)
    errorCount = 0
    for docIndex in testSet:
        wordVector = setOfWords2Vec(vocabList, docList[docIndex])
        if classifyNB(array(wordVector), p0V, p1V, pSpam) != classList[docIndex]:
            errorCount += 1
            # If the predicted class differs from the true class, add one error.
    print('the error rate is', float(errorCount)/len(testSet))
spamTest()
[ 9. 1. 2. 1. 3. 2. 2. 1. 1. 1. 1. 1. 1. 1. 1.
3. 1. 3. 1. 1. 1. 3. 1. 1. 1. 1. 1. 1. 2. 1.
1. 1. 1. 2. 1. 1. 1. 1. 1. 1. 1. 1. 2. 1. 2.
1. 1. 1. 1. 1. 1. 1. 1. 1. 2. 2. 2. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 3. 1.
1. 2. 1. 5. 1. 1. 2. 1. 1. 7. 4. 1. 7. 2. 5.
2. 2. 2. 1. 1. 1. 1. 2. 1. 1. 1. 1. 1. 1. 2.
5. 2. 1. 3. 1. 7. 2. 1. 1. 1. 1. 1. 1. 1. 1.
7. 2. 2. 1. 1. 1. 1. 2. 1. 1. 1. 1. 2. 3. 1.
1. 1. 5. 5. 1. 1. 1. 1. 1. 1. 1. 1. 7. 3. 3.
1. 1. 3. 1. 1. 2. 1. 3. 1. 1. 1. 2. 1. 1. 1.
1. 1. 1. 3. 1. 1. 1. 7. 1. 1. 1. 1. 1. 1. 1.
2. 1. 7. 1. 1. 1. 1. 3. 1. 1. 2. 4. 1. 2. 2.
3. 1. 1. 2. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
4. 1. 2. 1. 1. 2. 1. 1. 1. 1. 7. 1. 2. 1. 3.
1. 2. 1. 1. 2. 1. 1. 1. 8. 1. 1. 1. 1. 1. 1.
2. 1. 1. 1. 1. 1. 2. 1. 1. 1. 1. 1. 1. 1. 10.
1. 1. 1. 8. 1. 1. 1. 1. 1. 3. 1. 2. 5. 1. 1.
1. 1. 1. 1. 3. 1. 1. 1. 7. 2. 1. 1. 7. 1. 1.
1. 1. 1. 1. 1. 1. 3. 2. 1. 1. 2. 1. 1. 1. 3.
2. 1. 1. 7. 2. 3. 1. 1. 2. 1. 1. 1. 1. 1. 2.
3. 1. 1. 3. 1. 3. 1. 7. 1. 1. 3. 1. 3. 2. 2.
1. 1. 1. 1. 5. 2. 13. 1. 2. 2. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 2. 1. 1. 1. 1. 3. 1. 1. 1.
1. 2. 1. 1. 4. 1. 1. 1. 1. 3. 5. 1. 1. 2. 2.
3. 1. 1. 7. 1. 3. 1. 7. 2. 1. 1. 1. 1. 1. 1.
3. 1. 1. 2. 1. 1. 3. 2. 1. 1. 1. 1. 1. 1. 2.
1. 1. 1. 7. 1. 1. 3. 1. 1. 1. 1. 1. 3. 2. 1.
1. 2. 1. 1. 2. 1. 1. 2. 1. 1. 1. 1. 1. 2. 1.
2. 2. 1. 7. 1. 1. 3. 2. 2. 2. 1. 1. 1. 2. 1.
1. 1. 1. 1. 2. 2. 1. 2. 1. 1. 1. 5. 1. 3. 2.
10. 3. 2. 1. 3. 1. 5. 1. 1. 1. 1. 7. 1. 2. 1.
1. 3. 2. 1. 1. 1. 3. 3. 1. 3. 1. 1. 1. 1. 1.
1. 2. 1. 1. 1. 1. 1. 1. 1. 1. 1. 2. 1. 1. 2.
1. 1. 1. 1. 3. 1. 1. 1. 1. 3. 1. 1. 1. 3. 1.
3. 1. 2. 1. 1. 1. 2. 1. 1. 1. 3. 3. 1. 1. 1.
1. 1. 1. 2. 3. 3. 1. 1. 3. 1. 1. 2. 7. 1. 1.
1. 1. 1. 1. 1. 1. 4. 2. 1. 1. 1. 1. 1. 1. 1.
4. 1. 1. 1. 1. 1. 3. 3. 1. 1. 7. 1. 1. 4. 1.
3. 1. 3. 2. 2. 3. 1. 1. 1. 2. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 6. 1. 2. 1. 1.
1. 7. 1. 1. 3. 3. 1. 2. 1. 1. 1. 7. 1. 1. 1.
1. 1. 2. 2. 1. 1. 1. 3. 4. 1. 3. 1. 1. 1. 2.
1. 8. 3. 1. 3. 1. 1. 2. 1. 7. 7. 1. 3. 1. 1.
1. 1. 4. 1. 4. 1. 8. 3. 1. 1. 1. 1. 1. 1. 3.
1. 1. 1. 1. 2. 1. 1. 2. 1. 3. 1. 2. 1. 1. 2.
2. 1. 1. 1.]
510
[ 6. 2. 1. 2. 1. 1. 1. 2. 2. 5. 2. 2. 2. 2. 2.
1. 2. 1. 2. 4. 1. 1. 1. 2. 2. 2. 1. 3. 1. 2.
2. 2. 2. 1. 2. 2. 2. 2. 2. 2. 4. 1. 1. 2. 1.
2. 2. 1. 4. 2. 2. 2. 2. 2. 2. 1. 1. 2. 2. 2.
2. 1. 2. 2. 1. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
2. 1. 3. 14. 1. 1. 1. 3. 1. 1. 1. 2. 1. 1. 5.
1. 1. 1. 2. 2. 2. 2. 1. 2. 3. 2. 2. 3. 2. 7.
4. 1. 2. 1. 2. 1. 1. 2. 2. 3. 2. 2. 2. 2. 2.
1. 1. 1. 2. 3. 2. 2. 1. 2. 2. 2. 2. 1. 2. 2.
2. 2. 6. 3. 1. 2. 1. 2. 2. 3. 2. 3. 1. 1. 1.
2. 3. 2. 2. 2. 1. 2. 1. 2. 3. 4. 1. 2. 2. 1.
2. 2. 2. 1. 2. 1. 2. 1. 2. 1. 1. 2. 2. 2. 2.
1. 2. 1. 1. 4. 2. 2. 1. 1. 4. 1. 1. 2. 1. 1.
1. 2. 1. 1. 2. 2. 2. 2. 2. 2. 2. 1. 2. 2. 3.
2. 2. 1. 2. 2. 1. 1. 2. 2. 1. 1. 5. 1. 2. 1.
3. 1. 2. 2. 1. 2. 2. 1. 1. 2. 2. 2. 3. 3. 1.
1. 2. 2. 2. 2. 7. 1. 3. 2. 1. 2. 1. 2. 2. 1.
2. 2. 2. 1. 2. 2. 2. 2. 2. 1. 2. 1. 1. 2. 1.
1. 1. 1. 3. 1. 3. 2. 2. 1. 1. 4. 2. 1. 1. 2.
2. 2. 2. 2. 2. 3. 1. 1. 2. 2. 1. 1. 3. 2. 1.
1. 2. 2. 1. 1. 5. 2. 2. 3. 3. 7. 2. 2. 2. 1.
1. 2. 2. 1. 2. 1. 2. 1. 2. 3. 1. 2. 2. 1. 1.
2. 2. 2. 2. 8. 1. 14. 2. 1. 1. 1. 2. 2. 5. 1.
2. 2. 2. 1. 2. 2. 1. 2. 2. 2. 1. 1. 4. 2. 2.
2. 1. 1. 2. 1. 2. 2. 1. 1. 1. 1. 2. 4. 1. 1.
1. 2. 2. 1. 3. 1. 1. 1. 1. 2. 3. 2. 2. 2. 4.
1. 1. 2. 1. 5. 4. 1. 1. 2. 2. 2. 2. 2. 3. 1.
2. 1. 2. 1. 2. 2. 1. 2. 2. 2. 2. 2. 7. 1. 1.
2. 1. 2. 2. 1. 2. 2. 1. 2. 2. 2. 2. 2. 2. 1.
1. 1. 2. 1. 1. 2. 1. 2. 1. 8. 5. 1. 2. 1. 1.
2. 2. 2. 2. 1. 1. 2. 1. 2. 4. 2. 1. 1. 2. 1.
14. 1. 1. 2. 1. 3. 1. 2. 2. 2. 2. 2. 2. 1. 1.
3. 1. 1. 2. 2. 2. 1. 1. 3. 1. 1. 5. 1. 2. 2.
2. 2. 2. 2. 2. 2. 1. 1. 2. 2. 2. 1. 1. 3. 1.
2. 2. 3. 2. 5. 2. 2. 2. 2. 1. 2. 2. 2. 1. 3.
1. 1. 3. 2. 2. 6. 1. 2. 2. 2. 1. 1. 2. 2. 2.
1. 2. 2. 1. 1. 1. 2. 1. 1. 2. 2. 1. 1. 2. 2.
4. 3. 2. 2. 2. 2. 1. 1. 2. 2. 3. 2. 1. 1. 2.
8. 2. 2. 1. 2. 1. 1. 1. 2. 2. 1. 2. 2. 1. 5.
1. 2. 1. 1. 6. 1. 2. 2. 2. 1. 3. 2. 4. 2. 1.
2. 4. 2. 2. 1. 3. 2. 2. 1. 2. 2. 2. 1. 2. 4.
1. 1. 2. 2. 1. 1. 2. 1. 2. 2. 2. 1. 2. 2. 2.
2. 1. 1. 1. 2. 2. 2. 1. 1. 2. 1. 2. 1. 2. 1.
2. 1. 1. 2. 1. 2. 2. 2. 2. 1. 1. 2. 1. 2. 1.
2. 2. 3. 1. 1. 2. 1. 1. 1. 3. 2. 2. 2. 1. 1.
2. 2. 2. 2. 1. 2. 1. 1. 2. 1. 2. 1. 2. 2. 1.
1. 2. 2. 2.]
634
[-4.66028949 -5.75890177 -6.45204895 -5.75890177 -6.45204895 -6.45204895
-6.45204895 -5.75890177 -5.75890177 -4.84261104 -5.75890177 -5.75890177
-5.75890177 -5.75890177 -5.75890177 -6.45204895 -5.75890177 -6.45204895
-5.75890177 -5.06575459 -6.45204895 -6.45204895 -6.45204895 -5.75890177
-5.75890177 -5.75890177 -6.45204895 -5.35343667 -6.45204895 -5.75890177
-5.75890177 -5.75890177 -5.75890177 -6.45204895 -5.75890177 -5.75890177
-5.75890177 -5.75890177 -5.75890177 -5.75890177 -5.06575459 -6.45204895
-6.45204895 -5.75890177 -6.45204895 -5.75890177 -5.75890177 -6.45204895
-5.06575459 -5.75890177 -5.75890177 -5.75890177 -5.75890177 -5.75890177
-5.75890177 -6.45204895 -6.45204895 -5.75890177 -5.75890177 -5.75890177
-5.75890177 -6.45204895 -5.75890177 -5.75890177 -6.45204895 -5.75890177
-5.75890177 -5.75890177 -5.75890177 -5.75890177 -5.75890177 -5.75890177
-5.75890177 -5.75890177 -5.75890177 -5.75890177 -6.45204895 -5.35343667
-3.81299162 -6.45204895 -6.45204895 -6.45204895 -5.35343667 -6.45204895
-6.45204895 -6.45204895 -5.75890177 -6.45204895 -6.45204895 -4.84261104
-6.45204895 -6.45204895 -6.45204895 -5.75890177 -5.75890177 -5.75890177
-5.75890177 -6.45204895 -5.75890177 -5.35343667 -5.75890177 -5.75890177
-5.35343667 -5.75890177 -4.50613881 -5.06575459 -6.45204895 -5.75890177
-6.45204895 -5.75890177 -6.45204895 -6.45204895 -5.75890177 -5.75890177
-5.35343667 -5.75890177 -5.75890177 -5.75890177 -5.75890177 -5.75890177
-6.45204895 -6.45204895 -6.45204895 -5.75890177 -5.35343667 -5.75890177
-5.75890177 -6.45204895 -5.75890177 -5.75890177 -5.75890177 -5.75890177
-6.45204895 -5.75890177 -5.75890177 -5.75890177 -5.75890177 -4.66028949
-5.35343667 -6.45204895 -5.75890177 -6.45204895 -5.75890177 -5.75890177
-5.35343667 -5.75890177 -5.35343667 -6.45204895 -6.45204895 -6.45204895
-5.75890177 -5.35343667 -5.75890177 -5.75890177 -5.75890177 -6.45204895
-5.75890177 -6.45204895 -5.75890177 -5.35343667 -5.06575459 -6.45204895
-5.75890177 -5.75890177 -6.45204895 -5.75890177 -5.75890177 -5.75890177
-6.45204895 -5.75890177 -6.45204895 -5.75890177 -6.45204895 -5.75890177
-6.45204895 -6.45204895 -5.75890177 -5.75890177 -5.75890177 -5.75890177
-6.45204895 -5.75890177 -6.45204895 -6.45204895 -5.06575459 -5.75890177
-5.75890177 -6.45204895 -6.45204895 -5.06575459 -6.45204895 -6.45204895
-5.75890177 -6.45204895 -6.45204895 -6.45204895 -5.75890177 -6.45204895
-6.45204895 -5.75890177 -5.75890177 -5.75890177 -5.75890177 -5.75890177
-5.75890177 -5.75890177 -6.45204895 -5.75890177 -5.75890177 -5.35343667
-5.75890177 -5.75890177 -6.45204895 -5.75890177 -5.75890177 -6.45204895
-6.45204895 -5.75890177 -5.75890177 -6.45204895 -6.45204895 -4.84261104
-6.45204895 -5.75890177 -6.45204895 -5.35343667 -6.45204895 -5.75890177
-5.75890177 -6.45204895 -5.75890177 -5.75890177 -6.45204895 -6.45204895
-5.75890177 -5.75890177 -5.75890177 -5.35343667 -5.35343667 -6.45204895
-6.45204895 -5.75890177 -5.75890177 -5.75890177 -5.75890177 -4.50613881
-6.45204895 -5.35343667 -5.75890177 -6.45204895 -5.75890177 -6.45204895
-5.75890177 -5.75890177 -6.45204895 -5.75890177 -5.75890177 -5.75890177
-6.45204895 -5.75890177 -5.75890177 -5.75890177 -5.75890177 -5.75890177
-6.45204895 -5.75890177 -6.45204895 -6.45204895 -5.75890177 -6.45204895
-6.45204895 -6.45204895 -6.45204895 -5.35343667 -6.45204895 -5.35343667
-5.75890177 -5.75890177 -6.45204895 -6.45204895 -5.06575459 -5.75890177
-6.45204895 -6.45204895 -5.75890177 -5.75890177 -5.75890177 -5.75890177
-5.75890177 -5.75890177 -5.35343667 -6.45204895 -6.45204895 -5.75890177
-5.75890177 -6.45204895 -6.45204895 -5.35343667 -5.75890177 -6.45204895
-6.45204895 -5.75890177 -5.75890177 -6.45204895 -6.45204895 -4.84261104
-5.75890177 -5.75890177 -5.35343667 -5.35343667 -4.50613881 -5.75890177
-5.75890177 -5.75890177 -6.45204895 -6.45204895 -5.75890177 -5.75890177
-6.45204895 -5.75890177 -6.45204895 -5.75890177 -6.45204895 -5.75890177
-5.35343667 -6.45204895 -5.75890177 -5.75890177 -6.45204895 -6.45204895
-5.75890177 -5.75890177 -5.75890177 -5.75890177 -4.37260741 -6.45204895
-3.81299162 -5.75890177 -6.45204895 -6.45204895 -6.45204895 -5.75890177
-5.75890177 -4.84261104 -6.45204895 -5.75890177 -5.75890177 -5.75890177
-6.45204895 -5.75890177 -5.75890177 -6.45204895 -5.75890177 -5.75890177
-5.75890177 -6.45204895 -6.45204895 -5.06575459 -5.75890177 -5.75890177
-5.75890177 -6.45204895 -6.45204895 -5.75890177 -6.45204895 -5.75890177
-5.75890177 -6.45204895 -6.45204895 -6.45204895 -6.45204895 -5.75890177
-5.06575459 -6.45204895 -6.45204895 -6.45204895 -5.75890177 -5.75890177
-6.45204895 -5.35343667 -6.45204895 -6.45204895 -6.45204895 -6.45204895
-5.75890177 -5.35343667 -5.75890177 -5.75890177 -5.75890177 -5.06575459
-6.45204895 -6.45204895 -5.75890177 -6.45204895 -4.84261104 -5.06575459
-6.45204895 -6.45204895 -5.75890177 -5.75890177 -5.75890177 -5.75890177
-5.75890177 -5.35343667 -6.45204895 -5.75890177 -6.45204895 -5.75890177
-6.45204895 -5.75890177 -5.75890177 -6.45204895 -5.75890177 -5.75890177
-5.75890177 -5.75890177 -5.75890177 -4.50613881 -6.45204895 -6.45204895
-5.75890177 -6.45204895 -5.75890177 -5.75890177 -6.45204895 -5.75890177
-5.75890177 -6.45204895 -5.75890177 -5.75890177 -5.75890177 -5.75890177
-5.75890177 -5.75890177 -6.45204895 -6.45204895 -6.45204895 -5.75890177
-6.45204895 -6.45204895 -5.75890177 -6.45204895 -5.75890177 -6.45204895
-4.37260741 -4.84261104 -6.45204895 -5.75890177 -6.45204895 -6.45204895
-5.75890177 -5.75890177 -5.75890177 -5.75890177 -6.45204895 -6.45204895
-5.75890177 -6.45204895 -5.75890177 -5.06575459 -5.75890177 -6.45204895
-6.45204895 -5.75890177 -6.45204895 -3.81299162 -6.45204895 -6.45204895
-5.75890177 -6.45204895 -5.35343667 -6.45204895 -5.75890177 -5.75890177
-5.75890177 -5.75890177 -5.75890177 -5.75890177 -6.45204895 -6.45204895
-5.35343667 -6.45204895 -6.45204895 -5.75890177 -5.75890177 -5.75890177
-6.45204895 -6.45204895 -5.35343667 -6.45204895 -6.45204895 -4.84261104
-6.45204895 -5.75890177 -5.75890177 -5.75890177 -5.75890177 -5.75890177
-5.75890177 -5.75890177 -5.75890177 -6.45204895 -6.45204895 -5.75890177
-5.75890177 -5.75890177 -6.45204895 -6.45204895 -5.35343667 -6.45204895
-5.75890177 -5.75890177 -5.35343667 -5.75890177 -4.84261104 -5.75890177
-5.75890177 -5.75890177 -5.75890177 -6.45204895 -5.75890177 -5.75890177
-5.75890177 -6.45204895 -5.35343667 -6.45204895 -6.45204895 -5.35343667
-5.75890177 -5.75890177 -4.66028949 -6.45204895 -5.75890177 -5.75890177
-5.75890177 -6.45204895 -6.45204895 -5.75890177 -5.75890177 -5.75890177
-6.45204895 -5.75890177 -5.75890177 -6.45204895 -6.45204895 -6.45204895
-5.75890177 -6.45204895 -6.45204895 -5.75890177 -5.75890177 -6.45204895
-6.45204895 -5.75890177 -5.75890177 -5.06575459 -5.35343667 -5.75890177
-5.75890177 -5.75890177 -5.75890177 -6.45204895 -6.45204895 -5.75890177
-5.75890177 -5.35343667 -5.75890177 -6.45204895 -6.45204895 -5.75890177
-4.37260741 -5.75890177 -5.75890177 -6.45204895 -5.75890177 -6.45204895
-6.45204895 -6.45204895 -5.75890177 -5.75890177 -6.45204895 -5.75890177
-5.75890177 -6.45204895 -4.84261104 -6.45204895 -5.75890177 -6.45204895
-6.45204895 -4.66028949 -6.45204895 -5.75890177 -5.75890177 -5.75890177
-6.45204895 -5.35343667 -5.75890177 -5.06575459 -5.75890177 -6.45204895
-5.75890177 -5.06575459 -5.75890177 -5.75890177 -6.45204895 -5.35343667
-5.75890177 -5.75890177 -6.45204895 -5.75890177 -5.75890177 -5.75890177
-6.45204895 -5.75890177 -5.06575459 -6.45204895 -6.45204895 -5.75890177
-5.75890177 -6.45204895 -6.45204895 -5.75890177 -6.45204895 -5.75890177
-5.75890177 -5.75890177 -6.45204895 -5.75890177 -5.75890177 -5.75890177
-5.75890177 -6.45204895 -6.45204895 -6.45204895 -5.75890177 -5.75890177
-5.75890177 -6.45204895 -6.45204895 -5.75890177 -6.45204895 -5.75890177
-6.45204895 -5.75890177 -6.45204895 -5.75890177 -6.45204895 -6.45204895
-5.75890177 -6.45204895 -5.75890177 -5.75890177 -5.75890177 -5.75890177
-6.45204895 -6.45204895 -5.75890177 -6.45204895 -5.75890177 -6.45204895
-5.75890177 -5.75890177 -5.35343667 -6.45204895 -6.45204895 -5.75890177
-6.45204895 -6.45204895 -6.45204895 -5.35343667 -5.75890177 -5.75890177
-5.75890177 -6.45204895 -6.45204895 -5.75890177 -5.75890177 -5.75890177
-5.75890177 -6.45204895 -5.75890177 -6.45204895 -6.45204895 -5.75890177
-6.45204895 -5.75890177 -6.45204895 -5.75890177 -5.75890177 -6.45204895
-6.45204895 -5.75890177 -5.75890177 -5.75890177]
[-4.03718615 -6.23441073 -5.54126355 -6.23441073 -5.13579844 -5.54126355
-5.54126355 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -5.13579844 -6.23441073 -5.13579844
-6.23441073 -6.23441073 -6.23441073 -5.13579844 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -5.54126355 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -5.54126355 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-5.54126355 -6.23441073 -5.54126355 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-5.54126355 -5.54126355 -5.54126355 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -5.13579844 -6.23441073 -6.23441073 -5.54126355 -6.23441073
-4.62497281 -6.23441073 -6.23441073 -5.54126355 -6.23441073 -6.23441073
-4.28850058 -4.84811636 -6.23441073 -4.28850058 -5.54126355 -4.62497281
-5.54126355 -5.54126355 -5.54126355 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -5.54126355 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -5.54126355 -4.62497281 -5.54126355 -6.23441073
-5.13579844 -6.23441073 -4.28850058 -5.54126355 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-4.28850058 -5.54126355 -5.54126355 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -5.54126355 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-5.54126355 -5.13579844 -6.23441073 -6.23441073 -6.23441073 -4.62497281
-4.62497281 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -4.28850058 -5.13579844 -5.13579844
-6.23441073 -6.23441073 -5.13579844 -6.23441073 -6.23441073 -5.54126355
-6.23441073 -5.13579844 -6.23441073 -6.23441073 -6.23441073 -5.54126355
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-5.13579844 -6.23441073 -6.23441073 -6.23441073 -4.28850058 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-5.54126355 -6.23441073 -4.28850058 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -5.13579844 -6.23441073 -6.23441073 -5.54126355 -4.84811636
-6.23441073 -5.54126355 -5.54126355 -5.13579844 -6.23441073 -6.23441073
-5.54126355 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-4.84811636 -6.23441073 -5.54126355 -6.23441073 -6.23441073 -5.54126355
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -4.28850058 -6.23441073
-5.54126355 -6.23441073 -5.13579844 -6.23441073 -5.54126355 -6.23441073
-6.23441073 -5.54126355 -6.23441073 -6.23441073 -6.23441073 -4.15496918
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-5.54126355 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-5.54126355 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -3.93182563 -6.23441073 -6.23441073 -6.23441073
-4.15496918 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-5.13579844 -6.23441073 -5.54126355 -4.62497281 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -5.13579844 -6.23441073
-6.23441073 -6.23441073 -4.28850058 -5.54126355 -6.23441073 -6.23441073
-4.28850058 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -5.13579844 -5.54126355 -6.23441073
-6.23441073 -5.54126355 -6.23441073 -6.23441073 -6.23441073 -5.13579844
-5.54126355 -6.23441073 -6.23441073 -4.28850058 -5.54126355 -5.13579844
-6.23441073 -6.23441073 -5.54126355 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -5.54126355 -5.13579844 -6.23441073 -6.23441073
-5.13579844 -6.23441073 -5.13579844 -6.23441073 -4.28850058 -6.23441073
-6.23441073 -5.13579844 -6.23441073 -5.13579844 -5.54126355 -5.54126355
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -4.62497281 -5.54126355
-3.66946137 -6.23441073 -5.54126355 -5.54126355 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -5.54126355 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -5.13579844 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -5.54126355 -6.23441073 -6.23441073 -4.84811636 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -5.13579844 -4.62497281 -6.23441073
-6.23441073 -5.54126355 -5.54126355 -5.13579844 -6.23441073 -6.23441073
-4.28850058 -6.23441073 -5.13579844 -6.23441073 -4.28850058 -5.54126355
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-5.13579844 -6.23441073 -6.23441073 -5.54126355 -6.23441073 -6.23441073
-5.13579844 -5.54126355 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -5.54126355 -6.23441073 -6.23441073 -6.23441073
-4.28850058 -6.23441073 -6.23441073 -5.13579844 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -5.13579844 -5.54126355 -6.23441073
-6.23441073 -5.54126355 -6.23441073 -6.23441073 -5.54126355 -6.23441073
-6.23441073 -5.54126355 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -5.54126355 -6.23441073 -5.54126355 -5.54126355 -6.23441073
-4.28850058 -6.23441073 -6.23441073 -5.13579844 -5.54126355 -5.54126355
-5.54126355 -6.23441073 -6.23441073 -6.23441073 -5.54126355 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -5.54126355 -5.54126355
-6.23441073 -5.54126355 -6.23441073 -6.23441073 -6.23441073 -4.62497281
-6.23441073 -5.13579844 -5.54126355 -3.93182563 -5.13579844 -5.54126355
-6.23441073 -5.13579844 -6.23441073 -4.62497281 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -4.28850058 -6.23441073 -5.54126355 -6.23441073
-6.23441073 -5.13579844 -5.54126355 -6.23441073 -6.23441073 -6.23441073
-5.13579844 -5.13579844 -6.23441073 -5.13579844 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -5.54126355 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -5.54126355 -6.23441073 -6.23441073 -5.54126355
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -5.13579844 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -5.13579844 -6.23441073 -6.23441073
-6.23441073 -5.13579844 -6.23441073 -5.13579844 -6.23441073 -5.54126355
-6.23441073 -6.23441073 -6.23441073 -5.54126355 -6.23441073 -6.23441073
-6.23441073 -5.13579844 -5.13579844 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -5.54126355 -5.13579844 -5.13579844
-6.23441073 -6.23441073 -5.13579844 -6.23441073 -6.23441073 -5.54126355
-4.28850058 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -4.84811636 -5.54126355 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-4.84811636 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-5.13579844 -5.13579844 -6.23441073 -6.23441073 -4.28850058 -6.23441073
-6.23441073 -4.84811636 -6.23441073 -5.13579844 -6.23441073 -5.13579844
-5.54126355 -5.54126355 -5.13579844 -6.23441073 -6.23441073 -6.23441073
-5.54126355 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -6.23441073 -6.23441073 -4.44265126 -6.23441073
-5.54126355 -6.23441073 -6.23441073 -6.23441073 -4.28850058 -6.23441073
-6.23441073 -5.13579844 -5.13579844 -6.23441073 -5.54126355 -6.23441073
-6.23441073 -6.23441073 -4.28850058 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -5.54126355 -5.54126355 -6.23441073 -6.23441073
-6.23441073 -5.13579844 -4.84811636 -6.23441073 -5.13579844 -6.23441073
-6.23441073 -6.23441073 -5.54126355 -6.23441073 -4.15496918 -5.13579844
-6.23441073 -5.13579844 -6.23441073 -6.23441073 -5.54126355 -6.23441073
-4.28850058 -4.28850058 -6.23441073 -5.13579844 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -4.84811636 -6.23441073 -4.84811636 -6.23441073
-4.15496918 -5.13579844 -6.23441073 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -6.23441073 -5.13579844 -6.23441073 -6.23441073 -6.23441073
-6.23441073 -5.54126355 -6.23441073 -6.23441073 -5.54126355 -6.23441073
-5.13579844 -6.23441073 -5.54126355 -6.23441073 -6.23441073 -5.54126355
-5.54126355 -6.23441073 -6.23441073 -6.23441073]
0.475
the error rate is 0.1
E:\Anaconda3\lib\re.py:212: FutureWarning: split() requires a non-empty pattern match.
  return _compile(pattern, flags).split(string, maxsplit)
spamTest()
the error rate is 0.0
spamTest()
the error rate is 0.1
spamTest()
the error rate is 0.1
spamTest()
the error rate is 0.0
The error rate varies between runs because the ten test documents are drawn at random each time.
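A more stable estimate is the average error rate over many runs, as the book suggests. A sketch, assuming a hypothetical spamTestRate() that is spamTest() with the final print replaced by return float(errorCount)/len(testSet):

def averageErrorRate(numRuns=10):
    # Average the hold-out error over numRuns random splits
    # (spamTestRate is the hypothetical returning variant described above).
    rates = [spamTestRate() for _ in range(numRuns)]
    print('average error rate over %d runs: %.3f' % (numRuns, sum(rates) / numRuns))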
Example: using naive Bayes to discover region-specific words
(1) Collect: pull content from RSS feeds; this requires building an interface to the RSS feed.
(2) Prepare: parse the text into token vectors.
(3) Analyze: inspect the tokens to make sure parsing is correct.
(4) Train: use the trainNB0() function built earlier.
(5) Test: watch the error rate to make sure the classifier is usable; the tokenizer can be modified to lower the error rate and improve classification.
(6) Use: build a complete program wrapping everything up; given two RSS feeds, it displays the most common words from each.
import feedparser
#ny = feedparser.parse('http://newyork.craigslist.org/stp/index.rss')
ny = feedparser.parse('http://www.nasa.gov/rss/dyn/image_of_the_day.rss')  # swapped in two different RSS feed URLs
sf = feedparser.parse('http://www.douban.com/feed/review/book')
ny
len(ny['entries'])
60
len(sf['entries'])
20
ny['entries']
ny['entries'][0]['summary']
'In July 1964, the first Saturn V S-IVB, or third stage test hardware, was delivered to NASA’s Marshall Space Flight Center'
ny['entries'][1]['summary']
# 1. generator: the tool that produced the feed, i.e. whatever generator the site itself provides.
# 2. entry: the block for one post; everything about a given post lives here.
#    Multiple entry blocks make up 'entries', which is how the posts are accessed.
# 3. content: this field can apparently carry HTML; it is what an RSS reader renders.
'Leafy greens are growing in space!'
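To eyeball what a feed actually carries before classifying it, something like this helps (a small sketch; .get guards against entries that lack a summary):

for entry in ny['entries'][:3]:
    # Each entry behaves like a dict; 'title' and 'summary' are standard feedparser keys.
    print(entry.get('title', '(no title)'))
    print(entry.get('summary', '')[:80])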
Listing 4-6: RSS feed classifier and frequent-word removal functions
# Compute occurrence frequencies.
def calcMostFreq(vocabulary, fullText):
    import operator
    freqDict = {}
    for token in vocabulary:
        freqDict[token] = fullText.count(token)
    sortedFreq = sorted(freqDict.items(), key=operator.itemgetter(1), reverse=True)
    return sortedFreq[:30]
# Returns the 30 most frequent words.
# When parsing text we count how often each word occurs, but some words occur
# constantly while carrying no real meaning: in Chinese, function words such as
# 的 and 得; in English, simple pronouns, auxiliary verbs, and so on. They only
# distort the weights, so these high-frequency words should be removed.
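As an aside, the standard library's collections.Counter produces the same top-30 list in one pass (an equivalent sketch, hypothetical calcMostFreq2, not from the book):

from collections import Counter

def calcMostFreq2(vocabulary, fullText):
    vocabSet = set(vocabulary)                    # set lookup instead of repeated list scans
    counts = Counter(tok for tok in fullText if tok in vocabSet)
    return counts.most_common(30)                 # list of (word, count) pairs, descending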
# This mostly mirrors spamTest(); the difference is that it reads RSS feeds.
# It returns the vocabulary plus the per-class word probabilities.
def localWords(feed1, feed0):  # takes two RSS feeds as parameters
    import feedparser
    docList = []; classList = []; fullText = []
    minlen = min(len(feed1['entries']), len(feed0['entries']))
    print(minlen)
    for i in range(minlen):  # visit one feed entry at a time
        wordList = textParse(feed1['entries'][i]['summary'])
        docList.append(wordList)
        fullText.extend(wordList)
        classList.append(1)
        wordList = textParse(feed0['entries'][i]['summary'])
        docList.append(wordList)
        fullText.extend(wordList)
        classList.append(0)
    # The two feeds serve as the positive and negative examples.
    vocabulary = createVocabList(docList)  # build the vocabulary
    top30Words = calcMostFreq(vocabulary, fullText)
    print(top30Words)
    # Get the 30 most frequent words...
    for pairW in top30Words:
        if pairW[0] in vocabulary: vocabulary.remove(pairW[0])
    # ...and remove them from the vocabulary.
    trainingSet = list(range(2*minlen)); testSet = []  # index list; the test set is split off next
    for i in range(20):
        randIndex = int(random.uniform(0, len(trainingSet)))
        testSet.append(trainingSet[randIndex])
        del(trainingSet[randIndex])
    # Randomly split into training and test sets; the test set holds 20 entries.
    trainMat = []; trainClass = []
    for docIndex in trainingSet:
        trainMat.append(bagOfWords2Vec(vocabulary, docList[docIndex]))
        trainClass.append(classList[docIndex])
    # Convert the training documents into word-count feature vectors.
    p0V, p1V, pSpam = trainNB0(array(trainMat), array(trainClass))
    errorCount = 0
    for docIndex in testSet:
        wordVector = bagOfWords2Vec(vocabulary, docList[docIndex])
        if classifyNB(array(wordVector), p0V, p1V, pSpam) != classList[docIndex]:
            errorCount += 1
    print('the error rate is: ', float(errorCount)/len(testSet))
    return vocabulary, p0V, p1V
ny=feedparser.parse('http://www.nasa.gov/rss/dyn/image_of_the_day.rss')
sf=feedparser.parse('http://www.douban.com/feed/review/book')
vocabList,pSF,pNY = localWords(ny,sf)
the error rate is: 0.45
E:\Anaconda3\lib\re.py:212: FutureWarning: split() requires a non-empty pattern match.
return _compile(pattern, flags).split(string, maxsplit)
# Besides removing the most frequent words, another common approach is a curated
# word list: words that serve sentence structure rather than carry meaning.
# Removing these structural helper words via such a predefined list, called a
# stop word list, tends to improve the final error rate somewhat.
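A sketch of that idea, assuming a hypothetical stopwords.txt with one word per line (any published English stop word list works):

def loadStopWords(path='stopwords.txt'):
    # Hypothetical stop word file, one word per line.
    with open(path) as f:
        return set(line.strip() for line in f)

# Inside localWords(), the top-30 removal could then be replaced with:
#     stopWords = loadStopWords()
#     vocabulary = [tok for tok in vocabulary if tok not in stopWords]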
def getTopWords(ny, sf):
    import operator
    vocabList, p0V, p1V = localWords(ny, sf)
    topNY = []; topSF = []
    for i in range(len(p0V)):
        if p0V[i] > -6.0: topSF.append((vocabList[i], p0V[i]))
        if p1V[i] > -6.0: topNY.append((vocabList[i], p1V[i]))
    sortedSF = sorted(topSF, key=lambda pair: pair[1], reverse=True)
    print(sortedSF)  # a list of (word, log-probability) tuples
    # When sorting a list of tuples, the key parameter decides what to sort by.
    # lambda defines an anonymous function; pair is just a temporary name for one
    # list element (a tuple here), so pair[0] is the tuple's first item and
    # pair[1] its second. This call therefore sorts the list by each tuple's
    # second element, and reverse=True gives descending order.
    print("SF**SF**SF**SF**SF**SF**SF**SF**SF**SF**SF**SF**SF**SF**SF**SF**")
    for item in sortedSF:
        print(item[0])  # print just the word from each tuple
    sortedNY = sorted(topNY, key=lambda pair: pair[1], reverse=True)
    print("NY**NY**NY**NY**NY**NY**NY**NY**NY**NY**NY**NY**NY**NY**NY**NY**")
    for item in sortedNY:
        print(item[0])
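For reference, the same sorted call on a standalone example:

pairs = [('apple', -5.1), ('banana', -3.2), ('cherry', -4.7)]
print(sorted(pairs, key=lambda pair: pair[1], reverse=True))
# [('banana', -3.2), ('cherry', -4.7), ('apple', -5.1)]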
getTopWords(ny, sf)
20
[('https', 32), ('com', 29), ('the', 24), ('book', 21), ('subject', 21), ('douban', 21), ('entitymap', 20), ('type', 18), ('image', 16), ('data', 16), ('mutability', 13), ('nasa', 13), ('immutable', 12), ('img3', 9), ('view', 8), ('space', 8), ('doubanio', 8), ('test', 7), ('thumb', 6), ('for', 6), ('and', 6), ('blocks', 5), ('this', 5), ('was', 5), ('text', 5), ('src', 5), ('with', 4), ('launch', 4), ('key', 4), ('2019', 4)]
[ 1. 1. 1. 1. 1. 2. 1. 2. 2. 2. 1. 1. 2. 1. 1. 2. 3. 2.
1. 2. 2. 2. 1. 2. 1. 1. 1. 2. 1. 1. 2. 2. 2. 2. 1. 1.
2. 1. 1. 2. 2. 1. 2. 1. 1. 2. 2. 1. 1. 1. 1. 1. 2. 2.
3. 1. 3. 2. 2. 1. 1. 1. 2. 2. 2. 2. 1. 1. 2. 1. 1. 1.
4. 2. 2. 1. 1. 2. 2. 2. 2. 1. 2. 1. 1. 2. 2. 1. 2. 1.
5.
Jupyter notebook version (viewable and downloadable):
https://github.com/liuxf570/ML/blob/master/CH04_Bayes.ipynb