Machine Learning in Action Study Notes 2: Naive Bayes

Listing 4-1: Word-list-to-vector conversion functions

from numpy import *

def loadDataSet():
    postingList=[['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'],
                 ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'],
                 ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'],
                 ['stop', 'posting', 'stupid', 'worthless', 'garbage'],
                 ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],
                 ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid']]
    classVec = [0,1,0,1,0,1]    #1 = abusive, 0 = normal
    return postingList,classVec
#Creates a small data set of six documents, each with its own class label (only classes 0 and 1 in this example)
def createVocabList(dataSet):
    vocabSet = set([])    #create an empty set
    for document in dataSet:
    #loop over every document in the data set; set() removes duplicate words,
    #and the union accumulates them
        #print(set(document))
        vocabSet = vocabSet | set(document)  #union of the two sets
        #print(vocabSet)
    return list(vocabSet)

#This function merges the document set into a vocabulary list containing every word that appears in the documents

#Naive Bayes document classification converts each document into a (feature) vector against this vocabulary; each value is 0 or 1, marking absence or presence
def setOfWords2Vec(vocabList,inputSet):
    returnVec = [0]*len(vocabList)
    #create a vector whose elements are all 0
    for word in inputSet:
        if word in vocabList:
            returnVec[vocabList.index(word)]=1
        else:
            print("the word: %s is not in my Vocabulary!" % word)
    return returnVec
#This function first creates a vector as long as the vocabulary;
#the output marks whether each word of the document appears in the vocabulary,
#thereby converting the document into a word vector
listOPosts,listClasses = loadDataSet()
#print(listOPosts)
#print(listClasses)
myVocabList = createVocabList(listOPosts)
print(myVocabList)
print(listOPosts[0])
['park', 'cute', 'so', 'has', 'dog', 'ate', 'please', 'is', 'worthless', 'steak', 'to', 'problems', 'buying', 'stupid', 'food', 'my', 'mr', 'flea', 'I', 'maybe', 'stop', 'take', 'licks', 'posting', 'love', 'how', 'not', 'quit', 'dalmation', 'help', 'garbage', 'him']
['my', 'dog', 'has', 'flea', 'problems', 'help', 'please']
setOfWords2Vec(myVocabList,listOPosts[0])
[0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]


print(listOPosts[3])
setOfWords2Vec(myVocabList,listOPosts[3])
['stop', 'posting', 'stupid', 'worthless', 'garbage']
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0]
Pseudocode for training the naive Bayes classifier

Count the number of documents in each class
For each training document:
    For each class:
        If a token appears in the document -> increment the count for that token
        Increment the count of total tokens
For each class:
    For each token:
        Divide the token count by the total token count to get the conditional probability
Return the conditional probability for each class
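Before the actual implementation, here is a minimal sketch (a toy two-document example of my own, not from the book) of the smoothed estimate that trainNB0 computes for one class:

from numpy import array, ones

#two class-1 documents over a 3-word vocabulary (toy data)
trainMatrix = array([[1, 0, 1],
                     [0, 1, 1]])
p1Num = ones(3) + trainMatrix.sum(axis=0)  #word counts, smoothed by starting at 1
p1Denom = 2.0 + trainMatrix.sum()          #total token count, smoothed by starting at 2
print(p1Num / p1Denom)                     #conditional probabilities p(word|class 1): [0.33 0.33 0.5]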

Listing 4-2: Naive Bayes classifier training function

def trainNB0(trainMatrix,trainCategory):
    numTrainDocs = len(trainMatrix)
    #number of training documents, 6 here
    #print(numTrainDocs)
    numWords = len(trainMatrix[0])   #vocabulary length from the first row, 32 words here
    #print(numWords)
    #print(sum(trainCategory))
    #print(float(numTrainDocs))
    pAbusive = sum(trainCategory)/float(numTrainDocs)
    #probability of the abusive class; this example only has classes 0 and 1
    #print(pAbusive)
    '''
    p0Num = zeros(numWords)  #initialize probabilities
    p1Num = zeros(numWords)
    #pXNum is a vector as long as the vocabulary, counting occurrences of each word

    p0Denom = 0.0
    p1Denom = 0.0
    #pXDenom is the total number of tokens in class X
    '''
    p0Num = ones(numWords)  #initialize probabilities
    p1Num = ones(numWords)
    p0Denom = 2.0
    p1Denom = 2.0
    #Practical adjustment 1: initialization.
    #Bayesian document classification multiplies many per-word probabilities
    #to get the probability that a document belongs to a class, i.e. within
    #each class, the probabilities of the document's words are multiplied together.
    #If any single probability is 0, the whole product becomes 0, so the book
    #initializes all word counts to 1 and the denominators to 2 (Laplace smoothing)
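    #e.g. with zero-initialized counts, a word never seen in class 1 gives p(word|c1) = 0,
    #and the product over the whole document collapses to 0; after smoothing it is 1/p1Denom > 0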

    for i in range(numTrainDocs):
        #print(trainCategory[i])
        if trainCategory[i] == 1:
            p1Num += trainMatrix[i]            #向量相加
            p1Denom += sum(trainMatrix[i])
            #print(p1Num)
            #print(p1Denom)
        else:
            p0Num += trainMatrix[i]
            p0Denom += sum(trainMatrix[i])
            #print(p0Num)
           # print(p0Denom)
    print(p1Num)
    print(p1Denom)        
    #p1Vect = p1Num/p1Denom
    #Practical adjustment 2: underflow.
    #Multiplying many very small numbers underflows and eventually rounds to 0;
    #the fix is to take the logarithm of the product:
    #ln(a*b) = ln(a) + ln(b)
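    #e.g. a product of thirty probabilities near 0.03 is about 2e-46, which quickly
    #rounds to 0 in further products, while the equivalent log-sum is about -105 and stays representable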
    p1Vect = log(p1Num/p1Denom)
    print(p0Num)
    print(p0Denom)
    #p0Vect = p0Num/p0Denom

    #frequency with which each vocabulary word appears within each class

    p0Vect = log(p0Num/p0Denom)
    return p0Vect,p1Vect,pAbusive

#What the first parameter means, tying the earlier functions together:
#postingList is the document collection; each row is one document, and the row count is the document count
#classVec has one value per document, giving each document's class
#createVocabList merges the documents into a vocabulary with no duplicate words
#setOfWords2Vec maps one document's words onto the vocabulary, producing a vector
#trainNB0(trainMatrix,trainCategory): the first parameter holds each document's vocabulary vector,
#an n*m structure where n is the document count and m is the vocabulary length
#trainCategory is a vector of each document's class

from numpy import *
listOPosts,listClasses = loadDataSet()
print(listClasses)
myVocabList = createVocabList(listOPosts)
print(myVocabList)
trainMat = []
for postinDoc in listOPosts:
    print(postinDoc)
    trainMat.append(setOfWords2Vec(myVocabList,postinDoc))
print(trainMat)
#p0V/p1V: the probability of each vocabulary word conditioned on each class (the class-conditional likelihoods)

#pAb: the probability of each class (the class prior)

#in this example pAb comes out to 0.5, meaning classes 0 and 1 occur with equal probability
[0, 1, 0, 1, 0, 1]
['park', 'cute', 'so', 'has', 'dog', 'ate', 'please', 'is', 'worthless', 'steak', 'to', 'problems', 'buying', 'stupid', 'food', 'my', 'mr', 'flea', 'I', 'maybe', 'stop', 'take', 'licks', 'posting', 'love', 'how', 'not', 'quit', 'dalmation', 'help', 'garbage', 'him']
['my', 'dog', 'has', 'flea', 'problems', 'help', 'please']
['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid']
['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him']
['stop', 'posting', 'stupid', 'worthless', 'garbage']
['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him']
['quit', 'buying', 'worthless', 'dog', 'food', 'stupid']
[[0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1], [0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]]
p0V,p1V,pAb = trainNB0(trainMat,listClasses)
[ 2.  1.  1.  1.  3.  1.  1.  1.  3.  1.  2.  1.  2.  4.  2.  1.  1.  1.
  1.  2.  2.  2.  1.  2.  1.  1.  2.  2.  1.  1.  2.  2.]
21
[ 1.  2.  2.  2.  2.  2.  2.  2.  1.  2.  2.  2.  1.  1.  1.  4.  2.  2.
  2.  1.  2.  1.  2.  1.  2.  2.  1.  1.  2.  2.  1.  3.]
26
print(pAb)
0.5
print(p0V)
[-3.25809654 -2.56494936 -2.56494936 -2.56494936 -2.56494936 -2.56494936
 -2.56494936 -2.56494936 -3.25809654 -2.56494936 -2.56494936 -2.56494936
 -3.25809654 -3.25809654 -3.25809654 -1.87180218 -2.56494936 -2.56494936
 -2.56494936 -3.25809654 -2.56494936 -3.25809654 -2.56494936 -3.25809654
 -2.56494936 -2.56494936 -3.25809654 -3.25809654 -2.56494936 -2.56494936
 -3.25809654 -2.15948425]
print(p1V)
[-2.35137526 -3.04452244 -3.04452244 -3.04452244 -1.94591015 -3.04452244
 -3.04452244 -3.04452244 -1.94591015 -3.04452244 -2.35137526 -3.04452244
 -2.35137526 -1.65822808 -2.35137526 -3.04452244 -3.04452244 -3.04452244
 -3.04452244 -2.35137526 -2.35137526 -2.35137526 -3.04452244 -2.35137526
 -3.04452244 -3.04452244 -2.35137526 -2.35137526 -3.04452244 -3.04452244
 -2.35137526 -2.35137526]

Listing 4-3: Naive Bayes classification function

def classifyNB(vec2Classify,p0Vec,p1Vec,pClass1):
    p1 = sum(vec2Classify*p1Vec)+log(pClass1)
    print(sum(vec2Classify*p1Vec))
    print(log(pClass1))
    print(p1)
    p0 = sum(vec2Classify*p0Vec)+log(1.0-pClass1)
    print(sum(vec2Classify*p0Vec))
    print(log(1.0-pClass1))
    print(p0)
    if p1>p0:
        return 1
    else:
        return 0
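What classifyNB computes is the (log) numerator of Bayes' rule. For class 1:

p(c1|w) is proportional to p(w|c1)*p(c1), and with the naive independence assumption plus logs:
log p(c1|w) = sum over the document's words of log p(w_i|c1) + log p(c1)
            = sum(vec2Classify*p1Vec) + log(pClass1)

The evidence p(w) is the same for both classes, so it can be dropped when comparing p1 and p0.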

def testingNB():
    listOPosts,listClasses = loadDataSet()
    myVocabList = createVocabList(listOPosts)
    trainMat = []
    for postinDoc in listOPosts:
        trainMat.append(setOfWords2Vec(myVocabList,postinDoc))
    p0V,p1V,pAb = trainNB0(array(trainMat),array(listClasses))
    print(p0V)
    print(p1V)
    print(pAb)
    testEntry = ['love','my','dalmation']
    thisDoc =array(setOfWords2Vec(myVocabList,testEntry))
    print(thisDoc)
    print(testEntry)
    print("classified as")
    print(classifyNB(thisDoc,p0V,p1V,pAb))
    testEntry= ['stupid','garbage']
    thisDoc = array(array(setOfWords2Vec(myVocabList,testEntry)))
    print(testEntry)
    print("classified as")
    print(classifyNB(thisDoc,p0V,p1V,pAb))
testingNB()
[trainNB0 debug output: the same count vectors, p0V, p1V, and pAb = 0.5 as printed above]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0]
['love', 'my', 'dalmation']
classified as
-9.13356731317
-0.69314718056
-9.82671449373
-7.00170089182
-0.69314718056
-7.69484807238
0
['stupid', 'garbage']
classified as
-4.00960333377
-0.69314718056
-4.70275051433
-6.51619307604
-0.69314718056
-7.2093402566
1
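As a sanity check, the p0 term above (-7.00170089182 for ['love','my','dalmation']) can be reproduced by hand: multiply p0V elementwise by the document's 0/1 vector and sum: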
import numpy as np
a =np.array([-3.25809654,-2.56494936,-2.56494936,-2.56494936,-2.56494936,-2.56494936,
 -2.56494936 ,-2.56494936 ,-3.25809654, -2.56494936, -2.56494936 ,-2.56494936,
 -3.25809654 ,-3.25809654 ,-3.25809654 ,-1.87180218 ,-2.56494936 ,-2.56494936,
 -2.56494936 ,-3.25809654 ,-2.56494936, -3.25809654, -2.56494936, -3.25809654,
 -2.56494936, -2.56494936, -3.25809654, -3.25809654 ,-2.56494936 ,-2.56494936,
 -3.25809654 ,-2.15948425])
b =np.array([0, 0 ,0 ,0 ,0, 0 ,0 ,0 ,0 ,0 ,0, 0, 0 ,0 ,0, 1, 0, 0 ,0 ,0 ,0 ,0 ,0 ,0 ,1 ,0, 0 ,0 ,1, 0 ,0 ,0])
print(sum(a*b))
-7.0017009

Listing 4-4: Naive Bayes bag-of-words model

def bagOfWords2Vec(vocabList,inputSet):
    returnVec = [0]*len(vocabList)
    for word in inputSet:
        if word in vocabList:
            returnVec[vocabList.index(word)]+=1   #count occurrences instead of marking presence
    return returnVec
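A quick toy contrast between the two models (my own example, not from the book): when a word repeats, the bag-of-words model counts it while the set-of-words model only records presence.

vocab = ['dog', 'stupid', 'my']
doc = ['stupid', 'stupid', 'dog']
print(setOfWords2Vec(vocab, doc))  # [1, 1, 0]  (presence/absence only)
print(bagOfWords2Vec(vocab, doc))  # [1, 2, 0]  ('stupid' counted twice)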

Classifying email with naive Bayes

(1) Collect: text files are provided

(2) Prepare: parse the text files into token vectors

(3) Analyze: inspect the tokens to make sure parsing is correct

(4) Train: use the trainNB0() function built earlier

(5) Test: use classifyNB(), and build a new test function that computes the error rate over a document set

(6) Use: build a complete program that classifies a group of documents and prints misclassified ones to the screen

mySent='this book is the best book on Python or M.L. I have ever laid eyes upon'
mySent.split()
['this', 'book', 'is', 'the', 'best', 'book', 'on', 'Python', 'or', 'M.L.', 'I', 'have', 'ever', 'laid', 'eyes', 'upon']
import re
regEx = re.compile('\\W+')  #split on runs of non-word characters (anything but letters and digits)
listOfTokens = regEx.split(mySent)
listOfTokens
['this', 'book', 'is', 'the', 'best', 'book', 'on', 'Python', 'or', 'M', 'L', 'I', 'have', 'ever', 'laid', 'eyes', 'upon']
[tok for tok in listOfTokens if len(tok)>0]
['this', 'book', 'is', 'the', 'best', 'book', 'on', 'Python', 'or', 'M', 'L', 'I', 'have', 'ever', 'laid', 'eyes', 'upon']
[tok.lower() for tok in listOfTokens if len(tok)>0]
['this', 'book', 'is', 'the', 'best', 'book', 'on', 'python', 'or', 'm', 'l', 'i', 'have', 'ever', 'laid', 'eyes', 'upon']
emailText = open('email/ham/6.txt').read()
listOfTokens = regEx.split(emailText) 
listOfTokens
['Hello', 'Since', 'you', 'are', 'an', 'owner', 'of', 'at', 'least', 'one', 'Google', 'Groups', 'group', ..., 'changes', 'to', 'Google', 'Groups', '']  (long token list truncated)
def textParse(bigString):
    import re
    listOfTokens=re.split(r'\W+',bigString)
    #split on non-word characters; the book's r'\W*' triggers a FutureWarning and is better written as r'\W+'
    return [token.lower() for token in listOfTokens if len(token) >2]
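A quick check (my own example):

print(textParse('This is a simple TEST sentence!'))  # ['this', 'simple', 'test', 'sentence']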

def spamTest():
    docList = []
    classList = []
    fullText = []
    #load and parse the text files
    for i in range(1,26):  #file names 1-25
        wordList = textParse(open('email/spam/%d.txt' % i).read())
        #wordList = textParse(open('email/spam/%d.txt' % i,'r',encoding= 'utf-8').read())
        docList.append(wordList)
        fullText.extend(wordList)
        classList.append(1)
        wordList = textParse(open('email/ham/%d.txt' % i).read())
        #wordList = textParse(open('email/ham/%d.txt' % i,'r',encoding= 'utf-8').read())
        docList.append(wordList)
        fullText.extend(wordList)
        classList.append(0)
    vocabList = createVocabList(docList)
    #Python 3.x raises "'range' object doesn't support item deletion"
    #because range() returns a range object, not a list;
    #the fix is to change trainingSet = range(50) to trainingSet = list(range(50))

    trainingSet =  list(range(50))
    testSet = []
    #randomly build the training set
    for i in range(10):
        randIndex = int(random.uniform(0,len(trainingSet)))
        #the random module generates random numbers;
        #random.uniform(a,b) returns a random float in the specified range
        testSet.append(trainingSet[randIndex])
        del(trainingSet[randIndex])
        #randomly pick 10 documents as the test set; the rest form the training set.
        #Randomly holding out part of the data for testing while training on the remainder
        #is called hold-out cross validation
    trainMat = []; trainClasses = []
    for docIndex in trainingSet:
        trainMat.append(setOfWords2Vec(vocabList,docList[docIndex]))
        trainClasses.append(classList[docIndex])
    #collect the selected training documents into one matrix

    p0V,p1V,pSpam = trainNB0(array(trainMat),array(trainClasses))
    print(p0V)
    print(p1V)
    print(pSpam)
    errorCount = 0
    for docIndex in testSet:
        wordVector = setOfWords2Vec(vocabList,docList[docIndex])
        if classifyNB(array(wordVector),p0V,p1V,pSpam) !=  classList[docIndex]:
            errorCount += 1
    #if a classification disagrees with the true class, increment the error count
    print('the error rate is',float(errorCount)/len(testSet))
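Because the 10 test documents are drawn at random, the error rate fluctuates between runs (0.0 and 0.1 in the runs below). The book suggests averaging over many repetitions; a minimal sketch, assuming spamTest() is modified to return errorCount/len(testSet) instead of only printing it:

#hypothetical: assumes spamTest() returns the error rate
rates = [spamTest() for _ in range(10)]
print('average error rate:', sum(rates)/len(rates))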

spamTest()
[trainNB0 debug output truncated: the p1Num and p0Num count vectors over the full email vocabulary (several hundred words), p1Denom = 510, p0Denom = 634, and the corresponding p0V and p1V log-probability vectors]
0.475
the error rate is 0.1


spamTest()
the error rate is 0.0
spamTest()
the error rate is 0.1
spamTest()
the error rate is 0.1
spamTest()
the error rate is 0.0

Using naive Bayes to discover region-related word usage

(1) Collect: pull content from RSS feeds; this requires building an interface to the RSS feed

(2) Prepare: parse the text into token vectors

(3) Analyze: inspect the tokens to make sure parsing is correct

(4) Train: use the trainNB0() function built earlier

(5) Test: watch the error rate to make sure the classifier is usable; the tokenizer can be adjusted to lower the error rate and improve the results

(6) Use: build a complete program wrapping everything; given two RSS feeds, it displays the most common words from each

import feedparser
#ny = feedparser.parse('http://newyork.craigslist.org/stp/index.rss')
ny =  feedparser.parse('http://www.nasa.gov/rss/dyn/image_of_the_day.rss')  #the book's Craigslist feed is gone, so two other RSS feeds are used
sf=feedparser.parse('http://www.douban.com/feed/review/book')

ny
len(ny['entries'])  

60

len(sf['entries'])  
20
ny['entries']
ny['entries'][0]['summary']
'In July 1964, the first Saturn V S-IVB, or third stage test hardware, was delivered to NASA’s Marshall Space Flight Center'
ny['entries'][1]['summary']
#1. generator: the tool that produced this feed (the site's own feed generator)
#2. entry: the block holding one post; everything about that post is inside it (there are several such blocks; only one is shown here for brevity). Multiple entry blocks make up 'entries'
#3. content: this field may contain HTML; it is what an RSS reader actually displays
'Leafy greens are growing in space!'

Listing 4-6: RSS feed classifier and frequent-word removal function

# compute occurrence frequencies
def calcMostFreq(vocabulary,fulltext):
    import operator
    freqDict={}
    for token in vocabulary:
        freqDict[token]=fulltext.count(token)
    sortedFreq=sorted(freqDict.items(),key=operator.itemgetter(1),reverse=True)
    return sortedFreq[:30]
    #the 30 most frequent words
#When parsing text we count every word's occurrences, but some words occur very often
#while carrying no real meaning; they only distort the weights (e.g. Chinese particles
#such as 的/得, or simple English pronouns and auxiliary verbs), so these
#high-frequency words should be removed.
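A toy check of calcMostFreq (my own example):

print(calcMostFreq(['the', 'dog'], ['the', 'the', 'dog']))  # [('the', 2), ('dog', 1)]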


#This mostly mirrors spamTest(); the difference is that it reads RSS feeds,
#and it returns the vocabulary plus the per-class word probabilities
def localWords(feed1,feed0):  #takes two RSS feeds as parameters
    import feedparser
    docList=[];classList=[];fullText=[]
    minlen=min(len(feed1['entries']),len(feed0['entries']))
    print(minlen)
    for i in range(minlen):   #visit one RSS entry at a time
        wordList=textParse(feed1['entries'][i]['summary'])
        docList.append(wordList)
        fullText.extend(wordList)
        classList.append(1)
        wordList=textParse(feed0['entries'][i]['summary'])
        docList.append(wordList)
        fullText.extend(wordList)
        classList.append(0)
    #the two RSS feeds serve as the positive and negative examples
    vocabulary=createVocabList(docList)  #create the vocabulary
    top30Words=calcMostFreq(vocabulary,fullText)
    print(top30Words)
    #get the 30 most frequent words
    for pairW in top30Words:
        if pairW[0] in vocabulary:vocabulary.remove(pairW[0])
    #remove the top-30 words

    trainingSet=list(range(2*minlen));testSet=[]  #build the test set
    for i in range(20):
        randIndex=int(random.uniform(0,len(trainingSet)))
        testSet.append(trainingSet[randIndex])
        del(trainingSet[randIndex])
    #randomly split into training and test sets; the test set holds 20 entries
    trainMat=[];trainClass=[]
    for docIndex in trainingSet:
        trainMat.append(bagOfWords2Vec(vocabulary,docList[docIndex]))
        trainClass.append(classList[docIndex])
    #convert the training documents into count (bag-of-words) features
    p0V,p1V,pSpam=trainNB0(array(trainMat),array(trainClass))
    errorCount=0
    for docIndex in testSet:
        wordVector=bagOfWords2Vec(vocabulary,docList[docIndex])
        if classifyNB(array(wordVector),p0V,p1V,pSpam)!=classList[docIndex]:
            errorCount+=1
    print('the error rate is: ',float(errorCount)/len(testSet))
    return vocabulary,p0V,p1V

ny=feedparser.parse('http://www.nasa.gov/rss/dyn/image_of_the_day.rss')
sf=feedparser.parse('http://www.douban.com/feed/review/book')
vocabList,pSF,pNY = localWords(ny,sf)
the error rate is:  0.45


#Another common refinement beyond removing high-frequency words is a curated list of
#the auxiliary words that serve sentence structure; filtering with it usually improves the error rate.
#Removing such structural helper words from a predefined list is called using a stop word list
def getTopWords(ny,sf):
    import operator
    vocabList,p0V,p1V=localWords(ny,sf)
    topNY=[]; topSF=[]
    for i in range(len(p0V)):
        if p0V[i] > -6.0 : topSF.append((vocabList[i],p0V[i]))
        if p1V[i] > -6.0 : topNY.append((vocabList[i],p1V[i]))
    sortedSF = sorted(topSF, key=lambda pair: pair[1], reverse=True)
    print(sortedSF)  #a list of (word, log-probability) tuples
    #When a list consists of tuples, the key parameter is needed for sorting:
    #lambda defines an inline function; pair is just a temporary name for one list element
    #(a tuple here), so pair[0] is the tuple's first element and pair[1] its second.
    #This call therefore sorts the list by each tuple's second element,
    #and reverse=True gives descending order
    print("SF**SF**SF**SF**SF**SF**SF**SF**SF**SF**SF**SF**SF**SF**SF**SF**")
    for item in sortedSF:
        print(item[0])  #print the word from each tuple
    sortedNY = sorted(topNY, key=lambda pair: pair[1], reverse=True)
    print("NY**NY**NY**NY**NY**NY**NY**NY**NY**NY**NY**NY**NY**NY**NY**NY**")
    for item in sortedNY:
        print(item[0])


getTopWords(ny,sf)
20
[('https', 32), ('com', 29), ('the', 24), ('book', 21), ('subject', 21), ('douban', 21), ('entitymap', 20), ('type', 18), ('image', 16), ('data', 16), ('mutability', 13), ('nasa', 13), ('immutable', 12), ('img3', 9), ('view', 8), ('space', 8), ('doubanio', 8), ('test', 7), ('thumb', 6), ('for', 6), ('and', 6), ('blocks', 5), ('this', 5), ('was', 5), ('text', 5), ('src', 5), ('with', 4), ('launch', 4), ('key', 4), ('2019', 4)]
[trainNB0 debug output truncated]
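As the comment before getTopWords notes, a curated stop-word list often beats blindly dropping the 30 most frequent words. A minimal sketch (the word list here is illustrative only, not a standard resource):

#hypothetical tiny stop-word list; a real one would be much longer
stopWords = set(['the', 'and', 'for', 'with', 'this', 'that', 'was', 'you'])
def removeStopWords(vocabList):
    return [w for w in vocabList if w not in stopWords]
#then build trainMat from removeStopWords(vocabulary) instead of the top-30-filtered vocabulary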

Jupyter notebook version (code download):
https://github.com/liuxf570/ML/blob/master/CH04_Bayes.ipynb
