Markov Models

最新推荐文章于 2019-07-09 20:07:33 发布

LIQING LIN

最新推荐文章于 2019-07-09 20:07:33 发布

阅读量1k

点赞数

分类专栏： Web Scraping with Python 文章标签： cp9

Web Scraping with Python 专栏收录该内容

23 篇文章 0 订阅

订阅专栏

#!/usr/bin/python
#encoding:utf-8

"""
@author: LlQ
@contact:LIQINGLIN54951@gmail.com
@file: cp9_p178.py
@time: 5/19/2019 7:00 PM
"""

from urllib.request import urlopen
from random import randint

def wordListSum(wordList):
    """Return the total of all counts stored in ``wordList``.

    Args:
        wordList: Mapping of word -> number of times that word was seen.

    Returns:
        The sum of every value in the mapping; 0 for an empty mapping.
    """
    # Call the builtin directly rather than shadowing it with a local named
    # ``sum`` and looping over keys that are never used.
    return sum(wordList.values())

def retrieveRandomWord(wordList):
    """Pick one word from ``wordList`` at random, weighted by its count.

    A word with count 3 is three times as likely to be returned as a word
    with count 1. This is the transition step of the Markov model: given the
    successors of the current word, choose the next one.

    Args:
        wordList: Mapping of candidate word -> occurrence count.

    Returns:
        One of the keys of ``wordList``.

    Raises:
        ValueError: If ``wordList`` is empty or its counts do not add up to
            a positive number, so there is nothing to choose from.
    """
    total = sum(wordList.values())
    if total <= 0:
        # randint(1, 0) would fail with an unhelpful "empty range" message.
        raise ValueError('cannot pick a word from an empty word list')

    # Draw a position in 1..total, then walk the words subtracting each
    # count until the position is used up; the word that crosses zero owns
    # that slice of the range.
    randIndex = randint(1, total)
    for word, value in wordList.items():
        randIndex -= value
        if randIndex <= 0:
            return word

def buildWordDict(text):
    """Build a first-order Markov transition table from ``text``.

    The text is split into tokens, where the punctuation marks ``, . ; :``
    count as tokens of their own so they take part in the chain. Double
    quotes are removed entirely.

    Args:
        text: The source text to analyse.

    Returns:
        A dictionary of dictionaries: ``result[word][next_word]`` is the
        number of times ``next_word`` directly followed ``word``. A word
        that never has a successor (only the final token can be one) does
        not appear as an outer key. Empty or one-token text gives ``{}``.
    """
    # Drop quotes so a quoted word is counted together with the plain word.
    text = text.replace('"', '')

    # Put spaces around punctuation so each mark becomes its own "word".
    for symbol in (',', '.', ';', ':'):
        text = text.replace(symbol, ' {} '.format(symbol))

    # split() with no argument breaks on any run of whitespace (spaces,
    # newlines, tabs, carriage returns) and never yields empty strings, so
    # text with Windows line endings or tabs no longer produces tokens with
    # stray whitespace glued to them.
    words = text.split()

    wordDict = {}
    # Walk every adjacent (word, next word) pair and count the transition.
    for previous, current in zip(words, words[1:]):
        followers = wordDict.setdefault(previous, {})
        followers[current] = followers.get(current, 0) + 1
    return wordDict

# Download the source text. The context manager closes the HTTP response
# even if reading or decoding fails.
with urlopen('http://pythonscraping.com/files/inaugurationSpeech.txt') as response:
    text = str(response.read(), 'utf-8')

wordDict = buildWordDict(text)

# Generate a Markov chain of up to 100 words after the seed word
length = 100
chain = ['Called']
for i in range(0, length):
    # A word with no recorded successor (one that only occurs as the final
    # token, or a seed missing from the text) would raise KeyError; stop the
    # chain there instead.
    followers = wordDict.get(chain[-1])
    if not followers:
        break
    newWord = retrieveRandomWord(followers)
    chain.append(newWord)

print(' '.join(chain))

# for i in range(0, length):
#     chain += currentWord+" "
#     currentWord = retrieveRandomWord(wordDict[currentWord])
# print(chain)

LIQING LIN

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
Markov Models

#!/usr/bin/python #encoding:utf-8 """ @author: LlQ @contact: LIQINGLIN54951@gmail.com @file: cp9_p178.py @time: 5/19/2019 7:00 PM """ from urllib.request import urlopen from random import randint ...
复制链接

扫一扫

专栏目录