python生成相似句子_Python 生成句子

from urllib.request import urlopen

from random import randint

def wordListSum(wordList):
    """Return the total of all counts stored in ``wordList``.

    Args:
        wordList: Mapping of word -> occurrence count.

    Returns:
        The sum of the mapping's values; 0 for an empty mapping.
    """
    # Only the counts matter, so iterate the values directly.
    return sum(wordList.values())

def retrieveRandomWord(wordList):
    """Pick one word at random, weighted by its count.

    A random integer in ``[1, total]`` is drawn, where ``total`` is the sum
    of all counts; the counts are then subtracted in iteration order until
    the running value reaches zero or below, and that word is returned.

    Args:
        wordList: Mapping of word -> occurrence count.

    Returns:
        One of the keys of ``wordList``.

    Raises:
        ValueError: If the counts do not sum to at least 1 (for example,
            an empty mapping), since there is nothing to draw from.
    """
    total = sum(wordList.values())
    if total < 1:
        # randint(1, 0) would raise an opaque "empty range" ValueError;
        # fail with the same exception type but a meaningful message.
        raise ValueError(
            "wordList must contain at least one word with a positive count"
        )

    randIndex = randint(1, total)
    for word, value in wordList.items():
        randIndex -= value
        if randIndex <= 0:
            return word

def buildWordDict(text):
    """Build a first-order Markov transition table from ``text``.

    Args:
        text: The source text as a single string.

    Returns:
        A dict mapping each token to a dict of ``{next_token: count}``,
        where ``count`` is how many times ``next_token`` directly followed
        the token. A token that never has a successor (only the final token
        can be in that position) does not appear as a key. Empty or
        whitespace-only text yields ``{}``.
    """
    # Drop double quotes entirely.
    text = text.replace("\"", "")

    # Surround each punctuation mark with spaces so it becomes a token of
    # its own and is kept in the Markov chain rather than being discarded.
    punctuation = [',', '.', ';', ':']
    for symbol in punctuation:
        text = text.replace(symbol, " " + symbol + " ")

    # Split on any run of whitespace. Line breaks act as word separators
    # (so the last word of one line is not glued to the first word of the
    # next), and no empty tokens are produced.
    words = text.split()

    wordDict = {}
    # Walk every adjacent (previous, current) pair of tokens.
    for previous, current in zip(words, words[1:]):
        followers = wordDict.setdefault(previous, {})
        followers[current] = followers.get(current, 0) + 1

    return wordDict

# Download the source text; the context manager closes the HTTP response
# once the body has been read.
with urlopen(
    "http://pythonscraping.com/files/inaugurationSpeech.txt"
) as response:
    text = str(response.read(), 'utf-8')

wordDict = buildWordDict(text)

# Generate a Markov chain of at most 100 words, starting from "I".
length = 100
chain = ""
currentWord = "I"

for i in range(0, length):
    chain += currentWord + " "
    # A word with no recorded successor (the final word of the text, or a
    # seed word that never occurs) has no entry in the table; end the chain
    # there instead of failing with a KeyError.
    followers = wordDict.get(currentWord)
    if not followers:
        break
    currentWord = retrieveRandomWord(followers)

print(chain)

参考:《Python 网络数据采集》

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值