Python 词频分析与词云

见结果

《哈姆雷特》词频分析

def getText(path='D:/python/1213/hamlet2.txt', encoding=None):
    """Read a text file and return it normalised for word counting.

    The text is lower-cased, then every character in the punctuation set
    below is replaced by a single space so the result can be tokenised
    with ``str.split()``.

    Args:
        path: File to read. Defaults to the location the script has
            always used, so ``getText()`` behaves as before.
        encoding: Encoding handed to ``open``. ``None`` keeps the
            platform default, which is what the original call relied on.

    Returns:
        The normalised text as a single string.
    """
    punctuation = ':?,.!"()[]<>@|\'\\'
    # One translation pass instead of one str.replace() call per character.
    to_space = str.maketrans(punctuation, ' ' * len(punctuation))
    with open(path, "r", encoding=encoding) as f:
        txt = f.read()
    # The file is already closed here; only in-memory work remains.
    return txt.lower().translate(to_space)
# Word-frequency report: tokenise the normalised text, count each word,
# drop a few stop words and print the ten most frequent remaining words.
hamletText = getText()
words = hamletText.split()
print(words)

# Tally how many times each token occurs.
counts = {}
for word in words:
    counts[word] = counts.get(word, 0) + 1
print(counts)

# Remove stop words. pop() with a default is used so that a stop word
# which never occurs in the text does not raise KeyError.
excludes = {"the", "and", "to", "of", "i", "a"}
for word in excludes:
    counts.pop(word, None)
print("------------------------")
print(counts)
print("------------------------")

# Order the (word, count) pairs from most to least frequent.
items = list(counts.items())
print(items)
items.sort(key=lambda x: x[1], reverse=True)
print("------------------------")
print(items)

# Slicing instead of indexing over range(10) avoids IndexError when the
# text contains fewer than ten distinct words.
top_items = items[:10]
for item in top_items:
    print(item)
print("------------------------")
for word, count in top_items:
    print("{0:<10} {1:>5}".format(word, count))

《红楼梦》词频分析

import jieba
# Word-frequency report for a Chinese text: segment it with jieba, count
# every token longer than one character, drop the listed stop words and
# print the five most frequent remaining words.
txtfilepath = '红楼梦.txt'
with open(txtfilepath, encoding='utf-8') as f:
    # Read the whole text into memory.
    txt = f.read()
words = jieba.cut(txt)
counts = {}
for word in words:
    # Single-character tokens are not counted.
    if len(word) == 1:
        continue
    counts[word] = counts.get(word, 0) + 1
excludes = {'什么', '一个', '我们', '那里', '如今', '你们', '说道', '老太太', '知道',
            '起来', '姑娘', '这里', '出来', '他们', '奶奶', '自己', '一面', '太太', '只见',
            '怎么', '两个', '没有', '不是', '不知', '这个', '听见', '众人'}
for word in excludes:
    # pop() with a default: a stop word absent from the text must not
    # abort the script with KeyError.
    counts.pop(word, None)
# Sort the (word, count) pairs by count, largest first.
items = list(counts.items())
items.sort(key=lambda x: x[1], reverse=True)
# Slicing keeps this safe when fewer than five words remain.
for word, count in items[:5]:
    print("{0:<8} {1:>8}".format(word, count))
​

8.17

#8.17
from wordcloud import WordCloud
# Build a word cloud on a green background from a short list of animal
# names and save it as a PNG. generate() returns the WordCloud itself,
# so construction and generation are chained.
animal_words = "dog cat fish bird cat cat dog cat cat dog monkey cat"
animal_cloud = WordCloud(background_color="green").generate(animal_words)
animal_cloud.to_file("wc_animals.png")

8.18

from wordcloud import WordCloud
import numpy as np
from PIL import Image
# Load the mask image as an array and configure the cloud up front.
# NOTE(review): the wordcloud docs say width/height are ignored when a
# mask is supplied; they are kept here unchanged.
mask1 = np.array(Image.open("D:/python/1213/作业/heart1.jpg"))
cloud_options = dict(
    background_color="white",
    width=800,
    height=600,
    max_words=100,
    mask=mask1,
)
with open('D:/python/1213/作业/PrideAndPrejudice.txt', 'r', encoding='utf-8') as file:
    text = file.read()
    wc = WordCloud(**cloud_options)
    # Lay out the words from the novel's text and write the image.
    wc.generate(text)
    wc.to_file('傲慢与偏见.png')
​

word1

import wordcloud
# Smallest possible example: a WordCloud with no options, fed one short
# English phrase and written to a PNG file.
c = wordcloud.WordCloud().generate("wordcloud by Python")
c.to_file("pywordcloud.png")

word2

import wordcloud
# Same as the minimal example, but on a white background.
phrase = "life is short, you need python"
w = wordcloud.WordCloud(background_color="white")
# generate() returns the WordCloud, so the save can be chained onto it.
w.generate(phrase).to_file("pywcloud.png")

word3

import jieba
import wordcloud
# Chinese example: the sentence is segmented with jieba and the tokens
# are re-joined with spaces before being handed to WordCloud.
txt = "程序设计语言是计算机能够理解和识别用户操作意图的一种交互体系,它按照特定规则组织计算机指令,使计算机能够自动进行各种运算处理。"
tokens = jieba.lcut(txt)
spaced_text = " ".join(tokens)
# font_path selects the font file used to draw the words.
w = wordcloud.WordCloud(font_path="simsun.ttc", width=1000, height=700)
w.generate(spaced_text)
w.to_file("pywcloud.png")

word4

import jieba
import wordcloud
# Build a word cloud from a Chinese text file: read it, segment it with
# jieba, join the tokens with spaces and render them on a white canvas.
# The context manager closes the file even if read() raises, which the
# previous explicit open()/close() pair did not guarantee.
with open("新时代中国特色社会主义.txt", "r", encoding="utf-8") as f:
    t = f.read()
ls = jieba.lcut(t)
txt = " ".join(ls)
w = wordcloud.WordCloud(
    font_path="simsun.ttc",
    width=1000,
    height=700,
    background_color="white",
)
w.generate(txt)
w.to_file("grwordcloud.png")

word5

import jieba
import wordcloud
# Variant of the file-based example that limits the cloud to 15 words
# and uses a different font file.
# The context manager closes the file even if read() raises, which the
# previous explicit open()/close() pair did not guarantee.
with open("新时代中国特色社会主义.txt", "r", encoding="utf-8") as f:
    t = f.read()
ls = jieba.lcut(t)
txt = " ".join(ls)
# Line breaks inside the parentheses need no backslash continuations.
w = wordcloud.WordCloud(
    font_path="msyh.ttc",
    width=1000,
    height=700,
    background_color="white",
    max_words=15,
)
w.generate(txt)
# NOTE(review): the cloud is generated but never written out in the
# visible code (no to_file call) - confirm whether a save step is missing.

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值