# 见结果 (see the printed results)
# 《哈姆雷特》词频分析 — Hamlet word-frequency analysis
def getText(path='D:/python/1213/hamlet2.txt', encoding=None):
    """Read a text file and return it lower-cased with punctuation blanked out.

    Args:
        path: File to read. Defaults to the Hamlet text path this script
            has always used, so existing ``getText()`` calls are unchanged.
        encoding: Encoding handed to ``open``. ``None`` keeps the platform
            default, which is what the original call relied on.

    Returns:
        The file content in lower case, with every character of the
        punctuation list below replaced by a single space, so that a plain
        ``str.split()`` yields bare words.
    """
    with open(path, "r", encoding=encoding) as f:
        txt = f.read()
    # A single translate() pass replaces the former one-replace()-per-character
    # loop; each punctuation mark still maps to exactly one space.
    punctuation = ':?,.!"()[]<>@|\'\\'
    table = str.maketrans(punctuation, ' ' * len(punctuation))
    return txt.lower().translate(table)
# Word-frequency report for the Hamlet text: tokenize, count, drop a few
# stop words, then print the ten most frequent remaining words. The
# intermediate structures are printed on purpose so each step can be inspected.
hamletText = getText()
words = hamletText.split()
print(words)

counts = {}
for word in words:
    counts[word] = counts.get(word, 0) + 1
print(counts)

excludes = {"the", "and", "to", "of", "i", "a"}
for word in excludes:
    # pop() with a default instead of del: a stop word that never occurs in
    # the text must not abort the script with KeyError.
    counts.pop(word, None)
print("------------------------")
print(counts)
print("------------------------")

items = list(counts.items())
print(items)
# Sort (word, count) pairs by count, most frequent first.
items.sort(key=lambda x: x[1], reverse=True)
print("------------------------")
print(items)

# Slice instead of indexing range(10): a text with fewer than ten distinct
# words prints what it has rather than raising IndexError.
top_items = items[:10]
for item in top_items:
    print(item)
print("------------------------")
for word, count in top_items:
    print("{0:<10} {1:>5}".format(word, count))
# 《红楼梦》词频分析 — Dream of the Red Chamber word-frequency analysis
import jieba
# Word-frequency report for the novel text: segment it with jieba, count
# multi-character tokens, drop common filler words, print the top five.
txtfilepath = '红楼梦.txt'
with open(txtfilepath, encoding='utf-8') as f:
    # Read the whole text into memory.
    txt = f.read()

words = jieba.cut(txt)
counts = {}
for word in words:
    # Single-character tokens are not counted.
    if len(word) == 1:
        continue
    counts[word] = counts.get(word, 0) + 1

excludes = {'什么', '一个', '我们', '那里', '如今', '你们', '说道', '老太太', '知道',
            '起来', '姑娘', '这里', '出来', '他们', '奶奶', '自己', '一面', '太太', '只见',
            '怎么', '两个', '没有', '不是', '不知', '这个', '听见', '众人'}
for word in excludes:
    # pop() with a default instead of del: an excluded word that jieba never
    # produced must not abort the script with KeyError.
    counts.pop(word, None)

# Sort (word, count) pairs by count, largest first.
items = list(counts.items())
items.sort(key=lambda x: x[1], reverse=True)
# Slice instead of indexing range(5) so a short text cannot raise IndexError.
for word, count in items[:5]:
    print("{0:<8} {1:>8}".format(word, count))
# 8.17 — word cloud from a short English string
from wordcloud import WordCloud
# Render a green-background word cloud from a short list of animal names
# and save it as a PNG next to the script.
animal_words = "dog cat fish bird cat cat dog cat cat dog monkey cat"
animal_cloud = WordCloud(background_color="green")
# generate() returns the cloud itself, so the save can be chained onto it.
animal_cloud.generate(animal_words).to_file("wc_animals.png")
# 8.18 — masked word cloud built from the Pride and Prejudice text
from wordcloud import WordCloud
import numpy as np
from PIL import Image
# Build a word cloud from the Pride and Prejudice text, shaped by an image
# mask, and save it as a PNG.
with Image.open("D:/python/1213/作业/heart1.jpg") as mask_image:
    # Copy the pixels into an array while the file is open; the context
    # manager then closes the image file instead of leaving it to the GC.
    mask1 = np.array(mask_image)
with open('D:/python/1213/作业/PrideAndPrejudice.txt', 'r', encoding='utf-8') as file:
    text = file.read()
# width/height are not passed: WordCloud ignores both when a mask is supplied
# and takes the canvas size from the mask's shape.
wc = WordCloud(background_color="white",
               max_words=100,
               mask=mask1)
wc.generate(text)
wc.to_file('傲慢与偏见.png')
# word1 — minimal word cloud with default settings
import wordcloud
# Smallest possible example: default-configured cloud from one phrase,
# written to a PNG file.
default_cloud = wordcloud.WordCloud()
# generate() returns the cloud itself, so saving is chained directly.
default_cloud.generate("wordcloud by Python").to_file("pywordcloud.png")
# word2 — word cloud on a white background
import wordcloud
# Same idea as a default cloud, but drawn on a white background.
phrase = "life is short, you need python"
white_cloud = wordcloud.WordCloud(background_color="white")
# generate() returns the cloud itself, so saving is chained directly.
white_cloud.generate(phrase).to_file("pywcloud.png")
# word3 — word cloud for a Chinese sentence, segmented with jieba
import jieba
import wordcloud
# Chinese text has no spaces between words, so the sentence is segmented with
# jieba and re-joined with spaces before being handed to the word cloud.
sentence = "程序设计语言是计算机能够理解和识别用户操作意图的一种交互体系,它按照特定规则组织计算机指令,使计算机能够自动进行各种运算处理。"
cn_cloud = wordcloud.WordCloud(font_path="simsun.ttc", width=1000, height=700)
tokens = jieba.lcut(sentence)
spaced_text = " ".join(tokens)
cn_cloud.generate(spaced_text)
cn_cloud.to_file("pywcloud.png")
# word4 — word cloud for a Chinese text file
import jieba
import wordcloud
# Read a Chinese text file, segment it with jieba, and render the
# space-joined tokens as a white-background word cloud saved to a PNG.
# 'with' guarantees the file is closed even if read() raises, which the
# former open()/close() pair did not.
with open("新时代中国特色社会主义.txt", "r", encoding="utf-8") as f:
    t = f.read()
ls = jieba.lcut(t)
txt = " ".join(ls)
w = wordcloud.WordCloud(font_path="simsun.ttc",
                        width=1000,
                        height=700,
                        background_color="white")
w.generate(txt)
w.to_file("grwordcloud.png")
# word5 — word cloud for a Chinese text file, limited to 15 words
import jieba
import wordcloud
# Read a Chinese text file, segment it with jieba, and generate a
# white-background word cloud capped at 15 words.
# 'with' guarantees the file is closed even if read() raises, which the
# former open()/close() pair did not.
with open("新时代中国特色社会主义.txt", "r", encoding="utf-8") as f:
    t = f.read()
ls = jieba.lcut(t)
txt = " ".join(ls)
w = wordcloud.WordCloud(font_path="msyh.ttc",
                        width=1000, height=700, background_color="white",
                        max_words=15)
w.generate(txt)