Python词频分析词云

最新推荐文章于 2024-03-09 22:50:12 发布

超短腿迪迦

最新推荐文章于 2024-03-09 22:50:12 发布

阅读量417

点赞数

分类专栏： Python编程题 | 文章标签： python 开发语言 windows

本文链接：https://blog.csdn.net/m0_65077254/article/details/128318981

版权

Python编程题专栏收录该内容

5 篇文章 0 订阅

订阅专栏

见结果

《哈姆雷特》词频分析

def getText(path='D:/python/1213/hamlet2.txt', encoding=None):
    """Read a text file and return it lower-cased with punctuation blanked out.

    Every character in ``:?,.!"()[]<>@|'`` and the backslash is replaced by a
    single space, so a later ``str.split()`` yields bare words.

    Args:
        path: File to read. Defaults to the location the script was written for.
        encoding: Forwarded to ``open``; ``None`` keeps the platform default.

    Returns:
        The normalised file content as one string.
    """
    punctuation = ':?,.!"()[]<>@|\'\\'
    # A single translate() pass replaces one replace() call per character.
    blank_out = str.maketrans(punctuation, ' ' * len(punctuation))
    with open(path, "r", encoding=encoding) as f:
        return f.read().lower().translate(blank_out)
# Word-frequency report for Hamlet: load the normalised text, count every
# word, drop a few common function words and print the ten most frequent.
hamletText=getText()
words=hamletText.split()
print(words)
counts={}
for word in words:
    counts[word]=counts.get(word,0)+1
print(counts)
excludes={"the","and","to","of","i","a"}
for word in excludes:
    # pop() with a default does not raise when the word never occurred.
    counts.pop(word,None)
print("------------------------")
print(counts)
print("------------------------")
items=list(counts.items())
print(items)
# Order the (word, count) pairs by count, highest first.
items.sort(key=lambda x:x[1],reverse=True)
print("------------------------")
print(items)
# Slicing copes with texts that have fewer than ten distinct words.
top_items=items[:10]
for item in top_items:
    print(item)
print("------------------------")
for word,count in top_items:
    print ("{0:<10} {1:>5}".format(word, count))

《红楼梦》词频分析

import jieba
# Word-frequency report for the novel: segment the text with jieba, count
# tokens longer than one character, drop the listed words and print the top 5.
txtfilepath ='红楼梦.txt'
with open(txtfilepath,encoding='utf-8') as f:
    # Read the whole text into memory.
    txt = f.read()
words = jieba.cut(txt)
counts = {}
for word in words:
    # Single-character tokens are not counted.
    if len(word) == 1:
        continue
    counts[word] = counts.get(word,0)+1
excludes={'什么','一个','我们','那里','如今','你们','说道','老太太','知道',
          '起来','姑娘','这里','出来','他们','奶奶','自己','一面','太太','只见',
          '怎么','两个','没有','不是','不知','这个','听见','众人'}
for word in excludes:
    # pop() with a default does not raise when the word never occurred.
    counts.pop(word, None)
items = list(counts.items())
# Sort the (word, count) pairs by count, largest first.
items.sort(key=lambda x: x[1], reverse=True)
# Slicing copes with fewer than five remaining entries.
for word, count in items[:5]:
    print("{0:<8} {1:>8}".format(word,count))

8.17

#8.17
from wordcloud import WordCloud
# Render a green-background word cloud from a short list of animal names
# and save it as a PNG.
text="dog cat fish bird cat cat dog cat cat dog monkey cat"
# generate() returns the WordCloud instance, so it can be chained.
wc = WordCloud(background_color="green").generate(text)
wc.to_file("wc_animals.png")

8.18

from wordcloud import WordCloud
import numpy as np
from PIL import Image
# Word cloud of Pride and Prejudice drawn inside a mask image.
# The context manager closes the image file once its pixels are in the array.
with Image.open("D:/python/1213/作业/heart1.jpg") as mask_image:
    mask1 = np.array(mask_image)
with open('D:/python/1213/作业/PrideAndPrejudice.txt','r',encoding='utf-8') as file:
    text = file.read()
# Rendering only needs the text, so it runs after the file has been closed.
# NOTE(review): the wordcloud docs say width/height are ignored when a mask
# is given; they are kept here unchanged.
wc = WordCloud(background_color="white",
               width=800,
               height=600,
               max_words=100,
               mask=mask1)
wc.generate(text)
wc.to_file('傲慢与偏见.png')

word1

import wordcloud
# Smallest example: default-styled cloud from one English sentence.
# generate() returns the WordCloud instance, so it can be chained.
c = wordcloud.WordCloud().generate("wordcloud by Python")
c.to_file("pywordcloud.png")

word2

import wordcloud
# Same as the minimal example, but on a white background.
txt = "life is short, you need python"
# generate() returns the WordCloud instance, so it can be chained.
w = wordcloud.WordCloud(background_color="white").generate(txt)
w.to_file("pywcloud.png")

word3

import jieba
import wordcloud
# Chinese sentence: segment it with jieba, then render with a CJK font.
txt = "程序设计语言是计算机能够理解和识别用户操作意图的一种交互体系，它按照特定规则组织计算机指令，使计算机能够自动进行各种运算处理。"
w = wordcloud.WordCloud(
    width=1000,
    font_path="simsun.ttc",
    height=700,
)
# Join the jieba tokens with spaces before handing the text to WordCloud.
segmented = " ".join(jieba.lcut(txt))
w.generate(segmented)
w.to_file("pywcloud.png")

word4

import jieba
import wordcloud
# Word cloud of a Chinese document: read it, segment it with jieba and
# render the space-joined tokens on a white background.
# The with-statement closes the file even if read() raises.
with open("新时代中国特色社会主义.txt", "r", encoding="utf-8") as f:
    t = f.read()
ls = jieba.lcut(t)
txt = " ".join(ls)
w = wordcloud.WordCloud( font_path = "simsun.ttc",
                         width = 1000,
                         height = 700,
                         background_color = "white")
w.generate(txt)
w.to_file("grwordcloud.png")

word5

import jieba
import wordcloud
# Word cloud of a Chinese document limited to the 15 most frequent words.
# The with-statement closes the file even if read() raises.
with open("新时代中国特色社会主义.txt", "r", encoding="utf-8") as f:
    t = f.read()
ls = jieba.lcut(t)
txt = " ".join(ls)
w = wordcloud.WordCloud(
    font_path = "msyh.ttc",
    width = 1000,
    height = 700,
    background_color = "white",
    max_words = 15,
)
# NOTE(review): no to_file() call is visible after generate(); confirm
# whether the rendered image is meant to be saved.
w.generate(txt)