使用 Python 读取 txt 文件中的英文内容，统计词频并进行可视化显示
调用 turtle 库绘图；运行平台：Spyder
import turtle
# Not referenced by any of the code shown in this file.
pi=3.14159
# Number of top-ranked words that read() prints and collects.
count = 10
# Passed to read() and DIY_draw() by main(); read() appends the occurrence
# counts here, most frequent first.
data = []
# Passed to read() and DIY_draw() by main(); read() appends the words here,
# index-aligned with the entries of data.
words = []
import random
def read(filename, data, words):
    """Collect the most frequent words of a text file.

    The file is tokenised by process_read(). The top ``count`` words
    (module-level setting) are printed as ``word<TAB>frequency`` and appended
    to the caller-supplied lists, most frequent first. Ties in frequency are
    ordered by word, descending.

    Args:
        filename: Path of the text file to analyse.
        data: List that receives the frequencies (mutated in place).
        words: List that receives the matching words (mutated in place).
    """
    # The context manager guarantees the handle is closed even if parsing fails.
    with open(filename, "r") as source:
        frequencies = process_read(source)

    ranked = sorted(
        ((occurrences, word) for word, occurrences in frequencies.items()),
        reverse=True,
    )

    # Slicing clamps to the number of distinct words, so a short file no
    # longer wraps around to negative indices and reports entries twice.
    for occurrences, word in ranked[:max(count, 0)]:
        print(word + "\t" + str(occurrences))
        data.append(occurrences)
        words.append(word)
def randomcolor():
    """Return a random colour string of the form ``#XXXXXX``.

    Each of the six positions is drawn independently from the fifteen
    symbols 1-9 and A-F (the digit 0 is not part of the palette).
    """
    palette = (
        '1', '2', '3', '4', '5', '6', '7', '8', '9',
        'A', 'B', 'C', 'D', 'E', 'F',
    )
    picked = [palette[random.randint(0, 14)] for _ in range(6)]
    return "#" + "".join(picked)
def replaceMark(line):
    """Return ``line`` with every listed punctuation mark turned into a space.

    Characters outside the list (letters, digits, whitespace, ``!``,
    backslash, ...) are left untouched, so the length of the string is
    preserved.
    """
    marks = "~@#$%^&*()_-+=<>?/,.:;{}[]|\'\""
    # One translation table maps each mark to a blank in a single pass.
    blanking = str.maketrans(marks, " " * len(marks))
    return line.translate(blanking)
def process_read(txt1):
    """Count how often each word occurs in an iterable of text lines.

    Every line is lower-cased, stripped of punctuation by replaceMark() and
    split on whitespace.

    Args:
        txt1: Iterable yielding lines of text, e.g. an open text file.

    Returns:
        dict mapping each word to its number of occurrences, in order of
        first appearance.
    """
    word_spss = {}
    for raw_line in txt1:
        for token in replaceMark(raw_line.lower()).split():
            word_spss[token] = word_spss.get(token, 0) + 1
    return word_spss
def DIY_draw(data, words, max_items=9):
    """Draw the word frequencies as a row of filled, labelled circles.

    Each circle's radius is proportional to the word's frequency. Circles are
    laid out right to left starting at x = 150, each touching its neighbour,
    and are scaled so the whole row is 720 units wide whatever the counts.
    Labels of the form ``word[frequency]`` are written in a second pass,
    alternating between below the circle and just above its centre line.

    Args:
        data: Frequencies, index-aligned with ``words``.
        words: The words to label the circles with.
        max_items: Upper bound on the number of circles drawn (default 9,
            the number the chart was designed for).
    """
    # Only draw what is actually available. The previous code always read
    # ten entries and raised IndexError on shorter input.
    shown = min(max_items, len(data), len(words))
    if shown <= 0:
        return
    total = sum(data[:shown])
    if total <= 0:
        # Nothing sensible to scale against; avoids a ZeroDivisionError.
        return

    turtle.title("词频结果统计图")
    turtle.setup(1200, 500, 0, 0)
    t = turtle.Turtle()
    print(total)

    start_x = 150
    coefficient = 2 * start_x / total * 1.2
    radii = [data[i] * coefficient for i in range(shown)]
    labels = [f"{words[i]}[{data[i]}]" for i in range(shown)]
    if len(labels) > 1:
        print(labels[1])

    # Neighbouring circles touch, so consecutive start points are one radius
    # of each neighbour apart. The last circle needs no successor.
    xs = [start_x]
    for current, following in zip(radii, radii[1:]):
        xs.append(xs[-1] - current - following)

    # First pass: the filled circles.
    for x, radius in zip(xs, radii):
        t.color(randomcolor())
        t.penup()
        t.goto(x, -radius)
        t.pendown()
        t.begin_fill()
        t.circle(radius)
        t.end_fill()

    # Second pass: the labels, alternating sides so neighbours do not overlap.
    # The first label goes below its circle.
    side = 1
    for x, radius, label in zip(xs, radii, labels):
        t.color(randomcolor())
        t.penup()
        t.goto(x, -radius - side * radius * 1.2)
        t.write(label, False, align="center", font=("Arial", 18, "normal"))
        t.pendown()
        side = -side
    t.down()
def main():
    """Analyse the hard-coded sample file and draw its word-frequency chart.

    The results are accumulated in the module-level ``data`` and ``words``
    lists.
    """
    source_path = "C:\\Users\\yiqing\\Desktop\\ex1.txt"
    read(source_path, data, words)
    DIY_draw(data, words)


main()
运行结果：
![词频统计可视化结果](https://i-blog.csdnimg.cn/blog_migrate/9c818a49adb94a3a839a519dc70d9196.png)
输入的 txt 文件内容：
![输入的 txt 文件内容](https://i-blog.csdnimg.cn/blog_migrate/529baa240988b858abc5e54877e92dcb.png)