# 从文件中读取一篇英语文章,统计每个单词出现的频率,输出十个最常出现的单词及其出现次数的柱状图。
import turtle
# Chart configuration shared by the drawing helpers and main().
count = 10              # number of top-ranked words that are listed and drawn
xscale, yscale = 30, 6  # multipliers applied to a bar's slot index (x) and its value (y)

# Filled by main(): words[i] is the label of bar i and data[i] is its value.
words, data = [], []
# Characters treated as word separators.  The double quote is escaped
# explicitly (an adjacent empty literal would silently leave it out) and "!"
# is included so that quoted or exclaimed words count as the plain word.
_PUNCTUATION = "~@#$%^&*()_-+=<>?/,.:;{}[]|'\"!"
_PUNCTUATION_TABLE = str.maketrans(_PUNCTUATION, " " * len(_PUNCTUATION))


def replacepunctuations(line):
    """Return *line* with every punctuation character replaced by a space.

    Each character listed in ``_PUNCTUATION`` becomes a single space, so the
    result has the same length as the input and can be split on whitespace
    to obtain bare words.

    Args:
        line: The text to clean.

    Returns:
        A new string; the argument itself is not modified.
    """
    # One translate pass replaces the per-character str.replace loop.
    return line.translate(_PUNCTUATION_TABLE)
def processline(line, wordcounts):
    """Add the words found in *line* to the running tally *wordcounts*.

    Punctuation is first turned into spaces by ``replacepunctuations`` and
    the result is split on whitespace; every resulting token has its entry
    in *wordcounts* incremented, starting from 1 for a token seen for the
    first time.

    Args:
        line: One line of text.
        wordcounts: Mapping of word -> occurrence count, updated in place.
    """
    for token in replacepunctuations(line).split():
        wordcounts[token] = wordcounts.get(token, 0) + 1
def line(t, x1, y1, x2, y2):
    """Draw a straight segment from (x1, y1) to (x2, y2).

    The original note on this helper says it is for drawing the x and y axes.

    Args:
        t: Object providing ``penup``, ``pendown`` and ``goto`` (main() passes
            a ``turtle.Turtle``).
        x1, y1: Start point of the segment.
        x2, y2: End point of the segment.
    """
    start, end = (x1, y1), (x2, y2)
    t.penup()        # travel to the start without drawing
    t.goto(*start)
    t.pendown()      # from here on the movement leaves a trace
    t.goto(*end)
def drawtext(t, x, y, text):
    """Write *text* with the pen positioned at (x, y).

    Used for the chart labels: the word under each bar and the value the bar
    represents.

    Args:
        t: Object providing ``penup``, ``pendown``, ``goto`` and ``write``.
        x, y: Position the pen is moved to before writing.
        text: Value handed unchanged to ``t.write``.
    """
    position = (x, y)
    t.penup()          # reposition without drawing a stroke
    t.goto(*position)
    t.pendown()
    t.write(text)
def drawline(t, x1, y1, x2, y2):
    """Draw a straight segment from (x1, y1) to (x2, y2).

    Args:
        t: Object providing ``penup``, ``pendown`` and ``goto``.
        x1, y1: Start point of the segment.
        x2, y2: End point of the segment.
    """
    # Pen state to apply before travelling to each endpoint: lifted for the
    # jump to the start, lowered for the visible stroke to the end.
    for set_pen, (px, py) in ((t.penup, (x1, y1)), (t.pendown, (x2, y2))):
        set_pen()
        t.goto(px, py)
def drawrectangle(t, x, y):
    """Draw the outline of one bar standing on the line y = 0.

    The bar is centred on ``x * xscale``, reaches up to ``y * yscale`` and is
    10 units wide (5 on either side of the centre).

    Args:
        t: Drawing object forwarded to ``drawline``.
        x: Slot index of the bar, before scaling.
        y: Value of the bar, before scaling.
    """
    centre = x * xscale
    top = y * yscale
    left, right = centre - 5, centre + 5
    # Walk the outline: up the left edge, across the top, down the right
    # edge, and back along the base to the starting corner.
    outline = [(left, 0), (left, top), (right, top), (right, 0), (left, 0)]
    for (ax, ay), (bx, by) in zip(outline, outline[1:]):
        drawline(t, ax, ay, bx, by)
def drawbar(t):
    """Draw one bar for each collected value, in slots 1, 2, 3, ...

    At most ``count`` bars are drawn.  When ``data`` holds fewer than
    ``count`` values only the available ones are drawn, instead of raising
    IndexError on the missing entries.

    Args:
        t: Drawing object forwarded to ``drawrectangle``.
    """
    # Slots start at 1 so the first bar is not drawn on top of the y axis.
    for slot, value in enumerate(data[:count], start=1):
        drawrectangle(t, slot, value)
def drawgraph(t):
    """Draw the whole chart: both axes, the labels, and finally the bars.

    For every collected (word, value) pair, up to ``count`` of them, the word
    is written below the x axis and the value is written just above where
    the top of its bar will be.  If fewer than ``count`` pairs are available
    only those are labelled, instead of raising IndexError.

    NOTE(review): the axes are fixed at 360 x 300 units while label and bar
    heights are ``value * yscale``, so large values extend past the y axis.

    Args:
        t: Drawing object forwarded to the drawing helpers.
    """
    drawline(t, 0, 0, 360, 0)  # x axis
    drawline(t, 0, 300, 0, 0)  # y axis, from (0, 300) down to the origin

    # zip() stops at the shorter list, so a missing word or value can never
    # be indexed; slots start at 1 to keep labels off the y axis.
    labelled = zip(words[:count], data[:count])
    for slot, (word, value) in enumerate(labelled, start=1):
        label_x = slot * xscale - 5
        drawtext(t, label_x, -20, word)
        drawtext(t, label_x, value * yscale + 10, value)

    drawbar(t)
def main():
    """Count the words of a user-chosen file and chart the most frequent ones.

    Prompts for a file name, tallies the lower-cased words of every line,
    prints the top ``count`` words with their frequencies (tab separated,
    most frequent first), stores them in the module-level ``words`` and
    ``data`` lists, and draws the bar chart in a turtle window.

    Raises:
        OSError: If the file cannot be opened.
    """
    filename = input("enter a filename:").strip()
    wordcounts = {}
    # ``with`` guarantees the file is closed even if processing a line fails.
    with open(filename, "r") as infile:
        for text in infile:
            # Lower-case first so "The" and "the" are counted as one word.
            processline(text.lower(), wordcounts)

    # Rank as [frequency, word] pairs, highest frequency first; equal
    # frequencies fall back to reverse alphabetical order because the pairs
    # compare element by element.  Slicing (rather than indexing backwards
    # from the end) copes with fewer than ``count`` distinct words: nothing
    # wraps around to negative indices and an empty file yields no entries.
    ranked = sorted(([freq, word] for word, freq in wordcounts.items()), reverse=True)
    top = ranked[:count]

    # Reset the shared lists in place so a repeated call does not keep
    # results from the previous run in front of the new ones.
    data.clear()
    words.clear()
    for freq, word in top:
        print(word + "\t" + str(freq))
        data.append(freq)
        words.append(word)

    turtle.title('词频结果柱状图')
    turtle.setup(900, 750, 0, 0)
    t = turtle.Turtle()
    t.width(3)
    drawgraph(t)
    # Enter the event loop so the window stays open until the user closes it
    # instead of disappearing as soon as the script finishes.
    turtle.done()
# Run the program only when this file is executed as a script, so that
# importing the module does not prompt for input or open a turtle window.
if __name__ == "__main__":
    main()