统计一篇英语文章每个单词出现的频率

从文件中读取一篇英语文章,统计每个单词出现的频率,输出出现次数最多的十个单词及其出现次数,并用柱状图展示。


import turtle

# How many of the most frequent words are reported and drawn.
count = 10
# Multiplier applied to a bar's slot number to get its x coordinate.
xscale = 30
# Multiplier applied to a word's occurrence count to get the bar height.
yscale = 6
# Occurrence counts of the selected words; filled in by main().
data = []
# The selected words, in the same order as `data`; filled in by main().
words = []

# Translation table mapping every punctuation character to a single space.
# The double quote and the exclamation mark are part of the set so that
# quoted words and words ending a sentence are counted like any other.
_PUNCTUATION_TABLE = str.maketrans(
    dict.fromkeys("~!@#$%^&*()_-+=<>?/,.:;{}[]|'\"", " ")
)


def replacepunctuations(line):
    """Return *line* with every punctuation character replaced by a space.

    Each punctuation character becomes exactly one space, so the length of
    the string is preserved and adjacent words stay separated.

    Args:
        line: The text to clean.

    Returns:
        A new string; characters outside the punctuation set are unchanged.
    """
    # One pass over the string instead of one str.replace call per character.
    return line.translate(_PUNCTUATION_TABLE)
     
def processline(line,wordcounts):
    """Add the words of one line of text to the running tally.

    The line is passed through replacepunctuations() and then split on
    whitespace; each resulting word increments its entry in *wordcounts*.

    Args:
        line: One line of text.
        wordcounts: Dict mapping word -> occurrence count; updated in place.
    """
    cleaned = replacepunctuations(line)
    for token in cleaned.split():
        # A word seen for the first time starts from zero.
        wordcounts[token] = wordcounts.get(token, 0) + 1

def line(t,x1,y1,x2,y2):
    """Draw a straight segment from (x1, y1) to (x2, y2) with turtle *t*.

    Originally annotated as the helper for drawing the x and y axes.
    """
    start, end = (x1, y1), (x2, y2)
    # Travel to the start point with the pen lifted so nothing is drawn.
    t.penup()
    t.goto(*start)
    # Lower the pen; the move to the end point leaves the stroke.
    t.pendown()
    t.goto(*end)
     
def drawtext(t,x,y,text):
    """Write *text* at position (x, y) using turtle *t*.

    Used for the word label under each bar and the count above it.
    """
    anchor = (x, y)
    t.penup()          # move without leaving a stroke
    t.goto(*anchor)
    t.pendown()
    t.write(text)

def drawline(t,x1,y1,x2,y2):
    """Draw a straight segment from (x1, y1) to (x2, y2) with turtle *t*."""
    # First leg: pen up, jump to the start. Second leg: pen down, draw to
    # the end point.
    legs = ((t.penup, (x1, y1)), (t.pendown, (x2, y2)))
    for set_pen, (px, py) in legs:
        set_pen()
        t.goto(px, py)

def drawrectangle(t,x,y):
    """Draw the outline of one bar.

    Args:
        t: Turtle used for drawing.
        x: Slot number of the bar; multiplied by xscale to get its centre.
        y: Value of the bar; multiplied by yscale to get its height.

    The bar is 10 units wide, centred on the scaled x position, and stands
    on the line y = 0.
    """
    centre = x * xscale
    top = y * yscale
    left, right = centre - 5, centre + 5

    # Corners in drawing order: up the left side, across the top, down the
    # right side, and back along the base to the starting corner.
    corners = [(left, 0), (left, top), (right, top), (right, 0)]
    for (sx, sy), (ex, ey) in zip(corners, corners[1:] + corners[:1]):
        drawline(t, sx, sy, ex, ey)

def drawbar(t):
    """Draw one bar for each value in the module-level `data` list.

    At most `count` bars are drawn. When `data` holds fewer than `count`
    values, only the available ones are drawn instead of indexing past the
    end of the list.

    Args:
        t: Turtle used for drawing.
    """
    # Slots are numbered from 1 so the first bar is offset from the y axis.
    for slot, value in enumerate(data[:count], start=1):
        drawrectangle(t, slot, value)

def drawgraph(t):
    """Draw the whole chart: axes, labels and bars.

    Reads the module-level `words`, `data`, `count`, `xscale` and `yscale`.
    Each slot gets the word written below the x axis and its count written
    above where the top of its bar will be; the bars themselves are drawn
    by drawbar().

    Args:
        t: Turtle used for drawing.
    """
    drawline(t, 0, 0, 360, 0)   # x axis
    drawline(t, 0, 300, 0, 0)   # y axis

    # Label only the entries that actually exist: a text with fewer than
    # `count` distinct words must not index past the end of the lists.
    labelled = zip(words[:count], data[:count])
    for slot, (word, freq) in enumerate(labelled, start=1):
        left = slot * xscale - 5
        drawtext(t, left, -20, word)
        drawtext(t, left, freq * yscale + 10, freq)
    drawbar(t)
     
def main():
    """Count the words of a text file and chart the most frequent ones.

    Prompts for a file name, tallies every word of the file in lower case,
    prints the `count` most frequent words with their counts, appends them
    to the module-level `words` and `data` lists, and draws the bar chart.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    filename = input("enter a filename:").strip()

    wordcounts = {}
    # The with-block closes the file even if processing a line raises.
    with open(filename, "r") as infile:
        for text_line in infile:
            # lower() folds upper-case letters so "The" and "the" match.
            processline(text_line.lower(), wordcounts)

    # Order (count, word) pairs from most to least frequent; ties fall back
    # to reverse alphabetical order of the word.
    ranked = sorted(((n, w) for w, n in wordcounts.items()), reverse=True)

    # Slicing yields at most `count` entries, so a text with fewer distinct
    # words is reported once per word instead of wrapping around through
    # negative indices.
    for occurrences, word in ranked[:count]:
        print(word + "\t" + str(occurrences))
        data.append(occurrences)
        words.append(word)

    turtle.title('词频结果柱状图')
    turtle.setup(900,750,0,0)
    t = turtle.Turtle()
    t.width(3)
    drawgraph(t)
    # NOTE(review): nothing keeps the turtle window open after this call
    # returns; when run as a plain script the chart may close immediately.
    # Confirm whether a trailing turtle.done() is wanted.

# Entry point: the call is unguarded, so it runs whenever this file is
# executed or imported.
main()


  • 4
    点赞
  • 14
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值