近来学习Python,Python在科学计算中有着较强的优势。练习文章处理的初级代码,共享出来希望高手指点。
任务目标:统计英文文章中出现频率较高的单词,画出频率图并显示频率较高的单词。
基本要求已完成。待改进:应根据文章长度调整显示结果的条件,以适应长短相差较大的文章。
import pylab
import numpy
import string
def linetoword(line):
    """Split a line of text into lower-cased words longer than three letters.

    Every character that is not an ASCII letter or a space is treated as a
    word separator, so punctuation, digits and the trailing newline never
    end up inside a word.

    Args:
        line: One line of text.

    Returns:
        A list of the words found in ``line``, lower-cased and in order of
        appearance, keeping only words with more than three characters.
    """
    letters = string.ascii_letters
    # Strings are immutable, so the cleaned text has to be built explicitly;
    # calling line.replace() without using its result changes nothing.
    cleaned = "".join(
        ch if ch in letters or ch == " " else " " for ch in line
    )
    # Splitting on a single space yields empty strings for consecutive
    # separators; the length filter drops them along with short words.
    return [word.lower() for word in cleaned.split(" ") if len(word) > 3]
def readarticle(title):
    """Read a text file and collect the words extracted from each line.

    Args:
        title: Path of the file to read.

    Returns:
        A single list holding the result of ``linetoword`` for every line,
        concatenated in file order.

    Any error raised by ``open`` (for example when the file does not exist)
    is propagated to the caller.
    """
    wordlist = []
    # The with-block closes the file even if linetoword() raises part-way.
    with open(title, "r") as article:
        for line in article:
            wordlist.extend(linetoword(line))
    return wordlist
wordlist = readarticle("article.txt