# Inline copy of the song lyrics.  The analysis below does not read this
# constant; it loads its text from LYRICS_PATH.  The name is deliberately not
# ``str`` so that the builtin type stays usable in the rest of the module.
LYRICS = '''If I should stay I would only be in your way So I'll go But I know I'll think of your every step of the way And I will always love you Will always love you You my darling you Bitter-sweet memories That is all I'm taking with me So goodbye Please don't cry We both know I'm not What you need And I will always love you I will always love you I hope life treats you kind And I hope you have all you dreamed of And I wish you joy and happiness But above all this Ii wish your love And I will always love you I will always love you I will always love you I will always love you I will always love you I will always love you Darling I love you I'll always love you'''

# Text file holding the lyrics to analyse.
LYRICS_PATH = '英文歌词.txt'

# Characters that are replaced by a space before the text is split into words.
LYRICS_PUNCTUATION = ",.;:'`"

# Grammatical words (articles, conjunctions, ...) excluded from the statistics.
LYRICS_STOP_WORDS = frozenset({'a', 'the', 'and', 'if', 'do', 'of'})

_PUNCTUATION_TO_SPACE = str.maketrans(
    LYRICS_PUNCTUATION, ' ' * len(LYRICS_PUNCTUATION)
)


def clean_lyrics(text):
    """Return *text* lower-cased with every punctuation mark turned into a space.

    ``str`` objects are immutable, so the transformed text has to be returned
    (and used) rather than relying on ``lower()``/``replace()`` to change the
    original in place.  Note that the apostrophe is treated as punctuation,
    so ``"I'll"`` becomes the two words ``"i"`` and ``"ll"``.
    """
    return text.lower().translate(_PUNCTUATION_TO_SPACE)


def count_lyric_words(words, stop_words=LYRICS_STOP_WORDS):
    """Count how often each word occurs, ignoring *stop_words*.

    Args:
        words: iterable of already normalised (lower-case) words.
        stop_words: collection of words to leave out of the result.

    Returns:
        A ``collections.Counter`` mapping word -> number of occurrences.
    """
    from collections import Counter

    # One pass over the words instead of one ``list.count`` scan per word.
    return Counter(word for word in words if word not in stop_words)


def rank_lyric_words(counts):
    """Return ``(word, count)`` pairs sorted by count, most frequent first."""
    return sorted(counts.items(), key=lambda item: item[1], reverse=True)


def report_lyrics(path=LYRICS_PATH, top_n=20):
    """Print the word-frequency analysis of the lyrics stored in *path*.

    Args:
        path: UTF-8 text file to read.
        top_n: how many of the most frequent words to list at the end.

    Returns:
        The list of at most *top_n* ``(word, count)`` pairs that was printed.
    """
    # Read the text file; ``with`` closes it even if reading fails.
    with open(path, 'r', encoding='utf-8') as handle:
        text = handle.read()
    print(text)

    # Pre-processing: lower-case, then replace special characters by spaces.
    print(text.lower())
    cleaned = clean_lyrics(text)
    print(cleaned)

    # Extract the words by splitting on whitespace.
    words = cleaned.split()
    print(words)

    # Count every distinct word, excluding the stop words.
    counts = count_lyric_words(words)
    print(len(counts), dict(counts))

    # Sort by frequency.
    ranked = rank_lyric_words(counts)
    print(ranked)

    # Print the top entries; slicing copes with fewer than ``top_n`` words.
    top = ranked[:top_n]
    for entry in top:
        print(entry)
    return top


if __name__ == "__main__":
    report_lyrics()
# Text file analysed by the word-frequency report below.
NOVEL_PATH = '百万英镑.txt'

# Characters that are replaced by a space before the text is split into words.
NOVEL_PUNCTUATION = ",.;:'`"


def split_novel_words(text, punctuation=NOVEL_PUNCTUATION):
    """Split *text* into lower-case words.

    Every character of *punctuation* is replaced by a space first, then the
    text is split on whitespace.  Iterating over the raw string instead would
    yield single characters, not words.
    """
    to_space = {ord(mark): ' ' for mark in punctuation}
    return text.lower().translate(to_space).split()


def count_novel_words(words):
    """Return a dict mapping each word in *words* to its number of occurrences."""
    from collections import Counter

    # Single pass; avoids rescanning the whole text once per distinct word.
    return dict(Counter(words))


def rank_novel_words(counts):
    """Return ``(word, count)`` pairs, most frequent first, ties alphabetical."""
    return sorted(counts.items(), key=lambda item: (-item[1], item[0]))


def report_novel(path=NOVEL_PATH, top_n=20):
    """Print the word-frequency analysis of the text stored in *path*.

    Args:
        path: UTF-8 text file to read.
        top_n: how many of the most frequent words to list at the end.

    Returns:
        The list of at most *top_n* ``(word, count)`` pairs that was printed.
    """
    # ``with`` closes the file even if reading fails.
    with open(path, 'r', encoding='utf-8') as handle:
        text = handle.read()
    print(text)

    # Word count.
    words = split_novel_words(text)
    vocabulary = set(words)
    print(len(vocabulary), vocabulary)

    counts = count_novel_words(words)
    print(len(counts), counts)
    print(counts.items())

    # Sort by word frequency.
    ranked = rank_novel_words(counts)
    print(ranked)

    # Print the top entries; slicing copes with fewer than ``top_n`` words.
    top = ranked[:top_n]
    for entry in top:
        print(entry)
    return top


if __name__ == "__main__":
    report_novel()