我找到了这段 Python 代码，用来对文本文件进行词干提取（stemming）。
import nltk
import string
from collections import Counter
def get_tokens(path='/Users/MYUSERNAME/Desktop/Test_sp500/A_09.txt'):
    """Read a text file and return its lower-cased, punctuation-free tokens.

    Args:
        path: File to read. Defaults to the originally hard-coded file, so
            existing ``get_tokens()`` calls behave as before. Pass another
            path (for example each entry of ``glob.glob('some_dir/*.txt')``)
            to process several files with the same function.

    Returns:
        The list of tokens produced by ``nltk.word_tokenize``.
    """
    with open(path, 'r') as shakes:
        text = shakes.read()
    lowers = text.lower()
    # Python 3's str.translate() accepts a single table argument; the
    # Python 2 form translate(None, chars) raises TypeError there.
    # Three-argument str.maketrans() builds a table that deletes every
    # punctuation character.
    no_punctuation = lowers.translate(
        str.maketrans('', '', string.punctuation)
    )
    return nltk.word_tokenize(no_punctuation)
# Count how often each raw token occurs in the file.
tokens = get_tokens()
count = Counter(tokens)
# A bare expression is only echoed in an interactive session; print it so
# the ten most frequent tokens are also shown when run as a script.
print(count.most_common(10))
from nltk.corpus import stopwords
# Tokenize the file again so this step does not rely on earlier state.
tokens = get_tokens()
# Load the stop-word list once and keep it as a set: calling
# stopwords.words() inside the comprehension re-evaluates it for every token
# and tests membership against a list.
english_stopwords = set(stopwords.words('english'))
filtered = [w for w in tokens if w not in english_stopwords]
count = Counter(filtered)
# Print the result so it is visible when run as a script, not only in a REPL.
print(count.most_common(100))
from nltk.stem.porter import *
def stem_tokens(tokens, stemmer):
    """Return the stem of every token, preserving the input order.

    Args:
        tokens: Iterable of words to stem.
        stemmer: Any object exposing a ``stem(word)`` method.

    Returns:
        A new list holding ``stemmer.stem(item)`` for each item in ``tokens``.
    """
    return [stemmer.stem(item) for item in tokens]
# Reduce the stop-word-filtered tokens to their stems and count them.
stemmer = PorterStemmer()
stemmed = stem_tokens(filtered, stemmer)
count = Counter(stemmed)
# Print the result so the 100 most frequent stems are shown when this file
# is run as a script; a bare expression is only echoed interactively.
print(count.most_common(100))
尝试运行此程序时,出现以下错误:
^{pr2}$
现在我的问题是：我该如何解决这个错误？
另外，等这个程序能够正常运行之后，我怎样才能不只对一个 .txt 文件运行这个脚本，而是对某个目录中的所有 .txt 文件都运行它？
注意：我平时不怎么需要编程，所以我只了解最基础的 Python 知识。