import nltk
from nltk.corpus import gutenberg
# Show the identifiers of every text available in the Gutenberg corpus reader.
a = gutenberg.fileids()
print(a)

# NOTE: despite its name, `emma` holds the tokens of "shakespeare-macbeth.txt";
# the name is kept so any later references to it keep working.
emma = gutenberg.words("shakespeare-macbeth.txt")
print(emma[1030:1037])  # a seven-token sample from the middle of the text

# For each text print, in order: length of the raw text, number of word
# tokens, number of sentences, and the file identifier.
for fileid in a:
    num_chars = len(gutenberg.raw(fileid))
    num_words = len(gutenberg.words(fileid))
    num_sents = len(gutenberg.sents(fileid))
    print(num_chars, num_words, num_sents, fileid)
# Python: the NLTK Gutenberg corpus
# (Page note: latest recommended article published 2024-06-14 09:31:39)