from collections import Counter
def generate_ngrams(text, n):
    """Return the word-level n-grams of ``text`` as space-joined strings.

    The text is tokenized with ``str.split()`` (no arguments), i.e. on runs
    of whitespace. Punctuation stays attached to its word and case is
    preserved, so "dog." and "dog" are different tokens.

    Args:
        text: The input string to split into words.
        n: Number of consecutive words per n-gram; must be at least 1.

    Returns:
        A list of n-grams in order of appearance (duplicates included).
        The list is empty when the text contains fewer than ``n`` words.

    Raises:
        ValueError: If ``n`` is less than 1.
    """
    # Without this guard n == 0 would yield len(words) + 1 empty strings and
    # a negative n would yield meaningless slices.
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n!r}")

    words = text.split()
    # When len(words) < n the range is empty, so the result is [].
    return [" ".join(words[i:i + n]) for i in range(len(words) - n + 1)]
def count_ngrams(ngrams):
    """Count how many times each n-gram occurs.

    Args:
        ngrams: An iterable of hashable n-grams, such as the list returned
            by ``generate_ngrams``.

    Returns:
        A ``collections.Counter`` mapping each distinct n-gram to the number
        of times it appears in ``ngrams``.
    """
    return Counter(ngrams)
# Sample text used by the demo below.
text = "The quick brown fox jumps over the lazy dog. The quick brown fox is fast."

# Generate and count the bigrams (n = 2).
n = 2
bigrams = generate_ngrams(text, n)
bigram_freq = count_ngrams(bigrams)

# Print each bigram with its frequency.
print("Bigrams and their frequencies:")
for bigram, freq in bigram_freq.items():
    print(f"{bigram}: {freq}")

# Generate and count the trigrams (n = 3).
n = 3
trigrams = generate_ngrams(text, n)
trigram_freq = count_ngrams(trigrams)

# Print each trigram with its frequency; the leading newline separates the
# two sections in the output.
print("\nTrigrams and their frequencies:")
for trigram, freq in trigram_freq.items():
    print(f"{trigram}: {freq}")
# [Untitled] ngram
# Latest recommended article published on 2024-07-19 15:28:13