Go语言实现朴素贝叶斯算法

亚丁号

于 2024-08-27 07:35:24 发布

阅读量97

点赞数 2

文章标签： golang 算法 c#

本文链接：https://blog.csdn.net/zy0412326/article/details/141580180

版权

朴素贝叶斯算法是一种基于贝叶斯定理的分类方法，假设特征之间是相互独立的。它通常用于文本分类等任务。下面是用Go语言实现朴素贝叶斯算法的一个简单示例。

代码实现

package main

import (
	"fmt"
	"math"
	"strings"
)

// 定义朴素贝叶斯分类器结构体
type NaiveBayesClassifier struct {
	classes      []string           // 类别
	classCount   map[string]int     // 每个类别的样本数
	wordCount    map[string]map[string]int // 每个类别中每个单词的出现次数
	totalWords   map[string]int     // 每个类别的总单词数
	totalSamples int                // 总样本数
}

// 初始化朴素贝叶斯分类器
func NewNaiveBayesClassifier() *NaiveBayesClassifier {
	return &NaiveBayesClassifier{
		classCount:   make(map[string]int),
		wordCount:    make(map[string]map[string]int),
		totalWords:   make(map[string]int),
		totalSamples: 0,
	}
}

// 训练分类器
func (nb *NaiveBayesClassifier) Train(data map[string][]string) {
	for class, texts := range data {
		nb.classes = append(nb.classes, class)
		nb.classCount[class] = len(texts)
		nb.totalSamples += len(texts)

		if nb.wordCount[class] == nil {
			nb.wordCount[class] = make(map[string]int)
		}

		for _, text := range texts {
			words := strings.Fields(text)
			for _, word := range words {
				nb.wordCount[class][word]++
				nb.totalWords[class]++
			}
		}
	}
}

// 计算给定类别的先验概率
func (nb *NaiveBayesClassifier) PriorProbability(class string) float64 {
	return float64(nb.classCount[class]) / float64(nb.totalSamples)
}

// 计算在给定类别下单词的条件概率
func (nb *NaiveBayesClassifier) ConditionalProbability(word, class string) float64 {
	wordFrequency := nb.wordCount[class][word]
	totalWordsInClass := nb.totalWords[class]
	return float64(wordFrequency+1) / float64(totalWordsInClass+len(nb.wordCount[class])) // 使用拉普拉斯平滑
}

// 预测给定文本的类别
func (nb *NaiveBayesClassifier) Predict(text string) string {
	words := strings.Fields(text)
	maxProbability := math.Inf(-1)
	var bestClass string

	for _, class := range nb.classes {
		probability := math.Log(nb.PriorProbability(class))
		for _, word := range words {
			probability += math.Log(nb.ConditionalProbability(word, class))
		}

		if probability > maxProbability {
			maxProbability = probability
			bestClass = class
		}
	}

	return bestClass
}

func main() {
	// 训练数据
	data := map[string][]string{
		"spam": {
			"buy cheap watches",
			"cheap watches available",
			"cheap watches buy",
		},
		"ham": {
			"I love my watch",
			"This watch is great",
			"Beautiful and elegant watch",
		},
	}

	// 初始化并训练分类器
	nb := NewNaiveBayesClassifier()
	nb.Train(data)

	// 进行预测
	testText := "cheap watches"
	prediction := nb.Predict(testText)
	fmt.Printf("Text: \"%s\" is classified as: \"%s\"\n", testText, prediction)
}

代码解释

结构体定义: NaiveBayesClassifier结构体包含了类别、每个类别的样本数、每个类别中单词的出现次数、每个类别的总单词数以及总样本数等信息。
训练过程: Train函数接收训练数据，统计每个类别的样本数、单词出现次数，并将这些信息存储在结构体的字段中。
先验概率计算: PriorProbability函数计算给定类别的先验概率，即该类别的样本数占总样本数的比例。
条件概率计算: ConditionalProbability函数计算在给定类别下某个单词的条件概率。使用拉普拉斯平滑来处理从未见过的单词。
预测: Predict函数接受一段文本，计算该文本在各类别下的概率，并返回概率最大的类别作为预测结果。