GitHub 上完整的 PHP 代码如下:
[],Type::NEGATIVE => []];
/**
 * Number of learned documents per type.
 *
 * Incremented once per learn() call and read by pTotal() to estimate the
 * prior probability of each type.
 */
private $documents = [Type::POSITIVE => 0,Type::NEGATIVE => 0];
/**
 * Guess the most likely type for a statement.
 *
 * For every known type the score is the type prior (pTotal) combined with the
 * per-word probabilities (p) of each word in the statement. Scores are
 * accumulated as a sum of logarithms rather than a product of probabilities:
 * a product of many values below 1 underflows to 0.0 for long statements,
 * which would make every type score 0 and no type win. Because log() is
 * monotonic, the winning type is the same as with the plain product.
 *
 * @param string $statement Text to classify.
 * @return mixed The best scoring entry of $this->types, or null when there are no types.
 */
public function guess($statement)
{
    $words = $this->getWords($statement);

    // Start below any reachable score so the first type always becomes the
    // initial candidate; on ties the earliest type is kept.
    $best_score = -INF;
    $best_type = null;

    foreach ($this->types as $type) {
        // log P(Type)
        $score = log($this->pTotal($type));

        foreach ($words as $word) {
            // + log P(word | Type); p() is always > 0, so log() stays finite.
            $score += log($this->p($word, $type));
        }

        if ($score > $best_score) {
            $best_score = $score;
            $best_type = $type;
        }
    }

    return $best_type;
}
/**
 * Train the classifier with one statement that belongs to the given type.
 *
 * Every word of the statement raises that word's counter for the type, and
 * the type's document counter goes up by one.
 *
 * @param string $statement Training text.
 * @param mixed  $type      Type the statement belongs to (a key of the counters).
 */
public function learn($statement,$type)
{
    foreach ($this->getWords($statement) as $token) {
        // First sighting of a word for this type starts at 1, otherwise add one.
        $this->words[$type][$token] = isset($this->words[$type][$token])
            ? $this->words[$type][$token] + 1
            : 1;
    }

    // One more document has been seen for this type.
    $this->documents[$type] += 1;
}
/**
 * Smoothed probability of a word for the given type.
 *
 * One is added to both the word's count and the total word count of the
 * type, so a word that was never learned still gets a non-zero probability.
 *
 * @param string $word Word to look up.
 * @param mixed  $type Type whose word counts are used.
 * @return float|int (count + 1) / (total words of the type + 1)
 */
public function p($word,$type)
{
    $occurrences = isset($this->words[$type][$word]) ? $this->words[$type][$word] : 0;
    $totalForType = array_sum($this->words[$type]);

    return ($occurrences + 1) / ($totalForType + 1);
}
/**
 * Smoothed prior probability of a type.
 *
 * @param mixed $type Type whose document count is used.
 * @return float|int (documents of the type + 1) / (all documents + 1)
 */
public function pTotal($type)
{
    $learnedForType = $this->documents[$type];
    $learnedOverall = array_sum($this->documents);

    return ($learnedForType + 1) / ($learnedOverall + 1);
}
/**
 * Split a statement into lower-cased tokens separated by whitespace.
 *
 * @param string $string Raw statement text.
 * @return array List of tokens; empty when the statement contains no
 *               non-whitespace characters.
 */
public function getWords($string)
{
    // PREG_SPLIT_NO_EMPTY drops the empty tokens that leading/trailing
    // whitespace (or an empty statement) would otherwise produce, so they are
    // never learned or scored as if they were words.
    return preg_split('/\s+/', strtolower($string), -1, PREG_SPLIT_NO_EMPTY);
}
}
// Train the classifier with a few labelled statements for each type. Both
// types need examples; with a single NEGATIVE sample every guess would come
// out as negative and the expected output below could not be reached.
$classifier = new Classifier();
$classifier->learn('Symfony is the best',Type::POSITIVE);
$classifier->learn('PhpStorm is great',Type::POSITIVE);
$classifier->learn('Iltar complains a lot',Type::NEGATIVE);
$classifier->learn('No Symfony is bad',Type::NEGATIVE);

// Classify two statements the classifier has not seen before.
var_dump($classifier->guess('Symfony is great')); // string(8) "positive"
var_dump($classifier->guess('I complain a lot')); // string(8) "negative"
结束语
尽管我们只进行了很少的训练,但是算法还是应该能给出相对精确的结果。在真实环境中,你可以让机器学习成百上千条记录,这样就可以给出更精准的结果。你也可以下载查看这篇文章的英文原文。
而且,朴素贝叶斯不仅仅可以运用到文本类的应用。希望这篇文章能稍稍拉近你和机器学习之间的距离。