贝叶斯分类python_简单的贝叶斯分类器的python实现

# -*- coding: utf-8 -*-
"""Simple naive Bayes classifier.

>>> c = Classy()
>>> c.train(['cpu', 'RAM', 'ALU', 'io', 'bridge', 'disk'], 'architecture')
True
>>> c.train(['monitor', 'mouse', 'keyboard', 'microphone', 'headphones'], 'input_devices')
True
>>> c.train(['desk', 'chair', 'cabinet', 'lamp'], 'office furniture')
True
>>> my_office = ['cpu', 'monitor', 'mouse', 'chair']
>>> c.classify(my_office)
('input_devices', -1.0986122886681098)
"""

from collections import Counter
import math

class ClassifierNotTrainedException(Exception):
    """Raised when a classifier is used before any document was trained."""

    def __str__(self):
        """Return the fixed, human-readable error message."""
        return "Classifier is not trained."

class Classy(object):
    """Incrementally trained naive Bayes classifier over lists of terms.

    Documents are sequences of hashable terms.  Each call to ``train``
    adds one document to a class; ``classify`` returns the best-scoring
    class for a new document together with its log score.

    Attributes:
        term_count_store: ``{class_id: {term: occurrences}}``.
        data: bookkeeping dict with three sub-dicts keyed by class id:
            ``'class_term_count'`` (terms trained per class),
            ``'class_doc_count'`` (documents trained per class) and
            ``'beta_priors'`` (log of the class's share of all documents).
        total_term_count: number of terms trained across all classes.
        total_doc_count: number of documents trained across all classes.
    """

    def __init__(self):
        """Create an empty, untrained classifier."""
        self.term_count_store = {}
        self.data = {
            'class_term_count': {},
            'beta_priors': {},
            'class_doc_count': {},
        }
        self.total_term_count = 0
        self.total_doc_count = 0

    def train(self, document_source, class_id):
        """Add one document to ``class_id`` and refresh the class priors.

        Args:
            document_source: sized collection of hashable terms.
            class_id: hashable label of the class the document belongs to.

        Returns:
            Always ``True``.
        """
        class_terms = self.term_count_store.setdefault(class_id, {})
        for term, occurrences in Counter(document_source).items():
            class_terms[term] = class_terms.get(term, 0) + occurrences

        document_length = len(document_source)
        class_term_count = self.data['class_term_count']
        class_term_count[class_id] = (
            class_term_count.get(class_id, 0) + document_length
        )
        class_doc_count = self.data['class_doc_count']
        class_doc_count[class_id] = class_doc_count.get(class_id, 0) + 1

        self.total_term_count += document_length
        self.total_doc_count += 1

        # The document total is now at least 1, so this cannot raise here.
        self.compute_beta_priors()
        return True

    def classify(self, document_input):
        """Return the best-scoring class for ``document_input``.

        For every trained class the score is the class's log prior plus
        the sum, over the document's terms in order, of
        ``frequency_in_document * log((count_in_class + 1) /
        (class_term_count + total_doc_count))``.  Scoring of a class stops
        at the first term that was never trained for that class.

        Args:
            document_input: sequence of hashable terms.

        Returns:
            ``(class_id, score)`` for the highest score; on ties the class
            that was trained first wins.

        Raises:
            ClassifierNotTrainedException: if no document was trained yet.
        """
        if not self.total_doc_count:
            raise ClassifierNotTrainedException()

        term_frequency = Counter(document_input)
        scored_classes = []
        for class_id in self.data['class_doc_count']:
            class_terms = self.term_count_store.get(class_id, {})
            # NOTE(review): the additive-smoothing constant used here is the
            # number of trained documents, not the vocabulary size -- confirm
            # this is intended.  Relies on true division (Python 3).
            denominator = (
                self.data['class_term_count'][class_id] + self.total_doc_count
            )
            log_likelihood = 0
            for term in document_input:
                if term not in class_terms:
                    # NOTE(review): an unseen term ends scoring for this
                    # class and keeps the partial sum, so a class whose first
                    # term is unseen scores exactly its prior.  The module's
                    # doctest depends on this, so it is kept as is.
                    break
                probability = (class_terms[term] + 1) / denominator
                # NOTE(review): a repeated term is visited once per
                # occurrence and also weighted by its frequency -- confirm.
                log_likelihood += term_frequency[term] * math.log(probability)
            score = log_likelihood + self.data['beta_priors'][class_id]
            scored_classes.append((class_id, score))

        # max() keeps the first maximal entry, i.e. the earliest-trained
        # class among equal scores.
        return max(scored_classes, key=lambda scored: scored[1])

    def compute_beta_priors(self):
        """Recompute each class's log prior from the document counts.

        Stores ``log(class_doc_count / total_doc_count)`` for every class
        in ``self.data['beta_priors']``.

        Raises:
            ClassifierNotTrainedException: if no document was trained yet.
        """
        if not self.total_doc_count:
            raise ClassifierNotTrainedException()

        for class_id, doc_count in self.data['class_doc_count'].items():
            share = doc_count / self.total_doc_count
            self.data['beta_priors'][class_id] = math.log(share)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值