贝叶斯分类:Common Lisp实现

最近利用闲暇时间学习Common Lisp,作为编程实践,我实现了一个简陋的
贝叶斯分类器(bayesian)。这只是一个代码玩具,没有实际的工程价值。文章最后我

贴了这个贝叶斯的代码。


贝叶斯公式如下:


可以这样简单的推导出贝叶斯公式:




代码的流程,首先是统计样本数据;然后根据bayesian定理,创建一个
用于分类的模型;最后在用这个模型去对测试数据进行分类。

宏add-class-data负责统计数据, 她的第0个参数是分类,第1个参数是
在这个分类下的一个数据项,最后一个参数是个hash table,用于记录。
调用一次add-class-data就增加一项数据的统计值。

宏calc-bayesian-model会生成一个贝叶斯模型,并作为她的返回值。
她的参数是记录了多种统计结果的hash table。

函数bayesian-predict对测试数据进行分类。她的第0个参数是一个列表,
存放的是一个待测试的向量。第1个参数是分类模型。

函数mytest用于驱动这个玩具。

我初学Lisp,代码幼稚。随便写写玩具程序,变量胡乱起名。宏里面
的变量名应该用(gensym)生成,但我图方便没这么干。但代码还是可以
正常执行的,分类结果还能勉强接受。我在fedora 17上用sbcl简单的
测试了程序,可以这样执行:
    sbcl --script ./bayesian.lisp


bayesian.lisp:
===========================================
;;; 2013年 03月 10日 星期日 10:16:49 CST
;;; author: 李小丹(Li Shao Dan) 字 殊恒(shuheng)
;;; K.I.S.S
;;; S.P.O.T


(defmacro add-count (k h) `(incf (gethash ,k ,h 0)))

(defmacro add-class-data (c k h)
 `(multiple-value-bind (v p) (gethash ,c ,h)
     (if p (add-count ,k v) (add-count ,k (setf (gethash ,c ,h) (make-hash-table :test #'equal))))))

(defun print-k-v (k v) (format t "~t~t~a: ~a~%" k v))
(defun print-class (k v) (format t "~a:~%" k) (maphash #'print-k-v v))
(defmacro print-all (h) `(progn (maphash #'print-class ,h) (format t "====================~%")))

(defmacro calc-one-sum (h) `(let ((s 0)) (maphash #'(lambda (k v) (setf s (+ s v))) ,h) s))
(defmacro calc-item-sum (h &optional (c nil c-p))
 `(if ,c-p (multiple-value-bind (v p) (gethash ,c ,h) (if p (calc-one-sum v)))
             (let ((s 0)) (maphash #'(lambda (k v) (setf s (+ s (calc-one-sum v)))) ,h) s)))
(defmacro count-class (h) `(hash-table-count ,h))
(defmacro count-item-uniq (h &optional (c nil c-p))
 `(if ,c-p (multiple-value-bind (v p) (gethash ,c ,h) (if p (hash-table-count v)))
     (let ((s 0)) (maphash #'(lambda (k v) (setf s (+ s (hash-table-count v)))) ,h) s)))

(defmacro count-item (h c i)
 `(multiple-value-bind (ch ch-p) (gethash ,c ,h)
     (if ch-p (gethash ,i ch 0))))

(defmacro calc-p-class (h c)
 `(multiple-value-bind (v p) (gethash ,c ,h)
     (if p (let (sc cc) (setf sc (calc-item-sum ,h)) (setf cc (calc-one-sum v))
            (/ cc sc)))))

(defmacro calc-p-item-class (h c i)
 `(let ((sum (calc-item-sum ,h ,c)))
     (if (and sum (not (equal 0 sum))) (/ (count-item ,h ,c ,i) sum))))

(defmacro calc-bayesian-model (h)
 `(let ((items nil) (classes nil) (ret (make-hash-table :test #'equal)))
     (maphash #'(lambda (k v) (pushnew k classes) (maphash #'(lambda (kk vv) (pushnew kk items)) v)) ,h)
     (dolist (i items)
      (let ((sum 0) (temp))
        (multiple-value-bind (value v-p) (gethash i ret)
          (if (not v-p) (setf value (setf (gethash i ret) (make-hash-table :test #'equal))))
          (dolist (c classes)
            (setf temp (* (calc-p-item-class ,h c i) (calc-p-class ,h c)))
            (incf sum temp)
            (setf (gethash c value) temp))
          (maphash #'(lambda (x y) (setf (gethash x value) (/ y sum))) value)
          (maphash #'(lambda (x y) (if (equal 0 y) (setf (gethash x value) 1/100000))) value)))) ret))

(defun bayesian-predict (vec model)
 (let ((coll (make-hash-table :test #'equal)) (temp 0) (mc) (sum 0) (classes nil))
  (maphash #'(lambda (k v) (maphash #'(lambda (kk vv) (pushnew kk classes)) v)) model)
  (dolist (c classes)
   (let ((mul 1))
    (dolist (d vec)
     (multiple-value-bind (val v-p) (gethash d model)
      (if v-p (setf mul (* mul (gethash c val 1/100000))) (setf mul (* mul (/ 1 (length classes)))))))
    (setf (gethash c coll) mul)))
  (maphash #'(lambda (k v) (incf sum v) (when (> v temp) (setf temp v) (setf mc k))) coll)
  (if (>= (/ temp sum) 1/2) mc)))


;; ---------- test ---------------

(defvar *h* (make-hash-table :test #'equal))

(defun mytest()
  (add-class-data 1 "a" *h*)
  (add-class-data 1 "a" *h*)
  (add-class-data 1 "a" *h*)
  (add-class-data 1 "b" *h*)
  (add-class-data 1 "c" *h*)
  (add-class-data 1 "c" *h*)
  (add-class-data 1 "d" *h*)

  (add-class-data 2 "a" *h*)
  (add-class-data 2 "a" *h*)
  (add-class-data 2 "c" *h*)
  (add-class-data 2 "c" *h*)
  (add-class-data 2 "d" *h*)
  (add-class-data 2 "d" *h*)
  (add-class-data 2 "f" *h*)
  (add-class-data 2 "f" *h*)
  (add-class-data 2 "f" *h*)

  (add-class-data 3 "a" *h*)
  (add-class-data 3 "a" *h*)
  (add-class-data 3 "b" *h*)
  (add-class-data 3 "b" *h*)
  (add-class-data 3 "d" *h*)
  (add-class-data 3 "d" *h*)
  (add-class-data 3 "d" *h*)
  (add-class-data 3 "d" *h*)
  (add-class-data 3 "f" *h*)
  (add-class-data 3 "f" *h*)

  (print-all *h*)

;  (format t "sum is ~a.~%" (calc-item-sum *h*))
;  (dotimes (i 5) (format t "class(~a) sum: ~a.~%" i (calc-item-sum *h* i)))
;  (format t "class count ~a.~%" (count-class *h*))
;  (format t "item count: ~a.~%" (count-item-uniq *h*))
;  (dotimes (i 5) (format t "item(~a) type count: ~a.~%" i (count-item-uniq *h* i)))
;  (dotimes (i 5) (format t "class(~a) probability: ~a.~%" i (calc-p-class *h* i)))
;  (dotimes (i 5) (format t "class(~a) item(~a) count: ~a.~%" i "aa" (count-item *h* i "aa")))
;  (dotimes (i 5) (format t "class(~a) item(~a) count: ~a.~%" i "a" (count-item *h* i "a")))
;  (dotimes (i 5) (format t "class(~a) item(~a) count: ~a.~%" i "b" (count-item *h* i "b")))
;  (dotimes (i 5) (format t "class(~a) item(~a) count: ~a.~%" i "bd" (count-item *h* i "bd")))
;  (dotimes (i 5) (format t "class(~a) item(~a) count: ~a.~%" i "c" (count-item *h* i "c")))
;  (dotimes (i 5) (format t "class(~a) item(~a) count: ~a.~%" i "aka" (count-item *h* i "aka")))
;  (dotimes (i 5) (format t "class(~a) item(~a) probability: ~a.~%" i "aa" (calc-p-item-class *h* i "aa")))
;  (dotimes (i 5) (format t "class(~a) item(~a) probability: ~a.~%" i "a" (calc-p-item-class *h* i "a")))
;  (dotimes (i 5) (format t "class(~a) item(~a) probability: ~a.~%" i "b" (calc-p-item-class *h* i "b")))
;  (dotimes (i 5) (format t "class(~a) item(~a) probability: ~a.~%" i "bd" (calc-p-item-class *h* i "bd")))
;  (dotimes (i 5) (format t "class(~a) item(~a) probability: ~a.~%" i "c" (calc-p-item-class *h* i "c")))
;  (dotimes (i 5) (format t "class(~a) item(~a) probability: ~a.~%" i "aka" (calc-p-item-class *h* i "aka")))
  (setf *h* (calc-bayesian-model *h*))
  (print-all *h*)
  (format t "~a~%" (bayesian-predict (list "a" "c") *h*))
  (format t "====================~%")
  (format t "~a~%" (bayesian-predict (list "k" "a" "m" "n" "o") *h*))
  (format t "====================~%")
  (format t "~a~%" (bayesian-predict (list "a" "b" "c" "f") *h*))
  (format t "====================~%")
  (format t "~a~%" (bayesian-predict (list "a" "b" "c" "d" "f") *h*))
  (format t "====================~%")
  (format t "~a~%" (bayesian-predict (list "a" "d" "d" "c" "f") *h*))
  (format t "====================~%")
  (format t "~a~%" (bayesian-predict (list "f" "d" "f" "d" "d" "d" "f") *h*))
  (format t "====================~%")
  (format t "~a~%" (bayesian-predict (list "a" "c" "c" "d" "f") *h*))
  (format t "====================~%"))

(mytest)


====================================================================

代码完

看看,有点意思吧!


  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值