贝叶斯分类：Common Lisp实现

最新推荐文章于 2024-07-14 18:15:43 发布

置顶 leeshuheng

最新推荐文章于 2024-07-14 18:15:43 发布

阅读量2.5k

点赞数 1

分类专栏：数学文章标签：算法编程 Bayesian 贝叶斯 Common lisp

本文链接：https://blog.csdn.net/leeshuheng/article/details/8671349

版权

数学专栏收录该内容

3 篇文章 0 订阅

订阅专栏

最近利用闲暇时间学习Common Lisp，作为编程实践，我实现了一个简陋的
贝叶斯分类器(bayesian)。这只是一个代码玩具，没有实际的工程价值。文章最后我

贴了这个贝叶斯的代码。

贝叶斯公式如下：

可以这样简单的推导出贝叶斯公式:

代码的流程，首先是统计样本数据；然后根据bayesian定理，创建一个
用于分类的模型；最后在用这个模型去对测试数据进行分类。

宏add-class-data负责统计数据, 她的第0个参数是分类，第1个参数是
在这个分类下的一个数据项，最后一个参数是个hash table，用于记录。
调用一次add-class-data就增加一项数据的统计值。

宏calc-bayesian-model会生成一个贝叶斯模型，并作为她的返回值。
她的参数是记录了多种统计结果的hash table。

函数bayesian-predict对测试数据进行分类。她的第0个参数是一个列表，
存放的是一个待测试的向量。第1个参数是分类模型。

函数mytest用于驱动这个玩具。

我初学Lisp，代码幼稚。随便写写玩具程序，变量胡乱起名。宏里面
的变量名应该用(gensym)生成，但我图方便没这么干。但代码还是可以
正常执行的，分类结果还能勉强接受。我在fedora 17上用sbcl简单的
测试了程序，可以这样执行：
sbcl --script ./bayesian.lisp

bayesian.lisp:
===========================================
;;; 2013年 03月 10日星期日 10:16:49 CST
;;; author: 李小丹(Li Shao Dan) 字殊恒(shuheng)
;;; K.I.S.S
;;; S.P.O.T

(defmacro add-count (k h) `(incf (gethash ,k ,h 0)))

(defmacro add-class-data (c k h)
`(multiple-value-bind (v p) (gethash ,c ,h)
   (if p (add-count ,k v) (add-count ,k (setf (gethash ,c ,h) (make-hash-table :test #'equal))))))

(defun print-k-v (k v) (format t "~t~t~a: ~a~%" k v))
(defun print-class (k v) (format t "~a:~%" k) (maphash #'print-k-v v))
(defmacro print-all (h) `(progn (maphash #'print-class ,h) (format t "====================~%")))

(defmacro calc-one-sum (h) `(let ((s 0)) (maphash #'(lambda (k v) (setf s (+ s v))) ,h) s))
(defmacro calc-item-sum (h &optional (c nil c-p))
`(if ,c-p (multiple-value-bind (v p) (gethash ,c ,h) (if p (calc-one-sum v)))
           (let ((s 0)) (maphash #'(lambda (k v) (setf s (+ s (calc-one-sum v)))) ,h) s)))
(defmacro count-class (h) `(hash-table-count ,h))
(defmacro count-item-uniq (h &optional (c nil c-p))
`(if ,c-p (multiple-value-bind (v p) (gethash ,c ,h) (if p (hash-table-count v)))
   (let ((s 0)) (maphash #'(lambda (k v) (setf s (+ s (hash-table-count v)))) ,h) s)))

(defmacro count-item (h c i)
`(multiple-value-bind (ch ch-p) (gethash ,c ,h)
   (if ch-p (gethash ,i ch 0))))

(defmacro calc-p-class (h c)
`(multiple-value-bind (v p) (gethash ,c ,h)
   (if p (let (sc cc) (setf sc (calc-item-sum ,h)) (setf cc (calc-one-sum v))
           (/ cc sc)))))

(defmacro calc-p-item-class (h c i)
`(let ((sum (calc-item-sum ,h ,c)))
   (if (and sum (not (equal 0 sum))) (/ (count-item ,h ,c ,i) sum))))

(defmacro calc-bayesian-model (h)
`(let ((items nil) (classes nil) (ret (make-hash-table :test #'equal)))
   (maphash #'(lambda (k v) (pushnew k classes) (maphash #'(lambda (kk vv) (pushnew kk items)) v)) ,h)
   (dolist (i items)
      (let ((sum 0) (temp))
       (multiple-value-bind (value v-p) (gethash i ret)
          (if (not v-p) (setf value (setf (gethash i ret) (make-hash-table :test #'equal))))
          (dolist (c classes)
           (setf temp (* (calc-p-item-class ,h c i) (calc-p-class ,h c)))
           (incf sum temp)
           (setf (gethash c value) temp))
          (maphash #'(lambda (x y) (setf (gethash x value) (/ y sum))) value)
          (maphash #'(lambda (x y) (if (equal 0 y) (setf (gethash x value) 1/100000))) value)))) ret))

(defun bayesian-predict (vec model)
(let ((coll (make-hash-table :test #'equal)) (temp 0) (mc) (sum 0) (classes nil))
(maphash #'(lambda (k v) (maphash #'(lambda (kk vv) (pushnew kk classes)) v)) model)
(dolist (c classes)
   (let ((mul 1))
   (dolist (d vec)
   (multiple-value-bind (val v-p) (gethash d model)
      (if v-p (setf mul (* mul (gethash c val 1/100000))) (setf mul (* mul (/ 1 (length classes)))))))
   (setf (gethash c coll) mul)))
(maphash #'(lambda (k v) (incf sum v) (when (> v temp) (setf temp v) (setf mc k))) coll)
(if (>= (/ temp sum) 1/2) mc)))

;; ---------- test ---------------

(defvar *h* (make-hash-table :test #'equal))

(defun mytest()
(add-class-data 1 "a" *h*)
(add-class-data 1 "a" *h*)
(add-class-data 1 "a" *h*)
(add-class-data 1 "b" *h*)
(add-class-data 1 "c" *h*)
(add-class-data 1 "c" *h*)
(add-class-data 1 "d" *h*)

(add-class-data 2 "a" *h*)
(add-class-data 2 "a" *h*)
(add-class-data 2 "c" *h*)
(add-class-data 2 "c" *h*)
(add-class-data 2 "d" *h*)
(add-class-data 2 "d" *h*)
(add-class-data 2 "f" *h*)
(add-class-data 2 "f" *h*)
(add-class-data 2 "f" *h*)

(add-class-data 3 "a" *h*)
(add-class-data 3 "a" *h*)
(add-class-data 3 "b" *h*)
(add-class-data 3 "b" *h*)
(add-class-data 3 "d" *h*)
(add-class-data 3 "d" *h*)
(add-class-data 3 "d" *h*)
(add-class-data 3 "d" *h*)
(add-class-data 3 "f" *h*)
(add-class-data 3 "f" *h*)

(print-all *h*)

; (format t "sum is ~a.~%" (calc-item-sum *h*))
; (dotimes (i 5) (format t "class(~a) sum: ~a.~%" i (calc-item-sum *h* i)))
; (format t "class count ~a.~%" (count-class *h*))
; (format t "item count: ~a.~%" (count-item-uniq *h*))
; (dotimes (i 5) (format t "item(~a) type count: ~a.~%" i (count-item-uniq *h* i)))
; (dotimes (i 5) (format t "class(~a) probability: ~a.~%" i (calc-p-class *h* i)))
; (dotimes (i 5) (format t "class(~a) item(~a) count: ~a.~%" i "aa" (count-item *h* i "aa")))
; (dotimes (i 5) (format t "class(~a) item(~a) count: ~a.~%" i "a" (count-item *h* i "a")))
; (dotimes (i 5) (format t "class(~a) item(~a) count: ~a.~%" i "b" (count-item *h* i "b")))
; (dotimes (i 5) (format t "class(~a) item(~a) count: ~a.~%" i "bd" (count-item *h* i "bd")))
; (dotimes (i 5) (format t "class(~a) item(~a) count: ~a.~%" i "c" (count-item *h* i "c")))
; (dotimes (i 5) (format t "class(~a) item(~a) count: ~a.~%" i "aka" (count-item *h* i "aka")))
; (dotimes (i 5) (format t "class(~a) item(~a) probability: ~a.~%" i "aa" (calc-p-item-class *h* i "aa")))
; (dotimes (i 5) (format t "class(~a) item(~a) probability: ~a.~%" i "a" (calc-p-item-class *h* i "a")))
; (dotimes (i 5) (format t "class(~a) item(~a) probability: ~a.~%" i "b" (calc-p-item-class *h* i "b")))
; (dotimes (i 5) (format t "class(~a) item(~a) probability: ~a.~%" i "bd" (calc-p-item-class *h* i "bd")))
; (dotimes (i 5) (format t "class(~a) item(~a) probability: ~a.~%" i "c" (calc-p-item-class *h* i "c")))
; (dotimes (i 5) (format t "class(~a) item(~a) probability: ~a.~%" i "aka" (calc-p-item-class *h* i "aka")))
(setf *h* (calc-bayesian-model *h*))
(print-all *h*)
(format t "~a~%" (bayesian-predict (list "a" "c") *h*))
(format t "====================~%")
(format t "~a~%" (bayesian-predict (list "k" "a" "m" "n" "o") *h*))
(format t "====================~%")
(format t "~a~%" (bayesian-predict (list "a" "b" "c" "f") *h*))
(format t "====================~%")
(format t "~a~%" (bayesian-predict (list "a" "b" "c" "d" "f") *h*))
(format t "====================~%")
(format t "~a~%" (bayesian-predict (list "a" "d" "d" "c" "f") *h*))
(format t "====================~%")
(format t "~a~%" (bayesian-predict (list "f" "d" "f" "d" "d" "d" "f") *h*))
(format t "====================~%")
(format t "~a~%" (bayesian-predict (list "a" "c" "c" "d" "f") *h*))
(format t "====================~%"))

(mytest)

====================================================================

代码完

看看，有点意思吧！

leeshuheng

关注

1
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
贝叶斯分类：Common Lisp实现

最近利用闲暇时间学习Common Lisp，作为编程实践，我实现了一个简陋的贝叶斯分类器(bayesian)。这只是一个代码玩具，没有实际的工程价值。文章最后我贴了这个贝叶斯的代码。贝叶斯公式如下：可以这样简单的推导出贝叶斯公式:代码的流程，首先是统计样本数据；然后根据bayesian定理，创建一个用于分类的模型；最后在用这个模型去对测试数据
复制链接

扫一扫