本节主要介绍如何利用符号数据和数据抽象来实现 Huffman 编码(Huffman code)。
先说一下我觉得难的地方:首先,这是我第一次用 Scheme 写一个比较完整的功能,头绪比较乱,最初那些 constructor 和 selector 的设计是一个难点;
其次是建树时要把操作一步步分离,又一次见识到了什么叫抽象。我思考的时候总是把一整段功能放在一起想,结果一阵头大。具体功能的实现并不是难点,
难的主要是设计整个程序的思路。以下为代码:
;; Constructor for a leaf: a three-element list holding the tag 'leaf,
;; the symbol, and the symbol's weight, in that order.
(define (make-leaf symbol weight)
  (cons 'leaf
        (cons symbol
              (cons weight '()))))
;; Predicate: true when the first element of OBJECT is the tag 'leaf.
;; OBJECT must be a pair, since its car is inspected unconditionally.
(define (leaf? object)
  (let ((tag (car object)))
    (eq? tag 'leaf)))
;; Selector: the second element of a leaf, i.e. the symbol stored by
;; make-leaf.
(define (symbol-leaf object)
  (car (cdr object)))
;; Selector: the third element of a leaf, i.e. the weight stored by
;; make-leaf.
(define (weight-leaf object)
  (car (cdr (cdr object))))
;; Constructor for an internal node, laid out as
;;   (left right symbols weight)
;; where symbols is the symbol list of LEFT followed by that of RIGHT,
;; and weight is the sum of the two branch weights.
(define (make-code-tree left right)
  (let ((merged-symbols (append (symbols left) (symbols right)))
        (total-weight (+ (weight left) (weight right))))
    (list left right merged-symbols total-weight)))
;; Selector: returns the first element of an internal node, which is the
;; left branch in the (left right symbols weight) layout built by
;; make-code-tree.
(define (left-tree object)
(car object))
;; Selector: returns the second element of an internal node, which is the
;; right branch in the (left right symbols weight) layout built by
;; make-code-tree.
(define (right-tree object)
  (car (cdr object)))
;; Returns the list of symbols under TREE: a one-element list for a leaf,
;; otherwise the third element of the internal node.
(define (symbols tree)
  (cond ((leaf? tree) (list (symbol-leaf tree)))
        (else (caddr tree))))
;; Returns the weight of TREE: the leaf's own weight for a leaf, otherwise
;; the fourth element of the internal node.
(define (weight tree)
  (cond ((leaf? tree) (weight-leaf tree))
        (else (cadddr tree))))
;; Decodes BITS (a list of 0s and 1s) into a list of symbols by walking
;; TREE from its root.
(define (decode bits tree)
  (decode-1 bits tree tree))

;; Helper for decode.
;;   bits           - remaining bits to consume
;;   tree           - the root, used to restart after each decoded symbol
;;   current-branch - current position inside the tree
;; Each bit selects a branch via choose-branch.  Reaching a leaf emits its
;; symbol and restarts from the root; otherwise the walk continues from
;; the selected branch.  If BITS runs out part-way down the tree, the
;; unfinished code is dropped and no error is signalled.
(define (decode-1 bits tree current-branch)
  (if (null? bits)
      ;; Quoted on purpose: a bare () is not a valid expression in
      ;; standard Scheme, even though some implementations accept it.
      '()
      (let ((next-branch (choose-branch (car bits) current-branch)))
        (if (leaf? next-branch)
            (cons (symbol-leaf next-branch)
                  (decode-1 (cdr bits) tree tree))
            (decode-1 (cdr bits) tree next-branch)))))
;; Picks the branch of BRANCH selected by BIT: 0 selects the left branch,
;; 1 selects the right branch.  Any other numeric value signals the error
;; "bad-bit".
(define (choose-branch bit branch)
  (if (= bit 0)
      (left-tree branch)
      (if (= bit 1)
          (right-tree branch)
          (error "bad-bit"))))
;; Inserts X into SET, a list of trees kept in increasing order of weight.
;; X is placed before the first element whose weight is strictly greater
;; than its own, so it lands after any elements of equal weight.
(define (adjoin-set x set)
  (if (null? set)
      (list x)
      (let ((head (car set)))
        (if (< (weight x) (weight head))
            (cons x set)
            (cons head (adjoin-set x (cdr set)))))))
;; Converts PAIRS, a list of (symbol weight) lists, into a list of leaves
;; ordered by increasing weight.  Each pair becomes a leaf via make-leaf
;; and is inserted into the ordered result with adjoin-set.
(define (make-leaf-set pairs)
  (if (null? pairs)
      ;; Quoted on purpose: a bare () is not a valid expression in
      ;; standard Scheme, even though some implementations accept it.
      '()
      (let ((pair (car pairs)))
        (adjoin-set (make-leaf (car pair) (cadr pair))
                    (make-leaf-set (cdr pairs))))))
;; Sample tree: leaf A (weight 4) on the left of the root; on the right,
;; leaf B (weight 2) next to a subtree holding leaves C and D (weight 1
;; each).
(define sample-tree
  (let* ((c-d (make-code-tree (make-leaf 'C 1) (make-leaf 'D 1)))
         (b-c-d (make-code-tree (make-leaf 'B 2) c-d)))
    (make-code-tree (make-leaf 'A 4) b-c-d)))

;; Sample list of bits.
(define sample-message
  '(0 1 1 0 0 1 0 1 0 1 1 1 0))
;; Encodes MESSAGE (a list of symbols) into a list of bits using TREE, by
;; concatenating the bit list that encode-symbol produces for each symbol.
(define (encode message tree)
  (if (null? message)
      ;; Quoted on purpose: a bare () is not a valid expression in
      ;; standard Scheme, even though some implementations accept it.
      '()
      (append (encode-symbol (car message) tree)
              (encode (cdr message) tree))))
;; Membership test on an unordered list: #t when some element of SET is
;; eq? to ELEMENT, #f otherwise (including for the empty list).
(define (element-of-set? element set)
  (and (not (null? set))
       (or (eq? element (car set))
           (element-of-set? element (cdr set)))))
;; Returns the list of bits that encodes the symbol MESSAGE in TREE.
;; Signals the error "Not valid value" when MESSAGE is not among the
;; symbols of TREE.
;;
;; Descends towards the branch whose symbol list contains MESSAGE,
;; emitting 0 for a left turn and 1 for a right turn.  Reaching the leaf
;; contributes no further bits, so the leaf case returns the empty list.
;; It must be a list rather than a number, because encode appends the
;; results together.
(define (encode-symbol message tree)
  (cond ((not (element-of-set? message (symbols tree)))
         (error "Not valid value"))
        ((leaf? tree) '())
        ((element-of-set? message (symbols (left-tree tree)))
         (cons 0 (encode-symbol message (left-tree tree))))
        ;; MESSAGE is in the tree but not on the left, so it is on the right.
        (else
         (cons 1 (encode-symbol message (right-tree tree))))))
;; Builds an encoding tree from PAIRS, a list of (symbol weight) lists:
;; the pairs are first turned into a weight-ordered list of leaves by
;; make-leaf-set, which is then handed to successive-merge.
(define (generate-huffman-tree pairs)
  (let ((ordered-leaves (make-leaf-set pairs)))
    (successive-merge ordered-leaves)))
;; Reduces SET, a list of trees ordered by increasing weight, to a single
;; Huffman tree.
;;
;; Repeatedly takes the two lightest trees (the first two elements),
;; combines them with make-code-tree, and re-inserts the combined node
;; into the remaining set with adjoin-set so the ordering is kept.  The
;; re-insertion is what places light symbols deep in the tree and heavy
;; symbols near the root.
;;
;; Returns the empty list for an empty SET, and the tree itself (not a
;; one-element list) when only one tree remains.
(define (successive-merge set)
  (cond ((null? set) '())
        ((null? (cdr set)) (car set))
        (else
         (successive-merge
          (adjoin-set (make-code-tree (car set) (cadr set))
                      (cddr set))))))
;; Sample data: a list of two-element lists, each a symbol followed by a
;; number, matching the (symbol weight) shape that make-leaf-set reads.
(define pairs
  (list '(a 2)
        '(na 16)
        '(boom 1)
        '(sha 3)
        '(get 2)
        '(yip 10)
        '(job 2)
        '(wah 1)))

;; Sample messages; every symbol used here also appears in pairs.
(define message1
  '(get a job))
(define message2
  '(sha na na na na na na))
(define message3
  '(wah yip yip yip yip yip))
(define message4
  '(sha boom))
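接下来假设各符号的频率依次为 2 的幂(1, 2, 4, ..., 2^(n-1))。此时每次取出权重最小的两个结点相加,其和(1 + 2 + ... + 2^(k-1) = 2^k - 1)仍然小于下一个权重 2^k,所以合并出的结点始终排在最前面,不会产生“越位”现象。也就是说,除了最底层的两个叶子之外,整棵树就是很难看的一条线下来。
例如,我们举个例子,频率为:
1 2 4 8 16
我们的编码树为 (16 (8 (4 (2 1))))
其中括号的层数可以看作树的层数。显然,由于没有越位,我们的树看起来非常简单,它的层数为 n-1:频率最高的符号只需要 1 位,频率最低的符号需要 n-1 位。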
接下来题目要求我们估计 encode 的时间复杂度(增长阶)。
为了方便,我们把相关代码从上面单独拎出来:
;; Encodes MESSAGE (a list of symbols) into a list of bits using TREE, by
;; concatenating the bit list that encode-symbol produces for each symbol.
(define (encode message tree)
  (if (null? message)
      ;; Quoted on purpose: a bare () is not a valid expression in
      ;; standard Scheme, even though some implementations accept it.
      '()
      (append (encode-symbol (car message) tree)
              (encode (cdr message) tree))))
;; Membership test on an unordered list: #t when some element of SET is
;; eq? to ELEMENT, #f otherwise (including for the empty list).
(define (element-of-set? element set)
  (and (not (null? set))
       (or (eq? element (car set))
           (element-of-set? element (cdr set)))))
;; Returns the list of bits that encodes the symbol MESSAGE in TREE.
;; Signals the error "Not valid value" when MESSAGE is not among the
;; symbols of TREE.
;;
;; Descends towards the branch whose symbol list contains MESSAGE,
;; emitting 0 for a left turn and 1 for a right turn.  Reaching the leaf
;; contributes no further bits, so the leaf case returns the empty list.
;; It must be a list rather than a number, because encode appends the
;; results together.
(define (encode-symbol message tree)
  (cond ((not (element-of-set? message (symbols tree)))
         (error "Not valid value"))
        ((leaf? tree) '())
        ((element-of-set? message (symbols (left-tree tree)))
         (cons 0 (encode-symbol message (left-tree tree))))
        ;; MESSAGE is in the tree but not on the left, so it is on the right.
        (else
         (cons 1 (encode-symbol message (right-tree tree))))))
由于不同的树、不同的待编码信息会导致不同的结果,这里我们只粗略看一下有哪些操作(设 N 为符号总数,M 为消息长度)。首先,对每个 symbol 要在根结点上做一次 element-of-set? 判断,需要 O(N);之后每向下走一层又会递归地调用 element-of-set?,在上面那种退化成一条线的树上,最坏情况下(编码频率最低的符号)累计约 1/2*N^2 次比较。最后我们要对 M 个 symbol 进行编码,所以最好情况为 O(M*N),最坏情况为 O(M*N^2)。
由于只是粗略估计,可能会有错误,欢迎指正。