GO实现文件压缩算法

8 篇文章 0 订阅

实现原理

   读取文件,统计字符出现次数为权值,构建哈夫曼树,获取每个字符的哈夫曼编码,写入文件。

 

压缩文件头定义

type compressHead struct {
	srclen, dstlen, keymapLen uint32	//源文件字符个数  压缩文件字符个数   哈夫曼编码字符映射个数
	patchBit                  uint8     //压缩后不足8bit补0个数
	keysMap                   map[interface{}]uint32	//字符统计构建哈夫曼树
}

压缩实现过程如下

//按照小端模式写入文件
func getCompressedBytes(pHead *compressHead, data []byte) []byte {
	buf := new(bytes.Buffer)
	if err := binary.Write(buf, binary.LittleEndian, pHead.srclen); err == nil {
		if err = binary.Write(buf, binary.LittleEndian, pHead.dstlen); err != nil {
			fmt.Println(err.Error())
		}

		if err = binary.Write(buf, binary.LittleEndian, pHead.keymapLen); err != nil {
			fmt.Println(err.Error())
		}

		if err = binary.Write(buf, binary.LittleEndian, pHead.patchBit); err != nil {
			fmt.Println(err.Error())
		}

		for key, value := range pHead.keysMap {
			if err = binary.Write(buf, binary.LittleEndian, key); err != nil {
				fmt.Println(err.Error())
			}

			if err = binary.Write(buf, binary.LittleEndian, value); err != nil {
				fmt.Println(err.Error())
			}
		}

		if err = binary.Write(buf, binary.LittleEndian, data); err != nil {
			fmt.Println(err.Error())
		}
	} else {
		fmt.Println(err.Error())
	}
	return buf.Bytes()
}

func Compress(strInFileName, strOutFileName string) bool {
	if data, err := ioutil.ReadFile(strInFileName); err != nil {
		fmt.Println(err.Error())
		return false
	} else {
		keys := make(map[interface{}]uint32)
		for i := 0; i < len(data); i++ {
			if _, ok := keys[data[i]]; ok {
				keys[data[i]]++
			} else {
				keys[data[i]] = 1
			}
		}

		if pTree := CreatHuffman(keys); pTree != nil {
			pHead := &compressHead{srclen: (uint32(len(data))), keysMap: keys}
			compressdata := make([]byte, 0)
			factor := make(map[byte][]byte)
			for i := 0; i < len(data); i++ {
				if value, ok := factor[data[i]]; ok {
					compressdata = append(compressdata, value...)
				} else {
					if code, codeok := GetHuffmanCode(data[i], pTree); codeok {
						factor[data[i]] = code
						compressdata = append(compressdata, code...)
					}
				}
			}

			pHead.keymapLen = (uint32(len(pHead.keysMap)))
			pHead.patchBit = (uint8)(8 - len(compressdata)%8)
			for i := uint8(0); i < pHead.patchBit; i++ {
				compressdata = append(compressdata, 0)
			}

			afterdata := make([]byte, 0)
			for ; len(compressdata) >= 8; compressdata = compressdata[8:] {
				var b byte = 0
				for i := 0; i < 8; i++ {
					b |= compressdata[i] << (7 - i)
				}

				afterdata = append(afterdata, b)
			}

			fmt.Printf("after data:%v\n", afterdata)
			pHead.dstlen = (uint32(len(afterdata)))
			buf := getCompressedBytes(pHead, afterdata)
			ioutil.WriteFile(strOutFileName, buf, 0666)
		}
	}

	return true
}

构造哈夫曼树可以参考哈夫曼树构建

解压缩过程

解压缩读取压缩文件头部信息,构建哈夫曼树,bit遍历每个字符进行解压缩

func getCompressedHead(data []byte) (*compressHead, []byte) {
	pHead := new(compressHead)
	buf := bytes.NewBuffer(data)

	binary.Read(buf, binary.LittleEndian, &pHead.srclen)
	binary.Read(buf, binary.LittleEndian, &pHead.dstlen)
	binary.Read(buf, binary.LittleEndian, &pHead.keymapLen)
	binary.Read(buf, binary.LittleEndian, &pHead.patchBit)
	pHead.keysMap = make(map[interface{}]uint32)
	for i := uint32(0); i < pHead.keymapLen; i++ {
		var key byte
		var value uint32
		binary.Read(buf, binary.LittleEndian, &key)
		binary.Read(buf, binary.LittleEndian, &value)
		pHead.keysMap[key] = value
	}

	dstdata := make([]byte, pHead.dstlen)
	binary.Read(buf, binary.LittleEndian, dstdata[:pHead.dstlen])

	return pHead, dstdata
}

func UnCompress(strInFileName, strOutFileName string) bool {
	if data, err := ioutil.ReadFile(strInFileName); err != nil {
		fmt.Println(err.Error())
		return false
	} else {
		pHead, dst := getCompressedHead(data)
		src := make([]byte, pHead.srclen)
		srcindex := 0
		if pTree := CreatHuffman(pHead.keysMap); pTree != nil {
			pTmpTree := pTree
			bEnd := false
			for len(dst) >= 1 && !bEnd {
				b := dst[0]
				for i := 7; i >= 0; i-- {
					if (b>>i)&1 != 0 {
						pTmpTree = pTmpTree.GetRight()
					} else {
						pTmpTree = pTmpTree.GetLeft()
					}

					if nil != pTmpTree && pTmpTree.Value != nil {
						v, _ := GetHuffmanValue(pTmpTree).(byte)
						src[srcindex] = v
						srcindex++

						if len(dst) == 1 && (uint8(i)) == pHead.patchBit {
							bEnd = true
							break
						}

						pTmpTree = pTree
					}
				}

				dst = dst[1:]
			}
		}

		ioutil.WriteFile(strOutFileName, src, 0666)
	}

	return true
}

 

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值