2020编码大赛(4)LZW压缩算法

LZW的原理:

初始化字典为256个单字节字符(编码0-255),随着压缩的过程,字典一步步扩充,最终把全文本转换成字典的id序列,再用变长编码的方法,把id序列编码成二进制。

编码方法:0-254用8位表示,255到510用9位表示,......

当第一次需要编码9位的时候,插入255表示升位,此后0-510都用9位表示,......

因为代码是用string来操作的,所以额外加了对于\0的编码,保证算法的通用性。

 

压缩率:大概在1024:420左右

代码:

#include "compress.h"
#include <map>
#include <stdexcept>

// Number of compressed bytes decompress() loads from its input file; g_buff is sized from it.
#define BUF_SIZE 1048576
// Number of single-byte dictionary entries created up front (codes 0..255); new phrases are numbered from this value.
#define ASCII 256
// Code width, in bits, used at the start of every stream.
#define ASCIILEN 8
using namespace std;

// Holds the compressed byte stream: filled by writeBit() while encoding, loaded from the file while decoding.
char g_buff[BUF_SIZE + 5];
// Byte currently being assembled (encoding) or consumed (decoding) one bit at a time.
char g_curByte;
// Number of bytes that were actually read into g_buff when decoding.
int g_bufSize;
// Position inside g_curByte of the bit handled last; 0 means the byte is complete / used up.
int g_bits;
// Index in g_buff of the next byte to store or fetch.
int g_byte;
// Set by readBit() when it needs a new byte and the read position equals g_bufSize.
bool g_EOF;

// Appends the low `length` bits of `data` to the output stream, most
// significant bit first.
//
// Bits are packed into g_curByte from bit 7 down to bit 0. A finished byte is
// only copied into g_buff when the next bit arrives, so the caller has to
// store g_curByte itself after the last call. The caller is expected to start
// with g_bits = 8, g_byte = 0 and g_curByte = 0.
//
// data:   value whose low bits are written
// length: number of bits to write; values <= 0 write nothing
// Throws std::length_error when g_buff has no room left for another byte.
void writeBit(unsigned int data, int length)
{
    for (int shift = length - 1; shift >= 0; --shift) {
        if (!g_bits) {
            // The current byte is full. Keep the last slot of g_buff free for
            // the pending byte the caller stores after the final call.
            if (g_byte >= static_cast<int>(sizeof(g_buff)) - 1) {
                throw std::length_error("compressed output does not fit in g_buff");
            }
            g_buff[g_byte++] = g_curByte;
            g_curByte = 0;
        }
        g_bits = (g_bits + 7) % 8; // 8 -> 7, 7 -> 6, ..., 1 -> 0, 0 -> 7
        g_curByte |= ((data >> shift) & 1u) << g_bits;
    }
}

// Writes one code with the current code width, widening the width first when
// the value does not fit.
//
// The all-ones pattern of the current width is reserved as a "widen" marker.
// If `data` is at least that pattern, the marker is written, `length` grows by
// one bit, and `data` is then written with the new width. At most one marker
// is emitted per call, so `data` must fit into `length + 1` bits.
//
// data:   code to write
// length: current code width in bits; incremented when a marker is written
void writeInt(unsigned int data, int &length)
{
    const unsigned int marker = unsigned((1 << length) - 1);
    const bool fitsCurrentWidth = data < marker;
    if (!fitsCurrentWidth) {
        writeBit(marker, length);
        ++length;
    }
    writeBit(data, length);
}

// Reads the next bit of the stream in g_buff and shifts it into `val`
// (val = val * 2 + bit), most significant bit of each byte first.
//
// When a new byte is needed and the read position has reached (or already
// passed) g_bufSize, g_EOF is set and zero bits are delivered instead of
// reading beyond the valid data. g_EOF then stays set on later calls.
//
// val: accumulator that receives the bit in its lowest position
void readBit(unsigned int &val)
{
    if (!g_bits) {
        // ">=" rather than "==": the position may already be past the end,
        // e.g. when decoding starts at g_byte = 1 with no bytes loaded.
        g_EOF = (g_byte >= g_bufSize);
        if (g_EOF) {
            g_curByte = 0;
        } else {
            g_curByte = g_buff[g_byte++];
        }
    }
    g_bits = (g_bits + 7) % 8;
    val = val * 2 + ((static_cast<unsigned char>(g_curByte) >> g_bits) & 1);
}

// Reads up to `length` bits from the stream and returns them as an unsigned
// value, first bit read being the most significant.
//
// Reading stops early as soon as g_EOF is set; the bits gathered so far are
// returned and the caller is expected to check g_EOF.
unsigned int readInt(int length)
{
    unsigned int value = 0;
    for (int remaining = length; remaining != 0 && !g_EOF; --remaining) {
        readBit(value);
    }
    return value;
}

void compress(ifstream &fin, ofstream &fout)
{
    map<string, unsigned int> m;
    for (int i = 0; i < ASCII; i++) {
        m[string("") + char(i)] = i;
    }
    int bitLen = ASCIILEN;
    unsigned int index = ASCII;

    char ch;
    string str;
    while (fin.get(ch)) {
        if (!ch) {
            if (m[str]) {
                writeInt(m[str], bitLen);
                str = "";
            }
            writeInt(0, bitLen);
            continue;
        }
        if (m[str + ch]) {
            str += ch;
        } else {
            writeInt(m[str], bitLen);
            m[str + ch] = index++;
            str = ch;
        }
    }
    if (str != "") {
        writeInt(m[str], bitLen);
    }
}

// Decodes the stream held in g_buff and writes the restored bytes to `fout`.
//
// The caller must have loaded g_buff and set g_byte = 1, g_bits = 0 and
// g_EOF = false: the first code is taken straight from g_buff[0], every later
// code comes from readInt(). Code 0 stands for a literal NUL byte and resets
// the phrase context; the all-ones value of the current width means "codes
// are one bit wider from now on".
//
// fout: destination for the decoded bytes
// fin:  not used here; the caller has already read the file into g_buff
void decompress(ofstream &fout, ifstream &fin)
{
    (void)fin;

    map<int, string> m;
    for (int i = 0; i < ASCII; i++) {
        m[i] = string(1, char(i));
    }
    int bitLen = ASCIILEN;
    unsigned int index = ASCII;
    const char zero = 0;

    string oldstr, newstr;
    g_curByte = g_buff[0];
    // Convert through unsigned char: where plain char is signed, a first byte
    // of 0x80..0xFF would otherwise become a negative code, losing the first
    // symbol and missing the widen marker 255.
    int code = static_cast<unsigned char>(g_curByte);
    if (code == ASCII - 1) { // stream starts with the widen marker
        bitLen++;
        code = readInt(bitLen);
    }
    if (code == 0) {
        fout << zero;
    } else {
        map<int, string>::const_iterator first = m.find(code);
        if (first != m.end()) {
            oldstr = first->second;
        }
        fout << oldstr;
    }

    while (true) {
        code = readInt(bitLen);
        if (g_EOF) {
            break;
        }
        if (code == (1 << bitLen) - 1) {
            bitLen++;
            code = readInt(bitLen);
            if (g_EOF) {
                break; // stream ended right after a widen marker
            }
        }

        if (code == 0) {
            fout << zero;
            oldstr = "";
            continue;
        }

        map<int, string>::const_iterator known = m.find(code);
        if (known != m.end() && !known->second.empty()) {
            newstr = known->second;
        } else {
            // Code not in the dictionary yet: treated as the previous phrase
            // followed by its own first byte.
            newstr = oldstr + oldstr[0];
        }
        if (!oldstr.empty()) {
            m[index++] = oldstr + newstr[0];
        }
        oldstr = newstr;
        fout << oldstr;
    }
}

void compress(const string input, const string output)
{
    ifstream fin(input.c_str(), ios::binary);
    ofstream fout(output.c_str(), ios::binary);
    g_bits = 8;
    g_byte = 0;
    g_curByte = 0;
    compress(fin, fout);
    fin.close();
    g_buff[g_byte++] = g_curByte;
    fout.write(g_buff, sizeof(char) * g_byte);
    fout.close();
}

// Decompresses the file `input` into the file `output`.
//
// At most BUF_SIZE bytes of `input` are loaded into g_buff and decoded from
// there. If nothing could be read (empty or unreadable input), the decoder is
// not run and `output` is left empty, because the decoder unconditionally
// consumes g_buff[0] as the first code.
void decompress(const string input, const string output)
{
    ifstream fin(input.c_str(), ios::binary);
    ofstream fout(output.c_str(), ios::binary);

    // Bit reader state: g_buff[0] is consumed directly by the decoder, so
    // bit-wise reading starts at byte 1 with no bits pending.
    g_EOF = false;
    g_bits = 0;
    g_byte = 1;

    fin.read(g_buff, BUF_SIZE);
    g_bufSize = int(fin.gcount());
    if (g_bufSize > 0) {
        decompress(fout, fin);
    }
    fout.close();
    fin.close();
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值