Original link: Compressing data with pako in Node.js
GitHub
https://github.com/nodeca/pako
Use cases
Large objects are usually passed around as JSON, and in a Web Worker message or in localStorage the data ultimately has to be a string. pako can compress and decompress strings or Uint8Array data, trading some read/write performance for a much smaller footprint.
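For the localStorage case, a minimal sketch might look like the following (assuming pako 1.x, where deflate/inflate accept a to: 'string' option, and that pako is available as a global or via a bundler; saveCompressed/loadCompressed are just illustrative names):
// Compress a JSON payload into a "binary string" so it fits localStorage's string-only API.
function saveCompressed(key, obj) {
  const json = JSON.stringify(obj)
  const packed = pako.deflate(json, { to: 'string' }) // binary string, char codes 0..255
  localStorage.setItem(key, packed)
}

function loadCompressed(key) {
  const packed = localStorage.getItem(key)
  if (packed === null) return null
  const json = pako.inflate(packed, { to: 'string' })
  return JSON.parse(json)
}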
A simple example
If to: 'string' is specified, the input passed in can also be a Buffer.
const pako = require('pako')
const fs = require('fs')

function test(path) {
  let s = fs.readFileSync(path, 'utf8')
  console.log(s.length)
  let st = +new Date()
  // {to: 'string'} makes deflate return a "binary string" instead of a Uint8Array
  let compressed = pako.deflate(s, {to: 'string'})
  let ed = +new Date()
  console.log(compressed.length, compressed.length / s.length, ed - st)
  st = +new Date()
  // decompress back to the original string
  let raw = pako.inflate(compressed, {to: 'string'})
  ed = +new Date()
  console.log(raw.length, ed - st)
}
// ~3.5 MB
let big = "D:/data/高考成绩/高考爬虫/json/school.json"
// ~2 KB
let small = "D:/data/高考成绩/高考爬虫/json/province.json"
test(big)
test(small)
Using Buffer and Uint8Array
function test(path) {
  // readFileSync without an encoding returns a Buffer; wrap it as a Uint8Array
  let s = new Uint8Array(fs.readFileSync(path))
  console.log(s.length)
  let st = +new Date()
  let compressed = pako.deflate(s)   // returns a Uint8Array
  let ed = +new Date()
  console.log(compressed.length, compressed.length / s.length, ed - st)
  st = +new Date()
  let raw = pako.inflate(compressed) // also a Uint8Array
  ed = +new Date()
  console.log(raw.length, ed - st)
}
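Note that without a to: 'string' option the inflated result is a Uint8Array of raw bytes; if the original file was UTF-8 text it can be decoded afterwards. A small sketch (inflateToText is just an illustrative name):
const pako = require('pako')

// Decode inflated bytes back into text (assumes the original data was UTF-8).
function inflateToText(compressed) {
  const bytes = pako.inflate(compressed)      // Uint8Array
  return Buffer.from(bytes).toString('utf8')  // or: new TextDecoder().decode(bytes)
}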
Testing read/write and conversion speed
Reading and converting are fast; most of the time goes into compressing large files.
function test(path) {
  let st = +new Date()
  let buffer = fs.readFileSync(path)  // read the file into a Buffer
  let ed = +new Date()
  console.log(ed - st, buffer.length)
  st = +new Date()
  let ua = new Uint8Array(buffer)     // Buffer -> Uint8Array
  ed = +new Date()
  console.log(ed - st, ua.length, buffer.length)
  st = +new Date()
  let b2 = Buffer.from(ua)            // Uint8Array -> Buffer
  ed = +new Date()
  console.log(ed - st, ua.length, buffer.length, b2.length)
}
// big file (ms, lengths):
// 2 3425144
// 2 3425144 3425144
// 1 3425144 3425144 3425144
// small file:
// 0 1372
// 0 1372 1372
// 0 1372 1372 1372
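The +new Date() pattern only has millisecond resolution; console.time / console.timeEnd (or process.hrtime.bigint()) give a tidier way to take the same measurements. A possible variant of the harness (timeCompression is an illustrative name):
const fs = require('fs')
const pako = require('pako')

function timeCompression(path) {
  const buffer = fs.readFileSync(path)  // a Buffer is also a Uint8Array, so pako accepts it
  console.time('deflate')
  const compressed = pako.deflate(buffer)
  console.timeEnd('deflate')
  console.time('inflate')
  pako.inflate(compressed)
  console.timeEnd('inflate')
  console.log(buffer.length, compressed.length)
}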
Test results
Compression is fairly slow on large inputs, but the ratio is excellent: the output is roughly one tenth of the original size, and even small inputs compress well.
Big file:
3425144
427696 0.12486949453803986 282
3425144 135

Small file:
1372
341 0.24854227405247814 2
1372 1
Other options
Additional options, for internal needs:
- chunkSize - size of generated data chunks (16K by default)
- raw (Boolean) - do raw deflate
- gzip (Boolean) - create gzip wrapper
- to (String) - if equal to 'string', then result will be "binary string" (each char code [0..255])
- header (Object) - custom header for gzip:
  - text (Boolean) - true if compressed data believed to be text
  - time (Number) - modification time, unix timestamp
  - os (Number) - operation system code
  - extra (Array) - array of bytes with extra data (max 65536)
  - name (String) - file name (binary string)
  - comment (String) - comment (binary string)
  - hcrc (Boolean) - true if header crc should be added
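For instance, the gzip wrapper and its header fields can be exercised through pako.gzip / pako.ungzip. A sketch assuming pako 1.x; the header values here are just placeholders:
const pako = require('pako')

const data = 'hello '.repeat(1000)

// Create a gzip-wrapped stream with a custom header.
const gz = pako.gzip(data, {
  header: {
    text: true,                          // payload is text
    time: Math.floor(Date.now() / 1000), // modification time, unix timestamp
    name: 'hello.txt',                   // stored file name
    comment: 'compressed with pako'
  }
})
console.log(gz.length)

// inflate/ungzip autodetect the gzip wrapper; {to: 'string'} returns the original text.
const back = pako.ungzip(gz, { to: 'string' })
console.log(back === data) // true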