不知道大家在 Node.js 开发中有没有遇到过需要解压 ZIP 文件的场景。我在处理 ZIP 包内文件名乱码的问题时查阅了许多资料,最终借助 JSZip(配合 iconv-lite 和 charset-detector)成功解决了这个问题。在此把处理方法分享给大家,有类似需求的朋友可以测试一下,希望能帮到大家。
import * as JSZip from 'jszip';
import * as iconv from 'iconv-lite';
const detect = require('charset-detector');
/**
 * Collects the raw (undecoded) bytes of every entry name in a ZIP archive and
 * passes them to `charset-detector` to guess how the names were encoded.
 *
 * @param data - The ZIP archive contents.
 * @returns Whatever `charset-detector` returns for the collected name bytes.
 *   Other code in this file reads `charsetName` and `lang` from each element.
 */
export async function getZipEncodingCharsets(data: Buffer | Blob): Promise<any> {
  const nameChunks: Buffer[] = [];
  await JSZip.loadAsync(data, {
    decodeFileName(bytes: any) {
      // Copy the bytes explicitly: Array.prototype.concat does not spread typed
      // arrays, it appends the whole typed array as a single element, which
      // would leave the detector with nothing byte-like to inspect.
      nameChunks.push(Buffer.from(bytes));
      // The archive loaded here is discarded, so the returned name is only a
      // placeholder required by the callback contract.
      return bytes.toString();
    },
  });
  return detect(Buffer.concat(nameChunks));
}
/**
 * Builds the ordered, duplicate-free list of charset names to try when
 * decoding the entry names of a ZIP archive.
 *
 * Order of the result: the first detected charset whose `lang` is `'zh'` (if
 * any), then `'UTF-8'`, then `'GB18030'`, then every remaining detected
 * charset in detection order. `UTF-32BE` and `UTF-32LE` are never included.
 *
 * @param data - The ZIP archive contents.
 * @returns The candidate charset names, or an empty array when detection
 *   yielded no matches at all.
 */
export async function getZipEncodingCharsetNames(data: Buffer | Blob): Promise<string[]> {
  const detected = await getZipEncodingCharsets(data);
  if (detected.length === 0) {
    return [];
  }

  const skipped: string[] = ['UTF-32BE', 'UTF-32LE'];
  const usable = detected.filter((match: any) => !skipped.includes(match.charsetName));
  const chinese = usable.find((match: any) => match.lang === 'zh');

  const ordered: string[] = [
    ...(chinese ? [chinese.charsetName] : []),
    'UTF-8',
    'GB18030',
    ...usable.map((match: any) => match.charsetName),
  ];

  // A Set keeps the first occurrence of each name and preserves insertion order.
  return Array.from(new Set(ordered));
}
/**
 * Reports whether `value` consists solely of ASCII letters and digits.
 * The empty string also passes, because the pattern allows zero characters.
 */
function checknum(value: string): boolean {
  return /^[A-Za-z0-9]*$/.test(value);
}

/**
 * Heuristically decides whether decoded ZIP entry names look garbled.
 *
 * Underscores, hyphens, dots, slashes, whitespace and parentheses are ignored.
 * Every remaining character must be an ASCII letter/digit or a CJK ideograph
 * in the range U+4E00–U+9FA5; any other character marks the text as garbled.
 *
 * @param strName - The decoded name(s) to inspect.
 * @returns `true` when at least one unexpected character is present.
 */
function isMessyCode(strName: string): boolean {
  // `\p{L}` / `\p{N}` are intentionally not used. Without the `u` flag they
  // only match the literal text "p{L}" / "p{N}", and with the flag they would
  // accept every letter of every script, including the accented Latin letters
  // that wrongly decoded names are typically made of.
  const significant = strName.replace(/[_\-./\s()]/g, '');
  const commonCjk = /[\u4e00-\u9fa5]/;
  return significant.split('').some((ch) => !checknum(ch) && !commonCjk.test(ch));
}
/**
 * Loads the archive once per candidate charset, in the given order, and
 * returns the first load whose decoded entry names do not look garbled.
 *
 * @param data - The ZIP archive contents.
 * @param charsets - Candidate charset names, tried first to last. The array
 *   is not modified.
 * @returns The loaded archive together with the charset that produced clean
 *   names, or `undefined` when no candidate did.
 */
async function getTheRightZip(data: Buffer | Blob, charsets: string[]): Promise<{ zip: JSZip, charset: string } | void> {
  if (!charsets || charsets.length === 0) {
    return undefined;
  }

  for (const charset of charsets) {
    // Skip names iconv-lite cannot decode instead of letting one unsupported
    // candidate fail the load and end the search for all remaining candidates.
    if (!charset || !iconv.encodingExists(charset)) {
      continue;
    }

    const zip = await JSZip.loadAsync(data, {
      decodeFileName(bytes: any) {
        return iconv.decode(bytes, charset);
      },
    });

    // Judge all entry names together: one garbled name rejects the charset.
    const names: string[] = [];
    zip.forEach((relativePath: string) => {
      names.push(relativePath);
    });

    if (!isMessyCode(names.join(''))) {
      return { zip, charset };
    }
  }

  return undefined;
}
/**
 * Loads a ZIP archive, decoding its entry names with an automatically chosen
 * charset.
 *
 * Candidate charsets come from `getZipEncodingCharsetNames`; the first one
 * accepted by `getTheRightZip` wins. When none is accepted, or that search
 * throws, the archive is loaded with entry names decoded as UTF-8.
 *
 * @param data - The ZIP archive contents.
 * @returns The loaded archive and the charset used for its entry names
 *   (`'utf-8'` for the fallback).
 */
export async function getZipWithAutoDetectedCharset(data: Buffer | Blob): Promise<{ zip: JSZip, charset: string }> {
  const charsets = await getZipEncodingCharsetNames(data);

  try {
    const detected = await getTheRightZip(data, charsets);
    if (detected) {
      return { zip: detected.zip, charset: detected.charset };
    }
  } catch (e) {
    // Best effort: a failed search is logged and the UTF-8 fallback is used.
    console.error(e);
  }

  const zip = await JSZip.loadAsync(data, {
    decodeFileName(bytes: any) {
      // decodeFileName must return a string; returning the Buffer itself would
      // leave the archive with non-string entry names.
      return Buffer.from(bytes).toString('utf-8');
    },
  });
  return { zip, charset: 'utf-8' };
}