手写转换:
function toUint8Arr(str) {
const buffer = [];
for (let i of str) {
const _code = i.charCodeAt(0);
if (_code < 0x80) {
buffer.push(_code);
} else if (_code < 0x800) {
buffer.push(0xc0 + (_code >> 6));
buffer.push(0x80 + (_code & 0x3f));
} else if (_code < 0x10000) {
buffer.push(0xe0 + (_code >> 12));
buffer.push(0x80 + (_code >> 6 & 0x3f));
buffer.push(0x80 + (_code & 0x3f));
}
}
return Uint8Array.from(buffer);
}
Unicode转utf8的规则:
Unicode符号范围 | UTF-8编码方式
(十六进制) | (二进制)
--------------------+---------------------------------------------
0000 0000-0000 007F | 0xxxxxxx
0000 0080-0000 07FF | 110xxxxx 10xxxxxx
0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx