/**
 * Tells whether a value is a non-empty string consisting solely of
 * hexadecimal digits (0-9, a-f, A-F).
 *
 * @param {*} val - Candidate value.
 * @returns {boolean|undefined} `true` or `false` for strings; `undefined`
 *   when `val` is not a string at all.
 */
function isHex(val) {
  if (typeof val === 'string') {
    const hexDigitsOnly = /^[0-9a-fA-F]+$/;
    return hexDigitsOnly.test(val);
  }
  return undefined;
}
5
/**
 * Tells whether a value is a non-empty string consisting solely of the
 * characters `0` and `1`.
 *
 * @param {*} val - Candidate value.
 * @returns {boolean|undefined} `true` or `false` for strings; `undefined`
 *   when `val` is not a string at all.
 */
function isBinary(val) {
  if (typeof val === 'string') {
    const bitsOnly = /^[01]+$/;
    return bitsOnly.test(val);
  }
  return undefined;
}
10
/**
 * Converts a string of binary digits to lowercase hexadecimal.
 *
 * Leading zeros are dropped from the result, but a value of zero is
 * returned as `'0'` rather than an empty string.
 *
 * @param {string} binary - Non-empty string of `0`/`1` characters.
 * @returns {string|undefined} Hex digits, or `undefined` when `binary`
 *   is rejected by `isBinary`.
 */
function binaryToHex(binary) {
  if (!isBinary(binary)) return;

  // Left-pad to a whole number of nibbles so each 4-bit group maps to
  // exactly one hex digit.
  const nibbleCount = Math.ceil(binary.length / 4);
  const padded = binary.padStart(nibbleCount * 4, '0');
  const digits = padded.match(/[01]{4}/g).map((nibble) => Number.parseInt(nibble, 2).toString(16));

  // The lookahead keeps the last digit, so an all-zero input yields '0'.
  return digits.join('').replace(/^0+(?=.)/, '');
}
17
/**
 * Converts a string of hexadecimal digits to binary digits.
 *
 * Leading zeros are dropped from the result, but a value of zero is
 * returned as `'0'` rather than an empty string.
 *
 * @param {string} hex - Non-empty string of hex digits (either case).
 * @returns {string|undefined} Binary digits, or `undefined` when `hex`
 *   is rejected by `isHex`.
 */
function hexToBinary(hex) {
  if (!isHex(hex)) return;

  // Every hex digit expands to exactly four bits.
  const bits = Array.from(hex, (digit) => Number.parseInt(digit, 16).toString(2).padStart(4, '0')).join('');

  // The lookahead keeps the last bit, so an all-zero input yields '0'.
  return bits.replace(/^0+(?=.)/, '');
}
24
/**
 * Numerically compares two unsigned values given as hexadecimal or binary
 * digit strings.
 *
 * A string made up only of `0`/`1` characters is always read as binary
 * (so `'10'` means two, not sixteen); any other valid hex string is read
 * as hexadecimal. Letter case and leading zeros do not affect the result.
 *
 * @param {string} a - Left operand.
 * @param {string} b - Right operand.
 * @returns {number|undefined} `1` if a > b, `-1` if a < b, `0` if they are
 *   equal, or `undefined` when either operand is neither binary nor hex.
 */
function hexCompare(a, b) {
  // Reduce an operand to lowercase hex without leading zeros, or undefined
  // when it is not a valid digit string.
  const toCanonicalHex = (value) => {
    // `|| '0'` covers a conversion that comes back empty for all-zero input.
    const hex = isBinary(value) ? binaryToHex(value) || '0' : value;
    if (!isHex(hex)) return undefined;
    return hex.toLowerCase().replace(/^0+(?=.)/, '');
  };

  const left = toCanonicalHex(a);
  const right = toCanonicalHex(b);
  if (left === undefined || right === undefined) return;

  // Without leading zeros, the longer string is the larger number.
  if (left.length !== right.length) return left.length > right.length ? 1 : -1;
  if (left === right) return 0;

  // Strings are compared by code unit; for equal-length lowercase hex
  // ('0'-'9' sort before 'a'-'f') that matches numeric order.
  return left > right ? 1 : -1;
}
39
/**
 * Encodes a single Unicode code point as UTF-8 and returns the bytes as a
 * lowercase hexadecimal string.
 *
 * The code point may be given as a hex string or as a binary string; a
 * string made up only of `0`/`1` characters is always read as binary.
 * Leading zeros are ignored. Code points up to 0x7f are returned as their
 * plain hex value (no zero padding); larger ones produce 2 to 4 bytes.
 *
 * @param {string} unicode - Code point as hex or binary digits.
 * @returns {string|undefined} UTF-8 bytes in hex, or `undefined` when the
 *   input is not a digit string or exceeds 0x10ffff.
 */
function unicodeToUtf8(unicode) {
  let codePoint;
  if (isBinary(unicode)) {
    codePoint = Number.parseInt(unicode, 2);
  } else if (isHex(unicode)) {
    codePoint = Number.parseInt(unicode, 16);
  } else {
    return;
  }

  if (codePoint > 0x10ffff) return;
  if (codePoint <= 0x7f) return codePoint.toString(16);

  // Pick the sequence length from the value range (not from the bit-string
  // length) so the lead byte always has room for its payload bits.
  let byteCount;
  let leadTag;
  if (codePoint <= 0x7ff) {
    byteCount = 2;
    leadTag = 0xc0; // 110xxxxx
  } else if (codePoint <= 0xffff) {
    byteCount = 3;
    leadTag = 0xe0; // 1110xxxx
  } else {
    byteCount = 4;
    leadTag = 0xf0; // 11110xxx
  }

  // Peel off 6 payload bits per continuation byte, last byte first.
  const bytes = [];
  let remaining = codePoint;
  for (let i = 1; i < byteCount; i++) {
    bytes.unshift(0x80 | (remaining & 0x3f)); // 10xxxxxx
    remaining >>= 6;
  }
  bytes.unshift(leadTag | remaining);

  // Every byte is >= 0x80, so each one renders as exactly two hex digits.
  return bytes.map((byte) => byte.toString(16)).join('');
}
57
/**
 * Decodes the UTF-8 encoding of a single character into its Unicode code
 * point, returned as a lowercase hexadecimal string.
 *
 * The bytes may be given as a hex string or as a binary string; a string
 * made up only of `0`/`1` characters is always read as binary. Leading
 * zeros are ignored.
 *
 * @param {string} utf8 - UTF-8 bytes of one character, as hex or binary.
 * @returns {string|undefined} Code point in hex, or `undefined` when the
 *   input is not a digit string or is not one well-formed UTF-8 sequence
 *   (bad lead byte, wrong length, bad continuation byte, overlong form, or
 *   a value above 0x10ffff).
 */
function utf8ToUnicode(utf8) {
  let hex;
  if (isBinary(utf8)) {
    const bits = utf8.replace(/^0+(?=.)/, '');
    // A UTF-8 sequence is at most 4 bytes, i.e. 32 bits.
    if (bits.length > 32) return;
    hex = Number.parseInt(bits, 2).toString(16);
  } else if (isHex(utf8)) {
    hex = utf8.toLowerCase().replace(/^0+(?=.)/, '');
  } else {
    return;
  }

  if (hex.length > 8) return;
  // A single ASCII byte below 0x10 has only one hex digit; restore the pair.
  if (hex.length % 2 === 1) hex = `0${hex}`;

  const bytes = hex.match(/../g).map((pair) => Number.parseInt(pair, 16));
  const [lead, ...continuations] = bytes;

  // 0xxxxxxx: the byte is the code point itself.
  if (lead <= 0x7f) return bytes.length === 1 ? lead.toString(16) : undefined;

  // Lead-byte layouts: tag bits, payload mask, total length, and the
  // smallest code point that legitimately needs that length.
  const layouts = [
    { tagMask: 0xe0, tag: 0xc0, payloadMask: 0x1f, length: 2, minimum: 0x80 },
    { tagMask: 0xf0, tag: 0xe0, payloadMask: 0x0f, length: 3, minimum: 0x800 },
    { tagMask: 0xf8, tag: 0xf0, payloadMask: 0x07, length: 4, minimum: 0x10000 },
  ];
  const layout = layouts.find(({ tagMask, tag }) => (lead & tagMask) === tag);
  if (layout === undefined || bytes.length !== layout.length) return;

  let codePoint = lead & layout.payloadMask;
  for (const byte of continuations) {
    // Continuation bytes must look like 10xxxxxx.
    if ((byte & 0xc0) !== 0x80) return;
    codePoint = (codePoint << 6) | (byte & 0x3f);
  }

  // Reject overlong encodings and values outside the Unicode range.
  if (codePoint < layout.minimum || codePoint > 0x10ffff) return;
  return codePoint.toString(16);
}
75
/**
 * Describes the first character of `text` in several notations.
 *
 * Only the first code point is considered; an astral character (stored as
 * a surrogate pair) is treated as one character.
 *
 * @param {string} text - Text whose first character is inspected.
 * @returns {{binary: string, hex: string, unicode: string, utf8: string}|undefined}
 *   The code point in binary and hex (`unicode` equals `hex`) plus its
 *   UTF-8 bytes in lowercase hex; `undefined` when `text` is not a
 *   non-empty string.
 * @throws {URIError} When the first character is an unpaired surrogate,
 *   which has no UTF-8 encoding.
 */
function formatData(text) {
  if (typeof text !== 'string' || text.length === 0) return;

  // codePointAt reads a whole surrogate pair, unlike charCodeAt.
  const code = text.codePointAt(0);
  const binary = code.toString(2);
  const hex = code.toString(16);
  const unicode = hex;

  // ASCII is its own UTF-8 byte. Otherwise percent-encode just the first
  // character and strip the '%' signs to get the raw bytes in hex.
  const utf8 =
    code > 0x7f ? encodeURIComponent(String.fromCodePoint(code)).replace(/%/g, '').toLowerCase() : hex;

  return { binary, hex, unicode, utf8 };
}
85
86 export { isHex, isBinary, binaryToHex, hexToBinary, hexCompare, unicodeToUtf8, utf8ToUnicode, formatData };
unicode 与 utf8 互转
最新推荐文章于 2024-09-02 17:34:07 发布
本文提供了一系列 JavaScript 函数,用于在十六进制、二进制、Unicode 码点和 UTF-8 编码之间进行转换,并实现十六进制数值的大小比较。主要函数包括 isHex、isBinary、binaryToHex、hexToBinary、hexCompare、unicodeToUtf8、utf8ToUnicode 以及 formatData。
摘要由CSDN通过智能技术生成