输出
可按编码顺序输出《GB 18030-2022》中规定的单字节、双字节及四字节中文编码字符集(其中四字节部分缺省不输出,需在代码中手动启用)。
一般不输出保留区(ReservedZone)字符和用户自定义区(PrivateUseArea)字符。
Lua实现代码
--- Report whether a two-byte code falls in one of the ranges this script
-- treats as the double-byte private use (user-defined) area.
-- Ranges checked (lead byte / trail byte):
--   0xAA-0xAF / 0xA1-0xFE
--   0xF8-0xFE / 0xA1-0xFE
--   0xA1-0xA7 / 0x40-0x7E or 0x80-0xA0
-- @param ch1 lead byte value
-- @param ch2 trail byte value
-- @return true if (ch1, ch2) lies in any of the ranges above, else false
function IsInDoubleBytesPrivateUseArea(ch1, ch2)
  -- The three lead-byte ranges are disjoint, so once the lead byte selects
  -- a range the answer depends on the trail byte alone.
  if (ch1 >= 0xAA and ch1 <= 0xAF) or (ch1 >= 0xF8 and ch1 <= 0xFE) then
    return ch2 >= 0xA1 and ch2 <= 0xFE
  end
  if ch1 >= 0xA1 and ch1 <= 0xA7 then
    return (ch2 >= 0x40 and ch2 <= 0x7E) or (ch2 >= 0x80 and ch2 <= 0xA0)
  end
  return false
end
--- Report whether a two-byte code falls in one of the ranges this script
-- treats as double-byte graphic symbols.
-- Ranges checked (lead byte / trail byte):
--   0xA1-0xA9 / 0xA1-0xFE
--   0xA8-0xA9 / 0x40-0x7E or 0x80-0xA0
-- @param ch1 lead byte value
-- @param ch2 trail byte value
-- @return true if (ch1, ch2) lies in either range, else false
function IsDoubleBytesGraphNotation(ch1, ch2)
  local lead_in_symbol_rows = ch1 >= 0xA1 and ch1 <= 0xA9
  if lead_in_symbol_rows and ch2 >= 0xA1 and ch2 <= 0xFE then
    return true
  end
  -- Only lead bytes 0xA8 and 0xA9 also accept the low trail-byte ranges.
  if ch1 >= 0xA8 and ch1 <= 0xA9 then
    return (ch2 >= 0x40 and ch2 <= 0x7E) or (ch2 >= 0x80 and ch2 <= 0xA0)
  end
  return false
end
--- Report whether a two-byte code falls in one of the ranges this script
-- treats as double-byte Chinese ideographs.
-- Ranges checked (lead byte / trail byte):
--   0xB0-0xF7 / 0xA1-0xFE
--   0x81-0xA0 / 0x40-0x7E or 0x80-0xFE
--   0xAA-0xFE / 0x40-0x7E or 0x80-0xA0
-- @param ch1 lead byte value
-- @param ch2 trail byte value
-- @return true if (ch1, ch2) lies in any of the ranges above, else false
function IsDoubleBytesChineseIdeograph(ch1, ch2)
  if ch1 >= 0xB0 and ch1 <= 0xF7 and ch2 >= 0xA1 and ch2 <= 0xFE then
    return true
  end
  -- Lead bytes 0x81-0xA0 and 0xAA-0xFE do not overlap, so each lead byte
  -- selects at most one of the two remaining trail-byte tests.
  if ch1 >= 0x81 and ch1 <= 0xA0 then
    return (ch2 >= 0x40 and ch2 <= 0x7E) or (ch2 >= 0x80 and ch2 <= 0xFE)
  end
  if ch1 >= 0xAA and ch1 <= 0xFE then
    return (ch2 >= 0x40 and ch2 <= 0x7E) or (ch2 >= 0x80 and ch2 <= 0xA0)
  end
  return false
end
do
  -- Inclusive {first, last} four-byte code ranges that count as NOT reserved.
  -- Built once here instead of on every call: the function is meant to be
  -- called for every candidate four-byte code.
  --
  -- NOTE(review): the three broad ranges listed first already contain every
  -- finer range below them, so the finer entries never change the result.
  -- With this table a well-formed code is reported as reserved exactly when
  -- its lead byte is 0x85-0x8F or 0xE4-0xFC. Confirm that this is the
  -- intended notion of "reserved" before pruning either group.
  local NON_RESERVED_RANGES = {
    -- Broad ranges (checked first so most lookups end immediately).
    { 0x81308130, 0x8439FE39 },
    { 0x90308130, 0xE339FE39 },
    { 0xFD308130, 0xFE39FE39 },
    -- Finer ranges kept from the original table.
    { 0x81318132, 0x81319934 },
    { 0x8430BA32, 0x8430FE35 },
    { 0x84318730, 0x84319530 },
    { 0x8132E834, 0x8132FD31 },
    { 0x8134D238, 0x8134E337 },
    { 0x9034C538, 0x9034C730 },
    { 0x8134F434, 0x8134F830 },
    { 0x8134F932, 0x81358437 },
    { 0x81358B32, 0x81359935 },
    { 0x82359833, 0x82369435 },
    { 0x82369535, 0x82369A32 },
    { 0x81339D36, 0x8133B635 },
    { 0x8139A933, 0x8139B734 },
    { 0x8237CF35, 0x8336BE36 },
    { 0x9232C636, 0x9232D635 },
    { 0x81398B32, 0x8139A135 },
    -- The upper bound used to read 0x81358738, which is below the lower
    -- bound and made this entry unmatchable. 0x82358738 is taken to be the
    -- end of CJK Unified Ideographs Extension A in GB 18030.
    { 0x8139EE39, 0x82358738 },
    { 0x82358F33, 0x82359636 },
    { 0x95328236, 0x9835F336 },
    { 0x9835F738, 0x98399E36 },
    { 0x98399F38, 0x9839B539 },
    { 0x9839B632, 0x9933FE33 },
    { 0x99348138, 0x9939F730 },
  }

  --- Report whether a four-byte code lies outside every non-reserved range.
  -- A byte sequence that is not a well-formed four-byte code (bytes 1 and 3
  -- in 0x81-0xFE, bytes 2 and 4 in 0x30-0x39) is reported as not reserved.
  -- @param ch1 first byte value
  -- @param ch2 second byte value
  -- @param ch3 third byte value
  -- @param ch4 fourth byte value
  -- @return true if the code is well-formed and in no listed range, else false
  function IsInQuadBytesReservedZone(ch1, ch2, ch3, ch4)
    if (0x81 > ch1 or ch1 > 0xFE) or
       (0x30 > ch2 or ch2 > 0x39) or
       (0x81 > ch3 or ch3 > 0xFE) or
       (0x30 > ch4 or ch4 > 0x39) then
      return false
    end

    -- Pack the bytes big-endian so that range checks become plain integer
    -- comparisons.
    local code = (ch1 << 24) | (ch2 << 16) | (ch3 << 8) | ch4

    for i = 1, #NON_RESERVED_RANGES do
      local range = NON_RESERVED_RANGES[i]
      if range[1] <= code and code <= range[2] then
        return false
      end
    end
    return true
  end
end
--- Report whether a four-byte code lies in the range this script treats as
-- the four-byte private use area, [0xFD308130, 0xFE39FE39].
-- @param ch1 first byte value, accepted in 0xFD-0xFE
-- @param ch2 second byte value, accepted in 0x30-0x39
-- @param ch3 third byte value, accepted in 0x81-0xFE
-- @param ch4 fourth byte value, accepted in 0x30-0x39
-- @return true if all four bytes are inside their accepted ranges, else false
function IsInQuadBytesPrivateUseArea(ch1, ch2, ch3, ch4)
  if not (ch1 >= 0xFD and ch1 <= 0xFE) then
    return false
  end
  if not (ch2 >= 0x30 and ch2 <= 0x39) then
    return false
  end
  if not (ch3 >= 0x81 and ch3 <= 0xFE) then
    return false
  end
  return ch4 >= 0x30 and ch4 <= 0x39
end
--- Write one cell of the single-byte table to the default output.
-- When the low nibble of ch is zero, a new table row is started first and
-- labelled with the high nibble in hexadecimal.
-- NOTE: the byte is written raw with %c, so control codes reach the output
-- unescaped.
-- @param ch byte value to print
function PrintSingleByte(ch)
  local starts_row = (ch & 0x0f) == 0
  if starts_row then
    io.write(("\n %1X"):format(ch >> 4))
  end
  io.write((" %c"):format(ch))
end
--- Write one cell of the double-byte table to the default output.
-- A new table header is written whenever the lead byte differs from the
-- lead byte of the previously printed code, and a new row whenever the high
-- nibble of the trail byte changes. Columns skipped since the previous cell
-- (or since the start of the row) are filled with one space each.
-- @param ch1 lead byte value
-- @param ch2 trail byte value
-- @param last_ch value returned by the previous call (0 before the first)
-- @return the code just printed, packed as (ch1 << 8) | ch2
function PrintDoubleBytes(ch1, ch2, last_ch)
  local ch = (ch1 << 8) | ch2

  -- Kept local: this flag used to be assigned without `local`, which
  -- created a global named bNewLine on every call.
  local new_table = (last_ch >> 8) ~= ch1
  if new_table then
    io.write(string.format("\n\n%02X 0 1 2 3 4 5 6 7 8 9 A B C D E F", ch1))
  end

  local column = ch2 & 0x0f
  if new_table or ((last_ch & 0xf0) ~= (ch2 & 0xf0)) then
    -- New row: label it with the high nibble, then pad up to this column.
    io.write(string.format("\n %1X", ch2 >> 4))
    for _ = 1, column do
      io.write(" ")
    end
  else
    -- Same row: pad only the columns skipped since the previous cell.
    for _ = 1, column - (last_ch & 0x0f) - 1 do
      io.write(" ")
    end
  end

  io.write(string.format(" %c%c", ch1, ch2))
  return ch
end
--- Write one cell of the four-byte table to the default output.
-- A new table header is written whenever the first two bytes differ from
-- those of the previously printed code, and a new row (labelled with the
-- third byte) whenever the third byte changes. Columns skipped since the
-- previous cell (or since the start of the row) get one space each.
-- @param ch1 first byte value
-- @param ch2 second byte value
-- @param ch3 third byte value
-- @param ch4 fourth byte value
-- @param last_ch value returned by the previous call (0 before the first)
-- @return the code just printed, packed big-endian into one integer
function PrintQuadBytes(ch1, ch2, ch3, ch4, last_ch)
  local lead_pair = (ch1 << 8) | ch2
  local starts_table = (last_ch >> 16) ~= lead_pair
  if starts_table then
    io.write(("\n\n%04X\n 30 31 32 33 34 35 36 37 38 39"):format(lead_pair))
  end

  local code = (lead_pair << 16) | (ch3 << 8) | ch4

  -- Number of one-space fillers to emit before this cell.
  local fillers
  if starts_table or (((last_ch >> 8) & 0xff) ~= ch3) then
    io.write(("\n%02X"):format(ch3))
    fillers = ch4 - 0x30
  else
    fillers = ch4 - (last_ch & 0xff) - 1
  end
  -- string.rep yields "" for a non-positive count, matching an empty loop.
  io.write(string.rep(" ", fillers))

  io.write((" %c%c%c%c"):format(ch1, ch2, ch3, ch4))
  return code
end
-- main: dump the code tables to the default output.
-- Each section below is wrapped in a comment toggle. A section whose opening
-- line has three dashes is active; one whose opening line has two dashes is
-- a block comment and therefore disabled. Add or remove one dash to switch.

-- Section 1 (active): single-byte codes 0x00-0x7F.
---[[
io.write(" 0 1 2 3 4 5 6 7 8 9 A B C D E F")
for byte = 0x00, 0x7f do
  PrintSingleByte(byte)
end
--]]

-- Section 2 (disabled): spot check of IsInQuadBytesReservedZone on a single
-- four-byte code.
--[[
local b1, b2, b3, b4 = 0x81, 0x31, 0x81, 0x32
if IsInQuadBytesReservedZone(b1, b2, b3, b4) then
  print("Hit!")
else
  print("Missed!")
end
--]]

-- Code returned by the most recent Print*Bytes call; 0 before the first one.
local last_ch = 0
-- Trail bytes visited for every lead byte: 0x40-0x7E, then 0x80-0xFE
-- (0x7F is never visited).
local trail_ranges = { { 0x40, 0x7e }, { 0x80, 0xfe } }

-- Section 3 (active): every double-byte code outside the private use area.
---[[
for ch1 = 0x81, 0xfe do
  for _, trail in ipairs(trail_ranges) do
    for ch2 = trail[1], trail[2] do
      if not IsInDoubleBytesPrivateUseArea(ch1, ch2) then
        last_ch = PrintDoubleBytes(ch1, ch2, last_ch)
      end
    end
  end
end
--]]

-- Section 4 (disabled): double-byte Chinese ideographs only.
--[[
for ch1 = 0x81, 0xfe do
  for _, trail in ipairs(trail_ranges) do
    for ch2 = trail[1], trail[2] do
      if IsDoubleBytesChineseIdeograph(ch1, ch2) then
        last_ch = PrintDoubleBytes(ch1, ch2, last_ch)
      end
    end
  end
end
--]]

-- Section 5 (disabled): four-byte codes that are neither reserved nor in
-- the private use area.
--[[
for ch1 = 0x81, 0xfe do
  for ch2 = 0x30, 0x39 do
    for ch3 = 0x81, 0xfe do
      for ch4 = 0x30, 0x39 do
        local skipped = IsInQuadBytesReservedZone(ch1, ch2, ch3, ch4)
          or IsInQuadBytesPrivateUseArea(ch1, ch2, ch3, ch4)
        if not skipped then
          last_ch = PrintQuadBytes(ch1, ch2, ch3, ch4, last_ch)
        end
      end
    end
  end
end
--]]