输出《GB 18030-2022》编码字符(Lua)

2 篇文章 0 订阅

输出

可按编码顺序输出《GB 18030-2022》中规定的单字节、双字节及四字节中文编码字符集(四字节部分默认不输出,需手动启用对应的代码段)。
一般不输出保留区(ReservedZone)字符和用户自定义区(PrivateUseArea)字符。

Lua实现代码

--- Test whether a two-byte code lies in one of the double-byte
-- private-use (user-defined) areas checked by this script.
-- The three areas are:
--   ch1 = [0xAA, 0xAF], ch2 = [0xA1, 0xFE]
--   ch1 = [0xF8, 0xFE], ch2 = [0xA1, 0xFE]
--   ch1 = [0xA1, 0xA7], ch2 = [0x40, 0x7E] or [0x80, 0xA0]
-- @param ch1 lead byte value
-- @param ch2 trail byte value
-- @return true when the pair is inside any of the areas, false otherwise
function IsInDoubleBytesPrivateUseArea(ch1, ch2)
	-- Classify the trail byte first; the two classes never overlap.
	local trail_is_high = 0xA1 <= ch2 and ch2 <= 0xFE
	if trail_is_high then
		return (0xAA <= ch1 and ch1 <= 0xAF) or (0xF8 <= ch1 and ch1 <= 0xFE)
	end

	local trail_is_low = (0x40 <= ch2 and ch2 <= 0x7E) or (0x80 <= ch2 and ch2 <= 0xA0)
	if trail_is_low then
		return 0xA1 <= ch1 and ch1 <= 0xA7
	end

	return false
end

--- Test whether a two-byte code lies in the double-byte symbol
-- (graphic notation) areas checked by this script:
--   ch1 = [0xA1, 0xA9], ch2 = [0xA1, 0xFE]
--   ch1 = [0xA8, 0xA9], ch2 = [0x40, 0x7E] or [0x80, 0xA0]
-- @param ch1 lead byte value
-- @param ch2 trail byte value
-- @return true when the pair is inside either area, false otherwise
function IsDoubleBytesGraphNotation(ch1, ch2)
	-- Both areas require a lead byte within 0xA1..0xA9.
	if not (0xA1 <= ch1 and ch1 <= 0xA9) then
		return false
	end
	if 0xA1 <= ch2 and ch2 <= 0xFE then
		return true
	end
	-- The low trail-byte rows only exist for lead bytes 0xA8 and 0xA9.
	if not (0xA8 <= ch1) then
		return false
	end
	return (0x40 <= ch2 and ch2 <= 0x7E) or (0x80 <= ch2 and ch2 <= 0xA0)
end

--- Test whether a two-byte code lies in the double-byte Chinese
-- ideograph areas checked by this script:
--   ch1 = [0xB0, 0xF7], ch2 = [0xA1, 0xFE]
--   ch1 = [0x81, 0xA0], ch2 = [0x40, 0x7E] or [0x80, 0xFE]
--   ch1 = [0xAA, 0xFE], ch2 = [0x40, 0x7E] or [0x80, 0xA0]
-- @param ch1 lead byte value
-- @param ch2 trail byte value
-- @return true when the pair is inside any of the areas, false otherwise
function IsDoubleBytesChineseIdeograph(ch1, ch2)
	-- Pre-compute the trail-byte classes shared by the three areas.
	local trail_40_7E = 0x40 <= ch2 and ch2 <= 0x7E
	local trail_80_A0 = 0x80 <= ch2 and ch2 <= 0xA0
	local trail_80_FE = 0x80 <= ch2 and ch2 <= 0xFE
	local trail_A1_FE = 0xA1 <= ch2 and ch2 <= 0xFE

	local in_first_area = 0xB0 <= ch1 and ch1 <= 0xF7 and trail_A1_FE
	local in_second_area = 0x81 <= ch1 and ch1 <= 0xA0 and (trail_40_7E or trail_80_FE)
	local in_third_area = 0xAA <= ch1 and ch1 <= 0xFE and (trail_40_7E or trail_80_A0)

	return in_first_area or in_second_area or in_third_area
end

-- Inclusive {first, last} ranges of four-byte codes that are NOT treated as
-- reserved. The table is built once at load time rather than on every call.
-- The three wide ranges at the end already contain every narrower range
-- listed before them, so the narrower entries do not change the result.
local QUAD_BYTES_NON_RESERVED_RANGES = {
	{ 0x81318132, 0x81319934 },
	{ 0x8430BA32, 0x8430FE35 },
	{ 0x84318730, 0x84319530 },
	{ 0x8132E834, 0x8132FD31 },
	{ 0x8134D238, 0x8134E337 },
	{ 0x9034C538, 0x9034C730 },
	{ 0x8134F434, 0x8134F830 },
	{ 0x8134F932, 0x81358437 },
	{ 0x81358B32, 0x81359935 },
	{ 0x82359833, 0x82369435 },
	{ 0x82369535, 0x82369A32 },
	{ 0x81339D36, 0x8133B635 },
	{ 0x8139A933, 0x8139B734 },
	{ 0x8237CF35, 0x8336BE36 },
	{ 0x9232C636, 0x9232D635 },
	{ 0x81398B32, 0x8139A135 },
	-- The upper bound used to read 0x81358738, which is below the lower
	-- bound, so this entry could never match. 0x82358738 is presumably the
	-- intended value (end of the range that precedes 0x82358F33 below).
	{ 0x8139EE39, 0x82358738 },
	{ 0x82358F33, 0x82359636 },
	{ 0x95328236, 0x9835F336 },
	{ 0x9835F738, 0x98399E36 },
	{ 0x98399F38, 0x9839B539 },
	{ 0x9839B632, 0x9933FE33 },
	{ 0x99348138, 0x9939F730 },
	{ 0x81308130, 0x8439FE39 },
	{ 0x90308130, 0xE339FE39 },
	{ 0xFD308130, 0xFE39FE39 },
}

--- Test whether a four-byte code falls in the reserved zone.
-- A code counts as reserved when it is a well-formed four-byte sequence
-- (ch1, ch3 in [0x81, 0xFE]; ch2, ch4 in [0x30, 0x39]) that lies outside
-- every range of QUAD_BYTES_NON_RESERVED_RANGES.
-- @param ch1 first byte value
-- @param ch2 second byte value
-- @param ch3 third byte value
-- @param ch4 fourth byte value
-- @return true for a well-formed code outside all listed ranges;
--         false for a malformed sequence or a code inside a listed range
function IsInQuadBytesReservedZone(ch1, ch2, ch3, ch4)
	local well_formed = 0x81 <= ch1 and ch1 <= 0xFE
		and 0x30 <= ch2 and ch2 <= 0x39
		and 0x81 <= ch3 and ch3 <= 0xFE
		and 0x30 <= ch4 and ch4 <= 0x39
	if not well_formed then
		return false
	end

	-- Pack the four bytes big-endian so codes compare in encoding order.
	local code = (ch1 << 24) | (ch2 << 16) | (ch3 << 8) | ch4
	for i = 1, #QUAD_BYTES_NON_RESERVED_RANGES do
		local range = QUAD_BYTES_NON_RESERVED_RANGES[i]
		if range[1] <= code and code <= range[2] then
			return false
		end
	end
	return true
end

--- Test whether a four-byte code lies in the four-byte private-use
-- area [0xFD308130, 0xFE39FE39], checked byte by byte.
-- @param ch1 first byte value, must be in [0xFD, 0xFE]
-- @param ch2 second byte value, must be in [0x30, 0x39]
-- @param ch3 third byte value, must be in [0x81, 0xFE]
-- @param ch4 fourth byte value, must be in [0x30, 0x39]
-- @return true when all four bytes are within their ranges, false otherwise
function IsInQuadBytesPrivateUseArea(ch1, ch2, ch3, ch4)
	if not (0xFD <= ch1 and ch1 <= 0xFE) then
		return false
	end
	if not (0x30 <= ch2 and ch2 <= 0x39) then
		return false
	end
	if not (0x81 <= ch3 and ch3 <= 0xFE) then
		return false
	end
	return 0x30 <= ch4 and ch4 <= 0x39
end

--- Write one single-byte code as a cell of a 16-column table.
-- When the low nibble is 0, a new row is started and labelled with the
-- high nibble. The byte itself is written raw via "%c", so control bytes
-- are emitted unchanged.
-- @param ch byte value to print
function PrintSingleByte(ch)
	local column = ch & 0x0f
	if column == 0 then
		local row = ch >> 4
		io.write(("\n %1X"):format(row))
	end
	io.write(("  %c"):format(ch))
end

--- Write one two-byte code as a cell of a per-lead-byte table.
-- A header line is written whenever the lead byte differs from that of the
-- previously printed code, and a new row (labelled with the trail byte's
-- high nibble) is started whenever the header was written or the high
-- nibble changed. Skipped cells are padded with three spaces each so the
-- character stays under its column heading.
-- @param ch1 lead byte value
-- @param ch2 trail byte value
-- @param last_ch value returned by the previous call (0 before the first)
-- @return the code packed as (ch1 << 8) | ch2, to pass as the next last_ch
function PrintDoubleBytes(ch1, ch2, last_ch)
	-- Kept local: the original assigned an undeclared name here and thereby
	-- created/overwrote a global on every call.
	local lead_changed = (last_ch >> 8) ~= ch1
	if lead_changed then
		io.write(string.format("\n\n%02X  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F", ch1))
	end

	local column = ch2 & 0x0f
	local skipped_cells
	if lead_changed or ((last_ch & 0xf0) ~= (ch2 & 0xf0)) then
		io.write(string.format("\n %1X", ch2 >> 4))
		-- New row: pad from column 0 up to this code's column.
		skipped_cells = column
	else
		-- Same row: pad only the cells between the previous code and this one.
		skipped_cells = column - (last_ch & 0x0f) - 1
	end
	-- string.rep yields "" for a count <= 0, like the original empty loop.
	io.write(string.rep("   ", skipped_cells))

	io.write(string.format(" %c%c", ch1, ch2))
	return (ch1 << 8) | ch2
end

--- Write one four-byte code as a cell of a per-prefix table.
-- A header is written whenever the leading two bytes differ from those of
-- the previously printed code; a new row labelled with ch3 is started
-- whenever the header was written or ch3 changed. Skipped cells are padded
-- with three spaces each.
-- @param ch1 first byte value
-- @param ch2 second byte value
-- @param ch3 third byte value
-- @param ch4 fourth byte value
-- @param last_ch value returned by the previous call (0 before the first)
-- @return the code packed big-endian into one integer, to pass as the
--         next last_ch
function PrintQuadBytes(ch1, ch2, ch3, ch4, last_ch)
	local prefix = (ch1 << 8) | ch2
	local prefix_changed = (last_ch >> 16) ~= prefix
	if prefix_changed then
		io.write(string.format("\n\n%04X\n    30 31 32 33 34 35 36 37 38 39", prefix))
	end

	local previous_ch3 = (last_ch >> 8) & 0xff
	local skipped_cells
	if prefix_changed or (previous_ch3 ~= ch3) then
		io.write(string.format("\n%02X", ch3))
		-- New row: pad from the 0x30 column up to this code's column.
		skipped_cells = ch4 - 0x30
	else
		-- Same row: pad only the cells between the previous code and this one.
		skipped_cells = ch4 - (last_ch & 0xff) - 1
	end
	-- string.rep yields "" for a count <= 0, matching an empty loop.
	io.write(string.rep("   ", skipped_cells))

	io.write(string.format(" %c%c%c%c", ch1, ch2, ch3, ch4))
	return (prefix << 16) | (ch3 << 8) | ch4
end

-- main
-- Every section below sits inside a block-comment toggle: a section that
-- opens with three dashes is active, one that opens with two dashes is
-- commented out up to its closing marker.

-- Section 1 (active): table of the single-byte codes 0x00..0x7F.
---[[
io.write("    0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F")
for byte = 0x00, 0x7f do
	PrintSingleByte(byte)
end
--]]

-- Section 2 (disabled): manual probe of IsInQuadBytesReservedZone.
--[[
ch1 = 0x81
ch2 = 0x31
ch3 = 0x81
ch4 = 0x32
if IsInQuadBytesReservedZone(ch1, ch2, ch3, ch4) then
	print("Hit!")
else
	print("Missed!")
end
--]]

-- Code printed most recently; threaded through the Print* helpers so they
-- can decide when to start a new header or row.
local last_ch = 0

-- Section 3 (active): every two-byte code outside the private-use areas.
---[[
for lead = 0x81, 0xfe do
	for trail = 0x40, 0xfe do
		-- 0x7F is not part of either trail-byte range (0x40..0x7E, 0x80..0xFE).
		if trail ~= 0x7f and not IsInDoubleBytesPrivateUseArea(lead, trail) then
			last_ch = PrintDoubleBytes(lead, trail, last_ch)
		end
	end
end
--]]

-- Section 4 (disabled): only the two-byte Chinese ideographs.
--[[
for lead = 0x81, 0xfe do
	for trail = 0x40, 0xfe do
		if trail ~= 0x7f and IsDoubleBytesChineseIdeograph(lead, trail) then
			last_ch = PrintDoubleBytes(lead, trail, last_ch)
		end
	end
end
--]]

-- Section 5 (disabled): four-byte codes that are neither reserved nor
-- private-use.
--[[
for b1 = 0x81, 0xfe do
	for b2 = 0x30, 0x39 do
		for b3 = 0x81, 0xfe do
			for b4 = 0x30, 0x39 do
				local skip = IsInQuadBytesReservedZone(b1, b2, b3, b4) or
					IsInQuadBytesPrivateUseArea(b1, b2, b3, b4)
				if not skip then
					last_ch = PrintQuadBytes(b1, b2, b3, b4, last_ch)
				end
			end
		end
	end
end
--]]

参考

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值