Weekly Report on Aug. 3rd
by ZhengQiushi
This week I completed the decoding of Kanji, alphanumeric and ECI modes under Linux, and also fixed some bugs in the previous processing steps.
1.Kanji Mode
In this mode, each 13-bit binary string has to be split back into two bytes.
Here are the steps :
1.Get a 13-bit string.
2.The converting process can be explained in the following equations:
$$P = X - \mathrm{Const}$$
X = Shift JIS value, Const =0x8140 or 0xC140
$$L = P \bmod 16^2, \qquad H = \lfloor P / 16^2 \rfloor$$
L is the lower byte and the H is the higher byte.
$$Y = H \times \text{0xC0} + L \iff \begin{cases} Y / \text{0xC0} = H + L / \text{0xC0} = H + (L \ge \text{0xC0}) \\ Y \bmod \text{0xC0} = L \bmod \text{0xC0} \end{cases}$$
Here $/$ is integer division and $(L \ge \text{0xC0})$ is 1 when the condition holds and 0 otherwise.
Y = 13bits value
$$\begin{cases} H = Y / \text{0xC0} - (L \ge \text{0xC0}) \\ L = Y \bmod \text{0xC0} + \{0, \text{0xC0}\} \end{cases}$$
$$Y = H \times \text{0xC0} + L$$
3.Use iconv to convert the character set from SHIFT_JIS to UTF-8.
/**
 * Decode one Kanji-mode segment from the bit stream.
 *
 * Reads the character count (8, 10 or 12 bits wide depending on `version`),
 * then one 13-bit value per character. Each value is turned back into a
 * two-byte Shift JIS code; when `eci` is UTF_8 that code is handed to
 * convert2utf8().
 *
 * @param ptr  Bit-stream cursor, forwarded to get_bits() / bits_remaining().
 * @return SUCCESS on completion, ERROR_DATA_OVERFLOW when the payload size
 *         check fails, ERROR_DATA_UNDERFLOW when fewer than count * 13 bits
 *         remain.
 */
decode_error QRDecode::decode_kanji(uint8_t * &ptr){
    const int per_char_len = 13;

    /* width of the character-count indicator depends on the version */
    int bits = 12;
    if (version < 10)
        bits = 8;
    else if (version < 27)
        bits = 10;

    const int count = get_bits(bits, ptr);

    /* one char = two bytes */
    if (payload_len + count * 2 + 1 > MAX_PAYLOAD)
        return ERROR_DATA_OVERFLOW;
    /* one char = 13 bits */
    if (bits_remaining(ptr) < count * per_char_len)
        return ERROR_DATA_UNDERFLOW;

    for (int i = 0; i < count; i++) {
        const int Y = get_bits(per_char_len, ptr);

        /* Y = H * 0xC0 + L. Integer division and remainder always satisfy
         * Y == (Y / 0xC0) * 0xC0 + (Y % 0xC0), so they recover H and L
         * directly; the former retry with L + 0xC0 could never be reached
         * and has been removed together with its unreachable error return. */
        const int H = Y / 0xc0;
        const int L = Y % 0xc0;

        /* offset of the character relative to the start of its range */
        const uint16_t subtract = static_cast<uint16_t>((H << 8) + L);
        uint16_t result = 0;
        if (subtract + 0x8140 <= 0x9ffc) {
            /* bytes are in the range 0x8140 to 0x9FFC */
            result = static_cast<uint16_t>(subtract + 0x8140);
        } else {
            /* bytes are in the range 0xE040 to 0xEBBF */
            result = static_cast<uint16_t>(subtract + 0xc140);
        }

        /* NOTE(review): when eci is not UTF_8 the decoded character is
         * discarded and nothing is appended to payload - confirm intended. */
        if (eci == UTF_8) {
            /* third element stays 0 so the buffer is a NUL-terminated string */
            const char* fromcode = getSrcMode(Shift_JIS);
            char src_shift_jis[3] = {char(result >> 8), char(result & 0xff)};
            convert2utf8(src_shift_jis, fromcode);
        }
    }
    return SUCCESS;
}
2.Alphanumeric Mode
This mode is pretty simple.
We just need to split each 11-bit binary string into two values by dividing by 45 and taking the remainder modulo 45, then decode each value with a fixed character map.
/**
 * Decode one alphanumeric-mode segment from the bit stream.
 *
 * Reads the character count (9, 11 or 13 bits wide depending on `version`),
 * then consumes 11 bits per pair of characters and 6 bits for a trailing
 * single character, appending the mapped characters to `payload`.
 *
 * @param ptr  Bit-stream cursor, forwarded to get_bits() / bits_remaining().
 * @return SUCCESS on completion, ERROR_DATA_OVERFLOW when the payload size
 *         check fails, ERROR_DATA_UNDERFLOW when the stream runs out of bits.
 */
decode_error QRDecode::decode_alpha(uint8_t * &ptr){
    /* the 45 characters of the alphanumeric table, indexed by value */
    static const char *alpha_map =
        "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ $%*+-./:";
    const int alpha_base = 45;

    /* width of the character-count indicator depends on the version */
    int bits = 13;
    if (version < 10)
        bits = 9;
    else if (version < 27)
        bits = 11;

    /* string length */
    int count = get_bits(bits, ptr);
    if (payload_len + count + 1 > MAX_PAYLOAD) {
        return ERROR_DATA_OVERFLOW;
    }

    /* 11 bits encode two characters as first * 45 + second */
    while (count >= 2) {
        if (bits_remaining(ptr) < 11)
            return ERROR_DATA_UNDERFLOW;
        const int num = get_bits(11, ptr);
        /* An 11-bit value can reach 2047, so num / 45 can be 45, one past
         * the last table entry. Reducing modulo 45 keeps the index inside
         * the table for malformed input.
         * NOTE(review): consider rejecting such values with a dedicated
         * error code instead of mapping them to a character. */
        payload[payload_len++] = alpha_map[(num / alpha_base) % alpha_base];
        payload[payload_len++] = alpha_map[num % alpha_base];
        count -= 2;
    }

    /* a trailing single character takes 6 bits */
    if (count != 0) {
        if (bits_remaining(ptr) < 6)
            return ERROR_DATA_UNDERFLOW;
        const int num = get_bits(6, ptr);
        /* a 6-bit value can reach 63; keep the index inside the table */
        payload[payload_len++] = alpha_map[num % alpha_base];
    }
    return SUCCESS;
}
3.ECI Mode
In this mode , there are two main tasks.
1.Get the ECI Assignment Number and the corresponding character encoding set, which is stored in the variable eci.
/**
 * Decode an ECI designator and store the ECI Assignment Number in `eci`.
 *
 * The leading bits of the first codeword select the designator length:
 *   0bbbbbbb                     - one codeword
 *   10bbbbbb bbbbbbbb            - two codewords
 *   110bbbbb bbbbbbbb bbbbbbbb   - three codewords
 * Only the `b` bits form the assignment number, so the length prefix is
 * masked off before the remaining codewords are appended.
 *
 * @param ptr  Bit-stream cursor, forwarded to get_bits() / bits_remaining().
 * @return SUCCESS on completion, ERROR_DATA_UNDERFLOW when the stream does
 *         not hold enough bits for the designator.
 */
decode_error QRDecode::decode_eci(uint8_t * &ptr){
    /* the ECI Assignment Number is at least 8 bits */
    if (bits_remaining(ptr) < 8)
        return ERROR_DATA_UNDERFLOW;

    const uint32_t first = static_cast<uint32_t>(get_bits(8, ptr));
    eci = first;

    if ((first & 0xc0) == 0x80) {
        /* two codewords: 6 value bits here + 8 more */
        if (bits_remaining(ptr) < 8)
            return ERROR_DATA_UNDERFLOW;
        eci = ((first & 0x3f) << 8) | static_cast<uint32_t>(get_bits(8, ptr));
    } else if ((first & 0xe0) == 0xc0) {
        /* three codewords: 5 value bits here + 16 more (the original read
         * only 8 of the 16 bits it had just checked for) */
        if (bits_remaining(ptr) < 16)
            return ERROR_DATA_UNDERFLOW;
        eci = ((first & 0x1f) << 16) | static_cast<uint32_t>(get_bits(16, ptr));
    }
    /* A first codeword starting with 111 matches none of the three layouts;
     * eci then keeps the raw first codeword.
     * NOTE(review): consider reporting this as a format error. */
    return SUCCESS;
}
Here are the commonly used encoding sets:
/* Character sets selectable through an ECI designator.
 * Each enumerator's value equals its ECI code; codes 14 and 19 have no entry. */
enum ENCODING_SET {
    CP437       = 0,   // Cp437 (ECI code 0)
    ISO_8859_1  = 1,   // ECI code 1
    CP437_      = 2,   // Cp437 (ECI code 2)
    ISO_8859_1_ = 3,   // ECI code 3
    ISO_8859_2  = 4,   // ECI code 4
    ISO_8859_3  = 5,   // ECI code 5
    ISO_8859_4  = 6,   // ECI code 6
    ISO_8859_5  = 7,   // ECI code 7
    ISO_8859_6  = 8,   // ECI code 8
    ISO_8859_7  = 9,   // ECI code 9
    ISO_8859_8  = 10,  // ECI code 10
    ISO_8859_9  = 11,  // ECI code 11
    ISO_8859_10 = 12,  // ECI code 12
    ISO_8859_11 = 13,  // ECI code 13
    ISO_8859_13 = 15,  // ECI code 15
    ISO_8859_14 = 16,  // ECI code 16
    ISO_8859_15 = 17,  // ECI code 17
    ISO_8859_16 = 18,  // ECI code 18
    Shift_JIS   = 20,  // ECI code 20
    CP1250      = 21,  // windows_1250 (ECI code 21)
    CP1251      = 22,  // windows_1251 (ECI code 22)
    CP1252      = 23,  // windows_1252 (ECI code 23)
    CP1256      = 24,  // windows_1256 (ECI code 24)
    UTF_16BE    = 25,  // UnicodeBig, UnicodeBigUnmarked (ECI code 25)
    UTF_8       = 26,  // ECI code 26
    US_ASCII    = 27,  // ECI codes 27, 170
    Big5        = 28,  // ECI code 28
    GBK         = 29,  // GB18030, GB2312, EUC_CN (ECI code 29)
    EUC_KR      = 30   // ECI code 30
};
2.The ECI mode influences the decoding of other modes, such as Byte mode. Before the decoded bytes are written to the result variable payload, they need to go through a character set conversion.
/**
 * Decode one byte-mode segment from the bit stream.
 *
 * Reads the byte count (8 bits, or 16 bits when `version` > 9) and then that
 * many bytes. Bytes are appended to `payload` unchanged when the source
 * encoding reported by getSrcMode(eci) is "UTF-8"; otherwise each byte is
 * passed to convert2utf8().
 *
 * @param ptr  Bit-stream cursor, forwarded to get_bits() / bits_remaining().
 * @return SUCCESS on completion, ERROR_DATA_OVERFLOW when the payload size
 *         check fails, ERROR_DATA_UNDERFLOW when fewer than count * 8 bits
 *         remain.
 */
decode_error QRDecode::decode_byte(uint8_t * &ptr){
    /* width of the character-count indicator depends on the version */
    const int bits = (version > 9) ? 16 : 8;
    const int count = get_bits(bits, ptr);

    if (payload_len + count + 1 > MAX_PAYLOAD) {
        return ERROR_DATA_OVERFLOW;
    }
    if (bits_remaining(ptr) < count * 8) {
        return ERROR_DATA_UNDERFLOW;
    }

    const char* fromcode = getSrcMode(eci);
    /* The literal previously contained U+2212 (MINUS SIGN) instead of an
     * ASCII '-', so it could never equal the "UTF-8" name used with iconv.
     * The comparison is loop-invariant, so it is evaluated once. */
    const bool already_utf8 = (strcmp(fromcode, "UTF-8") == 0);

    for (int i = 0; i < count; i++) {
        const int tmp = get_bits(8, ptr);
        if (already_utf8) {
            payload[payload_len++] = tmp;
        } else {
            /* NOTE(review): bytes are converted one at a time, so characters
             * that span several bytes in the source encoding reach
             * convert2utf8() split up - confirm this is acceptable. */
            char src[2] = {char(tmp), '\0'};
            convert2utf8(src, fromcode);
        }
    }
    return SUCCESS;
}
The conversion process is implemented with <iconv.h>.
/**
 * Convert a NUL-terminated string to UTF-8 and append the result to `payload`.
 *
 * The conversion is best effort: if the converter cannot be opened nothing
 * is appended, and if iconv() stops early only the bytes produced so far
 * are appended.
 *
 * @param src       NUL-terminated input bytes in the `fromcode` encoding.
 * @param fromcode  iconv name of the source encoding.
 */
void QRDecode::convert2utf8(char* src,const char * fromcode ){
    /* input characters */
    char *inbuf = src;
    size_t inlen = strlen(inbuf);

    /* output characters; the last byte is never handed to iconv so the
     * buffer always stays NUL-terminated for the trace output below */
    char dst_utf8[255] = {};
    char *outbuf = dst_utf8;
    const size_t capacity = sizeof(dst_utf8) - 1;
    size_t outlen = capacity;

    cout<<"before @ inlen : "<<inlen << "inbuf : "<<inbuf<<endl;

    /* iconv_open reports failure (e.g. unknown encoding name) as (iconv_t)-1 */
    iconv_t cd = iconv_open("UTF-8", fromcode);
    if (cd == (iconv_t)-1) {
        return;
    }

    /* return value deliberately not inspected: whatever was converted before
     * an error is still appended, as before */
    iconv(cd, &inbuf, &inlen, &outbuf, &outlen);
    iconv_close(cd);

    /* iconv decrements outlen by the number of bytes it wrote */
    const size_t produced = capacity - outlen;
    cout<<"after @ outlen : "<<produced <<" outbuf : "<<dst_utf8<<endl;

    /* NOTE(review): payload capacity is not checked here - callers only
     * reserve room for the unconverted byte count. */
    for (size_t j = 0; j < produced; j++) {
        payload[payload_len++] = dst_utf8[j];
    }
}
Unfortunately, <iconv.h> is only compatible with Linux.
How should I complete the conversion part?
Can I use another library or some already implemented functions? If so, I wonder whether there is a universal library for character set conversion that works on every platform.
Or do I need to build my own conversion tables for all ECI standards, like what I did in alphanumeric mode?
Looking forward to hearing from you.
2020.08.03