小项目使用8266接收WebHook,需要提取消息展示到串口屏
一切顺利,显示数量,图标,时间都很好
到了最后一步显示内容时碰壁了:Arduino 发送的是 UTF-8 编码的文本,
而串口屏对 GB2312 的支持更好一些,于是在网上查了一番
有人用 char hello[] ={0xC4,0xE3,0xBA,0xC3}
直接定义编码数组的方式,可以展示一些固定的字符串文本
也有人把文件编码从 UTF-8 改为 ANSI
但这两种方法我都没有尝试,我们要展示的内容是接口收到的内容,内容不可控.不能改编码也是因为要从接口获取数据,修改了编码,请求数据又会出现问题
解决办法
最后在Github上,看到了一个别人几年前做的8266气象站:esp8266-weather-station
他的代码中用到了 UTF-8 转 GB2312,翻看代码后发现是网上的另一个思路:查表
// Lookup table used for the UTF-8 -> GB2312 conversion.
// Each row is a pair { key, value }: the lookup code in this article compares
// column 0 against a 16-bit Unicode value and returns column 1 as the GB2312
// code. That lookup is a binary search on column 0, so rows must stay sorted
// in ascending order of the first element.
// The "...." line below marks rows omitted from this excerpt.
PROGMEM unsigned short int code_table[][2] =
{
{ 0x4E00, 0xD2BB },
{ 0x4E01, 0xB6A1 },
{ 0x4E03, 0xC6DF },
{ 0x4E07, 0xCDF2 },
{ 0x4E08, 0xD5C9 },
{ 0x4E09, 0xC8FD },
{ 0x4E0A, 0xC9CF },
....
{ 0xE813, 0xD7FD },
{ 0xE814, 0xD7FE }
};
搭配一个转换函数,将 UTF-8 的 char 数组转成 u16 数组:
void Utf8ToGb2312(const char *utf8, int len, u16 *gbArray)
{
int k = 0;
int byteCount = 0;
int i = 0;
int j = 0;
char temp[500];
u16 unicodeKey = 0;
u16 gbKey = 0;
//循环解析
while (i < len)
{ //delay(0);
switch (GetUtf8ByteNumForWord((u8)utf8[i]))
{
case 0:
temp[j] = utf8[i];
gbArray[k++] = gbKey;
byteCount = 1;
break;
case 2:
temp[j] = utf8[i];
temp[j + 1] = utf8[i + 1];
gbKey = ((utf8[i] & 0x80) << 8) + utf8[i + 1];
gbArray[k++] = gbKey;
byteCount = 2;
break;
case 3:
//这里就开始进行UTF8->Unicode
temp[j + 1] = ((utf8[i] & 0x0F) << 4) | ((utf8[i + 1] >> 2) & 0x0F); //高字节
temp[j] = ((utf8[i + 1] & 0x03) << 6) + (utf8[i + 2] & 0x3F); //低字节
//取得Unicode的值
memcpy(&unicodeKey, (temp + j), 2);
//根据这个值查表取得对应的GB2312的值
gbKey = SearchCodeTable(unicodeKey);
//printf("gbKey=0x%X\n",gbKey);
gbArray[k++] = gbKey;
byteCount = 3;
break;
case 4:
byteCount = 4;
break;
case 5:
byteCount = 5;
break;
case 6:
byteCount = 6;
break;
default:
break;
}
i += byteCount;
if (byteCount == 1)
{
j++;
}
else
{
j += 2;
}
}
gbArray[k] = 0xffff;
//return gbKey;
}
测试运行后我想要的中文的确都可以在串口打印出来了
但是有一个小问题:比如想要发送 123,应该发送 0x31 0x32 0x33,但因为它转换出来的是 u16 数组,每个字符都占两个字节,会多出一些字节来
我们进行一下小小的改造:把输出改为 u8 数组,并返回数组的真实长度
int Utf8ToGb2312(const char *utf8, int len, u8 *gbArray)
{
int k = 0;
int byteCount = 0;
int i = 0;
int j = 0;
char temp[500];
u16 unicodeKey = 0;
u16 gbKey = 0;
// 循环解析
while (i < len)
{
switch (GetUtf8ByteNumForWord((u8)utf8[i]))
{
case 0:
temp[j] = utf8[i];
gbArray[k++] = utf8[i];
byteCount = 1;
break;
case 2:
temp[j] = utf8[i];
temp[j + 1] = utf8[i + 1];
gbKey = ((utf8[i] & 0x80) << 8) + utf8[i + 1];
gbArray[k++] = gbKey >> 8;
gbArray[k++] = gbKey & 0xff;
byteCount = 2;
break;
case 3:
// 这里就开始进行UTF8->Unicode
temp[j + 1] = ((utf8[i] & 0x0F) << 4) | ((utf8[i + 1] >> 2) & 0x0F); // 高字节
temp[j] = ((utf8[i + 1] & 0x03) << 6) + (utf8[i + 2] & 0x3F); // 低字节
// 取得Unicode的值
memcpy(&unicodeKey, (temp + j), 2);
// 根据这个值查表取得对应的GB2312的值
gbKey = SearchCodeTable(unicodeKey);
gbArray[k++] = gbKey >> 8;
gbArray[k++] = gbKey & 0xff;
byteCount = 3;
break;
case 4:
byteCount = 4;
break;
case 5:
byteCount = 5;
break;
case 6:
byteCount = 6;
break;
default:
break;
}
i += byteCount;
if (byteCount == 1)
{
j++;
}
else
{
j += 2;
}
}
return k;
}
这样就可以实现中英文的混合参数,英文数字占用一个字节,中文占用两个字节
下面附上完整的代码
完整代码
受文章长度限制,代码贴不出来,可参考ESP8266 Arduino 发送 GB2312 编码的中文到串口屏
UTF-8toGB2312.cpp
#include <stdio.h>
#include <string.h>
#include "cp936.h"
#include "UTF-8toGB2312.h"
/**
 * Count the consecutive 1-bits at the top of a byte, starting at bit 7.
 *
 * For a UTF-8 lead byte this count is the length of the sequence in bytes;
 * 0 means the top bit is clear (a single-byte ASCII character).
 *
 * @param firstCh  byte to inspect
 * @return number of leading 1-bits, 0..8
 */
int GetUtf8ByteNumForWord(u8 firstCh)
{
    int leadingOnes = 0;
    u8 mask;

    /* Walk the mask down from bit 7 until a 0-bit is hit or the mask runs out. */
    for (mask = 0x80; (mask & firstCh) != 0; mask = (mask >> 1))
    {
        leadingOnes++;
    }

    return leadingOnes;
}
/**
 * Binary-search code_table for a 16-bit Unicode value.
 *
 * Column 0 of each row is compared against the key, and column 1 of the
 * matching row is returned. Entries are read through pgm_read_word().
 *
 * @param unicodeKey  value to look up in column 0
 * @return column 1 of the matching row, or 0 when no row matches
 */
u16 SearchCodeTable(u16 unicodeKey)
{
    int lo = 0;
    int hi = CODE_TABLE_SIZE - 1;

    while (lo <= hi)
    {
        const int probe = (lo + hi) / 2;
        const u16 candidate = pgm_read_word(&(code_table[probe][0]));

        if (candidate < unicodeKey)
        {
            lo = probe + 1; /* key lies in the upper half */
        }
        else if (candidate > unicodeKey)
        {
            hi = probe - 1; /* key lies in the lower half */
        }
        else
        {
            return pgm_read_word(&(code_table[probe][1]));
        }
    }

    return 0;
}
int Utf8ToGb2312(const char *utf8, int len, u8 *gbArray)
{
int k = 0;
int byteCount = 0;
int i = 0;
int j = 0;
char temp[500];
u16 unicodeKey = 0;
u16 gbKey = 0;
// 循环解析
while (i < len)
{
switch (GetUtf8ByteNumForWord((u8)utf8[i]))
{
case 0:
temp[j] = utf8[i];
gbArray[k++] = utf8[i];
byteCount = 1;
break;
case 2:
temp[j] = utf8[i];
temp[j + 1] = utf8[i + 1];
gbKey = ((utf8[i] & 0x80) << 8) + utf8[i + 1];
gbArray[k++] = gbKey >> 8;
gbArray[k++] = gbKey & 0xff;
byteCount = 2;
break;
case 3:
// 这里就开始进行UTF8->Unicode
temp[j + 1] = ((utf8[i] & 0x0F) << 4) | ((utf8[i + 1] >> 2) & 0x0F); // 高字节
temp[j] = ((utf8[i + 1] & 0x03) << 6) + (utf8[i + 2] & 0x3F); // 低字节
// 取得Unicode的值
memcpy(&unicodeKey, (temp + j), 2);
// 根据这个值查表取得对应的GB2312的值
gbKey = SearchCodeTable(unicodeKey);
gbArray[k++] = gbKey >> 8;
gbArray[k++] = gbKey & 0xff;
byteCount = 3;
break;
case 4:
byteCount = 4;
break;
case 5:
byteCount = 5;
break;
case 6:
byteCount = 6;
break;
default:
break;
}
i += byteCount;
if (byteCount == 1)
{
j++;
}
else
{
j += 2;
}
}
return k;
}
UTF-8toGB2312.h
// Include guard for the UTF-8 -> GB2312 converter declaration.
#ifndef utf8togb2312
#define utf8togb2312
// Converts `len` bytes of UTF-8 text at `utf8` into `gbArray` and returns the
// number of bytes written. The caller provides the output buffer.
int Utf8ToGb2312(const char* utf8, int len,unsigned char* gbArray);
#endif
使用的例子
// Example: convert a UTF-8 string and send the result over the serial port.
// NOTE(review): utf8Str is presumably an Arduino String holding UTF-8 text -
// it is declared outside this excerpt.
int num = utf8Str.length();
// The converter writes at most two bytes per character, so twice the input
// length is a generous upper bound for the output buffer.
u8 s[num * 2];
// len is the number of bytes actually produced; only that many are sent.
int len = Utf8ToGb2312(utf8Str.c_str(), num, s);
Serial.write(s, len);