最近在做通过AT指令发送短信的功能,编码转换成了难题:使用libiconv在PC机上正常,在开发板上却不行。
所以决定自己写转换代码,首先要弄清UTF-8和Unicode之间的对应关系。
UTF-8编码的字符理论上最长可达6个字节,然而16位BMP(Basic Multilingual Plane,基本多文种平面)字符最多只用到3个字节。下面看一下UTF-8编码表:
unicode utf8
U-00000000 - U-0000007F: 0xxxxxxx
U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
unicode utf8
U-00000000 - U-0000007F: 0xxxxxxx
U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
#ifndef CODECONVERTOR_H
#define CODECONVERTOR_H
#include <QString>
#include <string>
// Collection of static helpers that turn UTF-8 encoded text into a string of
// hexadecimal digits describing 16-bit Unicode code units (high byte first).
// All members are static; the constructor is private.
class CodeConvertor
{
public:
// Decodes the bytes obtained from in.toLatin1() as UTF-8 (1- to 3-byte
// sequences) and returns four hex digits per decoded code unit. Digits above
// 9 are lowercase because they come from QString::number(..., 16).
static QString tuf8ToUnicode(const QString &in);
// std::string counterpart of tuf8ToUnicode: treats `in` as UTF-8 bytes and
// returns four hex digits per decoded code unit, formatted by charTo2Hex
// (uppercase).
static std::string _tuf8ToUnicode(const std::string &in);
// Writes `in` as two uppercase hex digits followed by '\0' into `out`;
// `out` must have room for at least 3 chars.
static void charTo2Hex(char in,char *out);
private:
// Private so that code outside the class cannot create instances.
CodeConvertor();
};
#endif // CODECONVERTOR_H
#include "codeconvertor.h"
#include <QDebug>
// Constructor with an empty body. It is declared private in the class, whose
// other members are all static.
CodeConvertor::CodeConvertor()
{
}
// Converts UTF-8 bytes carried in a QString into a hexadecimal string of
// 16-bit Unicode code units (high byte first, four hex digits per unit,
// lowercase a-f as produced by QString::number).
//
// in      - text whose Latin-1 byte image is the UTF-8 data to decode.
// returns - concatenated 4-digit hex code units; empty for empty input.
//
// Only 1-, 2- and 3-byte UTF-8 sequences (U+0000..U+FFFF) are converted.
// Bytes that cannot start such a sequence (stray continuation bytes, lead
// bytes of 4-byte and longer sequences) are skipped, and a sequence that is
// cut off at the end of the input is dropped. Previously the former caused an
// endless loop and the latter an out-of-range read.
QString CodeConvertor::tuf8ToUnicode(const QString &in)
{
    QString outstr;

    // NOTE(review): toLatin1() only yields the original UTF-8 bytes when each
    // QChar of `in` holds one raw byte (e.g. a string built with
    // QString::fromLatin1). For a QString that was decoded from UTF-8, every
    // non-Latin-1 character becomes '?' here - confirm how callers build `in`.
    const QByteArray byteArr = in.toLatin1();
    const int len = byteArr.length();

    int i = 0;
    while (i < len) {
        // Work on unsigned values so that bytes >= 0x80 never sign-extend.
        const unsigned int lead = static_cast<unsigned char>(byteArr.at(i));
        unsigned int unit = 0;

        if ((lead & 0x80) == 0x00) {
            // 0xxxxxxx: single byte.
            unit = lead;
            i += 1;
        } else if ((lead & 0xe0) == 0xc0) {
            // 110xxxxx 10xxxxxx: two bytes.
            if (len - i < 2) {
                break; // truncated sequence at end of input
            }
            const unsigned int c1 = static_cast<unsigned char>(byteArr.at(i + 1)) & 0x3f;
            unit = ((lead & 0x1f) << 6) | c1;
            i += 2;
        } else if ((lead & 0xf0) == 0xe0) {
            // 1110xxxx 10xxxxxx 10xxxxxx: three bytes.
            if (len - i < 3) {
                break; // truncated sequence at end of input
            }
            const unsigned int c1 = static_cast<unsigned char>(byteArr.at(i + 1)) & 0x3f;
            const unsigned int c2 = static_cast<unsigned char>(byteArr.at(i + 2)) & 0x3f;
            unit = ((lead & 0x0f) << 12) | (c1 << 6) | c2;
            i += 3;
        } else {
            // Not representable as one 16-bit unit: skip the byte so the loop
            // always makes progress.
            ++i;
            continue;
        }

        // Zero-pad to exactly four hex digits (high byte, then low byte).
        outstr += QString::number(unit, 16).rightJustified(4, QLatin1Char('0'));
    }
    return outstr;
}
// Converts a UTF-8 encoded std::string into a hexadecimal string of 16-bit
// Unicode code units (high byte first, four uppercase hex digits per unit).
//
// in      - UTF-8 encoded bytes.
// returns - concatenated 4-digit hex code units; empty for empty input.
//
// Only 1-, 2- and 3-byte UTF-8 sequences (U+0000..U+FFFF) are converted.
// Bytes that cannot start such a sequence (stray continuation bytes, lead
// bytes of 4-byte and longer sequences) are skipped, and a sequence that is
// cut off at the end of the input is dropped. Previously the former caused an
// endless loop and the latter a read beyond the end of the string.
std::string CodeConvertor::_tuf8ToUnicode(const std::string &in)
{
    std::string outstr;
    char buf[3]; // two hex digits + terminating '\0', filled by charTo2Hex

    const std::string::size_type len = in.length();
    std::string::size_type i = 0;
    while (i < len) {
        // Work on unsigned values so that bytes >= 0x80 never sign-extend.
        const unsigned int lead = static_cast<unsigned char>(in[i]);
        unsigned int unit = 0;

        if ((lead & 0x80) == 0x00) {
            // 0xxxxxxx: single byte.
            unit = lead;
            i += 1;
        } else if ((lead & 0xe0) == 0xc0) {
            // 110xxxxx 10xxxxxx: two bytes.
            if (len - i < 2) {
                break; // truncated sequence at end of input
            }
            const unsigned int c1 = static_cast<unsigned char>(in[i + 1]) & 0x3f;
            unit = ((lead & 0x1f) << 6) | c1;
            i += 2;
        } else if ((lead & 0xf0) == 0xe0) {
            // 1110xxxx 10xxxxxx 10xxxxxx: three bytes.
            if (len - i < 3) {
                break; // truncated sequence at end of input
            }
            const unsigned int c1 = static_cast<unsigned char>(in[i + 1]) & 0x3f;
            const unsigned int c2 = static_cast<unsigned char>(in[i + 2]) & 0x3f;
            unit = ((lead & 0x0f) << 12) | (c1 << 6) | c2;
            i += 3;
        } else {
            // Not representable as one 16-bit unit: skip the byte so the loop
            // always makes progress.
            ++i;
            continue;
        }

        // Emit the high byte, then the low byte, two hex digits each.
        CodeConvertor::charTo2Hex(static_cast<char>((unit >> 8) & 0xff), buf);
        outstr += buf;
        CodeConvertor::charTo2Hex(static_cast<char>(unit & 0xff), buf);
        outstr += buf;
    }
    return outstr;
}
// Formats one byte as two uppercase hexadecimal digits.
//
// in  - byte to format; only its 8 bits matter, the sign of `char` does not.
// out - receives the high-nibble digit, the low-nibble digit and a
//       terminating '\0'; must point to at least 3 writable chars.
void CodeConvertor::charTo2Hex(char in, char *out)
{
    static const char kHexDigits[] = "0123456789ABCDEF";

    const unsigned int bits = static_cast<unsigned char>(in);
    out[0] = kHexDigits[bits >> 4];   // high nibble
    out[1] = kHexDigits[bits & 0x0f]; // low nibble
    out[2] = '\0';
}
#include <QCoreApplication>
#include <QDebug>
#include <iostream>
#include "codeconvertor.h"
int main(int argc, char *argv[])
{
QCoreApplication a(argc, argv);
qDebug() << CodeConvertor::_tuf8ToUnicode("工作愉快!").data();
qDebug()<<"5DE54F5C61095FEBFF01";
return a.exec();
}