utf8转unicode

最新推荐文章于 2024-07-18 15:29:38 发布

写了程序换酒钱

最新推荐文章于 2024-07-18 15:29:38 发布

阅读量1k

点赞数

分类专栏： C/C++ qt qt

本文链接：https://blog.csdn.net/zhx6044/article/details/8642423

版权

C/C++ 同时被 3 个专栏收录

85 篇文章 0 订阅

订阅专栏

53 篇文章 2 订阅

订阅专栏

53 篇文章 36 订阅

订阅专栏

最近在做通过 AT 指令发送短信的功能，转码成了难题：使用 libiconv 在 PC 上没有问题，但在开发板上不行。

所以决定自己实现，首先要弄清 UTF-8 和 Unicode 之间的关系。

UTF-8 编码的字符理论上最长可以到 6 个字节，然而 16 位的 BMP（Basic Multilingual Plane）字符最多只用到 3 个字节。下面看一下 UTF-8 编码表：

    Unicode                    UTF-8
    U-00000000 - U-0000007F:   0xxxxxxx
    U-00000080 - U-000007FF:   110xxxxx 10xxxxxx
    U-00000800 - U-0000FFFF:   1110xxxx 10xxxxxx 10xxxxxx
    U-00010000 - U-001FFFFF:   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    U-00200000 - U-03FFFFFF:   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
    U-04000000 - U-7FFFFFFF:   1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx

#ifndef CODECONVERTOR_H
#define CODECONVERTOR_H

#include <QString>
#include <string>

/// Collection of static helpers that turn UTF-8 encoded text into a string
/// of hexadecimal digits, four digits per 16-bit Unicode code unit with the
/// high byte written first.
class CodeConvertor
{
public:
    /// Converts the bytes returned by in.toLatin1(), interpreted as UTF-8,
    /// into lowercase hex digits.
    static QString tuf8ToUnicode(const QString &in);
    /// Converts the UTF-8 bytes of @p in into uppercase hex digits.
    static std::string _tuf8ToUnicode(const std::string &in);
    /// Writes the two uppercase hex digits of @p in followed by '\0' into
    /// @p out, which therefore must have room for three chars.
    static void charTo2Hex(char in,char *out);
private:
    // The constructor is private, so code outside the class cannot create
    // instances; only the static functions above are usable.
    CodeConvertor();

};

#endif // CODECONVERTOR_H
#include "codeconvertor.h"

#include <QDebug>

// Nothing to initialise: the class declares no data members.
CodeConvertor::CodeConvertor() {}

QString CodeConvertor::tuf8ToUnicode(const QString &in)
{
    QString outstr;
    QByteArray byteArr = in.toLatin1();


    for (int i = 0;i < byteArr.length();) {
        if ((byteArr[i] & 0x80) == 0x00)
        {//0x0xxxxxx 只占一个字节
             outstr += "00";
            if (!(byteArr[i] & 0xf0)) {
                outstr += "0";
            }
            outstr += QString::number((uint)byteArr[i] & 0x000000ff,16);
            ++i;
        }
        else
        {

            if ((byteArr[i] & 0xe0) == 0xc0)
            {//0x110xxxxx 0x10xxxxxx 占两个字节
                char t1 = byteArr[i] & 0x1f;//第一个字节的后五位 1
                char t2 = byteArr[++i] & 0x3f;//第而个字节的后六位 2
                char t3 = t2 | ((t1 & 0x03) << 6);//2
                char t4 = (t1 >> 2) & 07;//1
                if (!(t4 & 0xf0)) {
                    outstr += "0";
                }
                outstr += QString::number((uint)t4 & 0x000000ff,16);
                if (!(t3 & 0xf0)) {
                    outstr += "0";
                }
                outstr += QString::number((uint)t3 & 0x000000ff,16);
                ++i;

            }
            else
            {
                if ((byteArr[i] & 0xf0) == 0xe0)
                {//占3个字节
                    char t1 = byteArr[i] & 0x0f;//1
                    char t2 = byteArr[++i] & 0x3f;//2
                    char t3 = byteArr[++i] & 0x3f;//3
                    char t4 = t3 | ((t2 & 0x03) << 6);//3
                    char t5 = ((t2 >> 2) & 0x0f) | ((t1 << 4) & 0xf0) ;//2
                    if (!(t5 & 0xf0)) {
                        outstr += "0";
                    }
                    outstr += QString::number((uint)t5 & 0x000000ff,16);
                    if (!(t4 & 0xf0)) {
                        outstr += "0";
                    }
                    outstr += QString::number((uint)t4 & 0x000000ff,16);

                    ++i;

                }

            }
        }
    }

    return outstr;

}
/// Appends one 16-bit code unit to @p out as four uppercase hex digits,
/// high byte first, using CodeConvertor::charTo2Hex for each byte.
static void appendCodeUnitUpper(std::string &out, unsigned long unit)
{
    char buf[3];
    CodeConvertor::charTo2Hex(static_cast<char>((unit >> 8) & 0xff), buf);
    out += buf;
    CodeConvertor::charTo2Hex(static_cast<char>(unit & 0xff), buf);
    out += buf;
}

/// Decodes the UTF-8 sequence starting at in[pos].
///
/// @param in        raw UTF-8 data
/// @param pos       index of the lead byte; must be < in.length()
/// @param codePoint receives the decoded code point on success
/// @return number of bytes in the sequence (1..4), or 0 when the byte at
///         @p pos is not a valid lead byte, the sequence is truncated, a
///         continuation byte is missing, or the value exceeds U+10FFFF.
static std::string::size_type decodeUtf8Sequence(const std::string &in,
                                                 std::string::size_type pos,
                                                 unsigned long &codePoint)
{
    const unsigned long lead = static_cast<unsigned char>(in[pos]);
    std::string::size_type length = 0;
    unsigned long value = 0;

    if ((lead & 0x80) == 0x00) {            // 0xxxxxxx
        length = 1;
        value = lead;
    } else if ((lead & 0xe0) == 0xc0) {     // 110xxxxx 10xxxxxx
        length = 2;
        value = lead & 0x1f;
    } else if ((lead & 0xf0) == 0xe0) {     // 1110xxxx 10xxxxxx 10xxxxxx
        length = 3;
        value = lead & 0x0f;
    } else if ((lead & 0xf8) == 0xf0) {     // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        length = 4;
        value = lead & 0x07;
    } else {
        return 0;                           // stray continuation byte or 5/6-byte lead
    }

    if (in.length() - pos < length) {
        return 0;                           // sequence cut off at the end of the data
    }
    for (std::string::size_type k = 1; k < length; ++k) {
        const unsigned long next = static_cast<unsigned char>(in[pos + k]);
        if ((next & 0xc0) != 0x80) {
            return 0;                       // not a 10xxxxxx continuation byte
        }
        value = (value << 6) | (next & 0x3f);
    }
    if (value > 0x10ffffUL) {
        return 0;                           // cannot be expressed in UTF-16
    }

    codePoint = value;
    return length;
}

/// Converts a UTF-8 string into a hex string of UTF-16 code units.
///
/// Each code unit becomes four uppercase hex digits, high byte first.
/// Code points above U+FFFF (4-byte UTF-8 sequences) are written as a
/// surrogate pair. Bytes that do not start a well-formed sequence are
/// skipped one at a time, so malformed or truncated input can neither hang
/// the loop nor read past the end of the string.
///
/// @param in UTF-8 encoded bytes
/// @return the hex string; empty when @p in is empty
std::string CodeConvertor::_tuf8ToUnicode(const std::string &in)
{
    std::string outstr;
    std::string::size_type i = 0;
    while (i < in.length()) {
        unsigned long codePoint = 0;
        const std::string::size_type consumed = decodeUtf8Sequence(in, i, codePoint);
        if (consumed == 0) {
            ++i; // skip the offending byte and resynchronise on the next one
            continue;
        }

        if (codePoint >= 0x10000UL) {
            // Outside the BMP: split into a high/low surrogate pair.
            const unsigned long offset = codePoint - 0x10000UL;
            appendCodeUnitUpper(outstr, 0xd800UL + (offset >> 10));
            appendCodeUnitUpper(outstr, 0xdc00UL + (offset & 0x3ff));
        } else {
            appendCodeUnitUpper(outstr, codePoint);
        }
        i += consumed;
    }
    return outstr;
}
/// Formats one byte as two uppercase hexadecimal digits.
///
/// @param in  byte to format; only its low eight bits are used
/// @param out receives the high-nibble digit, the low-nibble digit and a
///            terminating '\0', so it must point to at least three chars
void CodeConvertor::charTo2Hex(char in, char *out)
{
    // High nibble first, then low nibble; masking before the shift keeps
    // the result in 0..15 even where plain char is signed.
    const int nibbles[2] = { (in & 0xf0) >> 4, in & 0x0f };

    for (int idx = 0; idx < 2; ++idx) {
        const int digit = nibbles[idx];
        out[idx] = static_cast<char>(digit < 10 ? '0' + digit
                                                : 'A' + (digit - 10));
    }

    out[2] = '\0';
}


#include <QCoreApplication>
#include <QDebug>
#include <iostream>
#include "codeconvertor.h"


int main(int argc, char *argv[])
{
    QCoreApplication a(argc, argv);

    qDebug() << CodeConvertor::_tuf8ToUnicode("工作愉快！").data();
    qDebug()<<"5DE54F5C61095FEBFF01";










    
    return a.exec();
}

写了程序换酒钱

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
utf8转unicode

最近在做AT指令发送短信，转码成了难题，使用libiconv在pc机上ok，在板子上不行。所以自己写，第一先要看utf8和unicode之间的关系。 UTF-8编码字符理论上可以最多到6个字节长,然而16位BMP（Basic Multilingual Plane）字符
复制链接

扫一扫

专栏目录