utf8转unicode

53 篇文章 2 订阅
53 篇文章 36 订阅

                     最近在做AT指令发送短信,转码成了难题,使用libiconv在pc机上ok,在板子上不行。

                     所以决定自己写,首先要弄清楚utf8和unicode之间的关系。

                   

                UTF-8编码字符理论上最多可以到6个字节长,然而16位BMP(Basic Multilingual Plane)字符最多
                只用到3个字节。下面看一下UTF-8编码表:
                 unicode                                 utf8
              U-00000000 - U-0000007F: 0xxxxxxx 
                U-00000080 - U-000007FF: 110xxxxx 10xxxxxx 
                U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx 
                U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 
                U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 
                U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx


                  

#ifndef CODECONVERTOR_H
#define CODECONVERTOR_H

#include <QString>
#include <string>

/**
 * Static-only helper that turns UTF-8 encoded text into a string of
 * hexadecimal 16-bit code units (four hex digits per unit).
 *
 * The constructor is private, so the class is used purely through its
 * static member functions.
 */
class CodeConvertor
{
public:
    /// Decodes the bytes obtained from @p in and returns the code units as
    /// lower-case hex digits in a QString. ("tuf8" is the existing spelling
    /// of the public name.)
    static QString tuf8ToUnicode(const QString &in);
    /// Same conversion for a std::string of UTF-8 bytes; the hex digits in
    /// the result are upper-case.
    static std::string _tuf8ToUnicode(const std::string &in);
    /// Writes the two upper-case hex digits of @p in, followed by a NUL
    /// terminator, into @p out (three chars are written).
    static void charTo2Hex(char in,char *out);
private:
    // Private: not meant to be instantiated from outside the class.
    CodeConvertor();

};

#endif // CODECONVERTOR_H
#include "codeconvertor.h"

#include <QDebug>

// Empty constructor. It is declared private in the class, whose public
// members are all static, so there is no state to initialise here.
CodeConvertor::CodeConvertor()
{

}

/**
 * Converts UTF-8 encoded bytes into a hex string of 16-bit code units.
 *
 * The input is turned into bytes with QString::toLatin1(), so the decoder
 * only sees genuine UTF-8 bytes when every QChar of @p in carries one raw
 * byte of the UTF-8 stream.
 * NOTE(review): a QString that already holds decoded text would need
 * toUtf8() here instead - confirm how callers build @p in before changing it.
 *
 * Every decoded code point is appended as four lower-case, zero-padded hex
 * digits (for example 'A' becomes "0041"). Code points above U+FFFF, which
 * arrive as 4-byte sequences, are appended as a UTF-16 surrogate pair
 * (eight digits).
 *
 * Handling of malformed input:
 *  - a byte that cannot start a supported sequence (a stray continuation
 *    byte or a 5/6-byte lead) is skipped, so the loop always advances;
 *  - a lead byte whose sequence is cut off by the end of the input is
 *    skipped, so nothing is read past the end of the array;
 *  - continuation bytes are not validated, only their low six bits are used;
 *  - a 4-byte sequence decoding to a value above U+10FFFF produces no output.
 *
 * @param in  Text whose Latin-1 byte image is interpreted as UTF-8.
 * @return    Concatenated 4-digit hex code units, empty for empty input.
 */
QString CodeConvertor::tuf8ToUnicode(const QString &in)
{
    const QByteArray bytes = in.toLatin1();
    const int size = bytes.size();
    QString outstr;

    int i = 0;
    while (i < size) {
        const unsigned int lead = static_cast<unsigned char>(bytes.at(i));

        // Classify the lead byte: payload bits it carries and how many
        // continuation bytes must follow.
        unsigned int codePoint = 0;
        int trailing = 0;
        if ((lead & 0x80) == 0x00) {            // 0xxxxxxx
            codePoint = lead;
        } else if ((lead & 0xe0) == 0xc0) {     // 110xxxxx 10xxxxxx
            codePoint = lead & 0x1f;
            trailing = 1;
        } else if ((lead & 0xf0) == 0xe0) {     // 1110xxxx 10xxxxxx 10xxxxxx
            codePoint = lead & 0x0f;
            trailing = 2;
        } else if ((lead & 0xf8) == 0xf0) {     // 11110xxx + three continuation bytes
            codePoint = lead & 0x07;
            trailing = 3;
        } else {
            // Not a valid start byte: step over it instead of spinning forever.
            ++i;
            continue;
        }

        // Sequence runs past the end of the data: drop the lead byte only.
        if (size - i - 1 < trailing) {
            ++i;
            continue;
        }

        for (int k = 1; k <= trailing; ++k) {
            const unsigned int next = static_cast<unsigned char>(bytes.at(i + k));
            codePoint = (codePoint << 6) | (next & 0x3f);
        }
        i += trailing + 1;

        // Split into one UTF-16 code unit, or a surrogate pair above the BMP.
        unsigned int units[2] = {0, 0};
        int unitCount = 0;
        if (codePoint <= 0xffff) {
            units[unitCount++] = codePoint;
        } else if (codePoint <= 0x10ffff) {
            const unsigned int offset = codePoint - 0x10000;
            units[unitCount++] = 0xd800 | (offset >> 10);
            units[unitCount++] = 0xdc00 | (offset & 0x3ff);
        }

        for (int u = 0; u < unitCount; ++u) {
            outstr += QString::number(units[u], 16).rightJustified(4, '0');
        }
    }

    return outstr;
}
/**
 * Converts a UTF-8 byte string into a hex string of 16-bit code units.
 *
 * Every decoded code point is appended as four upper-case hex digits
 * (high byte first, each byte formatted by charTo2Hex), e.g. the bytes
 * E5 B7 A5 become "5DE5". Code points above U+FFFF, which arrive as 4-byte
 * sequences, are appended as a UTF-16 surrogate pair (eight digits).
 *
 * Handling of malformed input:
 *  - a byte that cannot start a supported sequence (a stray continuation
 *    byte or a 5/6-byte lead) is skipped, so the loop always advances;
 *  - a lead byte whose sequence is cut off by the end of the input is
 *    skipped, so nothing is read past the end of the string;
 *  - continuation bytes are not validated, only their low six bits are used;
 *  - a 4-byte sequence decoding to a value above U+10FFFF produces no output.
 *
 * @param in  UTF-8 encoded bytes.
 * @return    Concatenated 4-digit hex code units, empty for empty input.
 */
std::string CodeConvertor::_tuf8ToUnicode(const std::string &in)
{
    typedef std::string::size_type size_type;

    const size_type size = in.size();
    std::string outstr;
    char buf[3];    // two hex digits plus the terminator written by charTo2Hex

    size_type i = 0;
    while (i < size) {
        const unsigned int lead = static_cast<unsigned char>(in[i]);

        // Classify the lead byte: payload bits it carries and how many
        // continuation bytes must follow.
        unsigned int codePoint = 0;
        size_type trailing = 0;
        if ((lead & 0x80) == 0x00) {            // 0xxxxxxx
            codePoint = lead;
        } else if ((lead & 0xe0) == 0xc0) {     // 110xxxxx 10xxxxxx
            codePoint = lead & 0x1f;
            trailing = 1;
        } else if ((lead & 0xf0) == 0xe0) {     // 1110xxxx 10xxxxxx 10xxxxxx
            codePoint = lead & 0x0f;
            trailing = 2;
        } else if ((lead & 0xf8) == 0xf0) {     // 11110xxx + three continuation bytes
            codePoint = lead & 0x07;
            trailing = 3;
        } else {
            // Not a valid start byte: step over it instead of spinning forever.
            ++i;
            continue;
        }

        // Sequence runs past the end of the data: drop the lead byte only.
        // (i < size here, so the subtraction cannot wrap.)
        if (size - i - 1 < trailing) {
            ++i;
            continue;
        }

        for (size_type k = 1; k <= trailing; ++k) {
            const unsigned int next = static_cast<unsigned char>(in[i + k]);
            codePoint = (codePoint << 6) | (next & 0x3f);
        }
        i += trailing + 1;

        // Split into one UTF-16 code unit, or a surrogate pair above the BMP.
        unsigned int units[2] = {0, 0};
        int unitCount = 0;
        if (codePoint <= 0xffff) {
            units[unitCount++] = codePoint;
        } else if (codePoint <= 0x10ffff) {
            const unsigned int offset = codePoint - 0x10000;
            units[unitCount++] = 0xd800 | (offset >> 10);
            units[unitCount++] = 0xdc00 | (offset & 0x3ff);
        }

        for (int u = 0; u < unitCount; ++u) {
            // High byte first, then low byte.
            CodeConvertor::charTo2Hex(static_cast<char>((units[u] >> 8) & 0xff), buf);
            outstr += buf;
            CodeConvertor::charTo2Hex(static_cast<char>(units[u] & 0xff), buf);
            outstr += buf;
        }
    }
    return outstr;
}
/**
 * Formats one byte as two upper-case hexadecimal digits.
 *
 * @param in   Byte to format; only its low eight bits are used.
 * @param out  Destination buffer; three chars are written: the high-nibble
 *             digit, the low-nibble digit and a terminating '\0'.
 */
void CodeConvertor::charTo2Hex(char in, char *out)
{
    static const char kHexDigits[] = "0123456789ABCDEF";

    // Work on the unsigned value so the shift cannot see a sign bit.
    const unsigned char value = static_cast<unsigned char>(in);

    out[0] = kHexDigits[(value >> 4) & 0x0f];
    out[1] = kHexDigits[value & 0x0f];
    out[2] = '\0';
}


#include <QCoreApplication>
#include <QDebug>
#include <iostream>
#include "codeconvertor.h"


int main(int argc, char *argv[])
{
    QCoreApplication a(argc, argv);

    qDebug() << CodeConvertor::_tuf8ToUnicode("工作愉快!").data();
    qDebug()<<"5DE54F5C61095FEBFF01";










    
    return a.exec();
}


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值