UTF-8、UTF-8-BOM、GB2312文件编码格式转换

在这里插入图片描述

mainwindow.h

#ifndef MAINWINDOW_H
#define MAINWINDOW_H

#include <QObject>
#include <QMainWindow>
#include <QDialog>
#include <QPushButton>
#include <QString>
#include <QTextEdit>
#include <QComboBox>
#include <QLayout>

// Text encodings known to the editor.
// The numeric values are stored as QComboBox item data and cast back to
// Encoding, so each enumerator's value is spelled out explicitly.
enum Encoding{
    GBK          = 0,   // GBK multi-byte encoding
    UTF8         = 1,   // UTF-8 without a byte order mark
    UTF8_BOM     = 2,   // UTF-8 preceded by the EF BB BF byte order mark
    UTF16_LE_BOM = 3,   // UTF-16 little endian, identified by an FF FE prefix
    UTF16_BE_BOM = 4,   // UTF-16 big endian, identified by an FE FF prefix
    UNKNOWN      = 5    // encoding could not be determined
};

// Reads the whole of `filename` into `content`, decoding it as GBK when
// `encoding` is GBK and as UTF-8 for every other value.
// Returns false when the file cannot be opened for reading.
bool ReadFile(QString &content, const QString& filename, const Encoding& encoding);
// Writes `content` to `filename`, encoded as GBK when `encoding` is GBK, as
// UTF-8 with a byte order mark when it is UTF8_BOM, and as plain UTF-8 for
// every other value. Returns false when the file cannot be opened for writing.
bool SaveFile(const QString &content, const QString& filename, const Encoding& encoding);

// Detects the encoding of `filename`: a UTF-8 or UTF-16 byte order mark is
// recognised first; otherwise the content is classified as UTF8 or GBK via
// IsUTF8WithoutBOM. When an encoding is detected its display name is written
// to `encodingname`. Returns UNKNOWN when the file cannot be opened.
// The `encoding` parameter is neither read nor written by the implementation.
Encoding GetFileEncoding(const QString& filename, Encoding& encoding, QString &encodingname);
// Returns true when the `size` bytes at `_data` follow the UTF-8
// lead-byte/continuation-byte structure and contain at least one non-ASCII
// byte. Pure ASCII input returns false.
bool IsUTF8WithoutBOM(char* _data, int size);


// Dialog that lets the user choose a target encoding (GBK, UTF-8 or
// UTF-8-BOM) from a combo box and confirm with an "ok" button.
class EncodingDlg: public QDialog
{
public:
    // Builds the combo box and the "ok" button; the button accepts the dialog.
    EncodingDlg(QWidget *parent=NULL);
    // Returns the Encoding stored as item data of the selected combo box entry.
    Encoding GetCurrentEncoding();
private:
    QComboBox* comboBox;    // encoding choices, created in the constructor
};

// Main editor window: a QTextEdit plus three menu bar actions (open, save,
// save as) that load and store the text in a chosen encoding.
class MainWindow: public QMainWindow
{
    Q_OBJECT
public:
    // Creates the menu actions and the text edit and wires up their signals.
    MainWindow(QWidget* parent=NULL);
protected slots:
    // Shared handler for all three menu actions; dispatches on the action text.
    void OnTriggered(bool checked);
    // Marks the document as modified and refreshes the status bar.
    void OnTextChanged();
private:
    // Detects the encoding of `filename`, loads it into the text edit and
    // updates the current file/encoding state.
    void ReadFilename(const QString& filename);
    // Writes the text edit content to `filename` using current_encoding.
    void SaveFilename(const QString& filename);

    QTextEdit* textEdit;            // central editing widget
    Encoding current_encoding;      // encoding used for the next save
    QString current_encodingname;   // display name shown in the status bar
    QString current_filename;       // path of the file currently loaded
    bool change;                    // true once the text was edited since the last load/save

};
#endif // MAINWINDOW_H

mainwindow.cpp

#include "mainwindow.h"

#include <QFileDialog>
#include <QMessageBox>
#include <QFile>
#include <QComboBox>
#include <QTextCodec>
#include <QMenuBar>
#include <QAction>
#include <QStatusBar>
#include <QDebug>

// Builds the main window: three menu bar actions (open, save, save as) that
// all route to OnTriggered(), and a QTextEdit as the central widget.
// Every data member is initialised here so that no code path (for example a
// save before any file was opened) reads an indeterminate current_encoding.
MainWindow::MainWindow(QWidget *parent):
    QMainWindow(parent),
    textEdit(new QTextEdit),
    current_encoding(UNKNOWN),
    change(false)
{
    QAction* act = new QAction(QString::fromLocal8Bit("打开文件"),this->menuBar());
    QAction* act_1 = new QAction(QString::fromLocal8Bit("保存文件"),this->menuBar());
    QAction* act_2 = new QAction(QString::fromLocal8Bit("另存为"),this->menuBar());
    this->menuBar()->addAction(act);
    this->menuBar()->addAction(act_1);
    this->menuBar()->addAction(act_2);

    // The main window takes ownership of the central widget.
    this->setCentralWidget(textEdit);

    connect(act, SIGNAL(triggered(bool)), this, SLOT(OnTriggered(bool)));
    connect(act_1, SIGNAL(triggered(bool)), this, SLOT(OnTriggered(bool)));
    connect(act_2, SIGNAL(triggered(bool)), this, SLOT(OnTriggered(bool)));
    connect(textEdit, SIGNAL(textChanged()), this, SLOT(OnTextChanged()));
}
void MainWindow::OnTextChanged()
{
    change = true;
    this->statusBar()->showMessage(current_filename + QString::fromLocal8Bit("\t编码格式:")
                                   + current_encodingname + QString::fromLocal8Bit("\t内容已改变!"));
}

void MainWindow::OnTriggered(bool checked)
{
    QAction* act = (QAction*)QObject::sender();
    qDebug()<<"OnTriggered";
    if(QString::fromLocal8Bit("打开文件") == act->text()){
        QString filename = QFileDialog::getOpenFileName(NULL, QString::fromLocal8Bit("打开文件"),"/");
        if(filename.isEmpty())return;
        if(change && !textEdit->toPlainText().isEmpty() &&
                QMessageBox::question(NULL, QString::fromLocal8Bit("询问"),
                                           QString::fromLocal8Bit("文件内容已改变!是否保存?"))){
            SaveFilename(current_filename);
        }
        ReadFilename(filename);
    }else if(QString::fromLocal8Bit("保存文件") == act->text()){
        if(textEdit->toPlainText().isEmpty() || !change)return;
        SaveFilename(current_filename);
    }else if(QString::fromLocal8Bit("另存为") == act->text()){
        EncodingDlg dlg;
        if(dlg.exec()){
            QString filename = QFileDialog::getSaveFileName(NULL,
                                                            QString::fromLocal8Bit("保存文件"),"/");
            if(!filename.isEmpty()){
                current_encoding = dlg.GetCurrentEncoding();
                SaveFilename(filename);
                ReadFilename(filename);
            }
        }
    }
}

void MainWindow::ReadFilename(const QString& filename)
{
    current_encoding = GetFileEncoding(filename, current_encoding, current_encodingname);
    QString content;
    bool re = ReadFile(content, filename, current_encoding);
    if(!re){
        QMessageBox::information(NULL, QString::fromLocal8Bit("提示"),
                                 QString::fromLocal8Bit("文件打开失败!"));
        return;
    }
    textEdit->setText(content);
    change = false;
    current_filename = filename;
    this->statusBar()->showMessage(current_filename + QString::fromLocal8Bit("\t编码格式:") + current_encodingname);
}

void MainWindow::SaveFilename(const QString& filename)
{
    QString content = textEdit->toPlainText();
    bool re = SaveFile(content, filename, current_encoding);
    if(!re){
        QMessageBox::information(NULL, QString::fromLocal8Bit("提示"),
                                 QString::fromLocal8Bit("文件保存失败!"));
        return;
    }
    change = false;
}

// Builds the dialog: a combo box offering GBK, UTF-8 and UTF-8-BOM (each entry
// carries its Encoding value as item data) next to an "ok" button that
// accepts the dialog.
EncodingDlg::EncodingDlg(QWidget *parent):
    QDialog(parent)
{
    comboBox = new QComboBox;
    comboBox->addItem("GBK", GBK);
    comboBox->addItem("UTF-8", UTF8);
    comboBox->addItem("UTF-8-BOM", UTF8_BOM);

    QPushButton* okButton = new QPushButton("ok");
    connect(okButton, SIGNAL(clicked(bool)), this, SLOT(accept()));

    QHBoxLayout* row = new QHBoxLayout;
    row->addWidget(comboBox);
    row->addWidget(okButton);
    setLayout(row);
}

// Returns the Encoding attached as item data to the selected combo box entry.
Encoding EncodingDlg::GetCurrentEncoding()
{
    const int selectedRow = comboBox->currentIndex();
    const int storedValue = comboBox->itemData(selectedRow).toInt();
    return (Encoding)storedValue;
}



// Reads all of `filename` into `content` through a QTextStream.
// GBK selects the "GBK" codec; every other encoding value selects "UTF-8".
// Returns false, leaving `content` untouched, when the file cannot be opened.
bool ReadFile(QString &content, const QString &filename, const Encoding &encoding)
{
    QFile source(filename);
    const bool opened = source.open(QIODevice::ReadOnly | QIODevice::Text);
    if(opened){
        QTextStream reader(&source);
        reader.setCodec(encoding == GBK ? "GBK" : "UTF-8");
        content = reader.readAll();
        source.close();
    }
    return opened;
}

bool SaveFile(const QString &content, const QString &filename, const Encoding &encoding)
{
    QFile file(filename);
    if(!file.open(QIODevice::WriteOnly | QIODevice::Text)){
        return false;
    }

    QTextStream out(&file);
    if(encoding == GBK){
        out.setCodec("GBK");
        out << content;
        out.flush();
    }else if(encoding == UTF8_BOM){
        out.setCodec("UTF-8");
        out.setGenerateByteOrderMark(true);
        char buffer[3];
        buffer[0]=0xef;
        buffer[1]=0xbb;
        buffer[2]=0xbf;
        file.write(buffer,3);
        out << content;
        out.flush();
    }/*else if(encoding == Encoding::UTF16_LE_BOM){
        out.setCodec("UTF-16");
        out.setGenerateByteOrderMark(true);
        char buffer[2];
        buffer[0]=0xff;
        buffer[1]=0xfe;
        file.write(buffer,2);
        out << content;
        out.flush();
    }else if(encoding == Encoding::UTF16_BE_BOM){
        out.setCodec("UTF-16");
        out.setGenerateByteOrderMark(true);
        char buffer[2];
        buffer[0]=0xfe;
        buffer[1]=0xff;
        file.write(buffer,2);
        out << content;
        out.flush();
    }*/else{
        out.setCodec("UTF-8");
        out.setGenerateByteOrderMark(false);
        out << content;
        out.flush();
    }
    return true;
}

// Detects the text encoding of `filename`.
// The file is read once. A UTF-8 or UTF-16 byte order mark decides the result
// directly; without one the bytes are classified as UTF8 when they pass
// IsUTF8WithoutBOM and as GBK otherwise.
//   filename     - path of the file to inspect
//   encoding     - unused; kept so existing callers keep compiling
//   encodingname - receives the display name of the result; left untouched
//                  when the file cannot be opened
// Returns UNKNOWN when the file cannot be opened or is empty.
Encoding GetFileEncoding(const QString &filename, Encoding& encoding, QString &encodingname)
{
    Q_UNUSED(encoding);

    QFile file(filename);
    if (!file.open(QIODevice::ReadOnly)) {
        return UNKNOWN;
    }
    // QByteArray owns the buffer, so nothing leaks on any return path.
    QByteArray bytes = file.readAll();
    file.close();

    if (bytes.isEmpty()) {
        // Nothing to inspect.
        encodingname = "UNKNOWN";
        return UNKNOWN;
    }

    if (bytes.startsWith("\xEF\xBB\xBF")) {
        // UTF-8 with BOM
        encodingname = "UTF-8-BOM";
        return UTF8_BOM;
    }
    if (bytes.startsWith("\xFF\xFE")) {
        // UTF-16 LE with BOM
        encodingname = "UTF16-LE-BOM";
        return UTF16_LE_BOM;
    }
    if (bytes.startsWith("\xFE\xFF")) {
        // UTF-16 BE with BOM
        encodingname = "UTF16-BE-BOM";
        return UTF16_BE_BOM;
    }

    // No BOM: files shorter than three bytes are classified here as well.
    if (IsUTF8WithoutBOM(bytes.data(), static_cast<int>(bytes.size()))) {
        encodingname = "UTF-8";
        return UTF8;
    }
    encodingname = "GBK";
    return GBK;
}

// Checks whether a byte buffer looks like BOM-less UTF-8 text.
//   _data - buffer to inspect; it is only read, never modified
//   size  - number of bytes in the buffer
// Returns true when every byte fits the UTF-8 lead-byte/continuation-byte
// structure (sequences of 1 to 4 bytes) AND at least one non-ASCII byte is
// present. Pure ASCII, empty or NULL input returns false, because such data
// gives no evidence for UTF-8 over a legacy encoding.
// This is a structural check only: overlong forms and surrogate code points
// are not rejected.
bool IsUTF8WithoutBOM(char *_data, int size)
{
    if (!_data || size <= 0)
    {
        return false;
    }

    // Inspect the caller's buffer in place (no heap copy) and as unsigned
    // bytes, so the bit tests do not depend on whether plain char is signed.
    const unsigned char *bytes = reinterpret_cast<const unsigned char *>(_data);

    int pendingContinuationBytes = 0;   // continuation bytes still expected
    bool sawNonAscii = false;

    for (int i = 0; i < size; i++)
    {
        const unsigned char current = bytes[i];

        if (pendingContinuationBytes > 0)
        {
            // Inside a multi-byte sequence: the byte must look like 10xxxxxx.
            if ((current & 0xC0) != 0x80)
            {
                return false;
            }
            pendingContinuationBytes--;
            continue;
        }

        if (current < 0x80)
        {
            // ASCII chars, from 0x00-0x7F
            continue;
        }

        sawNonAscii = true;

        // Start of a multi-byte sequence: the count of leading 1 bits gives
        // the total sequence length.
        if ((current & 0xE0) == 0xC0)
        {
            pendingContinuationBytes = 1;   // 110xxxxx
        }
        else if ((current & 0xF0) == 0xE0)
        {
            pendingContinuationBytes = 2;   // 1110xxxx
        }
        else if ((current & 0xF8) == 0xF0)
        {
            pendingContinuationBytes = 3;   // 11110xxx
        }
        else
        {
            // A stray continuation byte, or a lead byte announcing more than
            // four bytes: invalid bit structure for UTF-8.
            return false;
        }
    }

    // A sequence cut off at the end of the buffer is invalid as well.
    return pendingContinuationBytes == 0 && sawNonAscii;
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值