主要使用QFile、QTextStream及QTextCodec三个类,先进行文件编码的识别(ANSI、UTF-8、UTF-8 BOM、UTF-16 LE、UTF-16 BE),再进行文件编码的转换。
主要用于Qt项目中源代码的批量转换,从ANSI或UTF-8转换为UTF-8 BOM。
#ifndef CODEHELPER_H
#define CODEHELPER_H
#include <QString>
#include <QTextStream>
/// Text encodings distinguished by CodeHelper.
///
/// The numeric values are written out explicitly; they are the same values
/// the compiler would assign implicitly. The enum stays unscoped so that the
/// enumerators remain usable without qualification.
enum EncodingFormat {
    ANSI    = 0,  ///< No BOM and not accepted as UTF-8 (handled as a legacy 8-bit encoding).
    UTF16LE = 1,  ///< UTF-16, little-endian (BOM bytes FF FE).
    UTF16BE = 2,  ///< UTF-16, big-endian (BOM bytes FE FF).
    UTF8    = 3,  ///< UTF-8 without a byte order mark.
    UTF8BOM = 4   ///< UTF-8 with a byte order mark (BOM bytes EF BB BF).
};
/// Helper for detecting the encoding of a text file and re-encoding it as
/// UTF-8 with a byte order mark.
class CodeHelper
{
public:
    CodeHelper();

    /// Tries to determine the encoding of the file at @p filename.
    EncodingFormat getFileEncoding(const QString& filename);

    /// Runs the same detection as getFileEncoding() and returns the result
    /// as a display string.
    QString getFileEncodingStr(const QString& filename);

    /// Reads @p srcFile using its detected encoding and writes the text to
    /// @p desFile as UTF-8 with BOM.
    /// @return false if the source does not exist or a file cannot be opened.
    bool translateFile2UTF8BOM(const QString& srcFile, const QString& desFile);

private:
    /// Configures @p stream with the codec that corresponds to @p encodingFormat.
    void useRightCodec(QTextStream& stream, const EncodingFormat& encodingFormat);
};
#endif // CODEHELPER_H
#include "codehelper.h"
#include <QFile>
#include <QTextCodec>
#include <QMap>
#include <QDebug>
// Name of every EncodingFormat value. The strings serve both as display text
// (getFileEncodingStr) and as codec lookup names (useRightCodec).
static const QMap<EncodingFormat, QString> code2StringMap{
    {EncodingFormat::ANSI,    QStringLiteral("ANSI")},
    {EncodingFormat::UTF16LE, QStringLiteral("UTF-16LE")},
    {EncodingFormat::UTF16BE, QStringLiteral("UTF-16BE")},
    {EncodingFormat::UTF8,    QStringLiteral("UTF-8")},
    {EncodingFormat::UTF8BOM, QStringLiteral("UTF-8BOM")},
};
// The helper keeps no state, so the compiler-generated constructor body suffices.
CodeHelper::CodeHelper() = default;
// Detects the encoding of `filename` and returns its name from code2StringMap.
QString CodeHelper::getFileEncodingStr(const QString& filename)
{
    return code2StringMap.value(getFileEncoding(filename));
}
/**
 * @brief Detects the text encoding of a file.
 *
 * Detection order:
 *  1. A byte order mark at the start of the file selects UTF-16 LE,
 *     UTF-16 BE or UTF-8 with BOM.
 *  2. Otherwise the whole content is decoded as UTF-8; if the decoder reports
 *     no invalid and no dangling bytes the file is UTF-8, else ANSI.
 *
 * An empty file and a pure-ASCII file are both reported as UTF-8.
 *
 * @param filename Path of the file to inspect.
 * @return The detected encoding. EncodingFormat::ANSI is returned as a
 *         fallback when the file cannot be opened.
 */
EncodingFormat CodeHelper::getFileEncoding(const QString& filename)
{
    QFile file(filename);
    if (!file.open(QIODevice::ReadOnly)) {
        // The interface has no error channel, so log and return a defined value
        // instead of an uninitialized one.
        qWarning() << "CodeHelper::getFileEncoding: cannot open" << filename;
        return EncodingFormat::ANSI;
    }

    // Read everything: validating only the first few bytes says nothing about
    // the rest of the file and may cut a multi-byte sequence in half.
    const QByteArray content = file.readAll();
    file.close();

    // startsWith() is length-safe, so files shorter than a BOM are handled.
    if (content.startsWith("\xFF\xFE")) {
        return EncodingFormat::UTF16LE;
    }
    if (content.startsWith("\xFE\xFF")) {
        return EncodingFormat::UTF16BE;
    }
    if (content.startsWith("\xEF\xBB\xBF")) {
        return EncodingFormat::UTF8BOM;
    }

    QTextCodec* utf8Codec = QTextCodec::codecForName("UTF-8");
    if (utf8Codec == nullptr) {
        // Without a UTF-8 codec the content cannot be validated.
        return EncodingFormat::ANSI;
    }

    QTextCodec::ConverterState state;
    utf8Codec->toUnicode(content.constData(), content.size(), &state);

    // remainingChars > 0 means the data ended inside a multi-byte sequence,
    // which is just as invalid for a complete file as a malformed byte.
    const bool validUtf8 = (state.invalidChars == 0) && (state.remainingChars == 0);
    return validUtf8 ? EncodingFormat::UTF8 : EncodingFormat::ANSI;
}
/**
 * @brief Configures a text stream with the codec for the given encoding.
 *
 * - UTF8BOM: UTF-8 codec, and BOM generation is switched on for writing.
 * - ANSI:    the system locale codec ("ANSI" is not a codec name that
 *            QTextCodec::codecForName() can resolve).
 * - others:  the codec named in code2StringMap.
 *
 * If a codec lookup fails, the locale codec is used so that the stream never
 * receives a null codec.
 *
 * @param stream         Stream to configure; call before any read or write.
 * @param encodingFormat Encoding the stream should decode/encode.
 */
void CodeHelper::useRightCodec(QTextStream& stream, const EncodingFormat& encodingFormat)
{
    QTextCodec* codec = nullptr;

    switch (encodingFormat) {
    case UTF8BOM:
        stream.setGenerateByteOrderMark(true);
        // "UTF-8BOM" is only a display name; the actual codec is plain UTF-8.
        codec = QTextCodec::codecForName(code2StringMap.value(EncodingFormat::UTF8).toUtf8());
        break;
    case ANSI:
        codec = QTextCodec::codecForLocale();
        break;
    default:
        codec = QTextCodec::codecForName(code2StringMap.value(encodingFormat).toUtf8());
        break;
    }

    if (codec == nullptr) {
        codec = QTextCodec::codecForLocale();
    }
    stream.setCodec(codec);
}
bool CodeHelper::translateFile2UTF8BOM(const QString &srcFile, const QString &desFile)
{
if(!QFile::exists(srcFile)) {
return false;
}
QFile file_src(srcFile);
if(!file_src.open(QIODevice::ReadOnly)) {
return false;
}
QTextStream stream_src(&file_src);
useRightCodec(stream_src, getFileEncoding(srcFile));
QString info_src = stream_src.readAll();
file_src.close();
QFile file_des(desFile);
if(!file_des.open(QIODevice::WriteOnly)) {
return false;
}
QTextStream stream_des(&file_des);
useRightCodec(stream_des, EncodingFormat::UTF8BOM);
stream_des << info_src;
file_des.close();
return true;
}