C++ res 文件的翻译 ANSI Unicode转换
*.res 文件是标准的C语言和C++程序的资源文件,如果要从源码翻译C++程序就要从res文件入手
遗憾的是,早期的各种编程语言(包括C++、VB)对Unicode的支持都不好,当时的程序都是ANSI应用程序,因此res也是ANSI编码的文件,这给翻译带来了巨大的难度,直到后来技术发展、全面支持Unicode之后才解决了这个问题。
本文记录一下早期res文件的处理
这就是一个res文件,看起来里面全都是乱码,因为它是ANSI编码格式。我以ISO-8859-1打开,因此只有英文和西欧语言能正常显示,俄语、希腊语和中文都是乱码:
TEXTID "auto"
ENG "auto"
ESP "auto"
FRA "auto"
DEU "Auto"
ITA "auto"
BRA "auto"
SWE "auto"
DAN "auto"
RUS "àâòî"
GRE "áõôüì."
NED "auto"
FIN "auto"
NOR "auto"
CHN "×Ô¶¯"
TEXTID "ElastoIndex"
ENG "ElastoIndex"
ESP "ÍndiceElasto"
FRA "IndexElasto"
DEU "Elasto-Index"
ITA "Ind. Elasto"
BRA "ÍndicElasto"
SWE "ElastoIndex"
DAN "ElastoIndex"
RUS "ÝëàñòîÈíäåêñ"
GRE "ÄåßêôÅëáóô"
NED "ElastoIndex"
FIN "ElastoIndex"
NOR "ElastoIndex"
CHN "ElastoIndex"
同样的文件以GB2312打开,中文显示正常,但是其他非英文内容都是乱的:
CATEGORY [CfgM_A]
TEXTID "auto"
ENG "auto"
ESP "auto"
FRA "auto"
DEU "Auto"
ITA "auto"
BRA "auto"
SWE "auto"
DAN "auto"
RUS "噔蝾"
GRE "狨酎?"
NED "auto"
FIN "auto"
NOR "auto"
CHN "自动"
TEXTID "ElastoIndex"
ENG "ElastoIndex"
ESP "蚽diceElasto"
FRA "IndexElasto"
DEU "Elasto-Index"
ITA "Ind. Elasto"
BRA "蚽dicElasto"
SWE "ElastoIndex"
DAN "ElastoIndex"
RUS "蓦囫蝾软溴犟"
GRE "腻哧襞脶篝"
NED "ElastoIndex"
FIN "ElastoIndex"
NOR "ElastoIndex"
CHN "ElastoIndex"
再试一个西里尔文编码Windows-1251(即下文代码中使用的代码页1251),只有俄语显示正常了:
CATEGORY [CfgM_A]
TEXTID "auto"
ENG "auto"
ESP "auto"
FRA "auto"
DEU "Auto"
ITA "auto"
BRA "auto"
SWE "auto"
DAN "auto"
RUS "авто"
GRE "бхфьм."
NED "auto"
FIN "auto"
NOR "auto"
CHN "ЧФ¶Ї"
TEXTID "ElastoIndex"
ENG "ElastoIndex"
ESP "НndiceElasto"
FRA "IndexElasto"
DEU "Elasto-Index"
ITA "Ind. Elasto"
BRA "НndicElasto"
SWE "ElastoIndex"
DAN "ElastoIndex"
RUS "ЭластоИндекс"
GRE "ДеЯкфЕлбуф"
NED "ElastoIndex"
FIN "ElastoIndex"
NOR "ElastoIndex"
CHN "ElastoIndex"
也就是说,这个文件的每一行都使用各自语言对应的编码,所以每次打开只能编辑一种语言,稍不注意就会损毁其他语言的内容,而且无法直观对比。
为此我设计了一个方法,将这个文件转换为Unicode编码,以便后续翻译操作:
// Converts a text file in which every line is stored in its own language-specific ANSI
// code page into one UTF-16 ("Unicode") file named "Unicode_<original name>", written
// next to the source file.
//
// The code page of a line is chosen from its leading language tag:
//   RUS -> 1251, GRE -> 1253, CHN -> 936, JPN -> 932, anything else -> 1252.
// Line terminators (CR LF) are carried over unchanged.
//
// NOTE: on .NET Core / .NET 5+ these code pages are only available after
// Encoding.RegisterProvider(CodePagesEncodingProvider.Instance) has been called.
byte[] bytes = File.ReadAllBytes(filename);

// Path.Combine copes with a filename that has no directory part; plain concatenation
// would produce a root-relative path ("\Unicode_x") in that case.
string newFile = Path.Combine(Path.GetDirectoryName(filename) ?? string.Empty, "Unicode_" + Path.GetFileName(filename));
Encoding defaultEncoding = Encoding.GetEncoding(1252);

// "using" guarantees the writer is flushed and closed even if decoding throws.
using (StreamWriter sw = new StreamWriter(newFile, false, Encoding.Unicode))
{
    int lineStart = 0;
    while (lineStart < bytes.Length)
    {
        // A line runs up to and including its CR LF; the last line may have no terminator.
        int lineEnd = bytes.Length;
        for (int i = lineStart; i < bytes.Length - 1; i++)
        {
            if (bytes[i] == 13 && bytes[i + 1] == 10)
            {
                lineEnd = i + 2;
                break;
            }
        }
        int lineLength = lineEnd - lineStart;

        // Decode as Windows-1252 first: the language tag is plain ASCII, so it reads
        // the same regardless of which code page the rest of the line uses.
        string lineStr = defaultEncoding.GetString(bytes, lineStart, lineLength);
        string rawStrT = lineStr.TrimStart();

        int codePage = 1252;
        if (rawStrT.StartsWith("RUS", StringComparison.Ordinal))
        {
            codePage = 1251;
        }
        else if (rawStrT.StartsWith("GRE", StringComparison.Ordinal))
        {
            codePage = 1253;
        }
        else if (rawStrT.StartsWith("CHN", StringComparison.Ordinal))
        {
            codePage = 936;
        }
        else if (rawStrT.StartsWith("JPN", StringComparison.Ordinal))
        {
            codePage = 932;
        }

        // Re-decode the same bytes only when the line is not Windows-1252.
        if (codePage != 1252)
        {
            lineStr = Encoding.GetEncoding(codePage).GetString(bytes, lineStart, lineLength);
        }

        sw.Write(lineStr);
        lineStart = lineEnd;
    }
}
反过来,将Unicode文件写回这种按行使用不同编码的ANSI格式就比较容易了:
// Converts a UTF-16 ("Unicode") text file back into the per-line ANSI format and writes
// it next to the source file as "ANSI_<original name>".
//
// Each line is encoded with the code page selected by its leading language tag:
//   RUS -> 1251, GRE -> 1253, CHN -> 936, JPN -> 932, anything else -> 1252.
// Every line is terminated with CR LF in the output.
//
// NOTE: on .NET Core / .NET 5+ these code pages are only available after
// Encoding.RegisterProvider(CodePagesEncodingProvider.Instance) has been called.
// NOTE(review): characters that the selected code page cannot represent are replaced
// by the encoder's fallback (typically '?') - confirm this is acceptable.
string newFile = Path.Combine(Path.GetDirectoryName(filename) ?? string.Empty, "ANSI_" + Path.GetFileName(filename));
byte[] linebreak = { 13, 10 };

// "using" guarantees both files are closed even if encoding or writing throws.
using (StreamReader sr = new StreamReader(filename, Encoding.Unicode, true))
using (FileStream fs = new FileStream(newFile, FileMode.Create))
{
    string lineData;
    // ReadLine returns null at end of stream; Peek() > 0 would also stop on U+0000.
    while ((lineData = sr.ReadLine()) != null)
    {
        string rawStrT = lineData.TrimStart();

        int codePage = 1252;
        if (rawStrT.StartsWith("RUS", StringComparison.Ordinal))
        {
            codePage = 1251;
        }
        else if (rawStrT.StartsWith("GRE", StringComparison.Ordinal))
        {
            codePage = 1253;
        }
        else if (rawStrT.StartsWith("CHN", StringComparison.Ordinal))
        {
            codePage = 936;
        }
        else if (rawStrT.StartsWith("JPN", StringComparison.Ordinal))
        {
            codePage = 932;
        }

        byte[] byteLine = Encoding.GetEncoding(codePage).GetBytes(lineData);
        fs.Write(byteLine, 0, byteLine.Length);
        fs.Write(linebreak, 0, linebreak.Length);
    }
}