X64dbg中文乱码解决
X64dbg可以对64位的软件进行反汇编调试,是针对OllyDbg只能调试32位软件的改进,使用也比较方便。但由于该软件前端使用Qt开发,对中文的解析经常会出现乱码,不能很好解析出中文(经测试发现对GB2312、GBK的字符解析都没问题),使用Strings和x64dbg_tol两款插件也不能解决问题。为此对x64dbg-2021-1-12版进行了修改完善,增加了解析UTF-8的功能并完善了UTF-16(Unicode)的解析功能,对CPU dump界面显示方式和寄存器、堆栈标签显示方式进行了修改,同时增加了自动对进程PEB和线程TEB进行注释的功能。
一、源码下载及编译
1、下载development版源码
$ git clone -b development https://hub.fastgit.org/x64dbg/x64dbg.git
进入x64dbg目录下,将.gitmodules内的github.com替换为hub.fastgit.org
然后在x64dbg目录内执行:
git submodule update --init --recursive
下载子目录文件。(使用hub.fastgit.org代替github.com是为了加快下载速度)
2、安装编译环境
下载安装:
qt-opensource-windows-x86-msvc2013_64-5.6.3.exe
qt-opensource-windows-x86-msvc2013-5.6.3.exe
qt-creator-opensource-windows-x86-4.3.1.exe
vs2013sp5
windows_sdk_8.1
qt-vsaddin-msvc2013-2.3.2.vsix
并完成QT_VS_TOOLS的设置(通用设置,略)。
为了调试方便,QT的pro工程使用QT_VS_TOOLS导入解决方案中,不单独使用Qt Creator编译。
导入工程选择x64-release版本编译即可成功。(略)
二、UTF-8解析功能实现
在x64dbg\src\dbg\disasm_helper.cpp文件内修改如下函数:
在isunicodestring内对汉字进行严格过滤
/**
 * @brief Heuristically decides whether the bytes at @p data start a UTF-16 string.
 *
 * The candidate is first run through a strict byte filter intended to keep only
 * Chinese characters, then copied (up to @p maxlen bytes, stopping at a zero
 * wchar_t), round-tripped through the local code page, compared with the input
 * and finally passed to the WString overload of isunicodestring.
 *
 * @param data   Candidate bytes; at least the first two bytes are read.
 * @param maxlen Upper bound used both as the scratch buffer element count and
 *               as the byte limit of the copy loop.
 * @return true when every check passes, false otherwise.
 */
extern "C" __declspec(dllexport) bool isunicodestring(const unsigned char* data, int maxlen)
{
    // Strict filter for Chinese characters (added by the patch). It runs before
    // any allocation so that a rejected candidate cannot leak the scratch buffer.
    // NOTE(review): data[0] is the first byte in memory; on a little-endian target
    // that is the LOW byte of the first UTF-16 code unit, while these bounds look
    // like high-byte bounds of CJK ranges - confirm the intended byte order.
    if(data[0] < 0x34 || data[0] > 0x80)
        return false;
    if(data[0] == 0x4D && data[1] > 0xB5)
        return false;

    // Plain new throws on failure, so no null check is needed here.
    wchar_t* safebuffer = new wchar_t[maxlen];

    // Copy code units until a terminator is met or maxlen bytes were consumed.
    int len = 0;
    for(const wchar_t* p = (const wchar_t*)data; *p; len += sizeof(wchar_t), p++)
    {
        if(len >= maxlen)
            break;
        safebuffer[p - (const wchar_t*)data] = *p;
    }

    // Fewer than two code units is not considered a string.
    if(len < 2 * sizeof(wchar_t))
    {
        delete[] safebuffer;
        return false;
    }

    safebuffer[len / sizeof(wchar_t) - 1] = 0; // Mark the end of string

    String data2;
    WString wdata2;

    // Convert to and from ANSI
    data2 = StringUtils::Utf16ToLocalCp(safebuffer);
    delete[] safebuffer;
    if(data2.size() < 2)
        return false;

    wdata2 = StringUtils::LocalCpToUtf16(data2);
    if(wdata2.size() < 2)
        return false;

    // Is the data exactly representable in both ANSI and Unicode?
    if(memcmp(wdata2.c_str(), data, wdata2.size() * sizeof(wchar_t)) != 0)
        return false;

    // Filter out bad chars
    if(!isunicodestring(wdata2))
        return false;

    return true;
}
增加utf-8字符串判断函数(此处只检测第一个字符是否为utf-8):通过对软件数据跟踪发现,x64dbg内同时存在三字节的UTF-8和两字节的UTF-8,而两字节的UTF-8软件处理为Unicode编码(软件命名为UTF-16),而对三字节的UTF-8编码没有进行处理,
如图:0000h地址处的 E590就是"启"的两字节UTF-8编码0x90E5,即通常意义的Unicode码。
0xCCh处的E590AF就是启的三字节UTF-8编码0xE590AF。
/**
 * @brief Checks whether @p data begins with a three-byte UTF-8 sequence in the
 *        range this patch treats as a Chinese character.
 *
 * Only the first character is inspected: the lead byte must be in 0xE3..0xE9
 * and the next two bytes must be UTF-8 continuation bytes (0x80..0xBF).
 * A NUL byte in any of the three positions fails these range checks, so no
 * separate terminator scan is required.
 *
 * No temporary buffer is allocated; at most three bytes of @p data are read and
 * never more than @p maxlen.
 *
 * @param data   Candidate bytes (may be null).
 * @param maxlen Number of readable bytes at @p data.
 * @return true when the first three bytes match the pattern, false otherwise
 *         (including null @p data or @p maxlen < 3).
 */
extern "C" __declspec(dllexport) bool isutf8string(const unsigned char* data, int maxlen)
{
    // A three-byte sequence cannot fit in fewer than three readable bytes.
    if(!data || maxlen < 3)
        return false;

    // Only match Chinese characters encoded in three bytes.
    const bool leadOk = data[0] >= 0xE3 && data[0] <= 0xE9;
    const bool secondOk = data[1] >= 0x80 && data[1] <= 0xBF;
    const bool thirdOk = data[2] >= 0x80 && data[2] <= 0xBF;

    return leadOk && secondOk && thirdOk;
}
disasmispossiblestring内增加对utf8字符串的处理
/**
 * @brief Reads a small window of memory at @p addr and classifies it as an
 *        ASCII, UTF-16 or three-byte UTF-8 string candidate.
 *
 * The checks run in that order and the first match wins.
 *
 * @param addr Address to read from.
 * @param type Optional out-parameter; receives str_ascii, str_unicode or
 *             str_utf8 on a match and str_none when no classifier matched.
 *             It is left untouched when the initial read check fails.
 * @return true when one of the classifiers matched, false otherwise.
 */
bool disasmispossiblestring(duint addr, STRING_TYPE* type)
{
    unsigned char data[60];
    memset(data, 0, sizeof(data));
    duint bytesRead = 0;
    if(!MemReadUnsafe(addr, data, sizeof(data), &bytesRead) && bytesRead < 2)
        return false;

    if(isasciistring(data, sizeof(data)))
    {
        if(type)
            *type = str_ascii;
        return true;
    }

    if(isunicodestring(data, sizeof(data) / 2))
    {
        if(type)
            *type = str_unicode;
        return true;
    }

    // Patch addition: three-byte UTF-8 detection. The length passed must not
    // exceed the local buffer, otherwise the callee may read past its end.
    if(isutf8string(data, sizeof(data)))
    {
        if(type)
            *type = str_utf8;
        return true;
    }

    if(type)
        *type = str_none;
    return false;
}
disasmgetstringat对三字节UTF-8字符串进行编码转换,并返回
/**
 * @brief Extracts the string located at @p addr as escaped UTF-8 text.
 *
 * The memory is classified in the order ASCII, UTF-16, three-byte UTF-8.
 * ASCII results are written to @p ascii; UTF-16 and UTF-8 results are written
 * to @p unicode.
 *
 * @param addr    Address of the candidate string.
 * @param type    Optional out-parameter; reset to str_none on entry and set to
 *                the detected kind on success.
 * @param ascii   Destination for ASCII strings.
 * @param unicode Destination for UTF-16 and UTF-8 strings.
 * @param maxlen  Size limit applied to the classifiers and to the output copy.
 * @return true when a string was recognised and written, false otherwise.
 */
bool disasmgetstringat(duint addr, STRING_TYPE* type, char* ascii, char* unicode, int maxlen)
{
    if(type)
        *type = str_none;

    if(!MemIsValidReadPtrUnsafe(addr, true) || !disasmispossiblestring(addr))
        return false;

    Memory<unsigned char*> data((maxlen + 1) * 2, "disasmgetstringat:data");
    MemReadUnsafe(addr, data(), (maxlen + 1) * 2); //TODO: use safe version?

    // Save a few pointer casts
    auto rawText = (char*)data();

    // Shared tail of every branch: store the UTF-8 text back into the scratch
    // buffer, escape it and copy the escaped form to the caller's buffer.
    auto publish = [&](const std::string & utf8Text, char* out)
    {
        memcpy(rawText, utf8Text.c_str(), min((size_t(maxlen) + 1) * 2, utf8Text.size() + 1));
        String escaped = StringUtils::Escape(rawText);
        strncpy_s(out, min(int(escaped.length()) + 1, maxlen), escaped.c_str(), _TRUNCATE);
    };

    // ASCII only string: convert from the local code page to UTF-8.
    if(isasciistring(data(), maxlen))
    {
        if(type)
            *type = str_ascii;
        std::string converted = StringUtils::LocalCpToUtf8((const char*)data());
        publish(converted, ascii);
        return true;
    }

    // UTF-16 string: convert to UTF-8.
    if(isunicodestring(data(), maxlen))
    {
        if(type)
            *type = str_unicode;
        std::string converted = StringUtils::Utf16ToUtf8((const wchar_t*)data());
        publish(converted, unicode);
        return true;
    }

    // Patch addition: the bytes already are UTF-8, so they are taken as-is.
    if(isutf8string(data(), maxlen))
    {
        if(type)
            *type = str_utf8;
        std::string verbatim = (const char*)data();
        publish(verbatim, unicode);
        return true;
    }

    return false;
}
disasmgetstringatwrapper处理字符串输出,此处“&L”开头为两字节Unicode(UTF-16)字符,“#F”开头为三字节UTF-8字符
/**
 * @brief Produces the display text for a string found at @p addr, or at the
 *        address stored at @p addr.
 *
 * The memory at @p addr is classified once. Depending on the result the text
 * is written to @p dest with these decorations:
 *  - classified as ASCII:  "..."   / L"..."  / F"..."
 *  - classified as UTF-16: &"..."  / &L"..." / &F"..."
 *  - classified as UTF-8:  #"..."  / #L"..." / #F"..."
 *  - not a string, but @p addr holds a valid pointer: the pointee is tried and
 *    written as "..." / L"..." / F"...".
 * Within each row the variant is chosen by the type reported by
 * disasmgetstringat (ASCII, UTF-16, UTF-8 respectively).
 *
 * @param addr  Address to inspect.
 * @param dest  Output buffer; written with a limit of MAX_STRING_SIZE
 *              characters. It is set to the empty string before any lookup.
 * @param cache Forwarded to MemIsValidReadPtrUnsafe.
 * @return true when a string was written to @p dest, false otherwise.
 */
bool disasmgetstringatwrapper(duint addr, char* dest, bool cache)
{
    if(!MemIsValidReadPtrUnsafe(addr, cache))
        return false;

    // Reads a pointer-sized value and returns it only if it is itself readable.
    auto readValidPtr = [cache](duint addr) -> duint
    {
        duint addrPtr;
        if(MemReadUnsafe(addr, &addrPtr, sizeof(addrPtr)) && MemIsValidReadPtrUnsafe(addrPtr, cache))
            return addrPtr;
        return 0;
    };

    *dest = '\0';
    char string[MAX_STRING_SIZE];
    duint addrPtr = readValidPtr(addr);
    STRING_TYPE strtype;

    // Classify the memory at addr a single time instead of once per string
    // kind; each classification performs a memory read.
    STRING_TYPE addrKind = str_none;
    if(!disasmispossiblestring(addr, &addrKind))
        addrKind = str_none;

    if(addrKind == str_ascii)
    {
        if(disasmgetstringat(addr, &strtype, string, string, MAX_STRING_SIZE - 5))
        {
            if(strtype == str_ascii)
                sprintf_s(dest, MAX_STRING_SIZE, "\"%s\"", string);
            else if(strtype == str_unicode)
                sprintf_s(dest, MAX_STRING_SIZE, "L\"%s\"", string);
            else if(strtype == str_utf8)
                sprintf_s(dest, MAX_STRING_SIZE, "F\"%s\"", string);
            return true;
        }
        return false;
    }

    if(addrKind == str_unicode)
    {
        if(disasmgetstringat(addr, &strtype, string, string, MAX_STRING_SIZE - 4))
        {
            if(strtype == str_ascii)
                sprintf_s(dest, MAX_STRING_SIZE, "&\"%s\"", string);
            else if(strtype == str_unicode)
                sprintf_s(dest, MAX_STRING_SIZE, "&L\"%s\"", string);
            else if(strtype == str_utf8)
                sprintf_s(dest, MAX_STRING_SIZE, "&F\"%s\"", string);
            return true;
        }
        return false;
    }

    if(addrKind == str_utf8)
    {
        if(disasmgetstringat(addr, &strtype, string, string, MAX_STRING_SIZE - 4))
        {
            if(strtype == str_ascii)
                sprintf_s(dest, MAX_STRING_SIZE, "#\"%s\"", string);
            else if(strtype == str_unicode)
                sprintf_s(dest, MAX_STRING_SIZE, "#L\"%s\"", string);
            else if(strtype == str_utf8)
                sprintf_s(dest, MAX_STRING_SIZE, "#F\"%s\"", string);
            return true;
        }
        return false;
    }

    // addr itself is not a string: try the address it points to.
    if(addrPtr)
    {
        if(disasmgetstringat(addrPtr, &strtype, string, string, MAX_STRING_SIZE - 5))
        {
            // A very short result whose target is itself a valid pointer is
            // rejected rather than reported as a string.
            if(int(strlen(string)) <= (strtype == str_ascii ? 3 : 2) && readValidPtr(addrPtr))
                return false;
            if(strtype == str_ascii)
                sprintf_s(dest, MAX_STRING_SIZE, "\"%s\"", string);
            else if(strtype == str_unicode)
                sprintf_s(dest, MAX_STRING_SIZE, "L\"%s\"", string);
            else if(strtype == str_utf8)
                sprintf_s(dest, MAX_STRING_SIZE, "F\"%s\"", string);
            return true;
        }
    }

    return false;
}
修改后即可在字符串参考中正确解析出UTF-8字符。
三、界面显示修改
在寄存器及堆栈的标签、备注显示方式中使用“string:module.label”形式,
并增加了PEB和TEB自动注释的功能
及CPU Dump窗口多种编码同时显示功能。
由于只是UI的操作,此处就不贴代码了
附件:
可编译源码 及 x64dbg修改版:
下载地址一:
链接:https://pan.baidu.com/s/1zEy0o_D3HJz1R5kUfKOj-Q
提取码:3jpm
复制这段内容后打开百度网盘手机App,操作更方便哦
下载地址二:
编译后软件:x64dbg_2021_01_12(Modified By 风吹小裤衩(20210329))
软件源码:x64dbg_2021_01_12源码(Modified By 风吹小裤衩(20210329))
不足之处,欢迎报告Bug