X64dbg 2021最新版中文乱码解决

最新推荐文章于 2024-07-27 18:48:41 发布

风吹小裤衩_A

最新推荐文章于 2024-07-27 18:48:41 发布

阅读量10w+

点赞数 5

文章标签：乱码字符串 c++

本文链接：https://blog.csdn.net/qq_43522781/article/details/115311560

版权

本文介绍了如何解决X64dbg在处理中文时出现乱码的问题，通过源码修改增强了对UTF-8的支持。主要修改了disasm_helper.cpp文件，增加了对UTF-8字符串的判断和处理，并调整了界面显示，使寄存器、堆栈标签和PEB、TEB注释能正确显示UTF-8字符。

摘要由CSDN通过智能技术生成

X64dbg中文乱码解决

X64dbg可以对64位的软件进行反汇编和调试，是针对OllyDbg只能调试32位软件的改进，使用也比较方便。但由于该软件前端使用QT开发，对中文的解析经常会出现乱码，不能很好解析出中文（经测试发现对GB2312、GBK的字符解析都没问题），使用Strings和x64dbg_tol两款插件也不能解决问题，为此对x64dbg-2021-1-12版进行了修改完善，增加了解析UTF-8及完善了UTF-16（Unicode）的功能，并对CPU dump界面显示方式和寄存器、堆栈标签显示方式进行了修改，同时增加了自动对进程PEB和线程TEB进行注释的功能。

一、源码下载及编译

1、下载development版源码

$ git clone -b development https://hub.fastgit.org/x64dbg/x64dbg.git

进入x64dbg目录下，将.gitmodules内的github.com替换为hub.fastgit.org
然后在x64dbg目录内执行：

git submodule update --init --recursive

下载子目录文件。（使用hub.fastgit.org代替github.com是为了加快下载速度）

2、安装编译环境

下载安装：
qt-opensource-windows-x86-msvc2013_64-5.6.3.exe
qt-opensource-windows-x86-msvc2013-5.6.3.exe
qt-creator-opensource-windows-x86-4.3.1.exe
vs2013sp5
windows_sdk_8.1
qt-vsaddin-msvc2013-2.3.2.vsix
并完成QT_VS_TOOLS的设置（通用设置，略）。
为了调试方便，QT的pro工程使用QT_VS_TOOLS导入解决方案中，不单独使用Qt Creator编译。
导入工程选择x64-release版本编译即可成功。（略）

二、UTF-8解析功能实现

在x64dbg\src\dbg\disasm_helper.cpp文件内修改如下函数：
在isunicodestring内对汉字进行严格过滤

/**
 * Heuristically decides whether `data` starts with a UTF-16 string that
 * survives a round trip through the local ANSI code page.
 *
 * @param data   Bytes to inspect; read as a sequence of wchar_t up to a zero
 *               unit or until `maxlen` bytes have been consumed.
 * @param maxlen Byte budget for the scan (also used as the element count of
 *               the scratch buffer).
 * @return true when the bytes pass the first-byte filter, contain at least
 *         two wchar_t units, convert to the local code page and back without
 *         change, and pass the WString overload of isunicodestring.
 */
extern "C" __declspec(dllexport) bool isunicodestring(const unsigned char* data, int maxlen)
{
    if(!data || maxlen <= 0)
        return false;

    // Strict first-byte filter added by the UTF-8 patch. It runs before the
    // scratch buffer is allocated so a rejection cannot leak it.
    // NOTE(review): data[0] is the LOW byte of a little-endian UTF-16 unit, yet
    // the bounds (0x34, 0x4D/0xB5, 0x80) look like high-byte CJK range limits.
    // Logic kept as written; confirm the intended byte order before changing.
    if(data[0] < 0x34 || data[0] > 0x80)
        return false;
    if(data[0] == 0x4D && data[1] > 0xB5)
        return false;

    // Plain new throws on failure, so no null check is needed here.
    wchar_t* safebuffer = new wchar_t[maxlen];

    // Copy wchar_t units until a terminator or the byte budget is reached.
    int len = 0;
    for(const wchar_t* p = (const wchar_t*)data; *p; len += sizeof(wchar_t), p++)
    {
        if(len >= maxlen)
            break;
        safebuffer[p - (const wchar_t*)data] = *p;
    }

    // Fewer than two units is too short to be considered a string.
    if(len < int(2 * sizeof(wchar_t)))
    {
        delete[] safebuffer;
        return false;
    }
    safebuffer[len / sizeof(wchar_t) - 1] = 0; // Mark the end of string

    String data2;
    WString wdata2;
    // Convert to and from ANSI
    data2 = StringUtils::Utf16ToLocalCp(safebuffer);
    delete[] safebuffer;
    if(data2.size() < 2)
        return false;
    wdata2 = StringUtils::LocalCpToUtf16(data2);
    if(wdata2.size() < 2)
        return false;
    // Is the data exactly representable in both ANSI and Unicode?
    if(memcmp(wdata2.c_str(), data, wdata2.size() * sizeof(wchar_t)) != 0)
        return false;
    // Filter out bad chars
    if(!isunicodestring(wdata2))
        return false;
    return true;
}

增加utf-8字符串判断函数（此处只检测第一个字符是否为utf-8）：通过对软件数据跟踪发现，x64dbg内同时存在三字节的UTF-8和两字节的UTF-8，而两字节的UTF-8软件处理为Unicode编码（软件命名为UTF-16），而对三字节的UTF-8编码没有进行处理。
如图：0000h地址处的 E590就是"启"的两字节UTF-8编码0x90E5，即通常意义的Unicode码。
0xCCh处的E590AF就是启的三字节UTF-8编码0xE590AF。
在这里插入图片描述

/**
 * Checks whether `data` begins with a three-byte UTF-8 sequence whose lead
 * byte is in 0xE3..0xE9 (the range this patch uses for Chinese characters).
 * Only the first character is examined.
 *
 * The bytes are tested in place: no scratch buffer is allocated, so nothing
 * can leak and nothing is written past an allocation.
 *
 * @param data   Bytes to inspect; may be null.
 * @param maxlen Number of readable bytes at `data`.
 * @return true when data[0] is in 0xE3..0xE9 and data[1], data[2] are both
 *         continuation bytes (0x80..0xBF); false otherwise, including when
 *         fewer than three bytes are available.
 */
extern "C" __declspec(dllexport) bool isutf8string(const unsigned char* data, int maxlen)
{
    // A three-byte sequence cannot fit in fewer than three readable bytes.
    if(!data || maxlen < 3)
        return false;

    auto isContinuation = [](unsigned char byte)
    {
        return byte >= 0x80 && byte <= 0xBF;
    };

    // Only lead bytes 0xE3..0xE9 are accepted.
    if(data[0] < 0xE3 || data[0] > 0xE9)
        return false;

    // Short-circuit evaluation stops at the first mismatching byte, so a NUL
    // terminator inside the first three bytes is never read past.
    return isContinuation(data[1]) && isContinuation(data[2]);
}

disasmispossiblestring内增加对utf8字符串的处理

/**
 * Reads up to 60 bytes at `addr` and classifies them as an ASCII, UTF-16 or
 * three-byte UTF-8 string, checked in that order.
 *
 * @param addr Address to read from.
 * @param type Optional out-parameter; receives str_ascii, str_unicode or
 *             str_utf8 on success and str_none on every failure path.
 * @return true when one of the three detectors accepted the bytes.
 */
bool disasmispossiblestring(duint addr, STRING_TYPE* type)
{
    // Report "no string" up front so the read-failure path below cannot leave
    // the caller's variable unset.
    if(type)
        *type = str_none;

    unsigned char data[60];
    memset(data, 0, sizeof(data));
    duint bytesRead = 0;
    // A failed read is tolerated as long as at least two bytes were obtained;
    // the rest of the buffer stays zeroed.
    if(!MemReadUnsafe(addr, data, sizeof(data), &bytesRead) && bytesRead < 2)
        return false;
    if(isasciistring(data, sizeof(data)))
    {
        if(type)
            *type = str_ascii;
        return true;
    }
    if(isunicodestring(data, sizeof(data) / 2))
    {
        if(type)
            *type = str_unicode;
        return true;
    }
    // Never advertise more bytes than the buffer holds: the previous
    // sizeof(data) + 1 allowed a read one byte past the end of `data`.
    if(isutf8string(data, sizeof(data)))
    {
        if(type)
            *type = str_utf8;
        return true;
    }
    return false;
}

disasmgetstringat对三字节UTF-8字符串进行编码转换，并返回

/**
 * Reads the string at `addr`, converts it to UTF-8, escapes it and copies the
 * result to the caller's buffer.
 *
 * @param addr    Address of the candidate string.
 * @param type    Optional out-parameter; set to str_none first, then to the
 *                detected type when a string is found.
 * @param ascii   Destination for an ASCII (local code page) string.
 * @param unicode Destination for a UTF-16 or UTF-8 string.
 * @param maxlen  Capacity limit used for the destination copy; the scratch
 *                buffer read from memory is (maxlen + 1) * 2 bytes.
 * @return true when a string was detected and written, false otherwise.
 */
bool disasmgetstringat(duint addr, STRING_TYPE* type, char* ascii, char* unicode, int maxlen)
{
    if(type)
        *type = str_none;
    // strncpy_s below requires a non-zero destination size.
    if(maxlen <= 0)
        return false;
    if(!MemIsValidReadPtrUnsafe(addr, true) || !disasmispossiblestring(addr))
        return false;

    const size_t bufferSize = (size_t(maxlen) + 1) * 2;
    Memory<unsigned char*> data(bufferSize, "disasmgetstringat:data");
    // Start from a zeroed buffer (as disasmispossiblestring does) because the
    // result of the read below is not checked.
    memset(data(), 0, bufferSize);
    MemReadUnsafe(addr, data(), bufferSize); //TODO: use safe version?
    // The read fills the whole buffer, so force a terminator wide enough for
    // UTF-16. Without it the string conversions below could run past the end.
    data()[bufferSize - 2] = 0;
    data()[bufferSize - 1] = 0;

    // Save a few pointer casts
    auto asciiData = (char*)data();

    // Stores converted UTF-8 text in the scratch buffer, truncating to fit and
    // always leaving it NUL-terminated.
    auto storeUtf8 = [&](const std::string & utf8)
    {
        const size_t copied = min(bufferSize - 1, utf8.size());
        memcpy(asciiData, utf8.c_str(), copied);
        asciiData[copied] = '\0';
    };

    // Escapes the scratch buffer and copies the result to the outgoing buffer.
    auto emitEscaped = [&](char* out)
    {
        String escaped = StringUtils::Escape(asciiData);
        strncpy_s(out, min(int(escaped.length()) + 1, maxlen), escaped.c_str(), _TRUNCATE);
    };

    // First check if this was an ASCII only string
    if(isasciistring(data(), maxlen))
    {
        if(type)
            *type = str_ascii;

        // Convert ANSI string to UTF-8
        storeUtf8(StringUtils::LocalCpToUtf8((const char*)data()));
        emitEscaped(ascii);
        return true;
    }

    if(isunicodestring(data(), maxlen))
    {
        if(type)
            *type = str_unicode;

        // Convert UTF-16 string to UTF-8
        storeUtf8(StringUtils::Utf16ToUtf8((const wchar_t*)data()));
        emitEscaped(unicode);
        return true;
    }

    if(isutf8string(data(), maxlen))
    {
        if(type)
            *type = str_utf8;

        // The bytes are already UTF-8 and terminated, so no conversion is
        // needed. The result goes to the `unicode` buffer, as before.
        emitEscaped(unicode);
        return true;
    }

    return false;
}

disasmgetstringatwrapper处理字符串输出，此处“&L”开头为两字节Unicode（UTF-16）字符，“#F”开头为三字节UTF-8字符

/**
 * Produces a quoted, prefixed display string for the string found at `addr`,
 * or for the string `addr` points to when `addr` itself is not a string.
 *
 * Output format written to `dest`:
 *   - ASCII string directly at addr:   "text" / L"text" / F"text"
 *   - UTF-16 string directly at addr:  the same with a leading '&'
 *   - UTF-8 string directly at addr:   the same with a leading '#'
 *   - string behind a pointer at addr: "text" / L"text" / F"text"
 * The L / F tag reflects the type reported by disasmgetstringat.
 *
 * @param addr  Address to inspect.
 * @param dest  Output buffer; written with a limit of MAX_STRING_SIZE.
 * @param cache Forwarded to MemIsValidReadPtrUnsafe.
 * @return true when a string was found and formatted.
 */
bool disasmgetstringatwrapper(duint addr, char* dest, bool cache)
{
    if(!MemIsValidReadPtrUnsafe(addr, cache))
        return false;

    // Dereferences `addr` and returns the pointee address when it is readable,
    // 0 otherwise.
    auto readValidPtr = [cache](duint addr) -> duint
    {
        duint addrPtr;
        if(MemReadUnsafe(addr, &addrPtr, sizeof(addrPtr)) && MemIsValidReadPtrUnsafe(addrPtr, cache))
            return addrPtr;
        return 0;
    };

    *dest = '\0';
    char string[MAX_STRING_SIZE];
    duint addrPtr = readValidPtr(addr);

    // Classify the bytes at addr once; previously the same read and
    // classification were repeated for each of the three string types.
    STRING_TYPE strtype = str_none;
    const bool classified = disasmispossiblestring(addr, &strtype);
    const STRING_TYPE directType = classified ? strtype : str_none;

    // Formats `string` into dest using the type reported by the most recent
    // disasmgetstringat call. An unexpected type leaves dest empty.
    auto writeQuoted = [&](const char* prefix)
    {
        if(strtype == str_ascii)
            sprintf_s(dest, MAX_STRING_SIZE, "%s\"%s\"", prefix, string);
        else if(strtype == str_unicode)
            sprintf_s(dest, MAX_STRING_SIZE, "%sL\"%s\"", prefix, string);
        else if(strtype == str_utf8)
            sprintf_s(dest, MAX_STRING_SIZE, "%sF\"%s\"", prefix, string);
    };

    // String located directly at addr.
    if(directType == str_ascii)
    {
        if(!disasmgetstringat(addr, &strtype, string, string, MAX_STRING_SIZE - 5))
            return false;
        writeQuoted("");
        return true;
    }
    if(directType == str_unicode)
    {
        if(!disasmgetstringat(addr, &strtype, string, string, MAX_STRING_SIZE - 4))
            return false;
        writeQuoted("&");
        return true;
    }
    if(directType == str_utf8)
    {
        if(!disasmgetstringat(addr, &strtype, string, string, MAX_STRING_SIZE - 4))
            return false;
        writeQuoted("#");
        return true;
    }

    // Not a string itself: try the address stored at addr.
    if(!addrPtr)
        return false;
    if(!disasmgetstringat(addrPtr, &strtype, string, string, MAX_STRING_SIZE - 5))
        return false;
    // Very short text behind something that is itself a valid pointer is
    // rejected rather than reported as a string.
    if(int(strlen(string)) <= (strtype == str_ascii ? 3 : 2) && readValidPtr(addrPtr))
        return false;
    writeQuoted("");
    return true;
}

修改后即可在字符串参考中正确解析出UTF-8字符。
在这里插入图片描述

三、界面显示修改

在寄存器及堆栈的标签、备注显示方式中使用“string：module.label”形式，
并增加了PEB和TEB自动注释的功能
在这里插入图片描述

及CPU Dump窗口多种编码同时显示功能。

由于只是UI的操作，此处就不贴代码了

附件：

可编译源码及 x64dbg修改版：
下载地址一：
链接：https://pan.baidu.com/s/1zEy0o_D3HJz1R5kUfKOj-Q
提取码：3jpm
复制这段内容后打开百度网盘手机App，操作更方便哦
下载地址二：
编译后软件：x64dbg_2021_01_12(Modified By 风吹小裤衩(20210329))
软件源码：x64dbg_2021_01_12源码(Modified By 风吹小裤衩(20210329))

不足之处，欢迎报告Bug