一、背景
如前面《Android系统下进程模块的遍历》所述,模块遍历是一种很常见的需求,不同的是实现方式。Android的底层是linux操作系统,
可以通过proc虚拟文件系统获取进程的状态等信息;而IOS的底层是Darwin系统,并没有提供proc虚拟文件系统的功能。那么,该怎样获取进程加载模块的信息呢?这里有两种方法。下面分别介绍。
二、进程模块遍历的实现方式
1. 从内存中的dyld模块内容中获取
dyld是系统的动态链接器,当应用启动时,系统在解析完主模块macho的信息后,会首先运行dyld,通过dyld动态加载应用需要的所有模块,并把每个模块的信息保存到自己的macho文件的“_dyld_all_image_infos”的符号数据中。
Macho是IOS系统上的可执行文件和动态库的格式,类似于Windows系统上的PE、Linux系统上的ELF。它的结构如下图:
它的结构分成三部分:Header、Load Command和Data。其中,Header用来描述CPU架构、Load Command数目等文件信息;
Load Command则描述文件中的段信息;Data包含每个段要引用的数据。Macho的文件信息可以通过MachOView应用来查看,如下图:
使用此种方式实现进程模块遍历算法为:遍历进程的虚拟内存空间,找到macho模块;检查是否是dyld模块,以及包含“_dyld_all_image_infos”符号,如果是,则找到该符号的内容地址;最后,从该符号的内容地址中获取模块信息。此算法涉及到的api有以下几种:
详细代码步骤如下:
a)遍历进程的虚拟内存空间,找到macho模块。
FindRegion方法的代码如下:
// Walk the current task's virtual address space one region at a time.
// Consecutive regions that are contiguous and share the same attributes are
// merged into a single run [prev_address, prev_address + prev_size). Each time
// a run is complete it is handed to FindListOfImage; the walk stops as soon as
// that call reports success, the address wraps to 0, vm_region fails, or
// USER_MAX_ADDRESS is reached.
for (;;)
{
// print: the accumulated run is complete and must be processed now.
// done:  leave the loop at the end of this iteration.
int print = 0;
int done = 0;
// Next address to query is the end of the run accumulated so far.
address = prev_address + prev_size;
/* Check to see if address space has wrapped around. */
if (address == 0)
{
print = done = 1;
}
if (!done)
{
// Even on iOS, we use VM_REGION_BASIC_INFO_COUNT_64. This works.
// NOTE(review): the remark above mentions the _64 constant, but the code
// below actually passes VM_REGION_BASIC_INFO_COUNT / VM_REGION_BASIC_INFO —
// confirm which flavor is intended.
count = VM_REGION_BASIC_INFO_COUNT;
kret = vm_region (mach_task_self(), &address, &size,
VM_REGION_BASIC_INFO,
(vm_region_info_t) &info, &count, &object_name);
if (kret != KERN_SUCCESS)
{
/* iOS 6 workaround - attempt to reget the task port to avoid */
/* "(ipc/send) invalid destination port" (1000003 or something) */
/* In the visible code this is simply a second, identical call. */
kret = vm_region (mach_task_self(), &address, &size,
VM_REGION_BASIC_INFO,
(vm_region_info_t) &info, &count, &object_name);
}
if (kret != KERN_SUCCESS)
{
// Both attempts failed: log, and either return immediately (already past
// the user address range) or process the pending run and stop.
printf("FindRegion: mach_vm_region failed for address %p - Error: %x\n",
(void *)address, (kret));
size = 0;
if (address >= USER_MAX_ADDRESS) return;
print = done = 1;
}
}
// The current address is not the previous block's start address plus its
// length, which indicates the start of a new module.
// (address is passed to vm_region by pointer, so it may differ from the
// value computed at the top of the loop.)
if (address != prev_address + prev_size)
{
print = 1;
}
// The attributes of the current memory block differ from those of the
// previous block, so it cannot be merged into the same run.
if ((info.protection != prev_info.protection)
|| (info.max_protection != prev_info.max_protection)
|| (info.inheritance != prev_info.inheritance)
|| (info.shared != prev_info.shared)
|| (info.reserved != prev_info.reserved))
{
print = 1;
}
if (print)
{
int print_size = 0;
const char *print_size_unit = NULL;
// Search the completed run for the module list information; a true
// result ends the whole walk.
bool bRet = FindListOfImage(mach_task_self(), prev_address, prev_size);
if (bRet == true)
{
done = 1;
}
/* Quick hack to show size of segment, which GDB does not */
/* print_size / print_size_unit are computed here but not used again in */
/* the visible code. */
print_size = prev_size;
if (print_size > 1024) { print_size /= 1024; print_size_unit = "K";
}
if (print_size > 1024) { print_size /= 1024; print_size_unit = "M";
}
if (print_size > 1024) { print_size /= 1024; print_size_unit = "G";
}
/* End Quick hack */
if (nsubregions > 1)
{
printf("FindRegion: (%d sub-regions)\n", nsubregions);
}
// Start a new run at the region that was just queried.
prev_address = address;
prev_size = size;
memcpy (&prev_info, &info, sizeof
(vm_region_basic_info_data_t));
nsubregions = 1;
num_printed++;
}
else
{
// Same attributes and contiguous: extend the current run.
prev_size += size;
nsubregions++;
}
// Stop once the walk has reached the end of the user address range.
if (address >= USER_MAX_ADDRESS)
{
done = 1;
}
if (done)
{
break;
}
}
b)检查是否是dyld模块,以及包含“_dyld_all_image_infos”符号;如果有,则找到该符号的内存地址。
FindListOfImage方法代码如下:
// Iterate over the load commands of the Mach-O header copy at pbyMachoHeader,
// looking for LC_ID_DYLINKER. If that command names a path containing
// "/lib/dyld", resolve the address of the "_dyld_all_image_infos" symbol.
for (int k = 0; k < ((struct mach_header *)pbyMachoHeader)->ncmds;
++k)
{
// Guard against walking past the end of the buffer (size bytes from the header).
if ((unsigned char*)pbyLoadCommand >= (unsigned char*)pbyMachoHeader +
size)
{
printf("FindListOfImage: lc out of range\n");
break;
}
printf("FindListOfImage: pbyLoadCommand index:%d, cmd:%d, size:%d\n", k,
pbyLoadCommand->cmd, pbyLoadCommand->cmdsize);
if (pbyLoadCommand->cmd == LC_ID_DYLINKER)
{
struct dylinker_command* pbyDyldCommand = (struct
dylinker_command*)pbyLoadCommand;
// The linker name is stored inside the command at name.offset bytes from
// the start of the command.
char *pszDyldName = (char *)pbyDyldCommand +
pbyDyldCommand->name.offset;
// NOTE(review): the format string below is split across two source lines
// as it appears in this excerpt; it must be rejoined to compile.
printf("FindListOfImage: %d, pbyDyldCommand name is:%s, name offset:%d,
(pbyDyldCommand:%x, pbyDyldCommand name:%x)\n", k, pszDyldName,
pbyDyldCommand->name.offset, pbyDyldCommand, pszDyldName);
if (pszDyldName != NULL && strstr(pszDyldName, "/lib/dyld") !=
NULL)
{
// Find the address of the "_dyld_all_image_infos" symbol in the Mach-O module.
// Note that the lookup is given pbyMemHeader, not pbyMachoHeader.
pbyDyldAllImageInfos = GetDyldAllImageInfosAddr((struct mach_header
*)pbyMemHeader, "_dyld_all_image_infos");
}
// Only the first LC_ID_DYLINKER command is examined.
break;
}
// find next load command address
pbyLoadCommand = (struct load_command *)((char*)pbyLoadCommand +
pbyLoadCommand->cmdsize);
}
// Release the header buffer (nMagicSize bytes) now that the scan is finished.
vm_deallocate(t, (vm_offset_t)pbyMachoHeader, nMagicSize);
if (pbyDyldAllImageInfos)
{
// Parse the contents of the symbol.
ParseImageInfos(pbyDyldAllImageInfos);
return true;
}
GetDyldAllImageInfosAddr函数代码如下:
// Look up pszSymbolName in the module's symbol table and turn its n_value into
// an address relative to pbyMachoHeader.
// pbyTextHdr / pbyDataHdr / pbySymtabHdr are set up outside this excerpt —
// presumably the __TEXT and __DATA segment commands and the LC_SYMTAB command;
// verify against the full function.
//
// Adjustment added to the symtab file offsets: the sum of (vmsize - filesize)
// for the two segment headers.
unsigned long ulImageSlide = (pbyTextHdr->vmsize -
pbyTextHdr->filesize) + (pbyDataHdr->vmsize -
pbyDataHdr->filesize);
// Symbol entries and string table, addressed from the module base.
struct nlist *pbySymtabContent = (struct nlist *)( (unsigned
long)pbyMachoHeader + pbySymtabHdr->symoff + ulImageSlide );
const char *pbyStrtabContent = (const char *)( (unsigned
long)pbyMachoHeader + pbySymtabHdr->stroff + ulImageSlide);
unsigned nSymbolCount = pbySymtabHdr->nsyms;
for(unsigned i = 0; i < nSymbolCount; i++)
{
struct nlist &SymItemRef = pbySymtabContent[i];
// Skip symbols that are not attached to any section.
if(SymItemRef.n_sect == NO_SECT)
{
continue;
}
// The symbol name lives in the string table at index n_strx.
char *pbySymItemName = (char *)(pbyStrtabContent +
SymItemRef.n_un.n_strx);
if(strcmp(pszSymbolName, pbySymItemName) == 0)
{
// Convert n_value to an offset from pbyTextHdr->vmaddr, then rebase it
// onto pbyMachoHeader.
unsigned long ulDyldImageInfosOffset = SymItemRef.n_value -
pbyTextHdr->vmaddr;
pbyDyldAllImageInfos = (void *)( (unsigned long)pbyMachoHeader +
ulDyldImageInfosOffset);
break;
}
}
c)从“_dyld_all_image_infos”符号内容中获取模块信息。
ParseImageInfos方法代码如下:
// Read the dyld_all_image_infos structure, then its infoArray, and record the
// path and load address of every listed image in m_MapModuleList.
// The `break` statements below have no enclosing loop in this excerpt —
// presumably the full function wraps this code in a loop or do/while; verify.
nDyldInfoCount = sizeof(dyld_all_image_infos);
pbyDyldAllImageInfoContent = ReadProcessMemory(mach_task_self(),
(vm_address_t)pbyDyldAllImageInfo, &nDyldInfoCount);
if (!pbyDyldAllImageInfoContent)
{
printf("ParseImageInfos: read mem failed 1\n");
break;
}
struct dyld_all_image_infos * dyldaii = (struct dyld_all_image_infos
*)pbyDyldAllImageInfoContent;
// NOTE(review): the format string below is split across two source lines as it
// appears in this excerpt; it must be rejoined to compile.
printf("ParseImageInfos: Version: %d, %d images at offset
%p\n",dyldaii->version, dyldaii->infoArrayCount,
dyldaii->infoArray);
// Read the whole array of dyld_image_info entries in one call.
nImageCount = dyldaii->infoArrayCount;
nDyldAllImageInfoArrayCount = nImageCount * sizeof(struct
dyld_image_info);
pbyDyldAllImageInfoArrayContent = ReadProcessMemory(mach_task_self(),
(mach_vm_address_t)dyldaii->infoArray, &nDyldAllImageInfoArrayCount);
if (!pbyDyldAllImageInfoArrayContent)
{
// NOTE(review): this message is identical to the first failure message
// above ("failed 1"), so the two failure sites cannot be told apart in logs.
printf("ParseImageInfos: read mem failed 1\n");
break;
}
struct dyld_image_info *dii = (struct dyld_image_info
*)pbyDyldAllImageInfoArrayContent;
unsigned char *pszImageFilePath;
mach_msg_type_number_t nPathNameLen;
for (int i = 0; i < nImageCount; i++)
{
// Request up to 1024 bytes for the image's path string.
nPathNameLen = 1024;
pszImageFilePath = ReadProcessMemory(mach_task_self(),
(mach_vm_address_t)dii[i].imageFilePath, &nPathNameLen);
ModInfo *pstModInfo = new ModInfo();
if (pszImageFilePath)
{
// Keep a private copy of the path, then release the 1024-byte read buffer.
pstModInfo->m_pszModPath = strdup((char *)pszImageFilePath);
pstModInfo->m_pulModBase = (unsigned long *)dii[i].imageLoadAddress;
m_MapModuleList[pstModInfo->m_pszModPath] = pstModInfo;
vm_deallocate(mach_task_self(), (vm_offset_t)pszImageFilePath, 1024);
}
else
{// Works around the case where the main process's file path cannot be read.
// Fall back to dladdr on the image's load address to obtain the file name.
// NOTE(review): the dladdr return value is not checked before dli_fname is used.
struct dl_info curr_dl_info = {0};
dladdr(dii[i].imageLoadAddress, &curr_dl_info);
printf("ParseImageInfos: find process mod: %d:%p:%s\n", i,
curr_dl_info.dli_fbase, curr_dl_info.dli_fname);
pstModInfo->m_pszModPath = strdup(curr_dl_info.dli_fname);
pstModInfo->m_pulModBase = (unsigned long *)dii[i].imageLoadAddress;
m_MapModuleList[pstModInfo->m_pszModPath] = pstModInfo;
}
printf("ParseImageInfos: mod[%d] path:%s, mod base:%p\n", i,
pstModInfo->m_pszModPath, pstModInfo->m_pulModBase);
}
2. 利用dyld的API获取进程模块信息
除了第1种方法外,还有一种更简单的实现进程模块遍历的方法,那就是dyld提供的接口。涉及到的API有以下几种:
如上面介绍:
_dyld_image_count:获取当前已加载的模块数量,但它不是线程安全的。
_dyld_get_image_header:返回当前已加载的某个模块的头部地址。
_dyld_get_image_vmaddr_slide:返回当前已加载的某个模块的虚拟内存偏移值。
_dyld_get_image_name:返回当前已加载的某个模块的名称。
_dyld_register_func_for_add_image:注册添加模块时的回调函数。
_dyld_register_func_for_remove_image:注册移除模块时的回调函数。
使用此方法实现模块遍历的算法为:先获取进程中已加载模块的数量,然后逐个获取每个模块的名称和首地址,具体代码如下:
/**
 * Enumerates the images currently loaded in this process through the dyld API
 * and records each one's path and Mach-O header address in m_MapModuleList.
 *
 * m_nModNum is set to the image count reported by dyld at the start of the
 * call. That count is not thread-safe: an image can be unloaded while the loop
 * runs, in which case dyld returns NULL for its header and name. Such entries
 * are skipped instead of being passed to strdup().
 */
void SearchModuleInfo::EnumDyldModules()
{
    // Snapshot the count once; the dyld API indexes images with uint32_t.
    const uint32_t nImageCount = _dyld_image_count();
    m_nModNum = nImageCount;

    for (uint32_t i = 0; i < nImageCount; ++i)
    {
        const mach_header *pstMachoHeader = _dyld_get_image_header(i);
        const char *pszImageFullPath = _dyld_get_image_name(i);

        // Both calls return NULL when the index is no longer valid.
        if (!pstMachoHeader || !pszImageFullPath)
        {
            printf("mod[%u]: image no longer available, skipped\n", i);
            continue;
        }

        // Copy the path first so that no ModInfo is allocated when the copy fails.
        char *pszPathCopy = strdup(pszImageFullPath);
        if (!pszPathCopy)
        {
            printf("mod[%u]: out of memory while copying path, skipped\n", i);
            continue;
        }

        ModInfo *pstModInfo = new ModInfo();
        pstModInfo->m_pszModPath = pszPathCopy;
        pstModInfo->m_pulModBase = (unsigned long *)pstMachoHeader;
        m_MapModuleList[pstModInfo->m_pszModPath] = pstModInfo;

        // %p requires void* arguments; the slide is shown in pointer form as before.
        printf("mod[%u]: %s-%p-%p\n", i, pszImageFullPath,
               (const void *)pstMachoHeader,
               (void *)_dyld_get_image_vmaddr_slide(i));
    }
}
三、IOS系统上进程模块遍历的运用场景
获取自己进程的模块加载列表,如判断是否已加载目标模块。
获取自己进程中某个特定模块的信息,如检查代码段、数据段是否被修改。
本文内容转载自网络,本着分享与传播的原则,版权归原作者所有,如有侵权请联系我们进行删除!