1、引言
通常情况下,当需要遍历整个磁盘时,我们会选择使用系统提供的接口,或者调用标准库或者boost库中的filesystem模块。
filesystem库的遍历方法较为简单,因此本文仅列举系统遍历接口的使用方法。
2、Windows
结合FindFirstFile/FindNextFile/FindClose函数,使用显式栈(后进先出,即非递归的深度优先方式)遍历目录。
// Iteratively walks the directory tree rooted at `path` with
// FindFirstFile/FindNextFile. `vec_dir` (declared outside this snippet) is
// used as a LIFO stack of directories that still have to be listed.
HANDLE hFind;
WIN32_FIND_DATA find_data;
vec_dir.emplace_back(path);
os_string directory;
while (!vec_dir.empty()) {
  // "<dir>\*" is the search pattern that matches every entry in <dir>.
  directory = vec_dir.back() + L"\\*";
  vec_dir.pop_back();

  hFind = FindFirstFile(directory.c_str(), &find_data);
  if (hFind == INVALID_HANDLE_VALUE) {
    // The directory could not be listed. There is no search handle to
    // release, so FindClose must not be called here.
    continue;
  }

  do {
    // Skip "." / ".." and every entry whose name starts with '$'.
    // (`continue` in a do-while jumps to the FindNextFile condition.)
    if (find_data.cFileName[0] == L'$' ||
        wcscmp(find_data.cFileName, L".") == 0 ||
        wcscmp(find_data.cFileName, L"..") == 0) {
      continue;
    }

    // Skip compressed and encrypted entries.
    if (find_data.dwFileAttributes &
        (FILE_ATTRIBUTE_COMPRESSED | FILE_ATTRIBUTE_ENCRYPTED)) {
      continue;
    }

    // Drop the trailing '*' of the search pattern and append the entry name.
    os_string filepath =
        directory.substr(0, directory.length() - 1) + find_data.cFileName;

    if (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
      // Reparse-point directories are not descended into.
      if (!(find_data.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT)) {
        vec_dir.push_back(filepath);
      }
    } else {
      // Combine the two 32-bit halves into the 64-bit file size.
      b64 filesize = (static_cast<b64>(find_data.nFileSizeHigh) << 32) |
                     find_data.nFileSizeLow;
      // deal with file content
    }
  } while (FindNextFile(hFind, &find_data));

  // Release the search handle only when FindFirstFile actually returned one.
  FindClose(hFind);
}
如果对Windows操作系统的底层调用有所了解,就会知道上述函数在内部实际上是通过调用ntdll.dll导出的NtQueryDirectoryFile函数实现的。
因此可以选择直接调用NtQueryDirectoryFile函数,提高效率。
// Iteratively walks the directory tree rooted at `path` by calling
// NtQueryDirectoryFile directly. Several directory entries are fetched per
// call into one reusable buffer. Returns false from the enclosing function
// when the API cannot be resolved or the buffer cannot be allocated.
#ifndef STATUS_NO_SUCH_FILE
#define STATUS_NO_SUCH_FILE 0xC000000FL
#endif
#ifndef STATUS_NO_MORE_FILES
#define STATUS_NO_MORE_FILES 0x80000006L
#endif

scope::ScopedModule ntdll(LoadLibrary(L"ntdll.dll"));
pNtQueryDirectoryFile_ =
    (PNTQUERYDIRECTORYFILE)GetProcAddress(ntdll, "NtQueryDirectoryFile");
if (!pNtQueryDirectoryFile_) {
  // Without the entry point every later call would dereference null.
  return false;
}

// LIFO stack of directories that still have to be listed.
vector<os_string> vec_dir;
vec_dir.emplace_back(path);
os_string directory;

// Room for 16 entries, each sized for a MAX_PATH-long name.
scope::ScopedPtr<b8> query_buffer;
ulong query_buffer_size;
query_buffer_size =
    sizeof(FILE_DIRECTORY_INFORMATION) + MAX_PATH * sizeof(WCHAR);
query_buffer_size *= 16;
query_buffer = new b8[query_buffer_size];
if (!query_buffer) {
  return false;
}

while (!vec_dir.empty()) {
  // Keep a trailing separator so entry names can be appended directly.
  directory = vec_dir.back() + L"\\";
  vec_dir.pop_back();

  // FILE_FLAG_BACKUP_SEMANTICS is what allows CreateFile to open a directory.
  scope::ScopedHandle hFind(
      CreateFile(directory.c_str(), SYNCHRONIZE | FILE_LIST_DIRECTORY,
                 FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL,
                 OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL));
  if (hFind != INVALID_HANDLE_VALUE) {
    IO_STATUS_BLOCK IoStatusBlock;
    for (;;) {
      NTSTATUS nt_status = pNtQueryDirectoryFile_(
          hFind, NULL, NULL, NULL, &IoStatusBlock, query_buffer,
          query_buffer_size, FileDirectoryInformation, FALSE, NULL, FALSE);
      if (nt_status != 0) {
        if (nt_status != STATUS_NO_MORE_FILES &&
            nt_status != STATUS_NO_SUCH_FILE) {
          // Unexpected failure: give up on this directory. Retrying the same
          // call, as the loop previously did, would never terminate.
        }
        break;
      }

      b8* const buffer_end = query_buffer.get() + query_buffer_size;
      PFILE_DIRECTORY_INFORMATION file_info =
          reinterpret_cast<PFILE_DIRECTORY_INFORMATION>(query_buffer.get());
      while (reinterpret_cast<b8*>(file_info) < buffer_end) {
        // FileName is not null-terminated; FileNameLength is in bytes.
        const WCHAR* name = file_info->FileName;
        const size_t name_len = file_info->FileNameLength / sizeof(WCHAR);
        const bool is_self_or_parent =
            (name_len == 1 && name[0] == L'.') ||
            (name_len == 2 && name[0] == L'.' && name[1] == L'.');
        const bool is_compressed_or_encrypted =
            (file_info->FileAttributes &
             (FILE_ATTRIBUTE_COMPRESSED | FILE_ATTRIBUTE_ENCRYPTED)) != 0;

        if (name[0] != L'$' && !is_self_or_parent &&
            !is_compressed_or_encrypted) {
          // Build the path from the directory being listed, not from the
          // traversal root, so nested entries get their real location.
          os_string filepath(directory);
          filepath.append(name, name_len);
          if (file_info->FileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
            // Reparse-point directories are not descended into.
            if (!(file_info->FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT)) {
              vec_dir.push_back(filepath);
            }
          } else {
            b64 file_size = file_info->EndOfFile.QuadPart;
            // deal with file content
          }
        }

        // A zero offset marks the last entry of this batch.
        if (!file_info->NextEntryOffset) {
          break;
        }
        file_info = reinterpret_cast<PFILE_DIRECTORY_INFORMATION>(
            reinterpret_cast<b8*>(file_info) + file_info->NextEntryOffset);
      }
    }
  }
}
3、Linux
在Linux平台上,说到目录遍历,最先想到的就是opendir/readdir/closedir函数。
// Iteratively walks the directory tree rooted at `path` with
// opendir/readdir. `vec_dir` is a LIFO stack of directories still to be
// listed; `p_dir` is reassigned for every directory that is opened.
vector<os_string> vec_dir;
vec_dir.push_back(path);
struct dirent* p_file_info = NULL;
scope::ScopedDir p_dir;
os_string directory;
while (!vec_dir.empty()) {
  directory = vec_dir.back();
  vec_dir.pop_back();
  p_dir = opendir(directory.c_str());
  if (p_dir != NULL) {
    // Make sure there is exactly one '/' between the directory and the entry
    // name; stacked sub-directories are stored without a trailing separator.
    if (directory.empty() || directory[directory.size() - 1] != '/') {
      directory += '/';
    }

    while (NULL != (p_file_info = readdir(p_dir))) {
      const char* name = p_file_info->d_name;
      const unsigned char type = p_file_info->d_type;
      const bool is_self_or_parent =
          strcmp(name, ".") == 0 || strcmp(name, "..") == 0;
      // FIFOs, sockets, symlinks and device nodes are not processed.
      const bool is_special = type == DT_FIFO || type == DT_SOCK ||
                              type == DT_LNK || type == DT_CHR ||
                              type == DT_BLK;

      // Structured conditionals replace the former `goto NEXT`, which jumped
      // over the initialization of `filepath` (ill-formed in C++).
      if (!is_self_or_parent && !is_special) {
        os_string filepath = directory + name;
        if (type == DT_DIR) {
          // Do not descend into directories reported as mount points.
          if (!p_disk_scanner_->CheckFileSystemMounted(filepath)) {
            vec_dir.push_back(filepath);
          }
        } else {
          // NOTE(review): DT_UNKNOWN entries also land here; some file
          // systems do not fill in d_type, so an lstat() fallback may be
          // needed - confirm for the targeted file systems.
          // deal with file content
        }
      }

      // Progress is advanced for every entry read, including skipped ones.
      p_disk_scanner_->UpdateScannedSpace(1);
      p_disk_scanner_->UpdateScannedProcess(1);
    }
  }
}