在日常的数据处理中,我们经常会遇到各种各样的缺失值,虽然简单的筛选就可以找出来缺失值,但是如果经常性的要对这些缺失值进行标注和处理,筛选工具就显得不太够用了。在这里我将遇到的关于筛选缺失值的一些案例分享一下,这里主要使用的是VBA。
1.函数准备
首先,先定义一个寻找相应列名称的函数,将数字转换为字母,方便下一步的数据选取
Function get_col(col_num) As Byte()
'输出字母形式的列名称
If col_num <= 26 Then
col_str = Chr(64 + col_num)
Else
b_num = col_num \ 26
e_num = col_num Mod 26
col_str = Chr(64 + b_num) + Chr(64 + e_num)
End If
get_col = col_str
End Function
在调用的话,直接传入相应的数字,例如get_col(26),返回Z ;get_col(27),返回AA
2.基于列的数据空缺值标注
arr1:传入一个数组
check_list传入一个需要判断缺失列的序号
color_num:颜色值
Sub col_remark(arr1, check_list, color_num)
'空缺值位置标注列
Dim i, j, num As Integer
'进行颜色标注
For i = 0 To UBound(check_list)
num = 0
For j = 1 To UBound(arr1)
'每个单元格进行颜色标注
If arr1(j, check_list(i)) = "" Then
num = num + 1
Cells(j, check_list(i)).Interior.Color = color_num
End If
Next j
'相应列进行颜色标注
If num > 0 Then
Cells(1, check_list(i)).Interior.Color = color_num
End If
Next i
End Sub
3.基于行的数据空缺值标注,并生成新的工作表
各参数的用法同2
Sub index_remark(arr1, check_list, color_num)
'空缺值位置标注行
Dim i, j, x, num As Integer
Dim arr_miss()
'进行颜色标注
ReDim arr_miss(1 To UBound(arr1), 1 To UBound(arr1, 2))
k = 0 'arr_miss的行,将首行的标题写入新表
For i = 1 To UBound(arr1)
num = 0 '记录每行缺失的个数
'首行标题写入数组
If k = 0 Then
k = k + 1
For x = 1 To UBound(arr1, 2)
arr_miss(k, x) = arr1(i, x)
Next x
End If
'每个单元格进行颜色标注
For j = 0 To UBound(check_list)
If arr1(i, check_list(j)) = "" Then
num = num + 1
Cells(i, check_list(j)).Interior.Color = color_num
End If
Next j
'相应行进行颜色标注
If num > 0 Then
Cells(i, 1).Interior.Color = color_num
'写入相应的数据再arr_miss
k = k + 1
For x = 1 To UBound(arr1, 2)
arr_miss(k, x) = arr1(i, x)
Next x
End If
Next i
'删除缺失数据工作表
new_name = "缺失数据"
Application.DisplayAlerts = False
For Each sht In Sheets
If sht.name = new_name Then sht.Delete
Next sht
Application.DisplayAlerts = True
'添加新工作表
Sheets.Add After:=Sheets(Sheets.Count)
Sheets(Sheets.Count).name = new_name
'写入数据
Sheets(new_name).Range("a1").Resize(k, UBound(arr_miss, 2)) = arr_miss
Range("1:1").Font.Bold = True
End Sub
4.完整代码及调用
在进行使用时,直接将代码拷贝,并直接运行main_func就可以了
Function get_col(col_num) As Byte()
'输出字母形式的列名称
If col_num <= 26 Then
col_str = Chr(64 + col_num)
Else
b_num = col_num \ 26
e_num = col_num Mod 26
col_str = Chr(64 + b_num) + Chr(64 + e_num)
End If
get_col = col_str
End Function
Sub main_func()
Dim arr_missing(), check()
Dim i As Integer
'输入要处理的工作表名称
sheet_name = "Sheet1"
'输入需要寻找的缺失列序号
check = Array(2, 4)
'输入要标记的底框颜色,可以输入0-16777215,255为红色
mark_color = 25589
'是否要进行全部行处理
check_all = False
'将数据放入数组
Sheets(sheet_name).Select
row_num = [a65536].End(xlUp).Row
col_num = ActiveSheet.UsedRange.Columns.Count
col_str = get_col(col_num)
arr_missing = Range("a1:" & col_str & row_num)
If check_all Then
ReDim check(1 To col_num)
For i = 1 To col_num
check(i) = i
Next i
End If
'删除缺失数据工作表
new_name = "标记数据"
Application.DisplayAlerts = False
For Each sht In Sheets
If sht.name = new_name Then sht.Delete
Next sht
Application.DisplayAlerts = True
'添加新工作表
Sheets.Add After:=Sheets(Sheets.Count)
Sheets(Sheets.Count).name = new_name
'写入数据
Sheets(new_name).Range("a1").Resize(UBound(arr_missing), UBound(arr_missing, 2)) = arr_missing
Range("1:1").Font.Bold = True
'调用数据处理
Call col_remark(arr_missing, check, mark_color)
Call index_remark(arr_missing, check, mark_color)
End Sub
Sub col_remark(arr1, check_list, color_num)
'空缺值位置标注列
Dim i, j, num As Integer
'进行颜色标注
For i = LBound(check_list) To UBound(check_list)
num = 0
For j = 1 To UBound(arr1)
'每个单元格进行颜色标注
If arr1(j, check_list(i)) = "" Then
num = num + 1
Cells(j, check_list(i)).Interior.Color = color_num
End If
Next j
'相应列进行颜色标注
If num > 0 Then
Cells(1, check_list(i)).Interior.Color = color_num
End If
Next i
End Sub
Sub index_remark(arr1, check_list, color_num)
'空缺值位置标注行
Dim i, j, x, num As Integer
Dim arr_miss()
'进行颜色标注
ReDim arr_miss(1 To UBound(arr1), 1 To UBound(arr1, 2))
k = 0 'arr_miss的行,将首行的标题写入新表
For i = 1 To UBound(arr1)
num = 0 '记录每行缺失的个数
'首行标题写入数组
If k = 0 Then
k = k + 1
For x = 1 To UBound(arr1, 2)
arr_miss(k, x) = arr1(i, x)
Next x
End If
'每个单元格进行颜色标注
For j = LBound(check_list) To UBound(check_list)
If arr1(i, check_list(j)) = "" Then
num = num + 1
Cells(i, check_list(j)).Interior.Color = color_num
End If
Next j
'相应行进行颜色标注
If num > 0 Then
Cells(i, 1).Interior.Color = color_num
'写入相应的数据再arr_miss
k = k + 1
For x = 1 To UBound(arr1, 2)
arr_miss(k, x) = arr1(i, x)
Next x
End If
Next i
'删除缺失数据工作表
new_name = "缺失数据"
Application.DisplayAlerts = False
For Each sht In Sheets
If sht.name = new_name Then sht.Delete
Next sht
Application.DisplayAlerts = True
'添加新工作表
Sheets.Add After:=Sheets(Sheets.Count)
Sheets(Sheets.Count).name = new_name
'写入数据
Sheets(new_name).Range("a1").Resize(k, UBound(arr_miss, 2)) = arr_miss
Range("1:1").Font.Bold = True
End Sub
5.使用例子
在这里举一个简单的例子,便于理解,原数据如下:
现在只需要找出姓名和性别缺失就可以了,使用如下:
运行结束,效果如下:
如果需要对全部列进行处理,只用将check_all的值改为True就可以了!
写在最后:数据处理有多种方式,有时候换种方式来处理,也有不同的乐趣!根据数据量和数据形式的展现,使用不同的工具,往往可以达到意想不到的效果!