CorpAct抽样模板

最近有个项目的抽样也是够恶心,原始数据表包含一张全的公司list(无重复公司)。交给Ops Team去根据它抽数。

接着对方返回一个excel 包含母list 和 多个data group sheet,每个data group里面就是抽到的,在指定时间段内的数据。

在被抽到的data group 中,总共抽取30个公司的数据。要求每个data group 都要random到两个记录。剩下的部分随机挑公司。

另外还要在过万的记录里面随机抽样30个公司,是没有任何数据的。去audit missing case。

各data group之间复制粘贴,还要考虑有些公司可能在各个data group都有数据。想来想去,觉得还是来个tool吧。

下图是拿到的raw data list样式


母list一万多条。在家测试共用去了26秒。每个页面

下图那些flag就是被抽中的了

下面上代码。

Option Explicit

Sub Summarize()
   
    Dim WrkSht As Worksheet
    Dim ShtNew As Worksheet
    Dim ShtCom As Worksheet
    Dim Rng As Range
   
    Dim k As Integer
    Dim k1 As Integer
    Dim k2 As Integer
    Dim kk As Integer
    Dim yy As Integer
    Dim Row1 As Integer
    Dim Zebra As Integer
    Dim Flag As Integer
    Flag = 1
 
   
    Dim CID As String
    
    
    
    Dim arrData
    Dim dataCount As Integer
    dataCount = 0
    Dim i As Integer
    
    ReDim arrData(1 To 1)
    
    
    Dim d As Object
    Set d = CreateObject("scripting.dictionary")
   
    
    
    
   Application.ScreenUpdating = False
   
 
       
    Set ShtNew = Worksheets.Add(After:=Worksheets(Worksheets.Count))
    ShtNew.Name = "AccuracyList"
   
   
   
'Accuracy List
     For Each WrkSht In Worksheets
           
            If Not WrkSht Is ShtNew And WrkSht.Name <> "CompanyList" Then
               
                kk = WrkSht.Range("A1").CurrentRegion.Columns.Count
                'MsgBox kk
                yy = WrkSht.Range("A1").CurrentRegion.Rows.Count
                'MsgBox yy
               
                For k = 1 To kk
                   
                    If WrkSht.Cells(1, k).Value = "Company Id" Then
               
                       
                        WrkSht.Range(WrkSht.Cells(2, k), WrkSht.Cells(yy, k)).Copy ShtNew.Range("A" & ShtNew.Range("A56565").End(3).Row + 1)
                        'note = note + yy
                        Exit For
                        
                    End If
                   
                Next k
               
            End If
     Next
    
    ShtNew.Cells(1, 1).Value = "Company Id"
    
    ShtNew.Cells(1, 1).CurrentRegion.RemoveDuplicates Columns:=1, Header:=xlYes
    
    ShtNew.Cells(1, 2).Value = "RAND"
    
    yy = ShtNew.Range("A1").CurrentRegion.Rows.Count
    
    For k = 2 To yy
        
        ShtNew.Cells(k, 2).Value = Rnd
        
    Next k
    
    ShtNew.Range("A:B").Sort Columns(2), xlAscending, Header:=xlYes
      
    
'End of Accuracy List



'Completeness List
    Set ShtCom = Worksheets.Add(After:=Worksheets(Worksheets.Count))
    ShtCom.Name = "CompleteList"
    
    For Each WrkSht In Worksheets
        
        If WrkSht.Name = "CompanyList" Then
                
                WrkSht.Range("A1").CurrentRegion.Copy ShtCom.Range("A1")
                Exit For
        End If
    Next
    
    
    
    kk = ShtCom.Range("A1").CurrentRegion.Columns.Count ' The total number of columns
    
    For k = 1 To kk
            If ShtCom.Cells(1, k).Value = "CompanyId" Then
                k1 = k ' the column named Company Id
                Exit For
            End If
            
    Next k


    
             
    For k = 1 To ShtNew.Range("A1").CurrentRegion.Columns.Count
        
        If ShtNew.Cells(1, k).Value = "Company Id" Then
            k2 = k ' the column named Company Id
            Exit For
        End If
    Next
    
    kk = ShtCom.Range("A1").CurrentRegion.Rows.Count ' The total number of rows
    yy = ShtCom.Range("A1").CurrentRegion.Columns.Count 'The total number of columns
    
    Cells(1, yy + 1).Value = "Sequence"
    
        
    For Row1 = 2 To ShtNew.Range("A1").CurrentRegion.Rows.Count
                
        CID = ShtNew.Cells(Row1, k2).Value
        
        
        
        Set Rng = ShtCom.Range(ShtCom.Cells(1, k1), ShtCom.Cells(kk, k1)).Find(CID, lookat:=xlWhole)

        If Not Rng Is Nothing Then

           Cells(Rng.Row, yy + 1).Value = 1

        End If



    Next Row1

   

    ShtCom.Range(Cells(1, 1), Cells(kk, yy + 1)).Sort Columns(yy + 1), xlDescending, Header:=xlYes
    
    Range(Cells(2, yy + 1), Cells(Columns(yy + 1).End(xlDown).Row, yy + 1)).EntireRow.Delete
    
    ShtCom.Columns(yy + 1).Delete
    
    ShtCom.Cells(1, yy + 1).Value = "RAND"
    
     kk = ShtCom.Range("A1").CurrentRegion.Rows.Count ' The total number of rows
    
    For k = 2 To kk
        
        Cells(k, yy + 1).Value = Rnd
    
    Next k
    
    ShtCom.Range(Cells(1, 1), Cells(kk, yy + 1)).Sort Columns(yy + 1), xlAscending, Header:=xlYes

'CompleteList End


'Accuracy Sampling




 For Each WrkSht In Worksheets
           
        If Not WrkSht Is ShtNew And WrkSht.Name <> "CompanyList" And Not WrkSht Is ShtCom Then
               
                
                
                kk = WrkSht.Range("A1").CurrentRegion.Columns.Count
                'MsgBox kk
                yy = WrkSht.Range("A1").CurrentRegion.Rows.Count
                'MsgBox yy
                
                For k = 1 To kk
                                If WrkSht.Cells(1, k).Value = "Company Id" Then
                                    k1 = k
                                    Exit For
                                End If
                                
                            
                Next k
                
                If k = kk Then
                    
                    MsgBox "No Company Id Column!"
                    Exit Sub
                
                End If
                
               
                If yy > 3 Then
                
                        WrkSht.Cells(1, kk + 1).Value = "RAND"
                        
                        For k = 2 To yy
                        
                        
                            WrkSht.Cells(k, kk + 1) = Rnd
                        
                        Next k
                        
                        WrkSht.Range(WrkSht.Cells(1, 1), WrkSht.Cells(yy, kk + 1)).Sort WrkSht.Columns(kk + 1), xlAscending, Header:=xlYes
                        
                        With WrkSht
                                
                            
                                dataCount = dataCount + 1
                                ReDim Preserve arrData(1 To dataCount)
                                arrData(dataCount) = .Cells(2, k1).Value
                                'MsgBox arrData(dataCount)
                                dataCount = dataCount + 1
                                ReDim Preserve arrData(1 To dataCount)
                                arrData(dataCount) = .Cells(3, k1).Value
                                'MsgBox arrData(dataCount)
                        End With
                                
                 ElseIf yy = 3 Then
                        
                        With WrkSht
                  
                                dataCount = dataCount + 1
                                ReDim Preserve arrData(1 To dataCount)
                                arrData(dataCount) = .Cells(2, k1).Value
                                dataCount = dataCount + 1
                                ReDim Preserve arrData(1 To dataCount)
                                arrData(dataCount) = .Cells(3, k1).Value
                        End With
                        
                 ElseIf yy = 2 Then
                 
                        With WrkSht
                                dataCount = dataCount + 1
                                ReDim Preserve arrData(1 To dataCount)
                                arrData(dataCount) = .Cells(2, k1).Value
                        End With
                 Else
                                MsgBox "Error"
                                Exit Sub
                                
                End If
        
                        
                        
               
         End If
         
 Next





For i = 1 To UBound(arrData)

        d(arrData(i)) = d(arrData(i)) + 1
Next i

Zebra = 30 - d.Count

ReDim Preserve arrData(1 To 31)

If Zebra <> 0 Then

                        k = 1
                        
                        While Zebra >= 0
                            
                                              
                            arrData(d.Count + 1) = ShtNew.Cells(k + 1, 1)
                           
                            
                            For i = 1 To UBound(arrData)
                        
                                d(arrData(i)) = d(arrData(i)) + 1
                                
                            Next i
                            
                            k = k + 1
                            
                            Zebra = 30 - d.Count
                             
                        
                        Wend
End If


ShtNew.Range("D2").Resize(UBound(arrData), 1).Value = WorksheetFunction.Transpose(arrData)

'Accuracy Sampling Complete


'Pick them up

 For Each WrkSht In Worksheets
           
        If Not WrkSht Is ShtNew And WrkSht.Name <> "CompanyList" And Not WrkSht Is ShtCom Then
        
                
                kk = WrkSht.Range("A1").CurrentRegion.Columns.Count ' The total number of columns
                yy = WrkSht.Range("A1").CurrentRegion.Rows.Count ' The total number of rows
                
                WrkSht.Cells(1, kk + 1).Value = "Flag"
    
                For k = 1 To kk
                        If ShtCom.Cells(1, k).Value = "Company Id" Then
                            k1 = k ' the column named Company Id
                            Exit For
                        End If
                        
                Next k
                
                For i = 1 To UBound(arrData)
                
                        For k = 2 To yy
                        
                            If WrkSht.Cells(k, k1).Value = arrData(i) Then
                                            
                                            WrkSht.Cells(k, kk + 1).Value = Flag
                            End If
                            
                        Next k
                        
                        
                        Flag = Flag + 1
                Next i
                
        End If
        
        
  WrkSht.Range(WrkSht.Cells(1, 1), WrkSht.Cells(yy, kk + 1)).Sort WrkSht.Columns(kk + 1), xlDescending, Header:=xlYes
        
        
Next

'End of pick-up


                                            
                    
Set d = Nothing



        
                        
   
Application.ScreenUpdating = True
   
End Sub



结束语:再一次的感谢 组小牛同学,当我告诉他我用union range法去实现删除区域时,他提醒我应该去尝试排序再删除。

实验表明,同样家里的土冒机器的环境下,union_range法39秒,排序删除法19秒。

另外还有就是使用union 的时候,里面不可以有设置为nothing的区块或者没有定义的区块。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

取啥都被占用

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值