VBA处理超百万数据(主要运用二维数组及字典)

class ImportorClass

    Option Explicit

    Private m_sht As Worksheet
    Private m_activecell As Range
    Private m_rowIndex As Long

    Private m_dictYjzh
    Private m_ksrq As Date
    Private m_jzrq As Date

    Private m_dictJsd

    Private m_rowset(1 To 100000, 1 To 10)

    Public Sub Init(sht As Worksheet, ksrq As Date, jzrq As Date, cjbfilename)
        Dim lastrow, title, tarr, i
        Set m_sht = sht
        m_ksrq = ksrq
        m_jzrq = jzrq

        lastrow = m_sht.Range("A" & m_sht.Cells.Rows.count).End(xlUp).Row
        If lastrow >= 2 Then
            m_sht.Range("2:" & lastrow).Delete
        End If
        title = "A1,A2,A3,A4,A5,A6,A7,A8,A9,A10"
        tarr = Split(title, ",")
        For i = LBound(tarr) To UBound(tarr)
            m_sht.Cells(1, i + 1) = tarr(i)
        Next
        Set m_activecell = m_sht.Range("a2")

        Set m_dictYjzh = GetCjb(cjbfilename)
    End Sub

    Sub SetRowSet(rowset, rowIndex, varr, myjzh, lx)

        rowset(rowIndex, 1) = "'" & varr(11)
        rowset(rowIndex, 2) = varr(12) 'wd
        rowset(rowIndex, 3) = varr(17)
        rowset(rowIndex, 4) = varr(18) 'zq
        rowset(rowIndex, 5) = varr(35) 'ys
        rowset(rowIndex, 6) = varr(37)
        
        rowset(rowIndex, 7) = m_dictYjzh(myjzh)(1)
        rowset(rowIndex, 8) = m_dictYjzh(myjzh)(2)
        rowset(rowIndex, 9) = m_dictYjzh(myjzh)(3)

        rowset(rowIndex, 10) = lx 

    End Sub

    'import jsd
    Sub ImportJsd(filename)
    
        Dim csvFile, yjzh
        Dim count, validCount, limitedCount, currLine, arr, ele, rq, objs, i, j
        Dim key As String
        csvFile = filename

        limitedCount = -20000 'testing
        count = 0
        validCount = 0

        m_rowIndex = 1

        Set m_dictJsd = CreateObject("Scripting.Dictionary")

        Open csvFile For Input As #1
        Do While Not EOF(1)
            Line Input #1, currLine
            
            If count <> 0 Then
                arr = GetArr(currLine)
                
                rq = DateValue(arr(17))
                If rq < m_ksrq Or rq > m_jzrq Then
                Else
                    yjzh = arr(11)

                    If IsEmpty(m_dictYjzh) Then
                    
                    ElseIf m_dictYjzh.exists(yjzh) Then
                        SetRowSet m_rowset, m_rowIndex, arr, yjzh, "jsd-" & count
                       
                        key = yjzh & arr(12) & arr(18) & arr(35)
                        If Not m_dictJsd.exists(key) Then m_dictJsd(key) = ""

                        m_rowIndex = m_rowIndex + 1
                        validCount = validCount + 1

                    End If
                End If
            End If
                    
            count = count + 1
            If limitedCount > 0 And count >= limitedCount Then Exit Do
        Loop
        
        Close #1
    
    End Sub

    Sub ImportYwd(filename)
        Dim csvFile, yjzh
        Dim count, validCount, limitedCount, currLine, arr, ele, rq, objs, i, j
        Dim key As String
        csvFile = filename

        limitedCount = -20000 'testing
        count = 0
        validCount = 0

        'm_rowIndex = 1

        Open csvFile For Input As #1
        Do While Not EOF(1)
            Line Input #1, currLine
            
            If count <> 0 Then
                arr = GetArr(currLine)
                
                rq = DateValue(arr(17))
                If rq < m_ksrq Or rq > m_jzrq Then
                Else
                    yjzh = arr(11)

                    If IsEmpty(m_dictYjzh) Then
                    
                    ElseIf m_dictYjzh.exists(yjzh) Then
                            
                        key = yjzh & arr(12) & arr(18) & arr(35)
                        If m_dictJsd.exists(key) Then
                        Else
                            SetRowSet m_rowset, m_rowIndex, arr, yjzh, "ywd-" & count
                               
                            m_rowIndex = m_rowIndex + 1
                            validCount = validCount + 1
                        End If
                    End If
                End If
            End If
                    
            count = count + 1
            If limitedCount > 0 And count >= limitedCount Then Exit Do
        Loop
        
        Close #1
    End Sub

    Public Sub WriteExcel()
        m_activecell.Resize(m_rowIndex - 1, 10) = m_rowset
    End Sub
    

    '获取cjb字典 key[zh] value[1 dq/2 dzr/3 xz]
    Function GetCjb(filename)
        Dim dict, csvFile, currLine, arr, count, cjbsj
        Set dict = CreateObject("Scripting.Dictionary")

        csvFile = filename

        Open csvFile For Input As #1

        count = 0
        Do While Not EOF(1)
            Line Input #1, currLine
            
            If count = 0 Then
            Else
                arr = GetArr(currLine)
                
                If Not dict.exists(arr(1)) Then 'B zh
                    ReDim cjbsj(1 To 3)
                    cjbsj(1) = arr(0) 'A dq
                    cjbsj(2) = arr(4) 'E dzr
                    cjbsj(3) = arr(5) 'F xz
                    dict(arr(1)) = cjbsj
                End If
                
            End If
            count = count + 1
        Loop
        Close #1
        Set GetCjb = dict
    End Function

    'seperator为逗号,考虑引号内有逗号的字段
    Function GetArr(str)
        If InStr(str, """") > 0 Then
            GetArr = GetSpecialArr(str)
            Exit Function
        Else
            GetArr = Split(str, ",")
        End If
    End Function

    '考虑引号内有逗号的字段
    Function GetSpecialArr(str)
        Dim i, arrQuote, temp, arr
        arrQuote = Split(str, """")
        For i = LBound(arrQuote) To UBound(arrQuote)
            If i Mod 2 = 1 Then
                arrQuote(i) = Replace(arrQuote(i), ",", "@@")
            End If
        Next
        temp = Join(arrQuote, """")
        arr = Split(temp, ",")
        For i = LBound(arr) To UBound(arr)
            If InStr(arr(i), "@@") > 0 Then
                arr(i) = Replace(arr(i), "@@", ",")
            End If
        Next
        GetSpecialArr = arr
    End Function

    Private Sub class_Initialize()
        ' Called automatically when class is created
    End Sub

    Private Sub class_Terminate()
        ' Called automatically when all references to class instance are removed
    End Sub

end class

class Main

    Option Explicit

    Sub Main()
        
        Dim im As ImportorClass, sht As Worksheet, filename, cjbfilename, t
        
        Debug.Print Now
        t = Timer
        
        Set sht = ActiveSheet
        Set im = New ImportorClass
        
        cjbfilename = "cjb.csv"
        im.Init sht, #7/1/2020#, #7/31/2020#, cjbfilename
        
        filename = "jsd.csv"
        im.ImportJsd filename
            
        filename = "ywd.csv"
        im.ImportYwd filename
        
        im.WriteExcel
        
        Set im = Nothing
        
        Debug.Print Now
        Debug.Print "耗时" & (Timer - t) & "秒"
    End Sub

    

end class

 

  • 0
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
建立运行环境-Excel Vba,可移植到VB环境中(需修改一些代码) '仿制简单的SQL查询语句,用于对二维数组的查询 '参照SQL语句:Select * From array [Where conditions] [Distinct fields] [ResultWithTitle] ' '实现功能: ' 依条件设置查询数组,返回包含查询字段(或全部字段)的数组,可多条件组合。 ' 条件运算符包括:> = < >= <= <> , like(正则表达式) ' '附注: ' 使用此函数,需要在文件中引用正则表达式脚本 Microsoft VBScript Regular Expressions x.x ' (根据不同的电脑配置和环境此处会有差异) '算法简要: ' 1、查询条件运算符:仅有 >, =, <, >=, <=, <> , like(正则表达式) ' 本函数中仅有上述运算符。原因在于,更多的运算符编制逻辑过于复杂,又不太常用。 ' 为了尽可能多地容纳各种运算关系,添加了正则表达式匹配运算, ' 在某个单一条件中,正则几乎可以容纳绝大部分的比对运算关系了。 ' 2、数字比较: ' 采用了将数字型字符串类型转换为数字之后再比较的方法,结果更为准确。 ' 3、其他算法和运算速度: ' 编制过程中,试验过使用 正则+逻辑分支+表达式引用 的方法, ' 可以实现几乎等同于SQL查询语句的复杂功能,而且代码更简捷。 ' 但运算速度相差过于悬殊(大概几十到上百倍 - "一闪而过"和"一袋烟"的差距!),最后不得不放弃。 ' 所以现在的版本相当于一个简化了的select语句,但对于大多数查询情况而言够用了。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值