0

我有一个包含超过 450 万行的巨大 .csv 文件。由于这对 Excel 来说太大了,我需要在 .csv 文件中搜索每个用户的任何条目,然后对它们求和,但总和需要在特定月份完成。

Excel
USER 月总值
AAH 2010 年 2 月 1014

CSV
"USER","DATE_TIME","NUMBER"
"AAH","2010-03-18T17:35:01.000Z","410.0"
"ABH","2011-01-24T09:43:01.000Z","336.0"
"AAH","2010-03-18T19:25:01.000Z","114.0"
"BhC","2012-06-24T03:45:01.000Z","336.0"
"AAH","2010-03-20T19:30:01.000Z","490.0"

你能帮我解决一下吗?

4

1 回答 1

0

你可以用下面的(调整口味)来做到这一点。它适用于您的测试数据(复制到 550 万行或大约 230MB,在我的笔记本电脑上大约需要 30 秒。毫无疑问,如果性能至关重要,它可以改进,但对于您的目的来说它可能足够快)。

Option Explicit

Sub GetData()

    Dim fso As Object
    Dim fs As Object
    Dim results As Collection
    Dim arr
    Dim i As Long
    Dim monthOfInterest As Integer
    Dim recordMonth As Date
    Dim recordUser As String
    Dim recordValue As Variant
    Dim recordKey As String
    Dim result As Variant
    Dim str As String, splitStr() As String
    Dim ws As Worksheet

    Set fso = CreateObject("Scripting.FileSystemObject")

    Set results = New Collection

    'enter your path here or use something like FileDialog
    Set fs = fso.OpenTextFile("C:\test.csv", ForReading, False, TristateFalse) 'TristateTrue if Unicode
    monthOfInterest = 3

    If not fs.AtEndOfStream Then fs.SkipLine 'skip past headers at top of CSV

    Do While Not fs.AtEndOfStream
        splitStr = Split(fs.ReadLine, ",")
        If fs.Line Mod 10000 = 0 Then
            Application.StatusBar = "Line " & fs.Line
            DoEvents
        End If
        recordMonth = DateSerial( _
                        Mid(splitStr(1), 2, 4), _
                        Mid(splitStr(1), 7, 2), 1)
        If month(recordMonth) = monthOfInterest Then
            recordUser = Mid(splitStr(0), 2, Len(splitStr(0)) - 2)
            recordValue = CDec(Mid(splitStr(2), 2, Len(splitStr(2)) - 2))
            recordKey = recordUser & "|" & Format(recordMonth, "YYYY-MM")
            On Error Resume Next
            result = results(recordKey)
            If Err.Number <> 5 Then 'key exists
                results.Remove recordKey
                recordValue = recordValue + result(2)
            End If
            On Error GoTo 0
            results.Add Array(recordUser, recordMonth, recordValue), recordKey
        End If
    Loop
    fs.Close

    Application.StatusBar = "Outputting..."

    'Process results and dump to worksheet
    If results.Count > 0 Then
        Set ws = ActiveWorkbook.Worksheets.Add
        ReDim arr(0 To results.Count, 0 To 2)
        arr(0, 0) = "User"
        arr(0, 1) = "Month"
        arr(0, 2) = "Total"
        For i = 1 To UBound(arr, 1)
            arr(i, 0) = results(i)(0)
            arr(i, 1) = results(i)(1)
            arr(i, 2) = results(i)(2)
        Next i
        ws.Range(ws.Cells(1, 1), ws.Cells(1 + UBound(arr, 1), 1 + UBound(arr, 2))).Value = arr
    End If

    Application.StatusBar = ""

End Sub
于 2013-08-20T20:10:04.813 回答