我会问你是否可以在我的问题中给我一些替代方案。
基本上我正在阅读一个平均为 800 万行的 .txt 日志文件。大约 600megs 的纯原始 txt 文件。
我目前正在使用 streamreader 对 800 万行进行 2 次传递,对日志文件中的重要部分进行排序和过滤,但要这样做,我的计算机需要大约 50 秒才能完成 1 次完整运行。
我可以优化这一点的一种方法是使第一遍在最后开始读取,因为最重要的数据大约位于最后的 200k 行。不幸的是,我搜索并且流式阅读器无法做到这一点。有什么想法吗?
一些一般限制
- # 行数不同
- 文件大小不同
- 重要数据的位置有所不同,但大约在最后 200k 行
这是日志文件第一遍的循环代码,只是为了给你一个想法
Do Until sr.EndOfStream = True 'Read whole File
Dim streambuff As String = sr.ReadLine 'Array to Store CombatLogNames
Dim CombatLogNames() As String
Dim searcher As String
If streambuff.Contains("CombatLogNames flags:0x1") Then 'Keyword to Filter CombatLogNames Packets in the .txt
Dim check As String = streambuff 'Duplicate of the Line being read
Dim index1 As Char = check.Substring(check.IndexOf("(") + 1) '
Dim index2 As Char = check.Substring(check.IndexOf("(") + 2) 'Used to bypass the first CombatLogNames packet that contain only 1 entry
If (check.IndexOf("(") <> -1 And index1 <> "" And index2 <> " ") Then 'Stricter Filters for CombatLogNames
Dim endCLN As Integer = 0 'Signifies the end of CombatLogNames Packet
Dim x As Integer = 0 'Counter for array
While (endCLN = 0 And streambuff <> "---- CNETMsg_Tick") 'Loops until the end keyword for CombatLogNames is seen
streambuff = sr.ReadLine 'Reads a new line to flush out "CombatLogNames flags:0x1" which is unneeded
If ((streambuff.Contains("---- CNETMsg_Tick") = True) Or (streambuff.Contains("ResponseKeys flags:0x0 ") = True)) Then
endCLN = 1 'Value change to determine end of CombatLogName packet
Else
ReDim Preserve CombatLogNames(x) 'Resizes the array while preserving the values
searcher = streambuff.Trim.Remove(streambuff.IndexOf("(") - 5).Remove(0, _
streambuff.Trim.Remove(streambuff.IndexOf("(")).IndexOf("'")) 'Additional filtering to get only valuable data
CombatLogNames(x) = search(searcher)
x += 1 '+1 to Array counter
End If
End While
Else
'MsgBox("Something went wrong, Flame the coder of this program!!") 'Bug Testing code that is disabled
End If
Else
End If
If (sr.EndOfStream = True) Then
ReDim GlobalArr(CombatLogNames.Length - 1) 'Resizing the Global array to prime it for copying data
Array.Copy(CombatLogNames, GlobalArr, CombatLogNames.Length) 'Just copying the array to make it global
End If
Loop