我有 RTF 格式的大表,大小为 20-150 Mb。我首先尝试导出 RTF -> HTML -> 导入到 excel。一个 60 Mb 的文件大约需要 35 分钟。接下来,我尝试直接从 Word -> excel 复制表格。它总是在中途失败(所有内容都被粘贴,因为数据不在正确的单元格中)。
在确定方法之前,我尝试了更多方法(在转移到 excel 之前将所有单元格导入内存,以及其他排列,以及在此和其他来源中详述的.ConvertToText
方法)。
此方法相对较快,对于相同的 60 Mb 文件大约需要 25 分钟(这是不显示 Word、设置重新分页、事件、dispayupdate 和 tableautofit 为 false)。
考虑到这些文件可以在不到 10 秒的时间内完全加载到 RAM 内存中,我想知道为什么从 60 Mb 文件中读取数据需要 25 分钟。我知道由于更改为 HTML 格式,Word 中的表格引擎很慢,但是逐个单元格读取表格的速度非常慢。前几个单元格超级快,最后一个单元格速度较慢 - 我相信手动阅读比这更快。它违背了自动化的全部目的。但是,我别无选择。
代码是:
Dim oWord As Word.Application
Dim RTF As Word.Document
Set oWord = CreateObject("Word.Application")
Set RTF = oWord.Documents.Open(filename:=Fname, ConfirmConversions:=False, ReadOnly:=False) ', ReadOnly:=True)
Application.StatusBar = vbNullString
Application.EnableEvents = False
Application.ScreenUpdating = False
Application.DisplayAlerts = False
With oWord
Options.Pagination = False
Options.AllowReadingMode = False
Application.AutoRecover.Enabled = False
Options.SaveInterval = 0
Options.CheckGrammarAsYouType = False
Options.CheckGrammarWithSpelling = False
End With
With RTF
Options.Pagination = False
Options.AllowReadingMode = False
Application.AutoRecover.Enabled = False
Options.SaveInterval = 0
Options.CheckGrammarAsYouType = False
Options.CheckGrammarWithSpelling = False
End With
Dim AAF As Table
For Each AAF In RTF.Tables
AAF.AllowAutoFit = False
Next
oWord.Visible = False
Dim rng As Word.Range
Dim sData As String
Dim aData1() As String
Dim aData2() As String
Dim aDataAll() As String
Dim nrRecs As Long
Dim nrFields As Long
Dim lRecs As Long
Dim lFields As Long
Dim CTbl As Table 'Data Table
Dim oCell As Cell
'I'm not displaying the code which replaces all ^p with a spl character to maintain the table structure - it is staright forward, and does the job
Set rng = CTbl.ConvertToText(Separator:="$", NestedTables:=False)
sData = rng.Text 'This contains the entire table, delimited by vbCr and $...
Application.StatusBar = "Closing open files..."
RTF.Close (wdDoNotSaveChanges) 'All data has been extracted, hence quit word
oWord.Quit
Set oWord = Nothing
sData = Mid(sData, 1, Len(sData) - 1)
aData1() = Split(sData, vbCr)
nrRecs = UBound(aData1())
If Dbg Then MsgBox "The table contained " & nrRecs + 1 & " rows"
For lRecs = LBound(aData1()) To nrRecs 'Cycle through all rows
aData2() = Split(aData1(lRecs), "$") 'Split rows into arrays
Debug.Print aData1(lRecs)
nrFields = UBound(aData2()) 'Find out the number of columns
If lRecs = LBound(aData1()) Then 'If this is the first row/cycle,
ReDim Preserve aDataAll(nrRecs, 9) 'nrFields) 'Resize the array - currently I'm using a fixed size for the column since the first row of my table contains merged rows
End If
For lFields = LBound(aData2()) To nrFields 'Cycle through all columns
aDataAll(lRecs, lFields) = aData2(lFields) 'Collate the data in a single array
'If MsgBox(aDataAll(lRecs, lFields), vbYesNo, "Continue?") = vbNo Then Exit For
Next
Next 'All of this was slapped together from MS code samples and stackoverflow examples
有什么提高性能的建议吗?