到目前为止,我已经接近解析文档并获取两个标题之间的标题、标题和文本的工作代码。我试图提取的内容有项目符号、换行符等,我想在将其粘贴到单元格时保持格式。一直在环顾四周并阅读很多论坛,但无法弄清楚如何保持格式不变。我查看了 PasteSpecial ,但它会将内容粘贴到多个单元格中,而且我想尽可能避免复制/粘贴。
下面是我拥有的一个非常早期的代码(有我正在调试/修复的错误):
Sub GetTextFromWord()
Dim Paragraph As Object, WordApp As Object, WordDoc As Object
Dim para As Object
Dim paraText As String
Dim outlineLevel As Integer
Dim title As String
Dim body As String
Dim myRange As Object
Dim documentText As String
Dim startPos As Long
Dim stopPos As Long
Dim file As String
Dim i As Long
Dim category As String
startPos = -1
i = 2
Application.ScreenUpdating = True
Application.DisplayAlerts = False
file = "C:\Sample.doc"
Set WordApp = CreateObject("Word.Application")
WordApp.Visible = True
Set WordDoc = WordApp.Documents.Open(file)
Set myRange = WordDoc.Range
documentText = myRange.Text
For Each para In ActiveDocument.Paragraphs
' Get the current outline level.
outlineLevel = para.outlineLevel
' Cateogry/Header begins outline level 1, and ends at the next outline level 1.
If outlineLevel = wdOutlineLevel1 Then 'e.g., 1 Header
category = para.Range.Text
End If
' Set category as value for cells in Column A
Application.ActiveWorkbook.Worksheets("Sheet1").Cells(i - 1, 1).Value = category
' Title begins outline level 1, and ends at the next outline level 1.
If outlineLevel = wdOutlineLevel2 Then ' e.g., 1.1
' Get the title and update cells in Column B
title = para.Range.Text
Application.ActiveWorkbook.Worksheets("Sheet1").Cells(i, 2).Value = title
startPos = InStr(nextPosition, documentText, title, vbTextCompare)
If startPos <> stopPos Then
' this is text between the two titles
body = Mid$(documentText, startPos, stopPos)
ActiveSheet.Cells(i - 1, 3).Value = body
End If
stopPos = startPos
i = i + 1
End If
Next para
WordDoc.Close
WordApp.Quit
Set WordDoc = Nothing
Set WordApp = Nothing
End Sub