0

我有下面的VB代码

Sub CountWordFrequencies()
Dim InputSheet As Worksheet
Dim WordListSheet As Worksheet
Dim PuncChars As Variant, x As Variant
Dim i As Long, r As Long, b As Long
Dim txt As String
Dim wordCnt As Long
Dim AllWords As Range
Dim PC As PivotCache
Dim PT As PivotTable
Dim PF As PivotField
Application.ScreenUpdating = False
Set InputSheet = ActiveSheet
Set WordListSheet = Worksheets.Add(after:=Worksheets(Sheets.Count))
WordListSheet.Range("A1").Font.Bold = True
WordListSheet.Range("A1") = "All Words"
InputSheet.Activate
wordCnt = 2


PuncChars = Array(".", ",", ";", ":", "'", "!", "#", _
    "$", "%", "&", "(", ")", " - ", "_", "--", "+", _
    "=", "~", "/", "\", "{", "}", "[", "]", """", "?", "*")

r = 2

Dim NotRealWord As Variant
NotRealWord = Array("OF","THE")


Do While Cells(r, 1) <> ""

    txt = UCase(Cells(r, 1))

    For i = 0 To UBound(PuncChars)
        txt = Replace(txt, PuncChars(i), "")
    Next i

    txt = WorksheetFunction.Trim(txt)

    x = Split(txt)
    For i = 0 To UBound(x)
        WordListSheet.Cells(wordCnt, 1) = x(i)
        wordCnt = wordCnt + 1
    Next i
r = r + 1
Loop


WordListSheet.Activate
Set AllWords = Range("A1").CurrentRegion
Set PC = ActiveWorkbook.PivotCaches.Add _
    (SourceType:=xlDatabase, _
    SourceData:=AllWords)
Set PT = PC.CreatePivotTable _
    (TableDestination:=Range("C1"), _
    TableName:="PivotTable1")
With PT
    .AddDataField .PivotFields("All Words")
    .PivotFields("All Words").Orientation = xlRowField
    .PivotFields("All Words") _
        .AutoSort xlDescending, "Count of All Words"
End With
Set PF = ActiveSheet.PivotTables("PivotTable1").PivotFields("All Words")
With PF
    .ClearManualFilter
    .EnableMultiplePageItems = True
    For b = LBound(NotRealWord) To UBound(NotRealWord)
        .PivotItems(NotRealWord(b)).Visible = False
    Next b
End With
End Sub

这是一个词频分析功能,用户将在 A 列中插入字符串列表,从 A2 开始。他们将单击运行此脚本的按钮。然后,该脚本会将字符串分解为单个单词并创建一个数据透视表,该表将计算每个单词的频率,按频率排序。

以下是显示机制的图片:

  • 来自用户在 A 列上的输入 在此处输入图像描述
  • 结果在此处输入图像描述

结果

现在我的过滤器有问题。最终,我希望数据透视表自动过滤掉“NotRealWord”数组中的单词列表,因为这些单词对分析没有用处。我的代码只有在脚本可以在被分解的单词中找到数组列表中的所有值时才有效。所以在我的例子中,我设置了 NotRealWord = Array("OF", "THE") 并且数据透视表字段确实有这些词,所以它工作得很好。但是如果我添加了“BY”,它会返回此错误“无法获取 PivotField 类的 PivotItems 属性”。我该如何解决?

或者甚至更好,我怎样才能使 NotRealWord 成为一个动态数组,它采用列 F 中的值列表,以便用户可以添加更多他们想要过滤掉的单词,而无需修复代码(我的第一张图片也显示F) 列。

请注意,我不太擅长 VB。我知道如何阅读和适应复杂的代码,但不知道 FB 字的进出

4

1 回答 1

1

这是一种可能的方法,它与您当前的方法略有不同,但应该做您想做的事情:

Sub WordCountTester()
    Dim d As Object, k, i As Long, ws As Worksheet
    
    Set ws = ActiveSheet
    Set d = WordCounts(ws.Range("A2:A" & ws.Cells(Rows.Count, "A").End(xlUp).Row), _
                       ws.Range("F2:F" & ws.Cells(Rows.Count, "F").End(xlUp).Row))
    'list words and frequencies
    For Each k In d.keys
        ws.Range("H2").Resize(1, 2).Offset(i, 0).Value = Array(k, d(k))
        i = i + 1
    Next k
End Sub

'rngTexts = range with text to be word-counted
'rngExclude = range with words to exclude from count
Public Function WordCounts(rngTexts As Range, rngExclude As Range) As Object 'dictionary
    Dim words, c As Range, dict As Object, regexp As Object, w, wd As String, m
    Set dict = CreateObject("scripting.dictionary")
    Set regexp = CreateObject("VBScript.RegExp") 'see link below for reference
    With regexp
        .Global = True
        .MultiLine = True
        .ignorecase = True
        .Pattern = "[\dA-Z-]{2,}" 'at least two characters...
     End With
     'loop over input range
     For Each c In rngTexts.Cells
        If Len(c.Value) > 0 Then
            Set words = regexp.Execute(UCase(c.Value))
            'loop over matches
            For Each w In words
                wd = w.Value 'the text of the match
                If Not IsNumeric(wd) Then  'EDIT: ignore numbers
                   'increment count if the word is not found in the "excluded" range
                    If IsError(Application.Match(wd, rngExclude, 0)) Then
                        dict(wd) = dict(wd) + 1
                    End If
                Else
                    Debug.Print "Filtered out", wd 'excluding numbers...
                End If '>1 char
            Next w
        End If
     Next c
     Set WordCounts = dict
End Function

正则表达式参考:https://docs.microsoft.com/en-us/previous-versions/windows/internet-explorer/ie-developer/scripting-articles/ms974570(v=msdn.10)

于 2021-02-01T22:46:25.080 回答