1

这个问题与另一个问题密切相关,但本问题关注的是:如何抓取包含对全局变量的(未被注释掉的)引用的方法。

我正在使用以下正则表达式和测试字符串来检查它是否有效,但它只是部分有效:

正则表达式

^((?:(?:Public|Private)\s+)?(?:Function|Sub).+)[\s\S]+?(GLOBAL_VARIABLE_1)[\s\S]+?End\s+(?:Function|Sub)$

(我需要以这种方式与捕获组一起使用正则表达式的一部分,以便我可以获取方法的名称作为子匹配)。

测试字符串

'-----------------------------------------------------------------------------------------
'
'   the code:   Header
'
'-----------------------------------------------------------------------------------------

Dim GLOBAL_VARIABLE_1
Dim GLOBAL_VARIABLE_2
Dim GLOBAL_VARIABLE_3

Public Function doThis(byVal xml)
'' Created               : dd/mm/yyyy
'' Return                : string
'' Param            : xml- an xml blob

     return = replace(xml, "><", ">" & vbLf & "<")

     GLOBAL_VARIABLE_1 = 2 + 2

     doThis = return

End Function


msgbox GLOBAL_VARIABLE_1



Public Function doThat(byVal xPath)
'' Created               : dd/mm/yyyy
'' Return                : array
' 'Param            : xPath

     return = split(mid(xPath, 2), "/")

     GLOBAL_VARIABLE_2 = 2 + 2


     doThat = return

End Function


GLOBAL_VARIABLE_2 = 2 + 2


Public Sub butDontDoThis()
'' Created               : dd/mm/yyyy
'' Return                : string
' 'Param            : obj

     For i = 0 To 5
          return = return & "bye" & " "

     Next

End Sub


GLOBAL_VARIABLE_3 = 3 + 3


Public Sub alsoDoThis(byRef obj)
'' Created               : dd/mm/yyyy
'' Return                : string
' 'Param            : obj, an xml document object

     For i = 0 To 4
          return = return & "hi" & " "

     Next

     GLOBAL_VARIABLE_1 = 1 + 1

End Sub


GLOBAL_VARIABLE_3 = 3 + 3

使用 http://www.regexpal.com/,我可以突出显示引用全局变量的第一个方法。但是,正则表达式对其余方法的处理并不符合我的预期:它还会选中其他没有引用该全局变量的方法,并一直延伸到最后一个实际使用该全局变量的方法才结束。我已经确定问题出在 [\s\S]+?(GLOBAL_VARIABLE_1)[\s\S]+?End\s+(?:Function|Sub)$ 这一部分:它进行的是最小/非贪婪匹配,因此会一直向后查找,直到找到一个实际的匹配为止。

总之,表达式应遵循以下规则:

  • 当遇到第一个方法结束声明(End Function / End Sub)时,停止扫描当前正在检查的方法。在这个例子中,针对 GLOBAL_VARIABLE_1 应该只匹配 doThis 和 alsoDoThis 方法,但我不确定正则表达式应该怎么写。
  • 正则表达式也应该只匹配实际使用全局变量的方法
  • 如果 GLOBAL_VARIABLE_1 被注释掉了,那么该方法实际上并没有使用它。被注释掉的 GLOBAL_VARIABLE_1 不应触发对该方法的匹配。
4

3 回答 3

1

描述

我会分两步来做:首先识别出每一个 Function 和 Sub。这里我使用反向引用 \1 来确保匹配到正确的 End Function 或 End Sub。此正则表达式还会获取函数名称并将其放入第 2 组,如果第 2 步的检查通过,稍后就可以使用这个名称。

(?:Public|Private)\s+(Function|Sub)\s+([a-z0-9]*).*?End\s+\1 在此处输入图像描述

然后逐个测试这些匹配结果,看它们是否包含您的变量。请注意,在此测试中我使用了多行匹配,以确保在同一行中 GLOBAL_VARIABLE_1 之前没有出现注释字符。它还会检查 GLOBAL_VARIABLE_1 的前面没有以下任何内容:

  • 带或不带_分隔符的字母数字。这需要使用您可能在变量名中找到的所有字符进行更新。在此处包含连字符-可能会与等式中使用的减号混淆。
  • 注释字符 '

^[^']*?(?![a-z0-9][_]?|['])\bGLOBAL_VARIABLE_1

在此处输入图像描述

VB 第 1 部分

Imports System.Text.RegularExpressions
Module Module1
  ' Step 1 demo: locate every procedure in the source text.
  ' Group 1 captures the keyword (Function or Sub); the \1 backreference makes
  ' the match end at the "End" statement that repeats that same keyword.
  ' Group 2 captures the procedure name.
  Sub Main()
    Dim sourceText As String = "replace with your source string"
    Dim flags As RegexOptions = RegexOptions.IgnoreCase Or RegexOptions.Singleline
    Dim procedureRegex As New Regex("(?:Public|Private)\s+(Function|Sub)\s+([a-z0-9]*).*?End\s+\1", flags)
    Dim groupNames As String() = procedureRegex.GetGroupNames()
    Dim hits As MatchCollection = procedureRegex.Matches(sourceText)

    ' Print each group of each match as "[match index][group name] = value".
    For matchNo As Integer = 0 To hits.Count - 1
      Dim hit As Match = hits(matchNo)
      For groupNo As Integer = 0 To hit.Groups.Count - 1
        Console.WriteLine("[{0}][{1}] = {2}", matchNo, groupNames(groupNo), hit.Groups(groupNo).Value)
      Next
    Next
  End Sub
End Module

$matches Array:
(
    [0] => Array
        (
            [0] => Public Function doThis(byVal xml)
'' Created               : dd/mm/yyyy
'' Return                : string
'' Param            : xml- an xml blob

     return = replace(xml, "><", ">" & vbLf & "<")

     GLOBAL_VARIABLE_1 = 2 + 2

     doThis = return

End Function
            [1] => Public Function doThat(byVal xPath)
'' Created               : dd/mm/yyyy
'' Return                : array
' 'Param            : xPath

     return = split(mid(xPath, 2), "/")

     GLOBAL_VARIABLE_2 = 2 + 2


     doThat = return

End Function
            [2] => Public Sub butDontDoThis()
'' Created               : dd/mm/yyyy
'' Return                : string
' 'Param            : obj

     For i = 0 To 5
          return = return & "bye" & " "

     Next

End Sub
            [3] => Public Sub alsoDoThis(byRef obj)
'' Created               : dd/mm/yyyy
'' Return                : string
' 'Param            : obj, an xml document object

     For i = 0 To 4
          return = return & "hi" & " "

     Next

     GLOBAL_VARIABLE_1 = 1 + 1

End Sub
        )

    [1] => Array
        (
            [0] => Function
            [1] => Function
            [2] => Sub
            [3] => Sub
        )

    [2] => Array
        (
            [0] => doThis
            [1] => doThat
            [2] => butDontDoThis
            [3] => alsoDoThis
        )

)

VB 第 2 部分

在本文中找到

Public Function doThis(byVal xml)
'' Created               : dd/mm/yyyy
'' Return                : string
'' Param            : xml- an xml blob

     return = replace(xml, "><", ">" & vbLf & "<")

     GLOBAL_VARIABLE_1 = 2 + 2

     doThis = return

End Function

例子

Imports System.Text.RegularExpressions
Module Module1
  ' Step 2 demo: test one procedure's text for an uncommented reference.
  ' The pattern must get from a line start (Multiline ^) to GLOBAL_VARIABLE_1
  ' without consuming an apostrophe, so a reference that sits behind a
  ' comment marker on its line does not match.
  Sub Main()
    Dim sourceText As String = "replace with your source string"
    Dim flags As RegexOptions = RegexOptions.IgnoreCase Or RegexOptions.Multiline
    Dim usageRegex As New Regex("^[^']*?GLOBAL_VARIABLE_1", flags)
    Dim groupNames As String() = usageRegex.GetGroupNames()
    Dim hits As MatchCollection = usageRegex.Matches(sourceText)

    ' Print each group of each match as "[match index][group name] = value".
    For matchNo As Integer = 0 To hits.Count - 1
      Dim hit As Match = hits(matchNo)
      For groupNo As Integer = 0 To hit.Groups.Count - 1
        Console.WriteLine("[{0}][{1}] = {2}", matchNo, groupNames(groupNo), hit.Groups(groupNo).Value)
      Next
    Next
  End Sub
End Module

$matches Array:
(
    [0] => Array
        (
            [0] =>  Param            : xml- an xml blob

     return = replace(xml, "><", ">" & vbLf & "<")

     GLOBAL_VARIABLE_1
        )

)

在此文本中未找到

Public Function doThis(byVal xml)
'' Created               : dd/mm/yyyy
'' Return                : string
'' Param            : xml- an xml blob

     return = replace(xml, "><", ">" & vbLf & "<")

  '   GLOBAL_VARIABLE_1 = 2 + 2

     doThis = return

End Function

例子

Imports System.Text.RegularExpressions
Module Module1
  ' Step 2 demo (negative case): the same uncommented-reference test, meant to
  ' be run against a procedure whose only reference is commented out.
  ' The pattern must get from a line start (Multiline ^) to GLOBAL_VARIABLE_1
  ' without consuming an apostrophe.
  Sub Main()
    Dim sourceText As String = "replace with your source string"
    Dim flags As RegexOptions = RegexOptions.IgnoreCase Or RegexOptions.Multiline
    Dim usageRegex As New Regex("^[^']*?GLOBAL_VARIABLE_1", flags)
    Dim groupNames As String() = usageRegex.GetGroupNames()
    Dim hits As MatchCollection = usageRegex.Matches(sourceText)

    ' Print each group of each match as "[match index][group name] = value".
    For matchNo As Integer = 0 To hits.Count - 1
      Dim hit As Match = hits(matchNo)
      For groupNo As Integer = 0 To hit.Groups.Count - 1
        Console.WriteLine("[{0}][{1}] = {2}", matchNo, groupNames(groupNo), hit.Groups(groupNo).Value)
      Next
    Next
  End Sub
End Module

Matches Found:
NO MATCHES.

在此文本中也找不到

Public Sub butDontDoThis()
'' Created               : dd/mm/yyyy
'' Return                : string
' 'Param            : obj

     For i = 0 To 5
          return = return & "bye" & " "

     Next

End Sub

例子

   Imports System.Text.RegularExpressions
    Module Module1
      ' Step 2 demo (no reference at all): runs the uncommented-reference test
      ' against a procedure that never mentions GLOBAL_VARIABLE_1.
      Sub Main()
        ' The sample procedure is embedded verbatim as a multi-line string literal.
        Dim sourceText As String = "Public Sub butDontDoThis()
    '' Created               : dd/mm/yyyy
     '' Return                : string
     ' 'Param            : obj

     For i = 0 To 5
          return = return & ""bye"" & "" ""

     Next

End Sub"
        Dim flags As RegexOptions = RegexOptions.IgnoreCase Or RegexOptions.Multiline
        Dim usageRegex As New Regex("^[^']*?GLOBAL_VARIABLE_1", flags)
        Dim groupNames As String() = usageRegex.GetGroupNames()
        Dim hits As MatchCollection = usageRegex.Matches(sourceText)

        ' Print each group of each match as "[match index][group name] = value".
        For matchNo As Integer = 0 To hits.Count - 1
          Dim hit As Match = hits(matchNo)
          For groupNo As Integer = 0 To hit.Groups.Count - 1
            Console.WriteLine("[{0}][{1}] = {2}", matchNo, groupNames(groupNo), hit.Groups(groupNo).Value)
          Next
        Next
      End Sub
    End Module

    Matches Found:
    NO MATCHES.

免责声明

有很多边缘情况可能会让这种方法出错,例如注释中包含 ' end function,或者把字符串值赋给变量,例如 thisstring = "end sub"。

是的,我意识到 OP 是针对 VBscript 的,我已经包含了这些示例来演示整体逻辑以及正则表达式的工作原理。

于 2013-05-28T19:41:38.583 回答
0

找到了罪魁祸首。该问题是由正则表达式的突出显示部分引起的:

((?:(?:Public|Private)\s+)?(?:Function|Sub).+)[\s\S]+?(GLOBAL_VARIABLE_1)[\s\S]+?End\s+(?:Function|Sub)

[\s\S]+?是非贪婪匹配,但这并不一定意味着它是最短匹配。简化示例:

Public Function doThis(byVal xml)
  GLOBAL_VARIABLE_1
End Function

Public Function doThat(byVal xPath)
  GLOBAL_VARIABLE_2
End Function

Public Sub butDontDoThis()
  GLOBAL_VARIABLE_3
End Sub

Public Sub alsoDoThis(byRef obj)
  GLOBAL_VARIABLE_1
End Sub

当正则表达式应用于示例文本时,它首先匹配第一个函数(标记为粗体文本的组):

Public Function doThis(byVal xml)
  GLOBAL_VARIABLE_1
End Function

但是,在这次匹配之后,表达式的第一部分 ((?:(?:Public|Private)\s+)?(?:Function|Sub).+) 匹配了下一个函数定义(Public Function doThat(byVal xPath)),随后 [\s\S]+?(GLOBAL_VARIABLE_1) 匹配了直到下一次出现 GLOBAL_VARIABLE_1 为止的所有文本:

Public Function doThat(byVal xPath)
  GLOBAL_VARIABLE_2
End Function

Public Sub butDontDoThis()
  GLOBAL_VARIABLE_3
End Sub

Public Sub alsoDoThis(byRef obj)
  GLOBAL_VARIABLE_1
End Sub

[\s\S]+? 中并没有隐含“不包括 End Function”的限制。

解决您的问题的最简单方法可能是正则表达式和字符串匹配的组合:

' Lists the declaration line of every procedure whose body contains an
' uncommented reference to GLOBAL_VARIABLE_1.
Set fso = CreateObject("Scripting.FileSystemObject")
text = fso.OpenTextFile("C:\Temp\sample.txt").ReadAll

' Splits the source into procedures:
'   SubMatches(0) = declaration line
'   SubMatches(1) = Function or Sub (reused by \2 to find the matching End)
'   SubMatches(2) = procedure body
Set re = New RegExp
re.Pattern = "((?:(?:Public|Private)\s+)(Function|Sub).+)([\s\S]+?)End\s+\2"
re.Global  = True
re.IgnoreCase = True

' Matches a line on which the variable appears before any apostrophe, so a
' commented-out reference does not count. \b keeps longer identifiers such as
' GLOBAL_VARIABLE_10 from matching.
' Known limits: an apostrophe inside a string literal earlier on the same line
' hides a real reference, and Rem-style comments are not recognised.
Set reUse = New RegExp
reUse.Pattern = "^[^'\r\n]*\bGLOBAL_VARIABLE_1\b"
reUse.MultiLine = True
reUse.IgnoreCase = True

For Each m In re.Execute(text)
  body = m.SubMatches(2)
  ' InStr is a cheap pre-filter; the regex then rejects commented-out hits.
  ' vbTextCompare because VBScript identifiers are case-insensitive.
  If InStr(1, body, "GLOBAL_VARIABLE_1", vbTextCompare) > 0 Then
    If reUse.Test(body) Then
      WScript.Echo m.SubMatches(0)
    End If
  End If
Next

它提取每个过程/函数的主体(SubMatches(2)),然后用 InStr() 检查主体是否包含 GLOBAL_VARIABLE_1。

于 2013-05-28T18:56:38.760 回答
0

描述

此正则表达式会将文本拆分成若干字符串,每个字符串包含一个 Function 或 Sub。它还会验证该字符串是否包含未被注释掉的 GLOBAL_VARIABLE_1:做法是在函数中查找第一行在目标变量之前没有出现 ' 的代码。如果 ' 嵌在双引号字符串中,表达式会把它当作普通字符处理,例如 variable = "sometext ' more text" + GLOBAL_VARIABLE_1

(?:Public|Private)\s+(Function|Sub)\s+([a-z0-9]*)(?:(?!^End\s+\1\s+(?:$|\Z)).)*^(?:[^'\r\n]|"[^"\r\n]*")*GLOBAL_VARIABLE_1.*?^End\s\1\b

在此处输入图像描述

捕获组

组 0 将包含整个匹配的函数/子

  1. 将相应地包含 Function 或 Sub
  2. 将包含函数/子的名称

例子

输入文本

Public Function ValidEdgeCase1(byRef obj)
  SomeVariable = "some text with an embedded ' single quote" + GLOBAL_VARIABLE_1
End Sub

Public Sub SkipEdgeCase(byRef obj)
  SomeVariable = "some text with an embedded ' single quote" ' + GLOBAL_VARIABLE_1
End Sub

Public Function FailCommented(byVal xml)
'  GLOBAL_VARIABLE_1
End Function

Public Function FAilWrongName1(byVal xPath)
  GLOBAL_VARIABLE_2
End Function

Public Sub FAilWrongName1()
  GLOBAL_VARIABLE_3
End Sub

Public Sub alsoDoThis(byRef obj)
  GLOBAL_VARIABLE_1
End Sub

Public Sub IHeartKitten(byRef obj)
  GLOBAL_VARIABLE_1
End Sub

Public Sub IHeartKitten2(byRef obj)
  GLOBAL_VARIABLE_1
End Sub

Public Function FailCommented(byVal xml)
'  GLOBAL_VARIABLE_1
End Function

示例代码

Imports System.Text.RegularExpressions
Module Module1
  ' Single-pass demo: matches only procedures that contain an uncommented
  ' reference to GLOBAL_VARIABLE_1.
  '   Group 1 = Function or Sub, group 2 = procedure name.
  ' The sub-pattern (?:[^'\r\n]|"[^"\r\n]*")* walks from a line start to the
  ' variable, accepting an apostrophe only inside a double-quoted string.
  Sub Main()
    Dim sourcestring as String = "replace with your source string"
    ' Double quotes inside a VB string literal must be doubled (""); written
    ' with single quotes the literal ends early and the module does not compile.
    Dim re As Regex = New Regex("(?:Public|Private)\s+(Function|Sub)\s+([a-z0-9]*)(?:(?!^End\s+\1\s+(?:$|\Z)).)*^(?:[^'\r\n]|""[^""\r\n]*"")*GLOBAL_VARIABLE_1.*?^End\s\1\b",RegexOptions.IgnoreCase OR RegexOptions.IgnorePatternWhitespace OR RegexOptions.Multiline OR RegexOptions.Singleline)
    Dim mc as MatchCollection = re.Matches(sourcestring)
    Dim mIdx as Integer = 0
    ' Print each group of each match as "[match index][group name] = value".
    For each m as Match in mc
      For groupIdx As Integer = 0 To m.Groups.Count - 1
        Console.WriteLine("[{0}][{1}] = {2}", mIdx, re.GetGroupNames(groupIdx), m.Groups(groupIdx).Value)
      Next
      mIdx=mIdx+1
    Next
  End Sub
End Module

$matches 数组:

(
    [0] => Array
        (
            [0] => Public Function ValidEdgeCase1(byRef obj)
  SomeVariable = "some text with an embedded ' single quote" + GLOBAL_VARIABLE_1
End Sub
            [1] => Public Sub alsoDoThis(byRef obj)
  GLOBAL_VARIABLE_1
End Sub
            [2] => Public Sub IHeartKitten(byRef obj)
  GLOBAL_VARIABLE_1
End Sub
            [3] => Public Sub IHeartKitten2(byRef obj)
  GLOBAL_VARIABLE_1
End Sub
        )

    [1] => Array
        (
            [0] => Function
            [1] => Sub
            [2] => Sub
            [3] => Sub
        )

    [2] => Array
        (
            [0] => ValidEdgeCase1
            [1] => alsoDoThis
            [2] => IHeartKitten
            [3] => IHeartKitten2
        )

)
于 2013-06-16T02:08:39.717 回答