3

我正在尝试编写一个正则表达式以从 SQL 脚本中获取每个插入行。当我在 Regex Hero 上使用 .NET Regex Tester 时,我得到了预期的 8 个匹配项。但是,当我将此代码段作为控制台应用程序运行时,它不会返回任何匹配项。

const string text =
@"INSERT INTO [AdminPrefs] ( [SpayClinic] , [VaxClinic] , [ShelterClinic] , [DateModified] , [Prefix] , [UpdateCounter] , [LockedRecs] , [dbName] , [Timer] , [MedCtrClinic] , [OtherClinic] , [Da2PPPx] , [Da2PPEPx] , [FVRCPPx] , [FVRCPEPx] , [FELVTPx] , [FELVTEPx] , [FELVVPx] , [FELVVEPx] , [HWTPx] , [HWTEPx] , [RabiesPx] , [RabiesEPx] , [FIVTest] , [FIVTestE] , [OnePlusChar] , [XSHWMPx] , [XSHWMEPx] , [SHWMPx] , [SHWMEPx] , [MHWMPx] , [MHWMEPx] , [LHWMPx] , [LHWMEPx] , [DebuggerOn] , [PayThisAmount] , [free6] , [XSHWMPillPx] , [XSHWMPillEPx] , [SHWMPillPx] , [SHWMPillEPx] , [MHWMPillPx] , [MHWMPillEPx] , [LHWMPillPx] , [LHWMPillEPx] , [free7] , [free8] , [free9] , [XSPMPx] , [XSPMEPx] , [SPMPx] , [SPMEPx] , [MPMPx] , [MPMEPx] , [LPMPx] , [LPMEPx] , [ReceiptFooter] , [MonthsUntilBenefits] , [free12] , [XSPMPillPx] , [XSPMPillEPx] , [SPMPillPx] , [SPMPillEPx] , [MPMPillPx] , [MPMPillEPx] , [LPMPillPx] , [LPMPillEPx] , [free14] , [ClinicName] , [ShelterName] , [ShelterAbbr] , [Address1] , [Address2] , [City] , [State] , [ZipCode] , [MainPhone] , [MainFax] , [SplashPict] , [free17] , [free18] , [LicenseNo] , [SerialNo] , [free20] , [free21] , [free22] , [VLogCC] , [SNLogCC] , [free23] , [free24] , [free25] , [AgeAndBDay] , [free26] , [free27] , [free28] , [CurrRouteNum] )
VALUES
(12 , 7 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(15 , 53 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(20 , 216 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(16 , 8 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0);

INSERT INTO [AdminPrefs] ( [SpayClinic] , [VaxClinic] , [ShelterClinic] , [DateModified] , [Prefix] , [UpdateCounter] , [LockedRecs] , [dbName] , [Timer] , [MedCtrClinic] , [OtherClinic] , [Da2PPPx] , [Da2PPEPx] , [FVRCPPx] , [FVRCPEPx] , [FELVTPx] , [FELVTEPx] , [FELVVPx] , [FELVVEPx] , [HWTPx] , [HWTEPx] , [RabiesPx] , [RabiesEPx] , [FIVTest] , [FIVTestE] , [OnePlusChar] , [XSHWMPx] , [XSHWMEPx] , [SHWMPx] , [SHWMEPx] , [MHWMPx] , [MHWMEPx] , [LHWMPx] , [LHWMEPx] , [DebuggerOn] , [PayThisAmount] , [free6] , [XSHWMPillPx] , [XSHWMPillEPx] , [SHWMPillPx] , [SHWMPillEPx] , [MHWMPillPx] , [MHWMPillEPx] , [LHWMPillPx] , [LHWMPillEPx] , [free7] , [free8] , [free9] , [XSPMPx] , [XSPMEPx] , [SPMPx] , [SPMEPx] , [MPMPx] , [MPMEPx] , [LPMPx] , [LPMEPx] , [ReceiptFooter] , [MonthsUntilBenefits] , [free12] , [XSPMPillPx] , [XSPMPillEPx] , [SPMPillPx] , [SPMPillEPx] , [MPMPillPx] , [MPMPillEPx] , [LPMPillPx] , [LPMPillEPx] , [free14] , [ClinicName] , [ShelterName] , [ShelterAbbr] , [Address1] , [Address2] , [City] , [State] , [ZipCode] , [MainPhone] , [MainFax] , [SplashPict] , [free17] , [free18] , [LicenseNo] , [SerialNo] , [free20] , [free21] , [free22] , [VLogCC] , [SNLogCC] , [free23] , [free24] , [free25] , [AgeAndBDay] , [free26] , [free27] , [free28] , [CurrRouteNum] )
VALUES
(26 , 5 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(18 , 12 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(9 , 10 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(2 , 72 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0);
";

/// <summary>
/// Runs the row-matching regex over <c>text</c> and prints the expected and the actual number of matches.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Intended to match one "( ... )," or "( ... );" value row per line.
    string query = @"^\(.*?\)(,|;)$";

    // Singleline lets '.' match newlines; Multiline lets '^' and '$' match at line boundaries.
    // NOTE(review): in Multiline mode .NET's '$' matches only before '\n'. If this source file
    // uses CRLF line endings, the verbatim literal contains "\r\n", so a '\r' sits between the
    // trailing ','/';' and '$' and no row can match - consistent with the zero matches reported.
    var matches = Regex.Matches(text, query, RegexOptions.Singleline | RegexOptions.Multiline);

    Console.WriteLine("Expected Matches: 8");
    Console.WriteLine("Matches Found: {0}", matches.Count);

    // Keep the console window open until Enter is pressed.
    Console.ReadLine();
}

网站和我的代码使用的选项完全相同(Multiline 和 Singleline),而且两者应该使用同一个 .NET 正则表达式引擎,那么是什么导致了两者结果的差异?


最终结果:

对于所有好奇的人,我最后的正则表达式是

@"(?<=^\()                       # The beginning of a line followed by a (
((('(?<c>.*?)'(?!')(?=[\s\)])) | # Text string in SQL supports line breaks
  (?<c>-?[\d\.]+) |              # Any numbers
  (X'(?<c>[0-9a-f]*)')           # Something formatted like X'0123456789abcdef'
  )(\s,\s)?                      # Spaces and commas between the records
)+                               # Repeat the pattern at least one time
(?=(?<!'')\)[;,]\r?$)            # The End of the line ending with ); or ), and not immediately proceeded by ''";     

请所有打算把它拿去做“R&D”(Rip-off and Deploy,即“照搬并部署”)式开发的人注意:这个正则表达式只适用于我的 SQL,因为我的 SQL 格式非常规整。如果用于不是由我的第三方程序生成的 SQL,则需要进行调整,以处理许多我不必处理的边缘情况。

下面是解析器中解析部分的完整代码,希望它能帮助其他遇到类似问题的人。

// Imports every exported *.sql file found under _exportFolder into SQL Server.
// Each sub-folder is named after its destination table and holds that table's INSERT scripts.
foreach (var tableFolder in Directory.GetDirectories(_exportFolder))
{
    string tableName = Path.GetFileName(tableFolder);

    // Populate the schema of the DataTable ("top 0" brings back the columns but no rows).
    // NOTE(review): the folder name is spliced into the SQL text as an identifier, so the
    // export folder must only contain trusted directory names.
    DataTable table = new DataTable();
    using (SqlDataAdapter ada = new SqlDataAdapter(String.Format("Select top 0 * from [{0}]", tableName), conn))
    {
        ada.Fill(table);
    }

    // The SQL literals always use '.' as the decimal separator, so numbers must be parsed
    // with the invariant culture rather than the machine's current culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // All of the files to import for this table
    string[] filePaths = Directory.GetFiles(tableFolder, "*.sql");

    foreach (string file in filePaths)
    {
        string text;
        using (var txtRdr = new StreamReader(file))
        {
            text = txtRdr.ReadToEnd();
        }

        const string recordRegex =
                        @"(?<=^\()                       #The begining of a line followed by a (
                        ((('(?<s>.*?)'(?!')(?=[\s\)])) | # Something formatted like 'some text' supports line breaks
                            (?<n>-?[\d\.]+) |            # Any numbers
                            (X'(?<h>[0-9a-f]*)')         # Something formatted like X'0123456789abcdef'
                            )(\s,\s)?                    # Spaces and commas between the records
                        )+                               # Repeat the pattern at least one time
                        (?=(?<!'')\)[;,]\r?$)            # The End of the line ending with ); or ), and not immedatly proceded by ''";

        // Creates one match per row in the database
        var records = Regex.Matches(text, recordRegex, RegexOptions.Singleline | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture);

        // Column names, in order, captured from the INSERT statement at the very start of the file
        // ('^' is not in Multiline mode here, so it only matches at the beginning of the text).
        const string headerRegex = @"^INSERT\sINTO\s\[[\w_\-\s]+\]\s\(\s(?:\[([\w_\-\s]+)\]\s(?:,\s)?)+\)";
        var header = Regex.Match(text, headerRegex).Groups[1].Captures.Cast<Capture>().ToArray();

        foreach (Match record in records)
        {
            // The values were captured by three separate groups (s, n, h), so they have to be
            // merged back into one list in the order they appear in the row.
            var columns = record.Groups.Cast<Group>()
                                .Skip(1)  // Groups[0] contains the entire record.
                                .SelectMany(group => group.Captures.Cast<Capture>()) // Flattens the captures of the three groups into one list
                                .OrderBy(capture => capture.Index) // SelectMany yields group by group, so restore positional order.
                                .ToArray();

            DataRow row = table.NewRow();
            for (int i = 0; i < columns.Length; i++)
            {
                string columnName = header[i].Value;
                string rawValue = columns[i].Value;
                Type columnType = table.Columns[columnName].DataType;

                if (columnType == typeof(String))
                {
                    row[columnName] = rawValue;
                }
                else if (columnType == typeof(Int32))
                {
                    row[columnName] = Convert.ToInt32(rawValue, invariant);
                }
                else if (columnType == typeof(Double))
                {
                    row[columnName] = Convert.ToDouble(rawValue, invariant);
                }
                else if (columnType == typeof(Boolean))
                {
                    // Booleans are exported as the numeric literals 0 and 1.
                    if (rawValue == "0")
                        row[columnName] = false;
                    else if (rawValue == "1")
                        row[columnName] = true;
                    else
                        throw new InvalidDataException(String.Format("Column [{0}] expects 0 or 1 but found '{1}'.", columnName, rawValue));
                }
                else if (columnType == typeof(Int16))
                {
                    row[columnName] = Convert.ToInt16(rawValue, invariant);
                }
                else if (columnType == typeof(Byte[]))
                {
                    // Hex digits captured from an X'...' literal.
                    row[columnName] = StringToByteArray(rawValue);
                }
                else
                {
                    throw new NotImplementedException(String.Format("No conversion implemented for column [{0}] of type {1}.", columnName, columnType));
                }
            }
            table.Rows.Add(row);
        }

        using (var bulkCopy = new SqlBulkCopy(conn))
        {
            bulkCopy.DestinationTableName = tableName;
            bulkCopy.BulkCopyTimeout = 0; // 0 = no time limit
            bulkCopy.WriteToServer(table);
        }

        // The DataTable is shared by every file of this table: drop the rows that were just
        // written so the next file's bulk copy does not insert them a second time.
        table.Clear();
    }
}

更新:

通过把所有捕获组(capture group)重命名为同一个名称,.NET 的正则表达式引擎会替我把它们合并到一起,于是下面的第一行代码可以简化为第二行:

// Before: three differently named groups, so their captures had to be flattened and re-sorted by position.
var columns = record.Groups.Cast<Group>().Skip(1).SelectMany(group => group.Captures.Cast<Capture>()).OrderBy(capture => capture.Index).ToArray();

// After: one shared group name, so that single group's Captures collection is used directly.
var columns = record.Groups[1].Captures.Cast<Capture>().ToArray();
4

1 回答 1

6

请注意,在 Regex Hero 页面上切换“CrLf 标记行结束”设置会导致 8 行停止匹配;这是导致问题的线索。

在您的 C# 代码中,字符串字面量里的换行符被编码为 CR/LF 对("\r\n")。正则表达式中的 $(在多行模式下匹配行尾)只会在 \n 字符之前匹配。因此,在最后的逗号(或分号)与行尾之间多出了一个正则表达式没有考虑到的 \r 字符,导致匹配失败。

您可以解决此问题的一些方法包括:

  1. 去掉回车:text = text.Replace("\r\n", "\n");, 或
  2. 匹配回车:string query = @"^\(.*?\)(,|;)\r$";
于 2012-04-04T14:44:52.590 回答