0

在下面的代码中,我尝试使用正则表达式,从更下方所示的文本文件中提取部分内容。

// Callback named after the NSURLConnection delegate method that signals a
// finished download. Decodes receivedData (declared outside this method) as
// ISO Latin-1 text, compiles the pattern in toMatch, logs how many matches
// were found, and then tries to log the text of every match.
// NOTE(review): this is the code being asked about, kept exactly as posted.
// As written it cannot compile: trange is used below but is declared only
// inside a commented-out line.
- (void)connectionDidFinishLoading:(NSURLConnection *)connection
{
    // NOTE(review): -length returns NSUInteger, but %d expects an int.
    NSLog(@"Succeeded! Received %d bytes of data",[receivedData length]);
    NSString *string = [[NSString alloc] initWithData:receivedData encoding:NSISOLatin1StringEncoding];
    NSLog(@"string length: %d", [string length]);
    NSError *error = nil;
    // NOTE(review): "\[" is not a valid escape in a string literal, so the
    // backslash is lost and the regex engine receives a bare "[" (the logged
    // pattern length of 41 agrees with this). The two bracketed parts are
    // therefore read as character classes followed by "*", which can match
    // short runs of those characters or nothing at all, instead of the
    // literal [Board ...] and [Dealer ...] tags. "\\t" does reach the engine
    // as \t (a tab).
    NSString *toMatch = @"\[Board\\t\"([0-9]?)\"]*\[Dealer\\t\"([NEWS])\"]*";
    // NOTE(review): the result is not checked for nil and error is never read.
    NSRegularExpression *regex = [NSRegularExpression  regularExpressionWithPattern:toMatch
        options:0 error:&error];
    NSLog(@"length: %d", [toMatch length]);
    NSUInteger numberOfMatches = [regex numberOfMatchesInString:string options:0 range:NSMakeRange(0, [string length])];
    // NOTE(review): "%ud" is "%u" followed by a literal 'd', which is why the
    // console shows the count with a trailing "d".
    NSLog(@" %ud", numberOfMatches);
    // The same search is run a second time here to obtain the match objects.
    for (NSTextCheckingResult* match in [regex matchesInString:string options:0 range:NSMakeRange(0, [string length])]){
        // cannot make this work: NSRange trange =[match range];
        // cannot make this work: NSLog(@"range %i,%i", trange );
        // NOTE(review): trange is undeclared at this point (see the
        // commented-out declaration above).
        NSString* tstring=[string substringWithRange:trange];
        NSLog(@" %@", tstring );}
}

NSRegularExpression 用来从以下文本摘录中提取信息。更具体地说,我需要每副牌(board)的 Board 编号和 Dealer 值(大约有 40 副牌,我已从列表中删去了若干不相关的行)。

[Board "1"]
[Dealer "N"]
[Vulnerable "None"]
[Deal "N:Q952.652.KJT4.95 T.KQT84.A865.J73 K8763.A7.Q.KQT84 AJ4.J93.9732.A62"]
[Scoring ""]
[Declarer ""]
[Contract ""]
[Board "2"]
[Dealer "E"]
[Vulnerable "NS"]
[Deal "E:K8542.3.4.AT7532 J76.K7.AT85.KQJ8 QT3.AJ84.KJ963.4 A9.QT9652.Q72.96"]
[Scoring ""]
[Declarer ""]
[Contract ""]

我的 for 循环打印出来的是一堆乱码(gobbledygook)。出现乱码至少有两种可能的原因:我的正则表达式有错,或者我的 for 循环有错。

控制台输出(包括那些乱码)如下。

2013-02-03 11:00:14.161 BridgeDuplicate[51867:11303] the window: <UIWindow: 0x956eac0; frame = (0 0; 768 1024); hidden = YES; layer = <UIWindowLayer: 0x956ebc0>>
2013-02-03 11:00:14.163 BridgeDuplicate[51867:11303] the rootViewController: <BSViewController: 0x7188220>
2013-02-03 11:00:14.166 BridgeDuplicate[51867:11303] viewDidLoad
2013-02-03 11:00:27.156 BridgeDuplicate[51867:11303] Succeeded! Received 303896 bytes of data
2013-02-03 11:00:27.158 BridgeDuplicate[51867:11303] string length: 303896
2013-02-03 11:00:27.164 BridgeDuplicate[51867:11303] length: 41
2013-02-03 11:00:27.205 BridgeDuplicate[51867:11303]  264765d
2013-02-03 11:00:27.205 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.206 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.206 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.206 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.206 BridgeDuplicate[51867:11303]  l
2013-02-03 11:00:27.206 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.206 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.206 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.206 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.206 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.207 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.207 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.207 BridgeDuplicate[51867:11303]  ea
2013-02-03 11:00:27.207 BridgeDuplicate[51867:11303]  d
2013-02-03 11:00:27.207 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.207 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.207 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.207 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.207 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.208 BridgeDuplicate[51867:11303]  e
2013-02-03 11:00:27.208 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.208 BridgeDuplicate[51867:11303]  a
2013-02-03 11:00:27.208 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.208 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.208 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.208 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.208 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.208 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.208 BridgeDuplicate[51867:11303]  e
2013-02-03 11:00:27.209 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.209 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.209 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.209 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.209 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.209 BridgeDuplicate[51867:11303]  "
2013-02-03 11:00:27.228 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.228 BridgeDuplicate[51867:11303]  o
2013-02-03 11:00:27.228 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.228 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.228 BridgeDuplicate[51867:11303]  e
2013-02-03 11:00:27.228 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.229 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.229 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.229 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.229 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.229 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.230 BridgeDuplicate[51867:11303]  e"
2013-02-03 11:00:27.230 BridgeDuplicate[51867:11303]  
2013-02-03 11:00:27.230 BridgeDuplicate[51867:11303]  
4

1 回答 1

1

我怀疑你误解了 NSTextCheckingResult 的工作原理,但也许更重要的是,你的模式本身存在一些问题。下面的代码应该能说明这一点:

// Self-contained demonstration: match a [Board "n"] line that is directly
// followed by a [Dealer "x"] line, then log every range of every match.
// The sample text uses a tab between the tag name and the quoted value.
NSString *string = @"[Board\t\"1\"]\n[Dealer\t\"N\"]\n";
NSLog(@"string length: %lu", (unsigned long)[string length]);

// Every backslash meant for the regex engine is doubled for the string
// literal: "\\[" reaches the engine as \[ (a literal bracket) and "\\t" as \t
// (a tab). The board number uses [0-9]+ so that multi-digit boards (10, 11,
// ...) match as well as single-digit ones.
NSString *toMatch = @"\\[Board\\t\"([0-9]+)\"\\].*\\n\\[Dealer\\t\"([NEWS])\"\\].*";
NSError *error = nil;
NSRegularExpression *regex = [NSRegularExpression regularExpressionWithPattern:toMatch
                                                                       options:0
                                                                         error:&error];
if (regex == nil) {
    // Test the return value, not the error pointer, to detect failure.
    NSLog(@"could not compile pattern: %@", error);
}
NSLog(@"length: %lu", (unsigned long)[toMatch length]);

// Run the search once and reuse the result for both the count and the loop.
NSArray *matches = [regex matchesInString:string
                                  options:0
                                    range:NSMakeRange(0, [string length])];
NSLog(@"number of matches: %lu", (unsigned long)[matches count]);
for (NSTextCheckingResult *match in matches)
{
    // Range 0 is the whole match; ranges 1..n are the capture groups.
    NSLog(@"Number of ranges in match: %lu", (unsigned long)match.numberOfRanges);
    for (NSUInteger i = 0; i < match.numberOfRanges; ++i)
    {
        NSRange matchedRange = [match rangeAtIndex:i];
        NSString *tstring = [string substringWithRange:matchedRange];
        NSLog(@"range %lu string: %@", (unsigned long)i, tstring);
    }
}

你会得到的是:

2013-02-03 12:16:41.112 RegExTest[72290:303] string length: 25
2013-02-03 12:16:43.889 RegExTest[72290:303] length: 49
2013-02-03 12:16:43.889 RegExTest[72290:303] number of matches: 1
2013-02-03 12:16:43.890 RegExTest[72290:303] Number of ranges in match: 3
2013-02-03 12:16:43.890 RegExTest[72290:303] range 0 string: [Board "1"]
[Dealer "N"]
2013-02-03 12:16:43.890 RegExTest[72290:303] range 1 string: 1
2013-02-03 12:16:43.890 RegExTest[72290:303] range 2 string: N

这里要知道的是,您得到的是一个匹配(match),而这个匹配包含多个范围(range)。每个成功的匹配至少有一个范围:即整个模式所匹配到的整段字符串的范围(这不是您在这里感兴趣的内容)。由括号定义的捕获组会出现在大于 0 的索引处,如这段代码所示。

转义规则有点麻烦——先有 NSString 字面量的转义规则,然后才是正则表达式自身的转义规则。例如,要让正则引擎看到 \[(匹配一个字面的左方括号),源码中必须写成 \\[。二者如何相互作用可能并不直观,但我在这里给出的模式似乎可以满足您的需求。

编辑:

这是另一个版本,它直接从您的 URL 获取数据并成功匹配:

// Downloads the PBN file and logs, for every board, the whole match plus the
// captured board number (range 1) and dealer (range 2).
// NOTE: stringWithContentsOfURL:encoding:error: loads synchronously; this is
// fine for a quick experiment but should not run on the main thread of an app.
NSError *error = nil;
NSURL *url = [NSURL URLWithString:@"http://www.atlantaduplicatebridgeclub.com/scorepost/2013/01/20130126ana.pbn"];
NSString *string = [NSString stringWithContentsOfURL:url
                                            encoding:NSUTF8StringEncoding
                                               error:&error];
if (string == nil) {
    // nil means the download failed or the data is not valid UTF-8.
    NSLog(@"could not load %@: %@", url, error);
} else {
    NSLog(@"string length: %lu", (unsigned long)[string length]);

    // [0-9]+ accepts multi-digit board numbers. The gap between the Board and
    // Dealer tags uses the lazy ".*?": with dot matching line separators, a
    // greedy ".*" would run from the first Board tag all the way to the last
    // Dealer tag in the file and produce a single, wrong match.
    NSString *toMatch = @"\\[Board\\s*\"([0-9]+)\"\\].*?\\[Dealer\\s*\"([NEWS])\"\\]";
    NSRegularExpression *regex = [NSRegularExpression regularExpressionWithPattern:toMatch
                                                                           options:NSRegularExpressionDotMatchesLineSeparators
                                                                             error:&error];
    if (regex == nil) {
        // Test the return value, not the error pointer, to detect failure.
        NSLog(@"could not compile pattern: %@", error);
    }
    NSLog(@"pattern length: %lu", (unsigned long)[toMatch length]);

    // Pattern-level flags belong to the regex object; the options argument of
    // matchesInString: takes NSMatchingOptions, so 0 is passed here. The
    // search is run once and reused for both the count and the loop.
    NSArray *matches = [regex matchesInString:string
                                      options:0
                                        range:NSMakeRange(0, [string length])];
    NSLog(@"number of matches: %lu", (unsigned long)[matches count]);
    for (NSTextCheckingResult *match in matches)
    {
        // Range 0 is the whole match; ranges 1..n are the capture groups.
        NSLog(@"Number of ranges in match: %lu", (unsigned long)match.numberOfRanges);
        for (NSUInteger i = 0; i < match.numberOfRanges; ++i)
        {
            NSRange matchedRange = [match rangeAtIndex:i];
            NSString *tstring = [string substringWithRange:matchedRange];
            NSLog(@"range %lu string: %@", (unsigned long)i, tstring);
        }
    }
}
于 2013-02-03T17:24:18.083 回答