下面这个正则表达式应该可以满足需求(逐段注释如下):
^ //start of line
( //first capture group
[\d]+ //one or more digits
(?: //start of optional non-capturing group
\s? //optional whitespace
[ab] //character class - a or b
)? //end of optional non-capturing group
) //end of first capture group
\s //whitespace
( //second capture group
(?: //non-capturing group
Mr|Ms|Mrs|Mister //title alternation
)
\s //whitespace
[\w/]+ //1 or more word characters or "/"
| //alternation
[\w/]+ //1 or more word characters or "/"
) //end of second capture group
(?: //start of optional non-capturing group
\s //whitespace
( //third capture group
.* //0 or more of any character
) //end of third capture group
)? //end of optional non-capturing group
$ //end of line
构建你的正则表达式。正则中的每个反斜杠都必须再转义一次(例如把 \d 写成 \\d),这样它们才能保留在 NSString 字面量中:
// Pattern for lines shaped like "<number>[a|b] <name> [<rest>]".
// Capture groups:
//   1: one or more digits, optionally followed by "a" or "b"
//      (with at most one whitespace character in between)
//   2: the name: either a title (Mr/Ms/Mrs/Mister) plus one word, or a single
//      word; words may contain "/"
//   3: optional remainder of the line after the name
// Every regex backslash is doubled so that it survives inside the NSString literal.
NSString *regexString =
    @"^([\\d]+(?:\\s?[ab])?)\\s((?:Mr|Ms|Mrs|Mister)\\s[\\w/]+|[\\w/]+)(?:\\s(.*))?$";

// Collect the compile error instead of passing nil, so that a malformed
// pattern is reported rather than silently yielding a nil regex.
NSError *regexError = nil;
NSRegularExpression *regex =
    [NSRegularExpression regularExpressionWithPattern:regexString
                                              options:NSRegularExpressionCaseInsensitive
                                                error:&regexError];
if (!regex) {
    // Success is judged by the return value, not by the error pointer.
    NSLog(@"invalid regular expression '%@': %@", regexString, regexError);
}
制作一个测试数组:
// Sample inputs used to exercise the regular expression.
NSArray *testArray = @[
    @"123a mr who here are some words",  // number with letter suffix, title + name, trailing text
    @"124 b mrs jones n/p",              // letter separated from the number by a space
    @"654 Mr Foo",                       // title + name, nothing after it
    @"123 Jones n/p",                    // name without a title
    @"345 n/p",                          // single word containing "/"
    @"345",                              // number only
    @"nothing here",                     // no leading number
];
处理测试数组:
// Runs the regex over every test string and logs the three captured fields,
// or "no match" when the string does not match exactly once.
for (NSString *input in testArray) {
    NSLog(@" ");
    NSLog(@"input: '%@'", input);

    NSRange fullRange = NSMakeRange(0, input.length);
    NSArray *matches = [regex matchesInString:input options:0 range:fullRange];
    if (matches.count != 1) {
        NSLog(@"no match");
        continue;
    }

    // Read the capture groups directly from the match result. Joining them
    // with "\n" in a replacement template and splitting again would shift the
    // fields whenever a captured value itself contains a newline, and it
    // would run the regex a second time.
    NSTextCheckingResult *match = matches.firstObject;
    NSMutableArray *fields = [NSMutableArray arrayWithCapacity:3];
    for (NSUInteger group = 1; group <= 3; group++) {
        NSRange captured = [match rangeAtIndex:group];
        // A group that did not take part in the match reports NSNotFound;
        // log it as an empty string.
        NSString *value = (captured.location == NSNotFound)
            ? @""
            : [input substringWithRange:captured];
        [fields addObject:value];
    }

    NSLog(@"one: '%@'", fields[0]);
    NSLog(@"two: '%@'", fields[1]);
    NSLog(@"three: '%@'", fields[2]);
}
输出:
input: '123a mr who here are some words'
one: '123a'
two: 'mr who'
three: 'here are some words'
input: '124 b mrs jones n/p'
one: '124 b'
two: 'mrs jones'
three: 'n/p'
input: '654 Mr Foo'
one: '654'
two: 'Mr Foo'
three: ''
input: '123 Jones n/p'
one: '123'
two: 'Jones'
three: 'n/p'
input: '345 n/p'
one: '345'
two: 'n/p'
three: ''
input: '345'
no match
input: 'nothing here'
no match