我最终选择了 CFStringTokenizer。我不确定下面的桥接类型转换(bridged cast)是否正确,但它似乎有效。
/**
 * Splits `text` into word tokens with CFStringTokenizer (using the current
 * locale), counts how many times each distinct word occurs, and logs the
 * input, start/end timestamps, total token count and the word -> count table.
 *
 * @param text The string to scan. Passing nil makes the method a no-op.
 */
-(void)listAllKeywordsInString:(NSString*)text
{
    if (text == nil) {
        return;
    }

    NSMutableDictionary *keywordsDictionary = [[NSMutableDictionary alloc] initWithCapacity:1024];
    NSLog(@"%@",text);
    NSLog(@"Started parsing: %@",[[NSDate date] description]);

    // Plain __bridge: ARC keeps ownership of `text`, so nothing has to be
    // CFRelease'd for it. (__bridge_retained would add a +1 retain that must
    // be balanced manually, otherwise the string leaks.)
    CFStringRef string = (__bridge CFStringRef)text;

    // "Copy" rule: we own the locale and must release it ourselves.
    CFLocaleRef locale = CFLocaleCopyCurrent();
    CFStringTokenizerRef tokenizer = CFStringTokenizerCreate(kCFAllocatorDefault,
                                                             string,
                                                             CFRangeMake(0, CFStringGetLength(string)),
                                                             kCFStringTokenizerUnitWord,
                                                             locale);
    unsigned tokensFound = 0;

    // CFStringTokenizerCreate may fail; CF functions crash on NULL arguments.
    if (tokenizer != NULL) {
        while (CFStringTokenizerAdvanceToNextToken(tokenizer) != kCFStringTokenizerTokenNone) {
            CFRange tokenRange = CFStringTokenizerGetCurrentTokenRange(tokenizer);
            CFStringRef tokenValue = CFStringCreateWithSubstring(kCFAllocatorDefault, string, tokenRange);
            if (tokenValue == NULL) {
                continue;
            }

            // This is the found word. ARC takes its own retain for `key`,
            // so the CFRelease below only balances the Create call.
            NSString *key = (__bridge NSString *)tokenValue;

            // Increment its count. For a word not seen before, `count` is nil
            // and messaging nil yields 0, so the first occurrence stores 1.
            NSNumber *count = [keywordsDictionary objectForKey:key];
            [keywordsDictionary setObject:[NSNumber numberWithInt:count.intValue + 1] forKey:key];

            CFRelease(tokenValue);
            ++tokensFound;
        }
    }

    NSLog(@"Ended parsing. tokens Found: %u, %@",tokensFound,[[NSDate date] description]);
    NSLog(@"%@",[keywordsDictionary description]);

    // Clean up everything obtained through Create/Copy functions.
    if (tokenizer != NULL) {
        CFRelease(tokenizer);
    }
    if (locale != NULL) {
        CFRelease(locale);
    }
}