我把一个网页的源代码存入了字符串,并成功删除了其中所有的 HTML 标签。但是,段落之间留下了数量不定的空行:有时只有 1 行,有时多达 4 到 5 行。
以下是我目前的代码:
/// Extracts the readable body text from a page's HTML source.
///
/// The body is the text found between the "(START) Pagination Content Wrapper"
/// and "(END) Pagination Content Wrapper" comment markers. HTML tags are removed
/// from it, runs of blank lines are collapsed to a single blank line, and leading
/// and trailing whitespace is trimmed.
///
/// @param text The full HTML source of the page.
/// @return The cleaned body text, or an empty string when `text` is nil/empty or
///         either wrapper marker is missing.
- (NSString *)parseHTMLText:(NSString *)text {
    static NSString * const startingPt = @"<!-- (START) Pagination Content Wrapper -->";
    static NSString * const endingPt = @"<!-- (END) Pagination Content Wrapper -->";

    if (text.length == 0) {
        return @"";
    }

    // Locate the start marker. Without this check NSMaxRange() of a not-found
    // range would be passed to a substring call and raise NSRangeException.
    NSRange startRange = [text rangeOfString:startingPt];
    if (startRange.location == NSNotFound) {
        return @"";
    }

    // Look for the end marker only after the start marker.
    NSUInteger bodyStart = NSMaxRange(startRange);
    NSRange endRange = [text rangeOfString:endingPt
                                   options:0
                                     range:NSMakeRange(bodyStart, text.length - bodyStart)];
    if (endRange.location == NSNotFound) {
        return @"";
    }

    // Isolate the body text from the entire source code.
    NSString *body =
        [text substringWithRange:NSMakeRange(bodyStart, endRange.location - bodyStart)];
    body = [self removeHTMlTagsFromString:body];

    // Removing tags leaves the line breaks that surrounded them, so paragraphs end
    // up separated by a varying number of empty lines. Collapse every run of
    // whitespace that contains two or more line breaks into exactly one blank line.
    body = [body stringByReplacingOccurrencesOfString:@"[ \\t]*(?:\\r?\\n[ \\t]*){2,}"
                                           withString:@"\n\n"
                                              options:NSRegularExpressionSearch
                                                range:NSMakeRange(0, body.length)];

    // A single trim at the end is sufficient: tag removal does not depend on the
    // whitespace surrounding the body.
    return [body stringByTrimmingCharactersInSet:
                     [NSCharacterSet whitespaceAndNewlineCharacterSet]];
}
/// Removes every `<...>` span from a string.
///
/// The string is scanned once from left to right. Each span that starts at a "<"
/// and ends at the first ">" after it is dropped; everything else is kept as is.
/// A "<" that has no ">" after it is not a tag, so it and the rest of the string
/// are kept unchanged.
///
/// @param text The string that may contain HTML tags.
/// @return A copy of `text` without the tag spans. A nil or empty `text` is
///         returned unchanged.
- (NSString *)removeHTMlTagsFromString:(NSString *)text {
    NSUInteger length = text.length;
    if (length == 0) {
        return text;
    }

    NSMutableString *stripped = [NSMutableString stringWithCapacity:length];
    NSUInteger cursor = 0;

    while (cursor < length) {
        // Find the next "<" at or after the cursor.
        NSRange open = [text rangeOfString:@"<"
                                   options:0
                                     range:NSMakeRange(cursor, length - cursor)];
        if (open.location == NSNotFound) {
            break;
        }

        // Find the ">" that closes it. Searching only after the "<" avoids
        // treating an earlier, unrelated ">" as the end of the tag.
        NSRange close = [text rangeOfString:@">"
                                    options:0
                                      range:NSMakeRange(open.location, length - open.location)];
        if (close.location == NSNotFound) {
            break;
        }

        // Keep the text before the tag and continue right after the ">".
        [stripped appendString:[text substringWithRange:NSMakeRange(cursor, open.location - cursor)]];
        cursor = NSMaxRange(close);
    }

    // Keep whatever follows the last removed tag.
    [stripped appendString:[text substringFromIndex:cursor]];
    return [stripped copy];
}
我试过下面这行代码,但它不起作用:
// NOTE: this attempt replaces every literal space character (" ") with an empty
// string, so the spaces between words are removed as well. Line-break characters
// are not matched by @" " and are therefore left in place.
completeTrimmed = [completeTrimmed stringByReplacingOccurrencesOfString:@" " withString:@""];