10

我希望能够在一个字符串中找到一个子字符串,但它有一个独特的模式,我不知道如何找到。

例如:

// Illustrative input only: as written this does not compile, because an
// Objective-C string literal cannot contain raw line breaks. Presumably each
// visual line stands for one line of the real text.
NSString *test1= @"Contact Names
                  67-444-322
                  Dec 21 2012
                  23941 6745 9145072 01567
                  5511 23345 614567 123456
                  Older Contacts
                  See Back Side";

我想在子字符串中找到以下模式(这些数字但不是日期数字)

                  23941 6745 9145072 01567
                  5511 23345 614567 123456

但是,示例字符串的格式几乎不会相同。除了“联系人姓名”、“老联系人”和“见背面”之外,每次都会有不同的号码和不同的标题。将保持不变的一件事是,我正在寻找的数字总是有 4 个数字,但可能有 1 行或 10 行。

有谁知道我将如何解决这个问题?我正在考虑可能只找到字符串中的数字,然后检查哪些数字之间有 3 个空格。

谢谢

4

8 回答 8

5

我已经尝试了以下方法并且它有效:

// Sample text; adjacent literals are concatenated by the compiler, so each
// entry is terminated by an explicit "\n".
NSString *test1 =
    @"Contact Names\n"
    @"67-444-322\n"
    @"Dec 21 2012\n"
    @"23941 6745 9145072 01567\n"
    @"5511 23345 614567 123456\n"
    @"Older Contacts\n"
    @"See Back Side";

// A row is four digit runs separated by single spaces; any number of further
// rows may follow, each introduced by a newline.
NSString *pattern = @"(([0-9]+ ){3}+[0-9]+)(\\n(([0-9]+ ){3}+[0-9]+))*";
NSRegularExpression *regex =
    [NSRegularExpression regularExpressionWithPattern:pattern
                                              options:0
                                                error:nil];

// Only the first run of rows is reported, so ask for the first match directly.
NSRange wholeText = NSMakeRange(0, [test1 length]);
NSTextCheckingResult *firstHit = [regex firstMatchInString:test1
                                                   options:0
                                                     range:wholeText];
if (firstHit != nil) {
    NSLog(@"\n%@", [test1 substringWithRange:firstHit.range]); // These are your numbers
}

(如果只有一行数字,它也有效。)

于 2013-01-24T14:38:18.947 回答
3

您可以使用字符集来分隔字符串,然后确定每个组件中是否有 4 个数字。这只有在字符串中包含换行符 ( \n) 时才有效(正如您对 Lance 的回复似乎表明的那样)。

我会这样做:

// Sample text. Adjacent literals are concatenated by the compiler, so every
// line ends in an explicit "\n" and carries no stray indentation.
NSString *test1 = @"Contact Names\n"
                  @"67-444-322\n"
                  @"Dec 21 2012\n"
                  @"23941 6745 9145072 01567\n"
                  @"5511 23345 614567 123456\n"
                  @"Older Contacts\n"
                  @"See Back Side";

NSArray *lines = [test1 componentsSeparatedByCharactersInSet:[NSCharacterSet newlineCharacterSet]];

// lines now contains each line in test1

// Any character from this set disqualifies a token from being a number.
NSCharacterSet *nonDigits = [[NSCharacterSet decimalDigitCharacterSet] invertedSet];

for (NSString *line in lines) {

    // Splitting on whitespace produces empty strings for leading, trailing or
    // repeated blanks, so keep only the non-empty tokens.
    NSArray *pieces = [line componentsSeparatedByCharactersInSet:[NSCharacterSet whitespaceCharacterSet]];
    NSMutableArray *elements = [NSMutableArray array];
    BOOL allNumeric = YES;
    for (NSString *piece in pieces) {
        if (piece.length == 0) {
            continue;
        }
        if ([piece rangeOfCharacterFromSet:nonDigits].location != NSNotFound) {
            allNumeric = NO;
        }
        [elements addObject:piece];
    }

    // Four tokens alone is not enough (e.g. a four-word title); every token
    // must also consist solely of digits.
    if (allNumeric && elements.count == 4) {
        // This line contains 4 numbers
        // convert each number string into an int if needed
    }
}

抱歉,代码行太长了,Apple 的一些选择器有点长……无论如何,如果 elements 中有 4 个单独的(NSString)对象,那么它就是您要找的行之一,您可以根据需要处理这些数据。

编辑(旁白):

关于正则表达式的主题(因为这个问题包含regex标签),是的,您可以使用正则表达式,但是 Objective-C 并没有真正的“好”方式来处理它们......正则表达式更多地属于脚本语言领域以及内置支持它的语言。

于 2013-01-25T01:11:16.657 回答
3

我改进了我的代码,使其更具可读性并在找到字符串时停止(不会分成几行......如果您也需要这个,请告诉我再次添加代码或在遇到困难时帮助您)

我使用的正则表达式是:
-一个或多个数字,后跟一个或多个空格(这一整体重复三次)
-一个或多个数字,后跟一个或多个空白字符(包括换行符、制表符、空格等)
-然后尝试匹配整个模式重复 1 次或多次的情况

代码是

NSString *test1 = @"Contact Names\n    67-444-322\n\nDec 21 2012\n23941 6745 9145072 01567\n5511 23345 614567 123456\nOlder Contacts\nSee Back Side\n";

// Pattern: three "digits + spaces" groups, a fourth digit run, then trailing
// whitespace (which covers the line break); the whole row may repeat.
NSString *pattern1 = @"(([0-9]+ +){3}[0-9]+\\s+)+";
NSRegularExpression *regex1 =
    [NSRegularExpression regularExpressionWithPattern:pattern1
                                              options:0
                                                error:nil];

// Collect every series of rows found in the text.
NSRange fullRange = NSMakeRange(0, [test1 length]);
NSArray *results1 = [regex1 matchesInString:test1 options:0 range:fullRange];
NSUInteger seriesCount = [results1 count];

if (seriesCount == 0) {
    NSLog(@"No matches found in string");
} else {
    // More than one series is treated as an unhandled situation: report it
    // and terminate.
    if (seriesCount > 1) {
        NSLog(@"I found more than one matching series....what should i do?!");
        exit(111);
    }

    // Exactly one series: cut it out of the text and strip the surrounding
    // whitespace (the pattern swallows the trailing line break).
    NSTextCheckingResult *onlySeries = [results1 objectAtIndex:0];
    NSCharacterSet *blanks = [NSCharacterSet whitespaceAndNewlineCharacterSet];
    NSString *match1 = [[test1 substringWithRange:onlySeries.range]
                        stringByTrimmingCharactersInSet:blanks];
    NSLog(@"the series is \n%@", match1);
}

希望能帮助到你

于 2013-01-25T16:58:02.023 回答
2
#include <stdio.h>
#include <string.h>
#include <pcre.h>

/*
 * Builds a PCRE pattern piece by piece, runs it against a fixed sample
 * string and prints each captured group wrapped in parentheses.
 *
 * The pattern skips two leading integers (the day and year of the date),
 * then captures seven integers with the whitespace between them, and one
 * final integer after arbitrary filler: 14 capture groups in total.
 *
 * Returns 0 on success, 1 if the pattern fails to compile.
 */
int main(int argc, char **argv)
{
  (void)argc;
  (void)argv;

  const char *error;
  int erroffset;
  int ovector[186];
  char re[8192]="";
  char txt[]="Dec 21 2012                   23941 6745 9145072 01567                   5511 23345 614567 123456                   Ol\";";

  char re1[]=".*?"; // Non-greedy match on filler
  strcat(re,re1);
  char re2[]="\\d+";    // Uninteresting: int
  strcat(re,re2);
  char re3[]=".*?"; // Non-greedy match on filler
  strcat(re,re3);
  char re4[]="\\d+";    // Uninteresting: int
  strcat(re,re4);
  char re5[]=".*?"; // Non-greedy match on filler
  strcat(re,re5);
  char re6[]="(\\d+)";  // Integer Number 1
  strcat(re,re6);
  char re7[]="(\\s+)";  // White Space 1
  strcat(re,re7);
  char re8[]="(\\d+)";  // Integer Number 2
  strcat(re,re8);
  char re9[]="(\\s+)";  // White Space 2
  strcat(re,re9);
  char re10[]="(\\d+)"; // Integer Number 3
  strcat(re,re10);
  char re11[]="(\\s+)"; // White Space 3
  strcat(re,re11);
  char re12[]="(\\d+)"; // Integer Number 4
  strcat(re,re12);
  char re13[]="(\\s+)"; // White Space 4
  strcat(re,re13);
  char re14[]="(\\d+)"; // Integer Number 5
  strcat(re,re14);
  char re15[]="(\\s+)"; // White Space 5
  strcat(re,re15);
  char re16[]="(\\d+)"; // Integer Number 6 (this declaration was missing)
  strcat(re,re16);
  char re17[]="(\\s+)"; // White Space 6
  strcat(re,re17);
  char re18[]="(\\d+)"; // Integer Number 7
  strcat(re,re18);
  char re19[]=".*?";    // Non-greedy match on filler
  strcat(re,re19);
  char re20[]="(\\d+)"; // Integer Number 8
  strcat(re,re20);

  pcre *r =  pcre_compile(re, PCRE_CASELESS|PCRE_DOTALL, &error, &erroffset, NULL);
  if (r == NULL)
  {
    // pcre_compile reports the failure through error/erroffset.
    fprintf(stderr, "regex compilation failed at offset %d: %s\n", erroffset, error);
    return 1;
  }

  int rc = pcre_exec(r, NULL, txt, (int)strlen(txt), 0, 0, ovector, 186);
  if (rc>0)
  {
    // Groups 1..12 alternate integer / whitespace; 13 and 14 are the last
    // two integers. Each is printed as "(value)" in group order.
    int group;
    for (group = 1; group <= 14; group++)
    {
      char captured[1024];
      // Print only when the copy succeeded, so the buffer is never read
      // uninitialised.
      if (pcre_copy_substring(txt, ovector, rc, group, captured, (int)sizeof captured) >= 0)
      {
        printf("(%s)", captured);
      }
    }
    puts("\n");
  }

  pcre_free(r);
  return 0;
}

从下次开始使用http://txt2re.com

您也可以把正则表达式写成一个简单的字符串:只需把各个片段合并写入同一个字符数组变量即可。

于 2013-01-14T08:43:13.917 回答
2

使用所有月份的名称创建一个数组,例如monthArray。

然后使用空格分割整个字符串。现在在 for 循环检查中

if(拆分数组的四个连续元素是数字)

  {

 if(previous 5th, 6th and seventh element in the splited array does not belong to monthArray)//if forloop count is 7 then previous 5th means the 2nd element in the splited array
     {
       those 4 consecutive variable belongs to a row you are looking for.

      }
   }

//------------------------------------------------ ----------

// Month abbreviations used to recognise a date such as "Dec 21 2012", so that
// its day and year are not mistaken for part of a four-number row.
NSArray *monthArray = @[@"Jan", @"Feb", @"Mar", @"Apr", @"May", @"Jun",
                        @"Jul", @"Aug", @"Sep", @"Oct", @"Nov", @"Dec"];
NSString *test1= @"Contact Names 67-444-322 Dec 21 2012 23941 6745 9145072 01567 5511 23345 614567 123456 Older Contacts See Back Side";

// Splitting on a single space yields empty strings wherever spaces repeat;
// drop them so they are neither counted as numbers nor printed as row members.
NSArray *splitArray = [[test1 componentsSeparatedByString:@" "]
                       filteredArrayUsingPredicate:[NSPredicate predicateWithFormat:@"length > 0"]];
NSCharacterSet *nonDigits = [[NSCharacterSet decimalDigitCharacterSet] invertedSet];
NSInteger tokenCount = (NSInteger)splitArray.count;
int count = 0; // length of the current run of consecutive numeric tokens

for (NSInteger i = 0; i < tokenCount; i++) {
    // A token is a pure integer when it contains no non-digit character.
    NSString *token = splitArray[i];
    if ([token rangeOfCharacterFromSet:nonDigits].location == NSNotFound) {
        count++;
    } else {
        count = 0;
    }

    if (count >= 4) {
        // If a month name sits 4 or 5 tokens back, the run still includes the
        // day and/or year of a date: keep sliding forward without resetting.
        if (i - 4 >= 0 && [monthArray containsObject:splitArray[i - 4]]) {
            continue;
        }
        if (i - 5 >= 0 && [monthArray containsObject:splitArray[i - 5]]) {
            continue;
        }
        // count >= 4 guarantees i >= 3, so these four indexes are valid.
        NSLog(@"myneededRow===%@ %@ %@ %@", splitArray[i - 3], splitArray[i - 2], splitArray[i - 1], splitArray[i]);
        count = 0;
    }
}
于 2013-01-14T09:48:35.680 回答
1

如果数字的数量永远不会改变,即 [5 个数字][空格][4 个数字][空格]...

然后您可以使用 NSRegularExpression 设置模式,然后在字符串中搜索该模式。

https://developer.apple.com/library/mac/#documentation/Foundation/Reference/NSRegularExpression_Class/Reference/Reference.html

于 2013-01-14T08:47:33.703 回答
1

试试 NSLinguisticTagger 类。

// Collects every token that the linguistic tagger classifies as a number.
NSMutableArray *numbers = [NSMutableArray array];

// Adjacent literals are concatenated by the compiler; a string literal cannot
// contain raw line breaks, so each line ends in an explicit "\n".
NSString *test1 = @"Contact Names\n"
                  @"67-444-322\n"
                  @"Dec 21 2012\n"
                  @"23941 6745 9145072 01567\n"
                  @"5511 23345 614567 123456\n"
                  @"Older Contacts\n"
                  @"See Back Side";

NSLinguisticTaggerOptions options = NSLinguisticTaggerOmitWhitespace | NSLinguisticTaggerOmitPunctuation | NSLinguisticTaggerJoinNames;
NSLinguisticTagger *tagger = [[NSLinguisticTagger alloc] initWithTagSchemes:[NSLinguisticTagger availableTagSchemesForLanguage:@"en"]
                                                                    options:options];
tagger.string = test1;
[tagger enumerateTagsInRange:NSMakeRange(0, [test1 length])
                      scheme:NSLinguisticTagSchemeNameTypeOrLexicalClass
                     options:options
                  usingBlock:^(NSString *tag, NSRange tokenRange, NSRange sentenceRange, BOOL *stop) {
    // Tags are strings, so compare by value rather than by pointer.
    if ([tag isEqualToString:NSLinguisticTagNumber]) {
        [numbers addObject:[test1 substringWithRange:tokenRange]];
    }
}];
NSLog(@"All Numbers in my strings are: %@", numbers);
于 2013-01-14T08:48:10.527 回答
1

这应该有效。我必须在你的输入中添加换行符 \n 才能让我的工作,但我假设你是从 API 或文件中获取字符串,所以它应该已经有了换行符。

// Sample text. Adjacent literals are used instead of backslash continuations
// so that the source indentation does not become part of the string, and
// every line (including the first) ends in "\n".
NSString *test1 = @"Contact Names\n"
                  @"67-444-322\n"
                  @"Dec 21 2012\n"
                  @"23941 6745 9145072 01567\n"
                  @"5511 23345 614567 123456\n"
                  @"Older Contacts\n"
                  @"See Back Side";

// first, separate by new line
NSArray *allLinedStrings =
    [test1 componentsSeparatedByCharactersInSet:[NSCharacterSet newlineCharacterSet]];

// A wanted line is exactly four digit runs separated by spaces, with optional
// surrounding spaces. A bare "[0-9 ]+" would also accept a blank line or a
// line holding a single number.
NSError *regexError = nil;
NSRegularExpression *regex = [[NSRegularExpression alloc] initWithPattern:@"^ *[0-9]+( +[0-9]+){3} *$"
                                                                  options:0
                                                                    error:&regexError];
if (regex == nil) {
    NSLog(@"invalid pattern: %@", regexError);
}

for (NSString *line in allLinedStrings) {
    // Messaging a nil regex returns nil, so a failed compile reports no matches.
    NSTextCheckingResult *result = [regex firstMatchInString:line
                                                     options:0
                                                       range:NSMakeRange(0, [line length])];
    if (result != nil) {
        NSString *match = [line substringWithRange:result.range];
        NSLog(@"match found: %@\n", match);
    }
}
于 2013-01-24T14:47:24.460 回答