我正在使用 PCRE 做正则表达式解析,需要在字符串中按特定模式查找单词(假设这串单词彼此之间都用逗号分隔),并把找到的单词放入一个字符串向量中。
我该怎么做呢?
抱歉,下面的代码比较粗糙,因为我时间很赶……
pcre* re;
const char *error;
int erroffset;
char* subject = txt;
int ovector[3];
int subject_length = strlen(subject);
int rc = 0;
re = pcre_compile(
"\\w+", /* the pattern */
PCRE_CASELESS|PCRE_MULTILINE, /* default options */
&error, /* for error message */
&erroffset, /* for error offset */
NULL); /* use default character tables */
char* pofs = subject;
while ( rc >= 0 ) {
rc = pcre_exec(
re, /* the compiled pattern */
NULL, /* no extra data - we didn't study the pattern */
subject, /* the subject string */
subject_length, /* the length of the subject */
0, /* start at offset 0 in the subject */
0, /* default options */
ovector, /* output vector for substring information */
3); /* number of elements in the output vector */
/*
if (rc < 0) {
switch(rc) {
case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
// Handle other special cases if you like
default: printf("Matching error %d\n", rc); break;
}
pcre_free(re); // Release memory used for the compiled pattern
return;
}
*/
/* Match succeded */
if ( rc >= 0 ) {
pofs += ovector[1];
char *substring_start = subject + ovector[0];
// do something with the substring
int substring_length = ovector[1] - ovector[0];
subject = pofs;
subject_length -= ovector[1];
}
}
std::string wordstring = "w1, w2, w3";
std::string word;
pcrecpp::StringPiece inp_w(wordstring);
pcrecpp::RE w_re("(\\S+),?\\s*");
std::vector outwords;
while (w_re.FindAndConsume(&inp_w, &word)) {
outwords.push_back(word);
}