有没有办法让 Boost 分词器拆分以下字符串,同时不拆分其中用引号括起来的部分?
string s = "1st 2nd \"3rd with some comment\" 4th";
Expected output:
1st
2nd
3rd with some comment
4th
您可以使用分词器库中的 escaped_list_separator。
有关如何将其应用于您的问题的更多详细信息,请参阅此问题。
C++11解决方案
#include <iostream>
#include <string>
#include <vector>
// Splits `str` on single spaces while keeping double-quoted sections intact.
//
// Rules implemented here:
//   * A '"' toggles quoted mode and is never copied into a token.
//   * A closing quote always emits the accumulated text as a token, even
//     when that text is empty.
//   * An opening quote does not emit anything, so characters directly
//     preceding it are merged with the quoted text.
//   * Outside quotes a space ends the current token; runs of spaces do not
//     produce empty tokens.
//   * Whatever is left at the end of input (including an unterminated quoted
//     section) is emitted if it is non-empty.
std::vector<std::string> tokenize(const std::string& str) {
    std::vector<std::string> tokens;
    std::string current;
    bool quoted = false;

    // Emits the accumulated characters as one token and resets the accumulator.
    const auto flush = [&tokens, &current]() {
        tokens.push_back(current);
        current.clear();
    };

    for (const char ch : str) {
        switch (ch) {
        case '"':
            if (quoted) {
                flush();
            }
            quoted = !quoted;
            break;
        case ' ':
            if (quoted) {
                // Spaces inside quotes belong to the token.
                current.push_back(ch);
            } else if (!current.empty()) {
                flush();
            }
            break;
        default:
            current.push_back(ch);
            break;
        }
    }

    if (!current.empty()) {
        flush();
    }
    return tokens;
}
// Demo driver: tokenizes the sample string and prints each token on its own line.
int main() {
    const std::string s = "1st 2nd \"3rd with some comment\" 4th";
    const std::vector<std::string> tokens = tokenize(s);
    for (const std::string& token : tokens) {
        std::cout << token << "\n";
    }
}
试试这段代码,这样你就可以避免使用 Boost.Tokenizer 和 Boost.Spirit 库
#include <vector>
#include <string>
#include <iostream>
// Characters that delimit tokens: a space and character code 9 (ASCII horizontal tab).
const char Separators[] = { ' ', 9 };

// Returns true when Ch equals one of the entries in Separators.
bool Str_IsSeparator( const char Ch )
{
    for ( const char Candidate : Separators )
    {
        if ( Ch == Candidate )
        {
            return true;
        }
    }

    return false;
}
// Splits Str into tokens delimited by Str_IsSeparator() characters and stores
// tokens number FromToken..ToToken (1-based, inclusive) into Components.
//
// A token that starts with '"' extends to the next '"' (or to the end of the
// string if the quote is never closed); the quotes themselves are not stored.
// Empty tokens, such as an empty quoted pair, are skipped and not counted.
//
// Token number N is written to Components[N - (FromToken - 1)], so the first
// stored token lands at index 1 and index 0 is left untouched. Components is
// grown when the target index is past its end; it is never shrunk, and
// elements that receive no token keep their previous contents.
//
// Scanning uses Str.c_str(), so it stops at the first NUL character.
void SplitLine( size_t FromToken, size_t ToToken, const std::string& Str, std::vector<std::string>& Components /*, bool ShouldTrimSpaces*/ )
{
    size_t TokenNum = 0;
    const size_t Offset = FromToken - 1;
    const char* CStr = Str.c_str();

    while ( *CStr )
    {
        // bypass spaces & delimiting chars
        while ( *CStr && Str_IsSeparator( *CStr ) ) { CStr++; }
        if ( !*CStr ) { return; }

        const bool InsideQuotes = ( *CStr == '\"' );
        // step over the opening quote so it is not part of the token
        if ( InsideQuotes ) { CStr++; }

        // find the end of the token: closing quote, separator, or end of string
        const char* CStrj = CStr;
        if ( InsideQuotes )
        {
            while ( *CStrj && *CStrj != '\"' ) { CStrj++; }
        }
        else
        {
            while ( *CStrj && !Str_IsSeparator( *CStrj ) ) { CStrj++; }
        }

        // extract token
        if ( CStr != CStrj )
        {
            TokenNum++;
            // store each token found
            if ( TokenNum >= FromToken )
            {
                const size_t Index = TokenNum - Offset;
                // grow the output instead of writing past its end
                if ( Index >= Components.size() ) { Components.resize( Index + 1 ); }
                Components[ Index ].assign( CStr, CStrj );
                // if ( ShouldTrimSpaces ) { Str_TrimSpaces( &Components[ Index ] ); }
                // proceed to next token
                if ( TokenNum >= ToToken ) { return; }
            }
        }

        // Always move past the token and its terminator (closing quote or
        // separator), even when the token was empty; otherwise the closing
        // quote of "" would be re-read as an opening quote.
        CStr = CStrj;
        if ( *CStr ) { CStr++; }
    }
}
// Demo driver: extracts tokens 1..4 of the sample string into a vector of five
// pre-sized slots and prints every slot, one per line. SplitLine stores the
// first token at index 1, so slot 0 stays empty and prints as a blank line.
int main()
{
    const std::string test = "1st 2nd \"3rd with some comment\" 4th";
    std::vector<std::string> Out( 5 );

    SplitLine( 1, 4, test, Out );

    for ( const std::string& Entry : Out )
    {
        std::cout << Entry << std::endl;
    }

    return 0;
}
它使用预先分配的字符串数组(它不是从零开始的,但很容易修复),而且非常简单。