按分隔符分割字符串,并返回一个 vector<string_view>。
专为快速分割 .csv 文件中的行而设计。
已在 MSVC 2017 v15.9.6 和 Intel Compiler v19.0 上以 C++17 模式编译并测试(string_view 需要 C++17)。
#include <string_view>
#include <vector>
// Splits `str` at every occurrence of `delim` and returns the columns as
// views into the caller's buffer; no characters are copied.
//
// N delimiters always yield N + 1 columns: leading, trailing and adjacent
// delimiters produce empty columns, and an empty input produces exactly one
// empty column.
//
// Parameters:
//   str   - text to split (e.g. one line of a .csv file).
//   delim - single-character separator; defaults to ','.
//
// Returns: the columns in order of appearance. Every element refers to the
// memory behind `str`, so that memory must outlive the returned vector.
std::vector<std::string_view> Split(const std::string_view str, const char delim = ',')
{
    std::vector<std::string_view> result;

    // Offset of the first character of the column currently being scanned.
    // size_type (not int) keeps the arithmetic valid for inputs of any length
    // and avoids ever forming a pointer in front of the buffer.
    std::string_view::size_type columnStart = 0;

    for (;;)
    {
        const std::string_view::size_type delimPos = str.find(delim, columnStart);
        if (delimPos == std::string_view::npos)
        {
            break;
        }
        result.push_back(str.substr(columnStart, delimPos - columnStart));
        columnStart = delimPos + 1;
    }

    // Whatever follows the last delimiter (possibly nothing) is the final
    // column. columnStart <= str.size() always holds here, so substr cannot throw.
    result.push_back(str.substr(columnStart));
    return result;
}
注意生命周期:string_view 的存活时间绝不能超过它所指向(即作为其“窗口”)的父 string。如果父 string 超出作用域,string_view 指向的内容就会失效。在这个特定场景下,API 的设计使其很难出错,因为输入和输出都是 string_view,它们都是父字符串上的窗口。这样做在内存复制和 CPU 使用方面都相当高效。
请注意,使用 string_view 的唯一缺点是失去了隐式的空终止符。因此,请使用支持 string_view 的函数,例如 Boost 中用于将字符串转换为数字的 lexical_cast 函数。
我用它来快速解析 .csv 文件。为了获取 .csv 文件中的每一行,我使用了 istringstream 和 getline(),它们的速度极快(单核约 2 GB/秒,即每秒约 1,200,000 行)。
单元测试。使用Google Test进行测试(我使用vcpkg安装)。
// Google Test integrates into VS2017 if ReSharper is installed.
#include "gtest/gtest.h" // Can install using vcpkg
// In main(), call:
// ::testing::InitGoogleTest(&argc, argv);return RUN_ALL_TESTS();
// Verifies Split() on a comma delimiter: plain columns, empty columns produced
// by leading/trailing/adjacent delimiters, and inputs with no delimiter at all.
TEST(Strings, Split)
{
    // One row per scenario: the text handed to Split() and the columns expected back.
    struct Case
    {
        std::string input;
        std::vector<std::string_view> expected;
    };

    const Case cases[] = {
        {"A,B,C", {"A", "B", "C"}},
        {",B,C", {"", "B", "C"}},
        {"A,B,", {"A", "B", ""}},
        {"", {""}},
        {"A", {"A"}},
        {",", {"", ""}},
        {",,", {"", "", ""}},
        {"A,", {"A", ""}},
        {",B", {"", "B"}},
    };

    for (const Case& testCase : cases)
    {
        // Identify the failing input in the report, since all cases share one assertion.
        SCOPED_TRACE("input: \"" + testCase.input + "\"");

        const auto tokens = Split(testCase.input, ',');

        // Comparing whole vectors checks the column count and contents together,
        // so a wrong count can never lead to indexing past the end of `tokens`,
        // and a failure prints the actual columns.
        EXPECT_EQ(tokens, testCase.expected);
    }
}