我有很多非标准化的 HTML(与一堆 Wiki 标记混合在一起),我需要从中去除某些标签和各种括号。
QRegularExpression 并不适合这项工作,下面用一个简单的字符串来说明:
// Sample input: an outer <tag par1='x'> wrapping a nested <tag par2='y'>,
// followed by a plain <tag> that must be left alone.
myString =
QString("yes<tag par1='x'>no<tag par2='y'>no</tag>no</tag>yes<tag>no</tag>yes");
// Won't work | The lazy (.+?) stops at the first, nested </tag>
myString.replace(QRegularExpression("<tag par1='x'>(.+?)</tag>"),"\\1");
// Won't work | The greedy (.+) matches up to the last </tag>, removing the second "yes"
myString.replace(QRegularExpression("<tag par1='x'>(.+)</tag>"),"\\1");
理想情况下,我认为最好的办法是编写一个函数,可以向它传入 6 个参数:
// Sketch of the desired helper (pseudo-code: the /* ... */ placeholders are
// not implemented yet, so this does not compile as written).
//
// Intended contract, as far as this sketch shows:
//   - when the opening and closing brackets occur the same number of times,
//     build and return `output`, either keeping or dropping the text between
//     the selected brackets depending on `capture`;
//   - otherwise, log both counts through qDebug() and return `input` unchanged.
QString stripCustomBrackets(
QString input, // The text to process, e.g. myString
QRegularExpression openingBracket, // Start of an opening bracket, e.g. "<tag>" or "{{["
QRegularExpression openingIdentify,// Selects which opening brackets to act on, e.g. "<tag par1='x'>"
// (here par1='x' identifies the tag to work with).
QRegularExpression closingBracket, // Closing bracket, e.g. "</tag>" or "]}}"
QRegularExpression closingIdentify,// Selects which closing brackets to act on, e.g. "FooBar</tag>"
// (i.e. only tags that end with FooBar are handled).
// capture: if true, keep the text between the brackets; if false, remove it too.
bool capture = false) {
QString output;
if ( /* Number of openingBrackets equally match closingBrackets */ ) {
if (capture) {
/* Do code where you leave the contents in between the brackets */
} else {
/* Do code where you remove the contents in between the brackets */
}
return output;
} else {
// Unbalanced brackets: report both counts and hand the input back untouched.
qDebug() << "Unable to consolidate;" << endl
<< openingBracket << " count = " << /* count */ << endl
<< closingBracket << " count = " << /* count */ << endl
<< "Brackets do not match each other in number.";
return input;
}
}
// Example 1: act on the tag identified by par1='x'; capture = true keeps the
// text between the brackets.
qDebug() << stripCustomBrackets(myString,
QRegularExpression("<tag"),
QRegularExpression(" par1='x'>"),
QRegularExpression("</tag>"),
QRegularExpression(""),
true);
// Example 2: act on the nested tag identified by par2='y'; capture = false
// removes the enclosed text as well.
qDebug() << stripCustomBrackets(myString,
QRegularExpression("<tag"),
QRegularExpression(" par2='y'>"),
QRegularExpression("</tag>"),
QRegularExpression(""),
false);
// Example 3: the identifier is itself a pattern, selecting any tag carrying a
// parN='c' attribute; capture = false.
qDebug() << stripCustomBrackets(myString,
QRegularExpression("<tag"),
QRegularExpression(" par[0-9]='[a-z]'>"),
QRegularExpression("</tag>"),
QRegularExpression(""),
false);
// Example 4: opening "<tag " against closing "No</tag>" -- presumably meant to
// exercise the branch where the bracket counts do not match.
qDebug() << stripCustomBrackets(myString,
QRegularExpression("<tag "),
QRegularExpression(""),
QRegularExpression("No</tag>"),
QRegularExpression(""),
false);
期望的输出:
"yesno<tag par2='y'>no</tag>noyes<tag>no</tag>yes"
"yes<tag par1='x'>nono</tag>yes<tag>no</tag>yes"
"yesyes<tag>no</tag>yes"
"Unable to consolidate;"
"'<tag ' Count = 2"
"'No</tag>' Count = 3"
"Brackets do not match each other in number."
"yes<tag par1='x'>no<tag par2='y'>no</tag>no</tag>yes<tag>no</tag>yes"
实现这一目标的最可靠和最稳定的方法是什么?