From 16918c1df3e709df2a97281e3825d94c84edb668 Mon Sep 17 00:00:00 2001 From: Christian Ehrlicher Date: Tue, 06 Aug 2024 22:39:44 +0200 Subject: [PATCH] XML/QDom: speedup encodeText() The code copied the whole string, then replaced parts inline, at the cost of relocating everything beyond, at each replacement. Instead, copy character by character (in chunks where possible) and append replacements as we skip what they replace. Manual conflict resolution for 6.5: - This is a manual cherry-pick. The original change was only picked to 6.8, but the quadratic behavior is present in Qt 5, too. - Changed Task-number to Fixes: because this is the real fix; the QString change, 315210de916d060c044c01e53ff249d676122b1b, was unrelated to the original QTBUG-127549. Manual conflcit resolution for 5.15: - Kept/re-added QTextCodec::canEncode() check - Ported from Qt 6 to 5, to wit: - qsizetype -> int - QStringView::first/sliced(n) -> left/mid(n) (these functions are clearly called in-range, so the widened contract of the Qt 5 functions doesn't matter) - Ported from C++17- and C++14-isms to C++11: - replaced polymorphic lambda with a normal one (this requires rewriting the !canEncode() branch to use QByteArray/QLatin1String instead of QString) - As a drive-by, corrected the indentation of the case labels to horizontally align existing code (and follow Qt style) Fixes: QTBUG-127549 Change-Id: I368482859ed0c4127f1eec2919183711b5488ada Reviewed-by: Edward Welbourne (cherry picked from commit 2ce08e3671b8d18b0284447e5908ce15e6e8f80f) Reviewed-by: Qt Cherry-pick Bot (cherry picked from commit 225e235cf966a44af23dbe9aaaa2fd20ab6430ee) Reviewed-by: Fabian Kosmale (cherry picked from commit 905a5bd421efff6a1d90b6140500d134d32ca745) --- diff --git a/src/xml/dom/qdom.cpp b/src/xml/dom/qdom.cpp index 872221c..bf70477 100644 --- a/src/xml/dom/qdom.cpp +++ b/src/xml/dom/qdom.cpp @@ -3676,59 +3676,67 @@ const QTextCodec *const codec = s.codec(); Q_ASSERT(codec); #endif - QString retval(str); - int len = retval.length(); - int i = 0; + QString retval; + int start = 0; + auto appendToOutput = [&](int cur, QLatin1String replacement) + { + if (start < cur) { + retval.reserve(str.size() + replacement.size()); + retval.append(QStringView(str).left(cur).mid(start)); + } + // Skip over str[cur], replaced by replacement + start = cur + 1; + retval.append(replacement); + }; - while (i < len) { - const QChar ati(retval.at(i)); - - if (ati == QLatin1Char('<')) { - retval.replace(i, 1, QLatin1String("<")); - len += 3; - i += 4; - } else if (encodeQuotes && (ati == QLatin1Char('"'))) { - retval.replace(i, 1, QLatin1String(""")); - len += 5; - i += 6; - } else if (ati == QLatin1Char('&')) { - retval.replace(i, 1, QLatin1String("&")); - len += 4; - i += 5; - } else if (ati == QLatin1Char('>') && i >= 2 && retval[i - 1] == QLatin1Char(']') && retval[i - 2] == QLatin1Char(']')) { - retval.replace(i, 1, QLatin1String(">")); - len += 3; - i += 4; - } else if (performAVN && - (ati == QChar(0xA) || - ati == QChar(0xD) || - ati == QChar(0x9))) { - const QString replacement(QLatin1String("&#x") + QString::number(ati.unicode(), 16) + QLatin1Char(';')); - retval.replace(i, 1, replacement); - i += replacement.length(); - len += replacement.length() - 1; - } else if (encodeEOLs && ati == QChar(0xD)) { - retval.replace(i, 1, QLatin1String(" ")); // Replace a single 0xD with a ref for 0xD - len += 4; - i += 5; - } else { + const int len = str.size(); + for (int cur = 0; cur < len; ++cur) { + switch (const char16_t ati = str[cur].unicode()) { + case u'<': + appendToOutput(cur, QLatin1String("<")); + break; + case u'"': + if (encodeQuotes) + appendToOutput(cur, QLatin1String(""")); + break; + case u'&': + appendToOutput(cur, QLatin1String("&")); + break; + case u'>': + if (cur >= 2 && str[cur - 1] == u']' && str[cur - 2] == u']') + appendToOutput(cur, QLatin1String(">")); + break; + case u'\r': + if (performAVN || encodeEOLs) + appendToOutput(cur, QLatin1String(" ")); // \r == 0x0d + break; + case u'\n': + if (performAVN) + appendToOutput(cur, QLatin1String(" ")); // \n == 0x0a + break; + case u'\t': + if (performAVN) + appendToOutput(cur, QLatin1String(" ")); // \t == 0x09 + break; + default: #if QT_CONFIG(textcodec) if(codec->canEncode(ati)) - ++i; + ; // continue else #endif { // We have to use a character reference to get it through. - const ushort codepoint(ati.unicode()); - const QString replacement(QLatin1String("&#x") + QString::number(codepoint, 16) + QLatin1Char(';')); - retval.replace(i, 1, replacement); - i += replacement.length(); - len += replacement.length() - 1; + const QByteArray replacement = "&#x" + QByteArray::number(uint{ati}, 16) + ';'; + appendToOutput(cur, QLatin1String{replacement}); } + break; } } - - return retval; + if (start > 0) { + retval.append(QStringView(str).left(len).mid(start)); + return retval; + } + return str; } void QDomAttrPrivate::save(QTextStream& s, int, int) const