qtbase-opensource-src/tests/benchmarks/corelib/text/qregexp/main.cpp

616 lines
15 KiB
C++

/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the test suite of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:GPL-EXCEPT$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3 as published by the Free Software
** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include <QDebug>
#include <QRegExp>
#include <QString>
#include <QFile>
#include <qtest.h>
#ifdef HAVE_BOOST
#include <boost/regex.hpp>
#endif
#ifdef HAVE_JSC
#include <QtScript>
#include "pcre/pcre.h"
#endif
// Search target for the horrible*Replace* benchmarks: they run their regular
// expressions over the text loaded from ":/main.cpp" and look for this
// directive's version string.
// NOTE(review): presumably any other spot in this file that spells the
// directive out verbatim would be matched as well (the patterns themselves are
// split into two adjacent string literals) -- keep that in mind when editing.
#define ZLIB_VERSION "1.2.3.4"
// Benchmark fixture comparing QRegExp (RegExp and RegExp2 syntaxes) against
// the JSC and Boost regular-expression engines, plus several candidate
// implementations of pattern escaping.
// NOTE(review): Qt Test is documented to run private slots in declaration
// order -- confirm before reordering anything below.
class tst_qregexp : public QObject
{
Q_OBJECT
public:
tst_qregexp();
private slots:
// Escaping benchmarks; every variant is fed the rows from escape_data().
void escape_old();
void escape_old_data() { escape_data(); }
void escape_new1();
void escape_new1_data() { escape_data(); }
void escape_new2();
void escape_new2_data() { escape_data(); }
void escape_new3();
void escape_new3_data() { escape_data(); }
void escape_new4();
void escape_new4_data() { escape_data(); }
/*
JSC outperforms everything.
Boost is less impressive than expected.
*/
// Suffix 1 = QRegExp::RegExp, suffix 2 = QRegExp::RegExp2.
void simpleFind1();
void rangeReplace1();
void matchReplace1();
void simpleFind2();
void rangeReplace2();
void matchReplace2();
void simpleFindJSC();
void rangeReplaceJSC();
void matchReplaceJSC();
void simpleFindBoost();
void rangeReplaceBoost();
void matchReplaceBoost();
/* These apply an (incorrect) regexp to the entire source
(this main.cpp). JSC appears to handle this
(ab)use case best. QRegExp performs extremely badly.
*/
void horribleWrongReplace1();
void horribleReplace1();
void horribleReplace2();
void horribleWrongReplace2();
void horribleWrongReplaceJSC();
void horribleReplaceJSC();
void horribleWrongReplaceBoost();
void horribleReplaceBoost();
private:
// Short fixed subject string used by the simple/range/match benchmarks.
QString str1;
// Contents of the ":/main.cpp" resource, used by the horrible* benchmarks.
QString str2;
// Shared data provider for all escape_* benchmarks.
void escape_data();
};
// Sets up the two subject strings: a short fixed sentence (str1) and the
// contents of the ":/main.cpp" resource (str2).
tst_qregexp::tst_qregexp()
    : QObject()
    , str1("We are all happy monkeys")
{
    QFile f(":/main.cpp");
    if (!f.open(QFile::ReadOnly)) {
        // Without the resource str2 stays empty and the horrible* benchmarks
        // would fail later with a far less helpful message, so say why here.
        qWarning() << "tst_qregexp: cannot open" << f.fileName() << "-" << f.errorString();
        return;
    }
    str2 = f.readAll();
}
// Logs both strings through qDebug() when an escaped result differs from the
// expected one; does nothing when they are equal.
static void verify(const QString &quoted, const QString &expected)
{
    if (quoted == expected)
        return;
    qDebug() << "ERROR:" << quoted << expected;
}
void tst_qregexp::escape_data()
{
QTest::addColumn<QString>("pattern");
QTest::addColumn<QString>("expected");
QTest::newRow("escape 0") << "Hello world" << "Hello world";
QTest::newRow("escape 1") << "(Hello world)" << "\\(Hello world\\)";
{
QString s;
for (int i = 0; i < 10; ++i)
s += "(escape)";
QTest::newRow("escape 10") << s << QRegExp::escape(s);
}
{
QString s;
for (int i = 0; i < 100; ++i)
s += "(escape)";
QTest::newRow("escape 100") << s << QRegExp::escape(s);
}
}
// Benchmarks the "old" escaping approach: copy the pattern, then insert a
// backslash in front of every meta character of the copy, in place.
void tst_qregexp::escape_old()
{
QFETCH(QString, pattern);
QFETCH(QString, expected);
QBENCHMARK {
// Characters that receive a backslash prefix.
static const char meta[] = "$()*+.?[\\]^{|}";
QString quoted = pattern;
int i = 0;
while (i < quoted.length()) {
// NOTE(review): strchr() also finds the terminating NUL of meta, so a
// character for which toLatin1() yields 0 would presumably be escaped as
// well -- the rows from escape_data() do not exercise this; confirm.
if (strchr(meta, quoted.at(i).toLatin1()) != 0)
// i++ steps past the inserted backslash; the ++i below then steps past
// the escaped character itself.
quoted.insert(i++, QLatin1Char('\\'));
++i;
}
verify(quoted, expected);
}
}
// Benchmarks escaping variant 1: build the result in a fresh string with
// reserved capacity, using QString::append() for every character.
void tst_qregexp::escape_new1()
{
QFETCH(QString, pattern);
QFETCH(QString, expected);
QBENCHMARK {
QString quoted;
const int count = pattern.count();
// Reserve room for the case where every character gets a backslash.
quoted.reserve(count * 2);
const QLatin1Char backslash('\\');
for (int i = 0; i < count; i++) {
// All meta characters share one action; anything else skips the switch.
switch (pattern.at(i).toLatin1()) {
case '$':
case '(':
case ')':
case '*':
case '+':
case '.':
case '?':
case '[':
case '\\':
case ']':
case '^':
case '{':
case '|':
case '}':
quoted.append(backslash);
}
// The character itself is always copied, escaped or not.
quoted.append(pattern.at(i));
}
verify(quoted, expected);
}
}
// Benchmarks escaping variant 2: pre-fill the result with backslashes and
// write the pattern characters into it through raw QChar pointers.
void tst_qregexp::escape_new2()
{
QFETCH(QString, pattern);
QFETCH(QString, expected);
QBENCHMARK {
int count = pattern.count();
const QLatin1Char backslash('\\');
// Twice the pattern length, every slot initially holding a backslash.
QString quoted(count * 2, backslash);
const QChar *patternData = pattern.data();
QChar *quotedData = quoted.data();
int escaped = 0;
// count doubles as the countdown of characters still to process.
for ( ; --count >= 0; ++patternData) {
const QChar c = *patternData;
switch (c.unicode()) {
case '$':
case '(':
case ')':
case '*':
case '+':
case '.':
case '?':
case '[':
case '\\':
case ']':
case '^':
case '{':
case '|':
case '}':
++escaped;
// Skip one slot: the backslash it was pre-filled with becomes the escape.
++quotedData;
}
*quotedData = c;
++quotedData;
}
// Trim the unused pre-filled tail.
quoted.resize(pattern.size() + escaped);
verify(quoted, expected);
}
}
// Benchmarks escaping variant 3: same structure as variant 1, but appends
// with operator+= instead of QString::append().
void tst_qregexp::escape_new3()
{
QFETCH(QString, pattern);
QFETCH(QString, expected);
QBENCHMARK {
QString quoted;
const int count = pattern.count();
// Reserve room for the case where every character gets a backslash.
quoted.reserve(count * 2);
const QLatin1Char backslash('\\');
for (int i = 0; i < count; i++) {
// All meta characters share one action; anything else skips the switch.
switch (pattern.at(i).toLatin1()) {
case '$':
case '(':
case ')':
case '*':
case '+':
case '.':
case '?':
case '[':
case '\\':
case ']':
case '^':
case '{':
case '|':
case '}':
quoted += backslash;
}
// The character itself is always copied, escaped or not.
quoted += pattern.at(i);
}
verify(quoted, expected);
}
}
// Returns true when the code point c is one of the meta characters
// $ ( ) * + . ? [ \ ] ^ { | } that must be preceded by a backslash.
static inline bool needsEscaping(int c)
{
    switch (c) {
    case '$': case '(': case ')': case '*': case '+':
    case '.': case '?': case '[': case '\\': case ']':
    case '^': case '{': case '|': case '}':
        return true;
    default:
        return false;
    }
}
// Benchmarks escaping variant 4: first scan for a character that needs
// escaping; if there is none the pattern is used as is, otherwise the clean
// prefix is copied verbatim and only the remainder is escaped.
void tst_qregexp::escape_new4()
{
    QFETCH(QString, pattern);
    QFETCH(QString, expected);

    QBENCHMARK {
        const int n = pattern.size();
        const QChar *patternData = pattern.data();

        // try to prevent copy if no escape is needed
        // The index must outlive the scan (it is tested and reused below), so
        // the loop must not declare its own i.
        int i = 0;
        for (; i != n; ++i) {
            if (needsEscaping(patternData[i].unicode()))
                break;
        }
        if (i == n) {
            verify(pattern, expected);
            // no escaping needed, "return pattern" should be done here.
            // Inside QBENCHMARK this has to be "continue": leaving the function
            // would cut the measured run short after a single iteration.
            continue;
        }

        const QLatin1Char backslash('\\');
        // Twice the pattern length, every slot initially holding a backslash.
        QString quoted(n * 2, backslash);
        QChar *quotedData = quoted.data();

        // Copy the prefix that is known to need no escaping.
        for (int j = 0; j != i; ++j)
            *quotedData++ = *patternData++;

        int escaped = 0;
        for (; i != n; ++i) {
            const QChar c = *patternData;
            if (needsEscaping(c.unicode())) {
                ++escaped;
                // Skip one slot: its pre-filled backslash becomes the escape.
                ++quotedData;
            }
            *quotedData = c;
            ++quotedData;
            ++patternData;
        }
        // Trim the unused pre-filled tail.
        quoted.resize(n + escaped);
        verify(quoted, expected);
        // "return quoted"
    }
}
void tst_qregexp::simpleFind1()
{
int roff;
QRegExp rx("happy");
rx.setPatternSyntax(QRegExp::RegExp);
QBENCHMARK{
roff = rx.indexIn(str1);
}
QCOMPARE(roff, 11);
}
void tst_qregexp::rangeReplace1()
{
QString r;
QRegExp rx("[a-f]");
rx.setPatternSyntax(QRegExp::RegExp);
QBENCHMARK{
r = QString(str1).replace(rx, "-");
}
QCOMPARE(r, QString("W- -r- -ll h-ppy monk-ys"));
}
void tst_qregexp::matchReplace1()
{
QString r;
QRegExp rx("[^a-f]*([a-f]+)[^a-f]*");
rx.setPatternSyntax(QRegExp::RegExp);
QBENCHMARK{
r = QString(str1).replace(rx, "\\1");
}
QCOMPARE(r, QString("eaeaae"));
}
void tst_qregexp::horribleWrongReplace1()
{
QString r;
QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*");
rx.setPatternSyntax(QRegExp::RegExp);
QBENCHMARK{
r = QString(str2).replace(rx, "\\1.\\2.\\3");
}
QCOMPARE(r, str2);
}
void tst_qregexp::horribleReplace1()
{
QString r;
QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*");
rx.setPatternSyntax(QRegExp::RegExp);
QBENCHMARK{
r = QString(str2).replace(rx, "\\1.\\2.\\3");
}
QCOMPARE(r, QString("1.2.3"));
}
void tst_qregexp::simpleFind2()
{
int roff;
QRegExp rx("happy");
rx.setPatternSyntax(QRegExp::RegExp2);
QBENCHMARK{
roff = rx.indexIn(str1);
}
QCOMPARE(roff, 11);
}
void tst_qregexp::rangeReplace2()
{
QString r;
QRegExp rx("[a-f]");
rx.setPatternSyntax(QRegExp::RegExp2);
QBENCHMARK{
r = QString(str1).replace(rx, "-");
}
QCOMPARE(r, QString("W- -r- -ll h-ppy monk-ys"));
}
void tst_qregexp::matchReplace2()
{
QString r;
QRegExp rx("[^a-f]*([a-f]+)[^a-f]*");
rx.setPatternSyntax(QRegExp::RegExp2);
QBENCHMARK{
r = QString(str1).replace(rx, "\\1");
}
QCOMPARE(r, QString("eaeaae"));
}
void tst_qregexp::horribleWrongReplace2()
{
QString r;
QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*");
rx.setPatternSyntax(QRegExp::RegExp2);
QBENCHMARK{
r = QString(str2).replace(rx, "\\1.\\2.\\3");
}
QCOMPARE(r, str2);
}
void tst_qregexp::horribleReplace2()
{
QString r;
QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*");
rx.setPatternSyntax(QRegExp::RegExp2);
QBENCHMARK{
r = QString(str2).replace(rx, "\\1.\\2.\\3");
}
QCOMPARE(r, QString("1.2.3"));
}
// Benchmarks a plain substring search in str1 through the JSC regexp C API.
// Skipped unless HAVE_JSC is defined.
void tst_qregexp::simpleFindJSC()
{
#ifdef HAVE_JSC
    int matchCount;
    const char *errorMessage = " ";
    QString needle("happy");
    JSRegExp *compiled = jsRegExpCompile(needle.utf16(), needle.length(), JSRegExpDoNotIgnoreCase, JSRegExpSingleLine, 0, &errorMessage);
    QVERIFY(compiled != 0);

    QString haystack(str1);
    int offsets[3];
    QBENCHMARK {
        matchCount = jsRegExpExecute(compiled, haystack.utf16(), haystack.length(), 0, offsets, 3);
    }

    // Release the compiled expression before the checks, which may return early.
    jsRegExpFree(compiled);
    QCOMPARE(matchCount, 1);
    QCOMPARE(offsets[0], 11);
#else
    QSKIP("JSC is not enabled for this platform");
#endif
}
// Benchmarks replacing every character in the class [a-f] of str1 with a
// dash by calling a script function in QScriptEngine.
// Skipped unless HAVE_JSC is defined.
void tst_qregexp::rangeReplaceJSC()
{
#ifdef HAVE_JSC
    QScriptValue result;
    QScriptEngine scriptEngine;
    // The script reads its subject from the global variable "s".
    scriptEngine.globalObject().setProperty("s", str1);
    QScriptValue replacer = scriptEngine.evaluate("(function() { return s.replace(/[a-f]/g, '-') } )");
    QVERIFY(replacer.isFunction());

    QBENCHMARK {
        result = replacer.call(QScriptValue());
    }

    QCOMPARE(result.toString(), QString("W- -r- -ll h-ppy monk-ys"));
#else
    QSKIP("JSC is not enabled for this platform");
#endif
}
// Benchmarks a capture-group replacement on str1 (keeping only the runs of
// a-f characters) by calling a script function in QScriptEngine.
// Skipped unless HAVE_JSC is defined.
void tst_qregexp::matchReplaceJSC()
{
#ifdef HAVE_JSC
    QScriptValue result;
    QScriptEngine scriptEngine;
    // The script reads its subject from the global variable "s".
    scriptEngine.globalObject().setProperty("s", str1);
    QScriptValue replacer = scriptEngine.evaluate("(function() { return s.replace(/[^a-f]*([a-f]+)[^a-f]*/g, '$1') } )");
    QVERIFY(replacer.isFunction());

    QBENCHMARK {
        result = replacer.call(QScriptValue());
    }

    QCOMPARE(result.toString(), QString("eaeaae"));
#else
    QSKIP("JSC is not enabled for this platform");
#endif
}
// Benchmarks a whole-file replacement on str2 in QScriptEngine with a pattern
// that is expected not to match; the text must come back unchanged.
// Skipped unless HAVE_JSC is defined.
void tst_qregexp::horribleWrongReplaceJSC()
{
#ifdef HAVE_JSC
    QScriptValue result;
    QScriptEngine scriptEngine;
    // The script reads its subject from the global variable "s".
    scriptEngine.globalObject().setProperty("s", str2);
    QScriptValue replacer = scriptEngine.evaluate("(function() { return s.replace(/.*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*/gm, '$1.$2.$3') } )");
    QVERIFY(replacer.isFunction());

    QBENCHMARK {
        result = replacer.call(QScriptValue());
    }

    QCOMPARE(result.toString(), str2);
#else
    QSKIP("JSC is not enabled for this platform");
#endif
}
// Benchmarks a whole-file replacement in QScriptEngine that reduces the text
// of str2 to the first three components of the ZLIB_VERSION string.
// Skipped unless HAVE_JSC is defined.
void tst_qregexp::horribleReplaceJSC()
{
#ifdef HAVE_JSC
    QScriptValue r;
    QScriptEngine engine;
    // The m flag does not help here: in JavaScript it only changes ^ and $,
    // while '.' still never matches a line terminator. The newlines are
    // therefore flattened to spaces so that ".*" can span the whole text.
    // This must be done on a copy: QString::replace() modifies the string in
    // place and str2 is shared with the benchmarks that run after this one.
    engine.globalObject().setProperty("s", QString(str2).replace('\n', ' '));
    QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/.*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*/gm, '$1.$2.$3') } )");
    QVERIFY(replaceFunc.isFunction());

    QBENCHMARK {
        r = replaceFunc.call(QScriptValue());
    }

    QCOMPARE(r.toString(), QString("1.2.3"));
#else
    QSKIP("JSC is not enabled for this platform");
#endif
}
// Benchmarks a plain substring search in str1 with boost::regex.
// Skipped unless HAVE_BOOST is defined.
void tst_qregexp::simpleFindBoost()
{
#ifdef HAVE_BOOST
    int matchOffset;
    boost::regex needle("happy", boost::regex_constants::perl);
    std::string subject = str1.toStdString();
    std::string::const_iterator first = subject.begin();
    std::string::const_iterator last = subject.end();
    boost::match_flag_type matchFlags = boost::match_default;

    QBENCHMARK {
        boost::match_results<std::string::const_iterator> found;
        regex_search(first, last, found, needle, matchFlags);
        // Offset of the whole match relative to the start of the subject.
        matchOffset = (found[0].first) - first;
    }

    QCOMPARE(matchOffset, 11);
#else
    QSKIP("Boost is not enabled for this platform");
#endif
}
// Benchmarks replacing every character in the class [a-f] of str1 with a
// dash using boost::regex_replace. Skipped unless HAVE_BOOST is defined.
void tst_qregexp::rangeReplaceBoost()
{
#ifdef HAVE_BOOST
    boost::regex charClass("[a-f]", boost::regex_constants::perl);
    std::string subject = str1.toStdString();
    std::string replaced;

    QBENCHMARK {
        replaced = boost::regex_replace(subject, charClass, "-");
    }

    QCOMPARE(replaced, std::string("W- -r- -ll h-ppy monk-ys"));
#else
    QSKIP("Boost is not enabled for this platform");
#endif
}
// Benchmarks a capture-group replacement on str1 (keeping only the runs of
// a-f characters) using boost::regex_replace.
// Skipped unless HAVE_BOOST is defined.
void tst_qregexp::matchReplaceBoost()
{
#ifdef HAVE_BOOST
    boost::regex capture("[^a-f]*([a-f]+)[^a-f]*", boost::regex_constants::perl);
    std::string subject = str1.toStdString();
    std::string replaced;

    QBENCHMARK {
        replaced = boost::regex_replace(subject, capture, "$1");
    }

    QCOMPARE(replaced, std::string("eaeaae"));
#else
    QSKIP("Boost is not enabled for this platform");
#endif
}
// Benchmarks a whole-file replacement on str2 with boost::regex_replace and a
// pattern that is expected not to match; the text must come back unchanged.
// Skipped unless HAVE_BOOST is defined.
void tst_qregexp::horribleWrongReplaceBoost()
{
#ifdef HAVE_BOOST
    boost::regex versionRx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*", boost::regex_constants::perl);
    std::string subject = str2.toStdString();
    std::string replaced;

    QBENCHMARK {
        replaced = boost::regex_replace(subject, versionRx, "$1.$2.$3");
    }

    QCOMPARE(replaced, subject);
#else
    QSKIP("Boost is not enabled for this platform");
#endif
}
// Benchmarks a whole-file replacement on str2 with boost::regex_replace that
// reduces the text to the first three components of the ZLIB_VERSION string.
// Skipped unless HAVE_BOOST is defined.
void tst_qregexp::horribleReplaceBoost()
{
#ifdef HAVE_BOOST
    boost::regex versionRx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*", boost::regex_constants::perl);
    std::string subject = str2.toStdString();
    std::string replaced;

    QBENCHMARK {
        replaced = boost::regex_replace(subject, versionRx, "$1.$2.$3");
    }

    QCOMPARE(replaced, std::string("1.2.3"));
#else
    QSKIP("Boost is not enabled for this platform");
#endif
}
// Entry point for the benchmark executable, running tst_qregexp.
// NOTE(review): the moc output is presumably included here because the
// Q_OBJECT class is declared in this .cpp file rather than in a header.
QTEST_MAIN(tst_qregexp)
#include "main.moc"