Benchmarking (#27)

Adds benchmarking capabilities and small grammar fix.
This commit is contained in:
Pedro Flemming 2017-03-06 18:30:35 +01:00 committed by GitHub
parent 23621fa862
commit 42049b4d56
9 changed files with 196 additions and 4 deletions

4
.gitignore vendored
View File

@ -33,4 +33,6 @@ lib-test/
*.app
*.cpp.orig
*.h.orig
*.h.orig
benchmark/parser_benchmark

View File

@ -54,6 +54,9 @@ format:
astyle --options=astyle.options $(ALLLIB)
astyle --options=astyle.options $(ALLTEST)
# Rebuild the parser benchmark from scratch and run it.
# $(MAKE) is used instead of a literal `make` so that flags such as -j/-n
# and command-line variable overrides are passed on to the sub-make.
run_benchmark:
	$(MAKE) -C benchmark/ clean run
############
### Test ###
############

13
benchmark/Makefile Normal file
View File

@ -0,0 +1,13 @@
# Builds and runs the parser benchmark binary.
# The SQL parser headers are taken from ../src/ and libsqlparser from ../.
CFLAGS = -std=c++11 -lstdc++ -Wall -I../src/ -L../

# None of these targets produce a file of the same name; declaring them
# phony keeps them working even if such a file appears in this directory.
.PHONY: all run clean

all: parser_benchmark

# Adds the parent directory to the library search path before running the
# benchmark binary.
run: parser_benchmark
	@export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../ && ./parser_benchmark

parser_benchmark: parser_benchmark.cpp
	$(CXX) $(CFLAGS) parser_benchmark.cpp -o parser_benchmark -lbenchmark -lpthread -lsqlparser

clean:
	rm -f parser_benchmark

34
benchmark/README.md Normal file
View File

@ -0,0 +1,34 @@
# Benchmark
This directory contains the scripts to execute benchmarks of the parser. We use [Google Benchmark](https://github.com/google/benchmark) to define and run benchmarks.
## Install Google Benchmark
```bash
cmake -DCMAKE_BUILD_TYPE=Release
make
make install
```
## Run the benchmarks
Build the library from the parent directory and then execute:
```bash
make run
# or manually...
make
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../
./parser_benchmark
```
... or run this from the parent directory:
```bash
# From root of Git repository.
make run_benchmark
```

View File

@ -0,0 +1,24 @@
#ifndef BENCHMARK_UTILS_H
#define BENCHMARK_UTILS_H

#include <chrono>

// Difference between two time points as a std::chrono::duration<double>.
// Expands to a plain expression (no trailing semicolon) with parenthesised
// arguments, so it can be used inside larger expressions such as
// TIME_DIFF(NOW(), start).count().
#define TIME_DIFF(end, start)\
  std::chrono::duration_cast<std::chrono::duration<double>>((end) - (start))

// Current time point of std::chrono::high_resolution_clock.
// Expands to a plain expression (no trailing semicolon).
#define NOW()\
  std::chrono::high_resolution_clock::now()

// Defines and registers a benchmark function `name` that parses the SQL
// string `query` once per benchmark iteration. An invalid parse result is
// reported through SkipWithError; the result object is deleted in every
// iteration. The including file must already provide the Google Benchmark
// and hsql::SQLParser declarations.
#define PARSE_QUERY_BENCHMARK(name, query)\
  static void name(benchmark::State& st) {\
    while (st.KeepRunning()) {\
      hsql::SQLParserResult* result = hsql::SQLParser::parseSQLString(query);\
      if (!result->isValid()) st.SkipWithError("Parsing failed!");\
      delete result;\
    }\
  }\
  BENCHMARK(name);

#endif

View File

@ -0,0 +1,105 @@
#include <chrono>
#include <sstream>
#include "benchmark/benchmark.h"
#include "SQLParser.h"
#include "parser/bison_parser.h"
#include "parser/flex_lexer.h"
#include "benchmark_utils.h"
// Fixed-query parser benchmarks. Each PARSE_QUERY_BENCHMARK line declares
// one benchmark named by its first argument that parses the SQL text given
// as its second argument.

// Single statement selecting everything from one table.
PARSE_QUERY_BENCHMARK(BM_SimpleSelect,
"SELECT * FROM test;");
// Select with aliased column reading from a sub-select.
PARSE_QUERY_BENCHMARK(BM_SimpleSubSelect,
"SELECT age, street AS address FROM (SELECT * FROM data);");
// Two statements in a single input string.
PARSE_QUERY_BENCHMARK(BM_TwoSelects,
"SELECT * FROM test; SELECT age, street AS address FROM data;");
// Select lists with 26 and 52 single-letter columns.
PARSE_QUERY_BENCHMARK(BM_LongSelectList26,
"SELECT a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z FROM test;");
PARSE_QUERY_BENCHMARK(BM_LongSelectList52,
"SELECT a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z FROM test;");
// A single long column name instead of many short ones.
// NOTE(review): the name lengths presumably mirror the character length of
// the 26/52-column lists above - confirm.
PARSE_QUERY_BENCHMARK(BM_LongSelectElement26,
"SELECT aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa FROM test;");
PARSE_QUERY_BENCHMARK(BM_LongSelectElement52,
"SELECT aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa FROM test;");
// Benchmark the influence of increasing size of the query, while
// the number of tokens remains unchanged.
//
// st.range(0) is the total length of the generated query in characters.
// The query is "SELECT <name> FROM test;" where <name> is a run of 'a'
// characters long enough to reach that length.
static void BM_CharacterCount(benchmark::State& st) {
  const size_t querySize = st.range(0);

  const std::string placeholder = "%name%";
  std::string query = "SELECT %name% FROM test;";

  // Length of the query without the placeholder (18 characters).
  const size_t baseSize = query.size() - placeholder.size();

  // The column name needs at least one character; a smaller size would also
  // make the unsigned subtraction below wrap around.
  if (querySize <= baseSize) {
    st.SkipWithError("Requested query size is too small for the base query.");
    return;
  }

  const size_t pad = querySize - baseSize;
  query.replace(query.find(placeholder), placeholder.size(), std::string(pad, 'a'));

  while (st.KeepRunning()) {
    hsql::SQLParserResult* result = hsql::SQLParser::parseSQLString(query);
    // Same validity check as the other benchmarks in this file, so that a
    // failing parse is reported instead of being measured silently.
    if (!result->isValid()) st.SkipWithError("Parsing failed!");
    delete result;
  }
}
// Registers BM_CharacterCount with two argument ranges and a range
// multiplier of 4: the first range goes from 1 << 5 to 1 << 15, the second
// is fixed at 5. BM_CharacterCount only reads st.range(0) (the query size).
// NOTE(review): the second range looks unused by the benchmark body - confirm
// whether it is only there to align the output with BM_ConditionalTokens.
BENCHMARK(BM_CharacterCount)
->RangeMultiplier(1 << 2)
->Ranges({{1 << 5, 1 << 15},
{5, 5}});
// Benchmark the influence of increasing number of tokens, while
// the number of characters remains unchanged.
//
// st.range(0) is the total length of the generated query in characters,
// st.range(1) is the requested number of tokens. Tokens are added as
// " WHERE a" followed by " AND a" conditions (two tokens each); the select
// list is then padded with 'a' characters to reach the target length.
static void BM_ConditionalTokens(benchmark::State& st) {
  const size_t targetSize = st.range(0);
  const size_t numTokens = st.range(1);

  // Base query contains 4 tokens: SELECT, *, FROM, test.
  std::string query = "SELECT * FROM test";
  const size_t baseTokens = 4;

  // Fewer tokens than the base query cannot be produced; bail out instead of
  // letting the unsigned subtraction below wrap around.
  if (numTokens < baseTokens) {
    st.SkipWithError("Requested token count is smaller than the base query.");
    return;
  }

  // Every condition contributes two tokens. Odd remainders are rounded up so
  // the count can never step past zero.
  const size_t numConditions = (numTokens - baseTokens + 1) / 2;
  for (size_t i = 0; i < numConditions; ++i) {
    query += (i == 0) ? " WHERE a" : " AND a";
  }

  if (targetSize < query.size()) {
    // Query can't be the same length as in the other benchmarks.
    // Running this will result in unusable data, so report it as an error.
    const std::string message =
        "Too many tokens. Query too long for benchmark char limit (" +
        std::to_string(query.size()) + " > " + std::to_string(targetSize) + ").";
    st.SkipWithError(message.c_str());
    return;
  }

  // The filler replaces the one-character '*', so it has to be one character
  // longer than the remaining gap for the query to reach exactly targetSize
  // (and to never leave the select list empty).
  const size_t fillerSize = targetSize - query.size() + 1;
  query.replace(query.find('*'), 1, std::string(fillerSize, 'a'));

  while (st.KeepRunning()) {
    hsql::SQLParserResult* result = hsql::SQLParser::parseSQLString(query);
    if (!result->isValid()) st.SkipWithError("Parsing failed!");
    delete result;
  }
}
// Registers BM_ConditionalTokens with a range multiplier of 4.
// First range (read as st.range(0), the target query size) is fixed at
// 1 << 14; second range (read as st.range(1), the token count) goes from
// 1 << 2 to 1 << 11.
BENCHMARK(BM_ConditionalTokens)
->RangeMultiplier(1 << 2)
->Ranges({{1 << 14, 1 << 14},
{1 << 2, 1 << 11}});
// No main() is written out in this file; presumably this Google Benchmark
// macro supplies the program entry point that runs the registered benchmarks.
BENCHMARK_MAIN();

View File

@ -31,7 +31,11 @@ int main(int argc, char *argv[]) {
delete result;
return 0;
} else {
printf("Invalid SQL!\n");
fprintf(stderr, "Given string is not a valid SQL query.\n");
fprintf(stderr, "%s (L%d:%d)\n",
result->errorMsg(),
result->errorLine(),
result->errorColumn());
delete result;
return -1;
}

View File

@ -487,6 +487,12 @@ select_no_paren:
$$->order = $4;
$$->limit = $5;
}
| select_clause set_operator select_with_paren opt_order opt_limit {
$$ = $1;
$$->unionSelect = $3;
$$->order = $4;
$$->limit = $5;
}
;
set_operator:
@ -678,7 +684,7 @@ table_ref:
table_ref_atomic:
table_ref_name
| '(' select_statement ')' alias {
| '(' select_statement ')' opt_alias {
auto tbl = new TableRef(kTableSelect);
tbl->select = $2;
tbl->alias = $4;

View File

@ -6,7 +6,8 @@ SELECT * from "table" JOIN table2 ON a = b WHERE (b OR NOT a) AND a = 12.5
(SELECT a FROM foo WHERE a > 12 OR b > 3 AND c NOT LIKE 's%' LIMIT 10);
SELECT * FROM "table" LIMIT 10 OFFSET 10; SELECT * FROM second;
SELECT * FROM t1 UNION SELECT * FROM t2 ORDER BY col1;
-- SELECT * FROM t1 UNION (SELECT * FROM t2 UNION SELECT * FROM t3) ORDER BY col1;
SELECT * FROM (SELECT * FROM t1);
SELECT * FROM t1 UNION (SELECT * FROM t2 UNION SELECT * FROM t3) ORDER BY col1;
# JOIN
SELECT t1.a, t1.b, t2.c FROM "table" AS t1 JOIN (SELECT * FROM foo JOIN bar ON foo.id = bar.id) t2 ON t1.a = t2.b WHERE (t1.b OR NOT t1.a) AND t2.c = 12.5
SELECT * FROM t1 JOIN t2 ON c1 = c2;