diff --git a/.gitignore b/.gitignore index d8ee45f..99f692e 100644 --- a/.gitignore +++ b/.gitignore @@ -33,4 +33,6 @@ lib-test/ *.app *.cpp.orig -*.h.orig \ No newline at end of file +*.h.orig + +benchmark/parser_benchmark diff --git a/Makefile b/Makefile index 507372d..17212a6 100644 --- a/Makefile +++ b/Makefile @@ -54,6 +54,9 @@ format: astyle --options=astyle.options $(ALLLIB) astyle --options=astyle.options $(ALLTEST) +run_benchmark: + make -C benchmark/ clean run + ############ ### Test ### ############ diff --git a/benchmark/Makefile b/benchmark/Makefile new file mode 100644 index 0000000..a05a8cb --- /dev/null +++ b/benchmark/Makefile @@ -0,0 +1,13 @@ + +CFLAGS = -std=c++11 -lstdc++ -Wall -I../src/ -L../ + +all: parser_benchmark + +run: parser_benchmark + @export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../ && ./parser_benchmark + +parser_benchmark: parser_benchmark.cpp + $(CXX) $(CFLAGS) parser_benchmark.cpp -o parser_benchmark -lbenchmark -lpthread -lsqlparser + +clean: + rm -f parser_benchmark diff --git a/benchmark/README.md b/benchmark/README.md new file mode 100644 index 0000000..9b87b38 --- /dev/null +++ b/benchmark/README.md @@ -0,0 +1,34 @@ +# Benchmark + +This directory contains the scripts to execute benchmarks of the parser. We use [Google Benchmark](https://github.com/google/benchmark) to define and run benchmarks. + +## Install Google Benchmark + +```bash +cmake -DCMAKE_BUILD_TYPE=Release + +make + +make install +``` + +## Run the benchmarks + +Build the library from the parent directory and then execute: + +```bash +make run + +# or manually... + +make +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../ +./parser_benchmark +``` + +... or run this from the parent directory: + +```bash +# From root of Git repository. 
+make run_benchmark +``` \ No newline at end of file diff --git a/benchmark/benchmark_utils.h b/benchmark/benchmark_utils.h new file mode 100644 index 0000000..8f23755 --- /dev/null +++ b/benchmark/benchmark_utils.h @@ -0,0 +1,24 @@ +#ifndef __BENCHMARK_UTILS_H__ +#define __BENCHMARK_UTILS_H__ + + +#define TIME_DIFF(end, start)\ + std::chrono::duration_cast<std::chrono::duration<double>>(end - start); + +#define NOW()\ + std::chrono::high_resolution_clock::now(); + + +// Defines and registers a benchmark 'name' that parses 'query' on every iteration and reports an error if parsing fails. +#define PARSE_QUERY_BENCHMARK(name, query)\ + static void name(benchmark::State& st) {\ + while (st.KeepRunning()) {\ + hsql::SQLParserResult* result = hsql::SQLParser::parseSQLString(query);\ + if (!result->isValid()) st.SkipWithError("Parsing failed!");\ + delete result;\ + }\ + }\ + BENCHMARK(name); + + +#endif \ No newline at end of file diff --git a/benchmark/parser_benchmark.cpp b/benchmark/parser_benchmark.cpp new file mode 100644 index 0000000..9f0fa07 --- /dev/null +++ b/benchmark/parser_benchmark.cpp @@ -0,0 +1,107 @@ + +#include <sstream> +#include <string> +#include "benchmark/benchmark.h" + +#include "SQLParser.h" +#include "parser/bison_parser.h" +#include "parser/flex_lexer.h" + +#include "benchmark_utils.h" + + +PARSE_QUERY_BENCHMARK(BM_SimpleSelect, + "SELECT * FROM test;"); + +PARSE_QUERY_BENCHMARK(BM_SimpleSubSelect, + "SELECT age, street AS address FROM (SELECT * FROM data);"); + +PARSE_QUERY_BENCHMARK(BM_TwoSelects, + "SELECT * FROM test; SELECT age, street AS address FROM data;"); + +PARSE_QUERY_BENCHMARK(BM_LongSelectList26, + "SELECT a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z FROM test;"); + +PARSE_QUERY_BENCHMARK(BM_LongSelectList52, + "SELECT a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z FROM test;"); + +PARSE_QUERY_BENCHMARK(BM_LongSelectElement26, + "SELECT aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa FROM test;"); + +PARSE_QUERY_BENCHMARK(BM_LongSelectElement52, + "SELECT 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa FROM test;"); + +// Benchmark the influence of increasing size of the query, while +// the number of tokens remains unchanged. +static void BM_CharacterCount(benchmark::State& st) { + const size_t querySize = st.range(0); + + // Base query has a size of 18 characters once the %name% placeholder is removed. + std::string query = "SELECT %name% FROM test;"; + + const size_t pad = querySize - 18; + const std::string filler = std::string(pad, 'a'); + query.replace(7, 6, filler); + + while (st.KeepRunning()) { + hsql::SQLParserResult* result = hsql::SQLParser::parseSQLString(query); + if (!result->isValid()) st.SkipWithError("Parsing failed!"); + delete result; + } +} +BENCHMARK(BM_CharacterCount) + ->RangeMultiplier(1 << 2) + ->Ranges({{1 << 5, 1 << 15}, + {5, 5}}); + +// Benchmark the influence of increasing number of tokens, while +// the number of characters remains unchanged. +static void BM_ConditionalTokens(benchmark::State& st) { + const size_t targetSize = st.range(0); + const size_t numTokens = st.range(1); + + // Base query contains 4 tokens (SELECT, *, FROM, test). + std::string query = "SELECT * FROM test"; + + // Create conditional. The counter is signed so that odd or too-small token counts end the loop instead of wrapping around. + std::stringstream condStream; + long missingTokens = static_cast<long>(numTokens) - 4; + if (missingTokens > 0) { + condStream << " WHERE a"; + missingTokens -= 2; + + while (missingTokens > 0) { + condStream << " AND a"; + missingTokens -= 2; + } + } + + query += condStream.str(); + + if (targetSize >= query.size()) { + const size_t pad = targetSize - query.size(); + const std::string filler = std::string(pad, 'a'); + query.replace(7, 1, filler); + + } else { + // Query can't be the same length as in the other benchmarks. + // Running this will result in unusable data. + fprintf(stderr, "Too many tokens. 
Query too long for benchmark char limit (%zu > %zu).\n", + query.size(), targetSize); + st.SkipWithError("Query too long for benchmark char limit."); + return; + } + + while (st.KeepRunning()) { + hsql::SQLParserResult* result = hsql::SQLParser::parseSQLString(query); + if (!result->isValid()) st.SkipWithError("Parsing failed!"); + delete result; + } +} +BENCHMARK(BM_ConditionalTokens) + ->RangeMultiplier(1 << 2) + ->Ranges({{1 << 14, 1 << 14}, + {1 << 2, 1 << 11}}); + + +BENCHMARK_MAIN(); diff --git a/example/example.cpp b/example/example.cpp index f17ef7c..62a5077 100644 --- a/example/example.cpp +++ b/example/example.cpp @@ -31,7 +31,11 @@ int main(int argc, char *argv[]) { delete result; return 0; } else { - printf("Invalid SQL!\n"); + fprintf(stderr, "Given string is not a valid SQL query.\n"); + fprintf(stderr, "%s (L%d:%d)\n", + result->errorMsg(), + result->errorLine(), + result->errorColumn()); delete result; return -1; } diff --git a/src/parser/bison_parser.y b/src/parser/bison_parser.y index f650530..be3c72a 100644 --- a/src/parser/bison_parser.y +++ b/src/parser/bison_parser.y @@ -487,6 +487,12 @@ select_no_paren: $$->order = $4; $$->limit = $5; } + | select_clause set_operator select_with_paren opt_order opt_limit { + $$ = $1; + $$->unionSelect = $3; + $$->order = $4; + $$->limit = $5; + } ; set_operator: @@ -678,7 +684,7 @@ table_ref: table_ref_atomic: table_ref_name - | '(' select_statement ')' alias { + | '(' select_statement ')' opt_alias { auto tbl = new TableRef(kTableSelect); tbl->select = $2; tbl->alias = $4; diff --git a/test/valid_queries.sql b/test/valid_queries.sql index c391f52..b211cbf 100644 --- a/test/valid_queries.sql +++ b/test/valid_queries.sql @@ -6,7 +6,8 @@ SELECT * from "table" JOIN table2 ON a = b WHERE (b OR NOT a) AND a = 12.5 (SELECT a FROM foo WHERE a > 12 OR b > 3 AND c NOT LIKE 's%' LIMIT 10); SELECT * FROM "table" LIMIT 10 OFFSET 10; SELECT * FROM second; SELECT * FROM t1 UNION SELECT * FROM t2 ORDER BY col1; --- SELECT * FROM t1 UNION (SELECT * FROM t2 UNION SELECT * FROM t3) ORDER 
BY col1; +SELECT * FROM (SELECT * FROM t1); +SELECT * FROM t1 UNION (SELECT * FROM t2 UNION SELECT * FROM t3) ORDER BY col1; # JOIN SELECT t1.a, t1.b, t2.c FROM "table" AS t1 JOIN (SELECT * FROM foo JOIN bar ON foo.id = bar.id) t2 ON t1.a = t2.b WHERE (t1.b OR NOT t1.a) AND t2.c = 12.5 SELECT * FROM t1 JOIN t2 ON c1 = c2;