From 69d96061b2e90be34469cc66ddb95564b311392f Mon Sep 17 00:00:00 2001 From: Pedro Flemming Date: Fri, 21 Jul 2017 02:47:45 +0200 Subject: [PATCH] Add tokenize method to SQLParser to output the list of tokens (#54) Added tokenize benchmark. Restructured Makefile --- .gitignore | 2 +- .travis.yml | 5 +- Makefile | 143 ++++++++++++++++++++------------- README.md | 8 +- benchmark/Makefile | 17 ---- benchmark/README.md | 20 ----- benchmark/benchmark.cpp | 28 +++++++ benchmark/benchmark_utils.cpp | 44 ++++++++++ benchmark/benchmark_utils.h | 31 +++++-- benchmark/parser_benchmark.cpp | 77 +++--------------- benchmark/queries.cpp | 47 +++++++++++ benchmark/queries.h | 56 +++++++++++++ example/example.cpp | 2 +- src/SQLParser.cpp | 52 ++++++++---- src/SQLParser.h | 24 +++--- src/SQLParserResult.cpp | 4 +- src/sql/SQLStatement.h | 3 +- test/queries/tpc-h-05.sql | 2 +- test/queries/tpc-h-11-15.sql | 58 ------------- test/queries/tpc-h-11.sql | 10 +++ test/queries/tpc-h-12.sql | 10 +++ test/queries/tpc-h-13.sql | 8 ++ test/queries/tpc-h-14.sql | 5 ++ test/queries/tpc-h-15.sql | 15 ++++ test/queries/tpc-h-16-22.sql | 71 ---------------- test/queries/tpc-h-16.sql | 9 +++ test/queries/tpc-h-17.sql | 4 + test/queries/tpc-h-18.sql | 7 ++ test/queries/tpc-h-19.sql | 9 +++ test/queries/tpc-h-20.sql | 8 ++ test/queries/tpc-h-21.sql | 11 +++ test/queries/tpc-h-22.sql | 9 +++ test/sql_asserts.h | 2 +- test/sql_grammar_test.cpp | 2 +- test/sql_parser.cpp | 44 ++++++++++ test/sql_tests.cpp | 8 +- test/test.sh | 4 +- test/tpc_h_tests.cpp | 39 +++++++-- 38 files changed, 544 insertions(+), 354 deletions(-) delete mode 100644 benchmark/Makefile create mode 100644 benchmark/benchmark.cpp create mode 100644 benchmark/benchmark_utils.cpp create mode 100644 benchmark/queries.cpp create mode 100644 benchmark/queries.h delete mode 100644 test/queries/tpc-h-11-15.sql create mode 100644 test/queries/tpc-h-11.sql create mode 100644 test/queries/tpc-h-12.sql create mode 100644 test/queries/tpc-h-13.sql create mode 100644 test/queries/tpc-h-14.sql create mode 100644 test/queries/tpc-h-15.sql delete mode 100644 test/queries/tpc-h-16-22.sql create mode 100644 test/queries/tpc-h-16.sql create mode 100644 test/queries/tpc-h-17.sql create mode 100644 test/queries/tpc-h-18.sql create mode 100644 test/queries/tpc-h-19.sql create mode 100644 test/queries/tpc-h-20.sql create mode 100644 test/queries/tpc-h-21.sql create mode 100644 test/queries/tpc-h-22.sql create mode 100644 test/sql_parser.cpp diff --git a/.gitignore b/.gitignore index 924444d..65d07aa 100644 --- a/.gitignore +++ b/.gitignore @@ -41,4 +41,4 @@ cmake-build-debug/ *.cpp.orig *.h.orig -benchmark/parser_benchmark +*.csv \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index e622e12..b6fd166 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,7 @@ install: - sudo apt-get install -y flex valgrind - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.8 90 - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.8 90 - + # Install bison 3.0.4. - wget http://ftp.gnu.org/gnu/bison/bison-3.0.4.tar.gz - tar -xvzf bison-3.0.4.tar.gz @@ -32,8 +32,7 @@ script: - make -j4 - make test - - make test_format - make test_example # Test if benchmark can be built. - # - make build_benchmark + # - make benchmark diff --git a/Makefile b/Makefile index 3c9ae0e..bef6562 100644 --- a/Makefile +++ b/Makefile @@ -1,53 +1,59 @@ -# Directories. +all: library + +####################################### +############# Directories ############# +####################################### BIN = bin SRC = src SRCPARSER = src/parser -# Files. -PARSERCPP = $(SRCPARSER)/bison_parser.cpp $(SRCPARSER)/flex_lexer.cpp -LIBCPP = $(shell find $(SRC) -name '*.cpp' -not -path "$(SRCPARSER)/*") $(PARSERCPP) -LIBOBJ = $(LIBCPP:%.cpp=%.o) -TESTCPP = $(shell find test/ -name '*.cpp') - -ALLLIB = $(shell find $(SRC) -name '*.cpp' -not -path "$(SRCPARSER)/*") $(shell find $(SRC) -name '*.h' -not -path "$(SRCPARSER)/*") -ALLTEST = $(shell find test/ -name '*.cpp') $(shell find test/ -name '*.h') -EXAMPLESRC = $(shell find example/ -name '*.cpp') $(shell find example/ -name '*.h') - -# Compiler & linker flags. -CFLAGS = -std=c++11 -Wall -Werror -fPIC -LIBFLAGS = -shared -TARGET = libsqlparser.so -INSTALL = /usr/local - -CTESTFLAGS = -Wall -Werror -Isrc/ -Itest/ -L./ -std=c++11 -lstdc++ +INSTALL = /usr/local +###################################### +############ Compile Mode ############ +###################################### # Set compile mode to -g or -O3. -MODE_LOG = "" +# Debug mode: make mode=debug + mode ?= release +MODE_LOG = "" +OPT_FLAG = ifeq ($(mode), debug) - CFLAGS += -g - CTESTFLAGS += -g + OPT_FLAG = -g MODE_LOG = "Building in \033[1;31mdebug\033[0m mode" else - CFLAGS += -O3 - CTESTFLAGS += -O3 + OPT_FLAG = -O3 MODE_LOG = "Building in \033[0;32mrelease\033[0m mode ('make mode=debug' for debug mode)" endif GMAKE = make mode=$(mode) -all: library -library: $(TARGET) -$(TARGET): $(LIBOBJ) - $(CXX) $(LIBFLAGS) -o $(TARGET) $(LIBOBJ) +####################################### +############### Library ############### +####################################### +PARSER_CPP = $(SRCPARSER)/bison_parser.cpp $(SRCPARSER)/flex_lexer.cpp +PARSER_H = $(SRCPARSER)/bison_parser.h $(SRCPARSER)/flex_lexer.h + +LIB_BUILD = libsqlparser.so +LIB_CFLAGS = -std=c++11 -Wall -Werror -fPIC $(OPT_FLAG) +LIB_LFLAGS = -shared $(OPT_FLAG) +LIB_CPP = $(shell find $(SRC) -name '*.cpp' -not -path "$(SRCPARSER)/*") $(PARSER_CPP) +LIB_H = $(shell find $(SRC) -name '*.h' -not -path "$(SRCPARSER)/*") $(PARSER_H) +LIB_ALL = $(shell find $(SRC) -name '*.cpp' -not -path "$(SRCPARSER)/*") $(shell find $(SRC) -name '*.h' -not -path "$(SRCPARSER)/*") +LIB_OBJ = $(LIB_CPP:%.cpp=%.o) + +library: $(LIB_BUILD) + +$(LIB_BUILD): $(LIB_OBJ) + $(CXX) $(LIB_LFLAGS) -o $(LIB_BUILD) $(LIB_OBJ) $(SRCPARSER)/flex_lexer.o: $(SRCPARSER)/flex_lexer.cpp $(SRCPARSER)/bison_parser.cpp - $(CXX) $(CFLAGS) -c -o $@ $< -Wno-sign-compare -Wno-unneeded-internal-declaration -Wno-deprecated-register + $(CXX) $(LIB_CFLAGS) -c -o $@ $< -Wno-sign-compare -Wno-unneeded-internal-declaration -Wno-deprecated-register -%.o: %.cpp $(PARSERCPP) - $(CXX) $(CFLAGS) -c -o $@ $< +%.o: %.cpp $(PARSER_CPP) $(LIB_H) + $(CXX) $(LIB_CFLAGS) -c -o $@ $< $(SRCPARSER)/bison_parser.cpp: $(SRCPARSER)/bison_parser.y $(GMAKE) -C $(SRCPARSER)/ bison_parser.cpp @@ -55,11 +61,13 @@ $(SRCPARSER)/bison_parser.cpp: $(SRCPARSER)/bison_parser.y $(SRCPARSER)/flex_lexer.cpp: $(SRCPARSER)/flex_lexer.l $(GMAKE) -C $(SRCPARSER)/ flex_lexer.cpp +$(SRCPARSER)/bison_parser.h: $(SRCPARSER)/bison_parser.cpp +$(SRCPARSER)/flex_lexer.h: $(SRCPARSER)/flex_lexer.cpp + clean: - rm -f $(TARGET) + rm -f $(LIB_BUILD) rm -rf $(BIN) find $(SRC) -type f -name '*.o' -delete - $(GMAKE) -C benchmark/ clean cleanparser: $(GMAKE) -C $(SRCPARSER)/ clean @@ -67,50 +75,73 @@ cleanparser: cleanall: clean cleanparser install: - cp $(TARGET) $(INSTALL)/lib/$(TARGET) + cp $(LIB_BUILD) $(INSTALL)/lib/$(LIB_BUILD) rm -rf $(INSTALL)/include/hsql cp -r src $(INSTALL)/include/hsql find $(INSTALL)/include/hsql -not -name '*.h' -type f | xargs rm -################# -### Benchmark ### -################# -benchmark: library - $(GMAKE) -C benchmark/ clean run -build_benchmark: library - $(GMAKE) -C benchmark/ parser_benchmark +####################################### +############## Benchmark ############## +####################################### +BM_BUILD = $(BIN)/benchmark +BM_CFLAGS = -std=c++17 -Wall -Isrc/ -L./ $(OPT_FLAG) +BM_PATH = benchmark +BM_CPP = $(shell find $(BM_PATH)/ -name '*.cpp') +BM_ALL = $(shell find $(BM_PATH)/ -name '*.cpp' -or -name '*.h') -############ -### Test ### -############ +benchmark: $(BM_BUILD) -test: $(BIN)/sql_tests +run_benchmarks: benchmark + ./$(BM_BUILD) --benchmark_counters_tabular=true + # --benchmark_filter="abc + +save_benchmarks: benchmark + ./$(BM_BUILD) --benchmark_format=csv > benchmarks.csv + +$(BM_BUILD): $(BM_ALL) $(LIB_BUILD) + @mkdir -p $(BIN)/ + $(CXX) $(BM_CFLAGS) $(BM_CPP) -o $(BM_BUILD) -lbenchmark -lpthread -lsqlparser -lstdc++ -lstdc++fs + + + +######################################## +############ Test & Example ############ +######################################## +TEST_BUILD = $(BIN)/tests +TEST_CFLAGS = -std=c++11 -Wall -Werror -Isrc/ -Itest/ -L./ $(OPT_FLAG) +TEST_CPP = $(shell find test/ -name '*.cpp') +TEST_ALL = $(shell find test/ -name '*.cpp') $(shell find test/ -name '*.h') +EXAMPLE_SRC = $(shell find example/ -name '*.cpp') $(shell find example/ -name '*.h') + +test: $(TEST_BUILD) bash test/test.sh +$(TEST_BUILD): $(TEST_ALL) $(LIB_BUILD) + @mkdir -p $(BIN)/ + $(CXX) $(TEST_CFLAGS) $(TEST_CPP) -o $(TEST_BUILD) -lsqlparser -lstdc++ + test_example: $(GMAKE) -C example/ LD_LIBRARY_PATH=./ \ ./example/example "SELECT * FROM students WHERE name = 'Max Mustermann';" test_format: - @! astyle --options=astyle.options $(ALLLIB) | grep -q "Formatted" - @! astyle --options=astyle.options $(ALLTEST) | grep -q "Formatted" - -$(BIN)/sql_tests: library - @mkdir -p $(BIN)/ - $(CXX) $(CTESTFLAGS) $(TESTCPP) -o $(BIN)/sql_tests -lsqlparser + @! astyle --options=astyle.options $(LIB_ALL) | grep -q "Formatted" + @! astyle --options=astyle.options $(TEST_ALL) | grep -q "Formatted" -############ -### Misc ### -############ + +######################################## +################# Misc ################# +######################################## format: - astyle --options=astyle.options $(ALLLIB) - astyle --options=astyle.options $(ALLTEST) - astyle --options=astyle.options $(EXAMPLESRC) + astyle --options=astyle.options $(LIB_ALL) + astyle --options=astyle.options $(TEST_ALL) + astyle --options=astyle.options $(EXAMPLE_SRC) log_mode: @echo $(MODE_LOG) + diff --git a/README.md b/README.md index 26e9bca..af4bab2 100644 --- a/README.md +++ b/README.md @@ -33,14 +33,14 @@ To use the SQL parser in your own projects you simply have to follow these few s { // Basic Usage Example - + const std::string query = "..."; hsql::SQLParserResult result; - hsql::SQLParser::parseSQLString(query, &result); - + hsql::SQLParser::parse(query, &result); + if (result.isValid() && result.size() > 0) { const hsql::SQLStatement* statement = result.getStatement(0); - + if (statement.isType(hsql::SelectStatement)) { const hsql::SelectStatement* select = (const hsql::SelectStatement*) statement; /* ... */ diff --git a/benchmark/Makefile b/benchmark/Makefile deleted file mode 100644 index 1b6caae..0000000 --- a/benchmark/Makefile +++ /dev/null @@ -1,17 +0,0 @@ - -SRC = ./ -CPP = $(shell find $(SRC) -name '*.cpp') - -CFLAGS = -std=c++11 -lstdc++ -Wall -Werror -I../src/ -L../ -O3 - -all: parser_benchmark - -run: parser_benchmark - @export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../ &&\ - ./parser_benchmark - -parser_benchmark: $(CPP) - $(CXX) $(CFLAGS) $(CPP) -o parser_benchmark -lbenchmark -lpthread -lsqlparser - -clean: - rm -f parser_benchmark diff --git a/benchmark/README.md b/benchmark/README.md index 9b87b38..6c038ba 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -12,23 +12,3 @@ make make install ``` -## Run the benchmarks - -Build the libary from the parent directory and then execute: - -```bash -make run - -# or manually... - -make -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../ -./parser_benchmark -``` - -... or run this from the parent directory: - -```bash -# From root of Git repository. -make run_benchmark -``` \ No newline at end of file diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp new file mode 100644 index 0000000..e143550 --- /dev/null +++ b/benchmark/benchmark.cpp @@ -0,0 +1,28 @@ +#include "benchmark/benchmark.h" + +#include "benchmark_utils.h" +#include "queries.h" + +int main(int argc, char** argv) { + // Create parse and tokenize benchmarks for TPC-H queries. + const auto tpch_queries = getTPCHQueries(); + for (const auto& query : tpch_queries) { + std::string p_name = query.first + "-parse"; + benchmark::RegisterBenchmark(p_name.c_str(), &BM_ParseBenchmark, query.second); + std::string t_name = query.first + "-tokenize"; + benchmark::RegisterBenchmark(t_name.c_str(), &BM_TokenizeBenchmark, query.second); + } + + // Create parse and tokenize benchmarks for all queries in sql_queries array. + for (unsigned i = 0; i < sql_queries.size(); ++i) { + const auto& query = sql_queries[i]; + std::string p_name = getQueryName(i) + "-parse"; + benchmark::RegisterBenchmark(p_name.c_str(), &BM_ParseBenchmark, query.second); + + std::string t_name = getQueryName(i) + "-tokenize"; + benchmark::RegisterBenchmark(t_name.c_str(), &BM_TokenizeBenchmark, query.second); + } + + benchmark::Initialize(&argc, argv); + benchmark::RunSpecifiedBenchmarks(); +} diff --git a/benchmark/benchmark_utils.cpp b/benchmark/benchmark_utils.cpp new file mode 100644 index 0000000..27fb66c --- /dev/null +++ b/benchmark/benchmark_utils.cpp @@ -0,0 +1,44 @@ +#include "benchmark_utils.h" + +#include +#include + +#include "SQLParser.h" + +size_t getNumTokens(const std::string& query) { + std::vector tokens; + hsql::SQLParser::tokenize(query, &tokens); + return tokens.size(); +} + +void BM_TokenizeBenchmark(benchmark::State& st, const std::string& query) { + st.counters["num_tokens"] = getNumTokens(query); + st.counters["num_chars"] = query.size(); + + while (st.KeepRunning()) { + std::vector tokens(512); + hsql::SQLParser::tokenize(query, &tokens); + } +} + +void BM_ParseBenchmark(benchmark::State& st, const std::string& query) { + st.counters["num_tokens"] = getNumTokens(query); + st.counters["num_chars"] = query.size(); + + while (st.KeepRunning()) { + hsql::SQLParserResult result; + hsql::SQLParser::parse(query, &result); + if (!result.isValid()) { + std::cout << query << std::endl; + std::cout << result.errorMsg() << std::endl; + st.SkipWithError("Parsing failed!"); + } + } +} + +std::string readFileContents(const std::string& file_path) { + std::ifstream t(file_path.c_str()); + std::string text((std::istreambuf_iterator(t)), + std::istreambuf_iterator()); + return text; +} diff --git a/benchmark/benchmark_utils.h b/benchmark/benchmark_utils.h index 8f23755..7eb54d8 100644 --- a/benchmark/benchmark_utils.h +++ b/benchmark/benchmark_utils.h @@ -1,6 +1,18 @@ #ifndef __BENCHMARK_UTILS_H__ #define __BENCHMARK_UTILS_H__ +#include "benchmark/benchmark.h" + +size_t getNumTokens(const std::string& query); + +void BM_TokenizeBenchmark(benchmark::State& st, const std::string& query); + +void BM_ParseBenchmark(benchmark::State& st, const std::string& query); + +std::string readFileContents(const std::string& file_path); + + + #define TIME_DIFF(end, start)\ std::chrono::duration_cast>(end - start); @@ -8,17 +20,22 @@ #define NOW()\ std::chrono::high_resolution_clock::now(); - - #define PARSE_QUERY_BENCHMARK(name, query)\ static void name(benchmark::State& st) {\ - while (st.KeepRunning()) {\ - hsql::SQLParserResult* result = hsql::SQLParser::parseSQLString(query);\ - if (!result->isValid()) st.SkipWithError("Parsing failed!");\ - delete result;\ - }\ + BM_ParseBenchmark(st, query);\ + }\ + BENCHMARK(name); + +#define TOKENIZE_QUERY_BENCHMARK(name, query)\ + static void name(benchmark::State& st) {\ + BM_TokenizeBenchmark(st, query);\ }\ BENCHMARK(name); +#define BENCHMARK_QUERY(test_name, query)\ + TOKENIZE_QUERY_BENCHMARK(test_name##Tokenize, query)\ + PARSE_QUERY_BENCHMARK(test_name##Parse, query) + + #endif \ No newline at end of file diff --git a/benchmark/parser_benchmark.cpp b/benchmark/parser_benchmark.cpp index e42edda..47928f0 100644 --- a/benchmark/parser_benchmark.cpp +++ b/benchmark/parser_benchmark.cpp @@ -9,67 +9,6 @@ #include "benchmark_utils.h" - -PARSE_QUERY_BENCHMARK(BM_Q1SimpleSelect, - "SELECT * FROM test;"); - -PARSE_QUERY_BENCHMARK(BM_Q2SimpleSubSelect, - "SELECT a, b AS address FROM (SELECT * FROM test WHERE c < 100 AND b > 3) t1 WHERE a < 10 AND b < 100;"); - -PARSE_QUERY_BENCHMARK(BM_Q3SingleJoin, - "SELECT \"left\".a, \"left\".b, \"right\".a, \"right\".b FROM table_a AS \"left\" JOIN table_b AS \"right\" ON \"left\".a = \"right\".a;"); - -PARSE_QUERY_BENCHMARK(BM_Q4TPCHQuery, -"SELECT" -" l_orderkey," -" SUM(l_extendedprice * (1 - l_discount)) AS revenue," -" o_orderdate," -" o_shippriority" -" FROM" -" customer," -" orders," -" lineitem" -" WHERE" -" c_mktsegment = '%s'" -" and c_custkey = o_custkey" -" and l_orderkey = o_orderkey" -" and o_orderdate < '%s'" -" and l_shipdate > '%s'" -" GROUP BY" -" l_orderkey," -" o_orderdate," -" o_shippriority" -" ORDER BY" -" revenue DESC," -" o_orderdate;" - ); -PARSE_QUERY_BENCHMARK(BM_TwoSelects, - "SELECT * FROM test; SELECT age, street AS address FROM data;"); - -PARSE_QUERY_BENCHMARK(BM_LongSelectList26, - "SELECT a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z FROM test;"); - -PARSE_QUERY_BENCHMARK(BM_LongSelectList52, - "SELECT a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z FROM test;"); - -PARSE_QUERY_BENCHMARK(BM_LongSelectElement26, - "SELECT aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa FROM test;"); - -PARSE_QUERY_BENCHMARK(BM_LongSelectElement52, - "SELECT aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa FROM test;"); - -// Prepare and Execute benchmarks. -PARSE_QUERY_BENCHMARK(BM_ExecuteStatement, - "EXECUTE procedure;"); - -PARSE_QUERY_BENCHMARK(BM_ExecuteWith2ParametersStatement, - "EXECUTE procedure(11, 'test');"); - -PARSE_QUERY_BENCHMARK(BM_ExecuteWith10ParametersStatement, - "EXECUTE procedure(11, 'test', 5.6, 4.2, 'abc', 6, 7, 8, 9, 10000);"); - - - // Benchmark the influence of increasing size of the query, while // the number of tokens remains unchanged. static void BM_CharacterCount(benchmark::State& st) { @@ -82,9 +21,11 @@ static void BM_CharacterCount(benchmark::State& st) { const std::string filler = std::string(pad, 'a'); query.replace(7, 6, filler); + st.counters["num_tokens"] = getNumTokens(query); + st.counters["num_chars"] = query.size(); while (st.KeepRunning()) { - hsql::SQLParserResult* result = hsql::SQLParser::parseSQLString(query); - delete result; + hsql::SQLParserResult result; + hsql::SQLParser::parse(query, &result); } } BENCHMARK(BM_CharacterCount) @@ -129,10 +70,12 @@ static void BM_ConditionalTokens(benchmark::State& st) { return; } + st.counters["num_tokens"] = getNumTokens(query); + st.counters["num_chars"] = query.size(); while (st.KeepRunning()) { - hsql::SQLParserResult* result = hsql::SQLParser::parseSQLString(query); - if (!result->isValid()) st.SkipWithError("Parsing failed!");\ - delete result; + hsql::SQLParserResult result; + hsql::SQLParser::parse(query, &result); + if (!result.isValid()) st.SkipWithError("Parsing failed!"); } } BENCHMARK(BM_ConditionalTokens) @@ -141,4 +84,4 @@ BENCHMARK(BM_ConditionalTokens) {1 << 2, 1 << 11}}); -BENCHMARK_MAIN(); + diff --git a/benchmark/queries.cpp b/benchmark/queries.cpp new file mode 100644 index 0000000..8b6d7e0 --- /dev/null +++ b/benchmark/queries.cpp @@ -0,0 +1,47 @@ +#include "queries.h" + +#include +#include +#include +#include + +#include "benchmark_utils.h" + +namespace filesystem = std::experimental::filesystem; + +std::string getQueryName(unsigned i) { + if (sql_queries[i].first.empty()) { + std::string name = "#" + std::to_string(i + 1); + return name; + } + return std::string("") + sql_queries[i].first; +} + +std::vector getQueriesFromDirectory(const std::string& dir_path) { + std::regex query_file_regex("\\.sql$"); + std::vector files; + + for (auto& entry : filesystem::directory_iterator(dir_path)) { + if (filesystem::is_regular_file(entry)) { + std::string path_str = filesystem::path(entry); + + if (std::regex_search(path_str, query_file_regex)) { + files.push_back(path_str); + } + } + } + + std::sort(files.begin(), files.end()); + + std::vector queries; + for (const std::string& file_path : files) { + const filesystem::path p(file_path); + const std::string query = readFileContents(file_path); + queries.emplace_back(p.filename(), query); + } + return queries; +} + +std::vector getTPCHQueries() { + return getQueriesFromDirectory("test/queries/"); +} diff --git a/benchmark/queries.h b/benchmark/queries.h new file mode 100644 index 0000000..357bee6 --- /dev/null +++ b/benchmark/queries.h @@ -0,0 +1,56 @@ +#ifndef __QUERIES_H__ +#define __QUERIES_H__ + +#include +#include + +typedef std::pair SQLQuery; + +// name, query +static std::vector sql_queries = { + {"Q1", "SELECT * FROM test;"}, + {"Q2", "SELECT a, b AS address FROM (SELECT * FROM test WHERE c < 100 AND b > 3) t1 WHERE a < 10 AND b < 100;"}, + {"Q3", "SELECT \"left\".a, \"left\".b, \"right\".a, \"right\".b FROM table_a AS \"left\" JOIN table_b AS \"right\" ON \"left\".a = \"right\".a;"}, + {"Q4", "" +"SELECT" +" l_orderkey," +" SUM(l_extendedprice * (1 - l_discount)) AS revenue," +" o_orderdate," +" o_shippriority" +" FROM" +" customer," +" orders," +" lineitem" +" WHERE" +" c_mktsegment = '%s'" +" and c_custkey = o_custkey" +" and l_orderkey = o_orderkey" +" and o_orderdate < '%s'" +" and l_shipdate > '%s'" +" GROUP BY" +" l_orderkey," +" o_orderdate," +" o_shippriority" +" ORDER BY" +" revenue DESC," +" o_orderdate;" +}, + + {"LongSelectList26", "SELECT a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z FROM test;"}, + {"LongSelectElement26", "SELECT abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxy FROM test;"}, + {"LongSelectList52", "SELECT a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z FROM test;"}, + {"LongSelectElement52", "SELECT abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxy FROM test;"}, + {"TwoSelects", "SELECT * FROM test; SELECT age, street AS address FROM data;"}, + {"ExecuteNoParams", "EXECUTE procedure;"}, + {"Execute2Params", "EXECUTE procedure(11, 'test');"}, + {"Execute10Params", "EXECUTE procedure(11, 'test', 5.6, 4.2, 'abc', 6, 7, 8, 9, 10000);"}, + // {"name", "query"}, +}; + +std::string getQueryName(unsigned i); + +std::vector getQueriesFromDirectory(const std::string& dir_path); + +std::vector getTPCHQueries(); + +#endif diff --git a/example/example.cpp b/example/example.cpp index 4cceb60..943de4b 100644 --- a/example/example.cpp +++ b/example/example.cpp @@ -17,7 +17,7 @@ int main(int argc, char* argv[]) { // parse a given query hsql::SQLParserResult result; - hsql::SQLParser::parseSQLString(query, &result); + hsql::SQLParser::parse(query, &result); // check whether the parsing was successful diff --git a/src/SQLParser.cpp b/src/SQLParser.cpp index ce5aee1..930f8d3 100644 --- a/src/SQLParser.cpp +++ b/src/SQLParser.cpp @@ -5,7 +5,6 @@ #include #include - namespace hsql { SQLParser::SQLParser() { @@ -13,16 +12,16 @@ namespace hsql { } // static - bool SQLParser::parseSQLString(const char* text, SQLParserResult* result) { + bool SQLParser::parse(const std::string& sql, SQLParserResult* result) { yyscan_t scanner; YY_BUFFER_STATE state; if (hsql_lex_init(&scanner)) { // Couldn't initialize the lexer. - fprintf(stderr, "[Error] SQLParser: Error when initializing lexer!\n"); + fprintf(stderr, "SQLParser: Error when initializing lexer!\n"); return false; } - + const char* text = sql.c_str(); state = hsql__scan_string(text, scanner); // Parse the tokens. @@ -38,25 +37,44 @@ namespace hsql { } // static - bool SQLParser::parseSQLString(const std::string& text, SQLParserResult* result) { - return parseSQLString(text.c_str(), result); + bool SQLParser::parseSQLString(const char* sql, SQLParserResult* result) { + return parse(sql, result); + } + + bool SQLParser::parseSQLString(const std::string& sql, SQLParserResult* result) { + return parse(sql, result); } // static - SQLParserResult* SQLParser::parseSQLString(const char* text) { - SQLParserResult* result = new SQLParserResult(); - - if (!SQLParser::parseSQLString(text, result)) { - delete result; - return nullptr; + bool SQLParser::tokenize(const std::string& sql, std::vector* tokens) { + // Initialize the scanner. + yyscan_t scanner; + if (hsql_lex_init(&scanner)) { + fprintf(stderr, "SQLParser: Error when initializing lexer!\n"); + return false; } - return result; - } + YY_BUFFER_STATE state; + state = hsql__scan_string(sql.c_str(), scanner); - // static - SQLParserResult* SQLParser::parseSQLString(const std::string& text) { - return parseSQLString(text.c_str()); + YYSTYPE yylval; + YYLTYPE yylloc; + + // Step through the string until EOF is read. + // Note: hsql_lex returns int, but we know that its range is within 16 bit. + int16_t token = hsql_lex(&yylval, &yylloc, scanner); + while (token != 0) { + tokens->push_back(token); + token = hsql_lex(&yylval, &yylloc, scanner); + + if (token == SQL_IDENTIFIER || token == SQL_STRING) { + free(yylval.sval); + } + } + + hsql__delete_buffer(state, scanner); + hsql_lex_destroy(scanner); + return true; } } // namespace hsql diff --git a/src/SQLParser.h b/src/SQLParser.h index e494680..244bf3a 100644 --- a/src/SQLParser.h +++ b/src/SQLParser.h @@ -9,29 +9,25 @@ namespace hsql { // Static methods used to parse SQL strings. class SQLParser { public: + // Parses a given constant character SQL string into the result object. // Returns true if the lexer and parser could run without internal errors. // This does NOT mean that the SQL string was valid SQL. To check that // you need to check result->isValid(); + static bool parse(const std::string& sql, SQLParserResult* result); + + // Run tokenization on the given string and store the tokens in the output vector. + static bool tokenize(const std::string& sql, std::vector* tokens); + + // Deprecated. + // Old method to parse SQL strings. Replaced by parse(). static bool parseSQLString(const char* sql, SQLParserResult* result); - // Parses a given SQL string into the result object. + // Deprecated. + // Old method to parse SQL strings. Replaced by parse(). static bool parseSQLString(const std::string& sql, SQLParserResult* result); - // Deprecated: - // Parses a given constant character SQL string. - // Note: This is kept for legacy reasons. It is recommended to use - // the (const char*, SQLParserResult*) implementation. - static SQLParserResult* parseSQLString(const char* sql); - - // Deprecated: - // Parses an SQL std::string. - // Note: This is kept for legacy reasons. It is recommended to use - // the (const std::string&, SQLParserResult*) implementation. - static SQLParserResult* parseSQLString(const std::string& sql); - private: - // Static class can't be instatiated. SQLParser(); }; diff --git a/src/SQLParserResult.cpp b/src/SQLParserResult.cpp index 94fc07f..47a5300 100644 --- a/src/SQLParserResult.cpp +++ b/src/SQLParserResult.cpp @@ -100,7 +100,9 @@ namespace hsql { void SQLParserResult::addParameter(Expr* parameter) { parameters_.push_back(parameter); std::sort(parameters_.begin(), parameters_.end(), - [](const Expr* a, const Expr* b) { return a->ival < b->ival; }); + [](const Expr * a, const Expr * b) { + return a->ival < b->ival; + }); } const std::vector& SQLParserResult::parameters() { diff --git a/src/sql/SQLStatement.h b/src/sql/SQLStatement.h index 12387bd..07996fc 100644 --- a/src/sql/SQLStatement.h +++ b/src/sql/SQLStatement.h @@ -1,9 +1,10 @@ #ifndef __SQLPARSER__SQLSTATEMENT_H__ #define __SQLPARSER__SQLSTATEMENT_H__ -#include "Expr.h" #include +#include "Expr.h" + namespace hsql { enum StatementType { kStmtError, // unused diff --git a/test/queries/tpc-h-05.sql b/test/queries/tpc-h-05.sql index 0469f3f..95d2923 100644 --- a/test/queries/tpc-h-05.sql +++ b/test/queries/tpc-h-05.sql @@ -3,7 +3,7 @@ SELECT N_NAME, SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)) AS REVENUE FROM CUSTOMER, ORDERS, LINEITEM, SUPPLIER, NATION, REGION WHERE C_CUSTKEY = O_CUSTKEY AND L_ORDERKEY = O_ORDERKEY AND L_SUPPKEY = S_SUPPKEY AND C_NATIONKEY = S_NATIONKEY AND S_NATIONKEY = N_NATIONKEY AND N_REGIONKEY = R_REGIONKEY -AND R_NAME = 'ASIA' AND O_ORDERDATE >= '1994-01-01' +AND R_NAME = 'ASIA' AND O_ORDERDATE >= '1994-01-01' AND O_ORDERDATE < DATEADD(YY, 1, cast('1994-01-01' as datetime)) GROUP BY N_NAME ORDER BY REVENUE DESC \ No newline at end of file diff --git a/test/queries/tpc-h-11-15.sql b/test/queries/tpc-h-11-15.sql deleted file mode 100644 index 69c755d..0000000 --- a/test/queries/tpc-h-11-15.sql +++ /dev/null @@ -1,58 +0,0 @@ --- From: --- http://www.sqlserver-dba.com/2011/09/this-is-a-followup-on-my-earlier-post-of-sql-server-test-data-generation-testing-tools-i-had-some-requests-for-my-set-up-pr.html - --- TPC_H Query 11 - Important Stock Identification -SELECT PS_PARTKEY, SUM(PS_SUPPLYCOST*PS_AVAILQTY) AS VALUE -FROM PARTSUPP, SUPPLIER, NATION -WHERE PS_SUPPKEY = S_SUPPKEY AND S_NATIONKEY = N_NATIONKEY AND N_NAME = 'GERMANY' -GROUP BY PS_PARTKEY -HAVING SUM(PS_SUPPLYCOST*PS_AVAILQTY) > (SELECT SUM(PS_SUPPLYCOST*PS_AVAILQTY) * 0.0001000000 - FROM PARTSUPP, SUPPLIER, NATION - WHERE PS_SUPPKEY = S_SUPPKEY AND S_NATIONKEY = N_NATIONKEY AND N_NAME = 'GERMANY') -ORDER BY VALUE DESC; - - --- TPC_H Query 12 - Shipping Modes and Order Priority -SELECT L_SHIPMODE, -SUM(CASE WHEN O_ORDERPRIORITY = '1-URGENT' OR O_ORDERPRIORITY = '2-HIGH' THEN 1 ELSE 0 END) AS HIGH_LINE_COUNT, -SUM(CASE WHEN O_ORDERPRIORITY <> '1-URGENT' AND O_ORDERPRIORITY <> '2-HIGH' THEN 1 ELSE 0 END ) AS LOW_LINE_COUNT -FROM ORDERS, LINEITEM -WHERE O_ORDERKEY = L_ORDERKEY AND L_SHIPMODE IN ('MAIL','SHIP') -AND L_COMMITDATE < L_RECEIPTDATE AND L_SHIPDATE < L_COMMITDATE AND L_RECEIPTDATE >= '1994-01-01' -AND L_RECEIPTDATE < dateadd(mm, 1, cast('1995-09-01' as datetime)) -GROUP BY L_SHIPMODE -ORDER BY L_SHIPMODE; - - --- TPC_H Query 13 - Customer Distribution -SELECT C_COUNT, COUNT(*) AS CUSTDIST -FROM (SELECT C_CUSTKEY, COUNT(O_ORDERKEY) - FROM CUSTOMER left outer join ORDERS on C_CUSTKEY = O_CUSTKEY - AND O_COMMENT not like '%%special%%requests%%' - GROUP BY C_CUSTKEY) AS C_ORDERS -GROUP BY C_COUNT -ORDER BY CUSTDIST DESC, C_COUNT DESC; - - --- TPC_H Query 14 - Promotion Effect -SELECT 100.00* SUM(CASE WHEN P_TYPE LIKE 'PROMO%%' THEN L_EXTENDEDPRICE*(1-L_DISCOUNT) -ELSE 0 END) / SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)) AS PROMO_REVENUE -FROM LINEITEM, "PART" -WHERE L_PARTKEY = P_PARTKEY AND L_SHIPDATE >= '1995-09-01' AND L_SHIPDATE < dateadd(mm, 1, '1995-09-01'); - - --- TPC_H Query 15.1 - Create View for Top Supplier Query -CREATE VIEW REVENUE0 (SUPPLIER_NO, TOTAL_REVENUE) AS -SELECT L_SUPPKEY, SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)) FROM LINEITEM -WHERE L_SHIPDATE >= '1996-01-01' AND L_SHIPDATE < dateadd(mm, 3, cast('1996-01-01' as datetime)) -GROUP BY L_SUPPKEY; - - --- TPC_H Query 15.2 - Top Supplier -SELECT S_SUPPKEY, S_NAME, S_ADDRESS, S_PHONE, TOTAL_REVENUE -FROM SUPPLIER, REVENUE0 -WHERE S_SUPPKEY = SUPPLIER_NO AND TOTAL_REVENUE = (SELECT MAX(TOTAL_REVENUE) FROM REVENUE0) -ORDER BY S_SUPPKEY; - --- TPC_H Query 15.3 - Drop View -DROP VIEW REVENUE0; diff --git a/test/queries/tpc-h-11.sql b/test/queries/tpc-h-11.sql new file mode 100644 index 0000000..41954bb --- /dev/null +++ b/test/queries/tpc-h-11.sql @@ -0,0 +1,10 @@ +-- http://www.sqlserver-dba.com/2011/09/this-is-a-followup-on-my-earlier-post-of-sql-server-test-data-generation-testing-tools-i-had-some-requests-for-my-set-up-pr.html +-- TPC_H Query 11 - Important Stock Identification +SELECT PS_PARTKEY, SUM(PS_SUPPLYCOST*PS_AVAILQTY) AS VALUE +FROM PARTSUPP, SUPPLIER, NATION +WHERE PS_SUPPKEY = S_SUPPKEY AND S_NATIONKEY = N_NATIONKEY AND N_NAME = 'GERMANY' +GROUP BY PS_PARTKEY +HAVING SUM(PS_SUPPLYCOST*PS_AVAILQTY) > (SELECT SUM(PS_SUPPLYCOST*PS_AVAILQTY) * 0.0001000000 + FROM PARTSUPP, SUPPLIER, NATION + WHERE PS_SUPPKEY = S_SUPPKEY AND S_NATIONKEY = N_NATIONKEY AND N_NAME = 'GERMANY') +ORDER BY VALUE DESC; \ No newline at end of file diff --git a/test/queries/tpc-h-12.sql b/test/queries/tpc-h-12.sql new file mode 100644 index 0000000..59a91b0 --- /dev/null +++ b/test/queries/tpc-h-12.sql @@ -0,0 +1,10 @@ +-- TPC_H Query 12 - Shipping Modes and Order Priority +SELECT L_SHIPMODE, +SUM(CASE WHEN O_ORDERPRIORITY = '1-URGENT' OR O_ORDERPRIORITY = '2-HIGH' THEN 1 ELSE 0 END) AS HIGH_LINE_COUNT, +SUM(CASE WHEN O_ORDERPRIORITY <> '1-URGENT' AND O_ORDERPRIORITY <> '2-HIGH' THEN 1 ELSE 0 END ) AS LOW_LINE_COUNT +FROM ORDERS, LINEITEM +WHERE O_ORDERKEY = L_ORDERKEY AND L_SHIPMODE IN ('MAIL','SHIP') +AND L_COMMITDATE < L_RECEIPTDATE AND L_SHIPDATE < L_COMMITDATE AND L_RECEIPTDATE >= '1994-01-01' +AND L_RECEIPTDATE < dateadd(mm, 1, cast('1995-09-01' as datetime)) +GROUP BY L_SHIPMODE +ORDER BY L_SHIPMODE; \ No newline at end of file diff --git a/test/queries/tpc-h-13.sql b/test/queries/tpc-h-13.sql new file mode 100644 index 0000000..dd2614c --- /dev/null +++ b/test/queries/tpc-h-13.sql @@ -0,0 +1,8 @@ +-- TPC_H Query 13 - Customer Distribution +SELECT C_COUNT, COUNT(*) AS CUSTDIST +FROM (SELECT C_CUSTKEY, COUNT(O_ORDERKEY) + FROM CUSTOMER left outer join ORDERS on C_CUSTKEY = O_CUSTKEY + AND O_COMMENT not like '%%special%%requests%%' + GROUP BY C_CUSTKEY) AS C_ORDERS +GROUP BY C_COUNT +ORDER BY CUSTDIST DESC, C_COUNT DESC; \ No newline at end of file diff --git a/test/queries/tpc-h-14.sql b/test/queries/tpc-h-14.sql new file mode 100644 index 0000000..12fea0d --- /dev/null +++ b/test/queries/tpc-h-14.sql @@ -0,0 +1,5 @@ +-- TPC_H Query 14 - Promotion Effect +SELECT 100.00* SUM(CASE WHEN P_TYPE LIKE 'PROMO%%' THEN L_EXTENDEDPRICE*(1-L_DISCOUNT) +ELSE 0 END) / SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)) AS PROMO_REVENUE +FROM LINEITEM, "PART" +WHERE L_PARTKEY = P_PARTKEY AND L_SHIPDATE >= '1995-09-01' AND L_SHIPDATE < dateadd(mm, 1, '1995-09-01'); \ No newline at end of file diff --git a/test/queries/tpc-h-15.sql b/test/queries/tpc-h-15.sql new file mode 100644 index 0000000..5593df1 --- /dev/null +++ b/test/queries/tpc-h-15.sql @@ -0,0 +1,15 @@ +-- TPC_H Query 15.1 - Create View for Top Supplier Query +CREATE VIEW REVENUE0 (SUPPLIER_NO, TOTAL_REVENUE) AS +SELECT L_SUPPKEY, SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)) FROM LINEITEM +WHERE L_SHIPDATE >= '1996-01-01' AND L_SHIPDATE < dateadd(mm, 3, cast('1996-01-01' as datetime)) +GROUP BY L_SUPPKEY; + + +-- TPC_H Query 15.2 - Top Supplier +SELECT S_SUPPKEY, S_NAME, S_ADDRESS, S_PHONE, TOTAL_REVENUE +FROM SUPPLIER, REVENUE0 +WHERE S_SUPPKEY = SUPPLIER_NO AND TOTAL_REVENUE = (SELECT MAX(TOTAL_REVENUE) FROM REVENUE0) +ORDER BY S_SUPPKEY; + +-- TPC_H Query 15.3 - Drop View +DROP VIEW REVENUE0; \ No newline at end of file diff --git a/test/queries/tpc-h-16-22.sql b/test/queries/tpc-h-16-22.sql deleted file mode 100644 index dd5f4cd..0000000 --- a/test/queries/tpc-h-16-22.sql +++ /dev/null @@ -1,71 +0,0 @@ --- From: --- http://www.sqlserver-dba.com/2011/09/this-is-a-followup-on-my-earlier-post-of-sql-server-test-data-generation-testing-tools-i-had-some-requests-for-my-set-up-pr.html - --- TPC_H Query 16 - Parts/Supplier Relationship -SELECT P_BRAND, P_TYPE, P_SIZE, COUNT(DISTINCT PS_SUPPKEY) AS SUPPLIER_CNT -FROM PARTSUPP, "PART" -WHERE P_PARTKEY = PS_PARTKEY AND P_BRAND <> 'Brand#45' AND P_TYPE NOT LIKE 'MEDIUM POLISHED%%' -AND P_SIZE IN (49, 14, 23, 45, 19, 3, 36, 9) AND PS_SUPPKEY NOT IN (SELECT S_SUPPKEY FROM SUPPLIER - WHERE S_COMMENT LIKE '%%Customer%%Complaints%%') -GROUP BY P_BRAND, P_TYPE, P_SIZE -ORDER BY SUPPLIER_CNT DESC, P_BRAND, P_TYPE, P_SIZE; - - --- TPC_H Query 17 - Small-Quantity-Order Revenue -SELECT SUM(L_EXTENDEDPRICE)/7.0 AS AVG_YEARLY FROM LINEITEM, "PART" -WHERE P_PARTKEY = L_PARTKEY AND P_BRAND = 'Brand#23' AND P_CONTAINER = 'MED BOX' -AND L_QUANTITY < (SELECT 0.2*AVG(L_QUANTITY) FROM LINEITEM WHERE L_PARTKEY = P_PARTKEY); - - --- TPC_H Query 18 - Large Volume Customer -SELECT TOP 100 C_NAME, C_CUSTKEY, O_ORDERKEY, O_ORDERDATE, O_TOTALPRICE, SUM(L_QUANTITY) -FROM CUSTOMER, ORDERS, LINEITEM -WHERE O_ORDERKEY IN (SELECT L_ORDERKEY FROM LINEITEM GROUP BY L_ORDERKEY HAVING - SUM(L_QUANTITY) > 300) AND C_CUSTKEY = O_CUSTKEY AND O_ORDERKEY = L_ORDERKEY -GROUP BY C_NAME, C_CUSTKEY, O_ORDERKEY, O_ORDERDATE, O_TOTALPRICE -ORDER BY O_TOTALPRICE DESC, O_ORDERDATE; - - --- TPC_H Query 19 - Discounted Revenue -SELECT SUM(L_EXTENDEDPRICE* (1 - L_DISCOUNT)) AS REVENUE -FROM LINEITEM, "PART" -WHERE (P_PARTKEY = L_PARTKEY AND P_BRAND = 'Brand#12' AND P_CONTAINER IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND L_QUANTITY >= 1 AND L_QUANTITY <= 1 + 10 AND P_SIZE BETWEEN 1 AND 5 -AND L_SHIPMODE IN ('AIR', 'AIR REG') AND L_SHIPINSTRUCT = 'DELIVER IN PERSON') -OR (P_PARTKEY = L_PARTKEY AND P_BRAND ='Brand#23' AND P_CONTAINER IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND L_QUANTITY >=10 AND L_QUANTITY <=10 + 10 AND P_SIZE BETWEEN 1 AND 10 -AND L_SHIPMODE IN ('AIR', 'AIR REG') AND L_SHIPINSTRUCT = 'DELIVER IN PERSON') -OR (P_PARTKEY = L_PARTKEY AND P_BRAND = 'Brand#34' AND P_CONTAINER IN ( 'LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND L_QUANTITY >=20 AND L_QUANTITY <= 20 + 10 AND P_SIZE BETWEEN 1 AND 15 -AND L_SHIPMODE IN ('AIR', 'AIR REG') AND L_SHIPINSTRUCT = 'DELIVER IN PERSON'); - - --- TPC_H Query 20 - Potential Part Promotion -SELECT S_NAME, S_ADDRESS FROM SUPPLIER, NATION -WHERE S_SUPPKEY IN (SELECT PS_SUPPKEY FROM PARTSUPP - WHERE PS_PARTKEY in (SELECT P_PARTKEY FROM "PART" WHERE P_NAME like 'forest%%') AND - PS_AVAILQTY > (SELECT 0.5*sum(L_QUANTITY) FROM LINEITEM WHERE L_PARTKEY = PS_PARTKEY AND - L_SUPPKEY = PS_SUPPKEY AND L_SHIPDATE >= '1994-01-01' AND - L_SHIPDATE < dateadd(yy,1,'1994-01-01'))) AND S_NATIONKEY = N_NATIONKEY AND N_NAME = 'CANADA' -ORDER BY S_NAME; - - --- TPC_H Query 21 - Suppliers Who Kept Orders Waiting -SELECT TOP 100 S_NAME, COUNT(*) AS NUMWAIT -FROM SUPPLIER, LINEITEM L1, ORDERS, NATION WHERE S_SUPPKEY = L1.L_SUPPKEY AND -O_ORDERKEY = L1.L_ORDERKEY AND O_ORDERSTATUS = 'F' AND L1.L_RECEIPTDATE> L1.L_COMMITDATE -AND EXISTS (SELECT * FROM LINEITEM L2 WHERE L2.L_ORDERKEY = L1.L_ORDERKEY - AND L2.L_SUPPKEY <> L1.L_SUPPKEY) AND -NOT EXISTS (SELECT * FROM LINEITEM L3 WHERE L3.L_ORDERKEY = L1.L_ORDERKEY AND - L3.L_SUPPKEY <> L1.L_SUPPKEY AND L3.L_RECEIPTDATE > L3.L_COMMITDATE) AND -S_NATIONKEY = N_NATIONKEY AND N_NAME = 'SAUDI ARABIA' -GROUP BY S_NAME -ORDER BY NUMWAIT DESC, S_NAME; - - --- TPC_H Query 22 - Global Sales Opportunity */ -SELECT CNTRYCODE, COUNT(*) AS NUMCUST, SUM(C_ACCTBAL) AS TOTACCTBAL -FROM (SELECT SUBSTRING(C_PHONE,1,2) AS CNTRYCODE, C_ACCTBAL - FROM CUSTOMER WHERE SUBSTRING(C_PHONE,1,2) IN ('13', '31', '23', '29', '30', '18', '17') AND - C_ACCTBAL > (SELECT AVG(C_ACCTBAL) FROM CUSTOMER WHERE C_ACCTBAL > 0.00 AND - SUBSTRING(C_PHONE,1,2) IN ('13', '31', '23', '29', '30', '18', '17')) AND - NOT EXISTS ( SELECT * FROM ORDERS WHERE O_CUSTKEY = C_CUSTKEY)) AS CUSTSALE -GROUP BY CNTRYCODE -ORDER BY CNTRYCODE; \ No newline at end of file diff --git a/test/queries/tpc-h-16.sql b/test/queries/tpc-h-16.sql new file mode 100644 index 0000000..7bf0bbc --- /dev/null +++ b/test/queries/tpc-h-16.sql @@ -0,0 +1,9 @@ +-- http://www.sqlserver-dba.com/2011/09/this-is-a-followup-on-my-earlier-post-of-sql-server-test-data-generation-testing-tools-i-had-some-requests-for-my-set-up-pr.html +-- TPC_H Query 16 - Parts/Supplier Relationship +SELECT P_BRAND, P_TYPE, P_SIZE, COUNT(DISTINCT PS_SUPPKEY) AS SUPPLIER_CNT +FROM PARTSUPP, "PART" +WHERE P_PARTKEY = PS_PARTKEY AND P_BRAND <> 'Brand#45' AND P_TYPE NOT LIKE 'MEDIUM POLISHED%%' +AND P_SIZE IN (49, 14, 23, 45, 19, 3, 36, 9) AND PS_SUPPKEY NOT IN (SELECT S_SUPPKEY FROM SUPPLIER + WHERE S_COMMENT LIKE '%%Customer%%Complaints%%') +GROUP BY P_BRAND, P_TYPE, P_SIZE +ORDER BY SUPPLIER_CNT DESC, P_BRAND, P_TYPE, P_SIZE; \ No newline at end of file diff --git a/test/queries/tpc-h-17.sql b/test/queries/tpc-h-17.sql new file mode 100644 index 0000000..e6d50ac --- /dev/null +++ b/test/queries/tpc-h-17.sql @@ -0,0 +1,4 @@ +-- TPC_H Query 17 - Small-Quantity-Order Revenue +SELECT SUM(L_EXTENDEDPRICE)/7.0 AS AVG_YEARLY FROM LINEITEM, "PART" +WHERE P_PARTKEY = L_PARTKEY AND P_BRAND = 'Brand#23' AND P_CONTAINER = 'MED BOX' +AND L_QUANTITY < (SELECT 0.2*AVG(L_QUANTITY) FROM LINEITEM WHERE L_PARTKEY = P_PARTKEY); \ No newline at end of file diff --git a/test/queries/tpc-h-18.sql b/test/queries/tpc-h-18.sql new file mode 100644 index 0000000..57e9a34 --- /dev/null +++ b/test/queries/tpc-h-18.sql @@ -0,0 +1,7 @@ +-- TPC_H Query 18 - Large Volume Customer +SELECT TOP 100 C_NAME, C_CUSTKEY, O_ORDERKEY, O_ORDERDATE, O_TOTALPRICE, SUM(L_QUANTITY) +FROM CUSTOMER, ORDERS, LINEITEM +WHERE O_ORDERKEY IN (SELECT L_ORDERKEY FROM LINEITEM GROUP BY L_ORDERKEY HAVING + SUM(L_QUANTITY) > 300) AND C_CUSTKEY = O_CUSTKEY AND O_ORDERKEY = L_ORDERKEY +GROUP BY C_NAME, C_CUSTKEY, O_ORDERKEY, O_ORDERDATE, O_TOTALPRICE +ORDER BY O_TOTALPRICE DESC, O_ORDERDATE; \ No newline at end of file diff --git a/test/queries/tpc-h-19.sql b/test/queries/tpc-h-19.sql new file mode 100644 index 0000000..8000b96 --- /dev/null +++ b/test/queries/tpc-h-19.sql @@ -0,0 +1,9 @@ +-- TPC_H Query 19 - Discounted Revenue +SELECT SUM(L_EXTENDEDPRICE* (1 - L_DISCOUNT)) AS REVENUE +FROM LINEITEM, "PART" +WHERE (P_PARTKEY = L_PARTKEY AND P_BRAND = 'Brand#12' AND P_CONTAINER IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND L_QUANTITY >= 1 AND L_QUANTITY <= 1 + 10 AND P_SIZE BETWEEN 1 AND 5 +AND L_SHIPMODE IN ('AIR', 'AIR REG') AND L_SHIPINSTRUCT = 'DELIVER IN PERSON') +OR (P_PARTKEY = L_PARTKEY AND P_BRAND ='Brand#23' AND P_CONTAINER IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND L_QUANTITY >=10 AND L_QUANTITY <=10 + 10 AND P_SIZE BETWEEN 1 AND 10 +AND L_SHIPMODE IN ('AIR', 'AIR REG') AND L_SHIPINSTRUCT = 'DELIVER IN PERSON') +OR (P_PARTKEY = L_PARTKEY AND P_BRAND = 'Brand#34' AND P_CONTAINER IN ( 'LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND L_QUANTITY >=20 AND L_QUANTITY <= 20 + 10 AND P_SIZE BETWEEN 1 AND 15 +AND L_SHIPMODE IN ('AIR', 'AIR REG') AND L_SHIPINSTRUCT = 'DELIVER IN PERSON'); \ No newline at end of file diff --git a/test/queries/tpc-h-20.sql b/test/queries/tpc-h-20.sql new file mode 100644 index 0000000..9803a25 --- /dev/null +++ b/test/queries/tpc-h-20.sql @@ -0,0 +1,8 @@ +-- TPC_H Query 20 - Potential Part Promotion +SELECT S_NAME, S_ADDRESS FROM SUPPLIER, NATION +WHERE S_SUPPKEY IN (SELECT PS_SUPPKEY FROM PARTSUPP + WHERE PS_PARTKEY in (SELECT P_PARTKEY FROM "PART" WHERE P_NAME like 'forest%%') AND + PS_AVAILQTY > (SELECT 0.5*sum(L_QUANTITY) FROM LINEITEM WHERE L_PARTKEY = PS_PARTKEY AND + L_SUPPKEY = PS_SUPPKEY AND L_SHIPDATE >= '1994-01-01' AND + L_SHIPDATE < dateadd(yy,1,'1994-01-01'))) AND S_NATIONKEY = N_NATIONKEY AND N_NAME = 'CANADA' +ORDER BY S_NAME; \ No newline at end of file diff --git a/test/queries/tpc-h-21.sql b/test/queries/tpc-h-21.sql new file mode 100644 index 0000000..27be0c6 --- /dev/null +++ b/test/queries/tpc-h-21.sql @@ -0,0 +1,11 @@ +-- TPC_H Query 21 - Suppliers Who Kept Orders Waiting +SELECT TOP 100 S_NAME, COUNT(*) AS NUMWAIT +FROM SUPPLIER, LINEITEM L1, ORDERS, NATION WHERE S_SUPPKEY = L1.L_SUPPKEY AND +O_ORDERKEY = L1.L_ORDERKEY AND O_ORDERSTATUS = 'F' AND L1.L_RECEIPTDATE> L1.L_COMMITDATE +AND EXISTS (SELECT * FROM LINEITEM L2 WHERE L2.L_ORDERKEY = L1.L_ORDERKEY + AND L2.L_SUPPKEY <> L1.L_SUPPKEY) AND +NOT EXISTS (SELECT * FROM LINEITEM L3 WHERE L3.L_ORDERKEY = L1.L_ORDERKEY AND + L3.L_SUPPKEY <> L1.L_SUPPKEY AND L3.L_RECEIPTDATE > L3.L_COMMITDATE) AND +S_NATIONKEY = N_NATIONKEY AND N_NAME = 'SAUDI ARABIA' +GROUP BY S_NAME +ORDER BY NUMWAIT DESC, S_NAME; \ No newline at end of file diff --git a/test/queries/tpc-h-22.sql b/test/queries/tpc-h-22.sql new file mode 100644 index 0000000..4d1d11e --- /dev/null +++ b/test/queries/tpc-h-22.sql @@ -0,0 +1,9 @@ +-- TPC_H Query 22 - Global Sales Opportunity */ +SELECT CNTRYCODE, COUNT(*) AS NUMCUST, SUM(C_ACCTBAL) AS TOTACCTBAL +FROM (SELECT SUBSTRING(C_PHONE,1,2) AS CNTRYCODE, C_ACCTBAL + FROM CUSTOMER WHERE SUBSTRING(C_PHONE,1,2) IN ('13', '31', '23', '29', '30', '18', '17') AND + C_ACCTBAL > (SELECT AVG(C_ACCTBAL) FROM CUSTOMER WHERE C_ACCTBAL > 0.00 AND + SUBSTRING(C_PHONE,1,2) IN ('13', '31', '23', '29', '30', '18', '17')) AND + NOT EXISTS ( SELECT * FROM ORDERS WHERE O_CUSTKEY = C_CUSTKEY)) AS CUSTSALE +GROUP BY CNTRYCODE +ORDER BY CNTRYCODE; \ No newline at end of file diff --git a/test/sql_asserts.h b/test/sql_asserts.h index 13496cd..2ac60b4 100644 --- a/test/sql_asserts.h +++ b/test/sql_asserts.h @@ -4,7 +4,7 @@ #define TEST_PARSE_SQL_QUERY(query, result, numStatements) \ hsql::SQLParserResult result; \ - hsql::SQLParser::parseSQLString(query, &result); \ + hsql::SQLParser::parse(query, &result); \ ASSERT(result.isValid()); \ ASSERT_EQ(result.size(), numStatements); diff --git a/test/sql_grammar_test.cpp b/test/sql_grammar_test.cpp index 0daf0c0..8429a43 100644 --- a/test/sql_grammar_test.cpp +++ b/test/sql_grammar_test.cpp @@ -80,7 +80,7 @@ TEST(AutoGrammarTest) { // Parsing SQLParserResult result; - SQLParser::parseSQLString(sql.c_str(), &result); + SQLParser::parse(sql.c_str(), &result); end = std::chrono::system_clock::now(); std::chrono::duration elapsed_seconds = end - start; diff --git a/test/sql_parser.cpp b/test/sql_parser.cpp new file mode 100644 index 0000000..ed3c236 --- /dev/null +++ b/test/sql_parser.cpp @@ -0,0 +1,44 @@ +#include "thirdparty/microtest/microtest.h" + +#include +#include +#include + +#include "sql_asserts.h" +#include "SQLParser.h" +#include "parser/bison_parser.h" + +using namespace hsql; + +void test_tokens(const std::string& query, const std::vector& expected_tokens) { + std::vector tokens; + ASSERT(SQLParser::tokenize(query, &tokens)); + + ASSERT_EQ(expected_tokens.size(), tokens.size()); + + for (unsigned i = 0; i < expected_tokens.size(); ++i) { + ASSERT_EQ(expected_tokens[i], tokens[i]); + } +} + +TEST(SQLParserTokenizeTest) { + test_tokens("SELECT * FROM test;", { SQL_SELECT, '*', SQL_FROM, SQL_IDENTIFIER, ';' }); + test_tokens("SELECT a, 'b' FROM test WITH HINT;", { SQL_SELECT, SQL_IDENTIFIER, ',', SQL_STRING, SQL_FROM, SQL_IDENTIFIER, SQL_WITH, SQL_HINT, ';' }); +} + +TEST(SQLParserTokenizeStringifyTest) { + const std::string query = "SELECT * FROM test;"; + std::vector tokens; + ASSERT(SQLParser::tokenize(query, &tokens)); + + // Make u16string. + std::u16string token_string(tokens.cbegin(), tokens.cend()); + + // Check if u16 string is cacheable. + std::map cache; + cache[token_string] = query; + + ASSERT(query == cache[token_string]); + ASSERT(&query != &cache[token_string]); +} + diff --git a/test/sql_tests.cpp b/test/sql_tests.cpp index 9265171..5e63973 100644 --- a/test/sql_tests.cpp +++ b/test/sql_tests.cpp @@ -14,7 +14,7 @@ using namespace hsql; TEST(DeleteStatementTest) { SQLParserResult result; - SQLParser::parseSQLString("DELETE FROM students WHERE grade > 2.0;", &result); + SQLParser::parse("DELETE FROM students WHERE grade > 2.0;", &result); ASSERT(result.isValid()); ASSERT_EQ(result.size(), 1); @@ -30,7 +30,7 @@ TEST(DeleteStatementTest) { TEST(CreateStatementTest) { SQLParserResult result; - SQLParser::parseSQLString("CREATE TABLE students (name TEXT, student_number INT, city INTEGER, grade DOUBLE)", &result); + SQLParser::parse("CREATE TABLE students (name TEXT, student_number INT, city INTEGER, grade DOUBLE)", &result); ASSERT(result.isValid()); ASSERT_EQ(result.size(), 1); @@ -54,7 +54,7 @@ TEST(CreateStatementTest) { TEST(UpdateStatementTest) { SQLParserResult result; - SQLParser::parseSQLString("UPDATE students SET grade = 5.0, name = 'test' WHERE name = 'Max Mustermann';", &result); + SQLParser::parse("UPDATE students SET grade = 5.0, name = 'test' WHERE name = 'Max Mustermann';", &result); ASSERT(result.isValid()); ASSERT_EQ(result.size(), 1); @@ -130,7 +130,7 @@ TEST(ReleaseStatementTest) { SQLParserResult parse_and_move(std::string query) { hsql::SQLParserResult result; - hsql::SQLParser::parseSQLString(query, &result); + hsql::SQLParser::parse(query, &result); // Moves on return. return result; } diff --git a/test/test.sh b/test/test.sh index efe68ad..06cd93f 100755 --- a/test/test.sh +++ b/test/test.sh @@ -17,7 +17,7 @@ CONFLICT_RET=0 ################################################# # Running SQL parser tests. printf "\n${GREEN}Running SQL parser tests...${NC}\n" -bin/sql_tests -f "test/valid_queries.sql" +bin/tests -f "test/valid_queries.sql" SQL_TEST_RET=$? if [ $SQL_TEST_RET -eq 0 ]; then @@ -31,7 +31,7 @@ fi # Running memory leak checks. printf "\n${GREEN}Running memory leak checks...${NC}\n" valgrind --leak-check=full --error-exitcode=200 --log-fd=3 \ - ./bin/sql_tests -f "test/valid_queries.sql" 3>&1 >/dev/null 2>/dev/null + ./bin/tests -f "test/valid_queries.sql" 3>&1 >/dev/null 2>/dev/null MEM_LEAK_RET=$? if [ $MEM_LEAK_RET -ne 200 ]; then diff --git a/test/tpc_h_tests.cpp b/test/tpc_h_tests.cpp index 310972d..5d6143d 100644 --- a/test/tpc_h_tests.cpp +++ b/test/tpc_h_tests.cpp @@ -31,16 +31,30 @@ TEST(TPCHQueryGrammarTests) { "test/queries/tpc-h-08.sql", "test/queries/tpc-h-09.sql", "test/queries/tpc-h-10.sql", - "test/queries/tpc-h-11-15.sql", - "test/queries/tpc-h-16-22.sql" + "test/queries/tpc-h-11.sql", + "test/queries/tpc-h-12.sql", + "test/queries/tpc-h-13.sql", + "test/queries/tpc-h-14.sql", + "test/queries/tpc-h-15.sql", + "test/queries/tpc-h-16.sql", + "test/queries/tpc-h-17.sql", + "test/queries/tpc-h-18.sql", + "test/queries/tpc-h-19.sql", + "test/queries/tpc-h-20.sql", + "test/queries/tpc-h-21.sql", + "test/queries/tpc-h-22.sql", }; int testsFailed = 0; + std::string concatenated = ""; for (const std::string& file_path : files) { std::string query = readFileContents(file_path); + concatenated += query; + if (concatenated.back() != ';') concatenated += ';'; + SQLParserResult result; - SQLParser::parseSQLString(query.c_str(), &result); + SQLParser::parse(query.c_str(), &result); if (!result.isValid()) { mt::printFailed(file_path.c_str()); printf("%s %s (L%d:%d)%s\n", mt::red(), result.errorMsg(), result.errorLine(), result.errorColumn(), mt::def()); @@ -49,18 +63,29 @@ TEST(TPCHQueryGrammarTests) { mt::printOk(file_path.c_str()); } } + + SQLParserResult result; + SQLParser::parse(concatenated.c_str(), &result); + if (!result.isValid()) { + mt::printFailed("TPCHAllConcatenated"); + printf("%s %s (L%d:%d)%s\n", mt::red(), result.errorMsg(), result.errorLine(), result.errorColumn(), mt::def()); + ++testsFailed; + } else { + mt::printOk("TPCHAllConcatenated"); + } + ASSERT_EQ(testsFailed, 0); } TEST(TPCHQueryDetailTest) { - std::string query = readFileContents("test/queries/tpc-h-16-22.sql"); + std::string query = readFileContents("test/queries/tpc-h-20.sql"); SQLParserResult result; - SQLParser::parseSQLString(query.c_str(), &result); + SQLParser::parse(query.c_str(), &result); ASSERT(result.isValid()); - ASSERT_EQ(result.size(), 7); + ASSERT_EQ(result.size(), 1); - const SQLStatement* stmt20 = result.getStatement(4); + const SQLStatement* stmt20 = result.getStatement(0); ASSERT_EQ(stmt20->type(), kStmtSelect); const SelectStatement* select20 = (const SelectStatement*) stmt20;