Add tokenize method to SQLParser to output the list of tokens (#54)

Added tokenize benchmark. Restructured Makefile
This commit is contained in:
Pedro Flemming 2017-07-21 02:47:45 +02:00 committed by GitHub
parent 12e35dcd63
commit 69d96061b2
38 changed files with 544 additions and 354 deletions

2
.gitignore vendored
View File

@ -41,4 +41,4 @@ cmake-build-debug/
*.cpp.orig *.cpp.orig
*.h.orig *.h.orig
benchmark/parser_benchmark *.csv

View File

@ -32,8 +32,7 @@ script:
- make -j4 - make -j4
- make test - make test
- make test_format
- make test_example - make test_example
# Test if benchmark can be built. # Test if benchmark can be built.
# - make build_benchmark # - make benchmark

141
Makefile
View File

@ -1,53 +1,59 @@
# Directories. all: library
#######################################
############# Directories #############
#######################################
BIN = bin BIN = bin
SRC = src SRC = src
SRCPARSER = src/parser SRCPARSER = src/parser
# Files.
PARSERCPP = $(SRCPARSER)/bison_parser.cpp $(SRCPARSER)/flex_lexer.cpp
LIBCPP = $(shell find $(SRC) -name '*.cpp' -not -path "$(SRCPARSER)/*") $(PARSERCPP)
LIBOBJ = $(LIBCPP:%.cpp=%.o)
TESTCPP = $(shell find test/ -name '*.cpp')
ALLLIB = $(shell find $(SRC) -name '*.cpp' -not -path "$(SRCPARSER)/*") $(shell find $(SRC) -name '*.h' -not -path "$(SRCPARSER)/*")
ALLTEST = $(shell find test/ -name '*.cpp') $(shell find test/ -name '*.h')
EXAMPLESRC = $(shell find example/ -name '*.cpp') $(shell find example/ -name '*.h')
# Compiler & linker flags.
CFLAGS = -std=c++11 -Wall -Werror -fPIC
LIBFLAGS = -shared
TARGET = libsqlparser.so
INSTALL = /usr/local INSTALL = /usr/local
CTESTFLAGS = -Wall -Werror -Isrc/ -Itest/ -L./ -std=c++11 -lstdc++ ######################################
############ Compile Mode ############
######################################
# Set compile mode to -g or -O3. # Set compile mode to -g or -O3.
MODE_LOG = "" # Debug mode: make mode=debug
mode ?= release mode ?= release
MODE_LOG = ""
OPT_FLAG =
ifeq ($(mode), debug) ifeq ($(mode), debug)
CFLAGS += -g OPT_FLAG = -g
CTESTFLAGS += -g
MODE_LOG = "Building in \033[1;31mdebug\033[0m mode" MODE_LOG = "Building in \033[1;31mdebug\033[0m mode"
else else
CFLAGS += -O3 OPT_FLAG = -O3
CTESTFLAGS += -O3
MODE_LOG = "Building in \033[0;32mrelease\033[0m mode ('make mode=debug' for debug mode)" MODE_LOG = "Building in \033[0;32mrelease\033[0m mode ('make mode=debug' for debug mode)"
endif endif
GMAKE = make mode=$(mode) GMAKE = make mode=$(mode)
all: library
library: $(TARGET)
$(TARGET): $(LIBOBJ) #######################################
$(CXX) $(LIBFLAGS) -o $(TARGET) $(LIBOBJ) ############### Library ###############
#######################################
PARSER_CPP = $(SRCPARSER)/bison_parser.cpp $(SRCPARSER)/flex_lexer.cpp
PARSER_H = $(SRCPARSER)/bison_parser.h $(SRCPARSER)/flex_lexer.h
LIB_BUILD = libsqlparser.so
LIB_CFLAGS = -std=c++11 -Wall -Werror -fPIC $(OPT_FLAG)
LIB_LFLAGS = -shared $(OPT_FLAG)
LIB_CPP = $(shell find $(SRC) -name '*.cpp' -not -path "$(SRCPARSER)/*") $(PARSER_CPP)
LIB_H = $(shell find $(SRC) -name '*.h' -not -path "$(SRCPARSER)/*") $(PARSER_H)
LIB_ALL = $(shell find $(SRC) -name '*.cpp' -not -path "$(SRCPARSER)/*") $(shell find $(SRC) -name '*.h' -not -path "$(SRCPARSER)/*")
LIB_OBJ = $(LIB_CPP:%.cpp=%.o)
library: $(LIB_BUILD)
$(LIB_BUILD): $(LIB_OBJ)
$(CXX) $(LIB_LFLAGS) -o $(LIB_BUILD) $(LIB_OBJ)
$(SRCPARSER)/flex_lexer.o: $(SRCPARSER)/flex_lexer.cpp $(SRCPARSER)/bison_parser.cpp $(SRCPARSER)/flex_lexer.o: $(SRCPARSER)/flex_lexer.cpp $(SRCPARSER)/bison_parser.cpp
$(CXX) $(CFLAGS) -c -o $@ $< -Wno-sign-compare -Wno-unneeded-internal-declaration -Wno-deprecated-register $(CXX) $(LIB_CFLAGS) -c -o $@ $< -Wno-sign-compare -Wno-unneeded-internal-declaration -Wno-deprecated-register
%.o: %.cpp $(PARSERCPP) %.o: %.cpp $(PARSER_CPP) $(LIB_H)
$(CXX) $(CFLAGS) -c -o $@ $< $(CXX) $(LIB_CFLAGS) -c -o $@ $<
$(SRCPARSER)/bison_parser.cpp: $(SRCPARSER)/bison_parser.y $(SRCPARSER)/bison_parser.cpp: $(SRCPARSER)/bison_parser.y
$(GMAKE) -C $(SRCPARSER)/ bison_parser.cpp $(GMAKE) -C $(SRCPARSER)/ bison_parser.cpp
@ -55,11 +61,13 @@ $(SRCPARSER)/bison_parser.cpp: $(SRCPARSER)/bison_parser.y
$(SRCPARSER)/flex_lexer.cpp: $(SRCPARSER)/flex_lexer.l $(SRCPARSER)/flex_lexer.cpp: $(SRCPARSER)/flex_lexer.l
$(GMAKE) -C $(SRCPARSER)/ flex_lexer.cpp $(GMAKE) -C $(SRCPARSER)/ flex_lexer.cpp
$(SRCPARSER)/bison_parser.h: $(SRCPARSER)/bison_parser.cpp
$(SRCPARSER)/flex_lexer.h: $(SRCPARSER)/flex_lexer.cpp
clean: clean:
rm -f $(TARGET) rm -f $(LIB_BUILD)
rm -rf $(BIN) rm -rf $(BIN)
find $(SRC) -type f -name '*.o' -delete find $(SRC) -type f -name '*.o' -delete
$(GMAKE) -C benchmark/ clean
cleanparser: cleanparser:
$(GMAKE) -C $(SRCPARSER)/ clean $(GMAKE) -C $(SRCPARSER)/ clean
@ -67,50 +75,73 @@ cleanparser:
cleanall: clean cleanparser cleanall: clean cleanparser
install: install:
cp $(TARGET) $(INSTALL)/lib/$(TARGET) cp $(LIB_BUILD) $(INSTALL)/lib/$(LIB_BUILD)
rm -rf $(INSTALL)/include/hsql rm -rf $(INSTALL)/include/hsql
cp -r src $(INSTALL)/include/hsql cp -r src $(INSTALL)/include/hsql
find $(INSTALL)/include/hsql -not -name '*.h' -type f | xargs rm find $(INSTALL)/include/hsql -not -name '*.h' -type f | xargs rm
#################
### Benchmark ###
#################
benchmark: library
$(GMAKE) -C benchmark/ clean run
build_benchmark: library #######################################
$(GMAKE) -C benchmark/ parser_benchmark ############## Benchmark ##############
#######################################
BM_BUILD = $(BIN)/benchmark
BM_CFLAGS = -std=c++17 -Wall -Isrc/ -L./ $(OPT_FLAG)
BM_PATH = benchmark
BM_CPP = $(shell find $(BM_PATH)/ -name '*.cpp')
BM_ALL = $(shell find $(BM_PATH)/ -name '*.cpp' -or -name '*.h')
############ benchmark: $(BM_BUILD)
### Test ###
############
test: $(BIN)/sql_tests run_benchmarks: benchmark
./$(BM_BUILD) --benchmark_counters_tabular=true
# --benchmark_filter="abc
save_benchmarks: benchmark
./$(BM_BUILD) --benchmark_format=csv > benchmarks.csv
$(BM_BUILD): $(BM_ALL) $(LIB_BUILD)
@mkdir -p $(BIN)/
$(CXX) $(BM_CFLAGS) $(BM_CPP) -o $(BM_BUILD) -lbenchmark -lpthread -lsqlparser -lstdc++ -lstdc++fs
########################################
############ Test & Example ############
########################################
TEST_BUILD = $(BIN)/tests
TEST_CFLAGS = -std=c++11 -Wall -Werror -Isrc/ -Itest/ -L./ $(OPT_FLAG)
TEST_CPP = $(shell find test/ -name '*.cpp')
TEST_ALL = $(shell find test/ -name '*.cpp') $(shell find test/ -name '*.h')
EXAMPLE_SRC = $(shell find example/ -name '*.cpp') $(shell find example/ -name '*.h')
test: $(TEST_BUILD)
bash test/test.sh bash test/test.sh
$(TEST_BUILD): $(TEST_ALL) $(LIB_BUILD)
@mkdir -p $(BIN)/
$(CXX) $(TEST_CFLAGS) $(TEST_CPP) -o $(TEST_BUILD) -lsqlparser -lstdc++
test_example: test_example:
$(GMAKE) -C example/ $(GMAKE) -C example/
LD_LIBRARY_PATH=./ \ LD_LIBRARY_PATH=./ \
./example/example "SELECT * FROM students WHERE name = 'Max Mustermann';" ./example/example "SELECT * FROM students WHERE name = 'Max Mustermann';"
test_format: test_format:
@! astyle --options=astyle.options $(ALLLIB) | grep -q "Formatted" @! astyle --options=astyle.options $(LIB_ALL) | grep -q "Formatted"
@! astyle --options=astyle.options $(ALLTEST) | grep -q "Formatted" @! astyle --options=astyle.options $(TEST_ALL) | grep -q "Formatted"
$(BIN)/sql_tests: library
@mkdir -p $(BIN)/
$(CXX) $(CTESTFLAGS) $(TESTCPP) -o $(BIN)/sql_tests -lsqlparser
############
### Misc ### ########################################
############ ################# Misc #################
########################################
format: format:
astyle --options=astyle.options $(ALLLIB) astyle --options=astyle.options $(LIB_ALL)
astyle --options=astyle.options $(ALLTEST) astyle --options=astyle.options $(TEST_ALL)
astyle --options=astyle.options $(EXAMPLESRC) astyle --options=astyle.options $(EXAMPLE_SRC)
log_mode: log_mode:
@echo $(MODE_LOG) @echo $(MODE_LOG)

View File

@ -36,7 +36,7 @@ To use the SQL parser in your own projects you simply have to follow these few s
const std::string query = "..."; const std::string query = "...";
hsql::SQLParserResult result; hsql::SQLParserResult result;
hsql::SQLParser::parseSQLString(query, &result); hsql::SQLParser::parse(query, &result);
if (result.isValid() && result.size() > 0) { if (result.isValid() && result.size() > 0) {
const hsql::SQLStatement* statement = result.getStatement(0); const hsql::SQLStatement* statement = result.getStatement(0);

View File

@ -1,17 +0,0 @@
# Standalone build of the parser benchmark binary.
# Headers are taken from ../src/ and libsqlparser is looked up in ../, so the
# library has to be built in the parent directory first.
SRC = ./
# Simple (:=) assignment: run `find` once at parse time instead of on every
# expansion of $(CPP) (it is referenced as a prerequisite and in the recipe).
CPP := $(shell find $(SRC) -name '*.cpp')
CFLAGS = -std=c++11 -lstdc++ -Wall -Werror -I../src/ -L../ -O3

# These targets are commands, not files. Declaring them phony keeps them
# working even if a file named `all`, `run` or `clean` appears in this directory.
.PHONY: all run clean

all: parser_benchmark

# Runs the benchmark with ../ appended to the dynamic linker search path so
# that the shared parser library built in the parent directory is found.
run: parser_benchmark
	@export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../ &&\
	./parser_benchmark

# Compiles and links all benchmark sources in a single compiler invocation.
parser_benchmark: $(CPP)
	$(CXX) $(CFLAGS) $(CPP) -o $@ -lbenchmark -lpthread -lsqlparser

clean:
	rm -f parser_benchmark

View File

@ -12,23 +12,3 @@ make
make install make install
``` ```
## Run the benchmarks
Build the library from the parent directory and then execute:
```bash
make run
# or manually...
make
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../
./parser_benchmark
```
... or run this from the parent directory:
```bash
# From root of Git repository.
make run_benchmark
```

28
benchmark/benchmark.cpp Normal file
View File

@ -0,0 +1,28 @@
#include "benchmark/benchmark.h"
#include "benchmark_utils.h"
#include "queries.h"
// Entry point of the benchmark binary.
// Registers a "<name>-parse" and a "<name>-tokenize" benchmark for every query
// returned by getTPCHQueries() and for every entry of sql_queries, then hands
// the command line over to the benchmark library and runs what was selected.
int main(int argc, char** argv) {
  // Registers the parse/tokenize benchmark pair for a single named query.
  const auto register_pair = [](const std::string& name, const std::string& sql) {
    const std::string parse_name = name + "-parse";
    benchmark::RegisterBenchmark(parse_name.c_str(), &BM_ParseBenchmark, sql);

    const std::string tokenize_name = name + "-tokenize";
    benchmark::RegisterBenchmark(tokenize_name.c_str(), &BM_TokenizeBenchmark, sql);
  };

  // Queries loaded from files; each entry is a (name, query) pair.
  const auto tpch_queries = getTPCHQueries();
  for (const auto& entry : tpch_queries) {
    register_pair(entry.first, entry.second);
  }

  // Queries from the sql_queries array; names are resolved through
  // getQueryName() by index.
  for (unsigned idx = 0; idx < sql_queries.size(); ++idx) {
    register_pair(getQueryName(idx), sql_queries[idx].second);
  }

  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
}

View File

@ -0,0 +1,44 @@
#include "benchmark_utils.h"
#include <fstream>
#include <iostream>
#include "SQLParser.h"
// Tokenizes `query` with hsql::SQLParser::tokenize() and returns how many
// tokens were stored in the output vector. The boolean result of tokenize()
// is not inspected.
size_t getNumTokens(const std::string& query) {
  std::vector<int16_t> token_ids;
  hsql::SQLParser::tokenize(query, &token_ids);

  const size_t token_count = token_ids.size();
  return token_count;
}
// Benchmark body that measures hsql::SQLParser::tokenize() on `query`.
//
// Before the timed loop, the number of tokens and the number of characters of
// the query are published as the user counters "num_tokens" and "num_chars".
//
// st:    benchmark state driving the timed loop.
// query: SQL text to tokenize in every iteration.
void BM_TokenizeBenchmark(benchmark::State& st, const std::string& query) {
  st.counters["num_tokens"] = getNumTokens(query);
  st.counters["num_chars"] = query.size();

  while (st.KeepRunning()) {
    // Start every iteration with an EMPTY vector that has room for 512 tokens.
    // Constructing it as `tokens(512)` would create 512 zero-valued elements
    // instead; tokenize() appends with push_back, so the very first append
    // would then reallocate and the result would start with 512 bogus tokens.
    std::vector<int16_t> tokens;
    tokens.reserve(512);
    hsql::SQLParser::tokenize(query, &tokens);
  }
}
// Benchmark body that measures hsql::SQLParser::parse() on `query`.
//
// Publishes the counters "num_tokens" and "num_chars" for the query, then
// parses it once per iteration. When the result is not valid, the query and
// the parser's error message are written to stdout and the benchmark is
// flagged through SkipWithError().
void BM_ParseBenchmark(benchmark::State& st, const std::string& query) {
  st.counters["num_tokens"] = getNumTokens(query);
  st.counters["num_chars"] = query.size();

  while (st.KeepRunning()) {
    hsql::SQLParserResult parsed;
    hsql::SQLParser::parse(query, &parsed);

    // Nothing to report for a valid result; go on to the next iteration.
    if (parsed.isValid()) {
      continue;
    }

    std::cout << query << std::endl;
    std::cout << parsed.errorMsg() << std::endl;
    st.SkipWithError("Parsing failed!");
  }
}
// Reads the file at `file_path` and returns everything in it as one string.
// No error is reported if the file cannot be opened; the returned string is
// then simply empty.
std::string readFileContents(const std::string& file_path) {
  std::ifstream input(file_path.c_str());

  // A default-constructed istreambuf_iterator is the end-of-stream marker.
  std::istreambuf_iterator<char> first(input);
  std::istreambuf_iterator<char> end_of_stream;

  return std::string(first, end_of_stream);
}

View File

@ -1,6 +1,18 @@
#ifndef __BENCHMARK_UTILS_H__ #ifndef __BENCHMARK_UTILS_H__
#define __BENCHMARK_UTILS_H__ #define __BENCHMARK_UTILS_H__
#include "benchmark/benchmark.h"
size_t getNumTokens(const std::string& query);
void BM_TokenizeBenchmark(benchmark::State& st, const std::string& query);
void BM_ParseBenchmark(benchmark::State& st, const std::string& query);
std::string readFileContents(const std::string& file_path);
#define TIME_DIFF(end, start)\ #define TIME_DIFF(end, start)\
std::chrono::duration_cast<std::chrono::duration<double>>(end - start); std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
@ -8,17 +20,22 @@
#define NOW()\ #define NOW()\
std::chrono::high_resolution_clock::now(); std::chrono::high_resolution_clock::now();
#define PARSE_QUERY_BENCHMARK(name, query)\ #define PARSE_QUERY_BENCHMARK(name, query)\
static void name(benchmark::State& st) {\ static void name(benchmark::State& st) {\
while (st.KeepRunning()) {\ BM_ParseBenchmark(st, query);\
hsql::SQLParserResult* result = hsql::SQLParser::parseSQLString(query);\
if (!result->isValid()) st.SkipWithError("Parsing failed!");\
delete result;\
}\ }\
BENCHMARK(name);
#define TOKENIZE_QUERY_BENCHMARK(name, query)\
static void name(benchmark::State& st) {\
BM_TokenizeBenchmark(st, query);\
}\ }\
BENCHMARK(name); BENCHMARK(name);
#define BENCHMARK_QUERY(test_name, query)\
TOKENIZE_QUERY_BENCHMARK(test_name##Tokenize, query)\
PARSE_QUERY_BENCHMARK(test_name##Parse, query)
#endif #endif

View File

@ -9,67 +9,6 @@
#include "benchmark_utils.h" #include "benchmark_utils.h"
PARSE_QUERY_BENCHMARK(BM_Q1SimpleSelect,
"SELECT * FROM test;");
PARSE_QUERY_BENCHMARK(BM_Q2SimpleSubSelect,
"SELECT a, b AS address FROM (SELECT * FROM test WHERE c < 100 AND b > 3) t1 WHERE a < 10 AND b < 100;");
PARSE_QUERY_BENCHMARK(BM_Q3SingleJoin,
"SELECT \"left\".a, \"left\".b, \"right\".a, \"right\".b FROM table_a AS \"left\" JOIN table_b AS \"right\" ON \"left\".a = \"right\".a;");
PARSE_QUERY_BENCHMARK(BM_Q4TPCHQuery,
"SELECT"
" l_orderkey,"
" SUM(l_extendedprice * (1 - l_discount)) AS revenue,"
" o_orderdate,"
" o_shippriority"
" FROM"
" customer,"
" orders,"
" lineitem"
" WHERE"
" c_mktsegment = '%s'"
" and c_custkey = o_custkey"
" and l_orderkey = o_orderkey"
" and o_orderdate < '%s'"
" and l_shipdate > '%s'"
" GROUP BY"
" l_orderkey,"
" o_orderdate,"
" o_shippriority"
" ORDER BY"
" revenue DESC,"
" o_orderdate;"
);
PARSE_QUERY_BENCHMARK(BM_TwoSelects,
"SELECT * FROM test; SELECT age, street AS address FROM data;");
PARSE_QUERY_BENCHMARK(BM_LongSelectList26,
"SELECT a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z FROM test;");
PARSE_QUERY_BENCHMARK(BM_LongSelectList52,
"SELECT a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z FROM test;");
PARSE_QUERY_BENCHMARK(BM_LongSelectElement26,
"SELECT aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa FROM test;");
PARSE_QUERY_BENCHMARK(BM_LongSelectElement52,
"SELECT aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa FROM test;");
// Prepare and Execute benchmarks.
PARSE_QUERY_BENCHMARK(BM_ExecuteStatement,
"EXECUTE procedure;");
PARSE_QUERY_BENCHMARK(BM_ExecuteWith2ParametersStatement,
"EXECUTE procedure(11, 'test');");
PARSE_QUERY_BENCHMARK(BM_ExecuteWith10ParametersStatement,
"EXECUTE procedure(11, 'test', 5.6, 4.2, 'abc', 6, 7, 8, 9, 10000);");
// Benchmark the influence of increasing size of the query, while // Benchmark the influence of increasing size of the query, while
// the number of tokens remains unchanged. // the number of tokens remains unchanged.
static void BM_CharacterCount(benchmark::State& st) { static void BM_CharacterCount(benchmark::State& st) {
@ -82,9 +21,11 @@ static void BM_CharacterCount(benchmark::State& st) {
const std::string filler = std::string(pad, 'a'); const std::string filler = std::string(pad, 'a');
query.replace(7, 6, filler); query.replace(7, 6, filler);
st.counters["num_tokens"] = getNumTokens(query);
st.counters["num_chars"] = query.size();
while (st.KeepRunning()) { while (st.KeepRunning()) {
hsql::SQLParserResult* result = hsql::SQLParser::parseSQLString(query); hsql::SQLParserResult result;
delete result; hsql::SQLParser::parse(query, &result);
} }
} }
BENCHMARK(BM_CharacterCount) BENCHMARK(BM_CharacterCount)
@ -129,10 +70,12 @@ static void BM_ConditionalTokens(benchmark::State& st) {
return; return;
} }
st.counters["num_tokens"] = getNumTokens(query);
st.counters["num_chars"] = query.size();
while (st.KeepRunning()) { while (st.KeepRunning()) {
hsql::SQLParserResult* result = hsql::SQLParser::parseSQLString(query); hsql::SQLParserResult result;
if (!result->isValid()) st.SkipWithError("Parsing failed!");\ hsql::SQLParser::parse(query, &result);
delete result; if (!result.isValid()) st.SkipWithError("Parsing failed!");
} }
} }
BENCHMARK(BM_ConditionalTokens) BENCHMARK(BM_ConditionalTokens)
@ -141,4 +84,4 @@ BENCHMARK(BM_ConditionalTokens)
{1 << 2, 1 << 11}}); {1 << 2, 1 << 11}});
BENCHMARK_MAIN();

47
benchmark/queries.cpp Normal file
View File

@ -0,0 +1,47 @@
#include "queries.h"
#include <experimental/filesystem>
#include <algorithm>
#include <iostream>
#include <regex>
#include "benchmark_utils.h"
namespace filesystem = std::experimental::filesystem;
// Returns the display name of the i-th entry of sql_queries.
// Entries with an explicit name return that name; unnamed entries are
// labelled by their 1-based position, e.g. "#3" for i == 2.
// `i` is not range-checked against sql_queries.size().
std::string getQueryName(unsigned i) {
  const std::string& label = sql_queries[i].first;
  if (!label.empty()) {
    return label;
  }
  return "#" + std::to_string(i + 1);
}
std::vector<SQLQuery> getQueriesFromDirectory(const std::string& dir_path) {
std::regex query_file_regex("\\.sql$");
std::vector<std::string> files;
for (auto& entry : filesystem::directory_iterator(dir_path)) {
if (filesystem::is_regular_file(entry)) {
std::string path_str = filesystem::path(entry);
if (std::regex_search(path_str, query_file_regex)) {
files.push_back(path_str);
}
}
}
std::sort(files.begin(), files.end());
std::vector<SQLQuery> queries;
for (const std::string& file_path : files) {
const filesystem::path p(file_path);
const std::string query = readFileContents(file_path);
queries.emplace_back(p.filename(), query);
}
return queries;
}
// Returns the queries found in the "test/queries/" directory, as loaded by
// getQueriesFromDirectory(). The path is relative, so it is resolved against
// the working directory of the running process.
std::vector<SQLQuery> getTPCHQueries() {
  const std::string tpch_dir = "test/queries/";
  return getQueriesFromDirectory(tpch_dir);
}

56
benchmark/queries.h Normal file
View File

@ -0,0 +1,56 @@
#ifndef __QUERIES_H__
#define __QUERIES_H__
#include <string>
#include <vector>
// A benchmark query: `first` is the display name, `second` is the SQL text.
typedef std::pair<std::string, std::string> SQLQuery;
// Built-in benchmark queries as {name, query} pairs.
// The name may be left empty; getQueryName() then labels the entry by its
// 1-based position in this list.
// NOTE: the vector is declared `static` in a header, so every translation
// unit that includes this file gets its own copy.
// name, query
static std::vector<SQLQuery> sql_queries = {
{"Q1", "SELECT * FROM test;"},
{"Q2", "SELECT a, b AS address FROM (SELECT * FROM test WHERE c < 100 AND b > 3) t1 WHERE a < 10 AND b < 100;"},
{"Q3", "SELECT \"left\".a, \"left\".b, \"right\".a, \"right\".b FROM table_a AS \"left\" JOIN table_b AS \"right\" ON \"left\".a = \"right\".a;"},
// Q4 is assembled from adjacent string literals. The '%s' placeholders are
// part of the query text as written here; nothing in this table substitutes them.
{"Q4", ""
"SELECT"
" l_orderkey,"
" SUM(l_extendedprice * (1 - l_discount)) AS revenue,"
" o_orderdate,"
" o_shippriority"
" FROM"
" customer,"
" orders,"
" lineitem"
" WHERE"
" c_mktsegment = '%s'"
" and c_custkey = o_custkey"
" and l_orderkey = o_orderkey"
" and o_orderdate < '%s'"
" and l_shipdate > '%s'"
" GROUP BY"
" l_orderkey,"
" o_orderdate,"
" o_shippriority"
" ORDER BY"
" revenue DESC,"
" o_orderdate;"
},
// Select lists with many short columns versus a single long identifier.
{"LongSelectList26", "SELECT a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z FROM test;"},
{"LongSelectElement26", "SELECT abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxy FROM test;"},
{"LongSelectList52", "SELECT a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z FROM test;"},
{"LongSelectElement52", "SELECT abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxy FROM test;"},
// Two statements in a single input string.
{"TwoSelects", "SELECT * FROM test; SELECT age, street AS address FROM data;"},
// EXECUTE statements with an increasing number of parameters.
{"ExecuteNoParams", "EXECUTE procedure;"},
{"Execute2Params", "EXECUTE procedure(11, 'test');"},
{"Execute10Params", "EXECUTE procedure(11, 'test', 5.6, 4.2, 'abc', 6, 7, 8, 9, 10000);"},
// Template for adding a new entry:
// {"name", "query"},
};
// Returns the display name of sql_queries[i]: the entry's own name, or
// "#<i + 1>" when the entry's name is empty.
std::string getQueryName(unsigned i);
// Reads every regular file in `dir_path` whose path ends in ".sql" and returns
// one (file name, file contents) pair per file, ordered by file path.
std::vector<SQLQuery> getQueriesFromDirectory(const std::string& dir_path);
// Returns the queries loaded from the "test/queries/" directory (relative to
// the current working directory).
std::vector<SQLQuery> getTPCHQueries();
#endif

View File

@ -17,7 +17,7 @@ int main(int argc, char* argv[]) {
// parse a given query // parse a given query
hsql::SQLParserResult result; hsql::SQLParserResult result;
hsql::SQLParser::parseSQLString(query, &result); hsql::SQLParser::parse(query, &result);
// check whether the parsing was successful // check whether the parsing was successful

View File

@ -5,7 +5,6 @@
#include <stdio.h> #include <stdio.h>
#include <string> #include <string>
namespace hsql { namespace hsql {
SQLParser::SQLParser() { SQLParser::SQLParser() {
@ -13,16 +12,16 @@ namespace hsql {
} }
// static // static
bool SQLParser::parseSQLString(const char* text, SQLParserResult* result) { bool SQLParser::parse(const std::string& sql, SQLParserResult* result) {
yyscan_t scanner; yyscan_t scanner;
YY_BUFFER_STATE state; YY_BUFFER_STATE state;
if (hsql_lex_init(&scanner)) { if (hsql_lex_init(&scanner)) {
// Couldn't initialize the lexer. // Couldn't initialize the lexer.
fprintf(stderr, "[Error] SQLParser: Error when initializing lexer!\n"); fprintf(stderr, "SQLParser: Error when initializing lexer!\n");
return false; return false;
} }
const char* text = sql.c_str();
state = hsql__scan_string(text, scanner); state = hsql__scan_string(text, scanner);
// Parse the tokens. // Parse the tokens.
@ -38,25 +37,44 @@ namespace hsql {
} }
// static // static
bool SQLParser::parseSQLString(const std::string& text, SQLParserResult* result) { bool SQLParser::parseSQLString(const char* sql, SQLParserResult* result) {
return parseSQLString(text.c_str(), result); return parse(sql, result);
}
bool SQLParser::parseSQLString(const std::string& sql, SQLParserResult* result) {
return parse(sql, result);
} }
// static // static
SQLParserResult* SQLParser::parseSQLString(const char* text) { bool SQLParser::tokenize(const std::string& sql, std::vector<int16_t>* tokens) {
SQLParserResult* result = new SQLParserResult(); // Initialize the scanner.
yyscan_t scanner;
if (!SQLParser::parseSQLString(text, result)) { if (hsql_lex_init(&scanner)) {
delete result; fprintf(stderr, "SQLParser: Error when initializing lexer!\n");
return nullptr; return false;
} }
return result; YY_BUFFER_STATE state;
state = hsql__scan_string(sql.c_str(), scanner);
YYSTYPE yylval;
YYLTYPE yylloc;
// Step through the string until EOF is read.
// Note: hsql_lex returns int, but we know that its range is within 16 bit.
int16_t token = hsql_lex(&yylval, &yylloc, scanner);
while (token != 0) {
tokens->push_back(token);
token = hsql_lex(&yylval, &yylloc, scanner);
if (token == SQL_IDENTIFIER || token == SQL_STRING) {
free(yylval.sval);
}
} }
// static hsql__delete_buffer(state, scanner);
SQLParserResult* SQLParser::parseSQLString(const std::string& text) { hsql_lex_destroy(scanner);
return parseSQLString(text.c_str()); return true;
} }
} // namespace hsql } // namespace hsql

View File

@ -9,29 +9,25 @@ namespace hsql {
// Static methods used to parse SQL strings. // Static methods used to parse SQL strings.
class SQLParser { class SQLParser {
public: public:
// Parses a given constant character SQL string into the result object. // Parses a given constant character SQL string into the result object.
// Returns true if the lexer and parser could run without internal errors. // Returns true if the lexer and parser could run without internal errors.
// This does NOT mean that the SQL string was valid SQL. To check that // This does NOT mean that the SQL string was valid SQL. To check that
// you need to check result->isValid(); // you need to check result->isValid();
static bool parse(const std::string& sql, SQLParserResult* result);
// Run tokenization on the given string and store the tokens in the output vector.
static bool tokenize(const std::string& sql, std::vector<int16_t>* tokens);
// Deprecated.
// Old method to parse SQL strings. Replaced by parse().
static bool parseSQLString(const char* sql, SQLParserResult* result); static bool parseSQLString(const char* sql, SQLParserResult* result);
// Parses a given SQL string into the result object. // Deprecated.
// Old method to parse SQL strings. Replaced by parse().
static bool parseSQLString(const std::string& sql, SQLParserResult* result); static bool parseSQLString(const std::string& sql, SQLParserResult* result);
// Deprecated:
// Parses a given constant character SQL string.
// Note: This is kept for legacy reasons. It is recommended to use
// the (const char*, SQLParserResult*) implementation.
static SQLParserResult* parseSQLString(const char* sql);
// Deprecated:
// Parses an SQL std::string.
// Note: This is kept for legacy reasons. It is recommended to use
// the (const std::string&, SQLParserResult*) implementation.
static SQLParserResult* parseSQLString(const std::string& sql);
private: private:
// Static class can't be instantiated.
SQLParser(); SQLParser();
}; };

View File

@ -100,7 +100,9 @@ namespace hsql {
void SQLParserResult::addParameter(Expr* parameter) { void SQLParserResult::addParameter(Expr* parameter) {
parameters_.push_back(parameter); parameters_.push_back(parameter);
std::sort(parameters_.begin(), parameters_.end(), std::sort(parameters_.begin(), parameters_.end(),
[](const Expr* a, const Expr* b) { return a->ival < b->ival; }); [](const Expr * a, const Expr * b) {
return a->ival < b->ival;
});
} }
const std::vector<Expr*>& SQLParserResult::parameters() { const std::vector<Expr*>& SQLParserResult::parameters() {

View File

@ -1,9 +1,10 @@
#ifndef __SQLPARSER__SQLSTATEMENT_H__ #ifndef __SQLPARSER__SQLSTATEMENT_H__
#define __SQLPARSER__SQLSTATEMENT_H__ #define __SQLPARSER__SQLSTATEMENT_H__
#include "Expr.h"
#include <vector> #include <vector>
#include "Expr.h"
namespace hsql { namespace hsql {
enum StatementType { enum StatementType {
kStmtError, // unused kStmtError, // unused

View File

@ -1,58 +0,0 @@
-- From:
-- http://www.sqlserver-dba.com/2011/09/this-is-a-followup-on-my-earlier-post-of-sql-server-test-data-generation-testing-tools-i-had-some-requests-for-my-set-up-pr.html
-- TPC_H Query 11 - Important Stock Identification
SELECT PS_PARTKEY, SUM(PS_SUPPLYCOST*PS_AVAILQTY) AS VALUE
FROM PARTSUPP, SUPPLIER, NATION
WHERE PS_SUPPKEY = S_SUPPKEY AND S_NATIONKEY = N_NATIONKEY AND N_NAME = 'GERMANY'
GROUP BY PS_PARTKEY
HAVING SUM(PS_SUPPLYCOST*PS_AVAILQTY) > (SELECT SUM(PS_SUPPLYCOST*PS_AVAILQTY) * 0.0001000000
FROM PARTSUPP, SUPPLIER, NATION
WHERE PS_SUPPKEY = S_SUPPKEY AND S_NATIONKEY = N_NATIONKEY AND N_NAME = 'GERMANY')
ORDER BY VALUE DESC;
-- TPC_H Query 12 - Shipping Modes and Order Priority
SELECT L_SHIPMODE,
SUM(CASE WHEN O_ORDERPRIORITY = '1-URGENT' OR O_ORDERPRIORITY = '2-HIGH' THEN 1 ELSE 0 END) AS HIGH_LINE_COUNT,
SUM(CASE WHEN O_ORDERPRIORITY <> '1-URGENT' AND O_ORDERPRIORITY <> '2-HIGH' THEN 1 ELSE 0 END ) AS LOW_LINE_COUNT
FROM ORDERS, LINEITEM
WHERE O_ORDERKEY = L_ORDERKEY AND L_SHIPMODE IN ('MAIL','SHIP')
AND L_COMMITDATE < L_RECEIPTDATE AND L_SHIPDATE < L_COMMITDATE AND L_RECEIPTDATE >= '1994-01-01'
AND L_RECEIPTDATE < dateadd(mm, 1, cast('1995-09-01' as datetime))
GROUP BY L_SHIPMODE
ORDER BY L_SHIPMODE;
-- TPC_H Query 13 - Customer Distribution
SELECT C_COUNT, COUNT(*) AS CUSTDIST
FROM (SELECT C_CUSTKEY, COUNT(O_ORDERKEY)
FROM CUSTOMER left outer join ORDERS on C_CUSTKEY = O_CUSTKEY
AND O_COMMENT not like '%%special%%requests%%'
GROUP BY C_CUSTKEY) AS C_ORDERS
GROUP BY C_COUNT
ORDER BY CUSTDIST DESC, C_COUNT DESC;
-- TPC_H Query 14 - Promotion Effect
SELECT 100.00* SUM(CASE WHEN P_TYPE LIKE 'PROMO%%' THEN L_EXTENDEDPRICE*(1-L_DISCOUNT)
ELSE 0 END) / SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)) AS PROMO_REVENUE
FROM LINEITEM, "PART"
WHERE L_PARTKEY = P_PARTKEY AND L_SHIPDATE >= '1995-09-01' AND L_SHIPDATE < dateadd(mm, 1, '1995-09-01');
-- TPC_H Query 15.1 - Create View for Top Supplier Query
CREATE VIEW REVENUE0 (SUPPLIER_NO, TOTAL_REVENUE) AS
SELECT L_SUPPKEY, SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)) FROM LINEITEM
WHERE L_SHIPDATE >= '1996-01-01' AND L_SHIPDATE < dateadd(mm, 3, cast('1996-01-01' as datetime))
GROUP BY L_SUPPKEY;
-- TPC_H Query 15.2 - Top Supplier
SELECT S_SUPPKEY, S_NAME, S_ADDRESS, S_PHONE, TOTAL_REVENUE
FROM SUPPLIER, REVENUE0
WHERE S_SUPPKEY = SUPPLIER_NO AND TOTAL_REVENUE = (SELECT MAX(TOTAL_REVENUE) FROM REVENUE0)
ORDER BY S_SUPPKEY;
-- TPC_H Query 15.3 - Drop View
DROP VIEW REVENUE0;

10
test/queries/tpc-h-11.sql Normal file
View File

@ -0,0 +1,10 @@
-- http://www.sqlserver-dba.com/2011/09/this-is-a-followup-on-my-earlier-post-of-sql-server-test-data-generation-testing-tools-i-had-some-requests-for-my-set-up-pr.html
-- TPC_H Query 11 - Important Stock Identification
SELECT PS_PARTKEY, SUM(PS_SUPPLYCOST*PS_AVAILQTY) AS VALUE
FROM PARTSUPP, SUPPLIER, NATION
WHERE PS_SUPPKEY = S_SUPPKEY AND S_NATIONKEY = N_NATIONKEY AND N_NAME = 'GERMANY'
GROUP BY PS_PARTKEY
HAVING SUM(PS_SUPPLYCOST*PS_AVAILQTY) > (SELECT SUM(PS_SUPPLYCOST*PS_AVAILQTY) * 0.0001000000
FROM PARTSUPP, SUPPLIER, NATION
WHERE PS_SUPPKEY = S_SUPPKEY AND S_NATIONKEY = N_NATIONKEY AND N_NAME = 'GERMANY')
ORDER BY VALUE DESC;

10
test/queries/tpc-h-12.sql Normal file
View File

@ -0,0 +1,10 @@
-- TPC_H Query 12 - Shipping Modes and Order Priority
SELECT L_SHIPMODE,
SUM(CASE WHEN O_ORDERPRIORITY = '1-URGENT' OR O_ORDERPRIORITY = '2-HIGH' THEN 1 ELSE 0 END) AS HIGH_LINE_COUNT,
SUM(CASE WHEN O_ORDERPRIORITY <> '1-URGENT' AND O_ORDERPRIORITY <> '2-HIGH' THEN 1 ELSE 0 END ) AS LOW_LINE_COUNT
FROM ORDERS, LINEITEM
WHERE O_ORDERKEY = L_ORDERKEY AND L_SHIPMODE IN ('MAIL','SHIP')
AND L_COMMITDATE < L_RECEIPTDATE AND L_SHIPDATE < L_COMMITDATE AND L_RECEIPTDATE >= '1994-01-01'
AND L_RECEIPTDATE < dateadd(mm, 1, cast('1995-09-01' as datetime))
GROUP BY L_SHIPMODE
ORDER BY L_SHIPMODE;

View File

@ -0,0 +1,8 @@
-- TPC_H Query 13 - Customer Distribution
SELECT C_COUNT, COUNT(*) AS CUSTDIST
FROM (SELECT C_CUSTKEY, COUNT(O_ORDERKEY)
FROM CUSTOMER left outer join ORDERS on C_CUSTKEY = O_CUSTKEY
AND O_COMMENT not like '%%special%%requests%%'
GROUP BY C_CUSTKEY) AS C_ORDERS
GROUP BY C_COUNT
ORDER BY CUSTDIST DESC, C_COUNT DESC;

View File

@ -0,0 +1,5 @@
-- TPC_H Query 14 - Promotion Effect
SELECT 100.00* SUM(CASE WHEN P_TYPE LIKE 'PROMO%%' THEN L_EXTENDEDPRICE*(1-L_DISCOUNT)
ELSE 0 END) / SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)) AS PROMO_REVENUE
FROM LINEITEM, "PART"
WHERE L_PARTKEY = P_PARTKEY AND L_SHIPDATE >= '1995-09-01' AND L_SHIPDATE < dateadd(mm, 1, '1995-09-01');

15
test/queries/tpc-h-15.sql Normal file
View File

@ -0,0 +1,15 @@
-- TPC_H Query 15.1 - Create View for Top Supplier Query
CREATE VIEW REVENUE0 (SUPPLIER_NO, TOTAL_REVENUE) AS
SELECT L_SUPPKEY, SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)) FROM LINEITEM
WHERE L_SHIPDATE >= '1996-01-01' AND L_SHIPDATE < dateadd(mm, 3, cast('1996-01-01' as datetime))
GROUP BY L_SUPPKEY;
-- TPC_H Query 15.2 - Top Supplier
SELECT S_SUPPKEY, S_NAME, S_ADDRESS, S_PHONE, TOTAL_REVENUE
FROM SUPPLIER, REVENUE0
WHERE S_SUPPKEY = SUPPLIER_NO AND TOTAL_REVENUE = (SELECT MAX(TOTAL_REVENUE) FROM REVENUE0)
ORDER BY S_SUPPKEY;
-- TPC_H Query 15.3 - Drop View
DROP VIEW REVENUE0;

View File

@ -1,71 +0,0 @@
-- From:
-- http://www.sqlserver-dba.com/2011/09/this-is-a-followup-on-my-earlier-post-of-sql-server-test-data-generation-testing-tools-i-had-some-requests-for-my-set-up-pr.html
-- TPC_H Query 16 - Parts/Supplier Relationship
SELECT P_BRAND, P_TYPE, P_SIZE, COUNT(DISTINCT PS_SUPPKEY) AS SUPPLIER_CNT
FROM PARTSUPP, "PART"
WHERE P_PARTKEY = PS_PARTKEY AND P_BRAND <> 'Brand#45' AND P_TYPE NOT LIKE 'MEDIUM POLISHED%%'
AND P_SIZE IN (49, 14, 23, 45, 19, 3, 36, 9) AND PS_SUPPKEY NOT IN (SELECT S_SUPPKEY FROM SUPPLIER
WHERE S_COMMENT LIKE '%%Customer%%Complaints%%')
GROUP BY P_BRAND, P_TYPE, P_SIZE
ORDER BY SUPPLIER_CNT DESC, P_BRAND, P_TYPE, P_SIZE;
-- TPC_H Query 17 - Small-Quantity-Order Revenue
SELECT SUM(L_EXTENDEDPRICE)/7.0 AS AVG_YEARLY FROM LINEITEM, "PART"
WHERE P_PARTKEY = L_PARTKEY AND P_BRAND = 'Brand#23' AND P_CONTAINER = 'MED BOX'
AND L_QUANTITY < (SELECT 0.2*AVG(L_QUANTITY) FROM LINEITEM WHERE L_PARTKEY = P_PARTKEY);
-- TPC_H Query 18 - Large Volume Customer
SELECT TOP 100 C_NAME, C_CUSTKEY, O_ORDERKEY, O_ORDERDATE, O_TOTALPRICE, SUM(L_QUANTITY)
FROM CUSTOMER, ORDERS, LINEITEM
WHERE O_ORDERKEY IN (SELECT L_ORDERKEY FROM LINEITEM GROUP BY L_ORDERKEY HAVING
SUM(L_QUANTITY) > 300) AND C_CUSTKEY = O_CUSTKEY AND O_ORDERKEY = L_ORDERKEY
GROUP BY C_NAME, C_CUSTKEY, O_ORDERKEY, O_ORDERDATE, O_TOTALPRICE
ORDER BY O_TOTALPRICE DESC, O_ORDERDATE;
-- TPC_H Query 19 - Discounted Revenue
SELECT SUM(L_EXTENDEDPRICE* (1 - L_DISCOUNT)) AS REVENUE
FROM LINEITEM, "PART"
WHERE (P_PARTKEY = L_PARTKEY AND P_BRAND = 'Brand#12' AND P_CONTAINER IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND L_QUANTITY >= 1 AND L_QUANTITY <= 1 + 10 AND P_SIZE BETWEEN 1 AND 5
AND L_SHIPMODE IN ('AIR', 'AIR REG') AND L_SHIPINSTRUCT = 'DELIVER IN PERSON')
OR (P_PARTKEY = L_PARTKEY AND P_BRAND ='Brand#23' AND P_CONTAINER IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND L_QUANTITY >=10 AND L_QUANTITY <=10 + 10 AND P_SIZE BETWEEN 1 AND 10
AND L_SHIPMODE IN ('AIR', 'AIR REG') AND L_SHIPINSTRUCT = 'DELIVER IN PERSON')
OR (P_PARTKEY = L_PARTKEY AND P_BRAND = 'Brand#34' AND P_CONTAINER IN ( 'LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND L_QUANTITY >=20 AND L_QUANTITY <= 20 + 10 AND P_SIZE BETWEEN 1 AND 15
AND L_SHIPMODE IN ('AIR', 'AIR REG') AND L_SHIPINSTRUCT = 'DELIVER IN PERSON');
-- TPC_H Query 20 - Potential Part Promotion
SELECT S_NAME, S_ADDRESS FROM SUPPLIER, NATION
WHERE S_SUPPKEY IN (SELECT PS_SUPPKEY FROM PARTSUPP
WHERE PS_PARTKEY in (SELECT P_PARTKEY FROM "PART" WHERE P_NAME like 'forest%%') AND
PS_AVAILQTY > (SELECT 0.5*sum(L_QUANTITY) FROM LINEITEM WHERE L_PARTKEY = PS_PARTKEY AND
L_SUPPKEY = PS_SUPPKEY AND L_SHIPDATE >= '1994-01-01' AND
L_SHIPDATE < dateadd(yy,1,'1994-01-01'))) AND S_NATIONKEY = N_NATIONKEY AND N_NAME = 'CANADA'
ORDER BY S_NAME;
-- TPC_H Query 21 - Suppliers Who Kept Orders Waiting
SELECT TOP 100 S_NAME, COUNT(*) AS NUMWAIT
FROM SUPPLIER, LINEITEM L1, ORDERS, NATION WHERE S_SUPPKEY = L1.L_SUPPKEY AND
O_ORDERKEY = L1.L_ORDERKEY AND O_ORDERSTATUS = 'F' AND L1.L_RECEIPTDATE> L1.L_COMMITDATE
AND EXISTS (SELECT * FROM LINEITEM L2 WHERE L2.L_ORDERKEY = L1.L_ORDERKEY
AND L2.L_SUPPKEY <> L1.L_SUPPKEY) AND
NOT EXISTS (SELECT * FROM LINEITEM L3 WHERE L3.L_ORDERKEY = L1.L_ORDERKEY AND
L3.L_SUPPKEY <> L1.L_SUPPKEY AND L3.L_RECEIPTDATE > L3.L_COMMITDATE) AND
S_NATIONKEY = N_NATIONKEY AND N_NAME = 'SAUDI ARABIA'
GROUP BY S_NAME
ORDER BY NUMWAIT DESC, S_NAME;
-- TPC_H Query 22 - Global Sales Opportunity */
SELECT CNTRYCODE, COUNT(*) AS NUMCUST, SUM(C_ACCTBAL) AS TOTACCTBAL
FROM (SELECT SUBSTRING(C_PHONE,1,2) AS CNTRYCODE, C_ACCTBAL
FROM CUSTOMER WHERE SUBSTRING(C_PHONE,1,2) IN ('13', '31', '23', '29', '30', '18', '17') AND
C_ACCTBAL > (SELECT AVG(C_ACCTBAL) FROM CUSTOMER WHERE C_ACCTBAL > 0.00 AND
SUBSTRING(C_PHONE,1,2) IN ('13', '31', '23', '29', '30', '18', '17')) AND
NOT EXISTS ( SELECT * FROM ORDERS WHERE O_CUSTKEY = C_CUSTKEY)) AS CUSTSALE
GROUP BY CNTRYCODE
ORDER BY CNTRYCODE;

View File

@ -0,0 +1,9 @@
-- Source: http://www.sqlserver-dba.com/2011/09/this-is-a-followup-on-my-earlier-post-of-sql-server-test-data-generation-testing-tools-i-had-some-requests-for-my-set-up-pr.html
-- TPC_H Query 16 - Parts/Supplier Relationship
-- Counts distinct suppliers per (brand, type, size) for parts that are not
-- 'Brand#45', whose type does not start with 'MEDIUM POLISHED', and whose size
-- is one of the eight listed values. Suppliers whose comment matches the
-- 'Customer ... Complaints' pattern are excluded via the NOT IN subquery.
SELECT P_BRAND, P_TYPE, P_SIZE, COUNT(DISTINCT PS_SUPPKEY) AS SUPPLIER_CNT
FROM PARTSUPP, "PART"
WHERE P_PARTKEY = PS_PARTKEY AND P_BRAND <> 'Brand#45' AND P_TYPE NOT LIKE 'MEDIUM POLISHED%%'
AND P_SIZE IN (49, 14, 23, 45, 19, 3, 36, 9) AND PS_SUPPKEY NOT IN (SELECT S_SUPPKEY FROM SUPPLIER
WHERE S_COMMENT LIKE '%%Customer%%Complaints%%')
GROUP BY P_BRAND, P_TYPE, P_SIZE
ORDER BY SUPPLIER_CNT DESC, P_BRAND, P_TYPE, P_SIZE;

View File

@ -0,0 +1,4 @@
-- TPC_H Query 17 - Small-Quantity-Order Revenue
-- Sums L_EXTENDEDPRICE (divided by 7.0) over line items of 'Brand#23' parts in
-- 'MED BOX' containers whose quantity is below 0.2 times the average quantity
-- of line items for the same part (correlated subquery on P_PARTKEY).
SELECT SUM(L_EXTENDEDPRICE)/7.0 AS AVG_YEARLY FROM LINEITEM, "PART"
WHERE P_PARTKEY = L_PARTKEY AND P_BRAND = 'Brand#23' AND P_CONTAINER = 'MED BOX'
AND L_QUANTITY < (SELECT 0.2*AVG(L_QUANTITY) FROM LINEITEM WHERE L_PARTKEY = P_PARTKEY);

View File

@ -0,0 +1,7 @@
-- TPC_H Query 18 - Large Volume Customer
-- Lists customer/order pairs together with the order's summed line-item
-- quantity, restricted to orders whose total quantity exceeds 300 (IN
-- subquery with HAVING). Results are ordered by total price (descending),
-- then order date.
-- NOTE(review): "TOP 100" looks like SQL Server syntax - confirm this is the
-- row-limit form the parser is meant to accept.
SELECT TOP 100 C_NAME, C_CUSTKEY, O_ORDERKEY, O_ORDERDATE, O_TOTALPRICE, SUM(L_QUANTITY)
FROM CUSTOMER, ORDERS, LINEITEM
WHERE O_ORDERKEY IN (SELECT L_ORDERKEY FROM LINEITEM GROUP BY L_ORDERKEY HAVING
SUM(L_QUANTITY) > 300) AND C_CUSTKEY = O_CUSTKEY AND O_ORDERKEY = L_ORDERKEY
GROUP BY C_NAME, C_CUSTKEY, O_ORDERKEY, O_ORDERDATE, O_TOTALPRICE
ORDER BY O_TOTALPRICE DESC, O_ORDERDATE;

View File

@ -0,0 +1,9 @@
-- TPC_H Query 19 - Discounted Revenue
-- Sums discounted revenue (L_EXTENDEDPRICE * (1 - L_DISCOUNT)) over line items
-- matching any of three OR-ed alternatives. Each alternative fixes a brand, a
-- set of containers, a quantity range and a part-size range:
--   'Brand#12', SM containers,  quantity 1..11,  size 1..5
--   'Brand#23', MED containers, quantity 10..20, size 1..10
--   'Brand#34', LG containers,  quantity 20..30, size 1..15
-- All three also require ship mode 'AIR' or 'AIR REG' and the ship instruction
-- 'DELIVER IN PERSON'.
SELECT SUM(L_EXTENDEDPRICE* (1 - L_DISCOUNT)) AS REVENUE
FROM LINEITEM, "PART"
WHERE (P_PARTKEY = L_PARTKEY AND P_BRAND = 'Brand#12' AND P_CONTAINER IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND L_QUANTITY >= 1 AND L_QUANTITY <= 1 + 10 AND P_SIZE BETWEEN 1 AND 5
AND L_SHIPMODE IN ('AIR', 'AIR REG') AND L_SHIPINSTRUCT = 'DELIVER IN PERSON')
OR (P_PARTKEY = L_PARTKEY AND P_BRAND ='Brand#23' AND P_CONTAINER IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND L_QUANTITY >=10 AND L_QUANTITY <=10 + 10 AND P_SIZE BETWEEN 1 AND 10
AND L_SHIPMODE IN ('AIR', 'AIR REG') AND L_SHIPINSTRUCT = 'DELIVER IN PERSON')
OR (P_PARTKEY = L_PARTKEY AND P_BRAND = 'Brand#34' AND P_CONTAINER IN ( 'LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND L_QUANTITY >=20 AND L_QUANTITY <= 20 + 10 AND P_SIZE BETWEEN 1 AND 15
AND L_SHIPMODE IN ('AIR', 'AIR REG') AND L_SHIPINSTRUCT = 'DELIVER IN PERSON');

View File

@ -0,0 +1,8 @@
-- TPC_H Query 20 - Potential Part Promotion
-- Lists name and address of suppliers in nation 'CANADA' that appear in
-- PARTSUPP for a part whose name starts with 'forest' and whose available
-- quantity exceeds 0.5 times the summed quantity of matching line items
-- (same part and supplier) shipped on or after '1994-01-01' and before
-- dateadd(yy,1,'1994-01-01').
-- This file is parsed on its own by TPCHQueryDetailTest, which expects exactly
-- one statement of type SELECT; keep that in mind when editing it.
SELECT S_NAME, S_ADDRESS FROM SUPPLIER, NATION
WHERE S_SUPPKEY IN (SELECT PS_SUPPKEY FROM PARTSUPP
WHERE PS_PARTKEY in (SELECT P_PARTKEY FROM "PART" WHERE P_NAME like 'forest%%') AND
PS_AVAILQTY > (SELECT 0.5*sum(L_QUANTITY) FROM LINEITEM WHERE L_PARTKEY = PS_PARTKEY AND
L_SUPPKEY = PS_SUPPKEY AND L_SHIPDATE >= '1994-01-01' AND
L_SHIPDATE < dateadd(yy,1,'1994-01-01'))) AND S_NATIONKEY = N_NATIONKEY AND N_NAME = 'CANADA'
ORDER BY S_NAME;

11
test/queries/tpc-h-21.sql Normal file
View File

@ -0,0 +1,11 @@
-- TPC_H Query 21 - Suppliers Who Kept Orders Waiting
-- Counts, per supplier name in nation 'SAUDI ARABIA', the line items (L1) on
-- orders with status 'F' that were received after their commit date, where
--   * another supplier also has a line item on the same order (EXISTS, L2), and
--   * no other supplier on that order was late (NOT EXISTS, L3).
-- Results are ordered by the count (descending), then supplier name.
SELECT TOP 100 S_NAME, COUNT(*) AS NUMWAIT
FROM SUPPLIER, LINEITEM L1, ORDERS, NATION WHERE S_SUPPKEY = L1.L_SUPPKEY AND
O_ORDERKEY = L1.L_ORDERKEY AND O_ORDERSTATUS = 'F' AND L1.L_RECEIPTDATE> L1.L_COMMITDATE
AND EXISTS (SELECT * FROM LINEITEM L2 WHERE L2.L_ORDERKEY = L1.L_ORDERKEY
AND L2.L_SUPPKEY <> L1.L_SUPPKEY) AND
NOT EXISTS (SELECT * FROM LINEITEM L3 WHERE L3.L_ORDERKEY = L1.L_ORDERKEY AND
L3.L_SUPPKEY <> L1.L_SUPPKEY AND L3.L_RECEIPTDATE > L3.L_COMMITDATE) AND
S_NATIONKEY = N_NATIONKEY AND N_NAME = 'SAUDI ARABIA'
GROUP BY S_NAME
ORDER BY NUMWAIT DESC, S_NAME;

View File

@ -0,0 +1,9 @@
-- TPC_H Query 22 - Global Sales Opportunity
-- Groups customers by the first two characters of C_PHONE (CNTRYCODE) and
-- reports, per code, the number of customers and the sum of their account
-- balances. Only customers are included whose code is one of the seven listed
-- values, whose balance is above the average positive balance of customers
-- with those codes, and who have no row in ORDERS.
SELECT CNTRYCODE, COUNT(*) AS NUMCUST, SUM(C_ACCTBAL) AS TOTACCTBAL
FROM (SELECT SUBSTRING(C_PHONE,1,2) AS CNTRYCODE, C_ACCTBAL
FROM CUSTOMER WHERE SUBSTRING(C_PHONE,1,2) IN ('13', '31', '23', '29', '30', '18', '17') AND
C_ACCTBAL > (SELECT AVG(C_ACCTBAL) FROM CUSTOMER WHERE C_ACCTBAL > 0.00 AND
SUBSTRING(C_PHONE,1,2) IN ('13', '31', '23', '29', '30', '18', '17')) AND
NOT EXISTS ( SELECT * FROM ORDERS WHERE O_CUSTKEY = C_CUSTKEY)) AS CUSTSALE
GROUP BY CNTRYCODE
ORDER BY CNTRYCODE;

View File

@ -4,7 +4,7 @@
#define TEST_PARSE_SQL_QUERY(query, result, numStatements) \ #define TEST_PARSE_SQL_QUERY(query, result, numStatements) \
hsql::SQLParserResult result; \ hsql::SQLParserResult result; \
hsql::SQLParser::parseSQLString(query, &result); \ hsql::SQLParser::parse(query, &result); \
ASSERT(result.isValid()); \ ASSERT(result.isValid()); \
ASSERT_EQ(result.size(), numStatements); ASSERT_EQ(result.size(), numStatements);

View File

@ -80,7 +80,7 @@ TEST(AutoGrammarTest) {
// Parsing // Parsing
SQLParserResult result; SQLParserResult result;
SQLParser::parseSQLString(sql.c_str(), &result); SQLParser::parse(sql.c_str(), &result);
end = std::chrono::system_clock::now(); end = std::chrono::system_clock::now();
std::chrono::duration<double> elapsed_seconds = end - start; std::chrono::duration<double> elapsed_seconds = end - start;

44
test/sql_parser.cpp Normal file
View File

@ -0,0 +1,44 @@
#include "thirdparty/microtest/microtest.h"
#include <iostream>
#include <map>
#include <string>
#include "sql_asserts.h"
#include "SQLParser.h"
#include "parser/bison_parser.h"
using namespace hsql;
// Tokenizes `query` with SQLParser::tokenize and asserts that the resulting
// token ids equal `expected_tokens`, element by element and in order.
void test_tokens(const std::string& query, const std::vector<int16_t>& expected_tokens) {
  std::vector<int16_t> tokens;
  ASSERT(SQLParser::tokenize(query, &tokens));

  // Lengths are compared before the element-wise check.
  ASSERT_EQ(expected_tokens.size(), tokens.size());

  unsigned i = 0;
  while (i < expected_tokens.size()) {
    ASSERT_EQ(expected_tokens[i], tokens[i]);
    ++i;
  }
}
// Checks the token sequences produced for two small SELECT statements.
TEST(SQLParserTokenizeTest) {
  // Single-character tokens ('*', ',', ';') are expected as their character
  // value; everything else as the SQL_* token constants.
  const std::vector<int16_t> select_star_tokens = { SQL_SELECT, '*', SQL_FROM, SQL_IDENTIFIER, ';' };
  test_tokens("SELECT * FROM test;", select_star_tokens);

  const std::vector<int16_t> select_with_hint_tokens = { SQL_SELECT, SQL_IDENTIFIER, ',', SQL_STRING, SQL_FROM, SQL_IDENTIFIER, SQL_WITH, SQL_HINT, ';' };
  test_tokens("SELECT a, 'b' FROM test WITH HINT;", select_with_hint_tokens);
}
// Verifies that a token sequence can be packed into a std::u16string and used
// as a std::map key that maps back to the original query text.
TEST(SQLParserTokenizeStringifyTest) {
  const std::string query("SELECT * FROM test;");

  std::vector<int16_t> tokens;
  ASSERT(SQLParser::tokenize(query, &tokens));

  // Build the u16string from the token ids, one code unit per token.
  std::u16string token_string;
  token_string.assign(tokens.cbegin(), tokens.cend());

  // Store the query under its token string and read it back.
  std::map<std::u16string, std::string> cache;
  cache[token_string] = query;

  // The cached value must equal the query but be a separate object.
  ASSERT(query == cache[token_string]);
  ASSERT(&query != &cache[token_string]);
}

View File

@ -14,7 +14,7 @@ using namespace hsql;
TEST(DeleteStatementTest) { TEST(DeleteStatementTest) {
SQLParserResult result; SQLParserResult result;
SQLParser::parseSQLString("DELETE FROM students WHERE grade > 2.0;", &result); SQLParser::parse("DELETE FROM students WHERE grade > 2.0;", &result);
ASSERT(result.isValid()); ASSERT(result.isValid());
ASSERT_EQ(result.size(), 1); ASSERT_EQ(result.size(), 1);
@ -30,7 +30,7 @@ TEST(DeleteStatementTest) {
TEST(CreateStatementTest) { TEST(CreateStatementTest) {
SQLParserResult result; SQLParserResult result;
SQLParser::parseSQLString("CREATE TABLE students (name TEXT, student_number INT, city INTEGER, grade DOUBLE)", &result); SQLParser::parse("CREATE TABLE students (name TEXT, student_number INT, city INTEGER, grade DOUBLE)", &result);
ASSERT(result.isValid()); ASSERT(result.isValid());
ASSERT_EQ(result.size(), 1); ASSERT_EQ(result.size(), 1);
@ -54,7 +54,7 @@ TEST(CreateStatementTest) {
TEST(UpdateStatementTest) { TEST(UpdateStatementTest) {
SQLParserResult result; SQLParserResult result;
SQLParser::parseSQLString("UPDATE students SET grade = 5.0, name = 'test' WHERE name = 'Max Mustermann';", &result); SQLParser::parse("UPDATE students SET grade = 5.0, name = 'test' WHERE name = 'Max Mustermann';", &result);
ASSERT(result.isValid()); ASSERT(result.isValid());
ASSERT_EQ(result.size(), 1); ASSERT_EQ(result.size(), 1);
@ -130,7 +130,7 @@ TEST(ReleaseStatementTest) {
SQLParserResult parse_and_move(std::string query) { SQLParserResult parse_and_move(std::string query) {
hsql::SQLParserResult result; hsql::SQLParserResult result;
hsql::SQLParser::parseSQLString(query, &result); hsql::SQLParser::parse(query, &result);
// Moves on return. // Moves on return.
return result; return result;
} }

View File

@ -17,7 +17,7 @@ CONFLICT_RET=0
################################################# #################################################
# Running SQL parser tests. # Running SQL parser tests.
printf "\n${GREEN}Running SQL parser tests...${NC}\n" printf "\n${GREEN}Running SQL parser tests...${NC}\n"
bin/sql_tests -f "test/valid_queries.sql" bin/tests -f "test/valid_queries.sql"
SQL_TEST_RET=$? SQL_TEST_RET=$?
if [ $SQL_TEST_RET -eq 0 ]; then if [ $SQL_TEST_RET -eq 0 ]; then
@ -31,7 +31,7 @@ fi
# Running memory leak checks. # Running memory leak checks.
printf "\n${GREEN}Running memory leak checks...${NC}\n" printf "\n${GREEN}Running memory leak checks...${NC}\n"
valgrind --leak-check=full --error-exitcode=200 --log-fd=3 \ valgrind --leak-check=full --error-exitcode=200 --log-fd=3 \
./bin/sql_tests -f "test/valid_queries.sql" 3>&1 >/dev/null 2>/dev/null ./bin/tests -f "test/valid_queries.sql" 3>&1 >/dev/null 2>/dev/null
MEM_LEAK_RET=$? MEM_LEAK_RET=$?
if [ $MEM_LEAK_RET -ne 200 ]; then if [ $MEM_LEAK_RET -ne 200 ]; then

View File

@ -31,16 +31,30 @@ TEST(TPCHQueryGrammarTests) {
"test/queries/tpc-h-08.sql", "test/queries/tpc-h-08.sql",
"test/queries/tpc-h-09.sql", "test/queries/tpc-h-09.sql",
"test/queries/tpc-h-10.sql", "test/queries/tpc-h-10.sql",
"test/queries/tpc-h-11-15.sql", "test/queries/tpc-h-11.sql",
"test/queries/tpc-h-16-22.sql" "test/queries/tpc-h-12.sql",
"test/queries/tpc-h-13.sql",
"test/queries/tpc-h-14.sql",
"test/queries/tpc-h-15.sql",
"test/queries/tpc-h-16.sql",
"test/queries/tpc-h-17.sql",
"test/queries/tpc-h-18.sql",
"test/queries/tpc-h-19.sql",
"test/queries/tpc-h-20.sql",
"test/queries/tpc-h-21.sql",
"test/queries/tpc-h-22.sql",
}; };
int testsFailed = 0; int testsFailed = 0;
std::string concatenated = "";
for (const std::string& file_path : files) { for (const std::string& file_path : files) {
std::string query = readFileContents(file_path); std::string query = readFileContents(file_path);
concatenated += query;
if (concatenated.back() != ';') concatenated += ';';
SQLParserResult result; SQLParserResult result;
SQLParser::parseSQLString(query.c_str(), &result); SQLParser::parse(query.c_str(), &result);
if (!result.isValid()) { if (!result.isValid()) {
mt::printFailed(file_path.c_str()); mt::printFailed(file_path.c_str());
printf("%s %s (L%d:%d)%s\n", mt::red(), result.errorMsg(), result.errorLine(), result.errorColumn(), mt::def()); printf("%s %s (L%d:%d)%s\n", mt::red(), result.errorMsg(), result.errorLine(), result.errorColumn(), mt::def());
@ -49,18 +63,29 @@ TEST(TPCHQueryGrammarTests) {
mt::printOk(file_path.c_str()); mt::printOk(file_path.c_str());
} }
} }
SQLParserResult result;
SQLParser::parse(concatenated.c_str(), &result);
if (!result.isValid()) {
mt::printFailed("TPCHAllConcatenated");
printf("%s %s (L%d:%d)%s\n", mt::red(), result.errorMsg(), result.errorLine(), result.errorColumn(), mt::def());
++testsFailed;
} else {
mt::printOk("TPCHAllConcatenated");
}
ASSERT_EQ(testsFailed, 0); ASSERT_EQ(testsFailed, 0);
} }
TEST(TPCHQueryDetailTest) { TEST(TPCHQueryDetailTest) {
std::string query = readFileContents("test/queries/tpc-h-16-22.sql"); std::string query = readFileContents("test/queries/tpc-h-20.sql");
SQLParserResult result; SQLParserResult result;
SQLParser::parseSQLString(query.c_str(), &result); SQLParser::parse(query.c_str(), &result);
ASSERT(result.isValid()); ASSERT(result.isValid());
ASSERT_EQ(result.size(), 7); ASSERT_EQ(result.size(), 1);
const SQLStatement* stmt20 = result.getStatement(4); const SQLStatement* stmt20 = result.getStatement(0);
ASSERT_EQ(stmt20->type(), kStmtSelect); ASSERT_EQ(stmt20->type(), kStmtSelect);
const SelectStatement* select20 = (const SelectStatement*) stmt20; const SelectStatement* select20 = (const SelectStatement*) stmt20;