From dfbc50ed27fc503167e1d6ed66131b3524bb1e49 Mon Sep 17 00:00:00 2001 From: Pedro Date: Thu, 13 Nov 2014 01:27:47 +0100 Subject: [PATCH] Implemented UNION and OFFSET --- run_tests.sh | 3 +- src/lib/CreateStatement.h | 6 ++- src/lib/Expr.h | 8 ++- src/lib/ImportStatement.h | 6 ++- src/lib/SelectStatement.h | 25 +++++++-- src/lib/Statement.h | 18 +++++-- src/lib/Table.h | 11 +++- src/lib/sqlhelper.cpp | 8 ++- src/parser/bison_parser.y | 84 +++++++++++++++++++---------- src/parser/flex_lexer.l | 66 ++++++++++++----------- src/parser/keywordlist_generator.py | 3 +- src/parser/sql_keywords.txt | 2 + test/valid_queries.sql | 4 ++ 13 files changed, 167 insertions(+), 77 deletions(-) diff --git a/run_tests.sh b/run_tests.sh index 15ee6e3..fd5ebc3 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -10,8 +10,9 @@ make grammar_test -C src/ echo "\n\n" -./bin/analysis "SELECT t1.a AS id, t1.b, t2.c FROM \"tbl\" AS t1 JOIN (SELECT * FROM foo JOIN bar ON foo.id = bar.id) t2 ON t1.a = t2.b WHERE (t1.b OR NOT t1.a) AND t2.c = 12.5" +# ./bin/analysis "SELECT t1.a AS id, t1.b, t2.c FROM \"tbl\" AS t1 JOIN (SELECT * FROM foo JOIN bar ON foo.id = bar.id) t2 ON t1.a = t2.b WHERE (t1.b OR NOT t1.a) AND t2.c = 12.5" # ./bin/analysis "CREATE TABLE \"table\" FROM TBL FILE 'students.tbl'" +./bin/analysis "SELECT * FROM t1 UNION (SELECT abc AS t FROM t2 ORDER BY col3 LIMIT 10) ORDER BY col1;" echo "\n\n" diff --git a/src/lib/CreateStatement.h b/src/lib/CreateStatement.h index d36bb81..8930ee4 100644 --- a/src/lib/CreateStatement.h +++ b/src/lib/CreateStatement.h @@ -19,7 +19,11 @@ typedef enum { * @struct CreateStatement */ struct CreateStatement : Statement { - CreateStatement() : Statement(kStmtCreate) {}; + CreateStatement() : + Statement(kStmtCreate), + file_path(NULL), + table_name(NULL) {}; + virtual ~CreateStatement(); // defined in destructors.cpp CreateType create_type; diff --git a/src/lib/Expr.h b/src/lib/Expr.h index 97528d3..69348fd 100644 --- a/src/lib/Expr.h +++ b/src/lib/Expr.h @@ -50,7 +50,13 @@ struct Expr { - Expr(ExprType type) : type(type), expr(NULL), expr2(NULL), name(NULL), table(NULL), alias(NULL) {}; + Expr(ExprType type) : + type(type), + expr(NULL), + expr2(NULL), + name(NULL), + table(NULL), + alias(NULL) {}; // Interesting side-effect: // Making the destructor virtual causes segmentation faults diff --git a/src/lib/ImportStatement.h b/src/lib/ImportStatement.h index a36c437..058956e 100644 --- a/src/lib/ImportStatement.h +++ b/src/lib/ImportStatement.h @@ -19,7 +19,11 @@ typedef enum { * @struct ImportStatement */ struct ImportStatement : Statement { - ImportStatement() : Statement(kStmtImport) {}; + ImportStatement() : + Statement(kStmtImport), + file_path(NULL), + table_name(NULL) {}; + virtual ~ImportStatement(); // defined in destructors.cpp ImportFileType file_type; diff --git a/src/lib/SelectStatement.h b/src/lib/SelectStatement.h index b25048b..7d447a7 100644 --- a/src/lib/SelectStatement.h +++ b/src/lib/SelectStatement.h @@ -20,7 +20,10 @@ typedef enum { } OrderType; struct OrderDescription { - OrderDescription(OrderType type, Expr* expr) : type(type), expr(expr) {} + OrderDescription(OrderType type, Expr* expr) : + type(type), + expr(expr) {} + virtual ~OrderDescription(); // defined in destructors.cpp OrderType type; @@ -34,7 +37,10 @@ struct OrderDescription { const int64_t kNoLimit = -1; const int64_t kNoOffset = -1; struct LimitDescription { - LimitDescription(int64_t limit, int64_t offset) : limit(limit), offset(offset) {} + LimitDescription(int64_t limit, int64_t offset) : + limit(limit), + offset(offset) {} + int64_t limit; int64_t offset; }; @@ -44,15 +50,24 @@ struct LimitDescription { * Representation of a full select statement. */ struct SelectStatement : Statement { - SelectStatement() : Statement(kStmtSelect) {}; + SelectStatement() : + Statement(kStmtSelect), + from_table(NULL), + select_list(NULL), + where_clause(NULL), + group_by(NULL), + order(NULL), + limit(NULL), + union_select(NULL) {}; + virtual ~SelectStatement(); // defined in destructors.cpp TableRef* from_table; List* select_list; - Expr* where_clause; - + Expr* where_clause; List* group_by; + SelectStatement* union_select; OrderDescription* order; LimitDescription* limit; }; diff --git a/src/lib/Statement.h b/src/lib/Statement.h index dc0e95d..9b5ce3b 100644 --- a/src/lib/Statement.h +++ b/src/lib/Statement.h @@ -11,7 +11,7 @@ namespace hsql { typedef enum { - kStmtError, + kStmtError, // Unused kStmtSelect, kStmtImport, // Following types are planned but not supported yet @@ -26,7 +26,9 @@ typedef enum { struct Statement { - Statement(StatementType type) : type(type) {}; + Statement(StatementType type) : + type(type) {}; + virtual ~Statement(); // defined in destructors.cpp StatementType type; @@ -35,8 +37,16 @@ struct Statement { class StatementList : public List { public: - StatementList() : List(), isValid(true) {}; - StatementList(Statement* stmt) : List(stmt), isValid(true) {}; + StatementList() : + List(), + isValid(true), + parser_msg(NULL) {}; + + StatementList(Statement* stmt) : + List(stmt), + isValid(true), + parser_msg(NULL) {}; + virtual ~StatementList(); // defined in destructors.cpp bool isValid; diff --git a/src/lib/Table.h b/src/lib/Table.h index 62b389a..1f3e0e4 100644 --- a/src/lib/Table.h +++ b/src/lib/Table.h @@ -29,7 +29,16 @@ typedef enum { typedef struct TableRef TableRef; struct TableRef { - TableRef(TableRefType type) : type(type) {} + TableRef(TableRefType type) : + type(type), + name(NULL), + alias(NULL), + select(NULL), + list(NULL), + left(NULL), + right(NULL), + join_condition(NULL) {} + virtual ~TableRef(); // defined in destructors.cpp TableRefType type; diff --git a/src/lib/sqlhelper.cpp b/src/lib/sqlhelper.cpp index 688b779..a5afeb9 100644 --- a/src/lib/sqlhelper.cpp +++ b/src/lib/sqlhelper.cpp @@ -70,7 +70,7 @@ void printExpression(Expr* expr, uint num_indent) { default: fprintf(stderr, "Unrecognized expression type %d\n", expr->type); return; } if (expr->alias != NULL) { - inprint("Alias", num_indent); inprint(expr->alias, num_indent+1); + inprint("Alias", num_indent+1); inprint(expr->alias, num_indent+2); } } @@ -87,6 +87,12 @@ void printSelectStatementInfo(SelectStatement* stmt, uint num_indent) { printExpression(stmt->where_clause, num_indent+2); } + + if (stmt->union_select != NULL) { + inprint("Union:", num_indent+1); + printSelectStatementInfo(stmt->union_select, num_indent+2); + } + if (stmt->order != NULL) { inprint("OrderBy:", num_indent+1); printExpression(stmt->order->expr, num_indent+2); diff --git a/src/parser/bison_parser.y b/src/parser/bison_parser.y index d381160..c107ed1 100644 --- a/src/parser/bison_parser.y +++ b/src/parser/bison_parser.y @@ -105,12 +105,12 @@ typedef void* yyscan_t; %token NOTEQUALS LESSEQ GREATEREQ /* SQL Keywords */ -%token DISTINCT DATABASE NATURAL CONTROL BETWEEN SELECT -%token HAVING OFFSET CREATE IMPORT RENAME DELETE INSERT -%token UPDATE UNLOAD COLUMN ISNULL WHERE GROUP ORDER LIMIT -%token INNER OUTER RIGHT CROSS USING TABLE INDEX ALTER FROM -%token DESC JOIN LEFT FILE DROP LOAD INTO NULL LIKE TOP ASC -%token CSV TBL NOT AND BY ON AS OR IN IS +%token DATABASE DISTINCT BETWEEN CONTROL NATURAL COLUMN +%token CREATE DELETE HAVING IMPORT INSERT ISNULL OFFSET +%token RENAME SELECT UNLOAD UPDATE ALTER CROSS GROUP INDEX +%token INNER LIMIT ORDER OUTER RIGHT TABLE UNION USING WHERE +%token DESC DROP FILE FROM INTO JOIN LEFT LIKE LOAD NULL ALL +%token AND ASC CSV NOT TBL TOP AS BY IN IS ON OR /********************************* @@ -118,7 +118,7 @@ typedef void* yyscan_t; *********************************/ %type statement_list %type statement -%type select_statement +%type select_statement select_ref select_with_paren select_no_paren select_clause %type import_statement %type create_statement %type table_name opt_alias alias file_path @@ -126,12 +126,12 @@ typedef void* yyscan_t; %type join_clause join_table %type expr scalar_expr unary_expr binary_expr function_expr star_expr expr_alias %type column_name literal int_literal num_literal string_literal -%type comp_expr where_clause join_condition -%type expr_list group_clause select_list +%type comp_expr opt_where join_condition +%type expr_list opt_group select_list %type table_ref_commalist -%type order_by_clause -%type limit_clause -%type order_type +%type opt_order +%type opt_limit +%type opt_order_type %type import_file_type @@ -226,18 +226,42 @@ create_statement: ******************************/ select_statement: - SELECT select_list from_clause where_clause group_clause order_by_clause limit_clause - { - SelectStatement* s = new SelectStatement(); - s->select_list = $2; - s->from_table = $3; - s->where_clause = $4; - s->group_by = $5; - s->order = $6; - s->limit = $7; - $$ = s; + select_with_paren + | select_no_paren + ; + +select_with_paren: + '(' select_no_paren ')' { $$ = $2; } + | '(' select_with_paren ')' { $$ = $2; } + ; + +select_no_paren: + select_clause opt_order opt_limit { + $$ = $1; + $$->order = $2; + $$->limit = $3; + } + | select_ref UNION select_ref opt_order opt_limit { + $$ = $1; + $$->union_select = $3; + $$->order = $4; + $$->limit = $5; + } + ; + +select_ref: + select_clause + | select_with_paren + ; + +select_clause: + SELECT select_list from_clause opt_where opt_group { + $$ = new SelectStatement(); + $$->select_list = $2; + $$->from_table = $3; + $$->where_clause = $4; + $$->group_by = $5; } - | '(' select_statement ')' { $$ = $2; } ; @@ -251,31 +275,33 @@ from_clause: ; -where_clause: +opt_where: WHERE expr { $$ = $2; } | /* empty */ { $$ = NULL; } ; // TODO: having -group_clause: +opt_group: GROUP BY expr_list { $$ = $3; } | /* empty */ { $$ = NULL; } ; -order_by_clause: - ORDER BY expr order_type { $$ = new OrderDescription($4, $3); } +opt_order: + ORDER BY expr opt_order_type { $$ = new OrderDescription($4, $3); } | /* empty */ { $$ = NULL; } ; -order_type: +opt_order_type: ASC { $$ = kOrderAsc; } | DESC { $$ = kOrderDesc; } | /* empty */ { $$ = kOrderAsc; } ; -limit_clause: + +opt_limit: LIMIT int_literal { $$ = new LimitDescription($2->ival, kNoOffset); delete $2; } + | LIMIT int_literal OFFSET int_literal { $$ = new LimitDescription($2->ival, $4->ival); delete $2; delete $4; } | /* empty */ { $$ = NULL; } ; diff --git a/src/parser/flex_lexer.l b/src/parser/flex_lexer.l index aec4e0a..1f49705 100644 --- a/src/parser/flex_lexer.l +++ b/src/parser/flex_lexer.l @@ -56,57 +56,59 @@ [ \t\n]+ /* skip whitespace */; -DISTINCT TOKEN(DISTINCT) DATABASE TOKEN(DATABASE) -NATURAL TOKEN(NATURAL) -CONTROL TOKEN(CONTROL) +DISTINCT TOKEN(DISTINCT) BETWEEN TOKEN(BETWEEN) -SELECT TOKEN(SELECT) -HAVING TOKEN(HAVING) -OFFSET TOKEN(OFFSET) -CREATE TOKEN(CREATE) -IMPORT TOKEN(IMPORT) -RENAME TOKEN(RENAME) -DELETE TOKEN(DELETE) -INSERT TOKEN(INSERT) -UPDATE TOKEN(UPDATE) -UNLOAD TOKEN(UNLOAD) +CONTROL TOKEN(CONTROL) +NATURAL TOKEN(NATURAL) COLUMN TOKEN(COLUMN) +CREATE TOKEN(CREATE) +DELETE TOKEN(DELETE) +HAVING TOKEN(HAVING) +IMPORT TOKEN(IMPORT) +INSERT TOKEN(INSERT) ISNULL TOKEN(ISNULL) -WHERE TOKEN(WHERE) +OFFSET TOKEN(OFFSET) +RENAME TOKEN(RENAME) +SELECT TOKEN(SELECT) +UNLOAD TOKEN(UNLOAD) +UPDATE TOKEN(UPDATE) +ALTER TOKEN(ALTER) +CROSS TOKEN(CROSS) GROUP TOKEN(GROUP) -ORDER TOKEN(ORDER) -LIMIT TOKEN(LIMIT) +INDEX TOKEN(INDEX) INNER TOKEN(INNER) +LIMIT TOKEN(LIMIT) +ORDER TOKEN(ORDER) OUTER TOKEN(OUTER) RIGHT TOKEN(RIGHT) -CROSS TOKEN(CROSS) -USING TOKEN(USING) TABLE TOKEN(TABLE) -INDEX TOKEN(INDEX) -ALTER TOKEN(ALTER) -FROM TOKEN(FROM) +UNION TOKEN(UNION) +USING TOKEN(USING) +WHERE TOKEN(WHERE) DESC TOKEN(DESC) +DROP TOKEN(DROP) +FILE TOKEN(FILE) +FROM TOKEN(FROM) +INTO TOKEN(INTO) JOIN TOKEN(JOIN) LEFT TOKEN(LEFT) -FILE TOKEN(FILE) -DROP TOKEN(DROP) -LOAD TOKEN(LOAD) -INTO TOKEN(INTO) -NULL TOKEN(NULL) LIKE TOKEN(LIKE) -TOP TOKEN(TOP) +LOAD TOKEN(LOAD) +NULL TOKEN(NULL) +ALL TOKEN(ALL) +AND TOKEN(AND) ASC TOKEN(ASC) CSV TOKEN(CSV) -TBL TOKEN(TBL) NOT TOKEN(NOT) -AND TOKEN(AND) -BY TOKEN(BY) -ON TOKEN(ON) +TBL TOKEN(TBL) +TOP TOKEN(TOP) AS TOKEN(AS) -OR TOKEN(OR) +BY TOKEN(BY) IN TOKEN(IN) IS TOKEN(IS) +ON TOKEN(ON) +OR TOKEN(OR) "<>" TOKEN(NOTEQUALS) diff --git a/src/parser/keywordlist_generator.py b/src/parser/keywordlist_generator.py index e4fc5bc..cf801a9 100644 --- a/src/parser/keywordlist_generator.py +++ b/src/parser/keywordlist_generator.py @@ -5,7 +5,8 @@ import math with open("sql_keywords.txt", 'r') as fh: keywords = [line.strip() for line in fh.readlines() if not line.strip().startswith("//") and len(line.strip()) > 0] - keywords = sorted(keywords, key=lambda x: len(x), reverse=True) + keywords = sorted(keywords) # Sort by name + keywords = sorted(keywords, key=lambda x: len(x), reverse=True) # Sort by length ################# # Flex diff --git a/src/parser/sql_keywords.txt b/src/parser/sql_keywords.txt index d60c525..4c8821d 100644 --- a/src/parser/sql_keywords.txt +++ b/src/parser/sql_keywords.txt @@ -14,6 +14,8 @@ DESC LIMIT DISTINCT OFFSET +UNION +ALL // Join clause JOIN diff --git a/test/valid_queries.sql b/test/valid_queries.sql index 8960b00..801f0ad 100644 --- a/test/valid_queries.sql +++ b/test/valid_queries.sql @@ -5,6 +5,10 @@ SELECT col1 AS myname, col2, 'test' FROM "table", foo AS t WHERE age > 12 AND zi SELECT * from "table" JOIN table2 ON a = b WHERE (b OR NOT a) AND a = 12.5 (SELECT a FROM foo WHERE a > 12 OR b > 3 AND c NOT LIKE 's%' LIMIT 10); SELECT t1.a, t1.b, t2.c FROM "table" AS t1 JOIN (SELECT * FROM foo JOIN bar ON foo.id = bar.id) t2 ON t1.a = t2.b WHERE (t1.b OR NOT t1.a) AND t2.c = 12.5 +SELECT * FROM "table" LIMIT 10 OFFSET 10; +SELECT * FROM t1 UNION SELECT * FROM t2 ORDER BY col1; +SELECT * FROM t1 UNION (SELECT * FROM t2) ORDER BY col1; +SELECT * FROM t1 UNION (SELECT * FROM t2 UNION SELECT * FROM t3) ORDER BY col1; # CREATE statement CREATE TABLE "table" FROM TBL FILE 'students.tbl' # Multiple statements