From 6ea2c0e3181a396991732a169ff5eec33f3d84b1 Mon Sep 17 00:00:00 2001
From: Pedro
Date: Wed, 8 Oct 2014 16:30:22 -0700
Subject: [PATCH] commit of current state

---
Notes (text in this section is commentary, not part of the commit):

  What the patch adds
    * lex_lexer.l / lex_parser.y: reentrant flex scanner and pure bison
      grammar for "SELECT <exprs> FROM <tables | (subselect)> [GROUP BY ...]".
    * lib/: syntax-tree classes (Statement, SelectStatement, TableRef,
      Expression, List) and the entry point SQLParser::parseSQL().
    * sql_parser.cpp: command-line driver, parses every argument as SQL.
    * sql_tests.cpp: three SELECT tests with a minimal ASSERT macro.

  Review changes (every file keeps its line count, so the hunk headers
  and the summary below still match):
    * lex_parser.y: yyerror() now returns a value.
    * lib/Expression.h: the one-argument constructor sets func_name to
      NULL; sql_tests.cpp compares it against NULL.
    * lib/Statement.cpp: pointer members start out as NULL.
    * lib/SQLParser.cpp: parseSQL() frees the scan buffer and the scanner
      on the parse-error path too.
    * sql_parser.cpp: uses SQLParser::parseSQL() from lib/SQLParser.h
      (sql_interface.h / parse_sql() are not created by this patch), no
      shadowed "stmt", "%p" argument cast to void*.
    * sql_tests.cpp: STREQUALS expansion is parenthesized.

  Caveats
    * System header names, template arguments and bison type tags were
      missing from the reviewed copy and were restored from how the code
      uses them - confirm against the original commit.
    * The blob ids on the "index" lines are those of the original commit
      and were not recomputed.
    * The author e-mail address is missing on the From: line.

 src/.gitignore         |   5 ++
 src/Makefile           |  33 ++++++++++++
 src/bin/.gitignore     |   2 +
 src/build_and_run.sh   |   5 ++
 src/lex_lexer.l        |  51 ++++++++++++++++++
 src/lex_parser.y       | 120 +++++++++++++++++++++++++++++++++++++++++
 src/lib/Expression.cpp |   0
 src/lib/Expression.h   |  16 ++++++
 src/lib/List.h         |  26 +++++++++
 src/lib/SQLParser.cpp  |  37 +++++++++++++
 src/lib/SQLParser.h    |  16 ++++++
 src/lib/Statement.cpp  |  15 ++++++
 src/lib/Statement.h    |  60 +++++++++++++++++++++
 src/sql_parser.cpp     |  52 ++++++++++++++++++
 src/sql_tests.cpp      | 107 ++++++++++++++++++++++++++++++++++++
 15 files changed, 545 insertions(+)
 create mode 100644 src/.gitignore
 create mode 100644 src/Makefile
 create mode 100644 src/bin/.gitignore
 create mode 100644 src/build_and_run.sh
 create mode 100644 src/lex_lexer.l
 create mode 100644 src/lex_parser.y
 create mode 100644 src/lib/Expression.cpp
 create mode 100644 src/lib/Expression.h
 create mode 100644 src/lib/List.h
 create mode 100644 src/lib/SQLParser.cpp
 create mode 100644 src/lib/SQLParser.h
 create mode 100644 src/lib/Statement.cpp
 create mode 100644 src/lib/Statement.h
 create mode 100644 src/sql_parser.cpp
 create mode 100644 src/sql_tests.cpp

diff --git a/src/.gitignore b/src/.gitignore
new file mode 100644
index 0000000..a048789
--- /dev/null
+++ b/src/.gitignore
@@ -0,0 +1,5 @@
+lex_lexer.c
+lex_lexer.h
+lex_parser.c
+lex_parser.h
+*.o
\ No newline at end of file
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..92c752e
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,33 @@
+# Makefile
+
+LIB_FILES = lex_lexer.c lex_parser.c lib/Statement.cpp lib/SQLParser.cpp
+
+
+TESTS_MAIN = sql_tests.cpp
+TESTS_BIN = bin/tests
+
+PARSER_MAIN = sql_parser.cpp
+PARSER_BIN = bin/parser
+
+CC = g++
+CFLAGS = -g -O3 -Ilib/ -I./
+
+
+tests: $(LIB_FILES) $(TESTS_MAIN)
+	$(CC) $(CFLAGS) $(LIB_FILES) $(TESTS_MAIN) -o $(TESTS_BIN)
+
+
+parser: $(LIB_FILES) $(PARSER_MAIN)
+	$(CC) $(CFLAGS) $(LIB_FILES) $(PARSER_MAIN) -o $(PARSER_BIN)
+
+
+lex_lexer.c: lex_lexer.l
+	flex lex_lexer.l
+
+
+lex_parser.c: lex_parser.y lex_lexer.c
+	bison lex_parser.y
+
+
+clean:
+	rm -f *.o *~ lex_lexer.c lex_lexer.h lex_parser.c lex_parser.h $(PARSER_BIN) $(TESTS_BIN)
\ No newline at end of file
diff --git a/src/bin/.gitignore b/src/bin/.gitignore
new file mode 100644
index 0000000..c96a04f
--- /dev/null
+++ b/src/bin/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
\ No newline at end of file
diff --git a/src/build_and_run.sh b/src/build_and_run.sh
new file mode 100644
index 0000000..6f78f9b
--- /dev/null
+++ b/src/build_and_run.sh
@@ -0,0 +1,5 @@
+
+make clean
+make tests
+
+./bin/tests
\ No newline at end of file
diff --git a/src/lex_lexer.l b/src/lex_lexer.l
new file mode 100644
index 0000000..ced2c3b
--- /dev/null
+++ b/src/lex_lexer.l
@@ -0,0 +1,51 @@
+%{
+
+/*
+ * lexer.l file
+ * To generate the lexical analyzer run: "flex lexer.l"
+ */
+
+#include "Statement.h"
+#include "List.h"
+#include "lex_parser.h"
+
+#include <stdio.h>
+#include <string.h>
+using namespace std;
+
+#define TOK(name) { return name; }
+
+%}
+
+
+%option outfile="lex_lexer.c" header-file="lex_lexer.h"
+%option warn nodefault
+%option reentrant noyywrap never-interactive nounistd
+%option bison-bridge
+%option case-insensitive
+
+%%
+
+[ \t\n]+	;
+
+SELECT	TOK(SELECT)
+FROM	TOK(FROM)
+GROUP	TOK(GROUP)
+BY		TOK(BY)
+
+[-+*/(),.;]	TOK(yytext[0])
+
+[0-9]+	|
+[0-9]+"."[0-9]* |
+"."[0-9]*	TOK(INTNUM)
+
+[A-Za-z][A-Za-z0-9_]* {
+	yylval->sval = strdup(yytext);
+	return STRING;
+}
+
+%%
+
+int yyerror(const char *msg) {
+	fprintf(stderr,"Error:%s\n",msg); return 0;
+}
\ No newline at end of file
diff --git a/src/lex_parser.y b/src/lex_parser.y
new file mode 100644
index 0000000..67b00e3
--- /dev/null
+++ b/src/lex_parser.y
@@ -0,0 +1,120 @@
+%{
+
+/*
+ * parser.y file
+ * To generate the parser run: "bison parser.y"
+ */
+
+#include "Statement.h"
+#include "List.h"
+#include "lex_parser.h"
+#include "lex_lexer.h"
+
+#include <stdio.h>
+
+int yyerror(Statement **expression, yyscan_t scanner, const char *msg) {
+	return 0; // Add error handling routine as needed
+}
+
+%}
+
+%code requires {
+
+#ifndef YY_TYPEDEF_YY_SCANNER_T
+#define YY_TYPEDEF_YY_SCANNER_T
+typedef void* yyscan_t;
+#endif
+
+}
+
+%output  "lex_parser.c"
+%defines "lex_parser.h"
+
+%define api.pure
+%lex-param   { yyscan_t scanner }
+%parse-param { Statement **statement }
+%parse-param { yyscan_t scanner }
+
+%union {
+	int value;
+	char* sval;
+
+	Statement* statement;
+	SelectStatement* select_statement;
+	TableRef* table;
+	Expression* expr;
+
+	List<char*>* slist;
+	List<Expression*>* explist;
+}
+
+%token SELECT FROM GROUP BY INTNUM
+%token <sval> STRING
+
+%type <statement> statement
+%type <select_statement> select_statement
+%type <table> from_clause
+%type <slist> string_list
+%type <explist> expr_list group_clause
+%type <expr> expr;
+%%
+
+
+input:
+		statement opt_semicolon { *statement = $1; };
+
+
+opt_semicolon:
+		';' | ;
+
+
+statement:
+		select_statement { $$ = $1; }
+	|	{ $$ = NULL; };
+
+
+select_statement:
+		SELECT expr_list from_clause group_clause
+		{
+			SelectStatement* s = new SelectStatement();
+			s->_select_list = $2;
+			s->_from_table = $3;
+			s->_group_by = $4;
+			$$ = s;
+		};
+
+
+expr_list:
+		expr { $$ = new List<Expression*>($1); }
+	|	expr_list ',' expr { $1->push_back($3); $$ = $1; };
+
+
+expr:
+		STRING { $$ = new Expression($1); }
+	|	STRING '(' STRING ')' { $$ = new Expression($3, $1); };
+
+
+from_clause:
+		FROM string_list
+		{
+			TableRef* t = new TableRef(eTableName);
+			t->_table_names = $2;
+			$$ = t;
+		}
+	|	FROM '(' select_statement ')'
+		{
+			TableRef* t = new TableRef(eTableSelect);
+			t->_stmt = $3;
+			$$ = t;
+		};
+
+
+group_clause:
+		GROUP BY expr_list { $$ = $3; }
+	|	{ $$ = NULL; };
+
+string_list:
+		STRING { $$ = new List<char*>($1); }
+	|	string_list ',' STRING { $1->push_back($3); $$ = $1; }
+
+%%
\ No newline at end of file
diff --git a/src/lib/Expression.cpp b/src/lib/Expression.cpp
new file mode 100644
index 0000000..e69de29
diff --git a/src/lib/Expression.h b/src/lib/Expression.h
new file mode 100644
index 0000000..690e1c2
--- /dev/null
+++ b/src/lib/Expression.h
@@ -0,0 +1,16 @@
+#ifndef __EXPRESSION_H__
+#define __EXPRESSION_H__
+
+#include <stddef.h>
+
+class Expression {
+public:
+	Expression(char* name) : name(name), func_name(NULL) {};
+	Expression(char* name, char* func_name) : name(name), func_name(func_name) {};
+
+	char* name;
+	char* func_name;
+};
+
+
+#endif
\ No newline at end of file
diff --git a/src/lib/List.h b/src/lib/List.h
new file mode 100644
index 0000000..e9309ec
--- /dev/null
+++ b/src/lib/List.h
@@ -0,0 +1,26 @@
+#ifndef __LIST_H__
+#define __LIST_H__
+
+#include <vector>
+#include <stdlib.h>
+
+template <typename _T>
+class List {
+public:
+	std::vector<_T> _vector;
+
+	List() {}
+
+	List(_T first_value) {
+		_vector.push_back(first_value);
+	}
+
+	inline size_t size() { return _vector.size(); };
+
+	inline _T at(int i) { return _vector[i]; }
+	inline _T &operator[](int i) { return _vector[i]; }
+	inline void push_back(_T value) { _vector.push_back(value); }
+};
+
+
+#endif
\ No newline at end of file
diff --git a/src/lib/SQLParser.cpp b/src/lib/SQLParser.cpp
new file mode 100644
index 0000000..4b5f430
--- /dev/null
+++ b/src/lib/SQLParser.cpp
@@ -0,0 +1,37 @@
+#include "SQLParser.h"
+#include "lex_parser.h"
+#include "lex_lexer.h"
+#include <stdio.h>
+
+int yyparse(Statement **expression, yyscan_t scanner);
+
+
+SQLParser::SQLParser() {
+	fprintf(stderr, "SQLParser only has static methods atm! Do not initialize!\n");
+}
+
+
+Statement* SQLParser::parseSQL(const char *text) {
+	Statement* stmt = NULL;
+	yyscan_t scanner;
+	YY_BUFFER_STATE state;
+
+	if (yylex_init(&scanner)) {
+		// couldn't initialize
+		fprintf(stderr, "Error when initializing!\n");
+		return NULL;
+	}
+
+	state = yy_scan_string(text, scanner);
+	int parse_failed = yyparse(&stmt, scanner);
+
+	// always release the scanner resources, also when parsing failed
+	yy_delete_buffer(state, scanner);
+	yylex_destroy(scanner);
+
+	if (parse_failed) {
+		fprintf(stderr, "Error when parsing!\n");
+		return NULL;
+	}
+	return stmt;
+}
\ No newline at end of file
diff --git a/src/lib/SQLParser.h b/src/lib/SQLParser.h
new file mode 100644
index 0000000..779a644
--- /dev/null
+++ b/src/lib/SQLParser.h
@@ -0,0 +1,16 @@
+#ifndef __SQLPARSER_H_
+#define __SQLPARSER_H_
+
+#include "Statement.h"
+
+class SQLParser {
+public:
+	static Statement* parseSQL(const char* sql);
+
+private:
+	SQLParser();
+};
+
+
+
+#endif
\ No newline at end of file
diff --git a/src/lib/Statement.cpp b/src/lib/Statement.cpp
new file mode 100644
index 0000000..dfc2e6d
--- /dev/null
+++ b/src/lib/Statement.cpp
@@ -0,0 +1,15 @@
+/*
+ * Statement.c
+ * Implementation of functions used to build the syntax tree.
+ */
+
+#include "Statement.h"
+
+#include <stdlib.h>
+
+
+Statement::Statement(EStatementType type) : _type(type) {};
+
+SelectStatement::SelectStatement() : Statement(eSelect), _from_table(NULL), _select_list(NULL), _group_by(NULL) {};
+
+TableRef::TableRef(ETableRefType type) : _type(type), _stmt(NULL), _table_names(NULL) {};
diff --git a/src/lib/Statement.h b/src/lib/Statement.h
new file mode 100644
index 0000000..786f185
--- /dev/null
+++ b/src/lib/Statement.h
@@ -0,0 +1,60 @@
+/*
+ * Statement.h
+ * Definition of the structure used to build the syntax tree.
+ */
+#ifndef __STATEMENT_H__
+#define __STATEMENT_H__
+
+#include "Expression.h"
+#include "List.h"
+
+class TableRef;
+
+typedef enum {
+	eSelect,
+	eDelete,
+	eInsert,
+	eCreate
+} EStatementType;
+
+
+
+class Statement {
+public:
+	Statement(EStatementType type);
+
+	EStatementType _type;
+};
+
+
+class SelectStatement : public Statement {
+public:
+	SelectStatement();
+
+	TableRef* _from_table;
+	List<Expression*>* _select_list;
+	List<Expression*>* _group_by;
+};
+
+
+/**
+ * TableRef
+ * Holds reference to tables. Can be either table names or a select statement.
+ */
+typedef enum {
+	eTableName,
+	eTableSelect
+} ETableRefType;
+
+class TableRef {
+public:
+	TableRef(ETableRefType type);
+
+	ETableRefType _type;
+
+	SelectStatement* _stmt;
+	List<char*>* _table_names;
+
+};
+
+#endif // __STATEMENT_H__
\ No newline at end of file
diff --git a/src/sql_parser.cpp b/src/sql_parser.cpp
new file mode 100644
index 0000000..0f8b2ec
--- /dev/null
+++ b/src/sql_parser.cpp
@@ -0,0 +1,52 @@
+/*
+ * sql_parser.cpp
+ */
+
+#include "lib/SQLParser.h"
+#include <stdio.h>
+
+void evaluate_statement(Statement* stmt);
+
+
+int main(int argc, char *argv[]) {
+	if (argc <= 1) {
+		fprintf(stderr, "No SQL-Statement given!\n");
+		return -1;
+	}
+
+	Statement *stmt = NULL;
+
+	for (int n = 1; n < argc; ++n) {
+		char* sql = argv[n];
+
+		printf("\nEvaluating Statement \"%s\"\n", sql);
+		stmt = SQLParser::parseSQL(sql);
+		evaluate_statement(stmt);
+	}
+
+	return 0;
+}
+
+
+
+void evaluate_select_statement(SelectStatement* stmt) {
+	// printf("Selecting %s from %s\n", stmt->_targets->toString(), stmt->_from_clause);
+}
+
+
+
+void evaluate_statement(Statement* stmt) {
+	printf("Statement at %p\n", (void*) stmt);
+	if (stmt == NULL) return;
+
+	switch (stmt->_type) {
+		case eSelect:
+			evaluate_select_statement((SelectStatement*) stmt);
+			break;
+		case eInsert:
+			printf("Insert Statment found!\n");
+			break;
+		default:
+			break;
+	}
+}
\ No newline at end of file
diff --git a/src/sql_tests.cpp b/src/sql_tests.cpp
new file mode 100644
index 0000000..68a71d0
--- /dev/null
+++ b/src/sql_tests.cpp
@@ -0,0 +1,107 @@
+/*
+ * sql_tests.cpp
+ */
+
+#include "lib/SQLParser.h"
+#include <stdio.h>
+#include <string>
+#include <stddef.h>
+
+#define STREQUALS(str1, str2) (std::string(str1).compare(std::string(str2)) == 0)
+
+#define ASSERT(cond) if (!(cond)) { fprintf(stderr, "failed! Assertion (" #cond ")\n"); return; }
+#define ASSERT_STR(STR1, STR2) ASSERT(STREQUALS(STR1, STR2));
+
+
+void SelectTest1() {
+	printf("Test: SelectTest1... ");
+	fflush(stdout);
+
+	const char* sql = "SELECT age, name, address from table;";
+	Statement* stmt = SQLParser::parseSQL(sql);
+	ASSERT(stmt != NULL);
+	ASSERT(stmt->_type == eSelect);
+
+	SelectStatement* select = (SelectStatement*) stmt;
+
+	ASSERT(select->_select_list->size() == 3);
+	ASSERT_STR(select->_select_list->at(0)->name, "age");
+	ASSERT_STR(select->_select_list->at(1)->name, "name");
+	ASSERT_STR(select->_select_list->at(2)->name, "address");
+
+	ASSERT(select->_from_table != NULL);
+	ASSERT(select->_from_table->_type == eTableName);
+	ASSERT_STR(select->_from_table->_table_names->at(0), "table");
+
+	printf("passed!\n");
+}
+
+void SelectTest2() {
+	printf("Test: SelectTest2... ");
+	fflush(stdout);
+
+	const char* sql = "SELECT age, name, address FROM (SELECT age FROM table, table2);";
+	Statement* stmt = SQLParser::parseSQL(sql);
+	ASSERT(stmt != NULL);
+	ASSERT(stmt->_type == eSelect);
+
+	SelectStatement* select = (SelectStatement*) stmt;
+
+	ASSERT(select->_select_list->size() == 3);
+	ASSERT_STR(select->_select_list->at(0)->name, "age");
+	ASSERT_STR(select->_select_list->at(1)->name, "name");
+	ASSERT_STR(select->_select_list->at(2)->name, "address");
+
+	ASSERT(select->_from_table != NULL);
+	ASSERT(select->_from_table->_type == eTableSelect);
+	ASSERT(select->_from_table->_stmt != NULL);
+	ASSERT(select->_from_table->_stmt->_select_list->size() == 1);
+	ASSERT_STR(select->_from_table->_stmt->_from_table->_table_names->at(0), "table");
+	ASSERT_STR(select->_from_table->_stmt->_from_table->_table_names->at(1), "table2");
+
+	printf("passed!\n");
+}
+
+void SelectTest3() {
+	printf("Test: SelectTest3... ");
+	fflush(stdout);
+
+	const char* sql = "SELECT name, AVG(age) FROM table GROUP BY name";
+	Statement* stmt = SQLParser::parseSQL(sql);
+	ASSERT(stmt != NULL);
+	ASSERT(stmt->_type == eSelect);
+
+	SelectStatement* select = (SelectStatement*) stmt;
+
+	ASSERT(select->_select_list->size() == 2);
+
+	ASSERT(select->_select_list->at(0)->func_name == NULL);
+	ASSERT_STR("name", select->_select_list->at(0)->name);
+
+	ASSERT(select->_select_list->at(1)->func_name != NULL);
+	ASSERT_STR("age", select->_select_list->at(1)->name);
+	ASSERT_STR("AVG", select->_select_list->at(1)->func_name);
+
+	ASSERT(select->_group_by != NULL);
+	ASSERT(select->_group_by->size() == 1);
+	ASSERT_STR("name", select->_group_by->at(0)->name);
+
+	printf("passed!\n");
+}
+
+
+
+int main(int argc, char *argv[]) {
+
+	printf("\n######################################\n");
+	printf("## Running all tests...\n\n");
+
+	SelectTest1();
+	SelectTest2();
+	SelectTest3();
+
+	printf("\n## Finished running all tests...\n");
+	printf("######################################\n");
+
+	return 0;
+}
\ No newline at end of file