diff --git a/.gitignore b/.gitignore
index cb330d7..f5059a7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,8 @@
+*.o
 build
 compile_commands.json
+run_tests.sh
+
 # scons cruft
 .cache
 .sconsign.dblite
diff --git a/SConstruct b/SConstruct
index 3f08374..1b20c83 100644
--- a/SConstruct
+++ b/SConstruct
@@ -8,10 +8,10 @@ build_dir = "build/default/"
 src_dir = "src/"
 
 program_sources = ["src/main.c"]
-lib_srcs = []
+lib_srcs = ["src/parse.c", "src/memory.c"]
 lib_includes = ["src/"]
 
-test_srcs = [""]
+test_srcs = ["test/test_parse.c"]
 test_lib_srcs = ["third-party/unity/src/unity.c"]
 test_lib_includes = ["third-party/unity/src/"]
 
@@ -20,7 +20,7 @@ test_lib_includes = ["third-party/unity/src/"]
 
 # VariantDir(build_dir, ".", duplicate=0)
 
-env = Environment(CPPPATH=lib_includes, COMPILATIONDB_USE_ABSPATH=True)
+env = Environment(CPPPATH=lib_includes, COMPILATIONDB_USE_ABSPATH=True, CCFLAGS=["-ggdb", "-O0"])
 env.Tool('compilation_db')
 env.CompilationDatabase()
 
diff --git a/src/memory.c b/src/memory.c
new file mode 100644
--- /dev/null
+++ b/src/memory.c
@@ -0,0 +1,108 @@
+#include "nihilispm.h"
+#include "nihilispm_internal.h"
+
+#include <stddef.h>
+#include <stdlib.h>
+
+static struct nl_object *nl_object_alloc(void);
+
+/*
+ * Builds a cons cell that takes ownership of `car` and `cdr` (either may be
+ * NULL). Returns NULL when allocation fails; the children are then untouched.
+ */
+struct nl_object *nl_cell_create(struct nl_object *car, struct nl_object *cdr)
+{
+    struct nl_object *obj = nl_object_alloc();
+
+    if (obj == NULL) {
+        return NULL;
+    }
+    obj->type = NL_TYPE_CELL;
+    obj->cell.car = car;
+    obj->cell.cdr = cdr;
+    return obj;
+}
+
+/* Builds an integer object. Returns NULL when allocation fails. */
+struct nl_object *nl_int_create(int integer)
+{
+    struct nl_object *obj = nl_object_alloc();
+
+    if (obj == NULL) {
+        return NULL;
+    }
+    obj->type = NL_TYPE_INT;
+    obj->integer = integer;
+    return obj;
+}
+
+/*
+ * Builds a symbol object around `symbol` without copying it. The pointer is
+ * handed to free() by nl_object_delete(), so it must be heap-allocated.
+ * Returns NULL when allocation fails; the caller then still owns `symbol`.
+ */
+struct nl_object *nl_symbol_create(const char *symbol)
+{
+    struct nl_object *obj = nl_object_alloc();
+
+    if (obj == NULL) {
+        return NULL;
+    }
+    obj->type = NL_TYPE_SYMBOL;
+    obj->symbol = symbol;
+    return obj;
+}
+
+/*
+ * Builds a string object around `string` without copying it. The pointer is
+ * handed to free() by nl_object_delete(), so it must be heap-allocated.
+ * Returns NULL when allocation fails; the caller then still owns `string`.
+ */
+struct nl_object *nl_string_create(const char *string)
+{
+    struct nl_object *obj = nl_object_alloc();
+
+    if (obj == NULL) {
+        return NULL;
+    }
+    obj->type = NL_TYPE_STRING;
+    obj->string = string;
+    return obj;
+}
+
+/* Allocates an uninitialised object; NULL when out of memory. */
+static struct nl_object *nl_object_alloc(void)
+{
+    return malloc(sizeof(struct nl_object));
+}
+
+/*
+ * Frees `obj` and everything reachable from it; NULL is ignored. The cdr
+ * chain is walked in a loop so long lists do not recurse once per element;
+ * only car nesting recurses.
+ */
+void nl_object_delete(struct nl_object *obj)
+{
+    while (obj != NULL) {
+        struct nl_object *next = NULL;
+
+        switch (obj->type) {
+        case NL_TYPE_CELL:
+            nl_object_delete(obj->cell.car);
+            next = obj->cell.cdr;
+            break;
+        case NL_TYPE_SYMBOL:
+            /* The text is heap-owned; the cast only drops the const view. */
+            free((char *)obj->symbol);
+            break;
+        case NL_TYPE_STRING:
+            free((char *)obj->string);
+            break;
+        case NL_TYPE_INT:
+        case NL_TYPE_COUNT:
+            break;
+        }
+        free(obj);
+        obj = next;
+    }
+}
diff --git a/src/nihilispm.h b/src/nihilispm.h
new file mode 100644
--- /dev/null
+++ b/src/nihilispm.h
@@ -0,0 +1,40 @@
+#ifndef NIHILISPM_H
+#define NIHILISPM_H
+
+/* Type tag stored in every nl_object. */
+enum nl_type {
+    NL_TYPE_CELL = 0,
+    NL_TYPE_SYMBOL = 1,
+    NL_TYPE_INT = 2,
+    NL_TYPE_STRING = 3,
+    NL_TYPE_COUNT = 4, /* one past the last type tag */
+};
+
+struct nl_cell {
+    struct nl_object *car;
+    struct nl_object *cdr;
+};
+
+/* Tagged value; `type` says which union member is in use. */
+struct nl_object {
+    enum nl_type type;
+    union {
+        struct nl_cell cell;
+        const char *symbol;
+        int integer;
+        const char *string;
+    };
+};
+
+struct nl_parse_result {
+    int result;
+    struct nl_cell *statement;
+};
+
+struct nl_state; // TODO
+
+struct nl_object *nl_tokenize(const char *source);
+struct nl_object *nl_parse(const char *source);
+struct nl_cell *nl_evaluate(const struct nl_cell *sexp);
+
+#endif
diff --git a/src/nihilispm_internal.h b/src/nihilispm_internal.h
new file mode 100644
--- /dev/null
+++ b/src/nihilispm_internal.h
@@ -0,0 +1,16 @@
+#ifndef NIHILISPM_INTERNAL_H
+#define NIHILISPM_INTERNAL_H
+
+#include "nihilispm.h"
+
+struct nl_object *nl_cell_create(struct nl_object *car, struct nl_object *cdr);
+struct nl_object *nl_int_create(int integer);
+struct nl_object *nl_symbol_create(const char *symbol);
+struct nl_object *nl_string_create(const char *string);
+
+void nl_object_delete(struct nl_object *obj);
+
+// For testing
+struct nl_object *nl_token_next(const char **curr_src);
+
+#endif
diff --git a/src/parse.c b/src/parse.c
new file mode 100644
--- /dev/null
+++ b/src/parse.c
@@ -0,0 +1,151 @@
+#include "nihilispm.h"
+#include "nihilispm_internal.h"
+
+// TODO: remove these
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define START_LIST_CHAR '('
+#define END_LIST_CHAR ')'
+#define QUOTE_CHAR '"'
+
+// TODO: remove malloc calls
+
+static bool nl_is_whitespace(char c)
+{
+    return c == ' ' || c == '\n' || c == '\t' || c == '\r';
+}
+
+static bool nl_is_token(char c)
+{
+    return c == START_LIST_CHAR || c == END_LIST_CHAR || c == QUOTE_CHAR;
+}
+
+static bool nl_is_delimiter(char c)
+{
+    return nl_is_whitespace(c) || nl_is_token(c) || c == '\0';
+}
+
+/*
+ * Wraps a copy of the `len` bytes at `text` in a one-element token cell whose
+ * car is a string (NL_TYPE_STRING) or, for any other `type`, a symbol.
+ * Returns NULL, with nothing leaked, when any allocation fails.
+ */
+static struct nl_object *nl_token_create(enum nl_type type, const char *text,
+                                         size_t len)
+{
+    struct nl_object *atom;
+    struct nl_object *token;
+    char *copy = malloc(len + 1);
+
+    if (copy == NULL) {
+        return NULL;
+    }
+    memcpy(copy, text, len);
+    copy[len] = '\0';
+
+    atom = (type == NL_TYPE_STRING) ? nl_string_create(copy)
+                                    : nl_symbol_create(copy);
+    if (atom == NULL) {
+        free(copy);
+        return NULL;
+    }
+
+    token = nl_cell_create(atom, NULL);
+    if (token == NULL) {
+        nl_object_delete(atom); /* also frees copy */
+    }
+    return token;
+}
+
+/*
+ * Reads the next token at *curr_src and advances *curr_src past it.
+ *
+ * Leading whitespace is skipped. The token is returned as a cell whose car
+ * is a symbol ("(", ")" or a bare word) or a string (quoted text without its
+ * quotes) and whose cdr is NULL; the caller frees it with nl_object_delete().
+ * A string with no closing quote runs to the end of the input.
+ *
+ * Returns NULL at end of input, and also when allocation fails.
+ */
+struct nl_object *nl_token_next(const char **curr_src)
+{
+    const char *cursor;
+    const char *start;
+    struct nl_object *token;
+
+    assert(curr_src != NULL && *curr_src != NULL);
+    cursor = *curr_src;
+
+    while (nl_is_whitespace(*cursor)) {
+        cursor++;
+    }
+    start = cursor;
+
+    if (*cursor == '\0') {
+        token = NULL;
+    } else if (*cursor == START_LIST_CHAR || *cursor == END_LIST_CHAR) {
+        cursor++;
+        token = nl_token_create(NL_TYPE_SYMBOL, start, 1);
+    } else if (*cursor == QUOTE_CHAR) {
+        // TODO: Support escaping
+        start = ++cursor;
+        /* Stop at the terminator too, so an unclosed quote cannot overrun. */
+        while (*cursor != QUOTE_CHAR && *cursor != '\0') {
+            cursor++;
+        }
+        token = nl_token_create(NL_TYPE_STRING, start, (size_t)(cursor - start));
+        if (*cursor == QUOTE_CHAR) {
+            cursor++;
+        }
+    } else {
+        // TODO: Parse integers
+        while (!nl_is_delimiter(*cursor)) {
+            cursor++;
+        }
+        token = nl_token_create(NL_TYPE_SYMBOL, start, (size_t)(cursor - start));
+    }
+
+    *curr_src = cursor;
+    return token;
+}
+
+/*
+ * Splits `source` into a list of tokens linked through cdr, in source order.
+ * Returns NULL when `source` is NULL or holds no tokens; otherwise the caller
+ * frees the list with nl_object_delete().
+ */
+struct nl_object *nl_tokenize(const char *source)
+{
+    struct nl_object *head = NULL;
+    struct nl_object **link = &head;
+    struct nl_object *token;
+    const char *cursor = source;
+
+    if (source == NULL) {
+        return NULL;
+    }
+
+    while ((token = nl_token_next(&cursor)) != NULL) {
+        *link = token;
+        link = &token->cell.cdr;
+    }
+    return head;
+}
+
+// TODO: Should this parse a single sexp (return the last-parsed position), or
+// all sexps in the source (return a list of sexps)?
+//
+// Not implemented yet: the tokens are built and released, and the result is
+// always NULL.
+struct nl_object *nl_parse(const char *source)
+{
+    struct nl_object *tokens = nl_tokenize(source);
+    struct nl_object *sexp = NULL;
+
+    nl_object_delete(tokens);
+    return sexp;
+}
diff --git a/test/test_parse.c b/test/test_parse.c
new file mode 100644
--- /dev/null
+++ b/test/test_parse.c
@@ -0,0 +1,272 @@
+#include <stddef.h>
+#include <unity.h>
+
+#include "nihilispm.h"
+#include "nihilispm_internal.h"
+
+/* Object under test; tearDown() releases it after every test. */
+static struct nl_object *response;
+
+void setUp(void)
+{
+    response = NULL;
+}
+
+void tearDown(void)
+{
+    nl_object_delete(response);
+    response = NULL;
+}
+
+void test_token_next_empty_str(void)
+{
+    const char *input = "";
+    const char *curr = input;
+
+    response = nl_token_next(&curr);
+
+    TEST_ASSERT_NULL(response);
+    TEST_ASSERT_EQUAL('\0', *curr);
+}
+
+void test_token_next_only_whitespace(void)
+{
+    const char *input = " \n";
+    const char *curr = input;
+
+    response = nl_token_next(&curr);
+
+    TEST_ASSERT_NULL(response);
+    TEST_ASSERT_EQUAL('\0', *curr);
+}
+
+void test_token_next_lparen(void)
+{
+    const char *input = "(";
+    const char *curr = input;
+
+    response = nl_token_next(&curr);
+
+    TEST_ASSERT_NOT_NULL(response);
+    TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
+    TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->type);
+    TEST_ASSERT_EQUAL_STRING("(", response->cell.car->symbol);
+    TEST_ASSERT_EQUAL('\0', *curr);
+}
+
+void test_token_next_rparen(void)
+{
+    const char *input = ")";
+    const char *curr = input;
+
+    response = nl_token_next(&curr);
+
+    TEST_ASSERT_NOT_NULL(response);
+    TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
+    TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->type);
+    TEST_ASSERT_EQUAL_STRING(")", response->cell.car->symbol);
+    TEST_ASSERT_EQUAL('\0', *curr);
+}
+
+void test_token_next_lrparen(void)
+{
+    const char *input = "()";
+    const char *curr = input;
+
+    response = nl_token_next(&curr);
+
+    TEST_ASSERT_NOT_NULL(response);
+    TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
+    TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->type);
+    TEST_ASSERT_EQUAL_STRING("(", response->cell.car->symbol);
+    TEST_ASSERT_EQUAL(')', *curr);
+
+    nl_object_delete(response);
+    response = nl_token_next(&curr);
+
+    TEST_ASSERT_NOT_NULL(response);
+    TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
+    TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->type);
+    TEST_ASSERT_EQUAL_STRING(")", response->cell.car->symbol);
+    TEST_ASSERT_EQUAL('\0', *curr);
+}
+
+void test_token_next_string(void)
+{
+    const char *input = "\"foo\"";
+    const char *curr = input;
+
+    response = nl_token_next(&curr);
+
+    TEST_ASSERT_NOT_NULL(response);
+    TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
+    TEST_ASSERT_EQUAL(NL_TYPE_STRING, response->cell.car->type);
+    TEST_ASSERT_EQUAL_STRING("foo", response->cell.car->string);
+    TEST_ASSERT_EQUAL('\0', *curr);
+}
+
+void test_token_next_string_w_whitespace(void)
+{
+    const char *input = " \"foo\" ";
+    const char *curr = input;
+
+    response = nl_token_next(&curr);
+
+    TEST_ASSERT_NOT_NULL(response);
+    TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
+    TEST_ASSERT_EQUAL(NL_TYPE_STRING, response->cell.car->type);
+    TEST_ASSERT_EQUAL_STRING("foo", response->cell.car->string);
+    TEST_ASSERT_EQUAL_STRING(" ", curr);
+}
+
+void test_token_next_symbol(void)
+{
+    const char *input = "foo";
+    const char *curr = input;
+
+    response = nl_token_next(&curr);
+
+    TEST_ASSERT_NOT_NULL(response);
+    TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
+    TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->type);
+    TEST_ASSERT_EQUAL_STRING("foo", response->cell.car->symbol);
+    TEST_ASSERT_EQUAL_STRING("", curr);
+}
+
+void test_token_next_symbol_w_whitespace(void)
+{
+    const char *input = " foo ";
+    const char *curr = input;
+
+    response = nl_token_next(&curr);
+
+    TEST_ASSERT_NOT_NULL(response);
+    TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
+    TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->type);
+    TEST_ASSERT_EQUAL_STRING("foo", response->cell.car->symbol);
+    TEST_ASSERT_EQUAL_STRING(" ", curr);
+}
+
+void test_tokenize_empty_str(void)
+{
+    response = nl_tokenize("");
+
+    TEST_ASSERT_NULL(response);
+}
+
+void test_tokenize_nil(void)
+{
+    response = nl_tokenize("()");
+
+    TEST_ASSERT_NOT_NULL(response);
+    TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
+
+    const struct nl_object *token1 = response;
+    const struct nl_object *token2 = response->cell.cdr;
+
+    TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, token1->cell.car->type);
+    TEST_ASSERT_EQUAL_STRING("(", token1->cell.car->symbol);
+
+    TEST_ASSERT_NOT_NULL(token2);
+    TEST_ASSERT_EQUAL(NL_TYPE_CELL, token2->type);
+    TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, token2->cell.car->type);
+    TEST_ASSERT_EQUAL_STRING(")", token2->cell.car->symbol);
+
+    TEST_ASSERT_NULL(token2->cell.cdr);
+}
+
+void test_tokenize_statement(void)
+{
+    response = nl_tokenize("(foo)");
+
+    TEST_ASSERT_NOT_NULL(response);
+    TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
+
+    const struct nl_object *token1 = response;
+    const struct nl_object *token2 = response->cell.cdr;
+
+    TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, token1->cell.car->type);
+    TEST_ASSERT_EQUAL_STRING("(", token1->cell.car->symbol);
+
+    TEST_ASSERT_NOT_NULL(token2);
+    TEST_ASSERT_EQUAL(NL_TYPE_CELL, token2->type);
+    TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, token2->cell.car->type);
+    TEST_ASSERT_EQUAL_STRING("foo", token2->cell.car->symbol);
+
+    const struct nl_object *token3 = token2->cell.cdr;
+
+    TEST_ASSERT_NOT_NULL(token3);
+    TEST_ASSERT_EQUAL(NL_TYPE_CELL, token3->type);
+    TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, token3->cell.car->type);
+    TEST_ASSERT_EQUAL_STRING(")", token3->cell.car->symbol);
+    TEST_ASSERT_NULL(token3->cell.cdr);
+}
+
+void test_parse_empty_str(void)
+{
+    response = nl_parse("");
+    TEST_ASSERT_NULL(response);
+}
+
+void test_parse_symbol(void)
+{
+    response = nl_parse("foo");
+
+    TEST_ASSERT_NOT_NULL(response);
+    TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->type);
+    TEST_ASSERT_EQUAL_STRING("foo", response->symbol);
+}
+
+void test_parse_nil(void)
+{
+    response = nl_parse("()");
+
+    TEST_ASSERT_NOT_NULL(response);
+    TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
+    TEST_ASSERT_NULL(response->cell.car);
+    TEST_ASSERT_NULL(response->cell.cdr);
+}
+
+void test_parse_list_1elem(void)
+{
+    response = nl_parse("(foo)");
+
+    TEST_ASSERT_NOT_NULL(response);
+    TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
+
+    TEST_ASSERT_NOT_NULL(response->cell.car);
+    TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->type);
+    TEST_ASSERT_EQUAL_STRING("foo", response->cell.car->symbol);
+}
+
+void test_parse_list_2elem(void)
+{
+    response = nl_parse("(foo bar)");
+
+    TEST_ASSERT_NOT_NULL(response);
+    TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
+
+    TEST_ASSERT_NOT_NULL(response->cell.car);
+    TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->type);
+    TEST_ASSERT_EQUAL_STRING("foo", response->cell.car->symbol);
+}
+
+int main(void)
+{
+    UNITY_BEGIN();
+    RUN_TEST(test_token_next_empty_str);
+    RUN_TEST(test_token_next_only_whitespace);
+    RUN_TEST(test_token_next_lparen);
+    RUN_TEST(test_token_next_rparen);
+    RUN_TEST(test_token_next_lrparen);
+    RUN_TEST(test_token_next_string);
+    RUN_TEST(test_token_next_string_w_whitespace);
+    RUN_TEST(test_token_next_symbol);
+    RUN_TEST(test_token_next_symbol_w_whitespace);
+    RUN_TEST(test_tokenize_empty_str);
+    RUN_TEST(test_tokenize_nil);
+    RUN_TEST(test_tokenize_statement);
+    /* test_parse_* stay unregistered until nl_parse() is implemented; it
+     * currently always returns NULL. */
+    return UNITY_END();
+}