Implement mostly correct parsing
This commit is contained in:
@@ -1,11 +1,21 @@
|
||||
#include "nihilispm.h"
|
||||
|
||||
// Number of elements in a true array (not a pointer). The argument and the
// whole expansion are parenthesized so the result behaves as a single operand
// when the macro is used inside a larger expression.
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
|
||||
// Iterate over a cons-cell list. `iter` walks the cells starting at `list`
// (following cell.cdr) and `item` is the car of the current cell. Both names
// are declared by the macro and are scoped to the loop. `item` is only read
// from a non-NULL cell, so an empty (NULL) list runs zero iterations.
#define FOREACH_LIST(list, iter, item)                                  \
    for (struct nl_object *iter = (list),                               \
                          *item = iter ? iter->cell.car : NULL;         \
         iter != NULL;                                                  \
         iter = iter->cell.cdr, item = iter ? iter->cell.car : NULL)
|
||||
|
||||
|
||||
|
||||
struct nl_object *nl_cell_create(struct nl_object *car, struct nl_object *cdr);
|
||||
struct nl_object *nl_int_create(int integer);
|
||||
struct nl_object *nl_symbol_create(const char* symbol);
|
||||
struct nl_object *nl_string_create(const char* string);
|
||||
|
||||
|
||||
void nl_object_delete(struct nl_object *obj);
|
||||
|
||||
// For testing
|
||||
struct nl_object *nl_token_next(const char **curr_src);
|
||||
struct nl_object *nl_parse_token_atom(struct nl_object *maybe_atom);
|
||||
|
||||
127
src/parse.c
127
src/parse.c
@@ -13,6 +13,12 @@
|
||||
#define END_LIST_CHAR ')'
|
||||
#define QUOTE_CHAR '"'
|
||||
|
||||
// Symbol tokens that are never atoms: the list delimiters and the quote
// character. Both the strings and the table itself are read-only.
static const char *const reserved_symbols[] = {
    "(",
    ")",
    "\"",
};
|
||||
|
||||
// TODO: remove malloc and strndup calls
|
||||
|
||||
static bool nl_is_whitespace(char c) {
|
||||
@@ -32,7 +38,6 @@ struct nl_object *nl_token_next(const char **curr_src) {
|
||||
const char *start = *curr_src;
|
||||
|
||||
while (true) {
|
||||
struct nl_object *curr = NULL;
|
||||
char *str = NULL;
|
||||
switch (**curr_src) {
|
||||
case '\0':
|
||||
@@ -90,17 +95,123 @@ struct nl_object *nl_tokenize(const char *source) {
|
||||
return tokens;
|
||||
}
|
||||
|
||||
struct nl_object *nl_parse_token_atom(struct nl_object *maybe_atom) {
|
||||
struct nl_object *atom = NULL;
|
||||
switch (maybe_atom->type) {
|
||||
case NL_TYPE_CELL:
|
||||
// Cell types currently are not valid for tokens
|
||||
assert(false);
|
||||
break;
|
||||
case NL_TYPE_SYMBOL: {
|
||||
// Check for reserved tokens first, which indicate special, non-atom behavior
|
||||
for (int i = 0; i < ARRAY_SIZE(reserved_symbols); i++) {
|
||||
if (!strcmp(maybe_atom->string, reserved_symbols[i])) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
atom = nl_symbol_create(strdup(maybe_atom->symbol));
|
||||
break;
|
||||
}
|
||||
case NL_TYPE_STRING:
|
||||
atom = nl_string_create(strdup(maybe_atom->string));
|
||||
break;
|
||||
case NL_TYPE_INT:
|
||||
// TODO: Copy the token
|
||||
assert(0);
|
||||
break;
|
||||
case NL_TYPE_COUNT:
|
||||
assert(false);
|
||||
}
|
||||
return atom;
|
||||
}
|
||||
|
||||
static struct nl_object *nl_parse_tokens_recursive(struct nl_object **token_iter) {
|
||||
// Invariants:
|
||||
// - This function returns NULL if it encounters the first unmatched END_LIST_CHAR
|
||||
// - This function returns NULL if it encounters end-of-list without first finding a START_LIST_CHAR
|
||||
// - This function returns an nl_object of the atom or list at **token_iter
|
||||
assert(token_iter != NULL);
|
||||
if (*token_iter == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
assert((*token_iter)->type == NL_TYPE_CELL);
|
||||
|
||||
struct nl_object *token = (*token_iter)->cell.car;
|
||||
struct nl_object *next_sexp = nl_parse_token_atom(token);
|
||||
if (next_sexp != NULL) {
|
||||
*token_iter = (*token_iter)->cell.cdr;
|
||||
return next_sexp;
|
||||
}
|
||||
assert(token->type == NL_TYPE_SYMBOL);
|
||||
|
||||
if (token->symbol[0] == START_LIST_CHAR) {
|
||||
struct nl_object *list = NULL;
|
||||
struct nl_object **next_node = &list;
|
||||
// Consume the START_LIST_CHAR
|
||||
*token_iter = (*token_iter)->cell.cdr;
|
||||
while (1) {
|
||||
token = (*token_iter)->cell.car;
|
||||
if (token->symbol[0] == END_LIST_CHAR) {
|
||||
*token_iter = (*token_iter)->cell.cdr;
|
||||
if (list == NULL) {
|
||||
list = nl_cell_create(NULL, NULL);
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
next_sexp = nl_parse_tokens_recursive(token_iter);
|
||||
if (next_sexp == NULL) {
|
||||
// Error somewhere in the recursive parsing
|
||||
nl_object_delete(list);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
*next_node = nl_cell_create(next_sexp, NULL);
|
||||
next_node = &(*next_node)->cell.cdr;
|
||||
}
|
||||
} else if (token->symbol[0] == END_LIST_CHAR) {
|
||||
// Mismatched parens
|
||||
return NULL;
|
||||
}
|
||||
// Any other symbol type should have been an atom, this shouldn't happen
|
||||
assert(false);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// parse_tokens -> doesn't care about quotes, terminates on EOF
|
||||
// parse_tokens_recursive -> error on EOF
|
||||
|
||||
struct nl_object *nl_parse_tokens(struct nl_object *tokens) {
|
||||
struct nl_object* resultl = NULL;
|
||||
struct nl_object** token_iter = &tokens;
|
||||
struct nl_object** next_cell = &resultl;
|
||||
|
||||
while (*token_iter != NULL) {
|
||||
struct nl_object *new_sexp = nl_parse_tokens_recursive(token_iter);
|
||||
if (new_sexp == NULL) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
*next_cell = nl_cell_create(new_sexp, NULL);
|
||||
next_cell = &(*next_cell)->cell.cdr;
|
||||
}
|
||||
|
||||
if (resultl == NULL) {
|
||||
return nl_cell_create(NULL, NULL);
|
||||
}
|
||||
|
||||
return resultl;
|
||||
error:
|
||||
nl_object_delete(resultl);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// TODO: Should this parse a single sexp (return the last-parsed position), or
|
||||
// all sexps in the source (return a list of sexps)?
|
||||
// Parse `source` into s-expressions.
//
// Tokenizes the text with nl_tokenize, parses the tokens with nl_parse_tokens,
// and deletes the intermediate token list before returning. The return value
// is whatever nl_parse_tokens produced (NULL on a parse error).
struct nl_object *nl_parse(const char *source) {
    struct nl_object *tokens = nl_tokenize(source);
    struct nl_object *sexp = nl_parse_tokens(tokens);
    nl_object_delete(tokens);
    return sexp;
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#include <stdlib.h>
|
||||
#include <unity.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "nihilispm.h"
|
||||
#include "nihilispm_internal.h"
|
||||
@@ -15,21 +16,7 @@ void tearDown(void) {
|
||||
nl_object_delete(response);
|
||||
}
|
||||
|
||||
/* void test_parse_null() { */
|
||||
/* result = nl_parse("()"); */
|
||||
/* TEST_ASSERT_EQUAL(result->result, 0); */
|
||||
/* TEST_ASSERT_NOT_NULL(result->statement); */
|
||||
/* TEST_ASSERT_NULL(result->statement->car); */
|
||||
/* TEST_ASSERT_NULL(result->statement->cdr); */
|
||||
/* } */
|
||||
|
||||
/* void test_parse_error() { */
|
||||
/* result = nl_parse("("); */
|
||||
/* TEST_ASSERT_EQUAL(result->result, 0); */
|
||||
/* TEST_ASSERT_NULL(result->statement); */
|
||||
/* } */
|
||||
|
||||
void test_token_next_empty_str() {
|
||||
static void test_token_next_empty_str(void) {
|
||||
const char *input = "";
|
||||
const char *curr = input;
|
||||
|
||||
@@ -39,7 +26,7 @@ void test_token_next_empty_str() {
|
||||
TEST_ASSERT_EQUAL('\0', *curr);
|
||||
}
|
||||
|
||||
void test_token_next_only_whitespace() {
|
||||
static void test_token_next_only_whitespace(void) {
|
||||
const char *input = " \n";
|
||||
const char *curr = input;
|
||||
|
||||
@@ -49,7 +36,7 @@ void test_token_next_only_whitespace() {
|
||||
TEST_ASSERT_EQUAL('\0', *curr);
|
||||
}
|
||||
|
||||
void test_token_next_lparen() {
|
||||
static void test_token_next_lparen(void) {
|
||||
const char *input = "(";
|
||||
const char *curr = input;
|
||||
|
||||
@@ -62,7 +49,7 @@ void test_token_next_lparen() {
|
||||
TEST_ASSERT_EQUAL('\0', *curr);
|
||||
}
|
||||
|
||||
void test_token_next_rparen() {
|
||||
static void test_token_next_rparen(void) {
|
||||
const char *input = ")";
|
||||
const char *curr = input;
|
||||
|
||||
@@ -75,7 +62,7 @@ void test_token_next_rparen() {
|
||||
TEST_ASSERT_EQUAL('\0', *curr);
|
||||
}
|
||||
|
||||
void test_token_next_lrparen() {
|
||||
static void test_token_next_lrparen(void) {
|
||||
const char *input = "()";
|
||||
const char *curr = input;
|
||||
|
||||
@@ -97,7 +84,7 @@ void test_token_next_lrparen() {
|
||||
TEST_ASSERT_EQUAL('\0', *curr);
|
||||
}
|
||||
|
||||
void test_token_next_string() {
|
||||
static void test_token_next_string(void) {
|
||||
const char *input = "\"foo\"";
|
||||
const char *curr = input;
|
||||
|
||||
@@ -110,7 +97,7 @@ void test_token_next_string() {
|
||||
TEST_ASSERT_EQUAL('\0', *curr);
|
||||
}
|
||||
|
||||
void test_token_next_string_w_whitespace() {
|
||||
static void test_token_next_string_w_whitespace(void) {
|
||||
const char *input = " \"foo\" ";
|
||||
const char *curr = input;
|
||||
|
||||
@@ -123,7 +110,7 @@ void test_token_next_string_w_whitespace() {
|
||||
TEST_ASSERT_EQUAL_STRING(" ", curr);
|
||||
}
|
||||
|
||||
void test_token_next_symbol() {
|
||||
static void test_token_next_symbol(void) {
|
||||
const char *input = "foo";
|
||||
const char *curr = input;
|
||||
|
||||
@@ -136,7 +123,7 @@ void test_token_next_symbol() {
|
||||
TEST_ASSERT_EQUAL_STRING("", curr);
|
||||
}
|
||||
|
||||
void test_token_next_symbol_w_whitespace() {
|
||||
static void test_token_next_symbol_w_whitespace(void) {
|
||||
const char *input = " foo ";
|
||||
const char *curr = input;
|
||||
|
||||
@@ -149,13 +136,13 @@ void test_token_next_symbol_w_whitespace() {
|
||||
TEST_ASSERT_EQUAL_STRING(" ", curr);
|
||||
}
|
||||
|
||||
void test_tokenize_empty_str() {
|
||||
static void test_tokenize_empty_str(void) {
|
||||
response = nl_tokenize("");
|
||||
|
||||
TEST_ASSERT_NULL(response);
|
||||
}
|
||||
|
||||
void test_tokenize_nil() {
|
||||
static void test_tokenize_nil(void) {
|
||||
response = nl_tokenize("()");
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
@@ -175,7 +162,7 @@ void test_tokenize_nil() {
|
||||
TEST_ASSERT_NULL(token2->cell.cdr);
|
||||
}
|
||||
|
||||
void test_tokenize_statement() {
|
||||
static void test_tokenize_statement(void) {
|
||||
response = nl_tokenize("(foo)");
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
@@ -200,21 +187,28 @@ void test_tokenize_statement() {
|
||||
TEST_ASSERT_EQUAL_STRING(")", token3->cell.car->string);
|
||||
}
|
||||
|
||||
void test_parse_empty_str() {
|
||||
response = nl_parse("");
|
||||
static void test_parse_atom_symbol(void) {
|
||||
response = nl_parse_token_atom(nl_symbol_create(strdup("foo")));
|
||||
|
||||
TEST_ASSERT_EQUAL(response->type, NL_TYPE_SYMBOL);
|
||||
TEST_ASSERT_EQUAL_STRING(response->symbol, "foo");
|
||||
}
|
||||
|
||||
static void test_parse_atom_lparen(void) {
|
||||
response = nl_parse_token_atom(nl_symbol_create(strdup("(")));
|
||||
|
||||
TEST_ASSERT_NULL(response);
|
||||
}
|
||||
|
||||
void test_parse_symbol() {
|
||||
response = nl_parse("foo");
|
||||
static void test_parse_atom_rparen(void) {
|
||||
response = nl_parse_token_atom(nl_symbol_create(strdup(")")));
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->type);
|
||||
TEST_ASSERT_EQUAL_STRING(NL_TYPE_SYMBOL, "foo");
|
||||
TEST_ASSERT_NULL(response);
|
||||
}
|
||||
|
||||
void test_parse_nil() {
|
||||
response = nl_parse("()");
|
||||
|
||||
static void test_parse_empty_str(void) {
|
||||
response = nl_parse("");
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
|
||||
@@ -222,26 +216,118 @@ void test_parse_nil() {
|
||||
TEST_ASSERT_NULL(response->cell.cdr);
|
||||
}
|
||||
|
||||
void test_parse_list_1elem() {
|
||||
static void test_parse_symbol(void) {
|
||||
response = nl_parse("foo");
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response->cell.car);
|
||||
TEST_ASSERT_EQUAL(response->cell.car->type, NL_TYPE_SYMBOL);
|
||||
TEST_ASSERT_EQUAL_STRING(response->cell.car->symbol, "foo");
|
||||
}
|
||||
|
||||
static void test_parse_nil(void) {
|
||||
response = nl_parse("()");
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
|
||||
TEST_ASSERT_NOT_NULL(response->cell.car);
|
||||
TEST_ASSERT_NULL(response->cell.cdr);
|
||||
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->cell.car->type);
|
||||
TEST_ASSERT_NULL(response->cell.car->cell.car);
|
||||
TEST_ASSERT_NULL(response->cell.car->cell.cdr);
|
||||
}
|
||||
|
||||
static void test_parse_list_1elem(void) {
|
||||
response = nl_parse("(foo)");
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response->cell.car);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING("foo", response->cell.car->symbol);
|
||||
TEST_ASSERT_NULL(response->cell.cdr);
|
||||
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->cell.car->type);
|
||||
TEST_ASSERT_NULL(response->cell.car->cell.cdr);
|
||||
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING("foo", response->cell.car->cell.car->symbol);
|
||||
}
|
||||
|
||||
void test_parse_list_2elem() {
|
||||
static void test_parse_list_2elem(void) {
|
||||
response = nl_parse("(foo bar)");
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response->cell.car);
|
||||
TEST_ASSERT_NULL(response->cell.cdr);
|
||||
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->cell.car->type);
|
||||
TEST_ASSERT_NOT_NULL(response->cell.car->cell.cdr);
|
||||
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING("foo", response->cell.car->cell.car->symbol);
|
||||
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->cell.car->cell.cdr->type);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->cell.cdr->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING("bar", response->cell.car->cell.cdr->cell.car->string);
|
||||
|
||||
}
|
||||
|
||||
static void test_parse_2elem(void) {
|
||||
response = nl_parse("foo bar");
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response->cell.car);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING("foo", response->cell.car->symbol);
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response->cell.cdr);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->cell.cdr->type);
|
||||
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.cdr->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING("bar", response->cell.cdr->cell.car->symbol);
|
||||
}
|
||||
|
||||
static void test_parse_2elem_str(void) {
|
||||
response = nl_parse("\"foo\" \"bar\"");
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response->cell.car);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_STRING, response->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING("foo", response->cell.car->string);
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response->cell.cdr);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->cell.cdr->type);
|
||||
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_STRING, response->cell.cdr->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING("bar", response->cell.cdr->cell.car->string);
|
||||
}
|
||||
|
||||
static void test_parse_nested(void) {
|
||||
response = nl_parse("((foo))");
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response->cell.car);
|
||||
TEST_ASSERT_NULL(response->cell.cdr);
|
||||
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->cell.car->type);
|
||||
TEST_ASSERT_NULL(response->cell.car->cell.cdr);
|
||||
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->cell.car->cell.car->type);
|
||||
TEST_ASSERT_NULL(response->cell.car->cell.car->cell.cdr);
|
||||
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->cell.car->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING("foo", response->cell.car->cell.car->cell.car->symbol);
|
||||
}
|
||||
|
||||
|
||||
@@ -259,5 +345,16 @@ int main(void) {
|
||||
RUN_TEST(test_tokenize_empty_str);
|
||||
RUN_TEST(test_tokenize_nil);
|
||||
RUN_TEST(test_tokenize_statement);
|
||||
RUN_TEST(test_parse_atom_symbol);
|
||||
RUN_TEST(test_parse_atom_lparen);
|
||||
RUN_TEST(test_parse_atom_rparen);
|
||||
RUN_TEST(test_parse_empty_str);
|
||||
RUN_TEST(test_parse_symbol);
|
||||
RUN_TEST(test_parse_nil);
|
||||
RUN_TEST(test_parse_list_1elem);
|
||||
RUN_TEST(test_parse_list_2elem);
|
||||
RUN_TEST(test_parse_2elem);
|
||||
RUN_TEST(test_parse_2elem_str);
|
||||
RUN_TEST(test_parse_nested);
|
||||
return UNITY_END();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user