Add initial tokenizer and tests
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,5 +1,8 @@
|
||||
*.o
|
||||
build
|
||||
compile_commands.json
|
||||
run_tests.sh
|
||||
|
||||
# scons cruft
|
||||
.cache
|
||||
.sconsign.dblite
|
||||
|
||||
@@ -8,10 +8,10 @@ build_dir = "build/default/"
|
||||
src_dir = "src/"
|
||||
|
||||
program_sources = ["src/main.c"]
|
||||
lib_srcs = []
|
||||
lib_srcs = ["src/parse.c", "src/memory.c"]
|
||||
lib_includes = ["src/"]
|
||||
|
||||
test_srcs = [""]
|
||||
test_srcs = ["test/test_parse.c"]
|
||||
test_lib_srcs = ["third-party/unity/src/unity.c"]
|
||||
test_lib_includes = ["third-party/unity/src/"]
|
||||
|
||||
@@ -20,7 +20,7 @@ test_lib_includes = ["third-party/unity/src/"]
|
||||
#
|
||||
|
||||
VariantDir(build_dir, ".", duplicate=0)
|
||||
env = Environment(CPPPATH=lib_includes, COMPILATIONDB_USE_ABSPATH=True)
|
||||
env = Environment(CPPPATH=lib_includes, COMPILATIONDB_USE_ABSPATH=True, CCFLAGS=["-ggdb", "-O0"])
|
||||
env.Tool('compilation_db')
|
||||
env.CompilationDatabase()
|
||||
|
||||
|
||||
73
src/memory.c
Normal file
73
src/memory.c
Normal file
@@ -0,0 +1,73 @@
|
||||
#include "nihilispm.h"
|
||||
#include "nihilispm_internal.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
|
||||
// Forward declaration of the file-private allocator defined further down.
// Declared with (void) so the compiler checks calls against a real prototype.
static struct nl_object *nl_object_alloc(void);
|
||||
|
||||
struct nl_object *nl_cell_create(struct nl_object *car, struct nl_object *cdr)
|
||||
{
|
||||
struct nl_object* obj = nl_object_alloc();
|
||||
obj->type = NL_TYPE_CELL;
|
||||
obj->cell.car = car;
|
||||
obj->cell.cdr = cdr;
|
||||
return obj;
|
||||
}
|
||||
|
||||
struct nl_object *nl_int_create(int integer)
|
||||
{
|
||||
struct nl_object* obj = nl_object_alloc();
|
||||
obj->type = NL_TYPE_INT;
|
||||
obj->integer = integer;
|
||||
return obj;
|
||||
}
|
||||
|
||||
struct nl_object *nl_symbol_create(const char *symbol)
|
||||
{
|
||||
struct nl_object* obj = nl_object_alloc();
|
||||
obj->type = NL_TYPE_SYMBOL;
|
||||
obj->symbol = symbol;
|
||||
return obj;
|
||||
}
|
||||
|
||||
struct nl_object *nl_string_create(const char *string)
|
||||
{
|
||||
struct nl_object* obj = nl_object_alloc();
|
||||
obj->type = NL_TYPE_STRING;
|
||||
obj->string = string;
|
||||
return obj;
|
||||
}
|
||||
|
||||
static struct nl_object* nl_object_alloc() {
|
||||
return malloc(sizeof(struct nl_object));
|
||||
}
|
||||
|
||||
void nl_object_delete(struct nl_object *obj) {
|
||||
if (obj == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
switch (obj->type) {
|
||||
case NL_TYPE_CELL:
|
||||
nl_object_delete(obj->cell.car);
|
||||
obj->cell.car = NULL;
|
||||
nl_object_delete(obj->cell.cdr);
|
||||
obj->cell.cdr = NULL;
|
||||
break;
|
||||
case NL_TYPE_SYMBOL:
|
||||
free(obj->symbol);
|
||||
obj->symbol = NULL;
|
||||
break;
|
||||
case NL_TYPE_STRING:
|
||||
free(obj->string);
|
||||
obj->string = NULL;
|
||||
case NL_TYPE_INT:
|
||||
case NL_TYPE_COUNT:
|
||||
break;
|
||||
}
|
||||
free(obj);
|
||||
}
|
||||
|
||||
40
src/nihilispm.h
Normal file
40
src/nihilispm.h
Normal file
@@ -0,0 +1,40 @@
|
||||
// Include guard. Identifiers that begin with an underscore followed by an
// uppercase letter are reserved for the implementation, so the guard uses a
// plain name.
#ifndef NIHILISPM_H
#define NIHILISPM_H
|
||||
|
||||
// Tag stored in every nl_object; selects which union member is in use.
// Values are assigned consecutively starting at 0.
enum nl_type {
    NL_TYPE_CELL,   // cons cell (car/cdr pair)
    NL_TYPE_SYMBOL, // symbol text
    NL_TYPE_INT,    // integer value
    NL_TYPE_STRING, // string text
    NL_TYPE_COUNT,  // presumably the number of types above, not an object tag
};
|
||||
|
||||
// A cons cell: a pair of object pointers.
struct nl_cell {
    struct nl_object *car; // first half of the pair
    struct nl_object *cdr; // second half; the tokenizer links tokens through it
};
|
||||
|
||||
struct nl_object {
|
||||
enum nl_type type;
|
||||
union {
|
||||
struct nl_cell cell;
|
||||
const char *symbol;
|
||||
int integer;
|
||||
const char *string;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
||||
// Result record for a parse. No live code in this change uses it yet.
struct nl_parse_result {
    int result;                // status value; its meaning is not defined yet
    struct nl_cell *statement; // presumably the parsed statement; TODO confirm
};
|
||||
|
||||
struct nl_state; // TODO
|
||||
|
||||
// Splits `source` into tokens. Returns the first cell of a cdr-linked list
// with one token object in each car, or NULL when there are no tokens.
struct nl_object *nl_tokenize(const char *source);

// Parses `source`. The parameter is named `source` to match the definition.
struct nl_object *nl_parse(const char *source);

// Evaluates `sexp`. NOTE(review): only declared so far; no definition is
// part of this change.
struct nl_cell *nl_evaluate(const struct nl_cell *sexp);
|
||||
|
||||
#endif
|
||||
11
src/nihilispm_internal.h
Normal file
11
src/nihilispm_internal.h
Normal file
@@ -0,0 +1,11 @@
|
||||
// Internal declarations shared between the library sources and the tests.
#ifndef NIHILISPM_INTERNAL_H
#define NIHILISPM_INTERNAL_H

#include "nihilispm.h"

// Object constructors. Each returns a newly allocated object that must be
// released with nl_object_delete().
struct nl_object *nl_cell_create(struct nl_object *car, struct nl_object *cdr);
struct nl_object *nl_int_create(int integer);
// The symbol/string pointer is stored (not copied) and is later passed to
// free() by nl_object_delete().
struct nl_object *nl_symbol_create(const char *symbol);
struct nl_object *nl_string_create(const char *string);

// Frees `obj` together with the objects and text it refers to; NULL is a
// no-op.
void nl_object_delete(struct nl_object *obj);

// For testing: reads one token starting at *curr_src and advances *curr_src
// past it. Returns NULL when no token is left.
struct nl_object *nl_token_next(const char **curr_src);

#endif // NIHILISPM_INTERNAL_H
|
||||
123
src/parse.c
Normal file
123
src/parse.c
Normal file
@@ -0,0 +1,123 @@
|
||||
#include "nihilispm.h"
|
||||
#include "nihilispm_internal.h"
|
||||
|
||||
// TODO: remove these
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Single-character tokens recognised by the tokenizer.
#define START_LIST_CHAR '(' // opens a list
#define END_LIST_CHAR   ')' // closes a list
#define QUOTE_CHAR      '"' // starts and ends a string literal
|
||||
|
||||
// TODO: remove malloc and strndup calls
|
||||
|
||||
// Reports whether `c` is one of the characters the tokenizer skips between
// tokens: a space or a newline.
static bool nl_is_whitespace(char c) {
    switch (c) {
    case ' ':
    case '\n':
        return true;
    default:
        return false;
    }
}
|
||||
|
||||
static bool nl_is_token(char c) {
|
||||
return c == START_LIST_CHAR || c == END_LIST_CHAR || c == QUOTE_CHAR;
|
||||
}
|
||||
|
||||
// Reports whether `c` ends a symbol: the end of input, a token character,
// or whitespace.
static bool nl_is_delimiter(char c) {
    if (c == '\0') {
        return true;
    }
    if (nl_is_token(c)) {
        return true;
    }
    return nl_is_whitespace(c);
}
|
||||
|
||||
struct nl_object *nl_token_next(const char **curr_src) {
|
||||
assert(*curr_src != NULL);
|
||||
const char *start = *curr_src;
|
||||
|
||||
while (true) {
|
||||
struct nl_object *curr = NULL;
|
||||
char *str = NULL;
|
||||
switch (**curr_src) {
|
||||
case '\0':
|
||||
return NULL;
|
||||
case ' ':
|
||||
case '\n':
|
||||
start++;
|
||||
(*curr_src)++;
|
||||
continue;
|
||||
case START_LIST_CHAR:
|
||||
case END_LIST_CHAR:
|
||||
str = malloc(sizeof(char) * 2);
|
||||
str[0] = **curr_src;
|
||||
str[1] = '\0';
|
||||
(*curr_src)++;
|
||||
return nl_cell_create(nl_symbol_create(str), NULL);
|
||||
case QUOTE_CHAR:
|
||||
// skip beginning quote
|
||||
(*curr_src)++;
|
||||
// TODO: Support escaping
|
||||
while (**curr_src != '"') {
|
||||
(*curr_src)++;
|
||||
}
|
||||
// skip end quote
|
||||
(*curr_src)++;
|
||||
// -2 for removing start/end quotes
|
||||
str = strndup(start + 1, *curr_src - start - 2);
|
||||
return nl_cell_create(nl_string_create(str), NULL);
|
||||
//case '0':
|
||||
// TODO: Parse integers
|
||||
default:
|
||||
while (!nl_is_delimiter(**curr_src)) {
|
||||
(*curr_src)++;
|
||||
}
|
||||
str = strndup(start, *curr_src - start);
|
||||
return nl_cell_create(nl_symbol_create(str), NULL);
|
||||
}
|
||||
}
|
||||
|
||||
// Unreachable
|
||||
assert(false);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct nl_object *nl_tokenize(const char *source) {
|
||||
struct nl_object *tokens = NULL, *curr_token = NULL, *prev_token = NULL;
|
||||
const char *curr_src = source;
|
||||
|
||||
while (true) {
|
||||
curr_token = nl_token_next(&curr_src);
|
||||
if (curr_token == NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (tokens == NULL) {
|
||||
tokens = curr_token;
|
||||
} else {
|
||||
prev_token->cell.cdr = curr_token;
|
||||
}
|
||||
prev_token = curr_token;
|
||||
}
|
||||
|
||||
return tokens;
|
||||
}
|
||||
|
||||
// TODO: Should the parse a single sexp (return the last-parsed position), or
|
||||
// all sexps in the source (return a list of sexps)?
|
||||
struct nl_object *nl_parse(const char *source) {
|
||||
struct nl_object *tokens = nl_tokenize(source);
|
||||
struct nl_object *sexp = NULL;
|
||||
|
||||
for (struct nl_object *token = tokens;
|
||||
token != NULL;
|
||||
token = token->cell.cdr) {
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
nl_object_delete(tokens);
|
||||
return sexp;
|
||||
}
|
||||
|
||||
/* struct ParseResult *nl_parse(const char *source) { */
|
||||
/* struct Cell *tokens = nl_tokenize(source); */
|
||||
/* struct Cell *sexp = n */
|
||||
|
||||
/* } */
|
||||
263
test/test_parse.c
Normal file
263
test/test_parse.c
Normal file
@@ -0,0 +1,263 @@
|
||||
#include <stdlib.h>
|
||||
#include <unity.h>
|
||||
|
||||
#include "nihilispm.h"
|
||||
#include "nihilispm_internal.h"
|
||||
|
||||
/* static struct nl_parse_result *result; */
|
||||
static struct nl_object *response;
|
||||
|
||||
void setUp(void) {
|
||||
response = NULL;
|
||||
}
|
||||
|
||||
void tearDown(void) {
|
||||
nl_object_delete(response);
|
||||
}
|
||||
|
||||
/* void test_parse_null() { */
|
||||
/* result = nl_parse("()"); */
|
||||
/* TEST_ASSERT_EQUAL(result->result, 0); */
|
||||
/* TEST_ASSERT_NOT_NULL(result->statement); */
|
||||
/* TEST_ASSERT_NULL(result->statement->car); */
|
||||
/* TEST_ASSERT_NULL(result->statement->cdr); */
|
||||
/* } */
|
||||
|
||||
/* void test_parse_error() { */
|
||||
/* result = nl_parse("("); */
|
||||
/* TEST_ASSERT_EQUAL(result->result, 0); */
|
||||
/* TEST_ASSERT_NULL(result->statement); */
|
||||
/* } */
|
||||
|
||||
void test_token_next_empty_str() {
|
||||
const char *input = "";
|
||||
const char *curr = input;
|
||||
|
||||
response = nl_token_next(&curr);
|
||||
|
||||
TEST_ASSERT_NULL(response);
|
||||
TEST_ASSERT_EQUAL('\0', *curr);
|
||||
}
|
||||
|
||||
void test_token_next_only_whitespace() {
|
||||
const char *input = " \n";
|
||||
const char *curr = input;
|
||||
|
||||
response = nl_token_next(&curr);
|
||||
|
||||
TEST_ASSERT_NULL(response);
|
||||
TEST_ASSERT_EQUAL('\0', *curr);
|
||||
}
|
||||
|
||||
void test_token_next_lparen() {
|
||||
const char *input = "(";
|
||||
const char *curr = input;
|
||||
|
||||
response = nl_token_next(&curr);
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING("(", response->cell.car->string);
|
||||
TEST_ASSERT_EQUAL('\0', *curr);
|
||||
}
|
||||
|
||||
void test_token_next_rparen() {
|
||||
const char *input = ")";
|
||||
const char *curr = input;
|
||||
|
||||
response = nl_token_next(&curr);
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING(")", response->cell.car->string);
|
||||
TEST_ASSERT_EQUAL('\0', *curr);
|
||||
}
|
||||
|
||||
void test_token_next_lrparen() {
|
||||
const char *input = "()";
|
||||
const char *curr = input;
|
||||
|
||||
response = nl_token_next(&curr);
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING("(", response->cell.car->string);
|
||||
TEST_ASSERT_EQUAL(')', *curr);
|
||||
|
||||
nl_object_delete(response);
|
||||
response = nl_token_next(&curr);
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING(")", response->cell.car->string);
|
||||
TEST_ASSERT_EQUAL('\0', *curr);
|
||||
}
|
||||
|
||||
void test_token_next_string() {
|
||||
const char *input = "\"foo\"";
|
||||
const char *curr = input;
|
||||
|
||||
response = nl_token_next(&curr);
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_STRING, response->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING("foo", response->cell.car->string);
|
||||
TEST_ASSERT_EQUAL('\0', *curr);
|
||||
}
|
||||
|
||||
void test_token_next_string_w_whitespace() {
|
||||
const char *input = " \"foo\" ";
|
||||
const char *curr = input;
|
||||
|
||||
response = nl_token_next(&curr);
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_STRING, response->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING("foo", response->cell.car->string);
|
||||
TEST_ASSERT_EQUAL_STRING(" ", curr);
|
||||
}
|
||||
|
||||
void test_token_next_symbol() {
|
||||
const char *input = "foo";
|
||||
const char *curr = input;
|
||||
|
||||
response = nl_token_next(&curr);
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING("foo", response->cell.car->string);
|
||||
TEST_ASSERT_EQUAL_STRING("", curr);
|
||||
}
|
||||
|
||||
void test_token_next_symbol_w_whitespace() {
|
||||
const char *input = " foo ";
|
||||
const char *curr = input;
|
||||
|
||||
response = nl_token_next(&curr);
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING("foo", response->cell.car->string);
|
||||
TEST_ASSERT_EQUAL_STRING(" ", curr);
|
||||
}
|
||||
|
||||
void test_tokenize_empty_str() {
|
||||
response = nl_tokenize("");
|
||||
|
||||
TEST_ASSERT_NULL(response);
|
||||
}
|
||||
|
||||
void test_tokenize_nil() {
|
||||
response = nl_tokenize("()");
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
|
||||
|
||||
const struct nl_object *token1 = response;
|
||||
const struct nl_object *token2 = response->cell.cdr;
|
||||
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, token1->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING("(", token1->cell.car->string);
|
||||
|
||||
TEST_ASSERT_NOT_NULL(token2);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, token2->type);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, token2->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING(")", token2->cell.car->string);
|
||||
|
||||
TEST_ASSERT_NULL(token2->cell.cdr);
|
||||
}
|
||||
|
||||
void test_tokenize_statement() {
|
||||
response = nl_tokenize("(foo)");
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
|
||||
|
||||
const struct nl_object *token1 = response;
|
||||
const struct nl_object *token2 = response->cell.cdr;
|
||||
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, token1->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING("(", token1->cell.car->string);
|
||||
|
||||
TEST_ASSERT_NOT_NULL(token2);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, token2->type);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, token2->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING("foo", token2->cell.car->string);
|
||||
|
||||
const struct nl_object *token3 = token2->cell.cdr;
|
||||
|
||||
TEST_ASSERT_NOT_NULL(token3);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, token3->type);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, token3->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING(")", token3->cell.car->string);
|
||||
}
|
||||
|
||||
void test_parse_empty_str() {
|
||||
response = nl_parse("");
|
||||
TEST_ASSERT_NULL(response);
|
||||
}
|
||||
|
||||
void test_parse_empty_str() {
|
||||
response = nl_parse("foo");
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->type);
|
||||
TEST_ASSERT_EQUAL_STRING(NL_TYPE_SYMBOL, "foo");
|
||||
}
|
||||
|
||||
void test_parse_nil() {
|
||||
response = nl_parse("()");
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
|
||||
TEST_ASSERT_NULL(response->cell.car);
|
||||
TEST_ASSERT_NULL(response->cell.cdr);
|
||||
}
|
||||
|
||||
void test_parse_list_1elem() {
|
||||
response = nl_parse("(foo)");
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response->cell.car);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING("foo", response->cell.car->symbol);
|
||||
}
|
||||
|
||||
void test_parse_list_2elem() {
|
||||
response = nl_parse("(foo bar)");
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_CELL, response->type);
|
||||
|
||||
TEST_ASSERT_NOT_NULL(response->cell.car);
|
||||
TEST_ASSERT_EQUAL(NL_TYPE_SYMBOL, response->cell.car->type);
|
||||
TEST_ASSERT_EQUAL_STRING("foo", response->cell.car->symbol);
|
||||
}
|
||||
|
||||
|
||||
int main(void) {
|
||||
UNITY_BEGIN();
|
||||
RUN_TEST(test_token_next_empty_str);
|
||||
RUN_TEST(test_token_next_only_whitespace);
|
||||
RUN_TEST(test_token_next_lparen);
|
||||
RUN_TEST(test_token_next_rparen);
|
||||
RUN_TEST(test_token_next_lrparen);
|
||||
RUN_TEST(test_token_next_string);
|
||||
RUN_TEST(test_token_next_string_w_whitespace);
|
||||
RUN_TEST(test_token_next_symbol);
|
||||
RUN_TEST(test_token_next_symbol_w_whitespace);
|
||||
RUN_TEST(test_tokenize_empty_str);
|
||||
RUN_TEST(test_tokenize_nil);
|
||||
RUN_TEST(test_tokenize_statement);
|
||||
return UNITY_END();
|
||||
}
|
||||
Reference in New Issue
Block a user