Add initial tokenizer and tests

This commit is contained in:
2022-10-14 19:37:12 -04:00
parent 9cde08b910
commit 2f8bff9c3b
7 changed files with 516 additions and 3 deletions

123
src/parse.c Normal file
View File

@@ -0,0 +1,123 @@
#include "nihilispm.h"
#include "nihilispm_internal.h"
// TODO: remove these
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#define START_LIST_CHAR '('
#define END_LIST_CHAR ')'
#define QUOTE_CHAR '"'
// TODO: remove malloc and strndup calls
// Returns true when c is a space or a newline. No other character (tabs and
// carriage returns included) is classified as whitespace.
static bool nl_is_whitespace(char c) {
    switch (c) {
    case ' ':
    case '\n':
        return true;
    default:
        return false;
    }
}
// Returns true when c is one of the single-character syntax markers:
// START_LIST_CHAR, END_LIST_CHAR or QUOTE_CHAR.
static bool nl_is_token(char c) {
    switch (c) {
    case START_LIST_CHAR:
    case END_LIST_CHAR:
    case QUOTE_CHAR:
        return true;
    default:
        return false;
    }
}
// Returns true when c ends a bare symbol: the string terminator, a syntax
// marker (see nl_is_token) or whitespace (see nl_is_whitespace).
static bool nl_is_delimiter(char c) {
    if (c == '\0') {
        return true;
    }
    if (nl_is_token(c)) {
        return true;
    }
    return nl_is_whitespace(c);
}
// Scans one token starting at *curr_src and advances *curr_src past it.
//
// Leading whitespace (as defined by nl_is_whitespace) is skipped first. Three
// token shapes are recognised:
//   - START_LIST_CHAR or END_LIST_CHAR: a one-character symbol;
//   - QUOTE_CHAR ... QUOTE_CHAR: a string holding the text between the quotes
//     (escape sequences are not supported yet);
//   - anything else: a symbol running up to the next delimiter.
//
// Parameters:
//   curr_src - in/out cursor into a NUL-terminated buffer; neither curr_src
//              nor *curr_src may be NULL.
//
// Returns the result of nl_cell_create(<symbol or string object>, NULL), or
// NULL when only whitespace remains before the terminator (in which case
// *curr_src is left pointing at the terminator), or NULL when the token text
// cannot be allocated (in which case *curr_src is left unchanged).
//
// An unterminated string literal yields a string token containing everything
// after the opening quote, and *curr_src stops at the terminator instead of
// running past the end of the buffer.
//
// NOTE(review): the duplicated token text is handed to nl_symbol_create /
// nl_string_create and never freed here; presumably those take ownership of
// the buffer - confirm against their definitions.
struct nl_object *nl_token_next(const char **curr_src) {
    assert(curr_src != NULL);
    assert(*curr_src != NULL);

    // Skip whitespace with the shared predicate so this function can never
    // disagree with nl_is_delimiter about what separates tokens.
    const char *start = *curr_src;
    while (nl_is_whitespace(*start)) {
        start++;
    }

    if (*start == '\0') {
        *curr_src = start;
        return NULL;
    }

    // List delimiters are complete tokens on their own.
    if (*start == START_LIST_CHAR || *start == END_LIST_CHAR) {
        char *text = strndup(start, 1);
        if (text == NULL) {
            return NULL;
        }
        *curr_src = start + 1;
        return nl_cell_create(nl_symbol_create(text), NULL);
    }

    // String literal.
    // TODO: Support escaping
    if (*start == QUOTE_CHAR) {
        const char *body = start + 1;
        const char *end = body;
        // Stop at the terminator as well as at the closing quote; without the
        // '\0' check an unterminated literal would read out of bounds.
        while (*end != QUOTE_CHAR && *end != '\0') {
            end++;
        }
        char *text = strndup(body, (size_t)(end - body));
        if (text == NULL) {
            return NULL;
        }
        // Step over the closing quote only when there actually is one.
        *curr_src = (*end == QUOTE_CHAR) ? end + 1 : end;
        return nl_cell_create(nl_string_create(text), NULL);
    }

    // TODO: Parse integers

    // Bare symbol. *start is not a delimiter here, so consume it
    // unconditionally; this guarantees forward progress on every call.
    const char *end = start;
    do {
        end++;
    } while (!nl_is_delimiter(*end));

    char *text = strndup(start, (size_t)(end - start));
    if (text == NULL) {
        return NULL;
    }
    *curr_src = end;
    return nl_cell_create(nl_symbol_create(text), NULL);
}
struct nl_object *nl_tokenize(const char *source) {
struct nl_object *tokens = NULL, *curr_token = NULL, *prev_token = NULL;
const char *curr_src = source;
while (true) {
curr_token = nl_token_next(&curr_src);
if (curr_token == NULL) {
break;
}
if (tokens == NULL) {
tokens = curr_token;
} else {
prev_token->cell.cdr = curr_token;
}
prev_token = curr_token;
}
return tokens;
}
// TODO: Should this parse a single sexp (returning the last-parsed position), or
// all sexps in the source (returning a list of sexps)?
// Parser entry point; currently a stub. It tokenizes source, walks the token
// list without acting on any token, releases the list with nl_object_delete,
// and returns NULL because no s-expression is built yet.
struct nl_object *nl_parse(const char *source) {
    struct nl_object *result = NULL;
    struct nl_object *token_list = nl_tokenize(source);

    // Placeholder traversal: visits every token cell via cell.cdr but does
    // nothing with it yet.
    struct nl_object *cursor = token_list;
    while (cursor != NULL) {
        cursor = cursor->cell.cdr;
    }

    nl_object_delete(token_list);
    return result;
}
/* struct ParseResult *nl_parse(const char *source) { */
/* struct Cell *tokens = nl_tokenize(source); */
/* struct Cell *sexp = n */
/* } */