WIP: begin assembler in python

This commit is contained in:
2019-12-20 18:18:22 -08:00
parent 927886b7f3
commit 2b4664162b
11 changed files with 524 additions and 0 deletions

View File

View File

@@ -0,0 +1,7 @@
class ArgumentType(object):
    """Abstract description of one kind of instruction argument.

    Concrete subclasses decide whether a source token is a valid spelling of
    the argument and convert it into an argument object.
    """

    def can_parse(self, token: str) -> bool:
        """Return True when *token* is a valid spelling of this argument type."""
        raise NotImplementedError()

    def parse(self, token: str):
        """Convert *token* into an argument object.

        Implementations are expected to raise ValueError for tokens they do
        not accept; the assembler relies on that to try the next signature.
        """
        raise NotImplementedError()

    def get_name(self) -> str:
        """Return the human-readable name of this argument type."""
        raise NotImplementedError()

View File

@@ -0,0 +1,118 @@
import string

from . import Arguments
from .ArgumentType import ArgumentType
class Address(ArgumentType):
    """Argument type for a numeric address literal in the range [0, 0x8000)."""

    NAME = "Address"

    def __init__(self):
        pass

    def can_parse(self, token: str) -> bool:
        """Return True when *token* is an integer literal within range.

        ``base=0`` lets ``int`` honour the ``0x``/``0o``/``0b`` prefixes.
        """
        try:
            addr = int(token, base=0)
        except ValueError:
            return False
        return 0 <= addr < 0x8000

    def parse(self, token: str) -> Arguments.Address:
        """Convert *token* into an ``Arguments.Address``.

        Raises:
            ValueError: if *token* is not an in-range integer literal.
        """
        if not self.can_parse(token):
            raise ValueError("Not a valid address: {}".format(token))
        return Arguments.Address(int(token, base=0))

    def get_name(self) -> str:
        """Return the display name of this argument type."""
        return Address.NAME
class Label(ArgumentType):
    """Argument type for a label reference."""

    NAME = "Label"

    def __init__(self):
        pass

    def can_parse(self, token: str) -> bool:
        """Return True when *token* starts with an ASCII letter or underscore.

        Only the first character is inspected. An empty token is rejected
        instead of raising IndexError.
        """
        if not token:
            return False
        return token[0] in string.ascii_letters + "_"

    def parse(self, token: str) -> Arguments.Label:
        """Convert *token* into an ``Arguments.Label``.

        Raises:
            ValueError: if *token* does not look like a label.
        """
        if not self.can_parse(token):
            raise ValueError("Not a valid label: {}".format(token))
        return Arguments.Label(token)

    def get_name(self) -> str:
        """Return the display name of this argument type."""
        return Label.NAME
class Register8(ArgumentType):
    """Argument type for 8-bit registers, optionally in indirect spellings.

    Args:
        indirect: also accept the spellings in
            ``Arguments.Register8.REGISTERS_INDIRECT``.
        indirect_increment: also accept the spellings in
            ``Arguments.Register8.REGISTERS_INDIRECT_INCREMENT``.
    """

    NAME = "Register8"

    def __init__(self, indirect: bool = False, indirect_increment: bool = False):
        self.indirect = indirect
        # can_parse() reads this flag; leaving it unset raised AttributeError
        # for every token that was not a plain register name.
        self.indirect_increment = indirect_increment

    def can_parse(self, token: str) -> bool:
        """Return True when *token* is a register spelling this instance allows."""
        if token in Arguments.Register8.REGISTERS:
            return True
        if self.indirect and token in Arguments.Register8.REGISTERS_INDIRECT:
            return True
        if self.indirect_increment \
                and token in Arguments.Register8.REGISTERS_INDIRECT_INCREMENT:
            return True
        return False

    def parse(self, token) -> Arguments.Register8:
        """Convert *token* into an ``Arguments.Register8``.

        The instance flags are not consulted here; validation is left to the
        ``Arguments.Register8`` constructor, which raises ValueError for
        spellings it does not know.
        """
        return Arguments.Register8(token)

    def get_name(self) -> str:
        """Return the display name of this argument type."""
        return Register8.NAME
class Register16(ArgumentType):
    """Argument type for the 16-bit register names."""

    # Was "Immediate8", a copy-paste slip that made get_name() misreport
    # the type.
    NAME = "Register16"

    def __init__(self, indirect: bool = False, indirect_increment: bool = False):
        # Stored for parity with Register8; neither flag is consulted yet.
        self.indirect = indirect
        self.indirect_increment = indirect_increment

    def can_parse(self, token: str) -> bool:
        """Return True when *token* is listed in ``Arguments.Register16.REGISTERS``."""
        return token in Arguments.Register16.REGISTERS

    def to_argument(self, token: str):
        """Placeholder kept for compatibility; always returns None."""
        return None

    def parse(self, token) -> Arguments.Register16:
        """Convert *token* into an ``Arguments.Register16``.

        The constructor raises ValueError for unknown register names.
        """
        return Arguments.Register16(token)

    def get_name(self) -> str:
        """Return the display name of this argument type."""
        return Register16.NAME
class Immediate8(ArgumentType):
    """Argument type for an unsigned 8-bit integer literal (0..0xFF)."""

    NAME = "Immediate8"

    def __init__(self):
        pass

    def can_parse(self, token: str) -> bool:
        """Return True when *token* is an integer literal within 0..0xFF.

        ``base=0`` lets ``int`` honour the ``0x``/``0o``/``0b`` prefixes.
        """
        try:
            number = int(token, base=0)
        except ValueError:
            return False
        return 0 <= number <= 0xFF

    def parse(self, token: str) -> Arguments.Immediate8:
        """Convert *token* into an ``Arguments.Immediate8``.

        Raises:
            ValueError: if *token* is not an in-range integer literal.
        """
        if not self.can_parse(token):
            raise ValueError("Not an 8-bit immediate: {}".format(token))
        return Arguments.Immediate8(int(token, base=0))

    def get_name(self) -> str:
        """Return the display name of this argument type."""
        return Immediate8.NAME
class Immediate16(ArgumentType):
    """Argument type for an unsigned 16-bit integer literal (0..0xFFFF)."""

    NAME = "Immediate16"

    def __init__(self):
        pass

    def can_parse(self, token: str) -> bool:
        """Return True when *token* is an integer literal within 0..0xFFFF.

        ``base=0`` lets ``int`` honour the ``0x``/``0o``/``0b`` prefixes.
        """
        try:
            number = int(token, base=0)
        except ValueError:
            return False
        return 0 <= number <= 0xFFFF

    def parse(self, token) -> Arguments.Immediate16:
        """Convert *token* into an ``Arguments.Immediate16`` holding an int.

        The raw string used to be passed through unchanged and unchecked, so
        the argument held text and no token was ever rejected.

        Raises:
            ValueError: if *token* is not an in-range integer literal.
        """
        if not self.can_parse(token):
            raise ValueError("Not a 16-bit immediate: {}".format(token))
        return Arguments.Immediate16(int(token, base=0))

    def get_name(self) -> str:
        """Return the display name of this argument type."""
        return Immediate16.NAME

View File

@@ -0,0 +1,60 @@
class Argument(object):
    """Common base class for parsed instruction arguments."""

    def __init__(self) -> None:
        # The base class carries no state of its own.
        return None
class Address(Argument):
    """Parsed address argument; ``value`` holds the address as given."""

    NAME = "Address"

    def __init__(self, value: int) -> None:
        # Stored as-is; no validation happens at this level.
        self.value = value
class Label(Argument):
    """Parsed label argument; ``value`` holds the label text as given."""

    NAME = "Label"

    def __init__(self, value: str) -> None:
        # Stored as-is; no validation happens at this level.
        self.value = value
class Register8(Argument):
    """Parsed 8-bit register argument.

    ``value`` is the register spelling: a plain name from ``REGISTERS`` or
    one of the spellings in ``REGISTERS_INDIRECT_INCREMENT``.
    """

    NAME = "Register8"
    REGISTERS = ["A", "B", "C", "D", "E", "H", "L"]
    REGISTERS_INDIRECT = ["(HL)"]
    REGISTERS_INDIRECT_INCREMENT = ["(HL)", "(HL+)", "(HL-)"]

    def __init__(self, value: str) -> None:
        """Store *value*, raising ValueError for an unknown spelling."""
        accepted = Register8.REGISTERS + Register8.REGISTERS_INDIRECT_INCREMENT
        if value in accepted:
            self.value = value
            return
        raise ValueError("Unknown Register8: {}".format(value))
class Register16(Argument):
    """Parsed 16-bit register argument; ``value`` is the register name."""

    # Was "Immediate8", copied from the class below.
    NAME = "Register16"
    # "DE" was listed twice; the duplicate had no effect on membership tests.
    REGISTERS = ["BC", "DE", "HL", "SP"]

    def __init__(self, value: str):
        """Store *value*, raising ValueError when it is not in ``REGISTERS``."""
        if value not in Register16.REGISTERS:
            raise ValueError("Unknown Register16: {}".format(value))
        self.value = value
class Immediate8(Argument):
    """Parsed 8-bit immediate argument; ``value`` holds the number as given."""

    NAME = "Immediate8"

    def __init__(self, value: int) -> None:
        # Stored as-is; range checking is not done at this level.
        self.value = value
class Immediate16(Argument):
    """Parsed 16-bit immediate argument; ``value`` holds the number as given."""

    NAME = "Immediate16"

    def __init__(self, value: int) -> None:
        # Stored as-is; range checking is not done at this level.
        self.value = value

View File

@@ -0,0 +1,3 @@
from .ArgumentType import ArgumentType
from .ArgumentTypes import Label, Address, Immediate8, Immediate16, Register8, Register16
from .Arguments import Argument

153
src/python/src/gbasm/gbasm.py Executable file
View File

@@ -0,0 +1,153 @@
#!/usr/bin/python3
import argparse
import logging
import sys
from .instructions import Instruction
from .instructions.inc import Inc
from .arguments import ArgumentType, Argument
from typing import Callable, Dict, List, Optional
# Module-level logger; main() sets its level from the --verbose flag.
logger = logging.getLogger(__name__)
# Everything from this character to the end of a source line is discarded.
COMMENT_CHAR = '#'
# A leading token that is not an instruction and ends with this suffix is
# treated as a label definition.
LABEL_SUFFIX = ':'
# Instruction handlers known to the assembler; build_instruction_map() indexes
# them by their `token` attribute.
GB_INSTRUCTIONS = [
Inc()
]
def build_instruction_map() -> Dict[str, Instruction]:
    """Index every entry of ``GB_INSTRUCTIONS`` by its ``token`` attribute.

    When two entries share a token, the later one wins.
    """
    return {entry.token: entry for entry in GB_INSTRUCTIONS}
def try_parse_arguments(args: List[str],
                        arg_types: List[ArgumentType]) -> Optional[List[Argument]]:
    """Parse each token in *args* with the argument type at the same position.

    Returns:
        The parsed arguments in order, or None when the token count does not
        match the signature or any ``parse`` call raises ValueError.
    """
    if len(args) != len(arg_types):
        return None
    try:
        return [kind.parse(token) for token, kind in zip(args, arg_types)]
    except ValueError:
        return None
def parse_line_size(instruction: Instruction,
                    arguments: List[str]) -> int:
    """Return the encoded size of *instruction* applied to *arguments*.

    Each signature in ``instruction.argument_specs`` is tried in order; the
    first one that parses all tokens determines the result of
    ``instruction.num_bytes``.

    Raises:
        ValueError: if no signature accepts the tokens.
    """
    # The return annotation used to be ``bytes``; callers add the result to
    # a byte offset.
    for argtype_list in instruction.argument_specs:
        args = try_parse_arguments(arguments, argtype_list)
        if args is not None:
            return instruction.num_bytes(args)
    raise ValueError("Failed to parse line.")
def parse_line_bytes(instruction: Instruction,
                     arguments: List[str],
                     label_resolver: Callable[[str], int]) -> bytes:
    """Encode *instruction* applied to *arguments*.

    Signatures from ``instruction.argument_specs`` are tried in order; the
    first one that parses all tokens is handed to ``instruction.to_bytes``
    together with *label_resolver*.

    Raises:
        ValueError: if no signature accepts the tokens.
    """
    for spec in instruction.argument_specs:
        parsed = try_parse_arguments(arguments, spec)
        if parsed is None:
            continue
        return instruction.to_bytes(parsed, label_resolver)
    raise ValueError("Failed to parse line.")
def assemble_file(infile) -> bytes:
    """Read every line from the open text file *infile* and assemble it.

    Returns:
        The assembled program bytes.
    """
    program = infile.readlines()
    # Pass the lines that were read. The file object is exhausted after
    # readlines(), so handing it to assemble() produced an empty program.
    return assemble(program)
def assemble(lines: List[str]) -> bytes:
    """Assemble source *lines* into program bytes using two passes.

    The SIZE pass records the byte offset of every label and totals the
    program size. The CONTENT pass emits the bytes, resolving labels through
    the table built in the first pass.

    Args:
        lines: source lines; any iterable of strings is accepted.

    Returns:
        The concatenated bytes of every instruction.

    Raises:
        KeyError: for an unknown instruction or a label defined twice.
        ValueError: when a line's arguments match no instruction signature
            or cannot be encoded; the message carries the 1-based line number.
    """
    # Materialise the input so the second pass sees the same lines even when
    # a one-shot iterator (generator, file object) was passed.
    lines = list(lines)
    instruction_map = build_instruction_map()
    logger.debug("Instruction map: {}".format(instruction_map))

    byte_offset = 0
    instruction_count = 0
    labels = {}  # type: Dict[str, int]
    program = bytes()

    def label_resolver(label: str) -> int:
        # Raises KeyError for a label that was never defined.
        return labels[label]

    for step in ["SIZE", "CONTENT"]:
        logger.debug("Starting step: {}".format(step))
        # Number lines from 1 so messages match what an editor shows.
        for line_num, line in enumerate(lines, start=1):
            # Remove comments
            line = line.split(COMMENT_CHAR)[0]
            # Tokenize
            tokens = line.split()
            # Lazy %-style arguments on the module logger; the previous
            # root-logger calls passed an argument with no placeholder.
            logger.debug("Line: %s", line)
            logger.debug("Tokens: %s", tokens)
            if not tokens:
                continue

            instruction_name = tokens[0]
            args = tokens[1:]
            instruction = instruction_map.get(instruction_name)
            if instruction is None:
                if instruction_name.endswith(LABEL_SUFFIX):
                    if step == 'SIZE':
                        label = instruction_name[:-1]
                        logger.debug("Found label '{}' at {}"
                                     .format(label, byte_offset))
                        if label in labels:
                            raise KeyError(
                                "Label '{}' redefined on line {} "
                                "(first defined at byte offset {})"
                                .format(label, line_num, labels[label]))
                        labels[label] = byte_offset
                    # NOTE: tokens after a label on the same line are ignored.
                    continue
                raise KeyError("Unknown instruction \"{}\" on line {}"
                               .format(instruction_name, line_num))

            # Wrap both passes so a parse failure always reports its line;
            # previously the SIZE pass failed first without any location.
            try:
                if step == 'SIZE':
                    byte_offset += parse_line_size(instruction, args)
                    instruction_count += 1
                else:
                    program += parse_line_bytes(instruction, args, label_resolver)
            except ValueError as err:
                raise ValueError("Failed to parse line {},\n{}"
                                 .format(line_num, line)) from err

        if step == 'SIZE':
            logger.info("Program size: {} bytes, {} instructions"
                        .format(byte_offset, instruction_count))
            logger.debug("Found labels: {}".format(labels))
    return program
def main() -> None:
    """Command-line entry point: assemble --infile and write to --outfile.

    Input defaults to stdin and output to stdout. ``--verbose`` switches the
    module logger to DEBUG and adds file/line information to log records.
    """
    parser = argparse.ArgumentParser(
        description="An assembler for Gameboy assembly")
    parser.add_argument("--infile", "-i", type=argparse.FileType("r"),
                        default=sys.stdin)
    # The program is bytes, so the default must be the binary layer of
    # stdout; writing bytes to the text-mode sys.stdout raises TypeError.
    parser.add_argument("--outfile", "-o", type=argparse.FileType("wb"),
                        default=sys.stdout.buffer)
    parser.add_argument("--verbose", "-v", action='store_true')
    args = parser.parse_args()

    # basicConfig() only takes effect on its first call, so choose the
    # format before calling it once.
    if args.verbose:
        log_format = "%(levelname)s: %(filename)s:%(lineno)d: %(message)s"
        log_level = logging.DEBUG
    else:
        log_format = "%(levelname)s: %(message)s"
        log_level = logging.INFO
    logging.basicConfig(format=log_format)
    logger.setLevel(log_level)

    program = assemble_file(args.infile)
    # Was the undefined name `outfile`, a NameError on every run.
    args.outfile.write(program)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,15 @@
from typing import Callable, List
from ..arguments import Argument, ArgumentType
class Instruction(object):
    """Base class for one assembler mnemonic.

    Attributes are set by the constructor:
        token: the mnemonic text used to look the instruction up.
        argument_specs: the accepted signatures, each a list of argument
            types in positional order.
    """

    def __init__(self, token: str, argument_specs: List[List[ArgumentType]]):
        self.token = token
        self.argument_specs = argument_specs

    def num_bytes(self, arguments) -> int:
        """Return the encoded size in bytes for *arguments*; abstract."""
        raise NotImplementedError()

    # `self` was missing, so calling this on an instance raised TypeError
    # instead of the intended NotImplementedError.
    def to_bytes(self, arguments: List[Argument],
                 label_resolver: Callable[[str], int]) -> bytes:
        """Return the encoding for *arguments*; abstract.

        *label_resolver* maps a label name to an integer.
        """
        raise NotImplementedError()

View File

@@ -0,0 +1 @@
from . Instruction import Instruction

View File

@@ -0,0 +1,49 @@
from .Instruction import Instruction
from ..arguments.ArgumentTypes import Register8, Register16
from ..arguments import Argument
from typing import Callable, List
class Inc(Instruction):
    """Encoder for the ``INC`` instruction with one register operand."""

    # (operand spelling, opcode) pairs, compared in order by to_bytes().
    _OPCODES = (
        ("BC", 0x03),
        ("DE", 0x13),
        ("HL", 0x23),
        ("SP", 0x33),
        ("A", 0x3C),
        ("B", 0x04),
        ("C", 0x0C),
        ("D", 0x14),
        ("E", 0x1C),
        ("H", 0x24),
        ("L", 0x2C),
        ("(HL)", 0x34),
    )

    def __init__(self):
        # Two signatures: one 8-bit register or one 16-bit register.
        super().__init__("INC", [[Register8()], [Register16()]])

    def num_bytes(self, arguments) -> int:
        """Every form handled here encodes to a single byte."""
        return 1

    def to_bytes(self, arguments: List[Argument],
                 label_resolver: Callable[[str], int]) -> bytes:
        """Return the one-byte opcode for the operand in *arguments*.

        *label_resolver* is accepted for interface compatibility and unused.

        Raises:
            ValueError: if there is not exactly one argument, or its value
                has no opcode in the table.
        """
        if len(arguments) != 1:
            raise ValueError("Incorrect number of arguments")
        value = arguments[0].value
        for operand, opcode in self._OPCODES:
            if value == operand:
                return bytes([opcode])
        raise ValueError("Unknown value: {}".format(value))

View File

@@ -0,0 +1,74 @@
---
name: inc_a
program: |
INC A
expected:
- 0x3c
---
name: inc_b
program: |
INC B
expected:
- 0x04
---
name: inc_c
program: |
INC C
expected:
- 0x0c
---
name: inc_d
program: |
INC D
expected:
- 0x14
---
name: inc_e
program: |
INC E
expected:
- 0x1c
---
name: inc_h
program: |
INC H
expected:
- 0x24
---
name: inc_l
program: |
INC L
expected:
- 0x2c
---
name: inc_(hl)
program: |
INC (HL)
expected:
- 0x34
---
name: inc_bc
program: |
INC BC
expected:
- 0x03
---
name: inc_de
program: |
INC DE
expected:
- 0x13
---
name: inc_hl
program: |
INC HL
expected:
- 0x23
---
name: inc_sp
program: |
INC SP
expected:
- 0x33

View File

@@ -0,0 +1,44 @@
from gbasm.gbasm import assemble
from pathlib import Path
import os
import yaml
import pytest
import logging
# Let INFO-level records through the root logger while the tests run.
logging.getLogger().setLevel(logging.INFO)
# Install the default handler on the root logger so records are emitted.
logging.basicConfig()
class AssembleCase(object):
    """One assembler test case: a name, a source program, and expected bytes."""

    def __init__(self, name: str, program: str, expected: bytes) -> None:
        self.name, self.program, self.expected = name, program, expected
def find_case_files():
    """Return an iterator over every ``*.yaml`` file below ``cases/instructions``.

    The directory is located relative to this test module's own path.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    return Path(module_dir).joinpath("cases", "instructions").glob("**/*.yaml")
def get_test_cases():
    """Load every test case described in the YAML case files.

    Each YAML document supplies ``name``, ``program`` and ``expected``;
    ``expected`` is a list of integers that is converted to ``bytes``.

    Returns:
        A list of AssembleCase objects, ordered by file path and then by
        document order within each file.
    """
    cases = []
    # glob() order depends on the filesystem; sort so the parametrized test
    # order is the same on every run.
    for case_file in sorted(find_case_files()):
        with open(str(case_file), "r") as yaml_file:
            # safe_load_all is lazy, so consume it while the file is open.
            for desc in yaml.safe_load_all(yaml_file):
                if desc is None:
                    # An empty document loads as None; skip it rather than
                    # fail on subscripting.
                    continue
                cases.append(AssembleCase(desc['name'],
                                          desc['program'],
                                          bytes(desc['expected'])))
    return cases
# Collected once at import time; the parametrize decorator below needs the
# full list when the module is loaded.
cases = get_test_cases()
# NOTE(review): prints the case objects on every import — looks like leftover
# debugging output; confirm before removing.
print(cases)
@pytest.mark.parametrize("case", cases, ids=[entry.name for entry in cases])
def test_assemble_instruction(case):
    """Assemble the case's program and compare the bytes with the expectation."""
    source_lines = case.program.split("\n")
    assert assemble(source_lines) == case.expected