cpu: change cpu cycle calculation

The Gameboy CPU runs at ~4MHz, but all the instructions take some
number of cycles divisible by 4.

This patch makes cpu_cycle() run a single 1MHz cycle, instead
of requiring 4 calls at 4MHz to perform the same emulation. Based on
the code in gbdb, the video controller now appears to run much slower,
and the timings need to be verified.

This results in a large performance gain.
This commit is contained in:
2018-07-01 22:00:02 +00:00
parent 01c1c9e393
commit 9d40f5026a

View File

@@ -99,30 +99,30 @@ void lr35902_init(struct lr35902_state *cpu,
cpu->metrics.mem_writes = 0;
}
/* The shortest number of cycles each instruction may take. */
static const unsigned int op_cycles[256] = {
/* 0x0_ */ 4, 12, 8, 8, 4, 4, 8, 4, 20, 8, 8, 8, 4, 4, 8, 4,
/* 0x1_ */ 4, 12, 8, 8, 4, 4, 8, 4, 12, 8, 8, 8, 4, 4, 8, 4,
/* 0x2_ */ 8, 12, 8, 8, 4, 4, 8, 4, 8, 8, 8, 8, 4, 4, 8, 4,
/* 0x3_ */ 8, 12, 8, 8, 12, 12, 12, 4, 8, 8, 8, 8, 4, 4, 8, 4,
/* 0x4_ */ 4, 4, 4, 4, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 8, 4,
/* 0x5_ */ 4, 4, 4, 4, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 8, 4,
/* 0x6_ */ 4, 4, 4, 4, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 8, 4,
/* 0x7_ */ 8, 8, 8, 8, 8, 8, 4, 8, 4, 4, 4, 4, 4, 4, 8, 4,
static const unsigned int op_extra_cycles[256] = {
/* 0x0_ */ 0, 2, 1, 1, 0, 0, 1, 0, 4, 1, 1, 1, 0, 0, 1, 0,
/* 0x1_ */ 0, 2, 1, 1, 0, 0, 1, 0, 2, 1, 1, 1, 0, 0, 1, 0,
/* 0x2_ */ 1, 2, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
/* 0x3_ */ 1, 2, 1, 1, 2, 2, 2, 0, 1, 1, 1, 1, 0, 0, 1, 0,
/* 0x4_ */ 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
/* 0x5_ */ 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
/* 0x6_ */ 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
/* 0x7_ */ 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
//FIXME
/* 0x0_ */ 4, 12, 8, 8, 4, 4, 8, 4, 20, 8, 8, 8, 4, 4, 8, 4,
/* 0x1_ */ 4, 12, 8, 8, 4, 4, 8, 4, 12, 8, 8, 8, 4, 4, 8, 4,
/* 0x2_ */ 8, 12, 8, 8, 4, 4, 8, 4, 8, 8, 8, 8, 4, 4, 8, 4,
/* 0x3_ */ 8, 12, 8, 8, 12, 12, 12, 4, 8, 8, 8, 8, 4, 4, 8, 4,
/* 0x4_ */ 4, 4, 4, 4, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 8, 4,
/* 0x5_ */ 4, 4, 4, 4, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 8, 4,
/* 0x6_ */ 4, 4, 4, 4, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 8, 4,
/* 0x7_ */ 8, 8, 8, 8, 8, 8, 4, 8, 4, 4, 4, 4, 4, 4, 8, 4,
/* 0x0_ */ 0, 2, 1, 1, 0, 0, 1, 0, 4, 1, 1, 1, 0, 0, 1, 0,
/* 0x1_ */ 0, 2, 1, 1, 0, 0, 1, 0, 2, 1, 1, 1, 0, 0, 1, 0,
/* 0x2_ */ 1, 2, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
/* 0x3_ */ 1, 2, 1, 1, 2, 2, 2, 0, 1, 1, 1, 1, 0, 0, 1, 0,
/* 0x4_ */ 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
/* 0x5_ */ 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
/* 0x6_ */ 0, 0, 0, 0, 0, 3, 1, 3, 3, 3, 3, 3, 3, 3, 1, 3,
/* 0x7_ */ 1, 1, 1, 1, 1, 1, 3, 1, 3, 3, 3, 3, 3, 3, 1, 3,
};
/*
 * Per-opcode cycle table for the 0xCB-prefixed instructions, indexed by the
 * opcode byte that follows the prefix. Only element 0 is written explicitly;
 * the remaining elements are implicitly zero, so all 256 entries are 0.
 * NOTE(review): presumably a placeholder until real timings are filled in -
 * confirm.
 */
static const unsigned int cb_op_cycles[256] = { 0 };
/*
 * Value loaded into cpu->stall_cycles for the 0xCB-prefixed instructions,
 * indexed by the opcode byte that follows the prefix. Only element 0 is
 * written explicitly; the remaining elements are implicitly zero, so all
 * 256 entries are 0.
 * NOTE(review): presumably a placeholder until real timings are filled in -
 * confirm.
 */
static const unsigned int cb_extra_cycles[256] = { 0 };
/* TODO: optimize macros with temp variables so mem_read() doesn't expand multiple times */
#define INC_8(cpu, reg) \
@@ -318,7 +318,6 @@ static const unsigned int cb_op_cycles[256] = { 0 };
void lr35902_cycle(struct lr35902_state *cpu)
{
uint8_t inst;
unsigned int cycles;
uint8_t val;
uint16_t val_16;
uint8_t *reg = NULL;
@@ -334,7 +333,7 @@ void lr35902_cycle(struct lr35902_state *cpu)
}
inst = cpu->mem_read(cpu, cpu->pc++);
cycles = op_cycles[inst];
cpu->stall_cycles = op_extra_cycles[inst];
switch (inst) {
case 0x00: /* NOP */
@@ -999,7 +998,7 @@ void lr35902_cycle(struct lr35902_state *cpu)
case 0xca:
case 0xcb:
inst = cpu->mem_read(cpu, cpu->pc++);
cycles = cb_op_cycles[inst];
cpu->stall_cycles = cb_extra_cycles[inst];
switch (inst) {
case 0x00:
case 0x01:
@@ -1469,6 +1468,4 @@ void lr35902_cycle(struct lr35902_state *cpu)
RST(cpu, 0x38);
break;
}
cpu->stall_cycles = cycles - 1;
}