fruitjam-doom/opl/slot_render_pico.S
2022-03-07 12:44:30 -06:00

789 lines
No EOL
24 KiB
ArmAsm

//
// Copyright (C) 2001-2020 Mitsutaka Okazaki
// Copyright (C) 2021-2022 Graham Sanderson
//
// Assemble using the unified ARM/Thumb syntax.
.syntax unified
// Envelope generator (EG) state values; they are compared against the byte
// stored at SLOTB_EG_STATE. RELEASE is not referenced by name in the code
// visible here: check_sustain_release runs the same loop for every state
// that is >= SUSTAIN.
#define ATTACK 0
#define DECAY 1
#define SUSTAIN 2
#define RELEASE 3
//.section .data.slot_render_asm
// The lookup tables live in their own allocatable ("a") section.
// NOTE(review): the .scratch_y prefix presumably lets the linker script place
// the tables in a dedicated RAM bank - confirm against the linker script.
.section .scratch_y.slot_render_S, "a"
// exp_table: 256 halfword entries (16 rows of 16), each written as 1024 + n.
// calc_sample indexes it with (att & 0xff) * 2 as a byte offset from
// rh_exp_table and shifts the loaded value right by (att >> 8) & 127.
exp_table:
.hword 1024+1018, 1024+1013, 1024+1007, 1024+1002, 1024+996, 1024+991, 1024+986, 1024+980, 1024+975, 1024+969, 1024+964, 1024+959, 1024+953, 1024+948, 1024+942, 1024+937
.hword 1024+932, 1024+927, 1024+921, 1024+916, 1024+911, 1024+906, 1024+900, 1024+895, 1024+890, 1024+885, 1024+880, 1024+874, 1024+869, 1024+864, 1024+859, 1024+854
.hword 1024+849, 1024+844, 1024+839, 1024+834, 1024+829, 1024+824, 1024+819, 1024+814, 1024+809, 1024+804, 1024+799, 1024+794, 1024+789, 1024+784, 1024+779, 1024+774
.hword 1024+770, 1024+765, 1024+760, 1024+755, 1024+750, 1024+745, 1024+741, 1024+736, 1024+731, 1024+726, 1024+722, 1024+717, 1024+712, 1024+708, 1024+703, 1024+698
.hword 1024+693, 1024+689, 1024+684, 1024+680, 1024+675, 1024+670, 1024+666, 1024+661, 1024+657, 1024+652, 1024+648, 1024+643, 1024+639, 1024+634, 1024+630, 1024+625
.hword 1024+621, 1024+616, 1024+612, 1024+607, 1024+603, 1024+599, 1024+594, 1024+590, 1024+585, 1024+581, 1024+577, 1024+572, 1024+568, 1024+564, 1024+560, 1024+555
.hword 1024+551, 1024+547, 1024+542, 1024+538, 1024+534, 1024+530, 1024+526, 1024+521, 1024+517, 1024+513, 1024+509, 1024+505, 1024+501, 1024+496, 1024+492, 1024+488
.hword 1024+484, 1024+480, 1024+476, 1024+472, 1024+468, 1024+464, 1024+460, 1024+456, 1024+452, 1024+448, 1024+444, 1024+440, 1024+436, 1024+432, 1024+428, 1024+424
.hword 1024+420, 1024+416, 1024+412, 1024+409, 1024+405, 1024+401, 1024+397, 1024+393, 1024+389, 1024+385, 1024+382, 1024+378, 1024+374, 1024+370, 1024+367, 1024+363
.hword 1024+359, 1024+355, 1024+352, 1024+348, 1024+344, 1024+340, 1024+337, 1024+333, 1024+329, 1024+326, 1024+322, 1024+318, 1024+315, 1024+311, 1024+308, 1024+304
.hword 1024+300, 1024+297, 1024+293, 1024+290, 1024+286, 1024+283, 1024+279, 1024+276, 1024+272, 1024+268, 1024+265, 1024+262, 1024+258, 1024+255, 1024+251, 1024+248
.hword 1024+244, 1024+241, 1024+237, 1024+234, 1024+231, 1024+227, 1024+224, 1024+220, 1024+217, 1024+214, 1024+210, 1024+207, 1024+204, 1024+200, 1024+197, 1024+194
.hword 1024+190, 1024+187, 1024+184, 1024+181, 1024+177, 1024+174, 1024+171, 1024+168, 1024+164, 1024+161, 1024+158, 1024+155, 1024+152, 1024+148, 1024+145, 1024+142
.hword 1024+139, 1024+136, 1024+133, 1024+130, 1024+126, 1024+123, 1024+120, 1024+117, 1024+114, 1024+111, 1024+108, 1024+105, 1024+102, 1024+99, 1024+96, 1024+93
.hword 1024+90, 1024+87, 1024+84, 1024+81, 1024+78, 1024+75, 1024+72, 1024+69, 1024+66, 1024+63, 1024+60, 1024+57, 1024+54, 1024+51, 1024+48, 1024+45
.hword 1024+42, 1024+40, 1024+37, 1024+34, 1024+31, 1024+28, 1024+25, 1024+22, 1024+20, 1024+17, 1024+14, 1024+11, 1024+8, 1024+6, 1024+3, 1024+0
// eg_step_tables: rows of 8 per-step envelope increments. set_step_table
// picks a row and the EG loops index it with (eg_counter >> eg_shift) & 7.
// Each ST_* value is the byte offset of a row from eg_step_tables (row * 8).
// When bit 0x80 is set the offset names a group of 4 rows and set_step_table
// adds eg_rate_l * 8 to choose the row; without 0x80 the single row is used
// as is.
eg_step_tables: // note: 0x80 in a define marks a group of 4 x 8 entries, one row per eg_rate_l value
// rows 0-3
#define ST_NORMAL ((0 * 8) | 0x80)
.byte 0, 1, 0, 1, 0, 1, 0, 1
.byte 0, 1, 0, 1, 1, 1, 0, 1
.byte 0, 1, 1, 1, 0, 1, 1, 1
.byte 0, 1, 1, 1, 1, 1, 1, 1
// rows 4-7
#define ST_FAST ((4 * 8) | 0x80)
.byte 1, 1, 1, 1, 1, 1, 1, 1
.byte 1, 1, 1, 2, 1, 1, 1, 2
.byte 1, 2, 1, 2, 1, 2, 1, 2
.byte 1, 2, 2, 2, 1, 2, 2, 2
// rows 8-11
#define ST_FAST2 ((8 * 8) | 0x80)
.byte 2, 2, 2, 2, 2, 2, 2, 2
.byte 2, 2, 2, 4, 2, 2, 2, 4
.byte 2, 4, 2, 4, 2, 4, 2, 4
.byte 2, 4, 4, 4, 2, 4, 4, 4
// row 12: single row, no 0x80 flag
#define ST_FOUR (12 * 8)
.byte 4, 4, 4, 4, 4, 4, 4, 4
// row 13: single row, no 0x80 flag
#define ST_ZERO (13 * 8)
.byte 0, 0, 0, 0, 0, 0, 0, 0
// attack_rate_def_table: 16 ST_* selectors indexed by eg_rate_h (0..15), used
// by set_step_table in the attack state. Rates 0 and 15 select the all-zero
// row, 13 and 14 the two fast groups, everything else the normal group.
attack_rate_def_table:
.byte ST_ZERO, ST_NORMAL, ST_NORMAL, ST_NORMAL
.byte ST_NORMAL, ST_NORMAL, ST_NORMAL, ST_NORMAL
.byte ST_NORMAL, ST_NORMAL, ST_NORMAL, ST_NORMAL
.byte ST_NORMAL, ST_FAST, ST_FAST2, ST_ZERO
// decay_rate_def_table: 16 ST_* selectors indexed by eg_rate_h (0..15), used
// by set_step_table for the decay and the sustain/release loops. It matches
// the attack table except that rate 15 selects the constant-4 row.
decay_rate_def_table:
.byte ST_ZERO, ST_NORMAL, ST_NORMAL, ST_NORMAL
.byte ST_NORMAL, ST_NORMAL, ST_NORMAL, ST_NORMAL
.byte ST_NORMAL, ST_NORMAL, ST_NORMAL, ST_NORMAL
.byte ST_NORMAL, ST_FAST, ST_FAST2, ST_FOUR
// Code section holding the render routines, the function table and
// test_slot_asm.
.section .text.slot_render_asm
// NOTE(review): hardware/regs/sio.h presumably supplies the SIO_INTERP*_OFFSET
// constants used by the INTERP* defines in this file - confirm.
#include "pico/asm_helper.S"
#include "hardware/regs/sio.h"
// Register aliases. rl_* are low registers (r0-r7); rh_* are high registers.
// rl_slot: base pointer for the SLOT* offsets.
#define rl_slot r0
// rl_sample_out: sample count on entry to test_slot_asm, afterwards the
// pointer to the next output sample.
#define rl_sample_out r1
// rl_eg_counter: envelope counter, incremented once per rendered sample.
#define rl_eg_counter r2
// rl_eg_shift_mask: function table index on entry to test_slot_asm, afterwards
// the mask tested against rl_eg_counter to decide when the envelope steps.
#define rl_eg_shift_mask r3
// Scratch registers.
#define rl_tmp0 r4
#define rl_tmp1 r5
#define rl_tmp2 r6
// rl_interp: base address for all INTERP* register offsets.
#define rl_interp r7
// rh_begin_loop / rh_end_loop: addresses the per-sample function branches to
// when more samples remain / when rl_sample_out has reached rh_sample_out_end.
#define rh_begin_loop r8
#define rh_end_loop r9
#define rh_sample_out_end r10
// rh_fn: per-sample render function taken from slot_render_fn_table.
#define rh_fn r11
// rh_exp_table: address of exp_table.
#define rh_exp_table r12
// only used for alg0 and alg1; it is loaded unconditionally, which is harmless
#define rh_buffer_mod_buffer_offset lr
// Interpolator register offsets, all expressed relative to
// SIO_INTERP0_ACCUM0_OFFSET so they can be used as immediate offsets from
// rl_interp. test_slot_asm loads rl_interp with 0xd0000080; presumably that is
// the address of the INTERP0 ACCUM0 register - TODO confirm against the SIO
// register map.
#define INTERP0_ACCUM0 (SIO_INTERP0_ACCUM0_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
#define INTERP0_PEEK0 (SIO_INTERP0_PEEK_LANE0_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
#define INTERP0_PEEK1 (SIO_INTERP0_PEEK_LANE1_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
// INTERP0 BASE2 is read by the calc_sample_am1_* macros as the base pointer
// for the per-sample AM byte lookup.
#define INTERP0_BASE2_AKA_LFO_AM_BUFFER_LSL3_OFFSET (SIO_INTERP0_BASE2_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
#define INTERP1_PEEK0 (SIO_INTERP1_PEEK_LANE0_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
#define INTERP1_PEEK1 (SIO_INTERP1_PEEK_LANE1_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
#define INTERP1_POP0 (SIO_INTERP1_POP_LANE0_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
// The ADD_RAW names map onto the INTERP1 ACCUMx_ADD registers.
#define INTERP1_ADD_RAW0 (SIO_INTERP1_ACCUM0_ADD_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
#define INTERP1_ADD_RAW1 (SIO_INTERP1_ACCUM1_ADD_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
#define INTERP1_BASE0 (SIO_INTERP1_BASE0_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
// INTERP1 BASE2 is read by advance_phase_pm1 as the multiplier applied to the
// pitch-modulation byte.
#define INTERP1_BASE2_AKA_PG_PHASE_MULTIPLIER (SIO_INTERP1_BASE2_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
// Byte offsets into the slot structure addressed by rl_slot. The letter after
// SLOT gives the access width used by the code: B = byte (ldrb/strb),
// H = halfword (ldrh/strh), W = word (ldr/str).
// Note: the EG state transitions store a whole word at SLOTB_EG_STATE to reset
// the four bytes at 0x00-0x03 (state, rate_h, rate_l, shift) in one go.
#define SLOTB_EG_STATE 0x00
#define SLOTB_EG_RATE_H 0x01
#define SLOTB_EG_RATE_L 0x02
#define SLOTB_EG_SHIFT 0x03
#define SLOTB_NINE_MINUS_FB 0x04
#define SLOTB_RKS 0x05
#define SLOTH_EG_OUT 0x08
#define SLOTH_TLL 0x0a
#define SLOTW_EG_OUT_TLL_LSL3 0x0c
#define SLOTW_OUTPUT0 0x10
#define SLOTW_OUTPUT1 0x14
#define SLOTW_MOD_BUFFER 0x18
#define SLOTW_BUFFER 0x1c
#define SLOTW_BUFFER_MOD_BUFFER_OFFSET 0x20
#define SLOTW_PATCH 0x24
// Byte offsets into the patch structure pointed to by the word at SLOTW_PATCH;
// each field is read with ldrb.
#define PATCH_EG 0x3
#define PATCH_DR 0x6
#define PATCH_SL 0x7
#define PATCH_RR 0x8
// advance_phase_pm0: advance the phase without pitch modulation.
// Writes the value read from INTERP1_BASE0 to INTERP1_ADD_RAW0 and reads
// INTERP1_ADD_RAW0 back into rl_tmp0, which the callers treat as pg_out.
// NOTE(review): relies on the interpolator adding written values to its
// accumulator and returning the lane 0 raw result on read - confirm with the
// RP2040 datasheet.
// Clobbers: rl_tmp0.
.macro advance_phase_pm0
ldr rl_tmp0, [rl_interp, #INTERP1_BASE0]
str rl_tmp0, [rl_interp, #INTERP1_ADD_RAW0]
ldr rl_tmp0, [rl_interp, #INTERP1_ADD_RAW0]
.endm
// advance_phase_pm1: advance the phase with pitch modulation.
// Writes 1 to INTERP1_ADD_RAW1, treats INTERP1_PEEK1 as the address of a signed
// pitch-modulation byte, multiplies that byte by the word at INTERP1_BASE2 and
// adds the word at INTERP1_BASE0. The sum is written to INTERP1_ADD_RAW0 and
// the value read back from it is left in rl_tmp0 (pg_out for the callers).
// Clobbers: rl_tmp0, rl_tmp1, flags.
.macro advance_phase_pm1
movs rl_tmp1, #1
str rl_tmp1, [rl_interp, #INTERP1_ADD_RAW1]
ldr rl_tmp0, [rl_interp, #INTERP1_PEEK1]
ldrb rl_tmp0, [rl_tmp0] // pm
sxtb rl_tmp0, rl_tmp0 // pm is signed
ldr rl_tmp1, [rl_interp, #INTERP1_BASE2_AKA_PG_PHASE_MULTIPLIER]
muls rl_tmp0, rl_tmp1
ldr rl_tmp1, [rl_interp, #INTERP1_BASE0]
adds rl_tmp0, rl_tmp1
str rl_tmp0, [rl_interp, #INTERP1_ADD_RAW0]
ldr rl_tmp0, [rl_interp, #INTERP1_ADD_RAW0]
.endm
// calc_sample is_am: turn the phase index in rl_tmp0 into an output sample.
// In:  rl_tmp0 = phase index (pg_out, optionally with modulation added)
//      rl_tmp2 = AM attenuation to add (only read when is_am is non-zero)
// Out: rl_tmp0 = sample
// Clobbers: rl_tmp1, rl_tmp2, flags. Uses the numeric local label 9.
.macro calc_sample is_am
// interp0->accum[0] = index << 1;
lsls rl_tmp0, #1 // todo can we avoid this
str rl_tmp0, [rl_interp, #INTERP0_ACCUM0]
// uint16_t h = *(uint16_t *)(interp0->peek[0]) | *(uint16_t *)(interp0->peek[1]);
ldr rl_tmp1, [rl_interp, #INTERP0_PEEK0]
ldr rl_tmp0, [rl_interp, #INTERP0_PEEK1]
ldrh rl_tmp0, [rl_tmp0]
ldrh rl_tmp1, [rl_tmp1]
orrs rl_tmp1, rl_tmp0 // rl_tmp1 = h
// uint16_t att = h + slot->eg_out_tll_lsl3
ldr rl_tmp0, [rl_slot, #SLOTW_EG_OUT_TLL_LSL3]
adds rl_tmp1, rl_tmp0
// if (am) { att += am }
.if \is_am
adds rl_tmp1, rl_tmp2
.endif
// int16_t t = exp_table[att&0xff];
lsls rl_tmp0, rl_tmp1, #24
lsrs rl_tmp0, rl_tmp0, #23 // rl_tmp0 = (att&0xff) *2
add rl_tmp0, rh_exp_table // or load
ldrh rl_tmp0, [rl_tmp0]
// int16_t res = t >> ((att>>8)&127);
// todo this is currently res : ~res but we could invert our OR table instead
// return ((att & 0x8000) ? ~res : res);
lsls rl_tmp1, #17 // carry = att bit 15; att bits 8..14 become the top 7 bits
sbcs rl_tmp2, rl_tmp2 // rl_tmp2 = 0 if carry set, -1 if carry clear
mvns rl_tmp2, rl_tmp2 // todo remove this too, but we need to fix up fm
lsrs rl_tmp1, #25 // rl_tmp1 = (att >> 8) & 127
lsrs rl_tmp0, rl_tmp1 // res = t >> shift; Z is set when res is zero
// todo: inaudible todo force zeros for now
// a zero result skips both the conditional inversion and the doubling
beq 9f
# note this matches the original ~res << 1 code, however
# we don't have an early cutoff for full attenuation, so get -2 not 0 in some
# cases, however this was true of the original code too; the annoying thing
# is that it isn't trivial to fix up the - otherwise I think unimportant - differences for the sake of diff
eors rl_tmp0, rl_tmp2 // invert res when att bit 15 was set
lsls rl_tmp0, #1 // todo remove
9:
.endm
// calc_sample_am0: calc_sample without the AM term.
// In: rl_tmp0 = phase index. Out: rl_tmp0 = sample.
.macro calc_sample_am0
calc_sample 0
.endm
// calc_sample_am1_lsr1: calc_sample with AM for the mod_* functions.
// Loads the AM byte at [INTERP0 BASE2 + (rl_sample_out >> 1)] into rl_tmp2;
// the shift by 1 matches the 2-byte output stride of mod_sample_done.
// In: rl_tmp0 = phase index. Out: rl_tmp0 = sample.
.macro calc_sample_am1_lsr1
// must preserve rl_tmp0... set am << 3 in rl_tmp2
ldr rl_tmp1, [rl_interp, #INTERP0_BASE2_AKA_LFO_AM_BUFFER_LSL3_OFFSET]
lsrs rl_tmp2, rl_sample_out, #1
ldrb rl_tmp2, [rl_tmp1, rl_tmp2]
calc_sample 1
.endm
// calc_sample_am1_lsr2: calc_sample with AM for the alg* functions.
// Loads the AM byte at [INTERP0 BASE2 + (rl_sample_out >> 2)] into rl_tmp2;
// the shift by 2 matches the 4-byte output stride of alg_sample_done.
// In: rl_tmp0 = phase index. Out: rl_tmp0 = sample.
.macro calc_sample_am1_lsr2
// must preserve rl_tmp0... set am << 3 in rl_tmp2
ldr rl_tmp1, [rl_interp, #INTERP0_BASE2_AKA_LFO_AM_BUFFER_LSL3_OFFSET]
lsrs rl_tmp2, rl_sample_out, #2
ldrb rl_tmp2, [rl_tmp1, rl_tmp2]
calc_sample 1
.endm
// mod_sample_done: tail of every mod_* function.
// Stores the low halfword of rl_tmp0 at rl_sample_out, advances the pointer by
// one 16-bit sample, then continues at rh_begin_loop, or at rh_end_loop once
// rl_sample_out >= rh_sample_out_end (signed compare).
.macro mod_sample_done
strh rl_tmp0, [rl_sample_out]
adds rl_sample_out, #2
cmp rl_sample_out, rh_sample_out_end
bge 9f
bx rh_begin_loop
9: bx rh_end_loop
.endm
// add_fb_fm: add self-feedback to the phase in rl_tmp0.
// rl_tmp0 += (output0 + output1) >> nine_minus_fb, and output1 = output0.
// The fb1 functions store the new sample to output0 after calc_sample.
// NOTE(review): the shift is logical (lsrs) although the stored samples can be
// negative; this only matches an arithmetic shift in the low bits, so it is
// correct only if the phase is masked downstream - confirm.
// Clobbers: rl_tmp1, rl_tmp2, flags.
.macro add_fb_fm
ldr rl_tmp1, [rl_slot, #SLOTW_OUTPUT0]
ldr rl_tmp2, [rl_slot, #SLOTW_OUTPUT1]
str rl_tmp1, [rl_slot, #SLOTW_OUTPUT1]
adds rl_tmp1, rl_tmp2
ldrb rl_tmp2, [rl_slot, #SLOTB_NINE_MINUS_FB]
lsrs rl_tmp1, rl_tmp2
adds rl_tmp0, rl_tmp1
.endm
// Per-sample render functions for a modulator slot. Each is entered with
// "bx rh_fn", renders one 16-bit sample at rl_sample_out and leaves through
// mod_sample_done (bx rh_begin_loop or bx rh_end_loop); none of them returns
// via lr. The name encodes the variant: am0/am1 = without/with AM,
// fb0/fb1 = without/with self-feedback, pm0/pm1 = without/with pitch
// modulation.
.thumb_func
mod_am0_fb0_pm0_fn:
advance_phase_pm0 // -> tmp0 = pg_out
calc_sample_am0 // tmp0 = pg_out -> tmp0 = sample
mod_sample_done
.thumb_func
mod_am0_fb0_pm1_fn:
advance_phase_pm1 // -> tmp0 = pg_out
calc_sample_am0 // tmp0 = pg_out -> tmp0 = sample
mod_sample_done
.thumb_func
mod_am1_fb0_pm0_fn:
advance_phase_pm0 // -> tmp0 = pg_out
calc_sample_am1_lsr1 // tmp0 = pg_out -> tmp0 = sample
mod_sample_done
.thumb_func
mod_am1_fb0_pm1_fn:
advance_phase_pm1 // -> tmp0 = pg_out
calc_sample_am1_lsr1 // tmp0 = pg_out -> tmp0 = sample
mod_sample_done
// The fb1 variants also record the new sample in output0 for the next
// add_fb_fm.
.thumb_func
mod_am0_fb1_pm0_fn:
advance_phase_pm0 // -> tmp0 = pg_out
add_fb_fm // tmp0 = pg_out -> tmp0 = pg_out + self_fm()
calc_sample_am0 // tmp0 = pg_out + fm -> tmp0 = sample
str rl_tmp0, [rl_slot, #SLOTW_OUTPUT0]
mod_sample_done
.thumb_func
mod_am0_fb1_pm1_fn:
advance_phase_pm1 // -> tmp0 = pg_out
add_fb_fm // tmp0 = pg_out -> tmp0 = pg_out + self_fm()
calc_sample_am0 // tmp0 = pg_out + fm -> tmp0 = sample
str rl_tmp0, [rl_slot, #SLOTW_OUTPUT0]
mod_sample_done
.thumb_func
mod_am1_fb1_pm0_fn:
advance_phase_pm0 // -> tmp0 = pg_out
add_fb_fm // tmp0 = pg_out -> tmp0 = pg_out + self_fm()
calc_sample_am1_lsr1 // tmp0 = pg_out + fm -> tmp0 = sample
str rl_tmp0, [rl_slot, #SLOTW_OUTPUT0]
mod_sample_done
.thumb_func
mod_am1_fb1_pm1_fn:
advance_phase_pm1 // -> tmp0 = pg_out
add_fb_fm // tmp0 = pg_out -> tmp0 = pg_out + self_fm()
calc_sample_am1_lsr1 // tmp0 = pg_out + fm -> tmp0 = sample
str rl_tmp0, [rl_slot, #SLOTW_OUTPUT0]
mod_sample_done
// alg_sample_done: tail of every alg* function.
// Adds rl_tmp0 to the 32-bit word at rl_sample_out (accumulating rather than
// overwriting), advances the pointer by one word, then continues at
// rh_begin_loop, or at rh_end_loop once rl_sample_out >= rh_sample_out_end
// (signed compare).
// Clobbers: rl_tmp1, flags.
.macro alg_sample_done
ldr rl_tmp1, [rl_sample_out]
add rl_tmp1, rl_tmp0
str rl_tmp1, [rl_sample_out]
adds rl_sample_out, #4
cmp rl_sample_out, rh_sample_out_end
bge 9f
bx rh_begin_loop
9: bx rh_end_loop
.endm
// alg0_f_mod and alg1_a_mod expand to the same instructions; they are just
// invoked at different places (before vs. after calc_sample).
// alg0_f_mod: rl_tmp0 += halfword at
// ((rl_sample_out >> 1) + rh_buffer_mod_buffer_offset), i.e. the 16-bit entry
// of the modulator buffer that corresponds to the current 32-bit output
// position.
// NOTE(review): ldrh zero-extends while mod_sample_done can store negative
// samples; confirm that only the low bits of the sum matter downstream.
// Clobbers: rl_tmp1, rl_tmp2, flags.
.macro alg0_f_mod
lsrs rl_tmp1, rl_sample_out, #1
add rl_tmp1, rh_buffer_mod_buffer_offset
ldrh rl_tmp2, [rl_tmp1]
add rl_tmp0, rl_tmp2
.endm
// alg1_a_mod: add the modulator slot's sample for the current output position
// to rl_tmp0 (used after calc_sample by the alg1 functions).
// The required instruction sequence is exactly what alg0_f_mod emits, so that
// macro is expanded here instead of repeating its body.
// Clobbers: rl_tmp1, rl_tmp2, flags.
.macro alg1_a_mod
    alg0_f_mod
.endm
// alg0 is FM by the mod slot
// Per-sample render functions for a carrier slot in algorithm 0: the
// modulator sample is added to the phase before calc_sample. Each function is
// entered with "bx rh_fn", stores the new sample in output0, accumulates it
// into the 32-bit word at rl_sample_out and leaves through alg_sample_done.
// am0/am1 = without/with AM, pm0/pm1 = without/with pitch modulation.
.thumb_func
alg0_am0_pm0_fn:
advance_phase_pm0 // -> tmp0 = pg_out
alg0_f_mod // tmp0 = pg_out -> tmp0 = pg_out + mod_slot[s]
calc_sample_am0 // tmp0 = pg_out + mod_fm -> tmp0 = sample
str rl_tmp0, [rl_slot, #SLOTW_OUTPUT0]
alg_sample_done
.thumb_func
alg0_am0_pm1_fn:
advance_phase_pm1 // -> tmp0 = pg_out
alg0_f_mod // tmp0 = pg_out -> tmp0 = pg_out + mod_slot[s]
calc_sample_am0 // tmp0 = pg_out + mod_fm -> tmp0 = sample
str rl_tmp0, [rl_slot, #SLOTW_OUTPUT0]
alg_sample_done
.thumb_func
alg0_am1_pm0_fn:
advance_phase_pm0 // -> tmp0 = pg_out
alg0_f_mod // tmp0 = pg_out -> tmp0 = pg_out + mod_slot[s]
calc_sample_am1_lsr2 // tmp0 = pg_out + mod_fm -> tmp0 = sample
str rl_tmp0, [rl_slot, #SLOTW_OUTPUT0]
alg_sample_done
.thumb_func
alg0_am1_pm1_fn:
advance_phase_pm1 // -> tmp0 = pg_out
alg0_f_mod // tmp0 = pg_out -> tmp0 = pg_out + mod_slot[s]
calc_sample_am1_lsr2 // tmp0 = pg_out + mod_fm -> tmp0 = sample
str rl_tmp0, [rl_slot, #SLOTW_OUTPUT0]
alg_sample_done
// alg1 is AM by the mod slot
// Per-sample render functions for a carrier slot in algorithm 1: the carrier
// sample is computed from the unmodulated phase and the modulator sample is
// added to the result afterwards. Each function is entered with "bx rh_fn"
// and leaves through alg_sample_done. Unlike the alg0 functions these do not
// store to output0.
.thumb_func
alg1_am0_pm0_fn:
advance_phase_pm0 // -> tmp0 = pg_out
calc_sample_am0 // tmp0 = pg_out -> tmp0 = sample
alg1_a_mod // tmp0 = sample -> tmp0 = sample + mod_slot[s]
alg_sample_done
.thumb_func
alg1_am0_pm1_fn:
advance_phase_pm1 // -> tmp0 = pg_out
calc_sample_am0 // tmp0 = pg_out -> tmp0 = sample
alg1_a_mod // tmp0 = sample -> tmp0 = sample + mod_slot[s]
alg_sample_done
.thumb_func
alg1_am1_pm0_fn:
advance_phase_pm0 // -> tmp0 = pg_out
calc_sample_am1_lsr2 // tmp0 = pg_out -> tmp0 = sample
alg1_a_mod // tmp0 = sample -> tmp0 = sample + mod_slot[s]
alg_sample_done
.thumb_func
alg1_am1_pm1_fn:
advance_phase_pm1 // -> tmp0 = pg_out
calc_sample_am1_lsr2 // tmp0 = pg_out -> tmp0 = sample
alg1_a_mod // tmp0 = sample -> tmp0 = sample + mod_slot[s]
alg_sample_done
// slot_render_fn_table: 16 per-sample render functions, indexed by the value
// passed to test_slot_asm in r3 (index * 4 is used as the byte offset).
// Indices 0-7 are the mod_* functions (bit 0 = pm, bit 1 = am, bit 2 = fb),
// 8-11 the alg0 functions and 12-15 the alg1 functions (bit 0 = pm,
// bit 1 = am). test_slot_asm treats any index with (index >> 3) != 0 as an
// alg function writing 32-bit samples.
.global slot_render_fn_table
.align 4
slot_render_fn_table:
.word mod_am0_fb0_pm0_fn
.word mod_am0_fb0_pm1_fn
.word mod_am1_fb0_pm0_fn
.word mod_am1_fb0_pm1_fn
.word mod_am0_fb1_pm0_fn
.word mod_am0_fb1_pm1_fn
.word mod_am1_fb1_pm0_fn
.word mod_am1_fb1_pm1_fn
.word alg0_am0_pm0_fn
.word alg0_am0_pm1_fn
.word alg0_am1_pm0_fn
.word alg0_am1_pm1_fn
.word alg1_am0_pm0_fn
.word alg1_am0_pm1_fn
.word alg1_am1_pm0_fn
.word alg1_am1_pm1_fn
// set_eg_shift_mask: compute the mask that decides on which samples the
// envelope steps (the loops step when (eg_counter & mask) == 0).
//   eg_rate_h == 0: mask = 0 - 1 = 0xffffffff
//   otherwise:      mask = (1 << eg_shift) - 1
// Out: rl_eg_shift_mask. Clobbers: rl_tmp0 (only when eg_rate_h != 0), flags.
.macro set_eg_shift_mask
ldrb rl_eg_shift_mask, [rl_slot, #SLOTB_EG_RATE_H]
cmp rl_eg_shift_mask, #0
beq 9f
ldrb rl_tmp0, [rl_slot, #SLOTB_EG_SHIFT]
movs rl_eg_shift_mask, #1
lsls rl_eg_shift_mask, rl_tmp0
9:
subs rl_eg_shift_mask, #1
.endm
// update_eg_out_tll_lsl3: refresh the cached attenuation used by calc_sample.
// In: rl_tmp0 = eg_out.
// Stores min(0x1f0, eg_out + tll) << 3 to SLOTW_EG_OUT_TLL_LSL3 (signed
// compare).
// Clobbers: rl_tmp0, rl_tmp1, rl_tmp2, flags.
.macro update_eg_out_tll_lsl3
//slot->eg_out_tll_lsl3 = std::min(EG_MAX/*EG_MUTE*/, slot->eg_out + slot->tll) << 3; // note EG_MAX not EG_MUTE to avoid overflow check later
ldrh rl_tmp1, [rl_slot, #SLOTH_TLL]
adds rl_tmp0, rl_tmp1
// rl_tmp2 = 0x1f << 4 = 0x1f0, built in two steps because movs only takes an
// 8-bit immediate
movs rl_tmp2, #0x1f
lsls rl_tmp2, #4
cmp rl_tmp0, rl_tmp2
ble 9f
movs rl_tmp0, rl_tmp2
9:
lsls rl_tmp0, #3
str rl_tmp0, [rl_slot, #SLOTW_EG_OUT_TLL_LSL3]
.endm
// Layout of the FRAME_SIZE-byte scratch area that test_slot_asm reserves at
// the top of the stack (offsets from sp):
//   FRAMEW_EG_STEP_TABLE       pointer to the active 8-entry step row
//   FRAMEW_SAMPLE_OUT_END_BACK copy of the real end-of-output pointer, used to
//                              restore rh_sample_out_end after it was
//                              shortened to render a single sample
#define FRAMEW_EG_STEP_TABLE 0
#define FRAMEW_SAMPLE_OUT_END_BACK 4
#define FRAME_SIZE 8
// set_step_table table_def: pick the envelope step row for the current rate
// and store its address at [sp + FRAMEW_EG_STEP_TABLE].
//   selector = table_def[slot->eg_rate_h]
//   offset   = (selector & 0x80) ? (selector & 0x7f) + slot->eg_rate_l * 8
//                                : selector
//   frame.eg_step_table = eg_step_tables + offset
// Clobbers: rl_tmp0, rl_tmp1, rl_tmp2, flags. Uses the numeric local label 1.
.macro set_step_table table_def
    // selector byte for this rate
    ldr rl_tmp1, =\table_def
    ldrb rl_tmp2, [rl_slot, #SLOTB_EG_RATE_H]
    ldrb rl_tmp2, [rl_tmp1, rl_tmp2]
    // carry = selector bit 7 (group flag); bits 0..6 end up at the top of rl_tmp1
    lsls rl_tmp1, rl_tmp2, #25
    bcc 1f
    // grouped selector: strip the flag and add the row chosen by eg_rate_l
    lsrs rl_tmp2, rl_tmp1, #25
    ldrb rl_tmp1, [rl_slot, #SLOTB_EG_RATE_L]
    lsls rl_tmp1, #3
    adds rl_tmp2, rl_tmp1
1:
    // rl_tmp2 = byte offset of the row
    ldr rl_tmp0, =eg_step_tables
    adds rl_tmp0, rl_tmp2
    str rl_tmp0, [sp, #FRAMEW_EG_STEP_TABLE]
.endm
// update_eg_vars_for_non_zero_rate: derive eg_rate_l, eg_rate_h and eg_shift
// from a non-zero patch rate.
// In: rl_tmp0 = p_rate. The caller must already have zeroed eg_shift.
// IMPORTANT: the final "bcc 1f" branches to a "1:" label that the invoking
// code must place directly after the macro invocation.
// Clobbers: rl_tmp0, rl_tmp1, rl_tmp2, flags.
.macro update_eg_vars_for_non_zero_rate
ldrb rl_tmp1, [rl_slot, #SLOTB_RKS]
// slot->eg_rate_l = slot->rks & 3;
lsls rl_tmp2, rl_tmp1, #30
lsrs rl_tmp2, rl_tmp2, #30
strb rl_tmp2, [rl_slot, #SLOTB_EG_RATE_L]
// slot->eg_rate_h = std::min(15, p_rate + (slot->rks >> 2));
lsrs rl_tmp1, #2
adds rl_tmp0, rl_tmp1
cmp rl_tmp0, #15
ble 9f
movs rl_tmp0, #15
9:
strb rl_tmp0, [rl_slot, #SLOTB_EG_RATE_H]
// slot->eg_shift = (slot->eg_rate_h < 12) ? (12 - slot->eg_rate_h) : 0;
// note eg_shift was already zeroed above
movs rl_tmp1, #12
subs rl_tmp1, rl_tmp0
// carry clear means 12 - eg_rate_h borrowed (eg_rate_h > 12): keep eg_shift = 0
bcc 1f
strb rl_tmp1, [rl_slot, #SLOTB_EG_SHIFT]
.endm
// (slot, nsamples, eg_counter, fn);
// test_slot_asm: render up to nsamples samples for one slot while running its
// envelope generator state machine.
// In:  r0 = slot pointer
//      r1 = number of samples requested
//      r2 = envelope counter
//      r3 = index into slot_render_fn_table
// Out: r0 = number of samples rendered, computed at "done".
// r4-r12 are saved and restored. lr is used as a scratch register and the
// function returns by popping the saved lr into pc.
.global test_slot_asm
.thumb_func
test_slot_asm:
#if 0
#define rl_slot r0 i
#define rl_sample_out r1 x
#define rl_eg_counter r2 i
#define rl_eg_shift_mask r3 x
#define rl_tmp0 r4 -
#define rl_tmp1 r5 -
#define rl_tmp2 r6 -
#define rl_interp r7
#endif
// rh_begin_loop r8 x
// rh_end_loop r9 x
// rh_sample_out_end r10 x
// rh_fn r11 x
// rh_exp_table r12 x
// rh_mod_buffer_offset lr -
// Save r4-r7 and lr, then r8-r11 (via r4-r7), then the fn index (r3) with r12.
push {r4-r7, lr}
mov r4, r8
mov r5, r9
mov r6, r10
mov r7, r11
push {r4-r7}
mov r4, r12
push {r3, r4} // we want to save r3 as well
sub sp, #FRAME_SIZE
// note r1 == rl_sample_out
// fn index >> 3 is zero for the mod_* functions and non-zero for alg*
lsrs r5, r3, #3
bne 1f
// is a mod fn
// 16-bit samples: end = mod_buffer + nsamples * 2
lsls r4, r1, #1
ldr rl_sample_out, [rl_slot, #SLOTW_MOD_BUFFER]
add r4, rl_sample_out
b 2f
1:
// is an alg fn
// 32-bit samples: end = buffer + nsamples * 4
lsls r4, r1, #2
ldr rl_sample_out, [rl_slot, #SLOTW_BUFFER]
add r4, rl_sample_out
2:
mov rh_sample_out_end, r4
str r4, [sp, #FRAMEW_SAMPLE_OUT_END_BACK]
// rh_fn = slot_render_fn_table[fn index]
ldr r4, =slot_render_fn_table
lsls r3, #2
ldr r4, [r4, r3]
mov rh_fn, r4
ldr rl_tmp0, =exp_table
mov rh_exp_table, rl_tmp0
ldr rl_tmp0, [rl_slot, #SLOTW_BUFFER_MOD_BUFFER_OFFSET]
mov rh_buffer_mod_buffer_offset, rl_tmp0
// base address for the INTERP* offsets
ldr rl_interp, =0xd0000080
// dispatch on the current EG state; anything but ATTACK goes to check_decay
ldrb rl_tmp0, [rl_slot, #SLOTB_EG_STATE]
cmp rl_tmp0, #ATTACK
bne check_decay
// attack: set up and run the attack phase. Once the output is full the
// per-sample function continues at check_decay (rh_end_loop).
attack:
ldr rl_tmp0, =check_decay
mov rh_end_loop, rl_tmp0
set_eg_shift_mask
set_step_table attack_rate_def_table
// if (slot->eg_out == 0) {
ldrh rl_tmp1, [rl_slot, #SLOTH_EG_OUT]
cmp rl_tmp1, #0
bne attack_loop_enter
// early enter decay state
// the counter is advanced here for the single sample that enter_decay_state
// renders
adds rl_eg_counter, #1
b enter_decay_state
attack_loop_enter:
ldr rl_tmp0, =attack_loop
mov rh_begin_loop, rl_tmp0
// attack_loop: executed once per sample; the per-sample function returns here
// through rh_begin_loop.
.thumb_func
attack_loop:
adds rl_eg_counter, #1
tst rl_eg_counter, rl_eg_shift_mask
beq 1f
// no envelope step on this sample
bx rh_fn
1:
ldrh rl_tmp0, [rl_slot, #SLOTH_EG_OUT]
// cmp rl_tmp0, #0
// bhi 1f
// bx rh_fn
//1:
// uint8_t step = eg_step_table[(eg_counter >> slot->eg_shift) & 7];
ldrb rl_tmp1, [rl_slot, #SLOTB_EG_SHIFT]
movs rl_tmp2, rl_eg_counter
lsrs rl_tmp2, rl_tmp1
lsls rl_tmp2, #29
lsrs rl_tmp2, #29
ldr rl_tmp1, [sp, #FRAMEW_EG_STEP_TABLE]
ldrb rl_tmp1, [rl_tmp1, rl_tmp2]
// slot->eg_out += (~slot->eg_out * step) >> 3;
mvns rl_tmp2, rl_tmp0
muls rl_tmp2, rl_tmp1
asrs rl_tmp2, #3
adds rl_tmp0, rl_tmp2
strh rl_tmp0, [rl_slot, #SLOTH_EG_OUT]
update_eg_out_tll_lsl3
// update_eg_out_tll_lsl3 clobbered rl_tmp0, so reload eg_out for the test
ldrh rl_tmp0, [rl_slot, #SLOTH_EG_OUT]
cmp rl_tmp0, #0
beq enter_decay_state
bx rh_fn
// enter_decay_state: switch the slot to DECAY using the patch decay rate, then
// render exactly one sample and continue at check_decay.
enter_decay_state:
// int p_rate = slot->patch->DR;
ldr rl_tmp0, [rl_slot, #SLOTW_PATCH]
ldrb rl_tmp0, [rl_tmp0, #PATCH_DR]
cmp rl_tmp0, #0
// set all eg_state, shift, rate_h, rate_l all to zero (if rl_tmp0 is zero)
// (for a non-zero rate the state byte briefly holds p_rate; it is overwritten
// with DECAY below. str does not change the flags set by the cmp above.)
str rl_tmp0, [rl_slot, #SLOTB_EG_STATE]
beq 1f
update_eg_vars_for_non_zero_rate
1:
movs rl_tmp0, #DECAY
strb rl_tmp0, [rl_slot, #SLOTB_EG_STATE]
// do last sample
// with end == current position the per-sample function leaves through
// rh_end_loop after a single sample
mov rh_sample_out_end, rl_sample_out
ldr rl_tmp0, =check_decay
mov rh_begin_loop, rl_tmp0
bx rh_fn
// check_decay: entered directly for a non-ATTACK state and as the
// continuation after the attack phase. Finishes if all samples are rendered,
// otherwise restores the real end pointer and runs the decay phase when the
// state is DECAY.
.thumb_func
check_decay:
ldr rl_tmp0, [sp, #FRAMEW_SAMPLE_OUT_END_BACK]
cmp rl_sample_out, rl_tmp0
beq check_sustain_release // done is too far away
mov rh_sample_out_end, rl_tmp0
ldrb rl_tmp0, [rl_slot, #SLOTB_EG_STATE]
cmp rl_tmp0, #DECAY
bne check_sustain_release
// decay: set up the decay phase. Once the output is full, or the envelope has
// reached mute, the per-sample function continues at check_sustain_release
// (rh_end_loop).
// Falls through to decay_loop_enter unless the slot already sits at its
// sustain level, in which case it jumps straight to enter_sustain_state.
decay:
    ldr rl_tmp0, =check_sustain_release
    mov rh_end_loop, rl_tmp0
    set_eg_shift_mask
    set_step_table decay_rate_def_table
    // If the next counter value triggers an envelope step, use the normal loop.
    adds rl_tmp0, rl_eg_counter, #1
    tst rl_tmp0, rl_eg_shift_mask
    beq decay_loop_enter
    // check ((slot->patch->SL != 15) && (slot->eg_out >> 4) == slot->patch->SL))
    ldrh rl_tmp0, [rl_slot, #SLOTH_EG_OUT]
    ldr rl_tmp1, [rl_slot, #SLOTW_PATCH]
    ldrb rl_tmp1, [rl_tmp1, #PATCH_SL]
    cmp rl_tmp1, #15
    beq decay_loop_enter
    lsrs rl_tmp2, rl_tmp0, #4
    cmp rl_tmp2, rl_tmp1
    bne decay_loop_enter
    // early enter sustain state
    // Advance the counter for the single sample that enter_sustain_state
    // renders, as the early exit of the attack phase does. rl_tmp0 holds eg_out
    // at this point, so it must not be copied into the counter.
    adds rl_eg_counter, #1
    b enter_sustain_state
// decay_loop_enter: make decay_loop the per-sample continuation and fall into
// it.
decay_loop_enter:
ldr rl_tmp0, =decay_loop
mov rh_begin_loop, rl_tmp0
// decay_loop: executed once per sample; the per-sample function returns here
// through rh_begin_loop.
.thumb_func
decay_loop:
adds rl_eg_counter, #1
tst rl_eg_counter, rl_eg_shift_mask
beq 1f
// no envelope step on this sample
bx rh_fn
1:
//slot->eg_out = static_cast<int16_t>(std::min(EG_MUTE, slot->eg_out + (int) eg_step_table[(eg_counter >> slot->eg_shift) & 7]));
ldr rl_tmp0, [sp, #FRAMEW_EG_STEP_TABLE]
ldrb rl_tmp1, [rl_slot, #SLOTB_EG_SHIFT]
movs rl_tmp2, rl_eg_counter
lsrs rl_tmp2, rl_tmp1
lsls rl_tmp2, #29
lsrs rl_tmp2, #29
ldrb rl_tmp1, [rl_tmp0, rl_tmp2]
ldrh rl_tmp0, [rl_slot, #SLOTH_EG_OUT]
adds rl_tmp0, rl_tmp1
// todo do we actually need this min check
// any bit at or above bit 9 means the sum exceeded 0x1ff: clamp it
lsrs rl_tmp1, rl_tmp0, #9
beq 1f
ldr rl_tmp0, =0x1ff
1:
strh rl_tmp0, [rl_slot, #SLOTH_EG_OUT]
// uses rl_tmp0
update_eg_out_tll_lsl3
// reload eg_out (clobbered above) and test for the sustain level
ldrh rl_tmp0, [rl_slot, #SLOTH_EG_OUT]
ldr rl_tmp1, [rl_slot, #SLOTW_PATCH]
ldrb rl_tmp1, [rl_tmp1, #PATCH_SL]
cmp rl_tmp1, #15
beq 1f
lsrs rl_tmp2, rl_tmp0, #4
cmp rl_tmp2, rl_tmp1
beq enter_sustain_state
1:
// check for EG_MUTE (0x1ff)
// eg_out + 1 reaches bit 9 only when eg_out == 0x1ff
adds rl_tmp1, rl_tmp0, #1
lsrs rl_tmp1, #9
bne 1f
bx rh_fn
1:
// the envelope reached mute, but we still need to run the loop contents once more
mov rh_sample_out_end, rl_sample_out
bx rh_fn
// enter_sustain_state: switch the slot to SUSTAIN (rate 0 when patch->EG is
// set, otherwise the patch release rate), then render exactly one sample and
// continue at check_sustain_release.
enter_sustain_state:
// int p_rate = slot->patch->EG ? 0 : slot->patch->RR
ldr rl_tmp2, [rl_slot, #SLOTW_PATCH]
ldrb rl_tmp1, [rl_tmp2, #PATCH_EG]
movs rl_tmp0, #0
cmp rl_tmp1, #0
bne 1f
ldrb rl_tmp0, [rl_tmp2, #PATCH_RR]
1:
// set all eg_state, shift, rate_h, rate_l all to zero (if rl_tmp0 is zero)
// (for a non-zero rate the state byte briefly holds p_rate; it is overwritten
// with SUSTAIN below)
str rl_tmp0, [rl_slot, #SLOTB_EG_STATE]
cmp rl_tmp0, #0
beq 1f
update_eg_vars_for_non_zero_rate
1:
movs rl_tmp0, #SUSTAIN
strb rl_tmp0, [rl_slot, #SLOTB_EG_STATE]
// do last sample
// with end == current position the per-sample function leaves through
// rh_end_loop after a single sample
mov rh_sample_out_end, rl_sample_out
ldr rl_tmp0, =check_sustain_release
mov rh_begin_loop, rl_tmp0
bx rh_fn
// check_sustain_release: continuation after the decay phase, also reached
// from check_decay. Finishes if all samples are rendered or the state is
// below SUSTAIN; otherwise restores the real end pointer and sets up the
// shared sustain/release loop, which continues at done once the output is
// full.
.thumb_func
check_sustain_release:
ldr rl_tmp0, [sp, #FRAMEW_SAMPLE_OUT_END_BACK]
cmp rl_sample_out, rl_tmp0
beq done
mov rh_sample_out_end, rl_tmp0
ldrb rl_tmp0, [rl_slot, #SLOTB_EG_STATE]
cmp rl_tmp0, #SUSTAIN
blt done
ldr rl_tmp0, =done
mov rh_end_loop, rl_tmp0
set_eg_shift_mask
set_step_table decay_rate_def_table
sustain_release_loop_enter:
ldr rl_tmp0, =sustain_release_loop
mov rh_begin_loop, rl_tmp0
// sustain_release_loop: executed once per sample for states >= SUSTAIN; the
// per-sample function returns here through rh_begin_loop.
// NOTE(review): unlike decay_loop the new eg_out is stored without clamping to
// 0x1ff, and eg_out_tll_lsl3 is not refreshed on the step that reaches or
// passes 0x1ff - confirm this is intended.
.thumb_func
sustain_release_loop:
adds rl_eg_counter, #1
tst rl_eg_counter, rl_eg_shift_mask
beq 1f
// no envelope step on this sample
bx rh_fn
1:
//slot->eg_out = static_cast<int16_t>(std::min(EG_MUTE, slot->eg_out + (int) eg_step_table[(eg_counter >> slot->eg_shift) & 7]));
ldr rl_tmp0, [sp, #FRAMEW_EG_STEP_TABLE]
ldrb rl_tmp1, [rl_slot, #SLOTB_EG_SHIFT]
movs rl_tmp2, rl_eg_counter
lsrs rl_tmp2, rl_tmp1
lsls rl_tmp2, #29
lsrs rl_tmp2, #29
ldrb rl_tmp1, [rl_tmp0, rl_tmp2]
ldrh rl_tmp0, [rl_slot, #SLOTH_EG_OUT]
adds rl_tmp0, rl_tmp1
strh rl_tmp0, [rl_slot, #SLOTH_EG_OUT]
// (eg_out + 1) >> 9 is non-zero once eg_out >= 0x1ff
adds rl_tmp2, rl_tmp0, #1
lsrs rl_tmp1, rl_tmp2, #9
bne 1f
update_eg_out_tll_lsl3
bx rh_fn
1:
// the envelope reached mute, but we still need to run the loop contents
mov rh_sample_out_end, rl_sample_out
bx rh_fn
// done: common exit of test_slot_asm.
// Releases the scratch frame, restores r8-r12 and returns the number of
// samples rendered in r0:
//   mod functions (fn index < 8): (rl_sample_out - slot->mod_buffer) / 2
//   alg functions (fn index >= 8): (rl_sample_out - slot->buffer) / 4
// Returns by popping the saved lr into pc.
.thumb_func
done:
    add sp, #FRAME_SIZE
    // r2 = saved fn index, r3 = saved r12, r4-r7 = saved r8-r11
    pop {r2-r7}
    mov r12, r3
    mov r8, r4
    mov r9, r5
    mov r10, r6
    mov r11, r7
    // figure out how many samples we did
    lsrs r2, #3
    bne 1f
    // is a mod fn: 16-bit samples
    ldr r0, [rl_slot, #SLOTW_MOD_BUFFER]
    subs r1, r0
    lsrs r0, r1, #1
    pop {r4-r7, pc}
1:
    // is an alg fn: 32-bit samples
    ldr r0, [rl_slot, #SLOTW_BUFFER]
    // byte distance from the start of the buffer, then bytes -> samples in r0
    subs r1, r0
    lsrs r0, r1, #2
    pop {r4-r7, pc}
// Disabled debug aid: hits a breakpoint when (rl_eg_counter << 1) equals
// (1 << 12) - 2. It sits after the final return of test_slot_asm, so it would
// have to be moved into a loop to be useful if re-enabled.
#if 0
movs rl_tmp0, #1
lsls rl_tmp0, #12
subs rl_tmp0, #2
lsls rl_tmp1, rl_eg_counter, #1
cmp rl_tmp0, rl_tmp1
bne 1f
bkpt #0
1:
#endif