diff --git a/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_keytest.uf2 b/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_keytest.uf2 deleted file mode 100644 index 410af7c..0000000 Binary files a/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_keytest.uf2 and /dev/null differ diff --git a/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_pico64.uf2 b/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_pico64.uf2 deleted file mode 100644 index 3a573b4..0000000 Binary files a/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_pico64.uf2 and /dev/null differ diff --git a/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_pico800.uf2 b/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_pico800.uf2 deleted file mode 100644 index b439f54..0000000 Binary files a/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_pico800.uf2 and /dev/null differ diff --git a/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_pico81.uf2 b/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_pico81.uf2 deleted file mode 100644 index 8042294..0000000 Binary files a/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_pico81.uf2 and /dev/null differ diff --git a/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_picocolem.uf2 b/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_picocolem.uf2 deleted file mode 100644 index e0694d5..0000000 Binary files a/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_picocolem.uf2 and /dev/null differ diff --git a/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_picoo2em.uf2 b/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_picoo2em.uf2 deleted file mode 100644 index 814551d..0000000 Binary files a/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_picoo2em.uf2 and /dev/null differ diff --git a/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_picospeccy.uf2 b/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_picospeccy.uf2 deleted file mode 100644 index d5ca5aa..0000000 Binary files a/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_picospeccy.uf2 and /dev/null differ diff --git a/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_picovcs.uf2 b/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_picovcs.uf2 deleted file mode 100644 index 27f6c79..0000000 Binary files a/MCUME_pico/bin/PICOMPUTERMAX/_oldcspin/mcume_picovcs.uf2 and /dev/null differ diff --git a/MCUME_pico/bin/PICOMPUTERMAX/mcume_keytest.uf2 b/MCUME_pico/bin/PICOMPUTERMAX/mcume_keytest.uf2 index 047e5d5..00e2e74 100644 Binary files a/MCUME_pico/bin/PICOMPUTERMAX/mcume_keytest.uf2 and b/MCUME_pico/bin/PICOMPUTERMAX/mcume_keytest.uf2 differ diff --git a/MCUME_pico/bin/PICOMPUTERMAX/mcume_pico20.uf2 b/MCUME_pico/bin/PICOMPUTERMAX/mcume_pico20.uf2 index 337a964..343353b 100644 Binary files a/MCUME_pico/bin/PICOMPUTERMAX/mcume_pico20.uf2 and b/MCUME_pico/bin/PICOMPUTERMAX/mcume_pico20.uf2 differ diff --git a/MCUME_pico/bin/PICOMPUTERMAX/mcume_pico64.uf2 b/MCUME_pico/bin/PICOMPUTERMAX/mcume_pico64.uf2 index adcb39a..1df4730 100644 Binary files a/MCUME_pico/bin/PICOMPUTERMAX/mcume_pico64.uf2 and b/MCUME_pico/bin/PICOMPUTERMAX/mcume_pico64.uf2 differ diff --git a/MCUME_pico/bin/PICOMPUTERMAX/mcume_pico800.uf2 b/MCUME_pico/bin/PICOMPUTERMAX/mcume_pico800.uf2 index 6261ded..ce13300 100644 Binary files a/MCUME_pico/bin/PICOMPUTERMAX/mcume_pico800.uf2 and b/MCUME_pico/bin/PICOMPUTERMAX/mcume_pico800.uf2 differ diff --git a/MCUME_pico/bin/PICOMPUTERMAX/mcume_pico81.uf2 b/MCUME_pico/bin/PICOMPUTERMAX/mcume_pico81.uf2 index 2de762c..66df0c7 100644 Binary files a/MCUME_pico/bin/PICOMPUTERMAX/mcume_pico81.uf2 and b/MCUME_pico/bin/PICOMPUTERMAX/mcume_pico81.uf2 differ diff --git a/MCUME_pico/bin/PICOMPUTERMAX/mcume_picocolem.uf2 b/MCUME_pico/bin/PICOMPUTERMAX/mcume_picocolem.uf2 index 5fd404c..9e2f618 100644 Binary files a/MCUME_pico/bin/PICOMPUTERMAX/mcume_picocolem.uf2 and b/MCUME_pico/bin/PICOMPUTERMAX/mcume_picocolem.uf2 differ diff --git a/MCUME_pico/bin/PICOMPUTERMAX/mcume_picoo2em.uf2 b/MCUME_pico/bin/PICOMPUTERMAX/mcume_picoo2em.uf2 index 9d25cf3..a3aba1e 100644 Binary files a/MCUME_pico/bin/PICOMPUTERMAX/mcume_picoo2em.uf2 and b/MCUME_pico/bin/PICOMPUTERMAX/mcume_picoo2em.uf2 differ diff --git a/MCUME_pico/bin/PICOMPUTERMAX/mcume_picospeccy.uf2 b/MCUME_pico/bin/PICOMPUTERMAX/mcume_picospeccy.uf2 index f783d71..ef01091 100644 Binary files a/MCUME_pico/bin/PICOMPUTERMAX/mcume_picospeccy.uf2 and b/MCUME_pico/bin/PICOMPUTERMAX/mcume_picospeccy.uf2 differ diff --git a/MCUME_pico/bin/PICOMPUTERMAX/mcume_picovcs.uf2 b/MCUME_pico/bin/PICOMPUTERMAX/mcume_picovcs.uf2 index 9d4634f..afea4cd 100644 Binary files a/MCUME_pico/bin/PICOMPUTERMAX/mcume_picovcs.uf2 and b/MCUME_pico/bin/PICOMPUTERMAX/mcume_picovcs.uf2 differ diff --git a/MCUME_pico/bin/PICORETROVGA/mcume_pico20.uf2 b/MCUME_pico/bin/PICORETROVGA/mcume_pico20.uf2 index 400b3d0..1513da7 100644 Binary files a/MCUME_pico/bin/PICORETROVGA/mcume_pico20.uf2 and b/MCUME_pico/bin/PICORETROVGA/mcume_pico20.uf2 differ diff --git a/MCUME_pico/bin/PICORETROVGA/mcume_pico64.uf2 b/MCUME_pico/bin/PICORETROVGA/mcume_pico64.uf2 index 0b2dbad..4cf5fb5 100644 Binary files a/MCUME_pico/bin/PICORETROVGA/mcume_pico64.uf2 and b/MCUME_pico/bin/PICORETROVGA/mcume_pico64.uf2 differ diff --git a/MCUME_pico/bin/PICORETROVGA/mcume_pico800.uf2 b/MCUME_pico/bin/PICORETROVGA/mcume_pico800.uf2 index abd06cf..f45b18d 100644 Binary files a/MCUME_pico/bin/PICORETROVGA/mcume_pico800.uf2 and b/MCUME_pico/bin/PICORETROVGA/mcume_pico800.uf2 differ diff --git a/MCUME_pico/bin/PICORETROVGA/mcume_pico81.uf2 b/MCUME_pico/bin/PICORETROVGA/mcume_pico81.uf2 index ece3223..671b5bc 100644 Binary files a/MCUME_pico/bin/PICORETROVGA/mcume_pico81.uf2 and b/MCUME_pico/bin/PICORETROVGA/mcume_pico81.uf2 differ diff --git a/MCUME_pico/bin/PICORETROVGA/mcume_picocolem.uf2 b/MCUME_pico/bin/PICORETROVGA/mcume_picocolem.uf2 index 0cdb4b5..fbdaae5 100644 Binary files a/MCUME_pico/bin/PICORETROVGA/mcume_picocolem.uf2 and b/MCUME_pico/bin/PICORETROVGA/mcume_picocolem.uf2 differ diff --git a/MCUME_pico/bin/PICORETROVGA/mcume_picoo2em.uf2 b/MCUME_pico/bin/PICORETROVGA/mcume_picoo2em.uf2 index d663c18..cee2602 100644 Binary files a/MCUME_pico/bin/PICORETROVGA/mcume_picoo2em.uf2 and b/MCUME_pico/bin/PICORETROVGA/mcume_picoo2em.uf2 differ diff --git a/MCUME_pico/bin/PICORETROVGA/mcume_picospeccy.uf2 b/MCUME_pico/bin/PICORETROVGA/mcume_picospeccy.uf2 index 5c8d67e..d10d1d2 100644 Binary files a/MCUME_pico/bin/PICORETROVGA/mcume_picospeccy.uf2 and b/MCUME_pico/bin/PICORETROVGA/mcume_picospeccy.uf2 differ diff --git a/MCUME_pico/bin/PICORETROVGA/mcume_picovcs.uf2 b/MCUME_pico/bin/PICORETROVGA/mcume_picovcs.uf2 index 6f8a222..c514b00 100644 Binary files a/MCUME_pico/bin/PICORETROVGA/mcume_picovcs.uf2 and b/MCUME_pico/bin/PICORETROVGA/mcume_picovcs.uf2 differ diff --git a/MCUME_pico/config/iopins.h b/MCUME_pico/config/iopins.h index ecd4bbb..4b78be8 100644 --- a/MCUME_pico/config/iopins.h +++ b/MCUME_pico/config/iopins.h @@ -67,6 +67,8 @@ VSYNC and HSYNC */ #define VGA_COLORBASE 2 #define VGA_SYNCBASE 14 +#define VGA_VSYNC 15 + #endif diff --git a/MCUME_pico/pico20/pico20.cpp b/MCUME_pico/pico20/pico20.cpp index 534aa65..74f3792 100644 --- a/MCUME_pico/pico20/pico20.cpp +++ b/MCUME_pico/pico20/pico20.cpp @@ -36,14 +36,17 @@ static int skip=0; #include "hardware/vreg.h" int main(void) { - vreg_set_voltage(VREG_VOLTAGE_1_05); +// vreg_set_voltage(VREG_VOLTAGE_1_05); // set_sys_clock_khz(125000, true); // set_sys_clock_khz(150000, true); // set_sys_clock_khz(133000, true); // set_sys_clock_khz(200000, true); +// set_sys_clock_khz(210000, true); + set_sys_clock_khz(230000, true); // set_sys_clock_khz(225000, true); - set_sys_clock_khz(250000, true); +// set_sys_clock_khz(250000, true); stdio_init_all(); + #ifdef USE_VGA // tft.begin(VGA_MODE_400x240); tft.begin(VGA_MODE_320x240); @@ -98,8 +101,10 @@ void emu_DrawVsync(void) { skip += 1; skip &= VID_FRAME_SKIP; + volatile bool vb=vbl; + while (vbl==vb) {}; #ifdef USE_VGA - tft.waitSync(); + //tft.waitSync(); #else //volatile bool vb=vbl; //while (vbl==vb) {}; diff --git a/MCUME_pico/pico64/pico64.cpp b/MCUME_pico/pico64/pico64.cpp index 5da483b..0447186 100644 --- a/MCUME_pico/pico64/pico64.cpp +++ b/MCUME_pico/pico64/pico64.cpp @@ -36,14 +36,17 @@ static int skip=0; #include "hardware/vreg.h" int main(void) { - vreg_set_voltage(VREG_VOLTAGE_1_05); +// vreg_set_voltage(VREG_VOLTAGE_1_05); // set_sys_clock_khz(125000, true); // set_sys_clock_khz(150000, true); // set_sys_clock_khz(133000, true); // set_sys_clock_khz(200000, true); +// set_sys_clock_khz(210000, true); + set_sys_clock_khz(230000, true); // set_sys_clock_khz(225000, true); - set_sys_clock_khz(250000, true); +// set_sys_clock_khz(250000, true); stdio_init_all(); + #ifdef USE_VGA // tft.begin(VGA_MODE_400x240); tft.begin(VGA_MODE_320x240); @@ -97,10 +100,10 @@ void emu_DrawVsync(void) skip += 1; skip &= VID_FRAME_SKIP; #ifdef USE_VGA - tft.waitSync(); +// tft.waitSync(); #else - // volatile bool vb=vbl; - // while (vbl==vb) {}; +// volatile bool vb=vbl; +// while (vbl==vb) {}; #endif } diff --git a/MCUME_pico/pico800/pico800.cpp b/MCUME_pico/pico800/pico800.cpp index 5f16e9f..c1641b7 100644 --- a/MCUME_pico/pico800/pico800.cpp +++ b/MCUME_pico/pico800/pico800.cpp @@ -36,14 +36,17 @@ static int skip=0; int main(void) { - vreg_set_voltage(VREG_VOLTAGE_1_05); +// vreg_set_voltage(VREG_VOLTAGE_1_05); // set_sys_clock_khz(125000, true); // set_sys_clock_khz(150000, true); // set_sys_clock_khz(133000, true); // set_sys_clock_khz(200000, true); +// set_sys_clock_khz(210000, true); + set_sys_clock_khz(230000, true); // set_sys_clock_khz(225000, true); - set_sys_clock_khz(250000, true); +// set_sys_clock_khz(250000, true); stdio_init_all(); + #ifdef USE_VGA // tft.begin(VGA_MODE_400x240); tft.begin(VGA_MODE_320x240); @@ -63,7 +66,7 @@ int main(void) { tft.fillScreenNoDma( RGBVAL16(0x00,0x00,0x00) ); tft.startDMA(); struct repeating_timer timer; - add_repeating_timer_ms(15, repeating_timer_callback, NULL, &timer); + add_repeating_timer_ms(5, repeating_timer_callback, NULL, &timer); } tft.waitSync(); } @@ -96,11 +99,13 @@ void emu_DrawVsync(void) { skip += 1; skip &= VID_FRAME_SKIP; -#ifdef USE_VGA - tft.waitSync(); -#else volatile bool vb=vbl; while (vbl==vb) {}; +#ifdef USE_VGA +// tft.waitSync(); +#else +// volatile bool vb=vbl; +// while (vbl==vb) {}; #endif } diff --git a/MCUME_pico/pico81/pico81.cpp b/MCUME_pico/pico81/pico81.cpp index acc86ca..963fbc8 100644 --- a/MCUME_pico/pico81/pico81.cpp +++ b/MCUME_pico/pico81/pico81.cpp @@ -35,13 +35,14 @@ static int skip=0; #include "hardware/vreg.h" int main(void) { - vreg_set_voltage(VREG_VOLTAGE_1_05); +// vreg_set_voltage(VREG_VOLTAGE_1_05); // set_sys_clock_khz(125000, true); // set_sys_clock_khz(150000, true); // set_sys_clock_khz(133000, true); -// set_sys_clock_khz(200000, true); + set_sys_clock_khz(200000, true); // set_sys_clock_khz(225000, true); - set_sys_clock_khz(250000, true); +// set_sys_clock_khz(240000, true); +// set_sys_clock_khz(250000, true); stdio_init_all(); #ifdef USE_VGA tft.begin(VGA_MODE_320x240); diff --git a/MCUME_pico/picocolem/picocolem.cpp b/MCUME_pico/picocolem/picocolem.cpp index c6182ed..43f552c 100644 --- a/MCUME_pico/picocolem/picocolem.cpp +++ b/MCUME_pico/picocolem/picocolem.cpp @@ -35,13 +35,14 @@ static int skip=0; #include "hardware/vreg.h" int main(void) { - vreg_set_voltage(VREG_VOLTAGE_1_05); +// vreg_set_voltage(VREG_VOLTAGE_1_05); // set_sys_clock_khz(125000, true); // set_sys_clock_khz(150000, true); // set_sys_clock_khz(133000, true); -// set_sys_clock_khz(200000, true); + set_sys_clock_khz(200000, true); // set_sys_clock_khz(225000, true); - set_sys_clock_khz(250000, true); +// set_sys_clock_khz(240000, true); +// set_sys_clock_khz(250000, true); stdio_init_all(); #ifdef USE_VGA tft.begin(VGA_MODE_320x240); @@ -94,11 +95,13 @@ void emu_DrawVsync(void) { skip += 1; skip &= VID_FRAME_SKIP; -#ifdef USE_VGA - tft.waitSync(); -#else volatile bool vb=vbl; while (vbl==vb) {}; +#ifdef USE_VGA +// tft.waitSync(); +#else +// volatile bool vb=vbl; +// while (vbl==vb) {}; #endif } diff --git a/MCUME_pico/picoo2em/picoo2em.cpp b/MCUME_pico/picoo2em/picoo2em.cpp index 51b665c..bc6fda6 100644 --- a/MCUME_pico/picoo2em/picoo2em.cpp +++ b/MCUME_pico/picoo2em/picoo2em.cpp @@ -35,13 +35,14 @@ static int skip=0; #include "hardware/vreg.h" int main(void) { - vreg_set_voltage(VREG_VOLTAGE_1_05); +// vreg_set_voltage(VREG_VOLTAGE_1_05); // set_sys_clock_khz(125000, true); // set_sys_clock_khz(150000, true); // set_sys_clock_khz(133000, true); -// set_sys_clock_khz(200000, true); + set_sys_clock_khz(200000, true); // set_sys_clock_khz(225000, true); - set_sys_clock_khz(150000, true); +// set_sys_clock_khz(240000, true); +// set_sys_clock_khz(250000, true); stdio_init_all(); #ifdef USE_VGA // tft.begin(VGA_MODE_400x240); diff --git a/MCUME_pico/picospeccy/Z80.c b/MCUME_pico/picospeccy/Z80.c index 8da7f5b..29be44d 100644 --- a/MCUME_pico/picospeccy/Z80.c +++ b/MCUME_pico/picospeccy/Z80.c @@ -569,11 +569,6 @@ int ExecZ80(register Z80 *R,register int RunCycles) asm volatile("nop"); asm volatile("nop"); - asm volatile("nop"); - asm volatile("nop"); - asm volatile("nop"); - asm volatile("nop"); - asm volatile("nop"); #ifndef USE_VGA asm volatile("nop"); asm volatile("nop"); diff --git a/MCUME_pico/picospeccy/picospeccy.cpp b/MCUME_pico/picospeccy/picospeccy.cpp index be378d2..202276b 100644 --- a/MCUME_pico/picospeccy/picospeccy.cpp +++ b/MCUME_pico/picospeccy/picospeccy.cpp @@ -37,14 +37,17 @@ static int skip=0; #include "hardware/vreg.h" int main(void) { - vreg_set_voltage(VREG_VOLTAGE_1_05); +// vreg_set_voltage(VREG_VOLTAGE_1_05); // set_sys_clock_khz(125000, true); // set_sys_clock_khz(150000, true); // set_sys_clock_khz(133000, true); // set_sys_clock_khz(200000, true); +// set_sys_clock_khz(210000, true); + set_sys_clock_khz(230000, true); // set_sys_clock_khz(225000, true); - set_sys_clock_khz(250000, true); +// set_sys_clock_khz(250000, true); stdio_init_all(); + #ifdef USE_VGA tft.begin(VGA_MODE_320x240); #else @@ -63,7 +66,7 @@ int main(void) { tft.fillScreenNoDma( RGBVAL16(0x00,0x00,0x00) ); tft.startDMA(); struct repeating_timer timer; - add_repeating_timer_ms(20, repeating_timer_callback, NULL, &timer); + add_repeating_timer_ms(5, repeating_timer_callback, NULL, &timer); } tft.waitSync(); } @@ -94,11 +97,13 @@ void emu_DrawVsync(void) { skip += 1; skip &= VID_FRAME_SKIP; + volatile bool vb=vbl; + while (vbl==vb) {}; #ifdef USE_VGA - tft.waitSync(); +// tft.waitSync(); #else - // volatile bool vb=vbl; - // while (vbl==vb) {}; +// volatile bool vb=vbl; +// while (vbl==vb) {}; #endif } diff --git a/MCUME_pico/picovcs/picovcs.cpp b/MCUME_pico/picovcs/picovcs.cpp index 7b58272..fedd181 100644 --- a/MCUME_pico/picovcs/picovcs.cpp +++ b/MCUME_pico/picovcs/picovcs.cpp @@ -35,13 +35,14 @@ static int skip=0; #include "hardware/vreg.h" int main(void) { - vreg_set_voltage(VREG_VOLTAGE_1_05); +// vreg_set_voltage(VREG_VOLTAGE_1_05); // set_sys_clock_khz(125000, true); // set_sys_clock_khz(150000, true); // set_sys_clock_khz(133000, true); -// set_sys_clock_khz(200000, true); + set_sys_clock_khz(200000, true); // set_sys_clock_khz(225000, true); - set_sys_clock_khz(150000, true); +// set_sys_clock_khz(240000, true); +// set_sys_clock_khz(250000, true); stdio_init_all(); #ifdef USE_VGA // tft.begin(VGA_MODE_400x240); diff --git a/MCUME_pico/picovga_t4/VGA_t4.cpp b/MCUME_pico/picovga_t4/VGA_t4.cpp index a5666ae..0643986 100755 --- a/MCUME_pico/picovga_t4/VGA_t4.cpp +++ b/MCUME_pico/picovga_t4/VGA_t4.cpp @@ -78,7 +78,7 @@ static void core1_func() //VgaTerm(); // terminate } else - VgaInit(v); + VgaInit(v,(u8*)framebuffer,320,240,320); __dmb(); VgaVmodeReq = NULL; } @@ -133,7 +133,7 @@ vga_error_t VGA_T4::begin(vga_mode_t mode) sem_init(&core1_initted, 0, 1); multicore_launch_core1(core1_func); - vmode = Video(DEV_VGA, RES_QVGA, FORM_8BIT, framebuffer); + vmode = Video(DEV_VGA, RES_QVGA); VgaInitReql(vmode); // wait for initialization of audio to be complete diff --git a/MCUME_pico/picovga_t4/canvas.h b/MCUME_pico/picovga_t4/canvas.h deleted file mode 100755 index fff6ef2..0000000 --- a/MCUME_pico/picovga_t4/canvas.h +++ /dev/null @@ -1,142 +0,0 @@ - -// **************************************************************************** -// -// Canvas -// -// **************************************************************************** - -#ifndef _CANVAS_H -#define _CANVAS_H - -#define DRAW_HWINTER 1 // 1=use hardware interpolator to draw images - -// canvas format -// Note: do not use enum, symbols could not be used by the preprocessor -#define CANVAS_8 0 // 8-bit pixels -#define CANVAS_4 1 // 4-bit pixels -#define CANVAS_2 2 // 2-bit pixels -#define CANVAS_1 3 // 1-bit pixels -#define CANVAS_PLANE2 4 // 4 colors on 2 planes -#define CANVAS_ATTRIB8 5 // 2x4 bit color attributes per 8x8 pixel sample - // draw functions: bit 0..3 = draw color - // bit 4 = draw color is background color - -// canvas descriptor -typedef struct { - u8* img; // image data - u8* img2; // image data 2 (2nd plane of CANVAS_PLANE2, attributes of CANVAS_ATTRIB8) - int w; // width - int h; // height - int wb; // pitch (bytes between lines) - u8 format; // canvas format CANVAS_* -} sCanvas; - -// Draw rectangle -void DrawRect(sCanvas* canvas, int x, int y, int w, int h, u8 col); - -// Draw frame -void DrawFrame(sCanvas* canvas, int x, int y, int w, int h, u8 col); - -// clear canvas (fill with black color) -void DrawClear(sCanvas* canvas); - -// Draw point -void DrawPoint(sCanvas* canvas, int x, int y, u8 col); - -// Draw line -void DrawLine(sCanvas* canvas, int x1, int y1, int x2, int y2, u8 col); - -// Draw filled circle -// x0, y0 ... coordinate of center -// r ... radius -// col ... color -// col with CANVAS_ATTRIB8 format: bit 0..3 = draw color, bit 4 = draw color is background color -// mask ... mask of used octants (0xff = 255 = draw whole circle) -// . B2|B1 . -// B3 . | . B0 -// ------o------ -// B4 . | . B7 -// . B5|B6 . -void DrawFillCircle(sCanvas* canvas, int x0, int y0, int r, u8 col, u8 mask=0xff); - -// Draw circle -// x0, y0 ... coordinate of center -// r ... radius -// col ... color -// col with CANVAS_ATTRIB8 format: bit 0..3 = draw color, bit 4 = draw color is background color -// mask ... mask of used octants (0xff = 255 = draw whole circle) -// . B2|B1 . -// B3 . | . B0 -// ------o------ -// B4 . | . B7 -// . B5|B6 . -void DrawCircle(sCanvas* canvas, int x0, int y0, int r, u8 col, u8 mask=0xff); - -// Draw text (transparent background) -// font = pointer to 1-bit font -void DrawText(sCanvas* canvas, const char* text, int x, int y, u8 col, - const void* font, int fontheight=8, int scalex=1, int scaley=1); - -// Draw text with background -// font = pointer to 1-bit font -void DrawTextBg(sCanvas* canvas, const char* text, int x, int y, u8 col, u8 bgcol, - const void* font, int fontheight=8, int scalex=1, int scaley=1); - -// Draw image -void DrawImg(sCanvas* canvas, sCanvas* src, int xd, int yd, int xs, int ys, int w, int h); - -// Draw image with transparency (source and destination must have same format, col = transparency key color) -// CANVAS_ATTRIB8 format replaced by DrawImg function -void DrawBlit(sCanvas* canvas, sCanvas* src, int xd, int yd, int xs, int ys, int w, int h, u8 col); - -// DrawImgMat mode -enum { - DRAWIMG_WRAP, // wrap image - DRAWIMG_NOBORDER, // no border (transparent border) - DRAWIMG_CLAMP, // clamp image (use last pixel as border) - DRAWING_COLOR, // color border - DRAWIMG_TRANSP, // transparent image with key color - DRAWIMG_PERSP, // perspective floor -}; - -// draw 8-bit image with 2D transformation matrix -// canvas ... destination canvas -// src ... source canvas with image -// x ... destination coordinate X -// y ... destination coordinate Y -// w ... destination width -// h ... destination height -// m ... transformation matrix (should be prepared using PrepDrawImg or PrepDrawPersp function) -// mode ... draw mode DRAWIMG_* -// color ... key or border color -// Note to wrap and perspective mode: Width and height of source image must be power of 2! -void DrawImgMat(sCanvas* canvas, const sCanvas* src, int x, int y, int w, int h, - const class cMat2Df* m, u8 mode, u8 color); - -// draw tile map using perspective projection -// canvas ... destination canvas -// src ... source canvas with column of 8-bit square tiles (width = tile size, must be power of 2) -// map ... byte map of tile indices -// mapwbits ... number of bits of map width (number of tiles; width must be power of 2) -// maphbits ... number of bits of map height (number of tiles; height must be power of 2) -// tilebits ... number of bits of tile size (e.g. 5 = tile 32x32 pixel) -// x ... destination coordinate X -// y ... destination coordinate Y -// w ... destination width -// h ... destination height -// mat ... transformation matrix (should be prepared using PrepDrawPersp function) -// horizon ... horizon offset (0=do not use perspective projection) -void DrawTileMap(sCanvas* canvas, const sCanvas* src, const u8* map, int mapwbits, int maphbits, - int tilebits, int x, int y, int w, int h, const cMat2Df* mat, u8 horizon); - -// draw image line interpolated -// canvas = destination canvas (8-bit pixel format) -// src = source canvas (source image in 8-bit pixel format) -// xd,yd = destination coordinates -// xs,ys = source coordinates -// wd = destination width -// ws = source width -// Overflow in X direction is not checked! -void DrawImgLine(sCanvas* canvas, sCanvas* src, int xd, int yd, int xs, int ys, int wd, int ws); - -#endif // _CANVAS_H diff --git a/MCUME_pico/picovga_t4/define.h b/MCUME_pico/picovga_t4/define.h deleted file mode 100755 index a88759c..0000000 --- a/MCUME_pico/picovga_t4/define.h +++ /dev/null @@ -1,198 +0,0 @@ - -// **************************************************************************** -// -// VGA common definitions of C and ASM -// -// **************************************************************************** - -#include "vga_config.h" // VGA configuration - -#define LAYERS_MAX 4 // max. number of layers (should be 4) - -#define BLACK_MAX MAXX // size of buffer with black color (used to clear rest of unused line) - -// VGA PIO program -#define BASE_OFFSET 17 // offset of base layer program -#define LAYER_OFFSET 0 // offset of overlapped layer program - -// layer program -#define LAYERPROG_BASE 0 // program of base layer (overlapped layers are OFF) -#define LAYERPROG_KEY 1 // layer with key color -#define LAYERPROG_BLACK 2 // layer with black key color -#define LAYERPROG_WHITE 3 // layer with white key color -#define LAYERPROG_MONO 4 // layer with mono pattern or simple color -#define LAYERPROG_RLE 5 // layer with RLE compression - -#define LAYERPROG_NUM 6 // number of layer programs - -// layer mode (CPP = clock cycles per pixel) -// Control buffer: 16 bytes -// Data buffer: 4 bytes -// fast sprites can be up Control buffer: width*2 bytes -// sprites Data buffer: width bytes -#define LAYERMODE_BASE 0 // base layer -#define LAYERMODE_KEY 1 // layer with key color -#define LAYERMODE_BLACK 2 // layer with black key color -#define LAYERMODE_WHITE 3 // layer with white key color -#define LAYERMODE_MONO 4 // layer with mono pattern -#define LAYERMODE_COLOR 5 // layer with simple color -#define LAYERMODE_RLE 6 // layer with RLE compression -#define LAYERMODE_SPRITEKEY 7 // layer with sprites with key color -#define LAYERMODE_SPRITEBLACK 8 // layer with sprites with black key color -#define LAYERMODE_SPRITEWHITE 9 // layer with sprites with white key color -#define LAYERMODE_FASTSPRITEKEY 10 // layer with fast sprites with key color -#define LAYERMODE_FASTSPRITEBLACK 11 // layer with fast sprites with black key color -#define LAYERMODE_FASTSPRITEWHITE 12 // layer with fast sprites with white key color -#define LAYERMODE_PERSPKEY 13 // layer with key color and image with transformation matrix -#define LAYERMODE_PERSPBLACK 14 // layer with black key color and image with transformation matrix -#define LAYERMODE_PERSPWHITE 15 // layer with white key color and image with transformation matrix -#define LAYERMODE_PERSP2KEY 16 // layer with key color and double-pixel image with transformation matrix -#define LAYERMODE_PERSP2BLACK 17 // layer with black key color and double-pixel image with transformation matrix -#define LAYERMODE_PERSP2WHITE 18 // layer with white key color and double-pixel image with transformation matrix - -#define LAYERMODE_NUM 19 // number of overlapped layer modes - -// Structure of sprite sSprite (on change update structure sSprite in vga_layer.h) -#define SSPRITE_IMG 0 // u8* img; // pointer to image data -#define SSPRITE_X0 4 // u8* x0; // pointer to pixel offset of start of lines/4 (used with fast sprites) -#define SSPRITE_W0 8 // u8* w0; // pointer to pixel length of length of lines/4 (used with fast sprites) -#define SSPRITE_KEYCOL 12 // u32 keycol; // key color -#define SSPRITE_X 16 // s16 x; // sprite X-coordinate on the screen -#define SSPRITE_Y 18 // s16 y; // sprite Y-coordinate on the screen -#define SSPRITE_W 20 // u16 w; // sprite width -#define SSPRITE_H 22 // u16 h; // sprite height -#define SSPRITE_WB 24 // u16 wb; // sprite pitch (number of bytes between lines) - // u16 res; // ...reserved, structure align -#define SSPRITE_SIZE 28 // size of sSprite structure - -// Structure of layer screen sLayer (on change update structure sLayer in vga_layer.h) -#define SLAYER_IMG 0 // const u8* img; // pointer to image in current layer format, or sprite list -#define SLAYER_PAR 4 // const void* par; // additional parameter (RLE index table, transformation matrix) -#define SLAYER_INIT 8 // u32 init; // init word sent on start of scanline -#define SLAYER_KEYCOL 12 // u32 keycol; // key color -#define SLAYER_TRANS 16 // u16 trans; // trans count -#define SLAYER_X 18 // s16 x; // start X coordinate -#define SLAYER_Y 20 // s16 y; // start Y coordinate -#define SLAYER_W 22 // u16 w; // width in pixels -#define SLAYER_H 24 // u16 h; // height -#define SLAYER_WB 26 // u16 wb; // image width in bytes (pitch of lines) -#define SLAYER_MODE 28 // u8 mode; // layer mode -#define SLAYER_HORIZ 29 // s8 horiz; // horizon of perspective projection/4 (only with LAYERMODE_PERSP* modes, 0=no perspecitve, <0 ceilling) -#define SLAYER_XBITS 30 // u8 xbits; // number of bits of width of source image (only with LAYERMODE_PERSP* modes) -#define SLAYER_YBITS 31 // u8 ybits; // number of bits of height of source image (only with LAYERMODE_PERSP* modes) -#define SLAYER_SPRITENUM 32 // u16 spritenum; // number of sprites -#define SLAYER_ON 34 // Bool on; // layer is ON -#define SLAYER_CPP 35 // u8 cpp; // current clock pulses per pixel (used to calculate X coordinate) -#define SLAYER_SIZE 36 // size of sLayer structure - -// Structure of video segment sSegm (on change update structure sSegm in vga_screen.h) -#define SSEGM_WIDTH 0 // u16 width; // width of this video segment in pixels (must be multiple of 4, 0=inactive segment) -#define SSEGM_WB 2 // u16 wb; // pitch - number of bytes between lines -#define SSEGM_OFFX 4 // s16 offx; // display offset at X direction (must be multiple of 4) -#define SSEGM_OFFY 6 // s16 offy; // display offset at Y direction -#define SSEGM_WRAPX 8 // u16 wrapx; // wrap width in X direction (number of pixels, must be multiply of 4 and > 0) - // text modes: wrapx must be multiply of 8 -#define SSEGM_WRAPY 10 // u16 wrapy; // wrap width in Y direction (number of lines, cannot be 0) -#define SSEGM_DATA 12 // const void* data; // pointer to video buffer with image data -#define SSEGM_FORM 16 // u8 form; // graphics format GF_* -#define SSEGM_DBLY 17 // bool dbly; // double Y (2 scanlines per 1 image line) -#define SSEGM_PAR3 18 // u16 par3; // SSEGM_PAR3 parameter 3 -#define SSEGM_PAR 20 // u32 par; // parameter 1: color, pointer to palettes, tile source, font -#define SSEGM_PAR2 24 // u32 par2; // parameter 2 -#define SSEGM_SIZE 28 // size of sSegm structure - -// Structure of video strip sStrip (on change update structure sStrip in vga_screen.h) -#define SSTRIP_HEIGHT 0 // u16 height; // height of this strip in number of scanlines -#define SSTRIP_NUM 2 // u16 num; // number of video segments -#define SSTRIP_SEG 4 // sSegm seg[SEGMAX]; -#define SSTRIP_SIZE (4+SSEGM_SIZE*SEGMAX) // size of sStrip structure (= 4 + 28*8 = 228 bytes) - -// Structure of video screen sScreen (on change update structure sScreen in vga_screen.h) -#define SSCREEN_NUM 0 // u16 num; // number of video strips -#define SSCREEN_BACKUP 2 // u16 num_backup; // backup number of video strips during display OFF -#define SSCREEN_STRIP 4 // sStrip strip[STRIPMAX]; // list of video strips -#define SSCREEN_SIZE (4+SSTRIP_SIZE*STRIPMAX) // size of sScreen structure (= 4 + 228*8 = 1828 bytes) - -// --- graphics formats -// There are 3 groups of formats - separated due internal reasons, do not mix them. - -// 1st group of formats - rendered specially -#define GF_COLOR 0 // simple color (par=color pattern 4-pixels even line, par2=color pattern 4-pixels odd line) -// Data buffer: width bytes (320 pixels: 320 bytes) -// Control buffer: 8 bytes - -// 2nd group of formats - rendering into control buffer cbuf -#define GF_GRAPH8 1 // native 8-bit graphics (X1Y1R2G2B2) - fast, transfers "as is" to PIO - // (num = number of pixels/4 = number of bytes/4) -// Control buffer: 8 bytes (320 pixels: 8 bytes) -#define GF_TILE 2 // tiles (par = tile table with one column of tiles, - // par2 = tile height, par3 = tile width as multiple of 4) -// Control buffer: width/tile width*8 bytes (320 pixels of 32x32: 80 bytes) -#define GF_TILE2 3 // alternate tiles (par = tile table with one row of tiles, - // par2 = LOW tile height, HIGH tile width bytes, - // par3 = tile width as multiple of 4) -// Control buffer: width/tile width*8 bytes (320 pixels of 32x32: 80 bytes) -#define GF_PROGRESS 4 // horizontal progress indicator (data = values 0..255 of 4-pixels in rows, - // par = scanline gradient < data, par2 = scanline gradient >= data) -// Control buffer: 16 bytes -#define GF_GRAD1 5 // gradient with 1 line -// Control buffer: 8 bytes (320 pixels: 8 bytes) -#define GF_GRAD2 6 // gradient with 2 lines -// Control buffer: 8 bytes (320 pixels: 8 bytes) - -#define GF_GRP2MIN GF_GRAPH8 // 2nd group minimal format -#define GF_GRP2MAX GF_GRAD2 // 2nd group maximal format - -// 3rd group of formats - rendering into data buffer dbuf -// Control buffer: 8 bytes -// Data buffer: width bytes -#define GF_GRAPH4 7 // 4-bit graphics (num = number of pixels/4 = number of bytes/2; - // par = pointer to 16-color palette translation table) -#define GF_GRAPH2 8 // 2-bit graphics (num = number of pixels/4 = number of bytes, - // par = pointer to 4-color palette translation table) -#define GF_GRAPH1 9 // 1-bit graphics (num = number of pixels/8 = number of bytes, - // par = 2 colors of palettes) -#define GF_MTEXT 10 // 8-pixel mono text (num = number of characters, font is 8-bit width, - // par = pointer to 1-bit font, par2 = 2 colors of palettes) -#define GF_ATEXT 11 // 8-pixel attribute text, character + 2x4 bit attributes - // (num = number of characters, font is 8-bit width, - // par = pointer to 1-bit font, par2 = pointer to 16 colors of palettes) -#define GF_FTEXT 12 // 8-pixel foreground color text, character + foreground color - // (num = number of characters, font is 8-bit width, - // par = pointer to 1-bit font, par2 = background color) -#define GF_CTEXT 13 // 8-pixel color text, character + background color + foreground color - // (num = number of characters, font is 8-bit width, - // par = pointer to 1-bit font) -#define GF_GTEXT 14 // 8-pixel gradient text (par = pointer to 1-bit font, par2 = pointer to color array) -#define GF_DTEXT 15 // 8-pixel double gradient text (par = pointer to 1-bit font, par2 = pointer to color array) -#define GF_LEVEL 16 // level graph (data=samples 0..255, par = 2 colors of palettes, par2 = Y zero level 0..255) -#define GF_LEVELGRAD 17 // level gradient graph (data = samples 0..255, par = scanline gradient < data, par2 = scanline gradient >= data) -#define GF_OSCIL 18 // oscilloscope pixel graph (data=samples 0..255, par = 2 colors of palettes, par2 = height of pixels - 1) -#define GF_OSCLINE 19 // oscilloscope line graph (data=samples 0..255, par = 2 colors of palettes) -#define GF_PLANE2 20 // 4 colors on 2 graphic planes (data=graphic, par=offset of 2nd graphic plane, - // par2 = pointer to 4-color palette translation table) -#define GF_ATTRIB8 21 // 2x4 bit color attribute per 8x8 pixel sample (data=mono graphic, par=offset of color attributes, - // par2 = pointer to 16-color palette table) -#define GF_GRAPH8MAT 22 // 8-bit graphics with 2D matrix transformation, using hardware interpolator inter1 (inter1 state is not saved during interrup) - // (data=image, par=pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL)), - // par2 LOW=number of bits of image width, par2 HIGH=number of bits of image height) -#define GF_GRAPH8PERSP 23 // 8-bit graphics with perspective, using hardware interpolator inter1 (inter1 state is not saved during interrup) - // (data=image, par=pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL)), - // par2 LOW=number of bits of image width, par2 HIGH=number of bits of image height, - // par3=horizon offset) -#define GF_TILEPERSP 24 // tiles with perspective, using hardware interpolators inter0 and inter1 (their state is not saved during interrup) - // (data=tile map, par=one column of tiles, par2=pointer to integer matrix, - // wb LOW=number of bits of map width, wb HIGH=number of bits of map height, - // par3 LOW=number of bits of tile size, par3 HIGH=horizon offset/4 or 0=no perspective or <0=ceilling, - // wrapy=segment height) -#define GF_TILEPERSP15 25 // tiles with perspective, 1.5 pixels (parameters as GF_TILEPERSP) -#define GF_TILEPERSP2 26 // tiles with perspective, double pixels (parameters as GF_TILEPERSP) -#define GF_TILEPERSP3 27 // tiles with perspective, triple pixels (parameters as GF_TILEPERSP) -#define GF_TILEPERSP4 28 // tiles with perspective, quadruple pixels (parameters as GF_TILEPERSP) - -#define GF_GRP3MIN GF_GRAPH4 // 3rd group minimal format -#define GF_GRP3MAX GF_TILEPERSP4 // 3rd group maximal format - - -#define FRACT 12 // number of bits of fractional part of fractint number (use max. 13, min. 8) -#define FRACTMUL (1<>8)|(((n)&0xff000000)>>24)) // ---------------------------------------------------------------------------- // Constants @@ -96,8 +102,6 @@ typedef unsigned char Bool; #define PI 3.14159265358979324 #define PI2 (3.14159265358979324*2) -//extern const ALIGNED u8 FontBoldB8x16[4096]; - #define VGA_RGB(r,g,b) ( (((r>>5)&0x07)<<5) | (((g>>5)&0x07)<<2) | (((b>>6)&0x3)<<0) ) @@ -125,12 +129,8 @@ typedef unsigned char Bool; // PicoVGA includes -#include "define.h" // common definitions of C and ASM -#include "canvas.h" // canvas +#include "vga_config.h" // VGA configuration #include "vga_vmode.h" // VGA videomodes -#include "vga_layer.h" // VGA layers -#include "vga_screen.h" // VGA screen layout -#include "vga_pal.h" // VGA palette #include "vga.h" // VGA output #include "picovga.pio.h" // PIO diff --git a/MCUME_pico/picovga_t4/picovga.pio b/MCUME_pico/picovga_t4/picovga.pio index fac181f..bd7275f 100644 --- a/MCUME_pico/picovga_t4/picovga.pio +++ b/MCUME_pico/picovga_t4/picovga.pio @@ -1,6 +1,11 @@ ; ============================================================================ ; VGA output - base layer (15 instructions) +; +; file derived from the PicoVGA project +; https://github.com/Panda381/PicoVGA +; by Miroslav Nemecek +; ; ============================================================================ ; Control word of "dark" command (left shift): ; - bit 0..7 (8 bits) output color (set to 0 if not used) @@ -60,218 +65,4 @@ public extra2: ; wrap jump to instruction out pc,5 .wrap -; ============================================================================ -; VGA output - layer with key color (13 instructions) -; ============================================================================ -; Control word (left shift): -; - bit 0..10 (11 bits) number of pixels - 1 (number of pixels must be multiple of 4) -; - bit 11..18 (8 bits) key color -; - bit 19..31 (13 bits) start delay D = clock cycles - 7 between irq and first pixel -; Clocks per pixel: minimum 6, maximum 37. -.program keylayer -.origin 0 ; must load at offset 0 (LAYER_OFF) - - ; idle wait -.wrap_target -public idle: - pull block ; [1] idle wait - -public entry: - wait 0 irq 4 ; [1] wait for IRQ sync goes 0 - out x,13 ; [1] get length of delay - 7 -layer_wait: - jmp x--,layer_wait ; [1] delay loop - out y,8 ; [1] get key color - out x,11 ; [1] get number of pixels-1 -layer_loop: - mov isr,x ; [1] save pixel counter into ISR - out x,8 ; [1] get output pixel - jmp x!=y,layer_2 ; [1] jump if pixel is not transparent - jmp layer_3 ; [1] jump to end of loop -layer_2: - mov pins,x ; [1] output pixel to pins -layer_3: -public extra1: - mov x,isr [0] ; [1+CPP-6] return pixel counter (set extra wait CPP-6) - jmp x--,layer_loop ; [1] loop next pixel - ; wrap jump to idle -.wrap - -; ============================================================================ -; VGA output - layer with black key color (11 instructions) -; ============================================================================ -; Control word (left shift): -; - bit 0..15 (16 bits) number of pixels - 1 (number of pixels must be multiple of 4) -; - bit 16..31 (16 bits) start delay D = clock cycles - 5 between irq and first pixel -; Cannot display black pixel (it is used as transparency) -; Clocks per pixel: minimum 4, maximum 34. - -.program blacklayer -.origin 0 ; must load at offset 0 (LAYER_OFF) - - ; idle wait -.wrap_target -public idle: - pull block ; [1] idle wait - -public entry: - wait 0 irq 4 ; [1] wait for IRQ sync goes 0 - out x,16 ; [1] get length of delay - 5 -layer_wait: - jmp x--,layer_wait ; [1] delay loop - out x,16 ; [1] get number of pixels-1 -layer_loop: - out y,8 ; [1] get output pixel - jmp !y,layer_2 ; [1] jump if pixel is transparent (color = 0) - mov pins,y ; [1] output pixel to pins -public extra1: - jmp x--,layer_loop [0] ; [1+CPP-4] loop next pixel (set extra wait CPP-4) - jmp idle ; [1] go idle -layer_2: -public extra2: - jmp x--,layer_loop [0] ; [1+CPP-3] loop next pixel (set extra wait CPP-3) - ; wrap jump to idle -.wrap - -; ============================================================================ -; VGA output - layer with white key color (10 instructions) -; ============================================================================ -; Control word (left shift): -; - bit 0..15 (16 bits) number of pixels - 1 (number of pixels must be multiple of 4) -; - bit 16..31 (16 bits) start delay D = clock cycles - 5 between irq and first pixel -; Cannot display white pixel (it is used as transparency). Source pixels must be incremented + 1. -; Clocks per pixel: minimum 4, maximum 35. - -.program whitelayer -.origin 0 ; must load at offset 0 (LAYER_OFF) - - ; idle wait -.wrap_target -public idle: - pull block ; [1] idle wait - -public entry: - wait 0 irq 4 ; [1] wait for IRQ sync goes 0 - out x,16 ; [1] get length of delay - 7 -layer_wait: - jmp x--,layer_wait ; [1] delay loop - out x,16 ; [1] get number of pixels-1 -layer_loop: - out y,8 ; [1] get output pixel - jmp y--,layer_2 ; [1] jump if pixel is not transparent (color != 0) - jmp layer_3 ; [1] jump to end of loop -layer_2: - mov pins,y ; [1] output pixel to pins -public extra1: -layer_3: - jmp x--,layer_loop [0] ; [1+CPP-4] loop next pixel (set extra wait CPP-4) - ; wrap jump to idle -.wrap - -; ============================================================================ -; VGA output - layer with mono or color pattern (16 instructions) -; ============================================================================ -; Control word (left shift): -; - bit 0 (1 bit) flag 0=use color opaque mode, 1=use mono transparent mode -; - bit 1..11 (11 bits) number of pixels - 1 (number of pixels must be multiple of 32 in mono, or 4 in color) -; - bit 12..19 (8 bits) key color -; - bit 20..31 (12 bits) start delay D = clock cycles - 8 between irq and first mono pixel, or 6 for color pixel -; Mono, clocks per pixel: minimum 4, maximum 35. -; Color, clocks per pixel: minimum 2, maximum 33. - -.program monolayer -.origin 0 ; must load at offset 0 (LAYER_OFF) - -.wrap_target -public idle: - pull block ; [1] idle wait - -public entry: - wait 0 irq 4 ; [1] wait for IRQ sync goes 0 - out x,12 ; [1] get length of delay - 8 (or 6 in color) -layer_wait: - jmp x--,layer_wait ; [1] delay loop - out isr,8 ; [1] get key color - out y,11 ; [1] get number of pixels-1 - out x,1 ; [1] get mode flag - jmp !x,layer_color ; [1] 0=use color mode -layer_loop: - out x,1 ; [1] get one bit - jmp !x,layer_out ; [1] bit=0, output pixel - jmp layer_skip ; [1] jump to end of loop -layer_out: - mov pins,isr ; [1] output pixel -layer_skip: -public extra1: - jmp y--,layer_loop [0] ; [1+CPP-4] loop next pixel (set extra wait CPP-4) - jmp idle - -layer_color: - out pins,8 -public extra2: - jmp y--,layer_color [0] ; [1+CPP-2] loop next pixel (set extra wait CPP-2) - ; wrap jump to idle -.wrap - -; ============================================================================ -; VGA output - layer with RLE compression (17 instructions) -; ============================================================================ -; Input is left shifted with byte-swap (lower byte comes first) -; Requires 3 clock cycles per pixel. -; Clocks per pixel: minimum 3, maximum 32. - -.program rlelayer -.origin 0 ; must load at offset 0 (LAYER_OFF) - - ; [1 instruction] idle wait (tokens: {8} ignored, {8} 'idle' command) -public idle: - out pc,8 ; [1] idle wait - - ; [4 instructions] start -public entry: - wait 0 irq 4 ; [1] wait for IRQ sync goes 0 - out x,32 [2] ; [3] get length of delay - 7 -entry_wait: - jmp x--,entry_wait ; [1] delay - jmp raw_next ; [1] - - ; [1 instruction] skip N+2 (2..257) pixels (tokens: {8} N = number of pixels - 2, {8} 'skip' command) -public skip: -public extra1: - jmp x--,skip [0] ; [1+CPP-1] wait (set extra wait CPP-1) - - ; [1 instruction] skip 1 pixel (tokens: {8} ignored, {8} 'skip1' command) -public skip1: -public extra2: - jmp raw_next [0] ; [1+CPP-3] jump (set extra wait CPP-3) - - ; [4 instructions] repeat N+3 (3..258) pixels (tokens: {8} pixel to repeat, {8} 'run' command, {8} N = number of pixels - 3) -public run: -public extra3: - mov pins,x [0] ; [1+CPP-2] output pixel (set extra wait CPP-2) - out y,8 ; [1] get counter N -run_loop: -public extra4: - mov pins,x [0] ; [1+CPP-2] output pixel (set extra wait CPP-2) - jmp y--,run_loop ; [1] next pixel - - ; [1 instruction] output 1 RAW pixel (tokens: {8} pixel, {8} 'raw1' command) -public raw1: -public extra5: - mov pins,x [0] ; [1+CPP-3] output pixel (set extra wait CPP-3) -.wrap_target -raw_next: - out x,8 ; [1] get counter N - out pc,8 ; [1] jump - - ; [5 instructions] output N+2 (2..257) RAW pixels (tokens: {8} N = number of pixels - 2, {8} 'raw' command, {(N+2)*8} pixels) -public raw: ; 14: -raw_loop: -public extra6: - out pins,8 [0] ; [1+CPP-2] output pixel (set extra wait CPP-2) - jmp x--,raw_loop ; [1] loop next pixel -public extra7: - out pins,8 [0] ; [1+CPP-3] output pixel (set extra wait CPP-3) - ; wrap jump to raw_next -.wrap diff --git a/MCUME_pico/picovga_t4/render/vga_atext.S b/MCUME_pico/picovga_t4/render/vga_atext.S deleted file mode 100755 index 038f096..0000000 --- a/MCUME_pico/picovga_t4/render/vga_atext.S +++ /dev/null @@ -1,362 +0,0 @@ - -// **************************************************************************** -// -// VGA render GF_ATEXT -// -// **************************************************************************** -// u32 par SSEGM_PAR pointer to the font -// u32 par2 SSEGM_PAR2 pointer to 16 colors of palettes -// u16 par3 font height - -#include "../define.h" // common definitions of C and ASM -#include "hardware/regs/sio.h" // registers of hardware divider -#include "hardware/regs/addressmap.h" // SIO base address - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// render font pixel mask -.extern RenderTextMask // u32 RenderTextMask[512]; - -// extern "C" u8* RenderAText(u8* dbuf, int x, int y, int w, sSegm* segm) - -// render 8-pixel attribute text GF_ATEXT -// R0 ... destination data buffer -// R1 ... start X coordinate (in pixels, must be multiple of 4) -// R2 ... start Y coordinate (in graphics lines) -// R3 ... width to display (must be multiple of 4 and > 0) -// [stack] ... segm video segment sSegm -// Output new pointer to destination data buffer. -// 320 pixels takes 11.9 us on 151 MHz. - -.thumb_func -.global RenderAText -RenderAText: - - // push registers - push {r1-r7,lr} - mov r4,r8 - push {r4} - -// Stack content: -// SP+0: R8 -// SP+4: R1 start X coordinate -// SP+8: R2 start Y coordinate (later: base pointer to text data row) -// SP+12: R3 width to display -// SP+16: R4 -// SP+20: R5 -// SP+24: R6 -// SP+28: R7 -// SP+32: LR -// SP+36: video segment (later: wrap width in X direction) - - // get pointer to video segment -> R4 - ldr r4,[sp,#36] // load video segment -> R4 - - // start divide Y/font height - ldr r6,RenderAText_pSioBase // get address of SIO base -> R6 - str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate - ldrh r2,[r4,#SSEGM_PAR3] // font height -> R2 - str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height - -// - now we must wait at least 8 clock cycles to get result of division - - // [6] get wrap width -> [SP+36] - ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width - movs r7,#3 // [1] mask to align to 32-bit - bics r5,r7 // [1] align wrap - str r5,[sp,#36] // [2] save wrap width - - // [1] align X coordinate to 32-bit - bics r1,r7 // [1] - - // [3] align remaining width - bics r3,r7 // [1] - str r3,[sp,#12] // [2] save new width - - // load result of division Y/font_height -> R6 Y relative at row, R7 Y row - // Note: QUOTIENT must be read last - ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row - ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R2, index of row - - // pointer to font line -> R3 - lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long) - ldr r3,[r4,#SSEGM_PAR] // get pointer to font - add r3,r5 // line offset + font base -> pointer to current font line R3 - - // base pointer to text data (without X) -> [SP+8], R2 - ldrh r5,[r4,#SSEGM_WB] // get pitch of rows - muls r2,r5 // Y * WB -> offset of row in text buffer - ldr r5,[r4,#SSEGM_DATA] // pointer to data - add r2,r5 // base address of text buffer - str r2,[sp,#8] // save pointer to text buffer - - // prepare pointer to text data with X -> R2 (1 position is 1 character + 1 attributes) - lsrs r6,r1,#3 // convert X to character index (1 character is 8 pixels width) - add r2,r6 // add index - add r2,r6 // add index*2, pointer to source text buffer -> R2 - - // prepare pointer to palettes -> R8 - ldr r5,[r4,#SSEGM_PAR2] // get pointer to palette table -> R4 - mov r8,r5 // save pointer to palette table - - // prepare pointer to conversion table -> LR - ldr r5,RenderAText_Addr // get pointer to conversion table -> R5 - mov lr,r5 // conversion table -> LR - -// ---- render 2nd half of first character -// R0 ... pointer to destination data buffer -// R1 ... start X coordinate -// R2 ... pointer to source text buffer -// R3 ... pointer to font line -// R4 ... background color (expanded to 32-bit) -// R5 ... (temporary) -// R6 ... foreground color (expanded to 32-bit) -// R7 ... (temporary) -// R8 ... pointer to palette table -// LR ... pointer to conversion table -// [SP+8] ... base pointer to text data (without X) -// [SP+12] ... remaining width -// [SP+36] ... wrap width - - // check bit 2 of X coordinate - check if image starts with 2nd half of first character - lsls r6,r1,#29 // check bit 2 of X coordinate - bpl 2f // bit 2 not set, starting even 4-pixels - - // [6] load background color -> R4 - ldrb r6,[r2,#1] // [2] load color attributes -> R6 - mov r5,r8 // [1] get palette table -> R5 - lsrs r4,r6,#4 // [1] prepare index of background color - ldrb r4,[r5,r4] // [2] load background color - - // [4] load foreground color -> R6 - lsls r6,#28 // [1] isolate lower 4 bits - lsrs r6,#28 // [1] mask lower 4 bits - ldrb r6,[r5,r6] // [2] load foreground color - - // [4] expand background color to 32-bit -> R4 - lsls r5,r4,#8 // [1] shift background color << 8 - orrs r5,r4 // [1] color expanded to 16 bits - lsls r4,r5,#16 // [1] shift 16-bit color << 16 - orrs r4,r5 // [1] color expanded to 32 bits - - // [4] expand foreground color to 32-bit -> R6 - lsls r5,r6,#8 // [1] shift foreground color << 8 - orrs r5,r6 // [1] color expanded to 16 bits - lsls r6,r5,#16 // [1] shift 16-bit color << 16 - orrs r6,r5 // [1] color expanded to 32 bits - - // [1] XOR foreground and background color -> R6 - eors r6,r4 // [1] XOR foreground color with background color - - // [4] load font sample -> R5 - ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 - ldrb r5,[r3,r5] // [2] load font sample -> R5 - adds r2,#2 // [1] shift pointer to source text buffer - - // [2] prepare conversion table -> R5 - lsls r5,#3 // [1] multiply font sample * 8 - add r5,lr // [1] add pointer to conversion table - - // [6] convert second 4 pixels (lower 4 bits) - ldr r7,[r5,#4] // [2] load mask for lower 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r7} // [2] store second 4 pixels - - // shift X coordinate - adds r1,#4 // shift X coordinate - - // check end of segment - ldr r7,[sp,#36] // load wrap width - cmp r1,r7 // end of segment? - blo 1f - movs r1,#0 // reset X coordinate - ldr r2,[sp,#8] // get base pointer to text data -> R2 - - // shift remaining width -1: ldr r7,[sp,#12] // get remaining width - subs r7,#4 // shift width - str r7,[sp,#12] // save new width - - // prepare wrap width - start X -> R7 -2: ldr r7,[sp,#36] // load wrap width - subs r7,r1 // pixels remaining to end of segment - -// ---- start outer loop, render one part of segment -// Outer loop variables (* prepared before outer loop): -// R0 ... *pointer to destination data buffer -// R1 ... number of characters to generate in one part of segment -// R2 ... *pointer to source text buffer -// R3 ... *pointer to font line -// R4 ... background color (expanded to 32-bit) -// R5 ... (temporary) -// R6 ... foreground color (expanded to 32-bit) -// R7 ... *wrap width of this segment, later: temporary -// R8 ... *pointer to palette table -// LR ... *pointer to conversion table -// [SP+8] ... *base pointer to text data (without X) -// [SP+12] ... *remaining width -// [SP+36] ... *wrap width - -RenderAText_OutLoop: - - // limit wrap width by total width -> R7 - ldr r6,[sp,#12] // get remaining width - cmp r7,r6 // compare with wrap width - bls 2f // width is OK - mov r7,r6 // limit wrap width - - // check if remain whole characters -2: cmp r7,#8 // check number of remaining pixels - bhs 5f // enough characters remain - - // check if 1st part of last character remains - cmp r7,#4 // check 1st part of last character - blo 3f // all done - -// ---- render 1st part of last character - -RenderAText_Last: - - // [6] load background color -> R4 - ldrb r6,[r2,#1] // [2] load color attributes -> R6 - mov r5,r8 // [1] get palette table -> R5 - lsrs r4,r6,#4 // [1] prepare index of background color - ldrb r4,[r5,r4] // [2] load background color - - // [4] load foreground color -> R6 - lsls r6,#28 // [1] isolate lower 4 bits - lsrs r6,#28 // [1] mask lower 4 bits - ldrb r6,[r5,r6] // [2] load foreground color - - // [4] expand background color to 32-bit -> R4 - lsls r5,r4,#8 // [1] shift background color << 8 - orrs r5,r4 // [1] color expanded to 16 bits - lsls r4,r5,#16 // [1] shift 16-bit color << 16 - orrs r4,r5 // [1] color expanded to 32 bits - - // [4] expand foreground color to 32-bit -> R6 - lsls r5,r6,#8 // [1] shift foreground color << 8 - orrs r5,r6 // [1] color expanded to 16 bits - lsls r6,r5,#16 // [1] shift 16-bit color << 16 - orrs r6,r5 // [1] color expanded to 32 bits - - // [1] XOR foreground and background color -> R6 - eors r6,r4 // [1] XOR foreground color with background color - - // [4] load font sample -> R5 - ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 - ldrb r5,[r3,r5] // [2] load font sample -> R5 - adds r2,#2 // [1] shift pointer to source text buffer - - // [2] prepare conversion table -> R5 - lsls r5,#3 // [1] multiply font sample * 8 - add r5,lr // [1] add pointer to conversion table - - // [6] convert first 4 pixels (higher 4 bits) - ldr r1,[r5,#0] // [2] load mask for higher 4 bits - ands r1,r6 // [1] mask foreground color - eors r1,r4 // [1] combine with background color - stmia r0!,{r1} // [2] store first 4 pixels - - // check if continue with next segment - ldr r2,[sp,#8] // get base pointer to text data -> R2 - cmp r7,#4 - bhi RenderAText_OutLoop - - // pop registers and return -3: pop {r4} - mov r8,r4 - pop {r1-r7,pc} - -// ---- prepare to render whole characters - - // prepare number of whole characters to render -> R1 -5: lsrs r1,r7,#2 // shift to get number of characters*2 - lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5 - subs r6,r5 // get remaining width - str r6,[sp,#12] // save new remaining width - subs r1,#1 // number of characters*2 - 1 - -// ---- [41*N-1] start inner loop, render characters in one part of segment -// Inner loop variables (* prepared before inner loop): -// R0 ... *pointer to destination data buffer -// R1 ... *number of characters to generate*2 - 1 (loop counter) -// R2 ... *pointer to source text buffer -// R3 ... *pointer to font line -// R4 ... background color (expanded to 32-bit) -// R5 ... font sample -// R6 ... foreground color (expanded to 32-bit) -// R7 ... (temporary) -// R8 ... *pointer to palette table -// LR ... *pointer to conversion table - -RenderAText_InLoop: - - // [6] load background color -> R4 - ldrb r6,[r2,#1] // [2] load color attributes -> R6 - mov r5,r8 // [1] get palette table -> R5 - lsrs r4,r6,#4 // [1] prepare index of background color - ldrb r4,[r5,r4] // [2] load background color - - // [4] load foreground color -> R6 - lsls r6,#28 // [1] isolate lower 4 bits - lsrs r6,#28 // [1] mask lower 4 bits - ldrb r6,[r5,r6] // [2] load foreground color - - // [4] expand background color to 32-bit -> R4 - lsls r5,r4,#8 // [1] shift background color << 8 - orrs r5,r4 // [1] color expanded to 16 bits - lsls r4,r5,#16 // [1] shift 16-bit color << 16 - orrs r4,r5 // [1] color expanded to 32 bits - - // [4] expand foreground color to 32-bit -> R6 - lsls r5,r6,#8 // [1] shift foreground color << 8 - orrs r5,r6 // [1] color expanded to 16 bits - lsls r6,r5,#16 // [1] shift 16-bit color << 16 - orrs r6,r5 // [1] color expanded to 32 bits - - // [1] XOR foreground and background color -> R6 - eors r6,r4 // [1] XOR foreground color with background color - - // [4] load font sample -> R5 - ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 - ldrb r5,[r3,r5] // [2] load font sample -> R5 - adds r2,#2 // [1] shift pointer to source text buffer - - // [2] prepare conversion table -> R5 - lsls r5,#3 // [1] multiply font sample * 8 - add r5,lr // [1] add pointer to conversion table - - // [6] convert first 4 pixels (higher 4 bits) - ldr r7,[r5,#0] // [2] load mask for higher 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r7} // [2] store first 4 pixels - - // [6] convert second 4 pixels (lower 4 bits) - ldr r7,[r5,#4] // [2] load mask for lower 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r7} // [2] store second 4 pixels - - // [2,3] loop counter - subs r1,#2 // [1] shift loop counter - bhi RenderAText_InLoop // [1,2] > 0, render next whole character - -// ---- end inner loop, continue with last character, or start new part - - // continue to outer loop - ldr r7,[sp,#36] // load wrap width - beq RenderAText_Last // render 1st half of last character - ldr r2,[sp,#8] // get base pointer to text data -> R2 - b RenderAText_OutLoop // go back to outer loop - - .align 2 -RenderAText_Addr: - .word RenderTextMask -RenderAText_pSioBase: - .word SIO_BASE // addres of SIO base diff --git a/MCUME_pico/picovga_t4/render/vga_attrib8.S b/MCUME_pico/picovga_t4/render/vga_attrib8.S deleted file mode 100755 index 78cd5f9..0000000 --- a/MCUME_pico/picovga_t4/render/vga_attrib8.S +++ /dev/null @@ -1,346 +0,0 @@ - -// **************************************************************************** -// -// VGA render GF_ATTRIB8 -// -// **************************************************************************** - -#include "../define.h" // common definitions of C and ASM - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// render font pixel mask -.extern RenderTextMask // u32 RenderTextMask[512]; - -// extern "C" u8* RenderAttrib8(u8* dbuf, int x, int y, int w, sSegm* segm) - -// render 8-pixel attribute text GF_ATTRIB8 -// R0 ... destination data buffer -// R1 ... start X coordinate (in pixels, must be multiple of 4) -// R2 ... start Y coordinate (in graphics lines) -// R3 ... width to display (must be multiple of 4 and > 0) -// [stack] ... segm video segment sSegm -// Output new pointer to destination data buffer. -// 320 pixels takes 11 us on 151 MHz. - -.thumb_func -.global RenderAttrib8 -RenderAttrib8: - - // push registers - push {r2-r7,lr} - mov r4,r8 - push {r4} - -// Input variables and stack content: -// R1 ... start X coordinate -// SP+0: R8 -// SP+4: R2 start Y coordinate (later: base pointer to pixel data row) -// SP+8: R3 width to display -// SP+12: R4 -// SP+16: R5 -// SP+20: R6 -// SP+24: R7 -// SP+28: LR -// SP+32: video segment (later: wrap width in X direction) - - // get pointer to video segment -> R4 - ldr r4,[sp,#32] // load video segment -> R4 - - // get wrap width -> [SP+32] - ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width - movs r7,#3 // mask to align to 32-bit - bics r5,r7 // align wrap - str r5,[sp,#32] // save wrap width - - // align X coordinate to 32-bit -> R1 - bics r1,r7 - - // align remaining width -> [SP+8] - bics r3,r7 // width - str r3,[sp,#8] // save new width - - // base pointer to attributes (without X) -> R3 - lsrs r3,r2,#3 // delete low 3 bits of Y coordinate -> row index - ldrh r5,[r4,#SSEGM_WB] // get pitch of rows - muls r3,r5 // Y * WB -> offset of row in text buffer - ldr r7,[r4,#SSEGM_PAR] // pointer to attributes - add r3,r7 // base address of attributes -> R3 - - // base pointer to pixel data (without X) -> [SP+4], R2 - muls r2,r5 // Y * WB -> offset of row in text buffer - ldr r5,[r4,#SSEGM_DATA] // pointer to data - add r2,r5 // base address of text buffer - str r2,[sp,#4] // save pointer to text buffer - - // offset of attributes -> R3 - subs r3,r2 // offset of attributes, relative to source text buffer - - // prepare pointer to pixel data with X -> R2 (1 position is 1 character + 1 attributes) - lsrs r6,r1,#3 // convert X to character index (1 character is 8 pixels width) - add r2,r6 // add index, pointer to source text buffer -> R2 - - // prepare pointer to palettes -> R8 - ldr r5,[r4,#SSEGM_PAR2] // get pointer to palette table -> R4 - mov r8,r5 // save pointer to palette table - - // prepare pointer to conversion table -> LR - ldr r5,RenderAttrib8_Addr // get pointer to conversion table -> R5 - mov lr,r5 // conversion table -> LR - -// ---- render 2nd half of first character -// R0 ... pointer to destination data buffer -// R1 ... start X coordinate -// R2 ... pointer to source text buffer -// R3 ... offset of attributes (relative to source text buffer) -// R4 ... background color (expanded to 32-bit) -// R5 ... (temporary) -// R6 ... foreground color (expanded to 32-bit) -// R7 ... (temporary) -// R8 ... pointer to palette table -// LR ... pointer to conversion table -// [SP+4] ... base pointer to pixel data (without X) -// [SP+8] ... remaining width -// [SP+32] ... wrap width - - // check bit 2 of X coordinate - check if image starts with 2nd half of first character - lsls r6,r1,#29 // check bit 2 of X coordinate - bpl 2f // bit 2 not set, starting even 4-pixels - - // [6] load background color -> R4 - ldrb r6,[r2,r3] // [2] load color attributes -> R6 - mov r5,r8 // [1] get palette table -> R5 - lsrs r4,r6,#4 // [1] prepare index of background color - ldrb r4,[r5,r4] // [2] load background color -> R4 - - // [4] load foreground color -> R6 - lsls r6,#28 // [1] isolate lower 4 bits - lsrs r6,#28 // [1] mask lower 4 bits - ldrb r6,[r5,r6] // [2] load foreground color -> R6 - - // [4] expand background color to 32-bit -> R4 - lsls r5,r4,#8 // [1] shift background color << 8 - orrs r5,r4 // [1] color expanded to 16 bits - lsls r4,r5,#16 // [1] shift 16-bit color << 16 - orrs r4,r5 // [1] color expanded to 32 bits - - // [4] expand foreground color to 32-bit -> R6 - lsls r5,r6,#8 // [1] shift foreground color << 8 - orrs r5,r6 // [1] color expanded to 16 bits - lsls r6,r5,#16 // [1] shift 16-bit color << 16 - orrs r6,r5 // [1] color expanded to 32 bits - - // [1] XOR foreground and background color -> R6 - eors r6,r4 // [1] XOR foreground color with background color - - // [4] load pixel sample -> R5 - ldrb r5,[r2,#0] // [2] load pixels from source buffer -> R5 - adds r2,#1 // [1] shift pointer to source buffer - - // [2] prepare conversion table -> R5 - lsls r5,#3 // [1] multiply font sample * 8 - add r5,lr // [1] add pointer to conversion table - - // [6] convert second 4 pixels (lower 4 bits) - ldr r7,[r5,#4] // [2] load mask for lower 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r7} // [2] store second 4 pixels - - // shift X coordinate - adds r1,#4 // shift X coordinate - - // check end of segment - ldr r7,[sp,#32] // load wrap width - cmp r1,r7 // end of segment? - blo 1f - movs r1,#0 // reset X coordinate - ldr r2,[sp,#4] // get base pointer to pixel data -> R2 - - // shift remaining width -1: ldr r7,[sp,#8] // get remaining width - subs r7,#4 // shift width - str r7,[sp,#8] // save new width - - // prepare wrap width - start X -> R7 -2: ldr r7,[sp,#32] // load wrap width - subs r7,r1 // pixels remaining to end of segment - -// ---- start outer loop, render one part of segment -// Outer loop variables (* prepared before outer loop): -// R0 ... *pointer to destination data buffer -// R1 ... number of characters to generate in one part of segment -// R2 ... *pointer to source text buffer -// R3 ... *offset of attributes (relative to source text buffer) -// R4 ... background color (expanded to 32-bit) -// R5 ... (temporary) -// R6 ... foreground color (expanded to 32-bit) -// R7 ... *wrap width of this segment, later: temporary -// R8 ... *pointer to palette table -// LR ... *pointer to conversion table -// [SP+4] ... *base pointer to pixel data (without X) -// [SP+8] ... *remaining width -// [SP+32] ... *wrap width - -RenderAttrib8_OutLoop: - - // limit wrap width by total width -> R7 - ldr r6,[sp,#8] // get remaining width - cmp r7,r6 // compare with wrap width - bls 2f // width is OK - mov r7,r6 // limit wrap width - - // check if remain whole characters -2: cmp r7,#8 // check number of remaining pixels - bhs 5f // enough characters remain - - // check if 1st part of last character remains - cmp r7,#4 // check 1st part of last character - blo 3f // all done - -// ---- render 1st part of last character - -RenderAttrib8_Last: - - // [6] load background color -> R4 - ldrb r6,[r2,r3] // [2] load color attributes -> R6 - mov r5,r8 // [1] get palette table -> R5 - lsrs r4,r6,#4 // [1] prepare index of background color - ldrb r4,[r5,r4] // [2] load background color -> R4 - - // [4] load foreground color -> R6 - lsls r6,#28 // [1] isolate lower 4 bits - lsrs r6,#28 // [1] mask lower 4 bits - ldrb r6,[r5,r6] // [2] load foreground color -> R6 - - // [4] expand background color to 32-bit -> R4 - lsls r5,r4,#8 // [1] shift background color << 8 - orrs r5,r4 // [1] color expanded to 16 bits - lsls r4,r5,#16 // [1] shift 16-bit color << 16 - orrs r4,r5 // [1] color expanded to 32 bits - - // [4] expand foreground color to 32-bit -> R6 - lsls r5,r6,#8 // [1] shift foreground color << 8 - orrs r5,r6 // [1] color expanded to 16 bits - lsls r6,r5,#16 // [1] shift 16-bit color << 16 - orrs r6,r5 // [1] color expanded to 32 bits - - // [1] XOR foreground and background color -> R6 - eors r6,r4 // [1] XOR foreground color with background color - - // [4] load pixel sample -> R5 - ldrb r5,[r2,#0] // [2] load pixels from source buffer -> R5 - adds r2,#1 // [1] shift pointer to source buffer - - // [2] prepare conversion table -> R5 - lsls r5,#3 // [1] multiply font sample * 8 - add r5,lr // [1] add pointer to conversion table - - // [6] convert first 4 pixels (higher 4 bits) - ldr r1,[r5,#0] // [2] load mask for higher 4 bits - ands r1,r6 // [1] mask foreground color - eors r1,r4 // [1] combine with background color - stmia r0!,{r1} // [2] store first 4 pixels - - // check if continue with next segment - ldr r2,[sp,#4] // get base pointer to pixel data -> R2 - cmp r7,#4 - bhi RenderAttrib8_OutLoop - - // pop registers and return -3: pop {r4} - mov r8,r4 - pop {r2-r7,pc} - -// ---- prepare to render whole characters - - // prepare number of whole characters to render -> R1 -5: lsrs r1,r7,#2 // shift to get number of characters*2 - lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5 - subs r6,r5 // get remaining width - str r6,[sp,#8] // save new remaining width - subs r1,#1 // number of characters*2 - 1 - -// ---- [38*N-1] start inner loop, render characters in one part of segment -// Inner loop variables (* prepared before inner loop): -// R0 ... *pointer to destination data buffer -// R1 ... *number of characters to generate*2 - 1 (loop counter) -// R2 ... *pointer to source text buffer -// R3 ... *offset of attributes (relative to source text buffer) -// R4 ... background color (expanded to 32-bit) -// R5 ... (temporary) -// R6 ... foreground color (expanded to 32-bit) -// R7 ... (temporary) -// R8 ... *pointer to palette table -// LR ... *pointer to conversion table -// [SP+4] ... *base pointer to pixel data (without X) -// [SP+8] ... *remaining width -// [SP+32] ... *wrap width - -RenderAttrib8_InLoop: - - // [6] load background color -> R4 - ldrb r6,[r2,r3] // [2] load color attributes -> R6 - mov r5,r8 // [1] get palette table -> R5 - lsrs r4,r6,#4 // [1] prepare index of background color - ldrb r4,[r5,r4] // [2] load background color -> R4 - - // [4] load foreground color -> R6 - lsls r6,#28 // [1] isolate lower 4 bits - lsrs r6,#28 // [1] mask lower 4 bits - ldrb r6,[r5,r6] // [2] load foreground color -> R6 - - // [4] expand background color to 32-bit -> R4 - lsls r5,r4,#8 // [1] shift background color << 8 - orrs r5,r4 // [1] color expanded to 16 bits - lsls r4,r5,#16 // [1] shift 16-bit color << 16 - orrs r4,r5 // [1] color expanded to 32 bits - - // [4] expand foreground color to 32-bit -> R6 - lsls r5,r6,#8 // [1] shift foreground color << 8 - orrs r5,r6 // [1] color expanded to 16 bits - lsls r6,r5,#16 // [1] shift 16-bit color << 16 - orrs r6,r5 // [1] color expanded to 32 bits - - // [1] XOR foreground and background color -> R6 - eors r6,r4 // [1] XOR foreground color with background color - - // [3] load pixel sample -> R7 - ldrb r7,[r2,#0] // [2] load pixels from source buffer -> R7 - adds r2,#1 // [1] shift pointer to source buffer - - // [2] prepare conversion table -> R7 - lsls r7,#3 // [1] multiply sample * 8 - add r7,lr // [1] add pointer to conversion table - - // [4] convert first 4 pixels (higher 4 bits) - ldr r5,[r7,#0] // [2] load mask for higher 4 bits - ands r5,r6 // [1] mask foreground color - eors r5,r4 // [1] combine with background color - - // [4] convert second 4 pixels (lower 4 bits) - ldr r7,[r7,#4] // [2] load mask for lower 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - - // [3] write pixels - stmia r0!,{r5,r7} // [3] store 8 pixels - - // [2,3] loop counter - subs r1,#2 // [1] shift loop counter - bhi RenderAttrib8_InLoop // [1,2] > 0, render next whole character - -// ---- end inner loop, continue with last character, or start new part - - // continue to outer loop - ldr r7,[sp,#32] // load wrap width - beq RenderAttrib8_Last // render 1st half of last character - ldr r2,[sp,#4] // get base pointer to pixel data -> R2 - b RenderAttrib8_OutLoop // go back to outer loop - - .align 2 -RenderAttrib8_Addr: - .word RenderTextMask diff --git a/MCUME_pico/picovga_t4/render/vga_color.S b/MCUME_pico/picovga_t4/render/vga_color.S deleted file mode 100755 index 5493f41..0000000 --- a/MCUME_pico/picovga_t4/render/vga_color.S +++ /dev/null @@ -1,89 +0,0 @@ - -// **************************************************************************** -// -// VGA render GF_COLOR -// -// **************************************************************************** - -#include "../define.h" // common definitions of C and ASM - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// extern "C" u8* RenderColor(u8* dbuf, u32 color, int w); - -// render color GF_COLOR -// R0 ... pointer to destination data buffer -// R1 ... color pattern 4-pixels -// R2 ... width of this segment as multiply of 4 pixels (=width in pixels/4) -// Output new pointer to destination data buffer. -// 320 pixels takes 1.1 us on 151 MHz. -// - using only small transfer (24 pixels per loop) takes 1.22 us -// - using only single transfer (4 pixels per loop) takes 2.91 us -// - memset takes 1.42 us - -.thumb_func -.global RenderColor -RenderColor: - -// fill memory buffer with u32 words -// buf ... data buffer, must be 32-bit aligned -// data ... data word to store -// num ... number of 32-bit words (= number of bytes/4) -// Returns new destination address. -// extern "C" u32* MemSet4(u32* buf, u32 data, int num); - -.thumb_func -.global MemSet4 -MemSet4: - - // push registers - push {r4,r5,r6,r7,lr} - - // duplicate color pattern - mov r3,r1 - mov r4,r1 - mov r5,r1 - mov r6,r1 - mov r7,r1 - - // go to big transfer - b 3f - -// ---- [38 per loop] big transfer 120 pixels, speed 0.317 clk per pixel - - // [38] store 30 words (=120 pixels) -2: stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels - stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels - stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels - stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels - stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels -3: subs r2,#30 // [1] decrement number of words - bge 2b // [1,2] loop next 30 words - adds r2,#30 // [1] restore - - // go to small transfer - b 6f - -// ---- [10 per loop] small transfer 24 pixels, speed 0.417 clk per pixel - - // [8] store 6 words (=24 pixels) -4: stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels -6: subs r2,#6 // [1] decrement number of words - bge 4b // [1,2] loop next 6 words - adds r2,#6 // [1] restore - - // go to single transfer - b 8f - -// ---- [5 per loop] single transfer 4 pixels, speed 1.25 clk per pixel - - // [4,5] store 1 word (=4 pixels) -7: stmia r0!,{r1} // [2] 1 word, 4 pixels -8: subs r2,#1 // [1] loop counter - bge 7b // [1,2] next word - - // pop registers - pop {r4,r5,r6,r7,pc} diff --git a/MCUME_pico/picovga_t4/render/vga_ctext.S b/MCUME_pico/picovga_t4/render/vga_ctext.S deleted file mode 100755 index 4dca38a..0000000 --- a/MCUME_pico/picovga_t4/render/vga_ctext.S +++ /dev/null @@ -1,335 +0,0 @@ - -// **************************************************************************** -// -// VGA render GF_CTEXT -// -// **************************************************************************** -// u32 par SSEGM_PAR pointer to the font -// u16 par3 font height - -#include "../define.h" // common definitions of C and ASM -#include "hardware/regs/sio.h" // registers of hardware divider -#include "hardware/regs/addressmap.h" // SIO base address - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// render font pixel mask -.extern RenderTextMask // u32 RenderTextMask[512]; - -// extern "C" u8* RenderCText(u8* dbuf, int x, int y, int w, sSegm* segm) - -// render 8-pixel color text GF_CTEXT -// R0 ... destination data buffer -// R1 ... start X coordinate (in pixels, must be multiple of 4) -// R2 ... start Y coordinate (in graphics lines) -// R3 ... width to display (must be multiple of 4 and > 0) -// [stack] ... segm video segment sSegm -// Output new pointer to destination data buffer. -// 320 pixels takes 10.4 us on 151 MHz. - -.thumb_func -.global RenderCText -RenderCText: - - // push registers - push {r1-r7,lr} - -// Stack content: -// SP+0: R1 start X coordinate -// SP+4: R2 start Y coordinate (later: base pointer to text data row) -// SP+8: R3 width to display -// SP+12: R4 -// SP+16: R5 -// SP+20: R6 -// SP+24: R7 -// SP+28: LR -// SP+32: video segment (later: wrap width in X direction) - - // get pointer to video segment -> R4 - ldr r4,[sp,#32] // load video segment -> R4 - - // start divide Y/font height - ldr r6,RenderCText_pSioBase // get address of SIO base -> R6 - str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate - ldrh r2,[r4,#SSEGM_PAR3] // font height -> R2 - str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height - -// - now we must wait at least 8 clock cycles to get result of division - - // [6] get wrap width -> [SP+32] - ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width - movs r7,#3 // [1] mask to align to 32-bit - bics r5,r7 // [1] align wrap - str r5,[sp,#32] // [2] save wrap width - - // [1] align X coordinate to 32-bit - bics r1,r7 // [1] - - // [3] align remaining width - bics r3,r7 // [1] - str r3,[sp,#8] // [2] save new width - - // load result of division Y/font_height -> R6 Y relative at row, R7 Y row - // Note: QUOTIENT must be read last - ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row - ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R2, index of row - - // pointer to font line -> R3 - lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long) - ldr r3,[r4,#SSEGM_PAR] // get pointer to font - add r3,r5 // line offset + font base -> pointer to current font line R3 - - // base pointer to text data (without X) -> [SP+4], R2 - ldrh r5,[r4,#SSEGM_WB] // get pitch of rows - muls r2,r5 // Y * WB -> offset of row in text buffer - ldr r5,[r4,#SSEGM_DATA] // pointer to data - add r2,r5 // base address of text buffer - str r2,[sp,#4] // save pointer to text buffer - - // prepare pointer to text data with X -> R2 (1 position is 1 character + 1 background + 1 foreground) - lsrs r6,r1,#3 // convert X to character index (1 character is 8 pixels width) - add r2,r6 // add index - add r2,r6 // add index*2 - add r2,r6 // add index*3, pointer to source text buffer -> R2 - - // prepare pointer to conversion table -> LR - ldr r5,RenderCText_Addr // get pointer to conversion table -> R5 - mov lr,r5 // conversion table -> LR - -// ---- render 2nd half of first character -// R0 ... pointer to destination data buffer -// R1 ... start X coordinate -// R2 ... pointer to source text buffer -// R3 ... pointer to font line -// R4 ... background color (expanded to 32-bit) -// R5 ... (temporary) -// R6 ... foreground color (expanded to 32-bit) -// R7 ... (temporary) -// LR ... pointer to conversion table -// [SP+4] ... base pointer to text data (without X) -// [SP+8] ... remaining width -// [SP+32] ... wrap width - - // check bit 2 of X coordinate - check if image starts with 2nd half of first character - lsls r6,r1,#29 // check bit 2 of X coordinate - bpl 2f // bit 2 not set, starting even 4-pixels - - // [4] load font sample -> R5 - ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 - ldrb r5,[r3,r5] // [2] load font sample -> R5 - - // [2] load background color -> R4 - ldrb r4,[r2,#1] // [2] load background color from source text buffer - - // [4] expand background color to 32-bit -> R4 - lsls r7,r4,#8 // [1] shift background color << 8 - orrs r7,r4 // [1] color expanded to 16 bits - lsls r4,r7,#16 // [1] shift 16-bit color << 16 - orrs r4,r7 // [1] color expanded to 32 bits - - // [3] load foreground color -> R6 - ldrb r6,[r2,#2] // [2] load foreground color from source text buffer -> R6 - adds r2,#3 // [1] shift pointer to source text buffer - - // [4] expand foreground color to 32-bit -> R6 - lsls r7,r6,#8 // [1] shift foreground color << 8 - orrs r7,r6 // [1] color expanded to 16 bits - lsls r6,r7,#16 // [1] shift 16-bit color << 16 - orrs r6,r7 // [1] color expanded to 32 bits - - // [1] XOR foreground and background color -> R6 - eors r6,r4 // [1] XOR foreground color with background color - - // [2] prepare conversion table -> R5 - lsls r5,#3 // [1] multiply font sample * 8 - add r5,lr // [1] add pointer to conversion table - - // [6] convert second 4 pixels (lower 4 bits) - ldr r7,[r5,#4] // [2] load mask for lower 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r7} // [2] store second 4 pixels - - // shift X coordinate - adds r1,#4 // shift X coordinate - - // check end of segment - ldr r7,[sp,#32] // load wrap width - cmp r1,r7 // end of segment? - blo 1f - movs r1,#0 // reset X coordinate - ldr r2,[sp,#4] // get base pointer to text data -> R2 - - // shift remaining width -1: ldr r7,[sp,#8] // get remaining width - subs r7,#4 // shift width - str r7,[sp,#8] // save new width - - // prepare wrap width - start X -> R7 -2: ldr r7,[sp,#32] // load wrap width - subs r7,r1 // pixels remaining to end of segment - -// ---- start outer loop, render one part of segment -// Outer loop variables (* prepared before outer loop): -// R0 ... *pointer to destination data buffer -// R1 ... number of characters to generate in one part of segment -// R2 ... *pointer to source text buffer -// R3 ... *pointer to font line -// R4 ... background color (expanded to 32-bit) -// R5 ... (temporary) -// R6 ... foreground color (expanded to 32-bit) -// R7 ... *wrap width of this segment, later: temporary -// LR ... *pointer to conversion table -// [SP+4] ... *base pointer to text data (without X) -// [SP+8] ... *remaining width -// [SP+32] ... *wrap width - -RenderCText_OutLoop: - - // limit wrap width by total width -> R7 - ldr r6,[sp,#8] // get remaining width - cmp r7,r6 // compare with wrap width - bls 2f // width is OK - mov r7,r6 // limit wrap width - - // check if remain whole characters -2: cmp r7,#8 // check number of remaining pixels - bhs 5f // enough characters remain - - // check if 1st part of last character remains - cmp r7,#4 // check 1st part of last character - blo 3f // all done - -// ---- render 1st part of last character - -RenderCText_Last: - - // [4] load font sample -> R5 - ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 - ldrb r5,[r3,r5] // [2] load font sample -> R5 - - // [2] load background color -> R4 - ldrb r4,[r2,#1] // [2] load background color from source text buffer - - // [4] expand background color to 32-bit -> R4 - lsls r1,r4,#8 // [1] shift background color << 8 - orrs r1,r4 // [1] color expanded to 16 bits - lsls r4,r1,#16 // [1] shift 16-bit color << 16 - orrs r4,r1 // [1] color expanded to 32 bits - - // [3] load foreground color -> R6 - ldrb r6,[r2,#2] // [2] load foreground color from source text buffer -> R6 - adds r2,#3 // [1] shift pointer to source text buffer - - // [4] expand foreground color to 32-bit - lsls r1,r6,#8 // [1] shift foreground color << 8 - orrs r1,r6 // [1] color expanded to 16 bits - lsls r6,r1,#16 // [1] shift 16-bit color << 16 - orrs r6,r1 // [1] color expanded to 32 bits - - // [1] XOR foreground and background color -> R6 - eors r6,r4 // [1] XOR foreground color with background color - - // [2] prepare conversion table -> R5 - lsls r5,#3 // [1] multiply font sample * 8 - add r5,lr // [1] add pointer to conversion table - - // [6] convert first 4 pixels (higher 4 bits) - ldr r1,[r5,#0] // [2] load mask for higher 4 bits - ands r1,r6 // [1] mask foreground color - eors r1,r4 // [1] combine with background color - stmia r0!,{r1} // [2] store first 4 pixels - - // check if continue with next segment - ldr r2,[sp,#4] // get base pointer to text data -> R2 - cmp r7,#4 - bhi RenderCText_OutLoop - - // pop registers and return -3: pop {r1-r7,pc} - -// ---- prepare to render whole characters - - // prepare number of whole characters to render -> R1 -5: lsrs r1,r7,#2 // shift to get number of characters*2 - lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5 - subs r6,r5 // get remaining width - str r6,[sp,#8] // save new remaining width - subs r1,#1 // number of characters*2 - 1 - -// ---- [35*N-1] start inner loop, render characters in one part of segment -// Inner loop variables (* prepared before inner loop): -// R0 ... *pointer to destination data buffer -// R1 ... *number of characters to generate*2 - 1 (loop counter) -// R2 ... *pointer to source text buffer -// R3 ... *pointer to font line -// R4 ... background color (expanded to 32-bit) -// R5 ... font sample -// R6 ... foreground color (expanded to 32-bit) -// R7 ... (temporary) -// LR ... *pointer to conversion table - -RenderCText_InLoop: - - // [4] load font sample -> R5 - ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 - ldrb r5,[r3,r5] // [2] load font sample -> R5 - - // [2] load background color -> R4 - ldrb r4,[r2,#1] // [2] load background color from source text buffer - - // [4] expand background color to 32-bit -> R4 - lsls r7,r4,#8 // [1] shift background color << 8 - orrs r7,r4 // [1] color expanded to 16 bits - lsls r4,r7,#16 // [1] shift 16-bit color << 16 - orrs r4,r7 // [1] color expanded to 32 bits - - // [3] load foreground color -> R6 - ldrb r6,[r2,#2] // [2] load foreground color from source text buffer -> R6 - adds r2,#3 // [1] shift pointer to source text buffer - - // [4] expand foreground color to 32-bit - lsls r7,r6,#8 // [1] shift foreground color << 8 - orrs r7,r6 // [1] color expanded to 16 bits - lsls r6,r7,#16 // [1] shift 16-bit color << 16 - orrs r6,r7 // [1] color expanded to 32 bits - - // [1] XOR foreground and background color -> R6 - eors r6,r4 // [1] XOR foreground color with background color - - // [2] prepare conversion table -> R5 - lsls r5,#3 // [1] multiply font sample * 8 - add r5,lr // [1] add pointer to conversion table - - // [6] convert first 4 pixels (higher 4 bits) - ldr r7,[r5,#0] // [2] load mask for higher 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r7} // [2] store first 4 pixels - - // [6] convert second 4 pixels (lower 4 bits) - ldr r7,[r5,#4] // [2] load mask for lower 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r7} // [2] store second 4 pixels - - // [2,3] loop counter - subs r1,#2 // [1] shift loop counter - bhi RenderCText_InLoop // [1,2] > 0, render next whole character - -// ---- end inner loop, continue with last character, or start new part - - // continue to outer loop - ldr r7,[sp,#32] // load wrap width - beq RenderCText_Last // render 1st half of last character - ldr r2,[sp,#4] // get base pointer to text data -> R2 - b RenderCText_OutLoop // go back to outer loop - - .align 2 -RenderCText_Addr: - .word RenderTextMask -RenderCText_pSioBase: - .word SIO_BASE // addres of SIO base diff --git a/MCUME_pico/picovga_t4/render/vga_dtext.S b/MCUME_pico/picovga_t4/render/vga_dtext.S deleted file mode 100755 index bffe9c9..0000000 --- a/MCUME_pico/picovga_t4/render/vga_dtext.S +++ /dev/null @@ -1,436 +0,0 @@ - -// **************************************************************************** -// -// VGA render GF_DTEXT -// -// **************************************************************************** -// u32 par SSEGM_PAR pointer to the font -// u32 par2 SSEGM_PAR2 pointer to font gradient -// u16 par3 LOW background color, HIGH font height - -#include "../define.h" // common definitions of C and ASM -#include "hardware/regs/sio.h" // registers of hardware divider -#include "hardware/regs/addressmap.h" // SIO base address - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// render font pixel mask -.extern RenderTextMask // u32 RenderTextMask[512]; - -// extern "C" u8* RenderDText(u8* dbuf, int x, int y, int w, sSegm* segm) - -// render 8-pixel double gradient color text GF_DTEXT -// R0 ... destination data buffer -// R1 ... start X coordinate (in pixels, must be multiple of 4) -// R2 ... start Y coordinate (in graphics lines) -// R3 ... width to display (must be multiple of 4 and > 0) -// [stack] ... segm video segment sSegm -// Output new pointer to destination data buffer. -// 320 pixels takes us on 151 MHz. - -.thumb_func -.global RenderDText -RenderDText: - -// Stack content: -// SP+0: R8 -// SP+4: R1 start X coordinate (later: base pointer to gradient array) -// SP+8: R2 start Y coordinate (later: base pointer to text data row) -// SP+12: R3 width to display -// SP+16: R4 -// SP+20: R5 -// SP+24: R6 -// SP+28: R7 -// SP+32: LR -// SP+36: video segment (later: wrap width in X direction) - - // push registers - push {r1-r7,lr} - mov r4,r8 - push {r4} - - // get pointer to video segment -> R4 - ldr r4,[sp,#36] // load video segment -> R4 - - // start divide Y/font height - ldr r6,RenderDText_pSioBase // get address of SIO base -> R6 - str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate - ldrb r2,[r4,#SSEGM_PAR3+1] // font height -> R2 - str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height - -// - now we must wait at least 8 clock cycles to get result of division - - // [6] get wrap width -> [SP+36] - ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width - movs r7,#3 // [1] mask to align to 32-bit - bics r5,r7 // [1] align wrap - str r5,[sp,#36] // [1] save wrap width - - // [1] align X coordinate to 32-bit - bics r1,r7 // [1] - - // [3] align remaining width - bics r3,r7 // [1] - str r3,[sp,#12] // [2] save new width - - // load result of division Y/font_height -> R6 Y relative at row, R7 Y row - // Note: QUOTIENT must be read last - ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row - ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R2, index of row - - // pointer to font line -> R3 - lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long) - ldr r3,[r4,#SSEGM_PAR] // get pointer to font - add r3,r5 // line offset + font base -> pointer to current font line R3 - mov r8,r3 - - // base pointer to text data (without X) -> [SP+8], R2 - ldrh r5,[r4,#SSEGM_WB] // get pitch of rows - muls r2,r5 // Y * WB -> offset of row in text buffer - ldr r5,[r4,#SSEGM_DATA] // pointer to data - add r2,r5 // base address of text buffer - str r2,[sp,#8] // save pointer to text buffer - - // base pointer to gradient array -> [SP+4], R3 - ldr r3,[r4,#SSEGM_PAR2] // pointer to graient array - str r3,[sp,#4] // save pointer to gradient array - - // prepare pointer to text data with X -> R2 - lsrs r6,r1,#3 // convert X to gradient index - lsls r6,#2 // round to 4-bytes - add r3,r6 // pointer to source gradient array - lsrs r6,r1,#4 // convert X to character index (1 character is 16 pixels width) - add r2,r6 // pointer to source text buffer -> R2 - - // prepare background color, expand to 32 bits -> R4 - ldrb r4,[r4,#SSEGM_PAR3] // load background color - lsls r5,r4,#8 // shift background color << 8 - orrs r5,r4 // color expanded to 16 bits - lsls r4,r5,#16 // shift 16-bit color << 16 - orrs r4,r5 // color expanded to 32 bits - - // prepare pointer to conversion table -> LR - ldr r5,RenderDText_Addr // get pointer to conversion table -> R5 - mov lr,r5 // conversion table -> LR - -// ---- render 2nd half of first character -// R0 ... pointer to destination data buffer -// R1 ... start X coordinate -// R2 ... pointer to source text buffer -// R3 ... pointer to gradient array -// R4 ... background color (expanded to 32-bit) -// R5..R7 ... (temporary) -// R8 ... pointer to font line -// LR ... pointer to conversion table -// [SP+4] ... base pointer to gradient array -// [SP+8] ... base pointer to text data (without X) -// [SP+12] ... remaining width -// [SP+36] ... wrap width - - // check if X is aligned - lsls r6,r1,#(32-4) // check if X is aligned - beq 2f // X not aligned - - // shift X coordinate - lsrs r5,r6,#(32-4) // [1] X pixel offset in last character -> R5 - movs r6,16 // character width - subs r6,r5 // pixels remain - adds r1,r6 // shift X coordinate (align to next character) - ldr r7,[sp,#12] - subs r7,r6 // shift width - str r7,[sp,#12] - - push {r1} - - // [6] load font sample -> R7 - ldrb r7,[r2,#0] // [2] load character from source text buffer -> R7 - adds r2,#1 // [1] shift pointer to source text buffer - add r7,r8 // [1] pointer to font line - ldrb r7,[r7] // [2] load font sample -> R7 - - // [3] load foreground color, XOR with background -> R6 - ldmia r3!,{r6} // [2] load foreground color from gradient buffer - eors r6,r4 // [1] XOR foreground color with background color - - // [2] prepare conversion table -> R7 - lsls r7,#3 // [1] multiply font sample * 8 - add r7,lr // [1] add pointer to conversion table - - // [4] convert first 4 pixels (higher 4 bits) -> R1 - ldr r1,[r7,#0] // [2] load mask for higher 4 bits - ands r1,r6 // [1] mask foreground color - eors r1,r4 // [1] combine with background color - - cmp r5,#4 // check start position of X - bhi 3f // > 4 - - // [20] store 8 pixels - lsrs r1,#16 // [1] - strb r1,[r0,#0] // [2] - strb r1,[r0,#1] // [2] - lsrs r1,#8 // [1] - strb r1,[r0,#2] // [2] - strb r1,[r0,#3] // [2] - adds r0,#4 // [1] - - // [3] load foreground color, XOR with background -> R6 -3: ldmia r3!,{r6} // [2] load foreground color from gradient buffer - eors r6,r4 // [1] XOR foreground color with background color - - // [4] convert second 4 pixels (lower 4 bits) - ldr r1,[r7,#4] // [2] load mask for lower 4 bits - ands r1,r6 // [1] mask foreground color - eors r1,r4 // [1] combine with background color - - // store 8 pixels - cmp r5,#8 // check start position of X - bhi 4f // > 8 - - strb r1,[r0,#0] // [2] - strb r1,[r0,#1] // [2] - lsrs r1,#8 // [1] - strb r1,[r0,#2] // [2] - strb r1,[r0,#3] // [2] - lsls r1,#8 - adds r0,#4 - -4: lsrs r1,#16 // [1] - strb r1,[r0,#0] // [2] - strb r1,[r0,#1] // [2] - lsrs r1,#8 // [1] - strb r1,[r0,#2] // [2] - strb r1,[r0,#3] // [2] - adds r0,#4 // [1] - - pop {r1} - - // check end of segment - ldr r7,[sp,#36] // load wrap width - cmp r1,r7 // end of segment? - blo 2f - movs r1,#0 // reset X coordinate - ldr r2,[sp,#8] // get base pointer to text data -> R2 - ldr r3,[sp,#4] // get base pointer to gradient array -> R3 - - // prepare wrap width - start X -> R7 -2: ldr r7,[sp,#36] // load wrap width - subs r7,r1 // pixels remaining to end of segment - -// ---- start outer loop, render one part of segment -// Outer loop variables (* prepared before outer loop): -// R0 ... *pointer to destination data buffer -// R1 ... number of characters to generate in one part of segment -// R2 ... *pointer to source text buffer -// R3 ... *pointer to gradient array -// R4 ... *background color (expanded to 32-bit) -// R5 ... (temporary) -// R6 ... (temporary) -// R7 ... *wrap width of this segment, later: temporary -// R8 ... *pointer to font line -// LR ... *pointer to conversion table -// [SP+4] ... base pointer to gradient array -// [SP+8] ... base pointer to text data (without X) -// [SP+12] ... remaining width -// [SP+36] ... wrap width - -RenderDText_OutLoop: - - // limit wrap width by total width -> R7 - ldr r6,[sp,#12] // get remaining width - cmp r7,r6 // compare with wrap width - bls 2f // width is OK - mov r7,r6 // limit wrap width - - // check if remain whole characters -2: cmp r7,#16 // check number of remaining pixels - bhs 5f // enough characters remain - - // check if 1st part of last character remains - cmp r7,#4 // check 1st part of last character - blo 3f // all done - mov r1,r7 // width to render - -// ---- render 1st part of last character - -RenderDText_Last: - - push {r7} - - // [6] load font sample -> R7 - ldrb r7,[r2,#0] // [2] load character from source text buffer -> R7 - adds r2,#1 // [1] shift pointer to source text buffer - add r7,r8 // [1] pointer to font line - ldrb r7,[r7] // [2] load font sample -> R7 - - // [3] load foreground color, XOR with background -> R6 - ldmia r3!,{r6} // [2] load foreground color from gradient buffer - eors r6,r4 // [1] XOR foreground color with background color - - // [2] prepare conversion table -> R7 - lsls r7,#3 // [1] multiply font sample * 8 - add r7,lr // [1] add pointer to conversion table - - // [4] convert first 4 pixels (higher 4 bits) -> R5 - ldr r5,[r7,#0] // [2] load mask for higher 4 bits - ands r5,r6 // [1] mask foreground color - eors r5,r4 // [1] combine with background color - - // [20] store 8 pixels - strb r5,[r0,#0] // [2] - strb r5,[r0,#1] // [2] - lsrs r5,#8 // [1] - strb r5,[r0,#2] // [2] - strb r5,[r0,#3] // [2] - adds r0,#4 // [1] - - cmp r1,#4 - bls 4f - - lsrs r5,#8 // [1] - strb r5,[r0,#0] // [2] - strb r5,[r0,#1] // [2] - lsrs r5,#8 // [1] - strb r5,[r0,#2] // [2] - strb r5,[r0,#3] // [2] - adds r0,#4 // [1] - - cmp r1,#8 - bls 4f - - // [3] load foreground color, XOR with background -> R6 - ldmia r3!,{r6} // [2] load foreground color from gradient buffer - eors r6,r4 // [1] XOR foreground color with background color - - // [4] convert second 4 pixels (lower 4 bits) - ldr r7,[r7,#4] // [2] load mask for lower 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - - // [20] store 8 pixels - strb r7,[r0,#0] // [2] - strb r7,[r0,#1] // [2] - lsrs r7,#8 // [1] - strb r7,[r0,#2] // [2] - strb r7,[r0,#3] // [2] - adds r0,#4 // [1] - - // check if continue with next segment -4: pop {r7} - - ldr r2,[sp,#8] // get base pointer to text data -> R2 - ldr r3,[sp,#4] // get base pointer to gradient array -> R3 - cmp r7,#16 - bhs RenderDText_OutLoop - - // pop registers and return -3: pop {r4} - mov r8,r4 - pop {r1-r7,pc} - -// ---- prepare to render whole characters - - // prepare number of whole characters to render -> R1 -5: lsrs r1,r7,#2 // shift to get number of characters*2 - lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5 - subs r6,r5 // get remaining width - str r6,[sp,#12] // save new remaining width - subs r1,#3 // number of characters*2 - 3 - -// ---- [65*N-1] start inner loop, render characters in one part of segment -// Inner loop variables (* prepared before inner loop): -// R0 ... *pointer to destination data buffer -// R1 ... *number of characters to generate*2 - 3 (loop counter) -// R2 ... *pointer to source text buffer -// R3 ... *pointer to gradient array -// R4 ... *background color (expanded to 32-bit) -// R5 ... (temporary) -// R6 ... foreground color -// R7 ... font sample -// R8 ... *pointer to font line -// LR ... *pointer to conversion table -// [SP+4] ... base pointer to gradient array -// [SP+8] ... base pointer to text data (without X) -// [SP+12] ... remaining width -// [SP+36] ... wrap width - -RenderDText_InLoop: - - // [6] load font sample -> R7 - ldrb r7,[r2,#0] // [2] load character from source text buffer -> R7 - adds r2,#1 // [1] shift pointer to source text buffer - add r7,r8 // [1] pointer to font line - ldrb r7,[r7] // [2] load font sample -> R7 - - // [3] load foreground color, XOR with background -> R6 - ldmia r3!,{r6} // [2] load foreground color from gradient buffer - eors r6,r4 // [1] XOR foreground color with background color - - // [2] prepare conversion table -> R7 - lsls r7,#3 // [1] multiply font sample * 8 - add r7,lr // [1] add pointer to conversion table - - // [4] convert first 4 pixels (higher 4 bits) -> R5 - ldr r5,[r7,#0] // [2] load mask for higher 4 bits - ands r5,r6 // [1] mask foreground color - eors r5,r4 // [1] combine with background color - - // [20] store 8 pixels - strb r5,[r0,#0] // [2] - strb r5,[r0,#1] // [2] - lsrs r5,#8 // [1] - strb r5,[r0,#2] // [2] - strb r5,[r0,#3] // [2] - lsrs r5,#8 // [1] - strb r5,[r0,#4] // [2] - strb r5,[r0,#5] // [2] - lsrs r5,#8 // [1] - strb r5,[r0,#6] // [2] - strb r5,[r0,#7] // [2] - adds r0,#8 // [1] - - // [3] load foreground color, XOR with background -> R6 - ldmia r3!,{r6} // [2] load foreground color from gradient buffer - eors r6,r4 // [1] XOR foreground color with background color - - // [4] convert second 4 pixels (lower 4 bits) - ldr r7,[r7,#4] // [2] load mask for lower 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - - // [20] store 8 pixels - strb r7,[r0,#0] // [2] - strb r7,[r0,#1] // [2] - lsrs r7,#8 // [1] - strb r7,[r0,#2] // [2] - strb r7,[r0,#3] // [2] - lsrs r7,#8 // [1] - strb r7,[r0,#4] // [2] - strb r7,[r0,#5] // [2] - lsrs r7,#8 // [1] - strb r7,[r0,#6] // [2] - strb r7,[r0,#7] // [2] - adds r0,#8 // [1] - - // [2,3] loop counter - subs r1,#4 // [1] shift loop counter - bhi RenderDText_InLoop // [1,2] > 0, render next whole character - -// ---- end inner loop, continue with last character, or start new part - - // continue to outer loop - ldr r7,[sp,#36] // load wrap width - adds r1,#3 // return size of last tile - lsls r1,#2 // convert back to pixels - bne RenderDText_Last // render 1st half of last character - ldr r2,[sp,#8] // get base pointer to text data -> R2 - ldr r3,[sp,#4] // get base pointer to gradient array -> R3 - b RenderDText_OutLoop // go back to outer loop - - .align 2 -RenderDText_Addr: - .word RenderTextMask -RenderDText_pSioBase: - .word SIO_BASE // addres of SIO base diff --git a/MCUME_pico/picovga_t4/render/vga_fastsprite.S b/MCUME_pico/picovga_t4/render/vga_fastsprite.S deleted file mode 100755 index dfced9a..0000000 --- a/MCUME_pico/picovga_t4/render/vga_fastsprite.S +++ /dev/null @@ -1,160 +0,0 @@ - -// **************************************************************************** -// -// VGA render LAYERMODE_FASTSPRITE* -// -// **************************************************************************** - -#include "../define.h" // common definitions of C and ASM - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// extern "C" u32* RenderFastSprite(u32* cbuf, int y, sLayer* scr, u8* buf) - -// render layers with fast sprites LAYERMODE_FASTSPRITE* -// R0 ... cbuf pointer to control buffer -// R1 ... y coordinate of scanline -// R2 ... scr pointer to layer screen structure sLayer -// R3 ... buf pointer to destination data buffer with transparent color -// Output new pointer to control buffer. - -.thumb_func -.global RenderFastSprite -RenderFastSprite: - - // push registers - push {r1-r7,lr} - -// Stack content and input variables: -// R0 cbuf pointer to control buffer -// SP+0: R1 Y coordinate of scanline -// SP+4: R2 scr pointer to layer screen structure sLayer, later: num number of sprites -// SP+8: R3 buf pointer to data buffer with transparent color -// SP+12: R4 -// SP+16: R5 -// SP+20: R6 -// SP+24: R7 -// SP+28: LR - -// Variables: -// R0 ... pointer to destination control buffer -// R1 ... X0 absolute coordinate counted from start -// R2 ... W layer screen width -// R3 ... s pointer to current sprite, later: absolute X coordinate of start of sprite -// R4 ... Y2 coordinate relative to sprite base, later: s->img[Y2*WB] address of sprite line -// R5 ... relative X2 coordinate of sprite segment -// R6 ... W2 width of sprite segment -// R7 ... (temporary) -// LR ... spr pointer to list of sprites -// [SP+0] ... (R1) Y coordinate of scanline -// [SP+4] ... (R2) num number of sprites (loop counter) -// [SP+8] ... (R3) buf pointer to data buffer with transparent color - - // load pointer to list of sprites -> LR - ldr r7,[r2,#SLAYER_IMG] - mov lr,r7 - - // load number of sprites -> [SP+4] - ldrh r7,[r2,#SLAYER_SPRITENUM] - str r7,[sp,#4] - - // load screen width -> R2 - ldrh r2,[r2,#SLAYER_W] - - // reset absolute coordinate X0 -> R1 - movs r1,#0 // R1 <- 0 - - // count number of sprites, end if num = 0 -2: ldr r7,[sp,#4] // get number of sprites - subs r7,#1 // decrement number of sprites - blo 8f // no other sprites - str r7,[sp,#4] // save new number of sprites - - // get pointer to next sprite -> R3 - mov r7,lr // pointer to list of sprites -> R7 - ldmia r7!,{r3} // pointer to sprite -> R3 - mov lr,r7 // save new pointer to list of sprites -> LR - - // prepare Y2 coordinate relative to sprite base -> R4 - ldrh r7,[r3,#SSPRITE_Y] // get Y coordinate of the sprite -> R7 - sxth r7,r7 // signed extend Y2 - ldr r4,[sp,#0] // Y coordinate of scanline -> R4 - subs r4,r7 // relative coordinate Y2 = Y - s->y - - // check if Y2 coordinate is valid - bmi 2b // Y2 < 0, go next sprite - ldrh r7,[r3,#SSPRITE_H] // get sprite height - cmp r4,r7 // check sprite height - bge 2b // Y2 >= s->h, go next sprite - - // get relative start X2 coordinate of this line segment -> R5 - ldr r7,[r3,#SSPRITE_X0] // get table of X0 of lines - ldrb r5,[r7,r4] // get X2 coordinate -> R5 - lsls r5,#2 // convert X2 coordinate to byte offset - - // get width W2 of this line segment -> R6 - ldr r7,[r3,#SSPRITE_W0] // get table of W0 of lines - ldrb r6,[r7,r4] // get W2 width -> R6 - lsls r6,#2 // convert W2 width to bytes - - // get address of sprite line s->img[Y2*s->wb] -> R4 - ldrh r7,[r3,#SSPRITE_WB] // get sprite pitch w->wb - muls r4,r4,r7 // sprite offset Y2*s->wb - ldr r7,[r3,#SSPRITE_IMG] // get sprite image - add r4,r7 // line address -> R4 - - // get absolute X coordinate of start of line -> R3 - ldrh r3,[r3,#SSPRITE_X] // get sprite X coordinate -> R3 - sxth r3,r3 // signed extend X - adds r3,r3,r5 // s->X + X2, X coordinate of start of line -> R3 - - // check if sprite coordinate X lies below current X0 coordinate - subs r7,r1,r3 // difference X0 - X -> R7 - ble 3f // X0 <= X, sprite does not lie below current X0 - - // sprite correction - adds r5,r7 // X2 += X0 - X - subs r6,r7 // W2 -= X0 - X - mov r3,r1 // X = X0 - - // check line length W2 -3: subs r7,r2,r3 // W - X -> R7 - cmp r6,r7 // compare W2 with W - X - ble 4f // W2 <= W - X, length is OK - mov r6,r7 // limit segment width W2 -> R6 - - // align to word -4: movs r7,#3 // mask to word - bics r3,r7 // align X - bics r5,r7 // align X2 - bics r6,r7 // align W2 - ble 2b // no W2 left (W2 <= 0) - - // decode space before sprite - subs r7,r3,r1 // X - X0 -> R7 - ble 5f // no space left before sprite - lsrs r7,#2 // number of words (X - X0)/4 - stmia r0!,{r7} // write number of words - ldr r7,[sp,#8] // pointer to data buffer -> R7 - stmia r0!,{r7} // write address - mov r1,r3 // shift X0 - - // write sprite line -5: adds r7,r4,r5 // address of pixel &s->img[y2*s->wb+x2] -> R7 - lsrs r4,r6,#2 // W2/4 line length -> R4 - stmia r0!,{r4,r7} // write sprite length and address - adds r1,r6 // add X0 += W2 - b 2b // next sprite - - // clear rest of scanline -8: subs r2,r1 // subtract W - X0 - bls 9f // no pixels left - lsrs r2,#2 // (W - X0)/4 - ldr r3,[sp,#8] // pointer to data buffer -> R3 - stmia r0!,{r2,r3} // write number of pixels and address - - // pop registers and return -9: pop {r1-r7,pc} diff --git a/MCUME_pico/picovga_t4/render/vga_ftext.S b/MCUME_pico/picovga_t4/render/vga_ftext.S deleted file mode 100755 index 90713a0..0000000 --- a/MCUME_pico/picovga_t4/render/vga_ftext.S +++ /dev/null @@ -1,313 +0,0 @@ - -// **************************************************************************** -// -// VGA render GF_FTEXT -// -// **************************************************************************** -// u32 par SSEGM_PAR pointer to the font -// u32 par2 SSEGM_PAR2 background color -// u16 par3 font height - -#include "../define.h" // common definitions of C and ASM -#include "hardware/regs/sio.h" // registers of hardware divider -#include "hardware/regs/addressmap.h" // SIO base address - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// render font pixel mask -.extern RenderTextMask // u32 RenderTextMask[512]; - -// extern "C" u8* RenderFText(u8* dbuf, int x, int y, int w, sSegm* segm) - -// render 8-pixel foreground color text GF_FTEXT -// R0 ... destination data buffer -// R1 ... start X coordinate (in pixels, must be multiple of 4) -// R2 ... start Y coordinate (in graphics lines) -// R3 ... width to display (must be multiple of 4 and > 0) -// [stack] ... segm video segment sSegm -// Output new pointer to destination data buffer. -// 320 pixels takes 8.7 us on 151 MHz. - -.thumb_func -.global RenderFText -RenderFText: - - // push registers - push {r1-r7,lr} - -// Stack content: -// SP+0: R1 start X coordinate -// SP+4: R2 start Y coordinate (later: base pointer to text data row) -// SP+8: R3 width to display -// SP+12: R4 -// SP+16: R5 -// SP+20: R6 -// SP+24: R7 -// SP+28: LR -// SP+32: video segment (later: wrap width in X direction) - - // get pointer to video segment -> R4 - ldr r4,[sp,#32] // load video segment -> R4 - - // start divide Y/font height - ldr r6,RenderFText_pSioBase // get address of SIO base -> R6 - str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate - ldrh r2,[r4,#SSEGM_PAR3] // font height -> R2 - str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height - -// - now we must wait at least 8 clock cycles to get result of division - - // [6] get wrap width -> [SP+32] - ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width - movs r7,#3 // [1] mask to align to 32-bit - bics r5,r7 // [1] align wrap - str r5,[sp,#32] // [2] save wrap width - - // [1] align X coordinate to 32-bit - bics r1,r7 // [1] - - // [3] align remaining width - bics r3,r7 // [1] - str r3,[sp,#8] // [2] save new width - - // load result of division Y/font_height -> R6 Y relative at row, R7 Y row - // Note: QUOTIENT must be read last - ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row - ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R2, index of row - - // pointer to font line -> R3 - lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long) - ldr r3,[r4,#SSEGM_PAR] // get pointer to font - add r3,r5 // line offset + font base -> pointer to current font line R3 - - // base pointer to text data (without X) -> [SP+4], R2 - ldrh r5,[r4,#SSEGM_WB] // get pitch of rows - muls r2,r5 // Y * WB -> offset of row in text buffer - ldr r5,[r4,#SSEGM_DATA] // pointer to data - add r2,r5 // base address of text buffer - str r2,[sp,#4] // save pointer to text buffer - - // prepare pointer to text data with X -> R2 - lsrs r6,r1,#3 // convert X to character index (1 character is 8 pixels width) - lsls r6,#1 // convert to character offset (1 position is: 1 character + 1 color) - add r2,r6 // pointer to source text buffer -> R2 - - // prepare background color, expand to 32 bits -> R4 - ldrb r4,[r4,#SSEGM_PAR2] // load background color - lsls r5,r4,#8 // shift background color << 8 - orrs r5,r4 // color expanded to 16 bits - lsls r4,r5,#16 // shift 16-bit color << 16 - orrs r4,r5 // color expanded to 32 bits - - // prepare pointer to conversion table -> LR - ldr r5,RenderFText_Addr // get pointer to conversion table -> R5 - mov lr,r5 // conversion table -> LR - -// ---- render 2nd half of first character -// R0 ... pointer to destination data buffer -// R1 ... start X coordinate -// R2 ... pointer to source text buffer -// R3 ... pointer to font line -// R4 ... background color (expanded to 32-bit) -// R5..R7 ... (temporary) -// LR ... pointer to conversion table -// [SP+4] ... base pointer to text data (without X) -// [SP+8] ... remaining width -// [SP+32] ... wrap width - - // check bit 2 of X coordinate - check if image starts with 2nd half of first character - lsls r6,r1,#29 // check bit 2 of X coordinate - bpl 2f // bit 2 not set, starting even 4-pixels - - // [4] load font sample -> R5 - ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 - ldrb r5,[r3,r5] // [2] load font sample -> R5 - - // [3] load foreground color -> R6 - ldrb r6,[r2,#1] // [2] load foreground color from source text buffer -> R6 - adds r2,#2 // [1] shift pointer to source text buffer - - // [4] expand foreground color to 32-bit -> R6 - lsls r7,r6,#8 // [1] shift foreground color << 8 - orrs r7,r6 // [1] color expanded to 16 bits - lsls r6,r7,#16 // [1] shift 16-bit color << 16 - orrs r6,r7 // [1] color expanded to 32 bits - - // [1] XOR foreground and background color -> R6 - eors r6,r4 // [1] XOR foreground color with background color - - // [2] prepare conversion table -> R5 - lsls r5,#3 // [1] multiply font sample * 8 - add r5,lr // [1] add pointer to conversion table - - // [6] convert second 4 pixels (lower 4 bits) - ldr r7,[r5,#4] // [2] load mask for lower 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r7} // [2] store second 4 pixels - - // shift X coordinate - adds r1,#4 // shift X coordinate - - // check end of segment - ldr r7,[sp,#32] // load wrap width - cmp r1,r7 // end of segment? - blo 1f - movs r1,#0 // reset X coordinate - ldr r2,[sp,#4] // get base pointer to text data -> R2 - - // shift remaining width -1: ldr r7,[sp,#8] // get remaining width - subs r7,#4 // shift width - str r7,[sp,#8] // save new width - - // prepare wrap width - start X -> R7 -2: ldr r7,[sp,#32] // load wrap width - subs r7,r1 // pixels remaining to end of segment - -// ---- start outer loop, render one part of segment -// Outer loop variables (* prepared before outer loop): -// R0 ... *pointer to destination data buffer -// R1 ... number of characters to generate in one part of segment -// R2 ... *pointer to source text buffer -// R3 ... *pointer to font line -// R4 ... *background color (expanded to 32-bit) -// R5 ... (temporary) -// R6 ... (temporary) -// R7 ... *wrap width of this segment, later: temporary -// LR ... *pointer to conversion table -// [SP+4] ... *base pointer to text data (without X) -// [SP+8] ... *remaining width -// [SP+32] ... *wrap width - -RenderFText_OutLoop: - - // limit wrap width by total width -> R7 - ldr r6,[sp,#8] // get remaining width - cmp r7,r6 // compare with wrap width - bls 2f // width is OK - mov r7,r6 // limit wrap width - - // check if remain whole characters -2: cmp r7,#8 // check number of remaining pixels - bhs 5f // enough characters remain - - // check if 1st part of last character remains - cmp r7,#4 // check 1st part of last character - blo 3f // all done - -// ---- render 1st part of last character - -RenderFText_Last: - - // [4] load font sample -> R5 - ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 - ldrb r5,[r3,r5] // [2] load font sample -> R5 - - // [3] load foreground color -> R6 - ldrb r6,[r2,#1] // [2] load foreground color from source text buffer -> R6 - adds r2,#2 // [1] shift pointer to source text buffer - - // [4] expand foreground color to 32-bit - lsls r1,r6,#8 // [1] shift foreground color << 8 - orrs r1,r6 // [1] color expanded to 16 bits - lsls r6,r1,#16 // [1] shift 16-bit color << 16 - orrs r6,r1 // [1] color expanded to 32 bits - - // [1] XOR foreground and background color -> R6 - eors r6,r4 // [1] XOR foreground color with background color - - // [2] prepare conversion table -> R5 - lsls r5,#3 // [1] multiply font sample * 8 - add r5,lr // [1] add pointer to conversion table - - // [6] convert first 4 pixels (higher 4 bits) - ldr r1,[r5,#0] // [2] load mask for higher 4 bits - ands r1,r6 // [1] mask foreground color - eors r1,r4 // [1] combine with background color - stmia r0!,{r1} // [2] store first 4 pixels - - // check if continue with next segment - ldr r2,[sp,#4] // get base pointer to text data -> R2 - cmp r7,#4 - bhi RenderFText_OutLoop - - // pop registers and return -3: pop {r1-r7,pc} - -// ---- prepare to render whole characters - - // prepare number of whole characters to render -> R1 -5: lsrs r1,r7,#2 // shift to get number of characters*2 - lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5 - subs r6,r5 // get remaining width - str r6,[sp,#8] // save new remaining width - subs r1,#1 // number of characters*2 - 1 - -// ---- [29*N-1] start inner loop, render characters in one part of segment -// Inner loop variables (* prepared before inner loop): -// R0 ... *pointer to destination data buffer -// R1 ... *number of characters to generate*2 - 1 (loop counter) -// R2 ... *pointer to source text buffer -// R3 ... *pointer to font line -// R4 ... *background color (expanded to 32-bit) -// R5 ... font sample -// R6 ... foreground color (expanded to 32-bit) -// R7 ... (temporary) -// LR ... *pointer to conversion table - -RenderFText_InLoop: - - // [4] load font sample -> R5 - ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 - ldrb r5,[r3,r5] // [2] load font sample -> R5 - - // [3] load foreground color -> R6 - ldrb r6,[r2,#1] // [2] load foreground color from source text buffer -> R6 - adds r2,#2 // [1] shift pointer to source text buffer - - // [4] expand foreground color to 32-bit - lsls r7,r6,#8 // [1] shift foreground color << 8 - orrs r7,r6 // [1] color expanded to 16 bits - lsls r6,r7,#16 // [1] shift 16-bit color << 16 - orrs r6,r7 // [1] color expanded to 32 bits - - // [1] XOR foreground and background color -> R6 - eors r6,r4 // [1] XOR foreground color with background color - - // [2] prepare conversion table -> R5 - lsls r5,#3 // [1] multiply font sample * 8 - add r5,lr // [1] add pointer to conversion table - - // [6] convert first 4 pixels (higher 4 bits) - ldr r7,[r5,#0] // [2] load mask for higher 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r7} // [2] store first 4 pixels - - // [6] convert second 4 pixels (lower 4 bits) - ldr r7,[r5,#4] // [2] load mask for lower 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r7} // [2] store second 4 pixels - - // [2,3] loop counter - subs r1,#2 // [1] shift loop counter - bhi RenderFText_InLoop // [1,2] > 0, render next whole character - -// ---- end inner loop, continue with last character, or start new part - - // continue to outer loop - ldr r7,[sp,#32] // load wrap width - beq RenderFText_Last // render 1st half of last character - ldr r2,[sp,#4] // get base pointer to text data -> R2 - b RenderFText_OutLoop // go back to outer loop - - .align 2 -RenderFText_Addr: - .word RenderTextMask -RenderFText_pSioBase: - .word SIO_BASE // addres of SIO base diff --git a/MCUME_pico/picovga_t4/render/vga_graph1.S b/MCUME_pico/picovga_t4/render/vga_graph1.S deleted file mode 100755 index 8f558a5..0000000 --- a/MCUME_pico/picovga_t4/render/vga_graph1.S +++ /dev/null @@ -1,258 +0,0 @@ - -// **************************************************************************** -// -// VGA render GF_GRAPH1 -// -// **************************************************************************** - -#include "../define.h" // common definitions of C and ASM - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// render font pixel mask -.extern RenderTextMask // u32 RenderTextMask[512]; - -// extern "C" u8* RenderGraph1(u8* dbuf, int x, int y, int w, sSegm* segm); - -// render 1-bit palette graphics GF_GRAPH1 -// dbuf ... destination data buffer -// x ... start X coordinate (must be multiple of 4) -// y ... start Y coordinate -// w ... width of this segment (must be multiple of 4) -// segm ... video segment -// Output new dbuf pointer. -// 320 pixels takes 6 us on 151 MHz. - -.thumb_func -.global RenderGraph1 -RenderGraph1: - - // push registers - push {r3-r7,lr} - -// Input registers and stack content: -// R0 ... destination data buffer -// R1 ... start X coordinate -// R2 ... start Y coordinate -// SP+0: R3 width to display -// SP+4: R4 -// SP+8: R5 -// SP+12: R6 -// SP+16: R7 -// SP+20: LR -// SP+24: video segment (later: wrap width in X direction) - - // get pointer to video segment -> R4 - ldr r4,[sp,#24] // load video segment -> R4 - - // get wrap width -> [SP+24] - ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width - movs r7,#3 // mask to align to 32-bit - bics r5,r7 // align wrap - str r5,[sp,#24] // save wrap width - - // align X coordinate to 32-bit -> R1 - bics r1,r7 - - // align remaining width -> [SP+0] - bics r3,r7 - str r3,[sp,#0] // save new width - - // base pointer to image data (without X) -> LR - ldrh r5,[r4,#SSEGM_WB] // get pitch of lines - muls r2,r5 // Y * WB -> offset of row in text buffer - ldr r5,[r4,#SSEGM_DATA] // pointer to data - add r2,r5 // base address of text buffer - mov lr,r2 // save pointer to text buffer - - // prepare pointer to image data with X -> R2 - lsrs r2,r1,#3 // convert X to character index (1 character is 8 pixels width) - add r2,lr // pointer to source text buffer -> R2 - - // prepare foreground color, expand to 32-bit -> R6 - ldrb r6,[r4,#SSEGM_PAR+1] // load foreground color - lsls r7,r6,#8 // [1] shift foreground color << 8 - orrs r7,r6 // [1] color expanded to 16 bits - lsls r6,r7,#16 // [1] shift 16-bit color << 16 - orrs r6,r7 // [1] color expanded to 32 bits - - // prepare background color, expand to 32 bits -> R4 - ldrb r4,[r4,#SSEGM_PAR] // load background color - lsls r5,r4,#8 // shift background color << 8 - orrs r5,r4 // color expanded to 16 bits - lsls r4,r5,#16 // shift 16-bit color << 16 - orrs r4,r5 // color expanded to 32 bits - - // [1] XOR foreground and background color -> R6 - eors r6,r4 // [1] XOR foreground color with background color - - // prepare pointer to conversion table -> R3 - ldr r3,RenderGraph1_Addr // get pointer to conversion table -> R3 - -// ---- render 2nd half of first character -// R0 ... pointer to destination data buffer -// R1 ... start X coordinate -// R2 ... current pointer to image buffer -// R3 ... pointer to conversion table -// R4 ... background color (expanded to 32-bit) -// R5 ... (temporary) -// R6 ... foreground color (expanded to 32-bit) -// R7 ... (temporary) -// LR ... base pointer to image data (without X) -// [SP+0] ... remaining width -// [SP+24] ... wrap width - - // check bit 2 of X coordinate - check if image starts with 2nd half of first character - lsls r5,r1,#29 // check bit 2 of X coordinate - bpl 2f // bit 2 not set, starting even 4-pixels - - // [3] load image sample -> R5 - ldrb r5,[r2,#0] // [2] load image sample -> R5 - adds r2,#1 // [1] shift pointer to image buffer - - // [2] prepare conversion table -> R5 - lsls r5,#3 // [1] multiply image sample * 8 - add r5,r3 // [1] add pointer to conversion table - - // [6] convert second 4 pixels (lower 4 bits) - ldr r7,[r5,#4] // [2] load mask for lower 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r7} // [2] store second 4 pixels - - // shift X coordinate - adds r1,#4 // shift X coordinate - - // check end of segment - ldr r7,[sp,#24] // load wrap width - cmp r1,r7 // X=end of segment? - blo 1f - movs r1,#0 // reset X coordinate - mov r2,lr // get base pointer to image data -> R2 - - // shift remaining width -1: ldr r7,[sp,#0] // get remaining width - subs r7,#4 // shift width - str r7,[sp,#0] // save new width - - // prepare wrap width - start X -> R7 -2: ldr r7,[sp,#24] // load wrap width - subs r7,r1 // pixels remaining to end of segment - -// ---- start outer loop, render one part of segment -// Outer loop variables (* prepared before outer loop): -// R0 ... *pointer to destination data buffer -// R1 ... number of characters to generate in one part of segment -// R2 ... *current pointer to image buffer -// R3 ... *pointer to conversion table -// R4 ... *background color (expanded to 32-bit) -// R5 ... (temporary) -// R6 ... *foreground color (expanded to 32-bit) -// R7 ... *wrap width of this segment, later: temporary -// LR ... *base pointer to image data (without X) -// [SP+0] ... *remaining width -// [SP+24] ... *wrap width - -RenderGraph1_OutLoop: - - // limit wrap width by total width -> R7 - ldr r5,[sp,#0] // get remaining width - cmp r7,r5 // compare with wrap width - bls 2f // width is OK - mov r7,r5 // limit wrap width - - // check if remain whole characters -2: cmp r7,#8 // check number of remaining pixels - bhs 5f // enough characters remain - - // check if 1st part of last character remains - cmp r7,#4 // check 1st part of last character - blo 3f // all done - -// ---- render 1st part of last character - -RenderGraph1_Last: - - // [3] load image sample -> R5 - ldrb r5,[r2,#0] // [2] load image sample -> R5 - adds r2,#1 // [1] shift pointer to image buffer - - // [2] prepare conversion table -> R5 - lsls r5,#3 // [1] multiply image sample * 8 - add r5,r3 // [1] add pointer to conversion table - - // [6] convert first 4 pixels (higher 4 bits) - ldr r1,[r5,#0] // [2] load mask for higher 4 bits - ands r1,r6 // [1] mask foreground color - eors r1,r4 // [1] combine with background color - stmia r0!,{r1} // [2] store first 4 pixels - - // check if continue with next segment - mov r2,lr // get base pointer to image data -> R2 - cmp r7,#4 - bhi RenderGraph1_OutLoop - - // pop registers and return -3: pop {r3-r7,pc} - -// ---- prepare to render whole characters - - // prepare number of whole characters to render -> R1 -5: lsrs r1,r7,#2 // shift to get number of characters*2 - lsls r7,r1,#2 // shift back to get number of pixels, rounded down -> R7 - subs r5,r7 // get remaining width - str r5,[sp,#0] // save new remaining width - subs r1,#1 // number of characters*2 - 1 - -// ---- [20*N-1] start inner loop, render characters in one part of segment -// Inner loop variables (* prepared before inner loop): -// R0 ... *pointer to destination data buffer -// R1 ... *number of characters to generate*2 - 1 (loop counter) -// R2 ... *current pointer to image buffer -// R3 ... *pointer to conversion table -// R4 ... *background color (expanded to 32-bit) -// R5 ... font sample -// R6 ... *foreground color (expanded to 32-bit) -// R7 ... (temporary) -// LR ... *base pointer to image data (without X) - -RenderGraph1_InLoop: - - // [3] load image sample -> R5 - ldrb r5,[r2,#0] // [2] load image sample -> R5 - adds r2,#1 // [1] shift pointer to image buffer - - // [2] prepare conversion table -> R5 - lsls r5,#3 // [1] multiply image sample * 8 - add r5,r3 // [1] add pointer to conversion table - - // [6] convert first 4 pixels (higher 4 bits) - ldr r7,[r5,#0] // [2] load mask for higher 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r7} // [2] store first 4 pixels - - // [6] convert second 4 pixels (lower 4 bits) - ldr r7,[r5,#4] // [2] load mask for lower 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r7} // [2] store second 4 pixels - - // [2,3] loop counter - subs r1,#2 // [1] shift loop counter - bhi RenderGraph1_InLoop // [1,2] > 0, render next whole character - -// ---- end inner loop, continue with last character, or start new part - - // continue to outer loop - ldr r7,[sp,#24] // load wrap width - beq RenderGraph1_Last // render 1st half of last character - mov r2,lr // get base pointer to image data -> R2 - b RenderGraph1_OutLoop // go back to outer loop - - .align 2 -RenderGraph1_Addr: - .word RenderTextMask diff --git a/MCUME_pico/picovga_t4/render/vga_graph2.S b/MCUME_pico/picovga_t4/render/vga_graph2.S deleted file mode 100755 index 205f844..0000000 --- a/MCUME_pico/picovga_t4/render/vga_graph2.S +++ /dev/null @@ -1,173 +0,0 @@ - -// **************************************************************************** -// -// VGA render GF_GRAPH2 -// -// **************************************************************************** - -#include "../define.h" // common definitions of C and ASM - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// extern "C" u8* RenderGraph2(u8* dbuf, int x, int y, int w, sSegm* segm); - -// render 2-bit palette graphics GF_GRAPH2 -// R0 ... destination data buffer -// R1 ... start X coordinate (must be multiple of 4) -// R2 ... start Y coordinate -// R3 ... width of this segment (must be multiple of 4) -// segm ... video segment -// Output new dbuf pointer. -// 320 pixels takes 5 us on 151 MHz. - -.thumb_func -.global RenderGraph2 -RenderGraph2: - - // push registers - push {r3-r7,lr} - -// Input registers and stack content: -// R0 ... destination data buffer -// R1 ... start X coordinate -// R2 ... start Y coordinate -// SP+0: R3 ... width to display (remaining width) -// SP+4: R4 -// SP+8: R5 -// SP+12: R6 -// SP+16: R7 -// SP+20: LR -// SP+24: video segment - - // get pointer to video segment -> R4 - ldr r4,[sp,#24] // load video segment -> R4 - - // get wrap width -> R7 - ldrh r7,[r4,#SSEGM_WRAPX] // get wrap width - movs r6,#3 // mask to align to 32-bit - bics r7,r6 // align wrap - - // align X coordinate to 32-bit -> R1 - bics r1,r6 - - // align remaining width -> [SP+0] - bics r3,r6 - str r3,[sp,#0] // save new width - - // base pointer to image data (without X) -> LR, R2 - ldrh r5,[r4,#SSEGM_WB] // get pitch of rows - muls r2,r5 // Y * WB -> offset of row in image buffer - ldr r5,[r4,#SSEGM_DATA] // pointer to data - add r2,r5 // base address of image buffer - mov lr,r2 // save pointer to image buffer - - // prepare pointer to image data with X -> R2 - lsrs r6,r1,#2 // convert X to character index (1 character is 4 pixels width) - add r2,r6 // add index, pointer to source image buffer -> R2 - - // prepare pointer to palette translation table -> R3 - ldr r3,[r4,#SSEGM_PAR] // get pointer to palette translation table -> R3 - - // prepare wrap width - start X -> R6 - subs r6,r7,r1 // pixels remaining to end of segment - -// ---- start outer loop, render one part of segment -// Outer loop variables (* prepared before outer loop): -// R0 ... *pointer to destination data buffer -// R1 ... number of 4-pixels to generate in one part of segment -// R2 ... *pointer to source image buffer -// R3 ... *pointer to palette translation table -// R4 ... (temporary) -// R5 ... (temporary) -// R6 ... part width -// R7 ... *wrap width -// LR ... *base pointer to image data (without X) -// [SP+0] ... width to display - -RenderGraph2_OutLoop: - - // limit wrap width by total width -> R7 - ldr r4,[sp,#0] // get remaining width - cmp r6,r4 // compare with wrap width - bls 2f // width is OK - mov r6,r4 // limit wrap width - - // check number of pixels -2: cmp r6,#4 // check number of remaining pixels - bhs 5f // enough characters remain - - // pop registers and return - pop {r3-r7,pc} - -// ---- prepare to render whole characters - - // prepare number of 4-pixels to render -> R1 -5: lsrs r1,r6,#2 // shift to get number of 4-pixels - lsls r6,r1,#2 // shift back to get number of pixels, rounded down -> R6 - subs r4,r6 // get remaining width - str r4,[sp,#0] // save new remaining width - -// ---- generate odd pixel - - // [2,3] check odd pixel - lsrs r1,#1 // [1] check odd pixel - bcc RenderGraph2_InLoop // [1,2] odd pixel not set - - // [3] load image sample -> R4 - ldrb r4,[r2,#0] // [2] load image sample - adds r2,#1 // [1] increase pointer to image data - - // [5] write 4 pixels - lsls r4,#2 // [1] index*4 - ldr r5,[r3,r4] // [2] load colors - stmia r0!,{r5} // [2] write pixels - - // [2,3] check end of data - tst r1,r1 // [1] check counter - beq RenderGraph2_EndLoop // [1,2] end - -// ---- [17*N-1] start inner loop, render pixels in one part of segment -// Inner loop variables (* prepared before inner loop): -// R0 ... *pointer to destination data buffer -// R1 ... *number of 4-pixels to generate (loop counter) -// R2 ... *pointer to source image buffer -// R3 ... *pointer to palette translation table -// R4 ... image sample -// R5 ... output pixels -// R6 ... output pixels -// R7 ... *wrap width -// LR ... *base pointer to image data (without X) - -RenderGraph2_InLoop: - - // [2] load image sample -> R4 - ldrb r4,[r2,#0] // [2] load image sample - - // [3] prepare 4 pixels - lsls r4,#2 // [1] index*4 - ldr r5,[r3,r4] // [2] load colors - - // [3] load image sample -> R4 - ldrb r4,[r2,#1] // [2] load image sample - adds r2,#2 // [1] increase pointer to image data - - // [6] prepare and write next 4 pixels - lsls r4,#2 // [1] index*4 - ldr r6,[r3,r4] // [2] load colors - stmia r0!,{r5,r6} // [3] write pixels - - // [2,3] loop counter - subs r1,#1 // [1] loop counter - bne RenderGraph2_InLoop // [1,2] next step - -// ---- end inner loop, start new part - -RenderGraph2_EndLoop: - - // continue to outer loop - mov r6,r7 // load wrap width -> R6 - mov r2,lr // get base pointer to text data -> R2 - b RenderGraph2_OutLoop // go back to outer loop diff --git a/MCUME_pico/picovga_t4/render/vga_graph4.S b/MCUME_pico/picovga_t4/render/vga_graph4.S deleted file mode 100755 index a063b4c..0000000 --- a/MCUME_pico/picovga_t4/render/vga_graph4.S +++ /dev/null @@ -1,214 +0,0 @@ - -// **************************************************************************** -// -// VGA render GF_GRAPH4 -// -// **************************************************************************** - -#include "../define.h" // common definitions of C and ASM - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// extern "C" u8* RenderGraph4(u8* dbuf, int x, int y, int w, sSegm* segm); - -// render 4-bit palette graphics GF_GRAPH4 -// R0 ... destination data buffer -// R1 ... start X coordinate (must be multiple of 4) -// R2 ... start Y coordinate -// R3 ... width of this segment (must be multiple of 4) -// segm ... video segment -// Output new dbuf pointer. -// 320 pixels takes 8.8 us on 151 MHz. - -.thumb_func -.global RenderGraph4 -RenderGraph4: - - // push registers - push {r3-r7,lr} - -// Input registers and stack content: -// R0 ... destination data buffer -// R1 ... start X coordinate -// R2 ... start Y coordinate -// SP+0: R3 ... width to display (remaining width) -// SP+4: R4 -// SP+8: R5 -// SP+12: R6 -// SP+16: R7 -// SP+20: LR -// SP+24: video segment (later: wrap width in X direction) - - // get pointer to video segment -> R4 - ldr r4,[sp,#24] // load video segment -> R4 - - // get wrap width -> [SP+24] - ldrh r7,[r4,#SSEGM_WRAPX] // get wrap width - movs r6,#3 // mask to align to 32-bit - bics r7,r6 // align wrap - str r7,[sp,#24] // save wrap width - - // align X coordinate to 32-bit -> R1 - bics r1,r6 - - // align remaining width -> [SP+0] - bics r3,r6 - str r3,[sp,#0] // save new width - - // base pointer to image data (without X) -> LR, R2 - ldrh r5,[r4,#SSEGM_WB] // get pitch of rows - muls r2,r5 // Y * WB -> offset of row in image buffer - ldr r5,[r4,#SSEGM_DATA] // pointer to data - add r2,r5 // base address of image buffer - mov lr,r2 // save pointer to image buffer - - // prepare pointer to image data with X -> R2 - lsrs r6,r1,#1 // convert X to character index (1 character is 2 pixels width) - add r2,r6 // add index, pointer to source image buffer -> R2 - - // prepare pointer to palette translation table -> R3 - ldr r3,[r4,#SSEGM_PAR] // get pointer to palette translation table -> R3 - - // prepare wrap width - start X -> R6 - ldr r6,[sp,#24] // load wrap width - subs r6,r1 // pixels remaining to end of segment - -// ---- start outer loop, render one part of segment -// Outer loop variables (* prepared before outer loop): -// R0 ... *pointer to destination data buffer -// R1 ... number of 4-pixels to generate in one part of segment -// R2 ... *pointer to source image buffer -// R3 ... *pointer to palette translation table -// R4 ... (temporary) -// R5 ... (temporary) -// R6 ... part width -// R7 ... (temporary) -// LR ... *base pointer to image data (without X) -// [SP+0] ... width to display -// [SP+24] ... wrap width - -RenderGraph4_OutLoop: - - // limit wrap width by total width -> R6 - ldr r4,[sp,#0] // get remaining width - cmp r6,r4 // compare with wrap width - bls 2f // width is OK - mov r6,r4 // limit wrap width - - // check number of pixels -2: cmp r6,#4 // check number of remaining pixels - bhs 5f // enough characters remain - - // pop registers and return - pop {r3-r7,pc} - -// ---- prepare to render whole characters - - // prepare number of 4-pixels to render -> R1 -5: lsrs r1,r6,#2 // shift to get number of 4-pixels - lsls r6,r1,#2 // shift back to get number of pixels, rounded down -> R6 - subs r4,r6 // get remaining width - str r4,[sp,#0] // save new remaining width - -// ---- generate odd pixel - - // [2,3] check odd pixel - lsrs r1,#1 // [1] check odd pixel - bcc RenderGraph4_InLoop // [1,2] odd pixel not set - - // [2] load image sample -> R4 - ldrb r4,[r2,#0] // [2] load image sample - - // [3] prepare 1st and 2nd pixel -> R5 - lsls r4,#1 // [1] index*2 - ldrh r5,[r3,r4] // [2] load 2 pixels - - // [3] load image sample -> R4 - ldrb r4,[r2,#1] // [2] load image sample - adds r2,#2 // [1] increase pointer to image data - - // [3] prepare 3rd and 4th pixel -> R6 - lsls r4,#1 // [1] index*2 - ldrh r6,[r3,r4] // [2] load 2 pixels - - // [2] compose pixels -> R5 - lsls r6,#16 // [1] shift 3rd and 4th pixels - orrs r5,r6 // [1] compose pixels - - // [2] write pixels - stmia r0!,{r5} // [2] write 4 pixels - - // [2,3] check end of data - tst r1,r1 // [1] check counter - beq RenderGraph4_EndLoop // [1,2] end - -// ---- [31*N-1] start inner loop, render pixels in one part of segment -// Inner loop variables (* prepared before inner loop): -// R0 ... *pointer to destination data buffer -// R1 ... *number of 4-pixels to generate (loop counter) -// R2 ... *pointer to source image buffer -// R3 ... *pointer to palette translation table -// R4 ... image sample -// R5 ... output pixels -// R6 ... output pixels -// R7 ... output pixels -// LR ... *base pointer to image data (without X) -// [SP+24] ... wrap width - -RenderGraph4_InLoop: - - // [2] load image sample -> R4 - ldrb r4,[r2,#0] // [2] load image sample - - // [3] prepare 1st and 2nd pixel -> R5 - lsls r4,#1 // [1] index*2 - ldrh r5,[r3,r4] // [2] load 2 pixels - - // [2] load image sample -> R4 - ldrb r4,[r2,#1] // [2] load image sample - - // [3] prepare 3rd and 4th pixel -> R6 - lsls r4,#1 // [1] index*2 - ldrh r6,[r3,r4] // [2] load 2 pixels - - // [2] compose pixels -> R5 - lsls r6,#16 // [1] shift 3rd and 4th pixels - orrs r5,r6 // [1] compose pixels - - // [2] load image sample -> R4 - ldrb r4,[r2,#2] // [2] load image sample - - // [3] prepare 1st and 2nd pixel -> R6 - lsls r4,#1 // [1] index*2 - ldrh r6,[r3,r4] // [2] load 2 pixels - - // [3] load image sample -> R4 - ldrb r4,[r2,#3] // [2] load image sample - adds r2,#4 // [1] increase pointer to image data - - // [3] prepare 3rd and 4th pixel -> R7 - lsls r4,#1 // [1] index*2 - ldrh r7,[r3,r4] // [2] load 2 pixels - - // [2] compose pixels -> R6 - lsls r7,#16 // [1] shift 3rd and 4th pixels - orrs r6,r7 // [1] compose pixels - - // [3] write pixels - stmia r0!,{r5,r6} // [3] write 8 pixels - - // [2,3] loop counter - subs r1,#1 // [1] loop counter - bne RenderGraph4_InLoop // [1,2] next step - -// ---- end inner loop, start new part - -RenderGraph4_EndLoop: - - // continue to outer loop - ldr r6,[sp,#24] // load wrap width -> R6 - mov r2,lr // get base pointer to text data -> R2 - b RenderGraph4_OutLoop // go back to outer loop diff --git a/MCUME_pico/picovga_t4/render/vga_graph8.S b/MCUME_pico/picovga_t4/render/vga_graph8.S deleted file mode 100755 index e31e3d2..0000000 --- a/MCUME_pico/picovga_t4/render/vga_graph8.S +++ /dev/null @@ -1,134 +0,0 @@ - -// **************************************************************************** -// -// VGA render GF_GRAPH8 -// -// **************************************************************************** - -#include "../define.h" // common definitions of C and ASM - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// extern "C" u32* RenderGrad1(u32* cbuf, int x, int y, int w, sSegm* segm); - -// render gradient with 1 line GF_GRAD1 -// R0 ... pointer to control buffer -// R1 ... start X coordinate (in pixels, must be multiple of 4) -// R2 ... start Y coordinate (in graphics lines), will be ignored and substituted with 0 -// R3 ... width to display (must be multiple of 4 and > 0) -// [stack] ... segm video segment sSegm -// Output new pointer to control buffer. -// 320 pixels takes 0.45 us on 151 MHz. - -.thumb_func -.global RenderGrad1 -RenderGrad1: - movs r2,#0 - - -// extern "C" u32* RenderGrad2(u32* cbuf, int x, int y, int w, sSegm* segm); - -// render gradient with 2 lines GF_GRAD2 -// R0 ... pointer to control buffer -// R1 ... start X coordinate (in pixels, must be multiple of 4) -// R2 ... start Y coordinate (in graphics lines), will be masked to values 0 and 1 -// R3 ... width to display (must be multiple of 4 and > 0) -// [stack] ... segm video segment sSegm -// Output new pointer to control buffer. -// 320 pixels takes 0.45 us on 151 MHz. - -.thumb_func -.global RenderGrad2 -RenderGrad2: - lsls r2,#31 - lsrs r2,#31 - - -// extern "C" u32* RenderGraph8(u32* cbuf, int x, int y, int w, sSegm* segm); - -// render native 8-bit graphics GF_GRAPH8 -// R0 ... pointer to control buffer -// R1 ... start X coordinate (in pixels, must be multiple of 4) -// R2 ... start Y coordinate (in graphics lines) -// R3 ... width to display (must be multiple of 4 and > 0) -// [stack] ... segm video segment sSegm -// Output new pointer to control buffer. -// 320 pixels takes 0.45 us on 151 MHz. - -.thumb_func -.global RenderGraph8 -RenderGraph8: - - // push registers - push {r4-r7,lr} - -// Stack content: -// SP+0: R4 -// SP+4: R5 -// SP+8: R6 -// SP+12: R7 -// SP+16: LR -// SP+20: video segment - -// Variables: -// R0 ... pointer to control buffer -// R1 ... X coordinate, later: width of one segment -// R2 ... Y coordinate, later: current pointer to data buffer -// R3 ... remaining width -// R4 ... base pointer to data buffer -// R5 ... (temporary) -// R6 ... (temporary) -// R7 ... wrap width - - // get pointer to video segment -> R4 - ldr r4,[sp,#20] // load video segment -> R4 - - // get wrap width -> R7 - ldrh r7,[r4,#SSEGM_WRAPX] // get wrap width - movs r6,#3 // mask to align to 32-bit - bics r7,r6 // align wrap - - // align X coordinate to 32-bit -> R1 - bics r1,r6 - - // align remaining width -> R3 - bics r3,r6 - - // base pointer to data buffer (without X) -> R4 - ldrh r5,[r4,#SSEGM_WB] // get pitch of rows - muls r2,r5 // Y * WB -> offset in data buffer - ldr r5,[r4,#SSEGM_DATA] // pointer to data - adds r4,r2,r5 // base address of data buffer -> R4 - - // prepare current pointer to image data with X -> R2 - adds r2,r4,r1 // pointer to source data buffer -> R2 - - // prepare wrap width - start X -> R1 - subs r1,r7,r1 // pixels remaining to end of segment - - // check remaining width -2: tst r3,r3 // check remaining width - beq 6f // end of data - - // limit wrap width by total width -> R1 - cmp r1,r3 // compare with wrap width - bls 4f // width is OK - mov r1,r3 // limit wrap width - - // decrease remaining width -4: subs r3,r1 // subtract from remaining width - - // save control block - lsrs r1,#2 // width / 4 - stm r0!,{r1,r2} // save width and pointer to control block - - // continue to next loop - mov r1,r7 // load wrap width -> R1 - mov r2,r4 // get base pointer to text data -> R2 - b 2b // go next loop - - // pop registers and return -6: pop {r4-r7,pc} diff --git a/MCUME_pico/picovga_t4/render/vga_graph8mat.S b/MCUME_pico/picovga_t4/render/vga_graph8mat.S deleted file mode 100755 index 4e07bb4..0000000 --- a/MCUME_pico/picovga_t4/render/vga_graph8mat.S +++ /dev/null @@ -1,310 +0,0 @@ - -// **************************************************************************** -// -// VGA render GF_GRAPH8MAT -// -// **************************************************************************** -// data ... image data -// par ... pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL)) -// par2 ... LOW=number of bits of image width, HIGH=number of bits of image height -// image width must be max. 4096 (= 1< LR - lsrs r1,r3,#1 // width/2 - negs r1,r1 // negate - mov lr,r1 // store start coordinate X0 -> LR - - // prepare number of 4-pixels (loop counter) -> R7 - lsrs r7,r3,#2 // width/4 -> R7 - - // get pointer to video segment -> R4 - ldr r4,[sp,#20] // load video segment -> R4 - - // prepare current coordinate Y0 = -h/2 + y -> R12 - ldrh r1,[r4,#SSEGM_WRAPY] // get segment height -> R1 - lsrs r1,#1 // height/2 - negs r1,r1 // negate - adds r1,r2 // add current Y coordinate - mov r12,r1 // store current coordinate Y0 -> R12 - - // get number of bits of image width "xbits" -> R1 - ldrh r1,[r4,#SSEGM_PAR2] // number of bits of image width -> R1 - - // get number of bits of image height "ybits" -> R2 - ldrh r2,[r4,#SSEGM_PAR2+2] // number of bits of image height -> R2 - - // prepare address of interpolator base -> R3 - ldr r3,RenderGraph8Mat_Interp // get address of interpolator base -> R3 - -// R0 ... pointer to data buffer -// R1 ... number of bits of image width xbits -// R2 ... number of bits of image height ybits -// R3 ... interpolator base -// R4 ... video segment -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 - -// ---- setup interpolator - - // set image base to base2 - ldr r6,[r4,#SSEGM_DATA] // load image base - str r6,[r3,#BASE2_OFFSET] // set image base - - // set control word of lane 1 - add raw lane base back to accumulator, shift "FRACT-xbits", mask xbits...xbits+ybits-1 - ldr r6,RenderGraph8Mat_Ctrl // load control word - subs r6,r1 // FRACT - xbits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required) - lsls r5,r1,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift xbits to mask LSB position -> R5 - orrs r6,r5 // add xbits to control word - subs r1,#1 // xbits - 1 -> R1 - adds r5,r1,r2 // xbits-1+ybits -> R5 - lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to MSB mask position - orrs r6,r5 // add to control word - str r6,[r3,#CTRL_LANE1_OFFSET] // set control word of lane 1 - -// R0 ... pointer to data buffer -// R1 ... image width xbits-1 -// R3 ... interpolator base -// R4 ... video segment -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 - - // set control word of lane 0 - add raw lane base back to accumulator, shift "FRACT", mask 0..xbits-1 - ldr r6,RenderGraph8Mat_Ctrl // load control word - lsls r1,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift xbits-1 to mask MSB position - orrs r6,r1 // add to control word - str r6,[r3,#CTRL_LANE0_OFFSET] // set control word of lane 0 - -// R0 ... pointer to data buffer -// R3 ... interpolator base -// R4 ... video segment -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 - -// ---- set matrix - - // get pointer to matrix -> R4 - ldr r4,[r4,#SSEGM_PAR] // get pointer to matrix -> R4 - -// r4+0 ... m11 -// r4+4 ... m12 -// r4+8 ... m13 -// r4+12 ... m21 -// r4+16 ... m22 -// r4+20 ... m23 - - // set m11 -> R5 base0 - ldr r5,[r4,#0] // load m11 - str r5,[r3,#BASE0_OFFSET] // set base0 - - // set m21 -> R6 base1 - ldr r6,[r4,#12] // load m21 - str r6,[r3,#BASE1_OFFSET] // set base1 - -// R0 ... pointer to data buffer -// R3 ... interpolator base -// R4 ... pointer to matrix -// R5 ... m11 -// R6 ... m21 -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 - - // set x0*m11 + y0*m12 + m13 -> accum0 - mov r2,lr // start coordinate X0 -> X2 - muls r5,r2 // x0*m11 -> R5 - muls r2,r6 // x0*m21 -> R2 - ldr r1,[r4,#4] // load m12 -> R1 - mov r6,r12 // load coordinate Y0 -> R6 - muls r1,r6 // y0*m12 -> R1 - adds r5,r1 // x0*m11 + y0*m12 -> R5 - ldr r1,[r4,#8] // load m13 -> R1 - adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5 - str r5,[r3,#ACCUM0_OFFSET] // set accum0 - -// R0 ... pointer to data buffer -// R2 ... x0*m21 -// R3 ... interpolator base -// R4 ... pointer to matrix -// R6 ... current coordinate Y0 -// R7 ... width/4 - - // set x0*m21 + y0*m22 + m23 -> accum1 - ldr r1,[r4,#16] // load m22 -> R1 - muls r1,r6 // y0*m22 -> R1 - adds r2,r1 // x0*m21 + y0*m22 -> R2 - ldr r1,[r4,#20] // load m23 -> R1 - adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2 - str r2,[r3,#ACCUM1_OFFSET] // set accum1 - -// ---- process odd 4-pixel - -// R0 ... pointer to destination data buffer -// R3 ... interpolator base -// R4 ... (temporary - get pointer to pixel) -// R5 ... (temporary - load pixel) -// R6 ... (temporary - pixel accumulator) -// R7 ... width/4 (loop counter) - - lsrs r7,#1 // width/4/2 - bcc 2f // no odd 4-pixel - - // [3] load 1st pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r6,[r4,#0] // [2] load pixel - - // [5] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r5,[r4,#0] // [2] load pixel - lsls r5,#8 // [1] shift 1 byte left - orrs r6,r5 // [1] add pixel to accumulator - - // [5] load 3rd pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r5,[r4,#0] // [2] load pixel - lsls r5,#16 // [1] shift 2 bytes left - orrs r6,r5 // [1] add pixel to accumulator - - // [5] load 4th pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r5,[r4,#0] // [2] load pixel - lsls r5,#24 // [1] shift 3 bytes left - orrs r6,r5 // [1] add pixel to accumulator - - // [2] store 4 pixels - stmia r0!,{r6} // [2] store 4 pixels - - // check number of remaining pixels -2: tst r7,r7 // check number of pixels - beq 8f // end - -// ---- [42 per 8 pixels] inner loop -// R0 ... pointer to destination data buffer -// R1 ... (temporary - pixel accumulator 1) -// R2 ... (temporary - pixel accumulator 2) -// R3 ... interpolator base -// R4 ... (temporary - get pointer to pixel, load pixel) -// R7 ... width/8 (loop counter) - - // [3] load 1st pixel -6: ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r1,[r4,#0] // [2] load pixel - - // [5] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r4,[r4,#0] // [2] load pixel - lsls r4,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - - // [5] load 3rd pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r4,[r4,#0] // [2] load pixel - lsls r4,#16 // [1] shift 2 bytes left - orrs r1,r4 // [1] add pixel to accumulator - - // [5] load 4th pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r4,[r4,#0] // [2] load pixel - lsls r4,#24 // [1] shift 3 bytes left - orrs r1,r4 // [1] add pixel to accumulator - - // [3] load 1st pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r2,[r4,#0] // [2] load pixel - - // [5] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r4,[r4,#0] // [2] load pixel - lsls r4,#8 // [1] shift 1 byte left - orrs r2,r4 // [1] add pixel to accumulator - - // [5] load 3rd pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r4,[r4,#0] // [2] load pixel - lsls r4,#16 // [1] shift 2 bytes left - orrs r2,r4 // [1] add pixel to accumulator - - // [5] load 4th pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r4,[r4,#0] // [2] load pixel - lsls r4,#24 // [1] shift 3 bytes left - orrs r2,r4 // [1] add pixel to accumulator - - // [3] store 8 pixels - stmia r0!,{r1,r2} // [3] store 8 pixels - - // [2,3] loop counter - subs r7,#1 // [1] 8-pixel counter - bne 6b // [1,2] next 8-pixels - - // pop registers -8: pop {r4-r7,pc} - - .align 2 -// pointer to Interp1 base -RenderGraph8Mat_Interp: - .word SIO_BASE+SIO_INTERP1_ACCUM0_OFFSET // addres of interpolator base - -RenderGraph8Mat_Ctrl: // lane control word - .word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT< R4 - ldr r4,[sp,#20] // load video segment -> R4 - - // prepare current coordinate Y0 = y - h -> R12 - ldrh r1,[r4,#SSEGM_WRAPY] // get segment height -> R1 - subs r2,r1 // y - h = current Y coordinate - mov r12,r2 // store current coordinate Y0 -> R12 - - // start calculating distance coefficient dist = FRACTMUL*h/(y - h + horiz + 1) - lsls r6,r1,#FRACT // segment height * FRACTMUL -> R6 - ldr r5,RenderGraph8Persp_pSioBase // get address of SIO base -> R5 - str r6,[r5,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h - ldrh r6,[r4,#SSEGM_PAR3] // horizon offset -> R6 - adds r2,r1 // y = current Y coordinate - adds r6,r2 // horizon + y -> R6 - adds r6,#1 // horizon + y + 1 -> R6 - str r6,[r5,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz + 1 - - // prepare start coordinate X0 = -w/2 -> LR - lsrs r5,r3,#1 // width/2 - negs r5,r5 // negate - mov lr,r5 // store start coordinate X0 -> LR - - // prepare number of 4-pixels (loop counter) -> R7 - lsrs r7,r3,#2 // width/4 -> R7 - - // get number of bits of image width "xbits" -> R1 - ldrh r1,[r4,#SSEGM_PAR2] // number of bits of image width -> R1 - - // get number of bits of image height "ybits" -> R2 - ldrh r2,[r4,#SSEGM_PAR2+2] // number of bits of image height -> R2 - - // prepare address of interpolator base -> R3 - ldr r3,RenderGraph8Persp_Interp // get address of interpolator base -> R3 - -// R0 ... pointer to data buffer -// R1 ... number of bits of image width xbits -// R2 ... number of bits of image height ybits -// R3 ... interpolator base -// R4 ... video segment -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 - -// ---- setup interpolator - - // set image base to base2 - ldr r6,[r4,#SSEGM_DATA] // load image base - str r6,[r3,#BASE2_OFFSET] // set image base - - // set control word of lane 1 - add raw lane base back to accumulator, shift "FRACT-xbits", mask xbits...xbits+ybits-1 - ldr r6,RenderGraph8Persp_Ctrl // load control word - subs r6,r1 // FRACT - xbits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required) - lsls r5,r1,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift xbits to mask LSB position -> R5 - orrs r6,r5 // add xbits to control word - subs r1,#1 // xbits - 1 -> R1 - adds r5,r1,r2 // xbits-1+ybits -> R5 - lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to MSB mask position - orrs r6,r5 // add to control word - str r6,[r3,#CTRL_LANE1_OFFSET] // set control word of lane 1 - -// R0 ... pointer to data buffer -// R1 ... image width xbits-1 -// R3 ... interpolator base -// R4 ... video segment -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 - - // set control word of lane 0 - add raw lane base back to accumulator, shift "FRACT", mask 0..xbits-1 - ldr r6,RenderGraph8Persp_Ctrl // load control word - lsls r1,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift xbits-1 to mask MSB position - orrs r6,r1 // add to control word - str r6,[r3,#CTRL_LANE0_OFFSET] // set control word of lane 0 - -// R0 ... pointer to data buffer -// R3 ... interpolator base -// R4 ... video segment -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 - -// ---- set matrix - - // get pointer to matrix -> R4 - ldr r4,[r4,#SSEGM_PAR] // get pointer to matrix -> R4 - - // get distance coefficient dist -> R1 - ldr r1,RenderGraph8Persp_pSioBase // get address of SIO base -> R1 - ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient - -// r4+0 ... m11 -// r4+4 ... m12 -// r4+8 ... m13 -// r4+12 ... m21 -// r4+16 ... m22 -// r4+20 ... m23 - - // set m11 -> R5 base0 - ldr r5,[r4,#0] // load m11 - muls r5,r1 // m11*dist - asrs r5,#FRACT // (m11*dist)>>FRACT - str r5,[r3,#BASE0_OFFSET] // set base0 - - // set m21 -> R6 base1 - ldr r6,[r4,#12] // load m21 - muls r6,r1 // m11*dist - asrs r6,#FRACT // (m11*dist)>>FRACT - str r6,[r3,#BASE1_OFFSET] // set base1 - -// R0 ... pointer to data buffer -// R1 ... distance coefficient -// R3 ... interpolator base -// R4 ... pointer to matrix -// R5 ... m11 -// R6 ... m21 -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 - - // set x0*m11 + y0*m12 + m13 -> accum0 - mov r2,lr // start coordinate X0 -> X2 - muls r5,r2 // x0*m11 -> R5 - muls r2,r6 // x0*m21 -> R2 - mov lr,r1 // save distance coefficient -> LR - ldr r6,[r4,#4] // load m12 -> R6 - muls r1,r6 // m12*dist - asrs r1,#FRACT // (m12*dist)>>FRACT - mov r6,r12 // load coordinate Y0 -> R6 - muls r1,r6 // y0*m12 -> R1 - adds r5,r1 // x0*m11 + y0*m12 -> R5 - ldr r1,[r4,#8] // load m13 -> R1 - adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5 - str r5,[r3,#ACCUM0_OFFSET] // set accum0 - -// R0 ... pointer to data buffer -// R2 ... x0*m21 -// R3 ... interpolator base -// R4 ... pointer to matrix -// R6 ... current coordinate Y0 -// R7 ... width/4 -// LR ... distance coefficient - - // set x0*m21 + y0*m22 + m23 -> accum1 - ldr r1,[r4,#16] // load m22 -> R1 - mov r5,lr // distance coefficient -> R5 - muls r1,r5 // m22*dist - asrs r1,#FRACT // (m22*dist)>>FRACT - muls r1,r6 // y0*m22 -> R1 - adds r2,r1 // x0*m21 + y0*m22 -> R2 - ldr r1,[r4,#20] // load m23 -> R1 - adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2 - str r2,[r3,#ACCUM1_OFFSET] // set accum1 - -// ---- process odd 4-pixel - -// R0 ... pointer to destination data buffer -// R3 ... interpolator base -// R4 ... (temporary - get pointer to pixel) -// R5 ... (temporary - load pixel) -// R6 ... (temporary - pixel accumulator) -// R7 ... width/4 (loop counter) - - lsrs r7,#1 // width/4/2 - bcc 2f // no odd 4-pixel - - // [3] load 1st pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r6,[r4,#0] // [2] load pixel - - // [5] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r5,[r4,#0] // [2] load pixel - lsls r5,#8 // [1] shift 1 byte left - orrs r6,r5 // [1] add pixel to accumulator - - // [5] load 3rd pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r5,[r4,#0] // [2] load pixel - lsls r5,#16 // [1] shift 2 bytes left - orrs r6,r5 // [1] add pixel to accumulator - - // [5] load 4th pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r5,[r4,#0] // [2] load pixel - lsls r5,#24 // [1] shift 3 bytes left - orrs r6,r5 // [1] add pixel to accumulator - - // [2] store 4 pixels - stmia r0!,{r6} // [2] store 4 pixels - - // check number of remaining pixels -2: tst r7,r7 // check number of pixels - beq 8f // end - -// ---- [42 per 8 pixels] inner loop -// R0 ... pointer to destination data buffer -// R1 ... (temporary - pixel accumulator 1) -// R2 ... (temporary - pixel accumulator 2) -// R3 ... interpolator base -// R4 ... (temporary - get pointer to pixel, load pixel) -// R7 ... width/8 (loop counter) - - // [3] load 1st pixel -6: ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r1,[r4,#0] // [2] load pixel - - // [5] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r4,[r4,#0] // [2] load pixel - lsls r4,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - - // [5] load 3rd pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r4,[r4,#0] // [2] load pixel - lsls r4,#16 // [1] shift 2 bytes left - orrs r1,r4 // [1] add pixel to accumulator - - // [5] load 4th pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r4,[r4,#0] // [2] load pixel - lsls r4,#24 // [1] shift 3 bytes left - orrs r1,r4 // [1] add pixel to accumulator - - // [3] load 1st pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r2,[r4,#0] // [2] load pixel - - // [5] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r4,[r4,#0] // [2] load pixel - lsls r4,#8 // [1] shift 1 byte left - orrs r2,r4 // [1] add pixel to accumulator - - // [5] load 3rd pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r4,[r4,#0] // [2] load pixel - lsls r4,#16 // [1] shift 2 bytes left - orrs r2,r4 // [1] add pixel to accumulator - - // [5] load 4th pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r4,[r4,#0] // [2] load pixel - lsls r4,#24 // [1] shift 3 bytes left - orrs r2,r4 // [1] add pixel to accumulator - - // [3] store 8 pixels - stmia r0!,{r1,r2} // [3] store 8 pixels - - // [2,3] loop counter - subs r7,#1 // [1] 8-pixel counter - bne 6b // [1,2] next 8-pixels - - // pop registers -8: pop {r4-r7,pc} - - .align 2 -// pointer to SIO base -RenderGraph8Persp_pSioBase: - .word SIO_BASE // addres of SIO base - -// pointer to Interp1 base -RenderGraph8Persp_Interp: - .word SIO_BASE+SIO_INTERP1_ACCUM0_OFFSET // addres of interpolator base - -RenderGraph8Persp_Ctrl: // lane control word - .word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT< 0) -// [stack] ... segm video segment sSegm -// Output new pointer to destination data buffer. -// 320 pixels takes 8.3 us on 151 MHz. - -.thumb_func -.global RenderGText -RenderGText: - - // push registers - push {r1-r7,lr} - mov r4,r8 - push {r4} - -// Stack content: -// SP+0: R8 -// SP+4: R1 start X coordinate (later: base pointer to gradient array) -// SP+8: R2 start Y coordinate (later: base pointer to text data row) -// SP+12: R3 width to display -// SP+16: R4 -// SP+20: R5 -// SP+24: R6 -// SP+28: R7 -// SP+32: LR -// SP+36: video segment (later: wrap width in X direction) - - // get pointer to video segment -> R4 - ldr r4,[sp,#36] // load video segment -> R4 - - // start divide Y/font height - ldr r6,RenderGText_pSioBase // get address of SIO base -> R6 - str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate - ldrh r2,[r4,#SSEGM_PAR3] // font height -> R2 - str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height - -// - now we must wait at least 8 clock cycles to get result of division - - // [6] get wrap width -> [SP+36] - ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width - movs r7,#3 // [1] mask to align to 32-bit - bics r5,r7 // [1] align wrap - str r5,[sp,#36] // [2] save wrap width - - // [1] align X coordinate to 32-bit - bics r1,r7 // [1] - - // [3] align remaining width - bics r3,r7 // [1] - str r3,[sp,#12] // [2] save new width - - // load result of division Y/font_height -> R6 Y relative at row, R7 Y row - // Note: QUOTIENT must be read last - ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row - ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R2, index of row - - // pointer to font line -> R8 - lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long) - ldr r3,[r4,#SSEGM_PAR] // get pointer to font - add r3,r5 // line offset + font base -> pointer to current font line R3 - mov r8,r3 - - // base pointer to text data (without X) -> [SP+8], R2 - ldrh r5,[r4,#SSEGM_WB] // get pitch of rows - muls r2,r5 // Y * WB -> offset of row in text buffer - ldr r5,[r4,#SSEGM_DATA] // pointer to data - add r2,r5 // base address of text buffer - str r2,[sp,#8] // save pointer to text buffer - - // base pointer to gradient array -> [SP+4], R3 - ldr r3,[r4,#SSEGM_PAR2] // pointer to graient array - str r3,[sp,#4] // save pointer to gradient array - - // prepare pointer to text data with X -> R2 - add r3,r1 // pointer to source gradient array - lsrs r6,r1,#3 // convert X to character index (1 character is 8 pixels width) - add r2,r6 // pointer to source text buffer -> R2 - - // prepare background color, expand to 32 bits -> R4 - ldrb r4,[r4,#SSEGM_PAR3] // load background color - lsls r5,r4,#8 // shift background color << 8 - orrs r5,r4 // color expanded to 16 bits - lsls r4,r5,#16 // shift 16-bit color << 16 - orrs r4,r5 // color expanded to 32 bits - - // prepare pointer to conversion table -> LR - ldr r5,RenderGText_Addr // get pointer to conversion table -> R5 - mov lr,r5 // conversion table -> LR - -// ---- render 2nd half of first character -// R0 ... pointer to destination data buffer -// R1 ... start X coordinate -// R2 ... pointer to source text buffer -// R3 ... pointer to gradient array -// R4 ... background color (expanded to 32-bit) -// R5..R7 ... (temporary) -// R8 ... pointer to font line -// LR ... pointer to conversion table -// [SP+4] ... base pointer to gradient array -// [SP+8] ... base pointer to text data (without X) -// [SP+12] ... remaining width -// [SP+36] ... wrap width - - // check bit 2 of X coordinate - check if image starts with 2nd half of first character - lsls r6,r1,#29 // check bit 2 of X coordinate - bpl 2f // bit 2 not set, starting even 4-pixels - - // [6] load font sample -> R5 - ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 - adds r2,#1 // [1] shift pointer to source text buffer - add r5,r8 // [1] pointer to font line - ldrb r5,[r5] // [2] load font sample -> R5 - - // [3] load foreground color, XOR with background -> R6 - ldmia r3!,{r6} // [2] load foreground color from gradient buffer - eors r6,r4 // [1] XOR foreground color with background color - - // [2] prepare conversion table -> R5 - lsls r5,#3 // [1] multiply font sample * 8 - add r5,lr // [1] add pointer to conversion table - - // [6] convert second 4 pixels (lower 4 bits) - ldr r7,[r5,#4] // [2] load mask for lower 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r7} // [2] store second 4 pixels - - // shift X coordinate - adds r1,#4 // shift X coordinate - - // check end of segment - ldr r7,[sp,#36] // load wrap width - cmp r1,r7 // end of segment? - blo 1f - movs r1,#0 // reset X coordinate - ldr r2,[sp,#8] // get base pointer to text data -> R2 - ldr r3,[sp,#4] // get base pointer to gradient array -> R3 - - // shift remaining width -1: ldr r7,[sp,#12] // get remaining width - subs r7,#4 // shift width - str r7,[sp,#12] // save new width - - // prepare wrap width - start X -> R7 -2: ldr r7,[sp,#36] // load wrap width - subs r7,r1 // pixels remaining to end of segment - -// ---- start outer loop, render one part of segment -// Outer loop variables (* prepared before outer loop): -// R0 ... *pointer to destination data buffer -// R1 ... number of characters to generate in one part of segment -// R2 ... *pointer to source text buffer -// R3 ... *pointer to gradient array -// R4 ... *background color (expanded to 32-bit) -// R5 ... (temporary) -// R6 ... (temporary) -// R7 ... *wrap width of this segment, later: temporary -// R8 ... *pointer to font line -// LR ... *pointer to conversion table -// [SP+4] ... base pointer to gradient array -// [SP+8] ... base pointer to text data (without X) -// [SP+12] ... remaining width -// [SP+36] ... wrap width - -RenderGText_OutLoop: - - // limit wrap width by total width -> R7 - ldr r6,[sp,#12] // get remaining width - cmp r7,r6 // compare with wrap width - bls 2f // width is OK - mov r7,r6 // limit wrap width - - // check if remain whole characters -2: cmp r7,#8 // check number of remaining pixels - bhs 5f // enough characters remain - - // check if 1st part of last character remains - cmp r7,#4 // check 1st part of last character - blo 3f // all done - -// ---- render 1st part of last character - -RenderGText_Last: - - // [6] load font sample -> R5 - ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 - adds r2,#1 // [1] shift pointer to source text buffer - add r5,r8 // [1] pointer to font line - ldrb r5,[r5] // [2] load font sample -> R5 - - // [3] load foreground color, XOR with background -> R6 - ldmia r3!,{r6} // [2] load foreground color from gradient buffer - eors r6,r4 // [1] XOR foreground color with background color - - // [2] prepare conversion table -> R5 - lsls r5,#3 // [1] multiply font sample * 8 - add r5,lr // [1] add pointer to conversion table - - // [6] convert first 4 pixels (higher 4 bits) - ldr r1,[r5,#0] // [2] load mask for higher 4 bits - ands r1,r6 // [1] mask foreground color - eors r1,r4 // [1] combine with background color - stmia r0!,{r1} // [2] store first 4 pixels - - // check if continue with next segment - ldr r2,[sp,#8] // get base pointer to text data -> R2 - ldr r3,[sp,#4] // get base pointer to gradient array -> R3 - cmp r7,#4 - bhi RenderGText_OutLoop - - // pop registers and return -3: pop {r4} - mov r8,r4 - pop {r1-r7,pc} - -// ---- prepare to render whole characters - - // prepare number of whole characters to render -> R1 -5: lsrs r1,r7,#2 // shift to get number of characters*2 - lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5 - subs r6,r5 // get remaining width - str r6,[sp,#12] // save new remaining width - subs r1,#1 // number of characters*2 - 1 - -// ---- [28*N-1] start inner loop, render characters in one part of segment -// Inner loop variables (* prepared before inner loop): -// R0 ... *pointer to destination data buffer -// R1 ... *number of characters to generate*2 - 1 (loop counter) -// R2 ... *pointer to source text buffer -// R3 ... *pointer to gradient array -// R4 ... *background color (expanded to 32-bit) -// R5 ... (temporary) -// R6 ... foreground color -// R7 ... font sample -// R8 ... *pointer to font line -// LR ... *pointer to conversion table -// [SP+4] ... base pointer to gradient array -// [SP+8] ... base pointer to text data (without X) -// [SP+12] ... remaining width -// [SP+36] ... wrap width - -RenderGText_InLoop: - - // [6] load font sample -> R7 - ldrb r7,[r2,#0] // [2] load character from source text buffer -> R7 - adds r2,#1 // [1] shift pointer to source text buffer - add r7,r8 // [1] pointer to font line - ldrb r7,[r7] // [2] load font sample -> R7 - - // [3] load foreground color, XOR with background -> R6 - ldmia r3!,{r6} // [2] load foreground color from gradient buffer - eors r6,r4 // [1] XOR foreground color with background color - - // [2] prepare conversion table -> R7 - lsls r7,#3 // [1] multiply font sample * 8 - add r7,lr // [1] add pointer to conversion table - - // [4] convert first 4 pixels (higher 4 bits) -> R5 - ldr r5,[r7,#0] // [2] load mask for higher 4 bits - ands r5,r6 // [1] mask foreground color - eors r5,r4 // [1] combine with background color - - // [3] load foreground color, XOR with background -> R6 - ldmia r3!,{r6} // [2] load foreground color from gradient buffer - eors r6,r4 // [1] XOR foreground color with background color - - // [7] convert and store second 4 pixels (lower 4 bits) - ldr r7,[r7,#4] // [2] load mask for lower 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r5,r7} // [3] store 8 pixels - - // [2,3] loop counter - subs r1,#2 // [1] shift loop counter - bhi RenderGText_InLoop // [1,2] > 0, render next whole character - -// ---- end inner loop, continue with last character, or start new part - - // continue to outer loop - ldr r7,[sp,#36] // load wrap width - beq RenderGText_Last // render 1st half of last character - ldr r2,[sp,#8] // get base pointer to text data -> R2 - ldr r3,[sp,#4] // get base pointer to gradient array -> R3 - b RenderGText_OutLoop // go back to outer loop - - .align 2 -RenderGText_Addr: - .word RenderTextMask -RenderGText_pSioBase: - .word SIO_BASE // addres of SIO base diff --git a/MCUME_pico/picovga_t4/render/vga_level.S b/MCUME_pico/picovga_t4/render/vga_level.S deleted file mode 100755 index 7057556..0000000 --- a/MCUME_pico/picovga_t4/render/vga_level.S +++ /dev/null @@ -1,431 +0,0 @@ - -// **************************************************************************** -// -// VGA render GF_LEVEL -// -// **************************************************************************** - -#include "../define.h" // common definitions of C and ASM - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// render font pixel mask -.extern RenderTextMask // u32 RenderTextMask[512]; - -// extern "C" u8* RenderLevel(u8* dbuf, int x, int y, int w, sSegm* segm); - -// render level graph GF_LEVEL -// dbuf ... destination data buffer -// x ... start X coordinate (must be multiple of 4) -// y ... start Y coordinate -// w ... width of this segment (must be multiple of 4) -// segm ... video segment -// Output new dbuf pointer. -// 320 pixels takes 14 us on 151 MHz. - -.thumb_func -.global RenderLevel -RenderLevel: - - // push registers - push {r1-r7,lr} - -// Input registers and stack content: -// R0 ... pointer to testination data buffer -// SP+0: R1 start X coordinate (later: zero level) -// SP+4: R2 start Y coordinate (later: base pointer to sample data) -// SP+8: R3 width to display -// SP+12: R4 -// SP+16: R5 -// SP+20: R6 -// SP+24: R7 -// SP+28: LR -// SP+32: video segment (later: wrap width in X direction) - - // get pointer to video segment -> R4 - ldr r4,[sp,#32] // load video segment -> R4 - - // get wrap width -> [SP+32] - ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width - movs r7,#3 // mask to align to 32-bit - bics r5,r7 // align wrap - str r5,[sp,#32] // save wrap width - - // align X coordinate to 32-bit -> R1 - bics r1,r7 - - // align remaining width -> [SP+8] - bics r3,r7 - str r3,[sp,#8] // save new width - - // current Y in direction from bottom to up -> R5 - ldrh r5,[r4,#SSEGM_WRAPY] // get wrap height - subs r5,#1 // wrapy - 1 - subs r5,r2 // subtract Y, get Y relative to bottom -> R5 - - // get zero level -> [SP+0] - ldrb r3,[r4,#SSEGM_PAR2] // get zero level - str r3,[sp,#0] // save zero level - - // base pointer to sample data (without X) -> [SP+4], R2 - ldr r2,[r4,#SSEGM_DATA] // pointer to sample data - str r2,[sp,#4] // save pointer to sample buffer - - // prepare pointer to sample data with X -> R2 - add r2,r1 // pointer to source sample buffer -> R2 - - // prepare foreground color, expand to 32-bit -> R6 - ldrb r6,[r4,#SSEGM_PAR+1] // load foreground color - lsls r3,r6,#8 // [1] shift foreground color << 8 - orrs r3,r6 // [1] color expanded to 16 bits - lsls r6,r3,#16 // [1] shift 16-bit color << 16 - orrs r6,r3 // [1] color expanded to 32 bits - - // prepare background color, expand to 32 bits -> R4 - ldrb r4,[r4,#SSEGM_PAR] // load background color - lsls r3,r4,#8 // shift background color << 8 - orrs r3,r4 // color expanded to 16 bits - lsls r4,r3,#16 // shift 16-bit color << 16 - orrs r4,r3 // color expanded to 32 bits - - // [1] XOR foreground and background color -> R6 - eors r6,r4 // [1] XOR foreground color with background color - - // prepare pointer to conversion table -> LR - ldr r3,RenderLevel_Addr // get pointer to conversion table -> R5 - mov lr,r3 // conversion table -> LR - - // prepare wrap width - start X -> R7 - ldr r7,[sp,#32] // load wrap width - subs r7,r1 // pixels remaining to end of segment - - // last 4-pixels - cmp r7,#4 - bhi RenderLevel_OutLoop - ldr r7,[sp,#32] // load wrap width - b RenderLevel_Last // render last 4-pixels of first segment - -// ---- start outer loop, render one part of segment -// Outer loop variables (* prepared before outer loop): -// R0 ... *pointer to destination data buffer -// R1 ... number of 4-pixels to generate in one part of segment -// R2 ... *pointer to source sample buffer -// R3 ... remaining width, later: (temporary) -// R4 ... *background color (expanded to 32-bit) -// R5 ... *current line Y (in direction from bottom to up) -// R6 ... *foreground color (expanded to 32-bit) -// R7 ... *wrap width of this segment, later: (temporary) -// LR ... *pointer to conversion table -// [SP+0] ... *zero level -// [SP+4] ... *base pointer to sample data (without X) -// [SP+8] ... *remaining width -// [SP+32] ... *wrap width - -RenderLevel_OutLoop: - - // limit wrap width by total width -> R7 - ldr r3,[sp,#8] // get remaining width - cmp r7,r3 // compare with wrap width - bls 2f // width is OK - mov r7,r3 // limit wrap width - - // check number of pixels -2: cmp r7,#8 // check number of remaining pixels - bhs 5f // enough pixels remain to render 8-pixels - - // check last 4-pixels - cmp r7,#4 // check last 4-pixels - blo 3f // all done - -// ---- render last 4 pixels - -RenderLevel_Last: - - // check half of graph - ldr r3,[sp,#0] // get zero level - cmp r5,r3 // check current line - blo RenderLevel_Last2 // bottom half of graph - -// ---- top half - - // [1] clear sample accumulator - movs r1,#0 // [1] clear sample accumulator - - // [4] get sample 0 - ldrb r3,[r2,#0] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 - - // [4] get sample 1 - ldrb r3,[r2,#1] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 - - // [4] get sample 2 - ldrb r3,[r2,#2] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 - - // [4] get sample 3 - ldrb r3,[r2,#3] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 - adds r2,#4 // [1] shift pointer to source buffer - - // [2] prepare conversion table -> R1 - lsls r1,#3 // [1] multiply sample * 8 - add r1,lr // [1] add pointer to conversion table - - // [7] convert 4 pixels (lower 4 bits) - ldr r1,[r1,#4] // [2] load mask for lower 4 bits - ands r1,r6 // [1] mask foreground color - eors r1,r4 // [1] combine with background color - stmia r0!,{r1} // [3] store 4 pixels - - b 7f - -// ---- bottom half - -RenderLevel_Last2: - - // [1] clear sample accumulator - movs r1,#0 // [1] clear sample accumulator - - // [4] get sample 0 - ldrb r3,[r2,#0] // [2] get data sample -> R3 - cmp r5,r3 // [1] compare sample with current line - adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 - - // [4] get sample 1 - ldrb r3,[r2,#1] // [2] get data sample -> R3 - cmp r5,r3 // [1] compare sample with current line - adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 - - // [4] get sample 2 - ldrb r3,[r2,#2] // [2] get data sample -> R3 - cmp r5,r3 // [1] compare sample with current line - adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 - - // [4] get sample 3 - ldrb r3,[r2,#3] // [2] get data sample -> R3 - cmp r5,r3 // [1] compare sample with current line - adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 - adds r2,#4 // [1] shift pointer to source buffer - - // [2] prepare conversion table -> R1 - lsls r1,#3 // [1] multiply sample * 8 - add r1,lr // [1] add pointer to conversion table - - // [7] convert 4 pixels (lower 4 bits) - ldr r1,[r1,#4] // [2] load mask for lower 4 bits - ands r1,r6 // [1] mask foreground color - eors r1,r4 // [1] combine with background color - stmia r0!,{r1} // [3] store 4 pixels - - - // check if continue with next segment -7: ldr r2,[sp,#4] // get base pointer to sample data -> R2 - cmp r7,#4 - bhi RenderLevel_OutLoop - - // pop registers and return -3: pop {r1-r7,pc} - -// ---- prepare to render 8-pixels - - // prepare number of whole 4-pixels to render -> R1 -5: lsrs r1,r7,#2 // shift width to get number of 4-pixels - lsls r7,r1,#2 // shift back to get number of pixels, rounded down -> R7 - subs r3,r7 // get remaining width - str r3,[sp,#8] // save new remaining width - subs r1,#1 // number of 4-pixels - 1 - - // check half of graph - ldr r3,[sp,#0] // get zero level - cmp r5,r3 // check current line - blo RenderLevel_InLoopBot // bottom half of graph - -// ---- [50*N-1] start inner loop, render in one part of segment - top half of graph -// Inner loop variables (* prepared before inner loop): -// R0 ... *pointer to destination data buffer -// R1 ... *number of 4-pixels to generate*2 - 1 (loop counter) -// R2 ... *pointer to source sample buffer -// R3 ... sample -// R4 ... *background color (expanded to 32-bit) -// R5 ... *current line Y (in direction from bottom to up) -// R6 ... *foreground color (expanded to 32-bit) -// R7 ... sample accumulator, conversion table -// LR ... *pointer to conversion table -// [SP+0] ... *zero level -// [SP+4] ... *base pointer to sample data (without X) -// [SP+8] ... *remaining width -// [SP+32] ... *wrap width - -RenderLevel_InLoopTop: // render 8 pixels in one loop step, top half of graph - - // [1] clear sample accumulator - movs r7,#0 // [1] clear sample accumulator - - // [4] get sample 0 - ldrb r3,[r2,#0] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [4] get sample 1 - ldrb r3,[r2,#1] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [4] get sample 2 - ldrb r3,[r2,#2] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [4] get sample 3 - ldrb r3,[r2,#3] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [4] get sample 4 - ldrb r3,[r2,#4] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [4] get sample 5 - ldrb r3,[r2,#5] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [4] get sample 6 - ldrb r3,[r2,#6] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [5] get sample 7 - ldrb r3,[r2,#7] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - adds r2,#8 // [1] shift pointer to source buffer - - // [2] prepare conversion table -> R7 - lsls r7,#3 // [1] multiply sample * 8 - add r7,lr // [1] add pointer to conversion table - - // [4] convert first 4 pixels (higher 4 bits) - ldr r3,[r7,#0] // [2] load mask for higher 4 bits - ands r3,r6 // [1] mask foreground color - eors r3,r4 // [1] combine with background color - - // [7] convert second 4 pixels (lower 4 bits) - ldr r7,[r7,#4] // [2] load mask for lower 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r3,r7} // [3] store second 4 pixels - - // [2,3] loop counter - subs r1,#2 // [1] shift loop counter - bhi RenderLevel_InLoopTop // [1,2] > 0, render next whole 8-pixels - -// ---- end inner loop, continue with last 4-pixels, or start new part - - // continue to outer loop - ldr r7,[sp,#32] // load wrap width -8: beq RenderLevel_Last // render last 4-pixels - ldr r2,[sp,#4] // get base pointer to sample data -> R2 - b RenderLevel_OutLoop // go back to outer loop - -// ---- [50*N-1] start inner loop, render in one part of segment - bottom half of graph -// Inner loop variables (* prepared before inner loop): -// R0 ... *pointer to destination data buffer -// R1 ... *number of 4-pixels to generate*2 - 1 (loop counter) -// R2 ... *pointer to source sample buffer -// R3 ... sample -// R4 ... *background color (expanded to 32-bit) -// R5 ... *current line Y (in direction from bottom to up) -// R6 ... *foreground color (expanded to 32-bit) -// R7 ... sample accumulator, conversion table -// LR ... *pointer to conversion table -// [SP+0] ... *zero level -// [SP+4] ... *base pointer to sample data (without X) -// [SP+8] ... *remaining width -// [SP+32] ... *wrap width - -RenderLevel_InLoopBot: // render 8 pixels in one loop step, bottom half of graph - - // [1] clear sample accumulator - movs r7,#0 // [1] clear sample accumulator - - // [4] get sample 0 - ldrb r3,[r2,#0] // [2] get data sample -> R3 - cmp r5,r3 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [4] get sample 1 - ldrb r3,[r2,#1] // [2] get data sample -> R3 - cmp r5,r3 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [4] get sample 2 - ldrb r3,[r2,#2] // [2] get data sample -> R3 - cmp r5,r3 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [4] get sample 3 - ldrb r3,[r2,#3] // [2] get data sample -> R3 - cmp r5,r3 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [4] get sample 4 - ldrb r3,[r2,#4] // [2] get data sample -> R3 - cmp r5,r3 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [4] get sample 5 - ldrb r3,[r2,#5] // [2] get data sample -> R3 - cmp r5,r3 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [4] get sample 6 - ldrb r3,[r2,#6] // [2] get data sample -> R3 - cmp r5,r3 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [5] get sample 7 - ldrb r3,[r2,#7] // [2] get data sample -> R3 - cmp r5,r3 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - adds r2,#8 // [1] shift pointer to source buffer - - // [2] prepare conversion table -> R7 - lsls r7,#3 // [1] multiply sample * 8 - add r7,lr // [1] add pointer to conversion table - - // [4] convert first 4 pixels (higher 4 bits) - ldr r3,[r7,#0] // [2] load mask for higher 4 bits - ands r3,r6 // [1] mask foreground color - eors r3,r4 // [1] combine with background color - - // [7] convert second 4 pixels (lower 4 bits) - ldr r7,[r7,#4] // [2] load mask for lower 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r3,r7} // [3] store second 4 pixels - - // [2,3] loop counter - subs r1,#2 // [1] shift loop counter - bhi RenderLevel_InLoopBot // [1,2] > 0, render next whole 8-pixels - -// ---- end inner loop, continue with last 4-pixels, or start new part - - // continue to outer loop - ldr r7,[sp,#32] // load wrap width - beq 8b // render last 4-pixels - ldr r2,[sp,#4] // get base pointer to sample data -> R2 - b RenderLevel_OutLoop // go back to outer loop - - .align 2 -RenderLevel_Addr: - .word RenderTextMask diff --git a/MCUME_pico/picovga_t4/render/vga_levelgrad.S b/MCUME_pico/picovga_t4/render/vga_levelgrad.S deleted file mode 100755 index f2671e6..0000000 --- a/MCUME_pico/picovga_t4/render/vga_levelgrad.S +++ /dev/null @@ -1,287 +0,0 @@ - -// **************************************************************************** -// -// VGA render GF_LEVELGRAD -// -// **************************************************************************** - -#include "../define.h" // common definitions of C and ASM - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// render font pixel mask -.extern RenderTextMask // u32 RenderTextMask[512]; - -// extern "C" u8* RenderLevelGrad(u8* dbuf, int x, int y, int w, sSegm* segm); - -// render level gradient graph GF_LEVELGRAD -// dbuf ... destination data buffer -// x ... start X coordinate (must be multiple of 4) -// y ... start Y coordinate -// w ... width of this segment (must be multiple of 4) -// segm ... video segment -// Output new dbuf pointer. -// 320 pixels takes 14 us on 151 MHz. - -.thumb_func -.global RenderLevelGrad -RenderLevelGrad: - - // push registers - push {r1-r7,lr} - -// Input registers and stack content: -// R0 ... pointer to testination data buffer -// SP+0: R1 start X coordinate -// SP+4: R2 start Y coordinate (later: base pointer to sample data) -// SP+8: R3 width to display -// SP+12: R4 -// SP+16: R5 -// SP+20: R6 -// SP+24: R7 -// SP+28: LR -// SP+32: video segment (later: wrap width in X direction) - - // get pointer to video segment -> R4 - ldr r4,[sp,#32] // load video segment -> R4 - - // get wrap width -> [SP+32] - ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width - movs r7,#3 // mask to align to 32-bit - bics r5,r7 // align wrap - str r5,[sp,#32] // save wrap width - - // align X coordinate to 32-bit -> R1 - bics r1,r7 - - // align remaining width -> [SP+8] - bics r3,r7 - str r3,[sp,#8] // save new width - - // current Y in direction from bottom to up -> R5 - ldrh r5,[r4,#SSEGM_WRAPY] // get wrap height - subs r5,#1 // wrapy - 1 - subs r5,r2 // subtract Y, get Y relative to bottom -> R5 - - // base pointer to sample data (without X) -> [SP+4], R2 - ldr r2,[r4,#SSEGM_DATA] // pointer to sample data - str r2,[sp,#4] // save pointer to sample buffer - - // prepare pointer to sample data with X -> R2 - add r2,r1 // pointer to source sample buffer -> R2 - - // prepare foreground color, expand to 32-bit -> R6 - ldr r6,[r4,#SSEGM_PAR] // pointer to gradient 1 - ldrb r6,[r6,r5] // load foreground color - lsls r3,r6,#8 // [1] shift foreground color << 8 - orrs r3,r6 // [1] color expanded to 16 bits - lsls r6,r3,#16 // [1] shift 16-bit color << 16 - orrs r6,r3 // [1] color expanded to 32 bits - - // prepare background color, expand to 32 bits -> R4 - ldr r4,[r4,#SSEGM_PAR2] // pointer to gradient 2 - ldrb r4,[r4,r5] // load background color - lsls r3,r4,#8 // shift background color << 8 - orrs r3,r4 // color expanded to 16 bits - lsls r4,r3,#16 // shift 16-bit color << 16 - orrs r4,r3 // color expanded to 32 bits - - // [1] XOR foreground and background color -> R6 - eors r6,r4 // [1] XOR foreground color with background color - - // prepare pointer to conversion table -> LR - ldr r3,RenderLevelGrad_Addr // get pointer to conversion table -> R5 - mov lr,r3 // conversion table -> LR - - // prepare wrap width - start X -> R7 - ldr r7,[sp,#32] // load wrap width - subs r7,r1 // pixels remaining to end of segment - - // last 4-pixels - cmp r7,#4 - bhi RenderLevelGrad_OutLoop - ldr r7,[sp,#32] // load wrap width - b RenderLevelGrad_Last // render last 4-pixels of first segment - -// ---- start outer loop, render one part of segment -// Outer loop variables (* prepared before outer loop): -// R0 ... *pointer to destination data buffer -// R1 ... number of 4-pixels to generate in one part of segment -// R2 ... *pointer to source sample buffer -// R3 ... remaining width, later: (temporary) -// R4 ... *background color (expanded to 32-bit) -// R5 ... *current line Y (in direction from bottom to up) -// R6 ... *foreground color (expanded to 32-bit) -// R7 ... *wrap width of this segment, later: (temporary) -// LR ... *pointer to conversion table -// [SP+0] -// [SP+4] ... *base pointer to sample data (without X) -// [SP+8] ... *remaining width -// [SP+32] ... *wrap width - -RenderLevelGrad_OutLoop: - - // limit wrap width by total width -> R7 - ldr r3,[sp,#8] // get remaining width - cmp r7,r3 // compare with wrap width - bls 2f // width is OK - mov r7,r3 // limit wrap width - - // check number of pixels -2: cmp r7,#8 // check number of remaining pixels - bhs 5f // enough pixels remain to render 8-pixels - - // check last 4-pixels - cmp r7,#4 // check last 4-pixels - blo 3f // all done - -// ---- render last 4 pixels - -RenderLevelGrad_Last: - - // [1] clear sample accumulator - movs r1,#0 // [1] clear sample accumulator - - // [4] get sample 0 - ldrb r3,[r2,#0] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 - - // [4] get sample 1 - ldrb r3,[r2,#1] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 - - // [4] get sample 2 - ldrb r3,[r2,#2] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 - - // [4] get sample 3 - ldrb r3,[r2,#3] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 - adds r2,#4 // [1] shift pointer to source buffer - - // [2] prepare conversion table -> R1 - lsls r1,#3 // [1] multiply sample * 8 - add r1,lr // [1] add pointer to conversion table - - // [7] convert 4 pixels (lower 4 bits) - ldr r1,[r1,#4] // [2] load mask for lower 4 bits - ands r1,r6 // [1] mask foreground color - eors r1,r4 // [1] combine with background color - stmia r0!,{r1} // [3] store 4 pixels - - // check if continue with next segment -7: ldr r2,[sp,#4] // get base pointer to sample data -> R2 - cmp r7,#4 - bhi RenderLevelGrad_OutLoop - - // pop registers and return -3: pop {r1-r7,pc} - -// ---- prepare to render 8-pixels - - // prepare number of whole 4-pixels to render -> R1 -5: lsrs r1,r7,#2 // shift width to get number of 4-pixels - lsls r7,r1,#2 // shift back to get number of pixels, rounded down -> R7 - subs r3,r7 // get remaining width - str r3,[sp,#8] // save new remaining width - subs r1,#1 // number of 4-pixels - 1 - -// ---- [50*N-1] start inner loop, render in one part of segment -// Inner loop variables (* prepared before inner loop): -// R0 ... *pointer to destination data buffer -// R1 ... *number of 4-pixels to generate*2 - 1 (loop counter) -// R2 ... *pointer to source sample buffer -// R3 ... sample -// R4 ... *background color (expanded to 32-bit) -// R5 ... *current line Y (in direction from bottom to up) -// R6 ... *foreground color (expanded to 32-bit) -// R7 ... sample accumulator, conversion table -// LR ... *pointer to conversion table -// [SP+0] ... -// [SP+4] ... *base pointer to sample data (without X) -// [SP+8] ... *remaining width -// [SP+32] ... *wrap width - -RenderLevelGrad_InLoopTop: // render 8 pixels in one loop step, top half of graph - - // [1] clear sample accumulator - movs r7,#0 // [1] clear sample accumulator - - // [4] get sample 0 - ldrb r3,[r2,#0] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [4] get sample 1 - ldrb r3,[r2,#1] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [4] get sample 2 - ldrb r3,[r2,#2] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [4] get sample 3 - ldrb r3,[r2,#3] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [4] get sample 4 - ldrb r3,[r2,#4] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [4] get sample 5 - ldrb r3,[r2,#5] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [4] get sample 6 - ldrb r3,[r2,#6] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [5] get sample 7 - ldrb r3,[r2,#7] // [2] get data sample -> R3 - cmp r3,r5 // [1] compare sample with current line - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - adds r2,#8 // [1] shift pointer to source buffer - - // [2] prepare conversion table -> R7 - lsls r7,#3 // [1] multiply sample * 8 - add r7,lr // [1] add pointer to conversion table - - // [4] convert first 4 pixels (higher 4 bits) - ldr r3,[r7,#0] // [2] load mask for higher 4 bits - ands r3,r6 // [1] mask foreground color - eors r3,r4 // [1] combine with background color - - // [7] convert second 4 pixels (lower 4 bits) - ldr r7,[r7,#4] // [2] load mask for lower 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r3,r7} // [3] store second 4 pixels - - // [2,3] loop counter - subs r1,#2 // [1] shift loop counter - bhi RenderLevelGrad_InLoopTop // [1,2] > 0, render next whole 8-pixels - -// ---- end inner loop, continue with last 4-pixels, or start new part - - // continue to outer loop - ldr r7,[sp,#32] // load wrap width -8: beq RenderLevelGrad_Last // render last 4-pixels - ldr r2,[sp,#4] // get base pointer to sample data -> R2 - b RenderLevelGrad_OutLoop // go back to outer loop - - .align 2 -RenderLevelGrad_Addr: - .word RenderTextMask diff --git a/MCUME_pico/picovga_t4/render/vga_mtext.S b/MCUME_pico/picovga_t4/render/vga_mtext.S deleted file mode 100755 index b2abc7d..0000000 --- a/MCUME_pico/picovga_t4/render/vga_mtext.S +++ /dev/null @@ -1,288 +0,0 @@ - -// **************************************************************************** -// -// VGA render GF_MTEXT -// -// **************************************************************************** -// u32 par SSEGM_PAR pointer to the font -// u32 par2 SSEGM_PAR2 LOW background color, HIGH foreground color -// u16 par3 font height - -#include "../define.h" // common definitions of C and ASM -#include "hardware/regs/sio.h" // registers of hardware divider -#include "hardware/regs/addressmap.h" // SIO base address - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// render font pixel mask -.extern RenderTextMask // u32 RenderTextMask[512]; - -// extern "C" u8* RenderMText(u8* dbuf, int x, int y, int w, sSegm* segm) - -// render 8-pixel mono text GF_MTEXT -// R0 ... destination data buffer -// R1 ... start X coordinate (in pixels, must be multiple of 4) -// R2 ... start Y coordinate (in graphics lines) -// R3 ... width to display (must be multiple of 4 and > 0) -// [stack] ... segm video segment sSegm -// Output new pointer to destination data buffer. -// 320 pixels takes 6.9 us on 151 MHz. - -.thumb_func -.global RenderMText -RenderMText: - - // push registers - push {r1-r7,lr} - -// Stack content: -// SP+0: R1 start X coordinate -// SP+4: R2 start Y coordinate (later: base pointer to text data row) -// SP+8: R3 width to display -// SP+12: R4 -// SP+16: R5 -// SP+20: R6 -// SP+24: R7 -// SP+28: LR -// SP+32: video segment (later: wrap width in X direction) - - // get pointer to video segment -> R4 - ldr r4,[sp,#32] // load video segment -> R4 - - // start divide Y/font height - ldr r6,RenderMText_pSioBase // get address of SIO base -> R6 - str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate - ldrh r2,[r4,#SSEGM_PAR3] // font height -> R2 - str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height - -// - now we must wait at least 8 clock cycles to get result of division - - // [6] get wrap width -> [SP+32] - ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width - movs r7,#3 // [1] mask to align to 32-bit - bics r5,r7 // [1] align wrap - str r5,[sp,#32] // [2] save wrap width - - // [1] align X coordinate to 32-bit - bics r1,r7 // [1] - - // [3] align remaining width - bics r3,r7 // [1] - str r3,[sp,#8] // [2] save new width - - // load result of division Y/font_height -> R6 Y relative at row, R7 Y row - // Note: QUOTIENT must be read last - ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row - ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R2, index of row - - // pointer to font line -> R3 - lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long) - ldr r3,[r4,#SSEGM_PAR] // get pointer to font - add r3,r5 // line offset + font base -> pointer to current font line R3 - - // base pointer to text data (without X) -> [SP+4], R2 - ldrh r5,[r4,#SSEGM_WB] // get pitch of rows - muls r2,r5 // Y * WB -> offset of row in text buffer - ldr r5,[r4,#SSEGM_DATA] // pointer to data - add r2,r5 // base address of text buffer - str r2,[sp,#4] // save pointer to text buffer - - // prepare pointer to text data with X -> R2 - lsrs r6,r1,#3 // convert X to character index (1 character is 8 pixels width) - add r2,r6 // pointer to source text buffer -> R2 - - // prepare foreground color, expand to 32-bit -> R6 - ldrb r6,[r4,#SSEGM_PAR2+1] // load foreground color - lsls r7,r6,#8 // [1] shift foreground color << 8 - orrs r7,r6 // [1] color expanded to 16 bits - lsls r6,r7,#16 // [1] shift 16-bit color << 16 - orrs r6,r7 // [1] color expanded to 32 bits - - // prepare background color, expand to 32 bits -> R4 - ldrb r4,[r4,#SSEGM_PAR2] // load background color - lsls r5,r4,#8 // shift background color << 8 - orrs r5,r4 // color expanded to 16 bits - lsls r4,r5,#16 // shift 16-bit color << 16 - orrs r4,r5 // color expanded to 32 bits - - // [1] XOR foreground and background color -> R6 - eors r6,r4 // [1] XOR foreground color with background color - - // prepare pointer to conversion table -> LR - ldr r5,RenderMText_Addr // get pointer to conversion table -> R5 - mov lr,r5 // conversion table -> LR - -// ---- render 2nd half of first character -// R0 ... pointer to destination data buffer -// R1 ... start X coordinate -// R2 ... pointer to source text buffer -// R3 ... pointer to font line -// R4 ... background color (expanded to 32-bit) -// R5 ... (temporary) -// R6 ... foreground color (expanded to 32-bit) -// R7 ... (temporary) -// LR ... pointer to conversion table -// [SP+4] ... base pointer to text data (without X) -// [SP+8] ... remaining width -// [SP+32] ... wrap width - - // check bit 2 of X coordinate - check if image starts with 2nd half of first character - lsls r5,r1,#29 // check bit 2 of X coordinate - bpl 2f // bit 2 not set, starting even 4-pixels - - // [5] load font sample -> R5 - ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 - ldrb r5,[r3,r5] // [2] load font sample -> R5 - adds r2,#1 // [1] shift pointer to source text buffer - - // [2] prepare conversion table -> R5 - lsls r5,#3 // [1] multiply font sample * 8 - add r5,lr // [1] add pointer to conversion table - - // [6] convert second 4 pixels (lower 4 bits) - ldr r7,[r5,#4] // [2] load mask for lower 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r7} // [2] store second 4 pixels - - // shift X coordinate - adds r1,#4 // shift X coordinate - - // check end of segment - ldr r7,[sp,#32] // load wrap width - cmp r1,r7 // end of segment? - blo 1f - movs r1,#0 // reset X coordinate - ldr r2,[sp,#4] // get base pointer to text data -> R2 - - // shift remaining width -1: ldr r7,[sp,#8] // get remaining width - subs r7,#4 // shift width - str r7,[sp,#8] // save new width - - // prepare wrap width - start X -> R7 -2: ldr r7,[sp,#32] // load wrap width - subs r7,r1 // pixels remaining to end of segment - -// ---- start outer loop, render one part of segment -// Outer loop variables (* prepared before outer loop): -// R0 ... *pointer to destination data buffer -// R1 ... number of characters to generate in one part of segment -// R2 ... *pointer to source text buffer -// R3 ... *pointer to font line -// R4 ... *background color (expanded to 32-bit) -// R5 ... (temporary) -// R6 ... *foreground color (expanded to 32-bit) -// R7 ... *wrap width of this segment, later: temporary -// LR ... *pointer to conversion table -// [SP+4] ... *base pointer to text data (without X) -// [SP+8] ... *remaining width -// [SP+32] ... *wrap width - -RenderMText_OutLoop: - - // limit wrap width by total width -> R7 - ldr r5,[sp,#8] // get remaining width - cmp r7,r5 // compare with wrap width - bls 2f // width is OK - mov r7,r5 // limit wrap width - - // check if remain whole characters -2: cmp r7,#8 // check number of remaining pixels - bhs 5f // enough characters remain - - // check if 1st part of last character remains - cmp r7,#4 // check 1st part of last character - blo 3f // all done - -// ---- render 1st part of last character - -RenderMText_Last: - - // [5] load font sample -> R5 - ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 - ldrb r5,[r3,r5] // [2] load font sample -> R5 - adds r2,#1 // [1] shift pointer to source text buffer - - // [2] prepare conversion table -> R5 - lsls r5,#3 // [1] multiply font sample * 8 - add r5,lr // [1] add pointer to conversion table - - // [6] convert first 4 pixels (higher 4 bits) - ldr r1,[r5,#0] // [2] load mask for higher 4 bits - ands r1,r6 // [1] mask foreground color - eors r1,r4 // [1] combine with background color - stmia r0!,{r1} // [2] store first 4 pixels - - // check if continue with next segment - ldr r2,[sp,#4] // get base pointer to text data -> R2 - cmp r7,#4 - bhi RenderMText_OutLoop - - // pop registers and return -3: pop {r1-r7,pc} - -// ---- prepare to render whole characters - - // prepare number of whole characters to render -> R1 -5: lsrs r1,r7,#2 // shift to get number of characters*2 - lsls r7,r1,#2 // shift back to get number of pixels, rounded down -> R7 - subs r5,r7 // get remaining width - str r5,[sp,#8] // save new remaining width - subs r1,#1 // number of characters*2 - 1 - -// ---- [22*N-1] start inner loop, render characters in one part of segment -// Inner loop variables (* prepared before inner loop): -// R0 ... *pointer to destination data buffer -// R1 ... *number of characters to generate*2 - 1 (loop counter) -// R2 ... *pointer to source text buffer -// R3 ... *pointer to font line -// R4 ... *background color (expanded to 32-bit) -// R5 ... font sample -// R6 ... *foreground color (expanded to 32-bit) -// R7 ... (temporary) -// LR ... *pointer to conversion table - -RenderMText_InLoop: - - // [5] load font sample -> R5 - ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 - ldrb r5,[r3,r5] // [2] load font sample -> R5 - adds r2,#1 // [1] shift pointer to source text buffer - - // [2] prepare conversion table -> R5 - lsls r5,#3 // [1] multiply font sample * 8 - add r5,lr // [1] add pointer to conversion table - - // [6] convert first 4 pixels (higher 4 bits) - ldr r7,[r5,#0] // [2] load mask for higher 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r7} // [2] store first 4 pixels - - // [6] convert second 4 pixels (lower 4 bits) - ldr r7,[r5,#4] // [2] load mask for lower 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r7} // [2] store second 4 pixels - - // [2,3] loop counter - subs r1,#2 // [1] shift loop counter - bhi RenderMText_InLoop // [1,2] > 0, render next whole character - -// ---- end inner loop, continue with last character, or start new part - - // continue to outer loop - ldr r7,[sp,#32] // load wrap width - beq RenderMText_Last // render 1st half of last character - ldr r2,[sp,#4] // get base pointer to text data -> R2 - b RenderMText_OutLoop // go back to outer loop - - .align 2 -RenderMText_Addr: - .word RenderTextMask -RenderMText_pSioBase: - .word SIO_BASE // addres of SIO base diff --git a/MCUME_pico/picovga_t4/render/vga_oscil.S b/MCUME_pico/picovga_t4/render/vga_oscil.S deleted file mode 100755 index f3312e0..0000000 --- a/MCUME_pico/picovga_t4/render/vga_oscil.S +++ /dev/null @@ -1,297 +0,0 @@ - -// **************************************************************************** -// -// VGA render GF_OSCIL -// -// **************************************************************************** - -#include "../define.h" // common definitions of C and ASM - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// render font pixel mask -.extern RenderTextMask // u32 RenderTextMask[512]; - -// extern "C" u8* RenderOscil(u8* dbuf, int x, int y, int w, sSegm* segm); - -// render oscilloscope graph GF_OSCIL -// dbuf ... destination data buffer -// x ... start X coordinate (must be multiple of 4) -// y ... start Y coordinate -// w ... width of this segment (must be multiple of 4) -// segm ... video segment -// Output new dbuf pointer. -// 320 pixels takes 16.6 us on 151 MHz. - -.thumb_func -.global RenderOscil -RenderOscil: - - // push registers - push {r1-r7,lr} - -// Input registers and stack content: -// R0 ... pointer to testination data buffer -// SP+0: R1 start X coordinate -// SP+4: R2 start Y coordinate (later: base pointer to sample data) -// SP+8: R3 width to display -// SP+12: R4 -// SP+16: R5 -// SP+20: R6 -// SP+24: R7 -// SP+28: LR -// SP+32: video segment (later: wrap width in X direction) - - // get pointer to video segment -> R4 - ldr r4,[sp,#32] // load video segment -> R4 - - // get wrap width -> [SP+32] - ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width - movs r7,#3 // mask to align to 32-bit - bics r5,r7 // align wrap - str r5,[sp,#32] // save wrap width - - // align X coordinate to 32-bit -> R1 - bics r1,r7 - - // align remaining width -> [SP+8] - bics r3,r7 - str r3,[sp,#8] // save new width - - // current Y in direction from bottom to up -> R5 - ldrh r5,[r4,#SSEGM_WRAPY] // get wrap height - subs r5,#1 // wrapy - 1 - subs r5,r2 // subtract Y, get Y relative to bottom -> R5 - - // get pixel height -> LR - ldrb r3,[r4,#SSEGM_PAR2] // get pixel height - mov lr,r3 // pixel height -> LR - - // base pointer to sample data (without X) -> [SP+4], R2 - ldr r2,[r4,#SSEGM_DATA] // pointer to sample data - str r2,[sp,#4] // save pointer to sample buffer - - // prepare pointer to sample data with X -> R2 - add r2,r1 // pointer to source sample buffer -> R2 - - // prepare foreground color, expand to 32-bit -> R6 - ldrb r6,[r4,#SSEGM_PAR+1] // load foreground color - lsls r3,r6,#8 // [1] shift foreground color << 8 - orrs r3,r6 // [1] color expanded to 16 bits - lsls r6,r3,#16 // [1] shift 16-bit color << 16 - orrs r6,r3 // [1] color expanded to 32 bits - - // prepare background color, expand to 32 bits -> R4 - ldrb r4,[r4,#SSEGM_PAR] // load background color - lsls r3,r4,#8 // shift background color << 8 - orrs r3,r4 // color expanded to 16 bits - lsls r4,r3,#16 // shift 16-bit color << 16 - orrs r4,r3 // color expanded to 32 bits - - // [1] XOR foreground and background color -> R6 - eors r6,r4 // [1] XOR foreground color with background color - - // prepare wrap width - start X -> R7 - ldr r7,[sp,#32] // load wrap width - subs r7,r1 // pixels remaining to end of segment - - // last 4-pixels - cmp r7,#4 - bhi RenderOscil_OutLoop - ldr r7,[sp,#32] // load wrap width - b RenderOscil_Last // render last 4-pixels of first segment - -// ---- start outer loop, render one part of segment -// Outer loop variables (* prepared before outer loop): -// R0 ... *pointer to destination data buffer -// R1 ... number of 4-pixels to generate in one part of segment -// R2 ... *pointer to source sample buffer -// R3 ... remaining width, later: (temporary) -// R4 ... *background color (expanded to 32-bit) -// R5 ... *current line Y (in direction from bottom to up) -// R6 ... *foreground color (expanded to 32-bit) -// R7 ... *wrap width of this segment, later: (temporary) -// LR ... *pixel height -// [SP+4] ... *base pointer to sample data (without X) -// [SP+8] ... *remaining width -// [SP+32] ... *wrap width - -RenderOscil_OutLoop: - - // limit wrap width by total width -> R7 - ldr r3,[sp,#8] // get remaining width - cmp r7,r3 // compare with wrap width - bls 2f // width is OK - mov r7,r3 // limit wrap width - - // check number of pixels -2: cmp r7,#8 // check number of remaining pixels - bhs 5f // enough pixels remain to render 8-pixels - - // check last 4-pixels - cmp r7,#4 // check last 4-pixels - blo 3f // all done - -// ---- render last 4 pixels - -RenderOscil_Last: - - // [1] clear sample accumulator - movs r1,#0 // [1] clear sample accumulator - - // [5] get sample 0 - ldrb r3,[r2,#0] // [2] get data sample -> R3 - subs r3,r5 // [1] distance from current line - cmp lr,r3 // [1] compare with pixel height - adcs r1,r1 // [1] shift carry of comparison to accumulator -> R7 - - // [5] get sample 1 - ldrb r3,[r2,#1] // [2] get data sample -> R3 - subs r3,r5 // [1] distance from current line - cmp lr,r3 // [1] compare with pixel height - adcs r1,r1 // [1] shift carry of comparison to accumulator -> R7 - - // [5] get sample 2 - ldrb r3,[r2,#2] // [2] get data sample -> R3 - subs r3,r5 // [1] distance from current line - cmp lr,r3 // [1] compare with pixel height - adcs r1,r1 // [1] shift carry of comparison to accumulator -> R7 - - // [5] get sample 3 - ldrb r3,[r2,#3] // [2] get data sample -> R3 - subs r3,r5 // [1] distance from current line - cmp lr,r3 // [1] compare with pixel height - adcs r1,r1 // [1] shift carry of comparison to accumulator -> R7 - adds r2,#4 // [1] shift pointer to source buffer - - // [4] prepare conversion table -> R1 - lsls r1,#3 // [1] multiply sample * 8 - ldr r3,RenderOscil_Addr // [2] get pointer to conversion table -> R3 - add r1,r3 // [1] add pointer to conversion table - - // [7] convert 4 pixels (lower 4 bits) - ldr r1,[r1,#4] // [2] load mask for lower 4 bits - ands r1,r6 // [1] mask foreground color - eors r1,r4 // [1] combine with background color - stmia r0!,{r1} // [3] store 4 pixels - - // check if continue with next segment - ldr r2,[sp,#4] // get base pointer to sample data -> R2 - cmp r7,#4 - bhi RenderOscil_OutLoop - - // pop registers and return -3: pop {r1-r7,pc} - -// ---- prepare to render 8-pixels - - // prepare number of whole 4-pixels to render -> R1 -5: lsrs r1,r7,#2 // shift width to get number of 4-pixels - lsls r7,r1,#2 // shift back to get number of pixels, rounded down -> R7 - subs r3,r7 // get remaining width - str r3,[sp,#8] // save new remaining width - subs r1,#1 // number of 4-pixels - 1 - -// ---- [50*N-1] start inner loop, render in one part of segment -// Inner loop variables (* prepared before inner loop): -// R0 ... *pointer to destination data buffer -// R1 ... *number of 4-pixels to generate*2 - 1 (loop counter) -// R2 ... *pointer to source sample buffer -// R3 ... sample -// R4 ... *background color (expanded to 32-bit) -// R5 ... *current line Y (in direction from bottom to up) -// R6 ... *foreground color (expanded to 32-bit) -// R7 ... sample accumulator, conversion table -// LR ... *pixel height -// [SP+4] ... *base pointer to sample data (without X) -// [SP+8] ... *remaining width -// [SP+32] ... *wrap width - -RenderOscil_InLoop: // render 8 pixels in one loop step, top half of graph - - // [1] clear sample accumulator - movs r7,#0 // [1] clear sample accumulator - - // [5] get sample 0 - ldrb r3,[r2,#0] // [2] get data sample -> R3 - subs r3,r5 // [1] distance from current line - cmp lr,r3 // [1] compare with pixel height - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [5] get sample 1 - ldrb r3,[r2,#1] // [2] get data sample -> R3 - subs r3,r5 // [1] distance from current line - cmp lr,r3 // [1] compare with pixel height - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [5] get sample 2 - ldrb r3,[r2,#2] // [2] get data sample -> R3 - subs r3,r5 // [1] distance from current line - cmp lr,r3 // [1] compare with pixel height - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [5] get sample 3 - ldrb r3,[r2,#3] // [2] get data sample -> R3 - subs r3,r5 // [1] distance from current line - cmp lr,r3 // [1] compare with pixel height - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [5] get sample 4 - ldrb r3,[r2,#4] // [2] get data sample -> R3 - subs r3,r5 // [1] distance from current line - cmp lr,r3 // [1] compare with pixel height - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [5] get sample 5 - ldrb r3,[r2,#5] // [2] get data sample -> R3 - subs r3,r5 // [1] distance from current line - cmp lr,r3 // [1] compare with pixel height - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [5] get sample 6 - ldrb r3,[r2,#6] // [2] get data sample -> R3 - subs r3,r5 // [1] distance from current line - cmp lr,r3 // [1] compare with pixel height - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - - // [6] get sample 7 - ldrb r3,[r2,#7] // [2] get data sample -> R3 - subs r3,r5 // [1] distance from current line - cmp lr,r3 // [1] compare with pixel height - adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 - adds r2,#8 // [1] shift pointer to source buffer - - // [4] prepare conversion table -> R7 - lsls r7,#3 // [1] multiply sample * 8 - ldr r3,RenderOscil_Addr // [2] get pointer to conversion table -> R3 - add r7,r3 // [1] add pointer to conversion table - - // [4] convert first 4 pixels (higher 4 bits) - ldr r3,[r7,#0] // [2] load mask for higher 4 bits - ands r3,r6 // [1] mask foreground color - eors r3,r4 // [1] combine with background color - - // [7] convert second 4 pixels (lower 4 bits) - ldr r7,[r7,#4] // [2] load mask for lower 4 bits - ands r7,r6 // [1] mask foreground color - eors r7,r4 // [1] combine with background color - stmia r0!,{r3,r7} // [3] store second 4 pixels - - // [2,3] loop counter - subs r1,#2 // [1] shift loop counter - bhi RenderOscil_InLoop // [1,2] > 0, render next whole 8-pixels - -// ---- end inner loop, continue with last 4-pixels, or start new part - - // continue to outer loop - ldr r7,[sp,#32] // load wrap width -8: beq RenderOscil_Last // render last 4-pixels - ldr r2,[sp,#4] // get base pointer to sample data -> R2 - b RenderOscil_OutLoop // go back to outer loop - - .align 2 -RenderOscil_Addr: - .word RenderTextMask diff --git a/MCUME_pico/picovga_t4/render/vga_oscline.S b/MCUME_pico/picovga_t4/render/vga_oscline.S deleted file mode 100755 index 978c539..0000000 --- a/MCUME_pico/picovga_t4/render/vga_oscline.S +++ /dev/null @@ -1,190 +0,0 @@ - -// **************************************************************************** -// -// VGA render GF_OSCLINE -// -// **************************************************************************** - -#include "../define.h" // common definitions of C and ASM - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// render font pixel mask -.extern RenderTextMask // u32 RenderTextMask[512]; - -// extern "C" u8* RenderOscLine(u8* dbuf, int x, int y, int w, sSegm* segm); - -// render oscilloscope graph GF_OSCLINE -// dbuf ... destination data buffer -// x ... start X coordinate (must be multiple of 4) -// y ... start Y coordinate -// w ... width of this segment (must be multiple of 4) -// segm ... video segment -// Output new dbuf pointer. -// 320 pixels takes 21.5 us on 151 MHz. - -.thumb_func -.global RenderOscLine -RenderOscLine: - - // push registers - push {r2-r7,lr} - -// Input registers and stack content: -// R0 ... pointer to testination data buffer -// R1 ... start X coordinate -// SP+0: R2 start Y coordinate (later: base pointer to sample data) -// SP+4: R3 width to display -// SP+8: R4 -// SP+12: R5 -// SP+16: R6 -// SP+20: R7 -// SP+24: LR -// SP+28: video segment (later: wrap width in X direction) - - // get pointer to video segment -> R4 - ldr r4,[sp,#28] // load video segment -> R4 - - // get wrap width/2 -> [SP+28] - ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width - lsrs r5,#1 // wrap width / 2 - str r5,[sp,#28] // save wrap width - - // X coordinate/2 -> R1 - lsrs r1,#1 - - // remaining width/2 -> [SP+4] - lsrs r3,#1 - str r3,[sp,#4] // save new width - - // current Y in direction from bottom to up -> LR - ldrh r5,[r4,#SSEGM_WRAPY] // get wrap height - subs r5,#1 // wrapy - 1 - subs r5,r2 // subtract Y, get Y relative to bottom -> R5 - mov lr,r5 - - // base pointer to sample data (without X) -> [SP+0], R2 - ldr r2,[r4,#SSEGM_DATA] // pointer to sample data - str r2,[sp,#0] // save pointer to sample buffer - - // prepare pointer to sample data with X -> R2 - add r2,r1 // pointer to source sample buffer -> R2 - - // prepare foreground color -> R6 - ldrb r6,[r4,#SSEGM_PAR+1] // load foreground color - lsls r7,r6,#8 - orrs r6,r7 - - // prepare background color -> R4 - ldrb r4,[r4,#SSEGM_PAR] // load background color - lsls r7,r4,#8 - orrs r4,r7 - - // prepare wrap width - start X -> R1 - ldr r7,[sp,#28] // load wrap width - subs r1,r7,r1 // pixels remaining to end of segment - -// ---- start outer loop, render one part of segment -// Outer loop variables (* prepared before outer loop): -// R0 ... *pointer to destination data buffer -// R1 ... *wrap width of this segment, later: number of pixels to generate in one part of segment -// R2 ... *pointer to source sample buffer -// R3 ... remaining width, later: (temporary) -// R4 ... *background color -// R5 ... (temporary) -// R6 ... *foreground color -// R7 ... (temporary) -// LR ... *current line Y (in direction from bottom to up) -// [SP+0] ... *base pointer to sample data (without X) -// [SP+4] ... *remaining width -// [SP+28] ... *wrap width - -RenderOscLine_OutLoop: - - // limit wrap width by total width -> R1 - ldr r3,[sp,#4] // get remaining width - cmp r1,r3 // compare with wrap width - bls 2f // width is OK - mov r1,r3 // limit wrap width - - // check number of pixels -2: cmp r1,#0 // check number of remaining pixels - beq RenderOscLine_Stop // stop - subs r3,r1 // get remaining width - str r3,[sp,#4] // save new remaining width - -// ---- start inner loop, render in one part of segment -// Inner loop variables (* prepared before inner loop): -// R0 ... *pointer to destination data buffer -// R1 ... *number of pixels to generate (loop counter) -// R2 ... *pointer to source sample buffer -// R3 ... sample -// R4 ... *background color -// R5 ... previous sample -// R6 ... *foreground color -// R7 ... current color -// LR ... *current line Y (in direction from bottom to up) -// [SP+0] ... *base pointer to sample data (without X) -// [SP+4] ... *remaining width -// [SP+28] ... *wrap width - - ldrb r5,[r2,#0] // [2] prepare previous sample -> R5 - -RenderOscLine_InLoop: // render 8 pixels in one loop step, top half of graph - - // [3] get sample - ldrb r3,[r2,#0] // [2] get data sample -> R3 - adds r2,#1 // [1] increment pointer - - // [1] preset to background color - mov r7,r4 // [1] preset to background color - - // [3..8] (sample > previous sample) AND (sample > line) AND (line > previous sample) - display pixel - cmp r3,lr // [1] compare sample with line - beq 4f // [1,2] (sample == line), true, display pixel everytime - blo 2f // [1,2] (sample < line), false - cmp r3,r5 // [1] compare sample with previous sample - bls 2f // [1,2] (sample <= previous), false - cmp lr,r5 // [1] compare line with previous sample - bhi 4f // [1,2] (line > previous), true - - // [3..7] (sample < previous sample) AND (sample < line) AND (line < previous sample) - display pixel -2: cmp r3,r5 // [1] compare sample with previous sample - bhs 6f // [1,2] (sample >= previous), false - cmp r3,lr // [1] compare sample with line - bhs 6f // [1,2] (sample >= line), false - cmp lr,r5 // [1] compare line with previous sample - bhs 6f // [1,2] (line >= previous), false - - // [1] use foreground color -4: mov r7,r6 // [1] use foreground color - - // [3] write 2 pixels -6: strh r7,[r0,#0] // [2] write pixel - adds r0,#2 // [1] increment pointer - - // [1] save previous sample - mov r5,r3 // [1] - - // [2,3] loop counter - subs r1,#1 // [1] shift loop counter - bne RenderOscLine_InLoop // [1,2] render next pixel - -// ---- end inner loop, start new part - - // continue to outer loop - ldr r1,[sp,#28] // load wrap width - ldr r2,[sp,#0] // get base pointer to sample data -> R2 - b RenderOscLine_OutLoop // go back to outer loop - -RenderOscLine_Stop: - - // pop registers and return - pop {r2-r7,pc} - - .align 2 -RenderOscLine_Addr: - .word RenderTextMask diff --git a/MCUME_pico/picovga_t4/render/vga_persp.S b/MCUME_pico/picovga_t4/render/vga_persp.S deleted file mode 100755 index 4056bfd..0000000 --- a/MCUME_pico/picovga_t4/render/vga_persp.S +++ /dev/null @@ -1,360 +0,0 @@ - -// **************************************************************************** -// -// VGA render LAYERMODE_PERSP* -// -// **************************************************************************** -// img ... (const u8*) SLAYER_IMG image data -// par ... (const void*) SLAYER_PAR pointer to 6 matrix integer parameters m11,m12..m23 -// horiz ... (s8) SLAYER_HORIZ horizon offset/4 (0=no perspecitve, <0 ceilling) -// xbits ... (u8) SLAYER_XBITS number of bits of image width -// ybits ... (u8) SLAYER_YBITS number of bits of image height -// w ... (u16) SLAYER_W destination width -// h ... (u16) SLAYER_H destination height - -#include "../define.h" // common definitions of C and ASM -#include "hardware/regs/sio.h" // registers of hardware divider -#include "hardware/regs/addressmap.h" // SIO base address - -#define ACCUM0_OFFSET 0 -#define ACCUM1_OFFSET 4 -#define BASE0_OFFSET 8 -#define BASE1_OFFSET 12 -#define BASE2_OFFSET 16 -#define POP_LANE0_OFFSET 20 -#define POP_LANE1_OFFSET 24 -#define POP_FULL_OFFSET 28 -#define PEEK_LANE0_OFFSET 32 -#define PEEK_LANE1_OFFSET 36 -#define PEEK_FULL_OFFSET 40 -#define CTRL_LANE0_OFFSET 44 -#define CTRL_LANE1_OFFSET 48 -#define ACCUM0_ADD_OFFSET 52 -#define ACCUM1_ADD_OFFSET 56 -#define BASE_1AND0_OFFSET 60 - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// extern "C" void RenderPersp(u8* dbuf, int y, sLayer* scr) - -// render layers with transformatio matrix LAYERMODE_PERSP* -// R0 ... dbuf pointer to data buffer -// R1 ... y coordinate of scanline (relative in destination image) -// R2 ... scr pointer to layer screen structure sLayer - -.thumb_func -.global RenderPersp -RenderPersp: - - // push registers - push {r4-r7,lr} - -// Stack content and input variables: -// R0 dbuf pointer to data buffer -// R1 Y coordinate of scanline -// R2 scr pointer to layer screen structure sLayer -// R3 -// SP+0: R4 -// SP+4: R5 -// SP+8: R6 -// SP+12: R7 -// SP+16: LR - -// R0 ... pointer to destination data buffer -// R1 ... Y coordinate -// R2 ... sLayer - - // load horizon offset -> R4, check if use perspective - ldr r6,RenderPersp_pSioBase // get address of SIO base -> R6 - ldrh r5,[r2,#SLAYER_H] // get destination height -> R5 - ldrb r4,[r2,#SLAYER_HORIZ] // get horizon offset -> R4 - sxtb r4,r4 // signed extension - lsls r4,#2 // horizon * 4, horizon = 0 ? - bne 2f // use perspective - - // not using perspective, start Y coordinate y0 = y - h/2 -> R12 - lsrs r5,#1 // destination height/2 -> R5 - subs r1,r5 // y - h/2 -> R1 - mov r12,r1 // current coordinate Y0 = y - h/2 -> R12 - - // prepare divide result to get 1< R5 - str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL - b 4f - - // using perspective, check ceilling mode -2: bpl 3f // horizon is not negative - subs r1,r5,r1 // negate, y = h - y - subs r1,#1 // y = h - 1 - y - negs r4,r4 // absolute value of horizon - - // prepare current coordinate Y0 = y - h -> R12 -3: subs r7,r1,r5 // y - h = current Y coordinate -> R7 - mov r12,r7 // store current coordinate Y0 -> R12 - - // start calculating distance coefficient dist = FRACTMUL*h/(y + horiz) - lsls r5,#FRACT // destination height * FRACTMUL -> R5 - str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h - adds r1,r4 // horizon + y -> R2 - str r1,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz - -// R0 ... pointer to destination data buffer -// R2 ... sLayer -// R12 ... current coordinate Y0 - - // prepare start coordinate X0 = -w/2 -> LR -4: ldrh r3,[r2,#SLAYER_W] // get destination width -> R3 - lsrs r5,r3,#1 // width/2 - negs r5,r5 // negate - mov lr,r5 // store start coordinate X0 -> LR - - // prepare number of 4-pixels (loop counter) -> R7 - lsrs r7,r3,#2 // width/4 -> R7 - - // get number of bits of image width "xbits" -> R1 - ldrb r1,[r2,#SLAYER_XBITS] // number of bits of image width -> R1 - - // get number of bits of image height "ybits" -> R4 - ldrb r4,[r2,#SLAYER_YBITS] // number of bits of image height -> R4 - - // prepare address of interpolator base -> R3 - ldr r3,RenderPersp_Interp // get address of interpolator base -> R3 - -// R0 ... pointer to destination data buffer -// R1 ... number of bits of image width xbits -// R2 ... sLayer -// R3 ... interpolator base -// R4 ... number of bits of image height ybits -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 - -// ---- setup interpolator - - // set image base to base2 - ldr r6,[r2,#SLAYER_IMG] // load image base - str r6,[r3,#BASE2_OFFSET] // set image base - - // set control word of lane 1 - add raw lane base back to accumulator, shift "FRACT-xbits", mask xbits...xbits+ybits-1 - ldr r6,RenderPersp_Ctrl // load control word - subs r6,r1 // FRACT - xbits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required) - lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift xbits to mask LSB position -> R5 - orrs r6,r5 // add xbits to control word - subs r1,#1 // xbits - 1 -> R1 - adds r5,r1,r4 // xbits-1+ybits -> R5 - lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to MSB mask position - orrs r6,r5 // add to control word - str r6,[r3,#CTRL_LANE1_OFFSET] // set control word of lane 1 - -// R0 ... pointer to destination data buffer -// R1 ... image width xbits-1 -// R2 ... sLayer -// R3 ... interpolator base -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 - - // set control word of lane 0 - add raw lane base back to accumulator, shift "FRACT", mask 0..xbits-1 - ldr r6,RenderPersp_Ctrl // load control word - lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift xbits-1 to mask MSB position - orrs r6,r1 // add to control word - str r6,[r3,#CTRL_LANE0_OFFSET] // set control word of lane 0 - -// R0 ... pointer to destination data buffer -// R2 ... sLayer -// R3 ... interpolator base -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 - -// ---- set matrix - - // get pointer to matrix -> R4 - ldr r4,[r2,#SLAYER_PAR] // get pointer to matrix -> R4 - - // get distance coefficient dist -> R1 - ldr r1,RenderPersp_pSioBase // get address of SIO base -> R1 - ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient - -// r4+0 ... m11 -// r4+4 ... m12 -// r4+8 ... m13 -// r4+12 ... m21 -// r4+16 ... m22 -// r4+20 ... m23 - - // set m11 -> R5 base0 - ldr r5,[r4,#0] // load m11 - muls r5,r1 // m11*dist - asrs r5,#FRACT // (m11*dist)>>FRACT - str r5,[r3,#BASE0_OFFSET] // set base0 - - // set m21 -> R6 base1 - ldr r6,[r4,#12] // load m21 - muls r6,r1 // m21*dist - asrs r6,#FRACT // (m21*dist)>>FRACT - str r6,[r3,#BASE1_OFFSET] // set base1 - -// R0 ... pointer to destination data buffer -// R1 ... distance coefficient -// R3 ... interpolator base -// R4 ... pointer to matrix -// R5 ... m11 -// R6 ... m21 -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 - - // set x0*m11 + y0*m12 + m13 -> accum0 - mov r2,lr // start coordinate X0 -> X2 - muls r5,r2 // x0*m11 -> R5 - muls r2,r6 // x0*m21 -> R2 - mov lr,r1 // save distance coefficient -> LR - ldr r6,[r4,#4] // load m12 -> R6 - muls r1,r6 // m12*dist -> R1 - asrs r1,#FRACT // (m12*dist)>>FRACT -> R1 - mov r6,r12 // load coordinate Y0 -> R6 - muls r1,r6 // y0*m12 -> R1 - adds r5,r1 // x0*m11 + y0*m12 -> R5 - ldr r1,[r4,#8] // load m13 -> R1 - adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5 - str r5,[r3,#ACCUM0_OFFSET] // set accum0 - -// R0 ... pointer to destination data buffer -// R2 ... x0*m21 -// R3 ... interpolator base -// R4 ... pointer to matrix -// R6 ... current coordinate Y0 -// R7 ... width/4 -// LR ... distance coefficient - - // set x0*m21 + y0*m22 + m23 -> accum1 - ldr r1,[r4,#16] // load m22 -> R1 - mov r5,lr // distance coefficient -> R5 - muls r1,r5 // m22*dist - asrs r1,#FRACT // (m22*dist)>>FRACT -> R1 - muls r1,r6 // y0*m22 -> R1 - adds r2,r1 // x0*m21 + y0*m22 -> R2 - ldr r1,[r4,#20] // load m23 -> R1 - adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2 - str r2,[r3,#ACCUM1_OFFSET] // set accum1 - -// ---- process odd 4-pixel - -// R0 ... pointer to destination data buffer -// R3 ... interpolator base -// R4 ... (temporary - get pointer to pixel) -// R5 ... (temporary - load pixel) -// R6 ... (temporary - pixel accumulator) -// R7 ... width/4 (loop counter) - - lsrs r7,#1 // width/4/2 - bcc 2f // no odd 4-pixel - - // [3] load 1st pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r6,[r4,#0] // [2] load pixel - - // [5] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r5,[r4,#0] // [2] load pixel - lsls r5,#8 // [1] shift 1 byte left - orrs r6,r5 // [1] add pixel to accumulator - - // [5] load 3rd pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r5,[r4,#0] // [2] load pixel - lsls r5,#16 // [1] shift 2 bytes left - orrs r6,r5 // [1] add pixel to accumulator - - // [5] load 4th pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r5,[r4,#0] // [2] load pixel - lsls r5,#24 // [1] shift 3 bytes left - orrs r6,r5 // [1] add pixel to accumulator - - // [2] store 4 pixels - stmia r0!,{r6} // [2] store 4 pixels - - // check number of remaining pixels -2: tst r7,r7 // check number of pixels - beq 8f // end - -// ---- [42 per 8 pixels] inner loop -// R0 ... pointer to destination data buffer -// R1 ... (temporary - pixel accumulator 1) -// R2 ... (temporary - pixel accumulator 2) -// R3 ... interpolator base -// R4 ... (temporary - get pointer to pixel, load pixel) -// R7 ... width/8 (loop counter) - - // [3] load 1st pixel -6: ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r1,[r4,#0] // [2] load pixel - - // [5] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r4,[r4,#0] // [2] load pixel - lsls r4,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - - // [5] load 3rd pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r4,[r4,#0] // [2] load pixel - lsls r4,#16 // [1] shift 2 bytes left - orrs r1,r4 // [1] add pixel to accumulator - - // [5] load 4th pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r4,[r4,#0] // [2] load pixel - lsls r4,#24 // [1] shift 3 bytes left - orrs r1,r4 // [1] add pixel to accumulator - - // [3] load 1st pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r2,[r4,#0] // [2] load pixel - - // [5] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r4,[r4,#0] // [2] load pixel - lsls r4,#8 // [1] shift 1 byte left - orrs r2,r4 // [1] add pixel to accumulator - - // [5] load 3rd pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r4,[r4,#0] // [2] load pixel - lsls r4,#16 // [1] shift 2 bytes left - orrs r2,r4 // [1] add pixel to accumulator - - // [5] load 4th pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r4,[r4,#0] // [2] load pixel - lsls r4,#24 // [1] shift 3 bytes left - orrs r2,r4 // [1] add pixel to accumulator - - // [3] store 8 pixels - stmia r0!,{r1,r2} // [3] store 8 pixels - - // [2,3] loop counter - subs r7,#1 // [1] 8-pixel counter - bne 6b // [1,2] next 8-pixels - - // pop registers -8: pop {r4-r7,pc} - - .align 2 -// pointer to SIO base -RenderPersp_pSioBase: - .word SIO_BASE // addres of SIO base - -// pointer to Interp1 base -RenderPersp_Interp: - .word SIO_BASE+SIO_INTERP1_ACCUM0_OFFSET // addres of interpolator base - -RenderPersp_Ctrl: // lane control word - .word SIO_INTERP1_CTRL_LANE0_ADD_RAW_BITS | (FRACT< R4, check if use perspective - ldr r6,RenderPersp_pSioBase // get address of SIO base -> R6 - ldrh r5,[r2,#SLAYER_H] // get destination height -> R5 - ldrb r4,[r2,#SLAYER_HORIZ] // get horizon offset -> R4 - sxtb r4,r4 // signed extension - lsls r4,#2 // horizon * 4, horizon = 0 ? - bne 2f // use perspective - - // not using perspective, start Y coordinate y0 = y - h/2 -> R12 - lsrs r5,#1 // destination height/2 -> R5 - subs r1,r5 // y - h/2 -> R1 - mov r12,r1 // current coordinate Y0 = y - h/2 -> R12 - - // prepare divide result to get 1< R5 - str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL - b 4f - - // using perspective, check ceilling mode -2: bpl 3f // horizon is not negative - subs r1,r5,r1 // negate, y = h - y - subs r1,#1 // y = h - 1 - y - negs r4,r4 // absolute value of horizon - - // prepare current coordinate Y0 = y - h -> R12 -3: subs r7,r1,r5 // y - h = current Y coordinate -> R7 - mov r12,r7 // store current coordinate Y0 -> R12 - - // start calculating distance coefficient dist = FRACTMUL*h/(y + horiz) - lsls r5,#FRACT // destination height * FRACTMUL -> R5 - str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h - adds r1,r4 // horizon + y -> R2 - str r1,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz - -// R0 ... pointer to destination data buffer -// R2 ... sLayer -// R12 ... current coordinate Y0 - - // prepare start coordinate X0 = -w/2 -> LR -4: ldrh r3,[r2,#SLAYER_W] // get destination width -> R3 - lsrs r5,r3,#1 // width/2 - negs r5,r5 // negate - mov lr,r5 // store start coordinate X0 -> LR - - // prepare number of 4-pixels (loop counter) -> R7 - lsrs r7,r3,#2 // width/4 -> R7 - - // get number of bits of image width "xbits" -> R1 - ldrb r1,[r2,#SLAYER_XBITS] // number of bits of image width -> R1 - - // get number of bits of image height "ybits" -> R4 - ldrb r4,[r2,#SLAYER_YBITS] // number of bits of image height -> R4 - - // prepare address of interpolator base -> R3 - ldr r3,RenderPersp_Interp // get address of interpolator base -> R3 - -// R0 ... pointer to destination data buffer -// R1 ... number of bits of image width xbits -// R2 ... sLayer -// R3 ... interpolator base -// R4 ... number of bits of image height ybits -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 - -// ---- setup interpolator - - // set image base to base2 - ldr r6,[r2,#SLAYER_IMG] // load image base - str r6,[r3,#BASE2_OFFSET] // set image base - - // set control word of lane 1 - add raw lane base back to accumulator, shift "FRACT-xbits", mask xbits...xbits+ybits-1 - ldr r6,RenderPersp_Ctrl // load control word - subs r6,r1 // FRACT - xbits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required) - lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift xbits to mask LSB position -> R5 - orrs r6,r5 // add xbits to control word - subs r1,#1 // xbits - 1 -> R1 - adds r5,r1,r4 // xbits-1+ybits -> R5 - lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to MSB mask position - orrs r6,r5 // add to control word - str r6,[r3,#CTRL_LANE1_OFFSET] // set control word of lane 1 - -// R0 ... pointer to destination data buffer -// R1 ... image width xbits-1 -// R2 ... sLayer -// R3 ... interpolator base -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 - - // set control word of lane 0 - add raw lane base back to accumulator, shift "FRACT", mask 0..xbits-1 - ldr r6,RenderPersp_Ctrl // load control word - lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift xbits-1 to mask MSB position - orrs r6,r1 // add to control word - str r6,[r3,#CTRL_LANE0_OFFSET] // set control word of lane 0 - -// R0 ... pointer to destination data buffer -// R2 ... sLayer -// R3 ... interpolator base -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 - -// ---- set matrix - - // get pointer to matrix -> R4 - ldr r4,[r2,#SLAYER_PAR] // get pointer to matrix -> R4 - - // get distance coefficient dist -> R1 - ldr r1,RenderPersp_pSioBase // get address of SIO base -> R1 - ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient - -// r4+0 ... m11 -// r4+4 ... m12 -// r4+8 ... m13 -// r4+12 ... m21 -// r4+16 ... m22 -// r4+20 ... m23 - - // set m11 -> R5 base0 - ldr r5,[r4,#0] // load m11 - muls r5,r1 // m11*dist - asrs r5,#FRACT-1 // (m11*dist)>>(FRACT-1) ... 2*delta - str r5,[r3,#BASE0_OFFSET] // set base0 - asrs r5,#1 // (m11*dist)>>FRACT - - // set m21 -> R6 base1 - ldr r6,[r4,#12] // load m21 - muls r6,r1 // m21*dist - asrs r6,#FRACT-1 // (m21*dist)>>(FRACT-1) ... 2*delta - str r6,[r3,#BASE1_OFFSET] // set base1 - asrs r6,#1 // (m21*dist)>>FRACT - -// R0 ... pointer to destination data buffer -// R1 ... distance coefficient -// R3 ... interpolator base -// R4 ... pointer to matrix -// R5 ... m11 -// R6 ... m21 -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 - - // set x0*m11 + y0*m12 + m13 -> accum0 - mov r2,lr // start coordinate X0 -> X2 - muls r5,r2 // x0*m11 -> R5 - muls r2,r6 // x0*m21 -> R2 - mov lr,r1 // save distance coefficient -> LR - ldr r6,[r4,#4] // load m12 -> R6 - muls r1,r6 // m12*dist -> R1 - asrs r1,#FRACT // (m12*dist)>>FRACT -> R1 - mov r6,r12 // load coordinate Y0 -> R6 - muls r1,r6 // y0*m12 -> R1 - adds r5,r1 // x0*m11 + y0*m12 -> R5 - ldr r1,[r4,#8] // load m13 -> R1 - adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5 - str r5,[r3,#ACCUM0_OFFSET] // set accum0 - -// R0 ... pointer to destination data buffer -// R2 ... x0*m21 -// R3 ... interpolator base -// R4 ... pointer to matrix -// R6 ... current coordinate Y0 -// R7 ... width/4 -// LR ... distance coefficient - - // set x0*m21 + y0*m22 + m23 -> accum1 - ldr r1,[r4,#16] // load m22 -> R1 - mov r5,lr // distance coefficient -> R5 - muls r1,r5 // m22*dist - asrs r1,#FRACT // (m22*dist)>>FRACT -> R1 - muls r1,r6 // y0*m22 -> R1 - adds r2,r1 // x0*m21 + y0*m22 -> R2 - ldr r1,[r4,#20] // load m23 -> R1 - adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2 - str r2,[r3,#ACCUM1_OFFSET] // set accum1 - -// ---- process odd 4-pixel - -// R0 ... pointer to destination data buffer -// R1 ... (temporary - pixel accumulator 1) -// R3 ... interpolator base -// R4 ... (temporary - get pointer to pixel, load pixel) -// R7 ... width/4 (loop counter) - - lsrs r7,#1 // width/4/2 - bcc 2f // no odd 4-pixel - - // [5] load 1st pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r1,[r4,#0] // [2] load pixel - lsls r4,r1,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - - // [7] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r4,[r4,#0] // [2] load pixel - lsls r4,#16 // [1] shift 2 bytes left - orrs r1,r4 // [1] add pixel to accumulator - lsls r4,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - - // [2] store 4 pixels - stmia r0!,{r1} // [2] store 4 pixels - - // check number of remaining pixels -2: tst r7,r7 // check number of pixels - beq 8f // end - -// ---- [30 per 8 pixels] inner loop -// R0 ... pointer to destination data buffer -// R1 ... (temporary - pixel accumulator 1) -// R2 ... (temporary - pixel accumulator 2) -// R3 ... interpolator base -// R4 ... (temporary - get pointer to pixel, load pixel) -// R7 ... width/8 (loop counter) - - // [5] load 1st pixel -6: ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r1,[r4,#0] // [2] load pixel - lsls r4,r1,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - - // [7] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r4,[r4,#0] // [2] load pixel - lsls r4,#16 // [1] shift 2 bytes left - orrs r1,r4 // [1] add pixel to accumulator - lsls r4,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - - // [5] load 1st pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r2,[r4,#0] // [2] load pixel - lsls r4,r2,#8 // [1] shift 1 byte left - orrs r2,r4 // [1] add pixel to accumulator - - // [7] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value - ldrb r4,[r4,#0] // [2] load pixel - lsls r4,#16 // [1] shift 2 bytes left - orrs r2,r4 // [1] add pixel to accumulator - lsls r4,#8 // [1] shift 1 byte left - orrs r2,r4 // [1] add pixel to accumulator - - // [3] store 8 pixels - stmia r0!,{r1,r2} // [3] store 8 pixels - - // [2,3] loop counter - subs r7,#1 // [1] 8-pixel counter - bne 6b // [1,2] next 8-pixels - - // pop registers -8: pop {r4-r7,pc} - - .align 2 -// pointer to SIO base -RenderPersp_pSioBase: - .word SIO_BASE // addres of SIO base - -// pointer to Interp1 base -RenderPersp_Interp: - .word SIO_BASE+SIO_INTERP1_ACCUM0_OFFSET // addres of interpolator base - -RenderPersp_Ctrl: // lane control word - .word SIO_INTERP1_CTRL_LANE0_ADD_RAW_BITS | (FRACT< R4 - ldr r4,[sp,#24] // load video segment -> R4 - - // get wrap width -> [SP+24] - ldrh r7,[r4,#SSEGM_WRAPX] // get wrap width - movs r6,#3 // mask to align to 32-bit - bics r7,r6 // align wrap - str r7,[sp,#24] // save wrap width - - // align X coordinate to 32-bit -> R1 - bics r1,r6 - - // align remaining width -> [SP+0] - bics r3,r6 - str r3,[sp,#0] // save new width - - // base pointer to image data (without X) -> LR, R2 - ldrh r5,[r4,#SSEGM_WB] // get pitch of rows - muls r2,r5 // Y * WB -> offset of row in image buffer - ldr r5,[r4,#SSEGM_DATA] // pointer to data - add r2,r5 // base address of image buffer - mov lr,r2 // save pointer to image buffer - - // prepare pointer to image data with X -> R2 - lsrs r6,r1,#3 // convert X to 8-pixel offset - add r2,r6 // pointer to source image buffer -> R2 - - // prepare size of one plane -> R3 - ldr r3,[r4,#SSEGM_PAR] // get size of one plane -> R3 - - // prepare pointer to palette translation table -> R7 - ldr r7,[r4,#SSEGM_PAR2] // get pointer to palette translation table -> R7 - -// ---- render 2nd half of first 8-pixel -// R0 ... pointer to destination data buffer -// R1 ... start X coordinate -// R2 ... pointer to source image data -// R3 ... size of one plane (= offset of plane 1 from plane 0) -// R4 ... (temporary) -// R5 ... (temporary) -// R6 ... (temporary) -// R7 ... *pointer to palette translation table -// LR ... *base pointer to image data (without X) -// [SP+0] ... *remaining width -// [SP+24] ... *wrap width - - // check bit 2 of X coordinate - check if image starts with 2nd half of first 8-pixel - lsls r5,r1,#29 // check bit 2 of X coordinate - bpl 2f // bit 2 not set, starting even 4-pixels - - // [5] load samples -> R5, R6 - ldrb r5,[r2,#0] // [2] load sample from plane 1 - ldrb r6,[r2,r3] // [2] load sample from plane 2 - adds r2,#1 // [1] increase pointer - - // [5] compose samples LOW -> R5 - lsls r6,#28 // [1] isolate low 4 bits from sample 2 - lsrs r6,#22 // [1] shift to bit position 6 - lsls r5,#28 // [1] isolate low 4 bit from sample 1 - lsrs r5,#26 // [1] shift to bit position 2 - orrs r5,r6 // [1] compose samples - - // [5] write pixels - ldr r5,[r7,r5] // [2] load colors - stmia r0!,{r5} // [3] write pixels - - // shift X coordinate - adds r1,#4 // shift X coordinate - - // check end of segment - ldr r6,[sp,#24] // load wrap width - cmp r1,r6 // X=end of segment? - blo 1f - movs r1,#0 // reset X coordinate - mov r2,lr // get base pointer to image data -> R2 - - // shift remaining width -1: ldr r6,[sp,#0] // get remaining width - subs r6,#4 // shift width - str r6,[sp,#0] // save new width - - // prepare wrap width - start X -> R6 -2: ldr r6,[sp,#24] // load wrap width - subs r6,r1 // pixels remaining to end of segment - -// ---- start outer loop, render one part of segment -// Outer loop variables (* prepared before outer loop): -// R0 ... *pointer to destination data buffer -// R1 ... number of 4-pixels - 1 to generate in one part of segment -// R2 ... *pointer to source image data -// R3 ... *size of one plane (= offset of plane 1 from plane 0) -// R4 ... (temporary) -// R5 ... (temporary) -// R6 ... part width -// R7 ... *pointer to palette translation table -// LR ... *base pointer to image data (without X) -// [SP+0] ... *remaining width -// [SP+24] ... *wrap width - -RenderPlane2_OutLoop: - - // limit wrap width by total width -> R7 - ldr r4,[sp,#0] // get remaining width - cmp r6,r4 // compare with wrap width - bls 2f // width is OK - mov r6,r4 // limit wrap width - - // check number of pixels -2: cmp r6,#8 // check number of remaining pixels - bhs 5f // enough 8-pixels remain - - // check if 1st part of last 8-pixel remains - cmp r6,#4 // check number of pixels - blo 3f // all done - -// ---- render 1st part of last 8-pixel - -RenderPlane2_Last: - - // [5] load samples -> R5, R4 - ldrb r5,[r2,#0] // [2] load sample from plane 1 - ldrb r4,[r2,r3] // [2] load sample from plane 2 - adds r2,#1 // [1] increase pointer - - // [5] compose samples HIGH -> R4 - lsrs r4,#4 // [1] isolate high 4 bits from sample 2 - lsls r4,#8 // [1] shift left - orrs r4,r5 // [1] compose sample 2 with sample 1 - lsrs r4,#4 // [1] isolate high 4 bits from sample 1 - lsls r4,#2 // [1] 2 shifts to get index*4 - - // [4] write pixels - ldr r4,[r7,r4] // [2] load colors - stmia r0!,{r4} // [2] write pixels - - // check if continue with next segment - mov r2,lr // get base pointer to image data -> R2 - cmp r6,#4 - bhi RenderPlane2_OutLoop - - // pop registers and return -3: pop {r3-r7,pc} - -// ---- prepare to render whole 8-pixels - - // prepare number of 4-pixels to render -> R1 -5: lsrs r1,r6,#2 // shift to get number of 4-pixels - lsls r6,r1,#2 // shift back to get number of pixels, rounded down -> R6 - subs r4,r6 // get remaining width - str r4,[sp,#0] // save new remaining width - subs r1,#1 // number of 4-pixels - 1 - -// ---- [25*N-1] start inner loop, render whole 8-pixels in one part of segment -// Inner loop variables (* prepared before inner loop): -// R0 ... *pointer to destination data buffer -// R1 ... *number of 4-pixels - 1 to generate (loop counter) -// R2 ... *pointer to source image data -// R3 ... *size of one plane (= offset of plane 1 from plane 0) -// R4 ... output sample -// R5 ... sample from plane 1 -// R6 ... sample from plane 2 -// R7 ... *pointer to palette translation table -// LR ... *base pointer to image data (without X) -// [SP+0] ... *remaining width -// [SP+24] ... *wrap width - -RenderPlane2_InLoop: - - // [5] load samples -> R5, R6 - ldrb r5,[r2,#0] // [2] load sample from plane 1 - ldrb r6,[r2,r3] // [2] load sample from plane 2 - adds r2,#1 // [1] increase pointer - - // [5] compose samples HIGH -> R4 - lsrs r4,r6,#4 // [1] isolate high 4 bits from sample 2 - lsls r4,#8 // [1] shift left - orrs r4,r5 // [1] compose sample 2 with sample 1 - lsrs r4,#4 // [1] isolate high 4 bits from sample 1 - lsls r4,#2 // [1] 2 shifts to get index*4 - - // [2] prepare first 4 pixels - ldr r4,[r7,r4] // [2] load colors - - // [5] compose samples LOW -> R5 - lsls r6,#28 // [1] isolate low 4 bits from sample 2 - lsrs r6,#22 // [1] shift to bit position 6 - lsls r5,#28 // [1] isolate low 4 bit from sample 1 - lsrs r5,#26 // [1] shift to bit position 2 - orrs r5,r6 // [1] compose samples - - // [5] write pixels - ldr r5,[r7,r5] // [2] load colors - stmia r0!,{r4,r5} // [3] write pixels - - // [2,3] loop counter - subs r1,#2 // [1] loop counter - bhi RenderPlane2_InLoop // [1,2] > 0, next step - -// ---- end inner loop - -RenderPlane2_EndLoop: - - // continue to outer loop - ldr r6,[sp,#24] // load wrap width -> R6 - beq RenderPlane2_Last // render 1st half of last 8-pixels - mov r2,lr // get base pointer to image data -> R2 - b RenderPlane2_OutLoop // go back to outer loop diff --git a/MCUME_pico/picovga_t4/render/vga_progress.S b/MCUME_pico/picovga_t4/render/vga_progress.S deleted file mode 100755 index 4ec2b0e..0000000 --- a/MCUME_pico/picovga_t4/render/vga_progress.S +++ /dev/null @@ -1,123 +0,0 @@ - -// **************************************************************************** -// -// VGA render GF_PROGRESS -// -// **************************************************************************** - -#include "../define.h" // common definitions of C and ASM - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// extern "C" u32* RenderProgress(u32* cbuf, int x, int y, int w, sSegm* segm); - -// render horizontal progress indicator GF_PROGRESS -// R0 ... pointer to control buffer -// R1 ... start X coordinate (in pixels, must be multiple of 4) -// R2 ... start Y coordinate (in graphics lines) -// R3 ... width to display (must be multiple of 4 and > 0) -// [stack] ... segm video segment sSegm -// Output new pointer to control buffer. -// 320 pixels takes 0.5 us on 151 MHz. - -.thumb_func -.global RenderProgress -RenderProgress: - - // push registers - push {r4-r7,lr} - -// Stack content: -// SP+0: R4 -// SP+4: R5 -// SP+8: R6 -// SP+12: R7 -// SP+16: LR -// SP+20: video segment - -// Variables: -// R0 ... pointer to control buffer -// R1 ... X coordinate/4 -// R2 ... data sample -// R3 ... remaining width -// R4 ... gradient buffer 1 -// R5 ... gradient buffer 2 -// R6 ... (temporary) -// R7 ... current wrap width -// LR ... wrap width - - // get pointer to video segment -> R4 - ldr r4,[sp,#20] // load video segment -> R4 - - // prepare X coordinate/4 -> R1 - lsrs r1,#2 // X coordinate/4 -> R1 - - // load data sample -> R2 - ldr r5,[r4,#SSEGM_DATA] // pointer to data - ldrb r2,[r5,r2] // load data sample -> R2 - - // prepare remaining width/4 -> R3 - lsrs r3,#2 // width/4 -> R3 - - // get wrap width/4 -> LR - ldrh r7,[r4,#SSEGM_WRAPX] // get wrap width - lsrs r7,#2 // wrap width/4 -> R7 - mov lr,r7 - - // prepare gradient buffers -> R4, R5 - ldr r5,[r4,#SSEGM_PAR2] // gradient buffer 2 -> R5 - ldr r4,[r4,#SSEGM_PAR] // gradient buffer 1 -> R4 - - // check remaining width -2: tst r3,r3 // check remaining width - beq 9f // end of data - - // prepare wrap width - start X -> R7 - mov r7,lr // wrap width - subs r7,r1 // pixels remaining to end of segment - - // limit wrap width by total width -> R7 - cmp r7,r3 // compare with wrap width - bls 4f // width is OK - mov r7,r3 // limit wrap width - - // decrease remaining width -4: subs r3,r7 // subtract from remaining width - - // first part visible if x < data - cmp r1,r2 - bhs 6f // x >= data - - // width of this part - subs r6,r2,r1 // width <- data - x - - // limit width - cmp r6,r7 // check width - bls 5f // width is OK - mov r6,r7 // limit width -5: subs r7,r6 // decrease width - - // save control block with 1st part -5: stm r0!,{r6} // write width - adds r6,r4,r1 // gradient address at offset x - stm r0!,{r6} // write address - mov r1,r2 // X <- data - - // check if some width remain -6: tst r7,r7 // check with of this part - beq 7f // end of segment - - // save control block width 2nd part - stm r0!,{r7} // write width - adds r6,r5,r1 // gradient address at offset x - stm r0!,{r6} // write address - - // reset X -7: movs r1,#0 // reset X - b 2b // next segment - - // pop registers and return -9: pop {r4-r7,pc} diff --git a/MCUME_pico/picovga_t4/render/vga_sprite.S b/MCUME_pico/picovga_t4/render/vga_sprite.S deleted file mode 100755 index c8723f4..0000000 --- a/MCUME_pico/picovga_t4/render/vga_sprite.S +++ /dev/null @@ -1,164 +0,0 @@ - -// **************************************************************************** -// -// VGA render LAYERMODE_SPRITE* -// -// **************************************************************************** - -#include "../define.h" // common definitions of C and ASM - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// extern "C" void RenderSprite(u8* dbuf, int y, sLayer* scr) - -// render layers with sprites LAYERMODE_SPRITE* -// R0 ... dbuf pointer to data buffer -// R1 ... y coordinate of scanline -// R2 ... scr pointer to layer screen structure sLayer - -.thumb_func -.global RenderSprite -RenderSprite: - - // push registers - push {r4-r7,lr} - -// Stack content and input variables: -// R0 dbuf pointer to data buffer -// R1 Y coordinate of scanline -// R2 scr pointer to layer screen structure sLayer, later: num number of sprites -// R3 -// SP+0: R4 -// SP+4: R5 -// SP+8: R6 -// SP+12: R7 -// SP+16: LR - -// Variables: -// R0 ... dbuf pointer to data buffer, later: dbuf[x] destination address -// R1 ... Y coordinate of scanline, later: Y2 coordinate relative to sprite base, later: s->img[Y2*WB+X2] address of sprite line -// R2 ... num number of sprites (loop counter), later: W2 width of sprite segment -// R3 ... s pointer to current sprite, later: col key color -// R4 ... (temporary), later: absolute X coordinate of start of line -// R5 ... relative X2 coordinate of sprite segment -// R6 ... W layer screen width -// R7 ... spr pointer to list of sprites -// LR - - // load pointer to list of sprites -> R7 - ldr r7,[r2,#SLAYER_IMG] - - // load screen width -> R6 - ldrh r6,[r2,#SLAYER_W] - - // load number of sprites -> R2 - ldrh r2,[r2,#SLAYER_SPRITENUM] - - // count number of sprites, end if num = 0 -2: subs r2,#1 // decrement number of sprites - blo 9f // no other sprites - -// R0 ... dbuf pointer to data buffer -// R1 ... Y coordinate of scanline -// R2 ... num number of sprites (loop counter) -// R3 ... -// R4 ... -// R5 ... -// R6 ... W layer screen width -// R7 ... spr pointer to list of sprites - - // push registers - push {r0-r2} // push resiters R0..R2 - - // get pointer to next sprite -> R3 - ldmia r7!,{r3} // pointer to sprite -> R3 -// R3 ... s pointer to current sprite - - // prepare Y2 coordinate relative to sprite base -> R1 - ldrh r4,[r3,#SSPRITE_Y] // get Y coordinate of the sprite -> R4 - sxth r4,r4 // signed extend Y2 - subs r1,r1,r4 // relative coordinate Y2 = Y - s->y -// R1 ... Y2 coordinate relative to sprite base - - // check if Y2 coordinate is valid - bmi 8f // Y2 < 0, go next sprite - ldrh r4,[r3,#SSPRITE_H] // get sprite height - cmp r1,r4 // check sprite height - bge 8f // Y2 >= s->h, go next sprite - - // prepare relative start X2 coordinate of this line segment -> R5 - ldr r4,[r3,#SSPRITE_X0] // get table of X0 of lines - ldrb r5,[r4,r1] // get X2 coordinate s->x0[y2] -> R5 - // lsls r5,#2 // convert X2 coordinate to byte offset -// R5 ... relative X2 coordinate of sprite segment - - // get width W2 of this line segment -> R2 - ldr r4,[r3,#SSPRITE_W0] // get table of W0 of lines - ldrb r2,[r4,r1] // get W2 width s->w0[y2] -> R2 - // lsls r2,#2 // convert W2 width to bytes -// R2 ... W2 width of sprite segment - - // get address of sprite line s->img[Y2*s->wb] -> R1 - ldrh r4,[r3,#SSPRITE_WB] // get sprite pitch w->wb - muls r1,r1,r4 // sprite offset Y2*s->wb - ldr r4,[r3,#SSPRITE_IMG] // get sprite image - add r1,r4 // line address -> R1 -// R1 ... s->img[Y2*WB] address of sprite line - - // get absolute X coordinate of start of line -> R4 - ldrh r4,[r3,#SSPRITE_X] // get sprite X coordinate -> R4 - sxth r4,r4 // signed extend X -// R4 ... absolute X coordinate of start of line - - // get key color -> R3 - ldrb r3,[r3,#SSPRITE_KEYCOL] // get key color -> R3 -// R3 ... col key color - - // check if X coordinate >= 0 - adds r4,r4,r5 // s->X + X2, X coordinate of start of line -> R4 - bpl 3f // X >= 0, sprite does not lie below start - - // sprite correction - subs r5,r4 // X2 -= X - adds r2,r4 // W2 += X - movs r4,#0 // X = 0 - - // shift source address -> R1 -3: adds r1,r5 // add X2 -// R1 ... s->img[Y2*WB+X2] address of sprite line -// R5 ... - - // check line length W2 - subs r5,r6,r4 // W - X -> R5 - cmp r2,r5 // compare W2 with W - X - ble 4f // W2 <= W - X, length is OK - mov r2,r5 // limit segment width W2 -> R2 - - // check width W2 -4: tst r2,r2 // check W2 - ble 8f // no W2 left (W2 <= 0) - - // shift destination address - adds r0,r4 - -// R0 ... dbuf pointer to data buffer -// R1 ... s->img[Y2*WB+X2] address of sprite line -// R2 ... W2 width of sprite segment -// R3 ... col key color -// R4 ... -// R5 ... -// R6 ... W layer screen width -// R7 ... spr pointer to list of sprites - - // blit sprite line - bl BlitKey // blit sprite line - - // pop registers and continue loop -8: pop {r0-r2} // pop registers R0..R2 - b 2b // continue loop - - // pop registers and return -9: pop {r4-r7,pc} diff --git a/MCUME_pico/picovga_t4/render/vga_tile.S b/MCUME_pico/picovga_t4/render/vga_tile.S deleted file mode 100755 index 23539b0..0000000 --- a/MCUME_pico/picovga_t4/render/vga_tile.S +++ /dev/null @@ -1,431 +0,0 @@ - -// **************************************************************************** -// -// VGA render GF_TILE -// -// **************************************************************************** -// u16 par3; // SSEGM_PAR3 tile width (must be multiple of 4) -// u32 par; // SSEGM_PAR tile table with one column of tiles -// u32 par2; // SSEGM_PAR2 tile height - -#include "../define.h" // common definitions of C and ASM -#include "hardware/regs/sio.h" // registers of hardware divider -#include "hardware/regs/addressmap.h" // SIO base address - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// extern "C" u32* RenderTile(u32* cbuf, int x, int y, int w, sSegm* segm); - -// render tiles GF_TILE -// cbuf ... destination control buffer -// x ... start X coordinate (must be multiple of 4) -// y ... start Y coordinate -// w ... width of this segment (must be multiple of 4) -// segm ... video segment -// Output new cbuf pointer. -// 320 pixels takes on 151 MHz: tiles 8x8 3.5 us, tile 16x16 2 us, tiles 32x32 1.3 us, tiles 64x64 0.9 us. - -.thumb_func -.global RenderTile -RenderTile: - - // push registers - push {r1-r7,lr} - -// Input registers and stack content: -// R0 ... destination control buffer -// SP+0: R1 ... X coordinate -// SP+4: R2 ... Y coordinate -// SP+8: R3 ... width to display -// SP+12: R4 -// SP+16: R5 -// SP+20: R6 -// SP+24: R7 -// SP+28: LR -// SP+32: video segment - - // get pointer to video segment -> R4 - ldr r4,[sp,#32] // load video segment -> R4 - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R2 ... Y coordinate -// R3 ... remaining width -// R4 ... sSegm* - - // start divide Y/tile_height - ldr r5,RenderTile_pSioBase // get address of SIO base -> R5 - str r2,[r5,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate - ldr r2,[r4,#SSEGM_PAR2] // tile height -> R2 - str r2,[r5,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, tile height - -// - now we must wait at least 8 clock cycles to get result of division - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R2 ... tile height -// R3 ... remaining width -// R4 ... sSegm* -// R5 ... SIO_BASE - - // [6] get wrap width -> [SP+0] - ldrh r7,[r4,#SSEGM_WRAPX] // [2] get wrap width - movs r6,#3 // [1] mask to align to 32-bit - bics r7,r6 // [1] align wrap - str r7,[sp,#0] // [2] save wrap width - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R2 ... tile height -// R3 ... remaining width -// R4 ... sSegm* -// R5 ... SIO_BASE -// R6 ... align mask #3 -// [SP+0] ... wrap width - - // [1] align X coordinate to 32-bit -> R1 - bics r1,r6 // [1] align X - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R2 ... tile height -// R3 ... remaining width -// R4 ... sSegm* -// R5 ... SIO_BASE -// R6 ... align mask #3 -// [SP+0] ... wrap width - - // [3] align remaining width -> [SP+4] - bics r3,r6 // [1] align width - str r3,[sp,#4] // [2] store aligned width to [SP+4] - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R2 ... tile height -// R4 ... sSegm* -// R5 ... SIO_BASE -// [SP+0] ... wrap width -// [SP+4] ... remaining width - - // [4] prepare tile width -> [SP+8], R3 - ldrh r3,[r4,#SSEGM_PAR3] // [2] get tile width -> R3 - str r3,[sp,#8] // [2] save tile width -> [SP+8] - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R2 ... tile height -// R3 ... tile width -// R4 ... sSegm* -// R5 ... SIO_BASE -// [SP+0] ... wrap width -// [SP+4] ... remaining width -// [SP+8] ... tile width - - // load result of division Y/tile_height -> R6 Y relative at row, R7 Y row - // Note: QUOTIENT must be read last - ldr r6,[r5,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R6, Y coordinate relative to current row - ldr r7,[r5,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R7, index of row - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R2 ... tile height -// R3 ... tile width -// R4 ... sSegm* -// R5 ... SIO_BASE -// R6 ... Y relative at row -// R7 ... Y row index -// [SP+0] ... wrap width -// [SP+4] ... remaining width -// [SP+8] ... tile width - - // start divide X/tile_width - str r1,[r5,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, X coordinate - str r3,[r5,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, tile width - -// - now we must wait at least 8 clock cycles to get result of division - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R2 ... tile height -// R3 ... tile width -// R4 ... sSegm* -// R5 ... SIO_BASE -// R6 ... Y relative at row -// R7 ... Y row index -// [SP+0] ... wrap width -// [SP+4] ... remaining width -// [SP+8] ... tile width - - // [1] prepare tile size -> R2 - muls r2,r3 // [1] tile height*width -> size R2 - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R2 ... tile size -// R3 -// R4 ... sSegm* -// R5 ... SIO_BASE -// R6 ... Y relative at row -// R7 ... Y row index -// [SP+0] ... wrap width -// [SP+4] ... remaining width -// [SP+8] ... tile width - - // [7] base pointer to source data buffer (without X) -> LR, R7 - ldrh r3,[r4,#SSEGM_WB] // [2] get pitch of rows -> R3 - muls r7,r3 // [1] pitch * row (Y * WB) -> offset of row in data buffer - ldr r3,[r4,#SSEGM_DATA] // [2] pointer to data -> R3 - adds r7,r3 // [1] base address of data buffer - mov lr,r7 // [1] save base address - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R2 ... tile size -// R3 -// R4 ... sSegm* -// R5 ... SIO_BASE -// R6 ... Y relative at row -// R7 ... base address of data buffer (without X) -// LR ... base address of data buffer (without X) -// [SP+0] ... wrap width -// [SP+4] ... remaining width -// [SP+8] ... tile width - - // [6] tile base address -> R4 - ldr r3,[sp,#8] // [2] tile width - muls r6,r3 // [1] tile width * Y relative to row -> tile line offset R6 - ldr r4,[r4,#SSEGM_PAR] // [2] pointer to tiles - adds r4,r6 // [1] tile base address -> R4 - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R2 ... tile size -// R3 ... tile width -// R4 ... tile base address -// R5 ... SIO_BASE -// R6 -// R7 ... base address of data buffer (without X) -// LR ... base address of data buffer (without X) -// [SP+0] ... wrap width -// [SP+4] ... remaining width -// [SP+8] ... tile width - - // load result of division X/tile_width -> R6 X pixel relative, R5 tile position - // Note: QUOTIENT must be read last - ldr r6,[r5,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R6, X pixel relative in tile - ldr r5,[r5,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R5, tile position - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R2 ... tile size -// R3 ... tile width -// R4 ... tile base address -// R5 ... tile position -// R6 ... X pixel relative in tile -// R7 ... base address of data buffer (without X) -// LR ... base address of data buffer (without X) -// [SP+0] ... wrap width -// [SP+4] ... remaining width -// [SP+8] ... tile width - - // prepare current pointer to source data buffer with X -> R7 - adds r7,r5 // tile source address -> R7 - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R2 ... tile size -// R3 ... tile width -// R4 ... tile base address -// R5 -// R6 ... X pixel relative in tile -// R7 ... pointer to source data buffer (with X) -// LR ... base address of data buffer (without X) -// [SP+0] ... wrap width -// [SP+4] ... remaining width -// [SP+8] ... tile width - -// ---- render rest of first tile - - // check if X is tile-aligned - tst r6,r6 // check tile align - beq 2f // X is tile aligned - - // shift X coordinate - subs r5,r3,r6 // pixels remain in current tile -> R5 - adds r1,r5 // shift X coordinate (align to next tile) - - // shift remaining width - ldr r3,[sp,#4] // get remaining width - subs r3,r5 // shift width - str r3,[sp,#4] // store remaining width - - // write number of 4-pixels - lsrs r5,#2 // number of 4-pixels - stmia r0!,{r5} // save width - - // load tile index -> R3 - ldrb r3,[r7,#0] // [2] load tile index - adds r7,#1 // [1] increase tile address - - // write tile addres - muls r3,r2 // tile index * tile size = tile offset - add r3,r4 // [1] add tile base address - add r3,r6 // [1] shift to tile start - stmia r0!,{r3} // [3] save pointer - - // check end of segment - ldr r3,[sp,#0] // get wrap width - cmp r1,r3 // check end of segment - blo 2f // not end of segment - movs r1,#0 // reset X coordinate - mov r7,lr // get base pointer to tile data - - // prepare wrap width - start X -> R5 -2: ldr r3,[sp,#0] // get wrap width - subs r5,r3,r1 // pixels remaining to end of segment - ldr r3,[sp,#4] // total remaining width -> R3 - -// ---- start outer loop, render one part of segment -// Outer loop variables (* prepared before outer loop): -// R0 ... *pointer to destination control buffer -// R1 ... -// R2 ... *tile size -// R3 ... *total remaining width -// R4 ... *tile base address -// R5 ... *wrap width of this segment -// R6 ... -// R7 ... *pointer to source data buffer -// LR ... *base address of data buffer (without X) -// [SP+0] ... wrap width -// [SP+4] ... remaining width -// [SP+8] ... tile width - -RenderTile_OutLoop: - - // limit wrap width by total width -> R5 - cmp r5,r3 // compare wrap width with total width - bls 2f // width is OK - mov r5,r3 // limit wrap width - - // check if remain whole tile -2: ldr r1,[sp,#8] // get tile width -> R1 - cmp r5,r1 // check number of remaining pixels - bhs 5f // remain whole tiles - - // check if start of last tile remains - cmp r5,#4 // check start of last tile - blo 3f // all done - mov r1,r5 // width to render - -// ---- render start of last tile -// R0 ... *pointer to destination control buffer -// R1 ... *width to render in this segment -// R2 ... *tile size -// R3 ... *total remaining width -// R4 ... *tile base address -// R5 ... *wrap width of this segment -// R6 ... -// R7 ... *pointer to source data buffer (with X) -// LR ... *base address of data buffer (without X) -// [SP+0] ... wrap width -// [SP+4] ... remaining width -// [SP+8] ... tile width - -RenderTile_Last: - - // save width - lsrs r6,r1,#2 // number of 4-pixels - stmia r0!,{r6} // save width - - // load tile index -> R6 - ldrb r6,[r7,#0] // [2] load tile index - adds r7,#1 // [1] increase tile index - - // save tile addres - muls r6,r2 // multiply tile index * tile size - add r6,r4 // [1] add tile base address - stmia r0!,{r6} // [3] save pointer - - // check if continue with next segment - mov r7,lr // get base pointer to tile data - ldr r6,[sp,#8] // get tile width -> R6 - cmp r5,r6 // whole tile remains? - bhs RenderTile_OutLoop // render next segment - - // pop registers and return -3: pop {r1-r7,pc} - -// ---- prepare to render whole tiles -// R0 ... pointer to destination control buffer -// R1 -// R2 ... tile size -// R3 ... total remaining width -// R4 ... tile base address -// R5 ... width of this segment -// R6 -// R7 ... pointer to source data buffer (with X) -// LR ... base address of data buffer (without X) -// [SP+0] ... wrap width -// [SP+4] ... remaining width -// [SP+8] ... tile width - - // prepare number of 4-pixels to render -> R1 -5: lsrs r1,r5,#2 // shift to get number of tiles in multiply of 4-pixels -> R1 - lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5 - subs r3,r5 // update remaining width -> R3 - - ldr r5,[sp,#8] // get tile width -> R5 - lsrs r5,#2 // tile width/4 -> R5 - subs r1,r5 // number of 4-pixels - width/4 - adds r1,#1 // number of 4-pixels - (width/4-1) - -// ---- [11*N-1] start inner loop, render in one part of segment -// Inner loop variables (* prepared before inner loop): -// R0 ... *pointer to destination control buffer -// R1 ... *number of 4-pixels to generate - 1 (loop counter) -// R2 ... *tile size -// R3 ... *total remaining width -// R4 ... *tile base address -// R5 ... *tile width/4 -// R6 ... (temporary) -// R7 ... *pointer to source data buffer (with X) -// LR ... base address of data buffer (without X) -// [SP+0] ... wrap width -// [SP+4] ... remaining width -// [SP+8] ... tile width - -RenderTile_InLoop: - - // [3] load tile index -> R6 - ldrb r6,[r7,#0] // [2] load tile index - adds r7,#1 // [1] increase tile index - - // [2] get tile addres - muls r6,r2 // [1] multiply tile index * tile size - add r6,r4 // [1] add tile base address - - // [3] save control block - stmia r0!,{r5,r6} // [3] save width and pointer - - // [2,3] loop - subs r1,r5 // [1] shift loop counter, subtract tile width/4 - bhi RenderTile_InLoop // [1,2] > 0, render next whole tile - -// ---- end inner loop, continue with last tile, or start new part - - // continue to outer loop - adds r1,r5 // return size of last tile - subs r1,#1 // add "tile size/4 - 1" - ldr r5,[sp,#0] // load wrap width -> R5 - lsls r1,#2 // convert back to pixels - bne RenderTile_Last // render 1st half of last tile - mov r7,lr // get base pointer to tile data -> R7 - b RenderTile_OutLoop // go back to outer loop - - .align 2 -// pointer to SIO base -RenderTile_pSioBase: - .word SIO_BASE // addres of SIO base diff --git a/MCUME_pico/picovga_t4/render/vga_tile2.S b/MCUME_pico/picovga_t4/render/vga_tile2.S deleted file mode 100755 index 7e4db00..0000000 --- a/MCUME_pico/picovga_t4/render/vga_tile2.S +++ /dev/null @@ -1,376 +0,0 @@ - -// **************************************************************************** -// -// VGA render GF_TILE2 -// -// **************************************************************************** -// u16 par3; // SSEGM_PAR3 tile width (must be multiple of 4) -// u32 par; // SSEGM_PAR tile table with one column of tiles -// u32 par2; // SSEGM_PAR2 LOW tile height, HIGH tile width bytes - -#include "../define.h" // common definitions of C and ASM -#include "hardware/regs/sio.h" // registers of hardware divider -#include "hardware/regs/addressmap.h" // SIO base address - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// extern "C" u32* RenderTile2(u32* cbuf, int x, int y, int w, sSegm* segm); - -// render tiles GF_TILE2 -// cbuf ... destination control buffer -// x ... start X coordinate (must be multiple of 4) -// y ... start Y coordinate -// w ... width of this segment (must be multiple of 4) -// segm ... video segment -// Output new cbuf pointer. -// 320 pixels takes on 151 MHz: tiles 8x8 3.5 us, tile 16x16 2 us, tiles 32x32 1.3 us, tiles 64x64 0.9 us. - -.thumb_func -.global RenderTile2 -RenderTile2: - - // push registers - push {r2-r7,lr} - -// Input registers and stack content: -// R0 ... destination control buffer -// R1 ... X coordinate -// SP+0: R2 ... Y coordinate -// SP+4: R3 ... width to display -// SP+8: R4 -// SP+12: R5 -// SP+16: R6 -// SP+20: R7 -// SP+24: LR -// SP+28: video segment - - // get pointer to video segment -> R4 - ldr r4,[sp,#28] // load video segment -> R4 - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R2 ... Y coordinate -// R3 ... remaining width -// R4 ... sSegm* - - // start divide Y/tile_height - ldr r5,RenderTile_pSioBase // get address of SIO base -> R5 - str r2,[r5,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate - ldrh r2,[r4,#SSEGM_PAR2] // tile height -> R2 - str r2,[r5,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, tile height - -// - now we must wait at least 8 clock cycles to get result of division - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R3 ... remaining width -// R4 ... sSegm* -// R5 ... SIO_BASE - - // [6] get wrap width -> [SP+0] - ldrh r7,[r4,#SSEGM_WRAPX] // [2] get wrap width - movs r6,#3 // [1] mask to align to 32-bit - bics r7,r6 // [1] align wrap - str r7,[sp,#0] // [2] save wrap width - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R3 ... remaining width -// R4 ... sSegm* -// R5 ... SIO_BASE -// R6 ... align mask #3 -// [SP+0] ... wrap width - - // [1] align X coordinate to 32-bit -> R1 - bics r1,r6 // [1] align X - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R3 ... remaining width -// R4 ... sSegm* -// R5 ... SIO_BASE -// R6 ... align mask #3 -// [SP+0] ... wrap width - - // [3] align remaining width -> [SP+4] - bics r3,r6 // [1] align width - str r3,[sp,#4] // [2] store aligned width to [SP+4] - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R4 ... sSegm* -// R5 ... SIO_BASE -// [SP+0] ... wrap width -// [SP+4] ... remaining width - - // [2] prepare tile width -> R3 - ldrh r3,[r4,#SSEGM_PAR3] // [2] get tile width -> R3 - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R3 ... tile width -// R4 ... sSegm* -// R5 ... SIO_BASE -// [SP+0] ... wrap width -// [SP+4] ... remaining width - - // load result of division Y/tile_height -> R6 Y relative at row, R7 Y row - // Note: QUOTIENT must be read last - ldr r6,[r5,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R6, Y coordinate relative to current row - ldr r7,[r5,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R7, index of row - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R3 ... tile width -// R4 ... sSegm* -// R5 ... SIO_BASE -// R6 ... Y relative at row -// R7 ... Y row index -// [SP+0] ... wrap width -// [SP+4] ... remaining width - - // start divide X/tile_width - str r1,[r5,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, X coordinate - str r3,[r5,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, tile width - -// - now we must wait at least 8 clock cycles to get result of division - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R3 ... tile width -// R4 ... sSegm* -// R5 ... SIO_BASE -// R6 ... Y relative at row -// R7 ... Y row index -// [SP+0] ... wrap width -// [SP+4] ... remaining width - - // [7] base pointer to source data buffer (without X) -> LR, R7 - ldrh r2,[r4,#SSEGM_WB] // [2] get pitch of rows -> R2 - muls r7,r2 // [1] pitch * row (Y * WB) -> offset of row in data buffer - ldr r2,[r4,#SSEGM_DATA] // [2] pointer to data -> R2 - adds r7,r2 // [1] base address of data buffer - mov lr,r7 // [1] save base address - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R3 ... tile width -// R4 ... sSegm* -// R5 ... SIO_BASE -// R6 ... Y relative at row -// R7 ... base address of data buffer (without X) -// LR ... base address of data buffer (without X) -// [SP+0] ... wrap width -// [SP+4] ... remaining width - - // [6] tile base address -> R4 - ldrh r2,[r4,#SSEGM_PAR2+2] // [2] tile width bytes -> R2 - muls r6,r2 // [1] tile width bytes * Y relative to row -> tile line offset R6 - ldr r4,[r4,#SSEGM_PAR] // [2] pointer to tiles - adds r4,r6 // [1] tile base address -> R4 - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R3 ... tile width -// R4 ... tile base address -// R5 ... SIO_BASE -// R7 ... base address of data buffer (without X) -// LR ... base address of data buffer (without X) -// [SP+0] ... wrap width -// [SP+4] ... remaining width - - // load result of division X/tile_width -> R6 X pixel relative, R5 tile position - // Note: QUOTIENT must be read last - ldr r6,[r5,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R6, X pixel relative in tile - ldr r5,[r5,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R5, tile position - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R3 ... tile width -// R4 ... tile base address -// R5 ... tile position -// R6 ... X pixel relative in tile -// R7 ... base address of data buffer (without X) -// LR ... base address of data buffer (without X) -// [SP+0] ... wrap width -// [SP+4] ... remaining width - - // prepare current pointer to source data buffer with X -> R7 - adds r7,r5 // tile source address -> R7 - -// R0 ... pointer to destination control buffer -// R1 ... X coordinate -// R3 ... tile width -// R4 ... tile base address -// R6 ... X pixel relative in tile -// R7 ... pointer to source data buffer (with X) -// LR ... base address of data buffer (without X) -// [SP+0] ... wrap width -// [SP+4] ... remaining width - -// ---- render rest of first tile - - // check if X is tile-aligned - tst r6,r6 // check tile align - beq 2f // X is tile aligned - - // shift X coordinate - subs r5,r3,r6 // pixels remain in current tile -> R5 - adds r1,r5 // shift X coordinate (align to next tile) - - // shift remaining width - ldr r2,[sp,#4] // get remaining width - subs r2,r5 // shift width - str r2,[sp,#4] // store remaining width - - // write number of 4-pixels - lsrs r5,#2 // number of 4-pixels - stmia r0!,{r5} // save width - - // load tile index -> R2 - ldrb r2,[r7,#0] // [2] load tile index - adds r7,#1 // [1] increase tile address - - // write tile addres - muls r2,r3 // tile index * tile width = tile offset - add r2,r4 // [1] add tile base address - add r2,r6 // [1] shift to tile start - stmia r0!,{r2} // [3] save pointer - - // check end of segment - ldr r2,[sp,#0] // get wrap width - cmp r1,r2 // check end of segment - blo 2f // not end of segment - movs r1,#0 // reset X coordinate - mov r7,lr // get base pointer to tile data - - // prepare wrap width - start X -> R5 -2: ldr r2,[sp,#0] // get wrap width - subs r5,r2,r1 // pixels remaining to end of segment - ldr r2,[sp,#4] // total remaining width -> R3 - -// ---- start outer loop, render one part of segment -// R0 ... pointer to destination control buffer -// R2 ... total remaining width -// R3 ... tile width -// R4 ... tile base address -// R5 ... wrap width of this segment -// R7 ... pointer to source data buffer -// LR ... base address of data buffer (without X) -// [SP+0] ... wrap width - -RenderTile_OutLoop: - - // limit wrap width by total width -> R5 - cmp r5,r2 // compare wrap width with total width - bls 2f // width is OK - mov r5,r2 // limit wrap width - - // check if remain whole tile -2: cmp r5,r3 // check number of remaining pixels - bhs 5f // remain whole tiles - - // check if start of last tile remains - cmp r5,#4 // check start of last tile - blo 3f // all done - mov r1,r5 // width to render - -// ---- render start of last tile -// R0 ... pointer to destination control buffer -// R1 ... width to render in this segment -// R2 ... total remaining width -// R3 ... tile width -// R4 ... tile base address -// R5 ... wrap width of this segment -// R7 ... pointer to source data buffer (with X) -// LR ... base address of data buffer (without X) -// [SP+0] ... wrap width - -RenderTile_Last: - - // save width - lsrs r6,r1,#2 // number of 4-pixels - stmia r0!,{r6} // save width - - // load tile index -> R6 - ldrb r6,[r7,#0] // [2] load tile index - adds r7,#1 // [1] increase tile index - - // save tile addres - muls r6,r3 // multiply tile index * tile width - add r6,r4 // [1] add tile base address - stmia r0!,{r6} // [3] save pointer - - // check if continue with next segment - mov r7,lr // get base pointer to tile data - cmp r5,r3 // whole tile remains? - bhs RenderTile_OutLoop // render next segment - - // pop registers and return -3: pop {r2-r7,pc} - -// ---- prepare to render whole tiles -// R0 ... pointer to destination control buffer -// R2 ... total remaining width -// R3 ... tile width -// R4 ... tile base address -// R5 ... width of this segment -// R7 ... pointer to source data buffer (with X) -// LR ... base address of data buffer (without X) -// [SP+0] ... wrap width - - // prepare number of 4-pixels to render -> R1 -5: lsrs r1,r5,#2 // shift to get number of tiles in multiply of 4-pixels -> R1 - lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5 - subs r2,r5 // update remaining width -> R2 - - lsrs r5,r3,#2 // tile width/4 -> R5 - subs r1,r5 // number of 4-pixels - width/4 - adds r1,#1 // number of 4-pixels - (width/4-1) - -// ---- [11*N-1] start inner loop, render in one part of segment -// R0 ... pointer to destination control buffer -// R1 ... number of 4-pixels to generate - 1 (loop counter) -// R2 ... total remaining width -// R3 ... tile width -// R4 ... tile base address -// R5 ... tile width/4 -// R7 ... pointer to source data buffer (with X) -// LR ... base address of data buffer (without X) -// [SP+0] ... wrap width - -RenderTile_InLoop: - - // [3] load tile index -> R6 - ldrb r6,[r7,#0] // [2] load tile index - adds r7,#1 // [1] increase tile index - - // [2] get tile addres - muls r6,r3 // [1] multiply tile index * tile width - add r6,r4 // [1] add tile base address - - // [3] save control block - stmia r0!,{r5,r6} // [3] save width and pointer - - // [2,3] loop - subs r1,r5 // [1] shift loop counter, subtract tile width/4 - bhi RenderTile_InLoop // [1,2] > 0, render next whole tile - -// ---- end inner loop, continue with last tile, or start new part - - // continue to outer loop - adds r1,r5 // return size of last tile - subs r1,#1 // add "tile size/4 - 1" - ldr r5,[sp,#0] // load wrap width -> R5 - lsls r1,#2 // convert back to pixels - bne RenderTile_Last // render start of last tile - mov r7,lr // get base pointer to tile data -> R7 - b RenderTile_OutLoop // go back to outer loop - - .align 2 -// pointer to SIO base -RenderTile_pSioBase: - .word SIO_BASE // addres of SIO base diff --git a/MCUME_pico/picovga_t4/render/vga_tilepersp.S b/MCUME_pico/picovga_t4/render/vga_tilepersp.S deleted file mode 100755 index 8b9a720..0000000 --- a/MCUME_pico/picovga_t4/render/vga_tilepersp.S +++ /dev/null @@ -1,450 +0,0 @@ - -// **************************************************************************** -// -// VGA render GF_TILEPERSP -// -// **************************************************************************** -// data ... tile map -// par ... column of tile images -// par2 ... pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL)) -// par3 ... LOW8=number of bits of tile width and height, HIGH8=horizon offset -// wb ... LOW8=number of bits of tile map width, HIGH8=number of bits of tile map height -// wrapy ... segment height - -#include "../define.h" // common definitions of C and ASM -#include "hardware/regs/sio.h" // registers of hardware divider -#include "hardware/regs/addressmap.h" // SIO base address - -#define ACCUM0_OFFSET0 0 -#define ACCUM1_OFFSET0 4 -#define BASE0_OFFSET0 8 -#define BASE1_OFFSET0 12 -#define BASE2_OFFSET0 16 -#define POP_LANE0_OFFSET0 20 -#define POP_LANE1_OFFSET0 24 -#define POP_FULL_OFFSET0 28 -#define PEEK_LANE0_OFFSET0 32 -#define PEEK_LANE1_OFFSET0 36 -#define PEEK_FULL_OFFSET0 40 -#define CTRL_LANE0_OFFSET0 44 -#define CTRL_LANE1_OFFSET0 48 -#define ACCUM0_ADD_OFFSET0 52 -#define ACCUM1_ADD_OFFSET0 56 -#define BASE_1AND0_OFFSET0 60 - -#define ACCUM0_OFFSET1 64 -#define ACCUM1_OFFSET1 68 -#define BASE0_OFFSET1 72 -#define BASE1_OFFSET1 76 -#define BASE2_OFFSET1 80 -#define POP_LANE0_OFFSET1 84 -#define POP_LANE1_OFFSET1 88 -#define POP_FULL_OFFSET1 92 -#define PEEK_LANE0_OFFSET1 96 -#define PEEK_LANE1_OFFSET1 100 -#define PEEK_FULL_OFFSET1 104 -#define CTRL_LANE0_OFFSET1 108 -#define CTRL_LANE1_OFFSET1 112 -#define ACCUM0_ADD_OFFSET1 116 -#define ACCUM1_ADD_OFFSET1 120 -#define BASE_1AND0_OFFSET1 124 - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// extern "C" u32* RenderTilePersp(u32* cbuf, int x, int y, int w, sSegm* segm); - -// render tiles with perspective GF_TILEPERSP -// using hardware interpolator inter0 and inter1 (their state is not saved during interrup) -// R0 ... pointer to destination data buffer -// R1 ... start X coordinate (not used) -// R2 ... start Y coordinate (in graphics lines) -// R3 ... width to display (must be multiple of 4) -// [stack] ... segm video segment sSegm -// Output new pointer to data buffer. -// 320 pixels takes ?? us on 151 MHz. - -.thumb_func -.global RenderTilePersp -RenderTilePersp: - -// Input registers and stack: -// R0 ... pointer to destination data buffer -// R1 ... X coordinate (not used) -// R2 ... Y coordinate -// SP+0: R3 ... remaining width -// SP+4: R4 -// SP+8: R5 -// SP+12: R6 -// SP+16: R7 -// SP+20: LR -// SP+24: video segment - - // push registers - push {r3-r7,lr} - -// ---- prepare registers - - // get pointer to video segment -> R4 - ldr r4,[sp,#24] // load video segment -> R4 - -// R0 ... pointer to data buffer -// R2 ... Y coordinate -// R3 ... remaining width -// R4 ... video segment - - // load horizon offset -> R1, check if use perspective - ldr r6,RenderTilePersp_pSioBase // get address of SIO base -> R6 - ldrh r5,[r4,#SSEGM_WRAPY] // get segment height -> R5 - ldrb r1,[r4,#SSEGM_PAR3+1] // get horizon offset -> R1 - sxtb r1,r1 // signed extension - lsls r1,#2 // horizon * 4, horizon = 0 ? - bne 2f // use perspective - - // not using perspective, start Y coordinate y0 = y - h/2 -> R12 - lsrs r5,#1 // segment height/2 -> R5 - subs r2,r5 // y - h/2 -> R2 - mov r12,r2 // current coordinate Y0 = y - h/2 -> R12 - - // prepare divide result to get 1< R5 - str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL - b 4f - - // using perspective, check ceilling mode -2: bpl 3f // horizon is not negative - subs r2,r5,r2 // negate, y = h - y - subs r2,#1 // y = h - 1 - y - negs r1,r1 // absolute value of horizon - - // prepare current coordinate Y0 = y - h -> R12 -3: subs r7,r2,r5 // y - h = current Y coordinate -> R7 - mov r12,r7 // store current coordinate Y0 -> R12 - - // start calculating distance coefficient dist = FRACTMUL*h/(y + horiz) - lsls r5,#FRACT // segment height * FRACTMUL -> R5 - str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h - adds r2,r1 // horizon + y -> R2 - str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz - -// R0 ... pointer to data buffer -// R3 ... remaining width -// R4 ... video segment -// R12 ... current coordinate Y0 - - // prepare start coordinate X0 = -w/2 -> LR -4: lsrs r5,r3,#1 // width/2 - negs r5,r5 // negate - mov lr,r5 // store start coordinate X0 -> LR - - // prepare number of 4-pixels (loop counter) -> R7 - lsrs r7,r3,#2 // width/4 -> R7 - - // prepare address of interpolator 0 base -> R3 - ldr r3,RenderTilePersp_Interp // get address of interpolator 0 base -> R3 - -// R0 ... pointer to data buffer -// R3 ... interpolator base -// R4 ... video segment -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 - -// ---- setup interpolator 0 to get tile index - - // set tile map base to base2 - ldr r6,[r4,#SSEGM_DATA] // load tile map base - str r6,[r3,#BASE2_OFFSET0] // set tile map base - - // set control word of lane 0: shift=FRACT+tilebits, mask=0..mapwbits-1 - ldr r6,RenderTilePersp_Ctrl // load control word - ldrb r1,[r4,#SSEGM_PAR3] // get tile width and height -> R1 - str r1,[sp,#0] // save tile size -> [SP+0] - adds r6,r1 // FRACT + tilebits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required) - ldrb r2,[r4,#SSEGM_WB] // number of bits of tile map width mapwbits -> R2 - subs r5,r2,#1 // mapwbits - 1 - lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position - orrs r6,r5 // add to control word - str r6,[r3,#CTRL_LANE0_OFFSET0] // set control word of lane 0 - - // set control word of lane 1: shift=FRACT+tilebits-mapwbits, - // mask=mapwbits..mapwbits+maphbits-1 - subs r6,r2 // FRACT + tilebits - mapwbits - lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift mapwbits to mask LSB position - orrs r6,r2 // add mapwbits to control word - ldrb r2,[r4,#SSEGM_WB+1] // number of bits of tile map height maphbits -> R2 - lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift maphbits to mask MSB position - adds r6,r2 // add to control word - str r6,[r3,#CTRL_LANE1_OFFSET0] // set control word of lane 1 - -// ---- setup interpolator 1 to get pixel index - - // set tile image to base2 - ldr r6,[r4,#SSEGM_PAR] // load tile image base - str r6,[r3,#BASE2_OFFSET1] // set tile image base - - // set control word of lane 0: shift=FRACT, mask=0..tilebits-1 - ldr r6,RenderTilePersp_Ctrl // load control word - subs r5,r1,#1 // tilebits - 1 - lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position - orrs r6,r5 // add to control word - str r6,[r3,#CTRL_LANE0_OFFSET1] // set control word of lane 0 - - // set control word of lane 1: shift=FRACT-tilebits, mask=tilebits..tilebits*2-1 - subs r6,r1 // FRACT - tilebits - lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift to mask LSB position - orrs r6,r5 // add tilebits to control word - lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift tilebits to mask MSB position - adds r6,r1 // add to control word - str r6,[r3,#CTRL_LANE1_OFFSET1] // set control word of lane 1 - -// R0 ... pointer to data buffer -// R3 ... interpolator base -// R4 ... video segment -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 -// [SP+0] ... number of bits of tile width and height - -// ---- set matrix - - // get pointer to matrix -> R4 - ldr r4,[r4,#SSEGM_PAR2] // get pointer to matrix -> R4 - - // get distance coefficient dist -> R1 - ldr r1,RenderTilePersp_pSioBase // get address of SIO base -> R1 - ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient - -// r4+0 ... m11 -// r4+4 ... m12 -// r4+8 ... m13 -// r4+12 ... m21 -// r4+16 ... m22 -// r4+20 ... m23 - - // set m11 -> R5 base0 - ldr r5,[r4,#0] // load m11 - muls r5,r1 // m11*dist - asrs r5,#FRACT // (m11*dist)>>FRACT - str r5,[r3,#BASE0_OFFSET0] // set base0 - str r5,[r3,#BASE0_OFFSET1] // set base0 - - // set m21 -> R6 base1 - ldr r6,[r4,#12] // load m21 - muls r6,r1 // m21*dist - asrs r6,#FRACT // (m21*dist)>>FRACT - str r6,[r3,#BASE1_OFFSET0] // set base1 - str r6,[r3,#BASE1_OFFSET1] // set base1 - -// R0 ... pointer to data buffer -// R1 ... distance coefficient -// R3 ... interpolator base -// R4 ... pointer to matrix -// R5 ... m11 -// R6 ... m21 -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 -// [SP+0] ... number of bits of tile width and height - - // set x0*m11 + y0*m12 + m13 -> accum0 - mov r2,lr // start coordinate X0 -> X2 - muls r5,r2 // x0*m11 -> R5 - muls r2,r6 // x0*m21 -> R2 - mov lr,r1 // save distance coefficient -> LR - ldr r6,[r4,#4] // load m12 -> R6 - muls r1,r6 // m12*dist -> R1 - asrs r1,#FRACT // (m12*dist)>>FRACT -> R1 - mov r6,r12 // load coordinate Y0 -> R6 - muls r1,r6 // y0*m12 -> R1 - adds r5,r1 // x0*m11 + y0*m12 -> R5 - ldr r1,[r4,#8] // load m13 -> R1 - adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5 - str r5,[r3,#ACCUM0_OFFSET0] // set accum0 - str r5,[r3,#ACCUM0_OFFSET1] // set accum0 - -// R0 ... pointer to data buffer -// R2 ... x0*m21 -// R3 ... interpolator base -// R4 ... pointer to matrix -// R6 ... current coordinate Y0 -// R7 ... width/4 -// LR ... distance coefficient -// [SP+0] ... number of bits of tile width and height - - // set x0*m21 + y0*m22 + m23 -> accum1 - ldr r1,[r4,#16] // load m22 -> R1 - mov r5,lr // distance coefficient -> R5 - muls r1,r5 // m22*dist - asrs r1,#FRACT // (m22*dist)>>FRACT -> R1 - muls r1,r6 // y0*m22 -> R1 - adds r2,r1 // x0*m21 + y0*m22 -> R2 - ldr r1,[r4,#20] // load m23 -> R1 - adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2 - str r2,[r3,#ACCUM1_OFFSET0] // set accum1 - str r2,[r3,#ACCUM1_OFFSET1] // set accum1 - -// ---- process odd 4-pixel - - // prepare tile bits * 2 - ldr r6,[sp,#0] // get tile bits - lsls r6,#1 // tile bits * 2 - -// R0 ... pointer to destination data buffer -// R1 ... (temporary - pixel accumulator 1) -// R2 ... (temporary - pixel accumulator 2) -// R3 ... interpolator base -// R4 ... (temporary - get pointer to tile map, load tile index) -// R5 ... (temporary - get pointer to pixel, load pixel) -// R6 ... tilebits*2 -// R7 ... width/4 (loop counter) -// [SP+0] ... number of bits of tile width and height - - // check odd 4-pixels - lsrs r7,#1 // width/4/2 - bcc 2f // no odd 4-pixel - - // [7] load 1st pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r1,[r5,r4] // [2] load pixel - - // [9] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r4,[r5,r4] // [2] load pixel - lsls r4,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - - // [9] load 3rd pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r4,[r5,r4] // [2] load pixel - lsls r4,#16 // [1] shift 2 bytes left - orrs r1,r4 // [1] add pixel to accumulator - - // [9] load 4th pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r4,[r5,r4] // [2] load pixel - lsls r4,#24 // [1] shift 3 bytes left - orrs r1,r4 // [1] add pixel to accumulator - - // [2] store 4 pixels - stmia r0!,{r1} // [2] store 4 pixels - - // check number of remaining pixels -2: tst r7,r7 // check number of pixels - beq 8f // end - -// ---- [74 per 8 pixels] inner loop -// R0 ... pointer to destination data buffer -// R1 ... (temporary - pixel accumulator 1) -// R2 ... (temporary - pixel accumulator 2) -// R3 ... interpolator base -// R4 ... (temporary - get pointer to tile map, load tile index) -// R5 ... (temporary - get pointer to pixel, load pixel) -// R6 ... tilebits*2 -// R7 ... width/8 (loop counter) - - // [7] load 1st pixel -6: ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r1,[r5,r4] // [2] load pixel - - // [9] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r4,[r5,r4] // [2] load pixel - lsls r4,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - - // [9] load 3rd pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r4,[r5,r4] // [2] load pixel - lsls r4,#16 // [1] shift 2 bytes left - orrs r1,r4 // [1] add pixel to accumulator - - // [9] load 4th pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r4,[r5,r4] // [2] load pixel - lsls r4,#24 // [1] shift 3 bytes left - orrs r1,r4 // [1] add pixel to accumulator - - // [7] load 1st pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r2,[r5,r4] // [2] load pixel - - // [9] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r4,[r5,r4] // [2] load pixel - lsls r4,#8 // [1] shift 1 byte left - orrs r2,r4 // [1] add pixel to accumulator - - // [9] load 3rd pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r4,[r5,r4] // [2] load pixel - lsls r4,#16 // [1] shift 2 bytes left - orrs r2,r4 // [1] add pixel to accumulator - - // [9] load 4th pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r4,[r5,r4] // [2] load pixel - lsls r4,#24 // [1] shift 3 bytes left - orrs r2,r4 // [1] add pixel to accumulator - - // [3] store 8 pixels - stmia r0!,{r1,r2} // [3] store 8 pixels - - // [2,3] loop counter - subs r7,#1 // [1] 8-pixel counter - bne 6b // [1,2] next 8-pixels - - // pop registers -8: pop {r3-r7,pc} - - .align 2 -// pointer to SIO base -RenderTilePersp_pSioBase: - .word SIO_BASE // addres of SIO base - -// pointer to Interp0 base -RenderTilePersp_Interp: - .word SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET // addres of interpolator 0 base - -RenderTilePersp_Ctrl: // lane control word - .word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT< R4 - ldr r4,[sp,#24] // load video segment -> R4 - -// R0 ... pointer to data buffer -// R2 ... Y coordinate -// R3 ... remaining width -// R4 ... video segment - - // load horizon offset -> R1, check if use perspective - ldr r6,RenderTilePersp_pSioBase // get address of SIO base -> R6 - ldrh r5,[r4,#SSEGM_WRAPY] // get segment height -> R5 - ldrb r1,[r4,#SSEGM_PAR3+1] // get horizon offset -> R1 - sxtb r1,r1 // signed extension - lsls r1,#2 // horizon * 4, horizon = 0 ? - bne 2f // use perspective - - // not using perspective, start Y coordinate y0 = y - h/2 -> R12 - lsrs r5,#1 // segment height/2 -> R5 - subs r2,r5 // y - h/2 -> R2 - mov r12,r2 // current coordinate Y0 = y - h/2 -> R12 - - // prepare divide result to get 1< R5 - str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL - b 4f - - // using perspective, check ceilling mode -2: bpl 3f // horizon is not negative - subs r2,r5,r2 // negate, y = h - y - subs r2,#1 // y = h - 1 - y - negs r1,r1 // absolute value of horizon - - // prepare current coordinate Y0 = y - h -> R12 -3: subs r7,r2,r5 // y - h = current Y coordinate -> R7 - mov r12,r7 // store current coordinate Y0 -> R12 - - // start calculating distance coefficient dist = FRACTMUL*h/(y + horiz) - lsls r5,#FRACT // segment height * FRACTMUL -> R5 - str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h - adds r2,r1 // horizon + y -> R2 - str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz - -// R0 ... pointer to data buffer -// R3 ... remaining width -// R4 ... video segment -// R12 ... current coordinate Y0 - - // prepare start coordinate X0 = -w/2 -> LR -4: lsrs r5,r3,#1 // width/2 - negs r5,r5 // negate - mov lr,r5 // store start coordinate X0 -> LR - - // prepare number of 4-pixels (loop counter) -> R7 - lsrs r7,r3,#2 // width/4 -> R7 - - // prepare address of interpolator 0 base -> R3 - ldr r3,RenderTilePersp_Interp // get address of interpolator 0 base -> R3 - -// R0 ... pointer to data buffer -// R3 ... interpolator base -// R4 ... video segment -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 - -// ---- setup interpolator 0 to get tile index - - // set tile map base to base2 - ldr r6,[r4,#SSEGM_DATA] // load tile map base - str r6,[r3,#BASE2_OFFSET0] // set tile map base - - // set control word of lane 0: shift=FRACT+tilebits, mask=0..mapwbits-1 - ldr r6,RenderTilePersp_Ctrl // load control word - ldrb r1,[r4,#SSEGM_PAR3] // get tile width and height -> R1 - str r1,[sp,#0] // save tile size -> [SP+0] - adds r6,r1 // FRACT + tilebits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required) - ldrb r2,[r4,#SSEGM_WB] // number of bits of tile map width mapwbits -> R2 - subs r5,r2,#1 // mapwbits - 1 - lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position - orrs r6,r5 // add to control word - str r6,[r3,#CTRL_LANE0_OFFSET0] // set control word of lane 0 - - // set control word of lane 1: shift=FRACT+tilebits-mapwbits, - // mask=mapwbits..mapwbits+maphbits-1 - subs r6,r2 // FRACT + tilebits - mapwbits - lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift mapwbits to mask LSB position - orrs r6,r2 // add mapwbits to control word - ldrb r2,[r4,#SSEGM_WB+1] // number of bits of tile map height maphbits -> R2 - lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift maphbits to mask MSB position - adds r6,r2 // add to control word - str r6,[r3,#CTRL_LANE1_OFFSET0] // set control word of lane 1 - -// ---- setup interpolator 1 to get pixel index - - // set tile image to base2 - ldr r6,[r4,#SSEGM_PAR] // load tile image base - str r6,[r3,#BASE2_OFFSET1] // set tile image base - - // set control word of lane 0: shift=FRACT, mask=0..tilebits-1 - ldr r6,RenderTilePersp_Ctrl // load control word - subs r5,r1,#1 // tilebits - 1 - lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position - orrs r6,r5 // add to control word - str r6,[r3,#CTRL_LANE0_OFFSET1] // set control word of lane 0 - - // set control word of lane 1: shift=FRACT-tilebits, mask=tilebits..tilebits*2-1 - subs r6,r1 // FRACT - tilebits - lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift to mask LSB position - orrs r6,r5 // add tilebits to control word - lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift tilebits to mask MSB position - adds r6,r1 // add to control word - str r6,[r3,#CTRL_LANE1_OFFSET1] // set control word of lane 1 - -// R0 ... pointer to data buffer -// R3 ... interpolator base -// R4 ... video segment -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 -// [SP+0] ... number of bits of tile width and height - -// ---- set matrix - - // get pointer to matrix -> R4 - ldr r4,[r4,#SSEGM_PAR2] // get pointer to matrix -> R4 - - // get distance coefficient dist -> R1 - ldr r1,RenderTilePersp_pSioBase // get address of SIO base -> R1 - ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient - -// r4+0 ... m11 -// r4+4 ... m12 -// r4+8 ... m13 -// r4+12 ... m21 -// r4+16 ... m22 -// r4+20 ... m23 - - // set m11 -> R5 base0 - ldr r5,[r4,#0] // load m11 - muls r5,r1 // m11*dist - asrs r5,#FRACT // (m11*dist)>>FRACT - asrs r2,r5,#1 // delta/2 - adds r2,r5 // delta*1.5 - str r2,[r3,#BASE0_OFFSET0] // set base0 - str r2,[r3,#BASE0_OFFSET1] // set base0 - - // set m21 -> R6 base1 - ldr r6,[r4,#12] // load m21 - muls r6,r1 // m21*dist - asrs r6,#FRACT // (m21*dist)>>FRACT - asrs r2,r6,#1 // delta/2 - adds r2,r6 // delta*1.5 - str r2,[r3,#BASE1_OFFSET0] // set base1 - str r2,[r3,#BASE1_OFFSET1] // set base1 - -// R0 ... pointer to data buffer -// R1 ... distance coefficient -// R3 ... interpolator base -// R4 ... pointer to matrix -// R5 ... m11 -// R6 ... m21 -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 -// [SP+0] ... number of bits of tile width and height - - // set x0*m11 + y0*m12 + m13 -> accum0 - mov r2,lr // start coordinate X0 -> X2 - muls r5,r2 // x0*m11 -> R5 - muls r2,r6 // x0*m21 -> R2 - mov lr,r1 // save distance coefficient -> LR - ldr r6,[r4,#4] // load m12 -> R6 - muls r1,r6 // m12*dist -> R1 - asrs r1,#FRACT // (m12*dist)>>FRACT -> R1 - mov r6,r12 // load coordinate Y0 -> R6 - muls r1,r6 // y0*m12 -> R1 - adds r5,r1 // x0*m11 + y0*m12 -> R5 - ldr r1,[r4,#8] // load m13 -> R1 - adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5 - str r5,[r3,#ACCUM0_OFFSET0] // set accum0 - str r5,[r3,#ACCUM0_OFFSET1] // set accum0 - -// R0 ... pointer to data buffer -// R2 ... x0*m21 -// R3 ... interpolator base -// R4 ... pointer to matrix -// R6 ... current coordinate Y0 -// R7 ... width/4 -// LR ... distance coefficient -// [SP+0] ... number of bits of tile width and height - - // set x0*m21 + y0*m22 + m23 -> accum1 - ldr r1,[r4,#16] // load m22 -> R1 - mov r5,lr // distance coefficient -> R5 - muls r1,r5 // m22*dist - asrs r1,#FRACT // (m22*dist)>>FRACT -> R1 - muls r1,r6 // y0*m22 -> R1 - adds r2,r1 // x0*m21 + y0*m22 -> R2 - ldr r1,[r4,#20] // load m23 -> R1 - adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2 - str r2,[r3,#ACCUM1_OFFSET0] // set accum1 - str r2,[r3,#ACCUM1_OFFSET1] // set accum1 - -// ---- process odd 4-pixel - - // prepare tile bits * 2 - ldr r6,[sp,#0] // get tile bits - lsls r6,#1 // tile bits * 2 - -// R0 ... pointer to destination data buffer -// R1 ... (temporary - pixel accumulator 1) -// R2 ... (temporary - pixel accumulator 2) -// R3 ... interpolator base -// R4 ... (temporary - get pointer to tile map, load tile index) -// R5 ... (temporary - get pointer to pixel, load pixel) -// R6 ... tilebits*2 -// R7 ... width/4 (loop counter) -// [SP+0] ... number of bits of tile width and height - - // check odd 4-pixels - lsrs r7,#1 // width/4/2 - bcc 2f // no odd 4-pixel - - // [7] load 1st pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r1,[r5,r4] // [2] load pixel - - // [9] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r4,[r5,r4] // [2] load pixel - lsls r4,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - - // [11] load 3rd pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r4,[r5,r4] // [2] load pixel - lsls r4,#16 // [1] shift 2 bytes left - orrs r1,r4 // [1] add pixel to accumulator - lsls r4,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - - // [2] store 4 pixels - stmia r0!,{r1} // [2] store 4 pixels - - // check number of remaining pixels -2: tst r7,r7 // check number of pixels - beq 8f // end - -// ---- [60 per 8 pixels] inner loop -// R0 ... pointer to destination data buffer -// R1 ... (temporary - pixel accumulator 1) -// R2 ... (temporary - pixel accumulator 2) -// R3 ... interpolator base -// R4 ... (temporary - get pointer to tile map, load tile index) -// R5 ... (temporary - get pointer to pixel, load pixel) -// R6 ... tilebits*2 -// R7 ... width/8 (loop counter) - - // [7] load 1st pixel -6: ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r1,[r5,r4] // [2] load pixel - - // [9] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r4,[r5,r4] // [2] load pixel - lsls r4,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - - // [11] load 3rd pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r4,[r5,r4] // [2] load pixel - lsls r4,#16 // [1] shift 2 bytes left - orrs r1,r4 // [1] add pixel to accumulator - lsls r4,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - - // [7] load 1st pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r2,[r5,r4] // [2] load pixel - - // [9] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r4,[r5,r4] // [2] load pixel - lsls r4,#8 // [1] shift 1 byte left - orrs r2,r4 // [1] add pixel to accumulator - - // [11] load 3rd pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r4,[r5,r4] // [2] load pixel - lsls r4,#16 // [1] shift 2 bytes left - orrs r2,r4 // [1] add pixel to accumulator - lsls r4,#8 // [1] shift 1 byte left - orrs r2,r4 // [1] add pixel to accumulator - - // [3] store 8 pixels - stmia r0!,{r1,r2} // [3] store 8 pixels - - // [2,3] loop counter - subs r7,#1 // [1] 8-pixel counter - bne 6b // [1,2] next 8-pixels - - // pop registers -8: pop {r3-r7,pc} - - .align 2 -// pointer to SIO base -RenderTilePersp_pSioBase: - .word SIO_BASE // addres of SIO base - -// pointer to Interp0 base -RenderTilePersp_Interp: - .word SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET // addres of interpolator 0 base - -RenderTilePersp_Ctrl: // lane control word - .word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT< R4 - ldr r4,[sp,#24] // load video segment -> R4 - -// R0 ... pointer to data buffer -// R2 ... Y coordinate -// R3 ... remaining width -// R4 ... video segment - - // load horizon offset -> R1, check if use perspective - ldr r6,RenderTilePersp_pSioBase // get address of SIO base -> R6 - ldrh r5,[r4,#SSEGM_WRAPY] // get segment height -> R5 - ldrb r1,[r4,#SSEGM_PAR3+1] // get horizon offset -> R1 - sxtb r1,r1 // signed extension - lsls r1,#2 // horizon * 4, horizon = 0 ? - bne 2f // use perspective - - // not using perspective, start Y coordinate y0 = y - h/2 -> R12 - lsrs r5,#1 // segment height/2 -> R5 - subs r2,r5 // y - h/2 -> R2 - mov r12,r2 // current coordinate Y0 = y - h/2 -> R12 - - // prepare divide result to get 1< R5 - str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL - b 4f - - // using perspective, check ceilling mode -2: bpl 3f // horizon is not negative - subs r2,r5,r2 // negate, y = h - y - subs r2,#1 // y = h - 1 - y - negs r1,r1 // absolute value of horizon - - // prepare current coordinate Y0 = y - h -> R12 -3: subs r7,r2,r5 // y - h = current Y coordinate -> R7 - mov r12,r7 // store current coordinate Y0 -> R12 - - // start calculating distance coefficient dist = FRACTMUL*h/(y + horiz) - lsls r5,#FRACT // segment height * FRACTMUL -> R5 - str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h - adds r2,r1 // horizon + y -> R2 - str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz - -// R0 ... pointer to data buffer -// R3 ... remaining width -// R4 ... video segment -// R12 ... current coordinate Y0 - - // prepare start coordinate X0 = -w/2 -> LR -4: lsrs r5,r3,#1 // width/2 - negs r5,r5 // negate - mov lr,r5 // store start coordinate X0 -> LR - - // prepare number of 4-pixels (loop counter) -> R7 - lsrs r7,r3,#2 // width/4 -> R7 - - // prepare address of interpolator 0 base -> R3 - ldr r3,RenderTilePersp_Interp // get address of interpolator 0 base -> R3 - -// R0 ... pointer to data buffer -// R3 ... interpolator base -// R4 ... video segment -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 - -// ---- setup interpolator 0 to get tile index - - // set tile map base to base2 - ldr r6,[r4,#SSEGM_DATA] // load tile map base - str r6,[r3,#BASE2_OFFSET0] // set tile map base - - // set control word of lane 0: shift=FRACT+tilebits, mask=0..mapwbits-1 - ldr r6,RenderTilePersp_Ctrl // load control word - ldrb r1,[r4,#SSEGM_PAR3] // get tile width and height -> R1 - str r1,[sp,#0] // save tile size -> [SP+0] - adds r6,r1 // FRACT + tilebits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required) - ldrb r2,[r4,#SSEGM_WB] // number of bits of tile map width mapwbits -> R2 - subs r5,r2,#1 // mapwbits - 1 - lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position - orrs r6,r5 // add to control word - str r6,[r3,#CTRL_LANE0_OFFSET0] // set control word of lane 0 - - // set control word of lane 1: shift=FRACT+tilebits-mapwbits, - // mask=mapwbits..mapwbits+maphbits-1 - subs r6,r2 // FRACT + tilebits - mapwbits - lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift mapwbits to mask LSB position - orrs r6,r2 // add mapwbits to control word - ldrb r2,[r4,#SSEGM_WB+1] // number of bits of tile map height maphbits -> R2 - lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift maphbits to mask MSB position - adds r6,r2 // add to control word - str r6,[r3,#CTRL_LANE1_OFFSET0] // set control word of lane 1 - -// ---- setup interpolator 1 to get pixel index - - // set tile image to base2 - ldr r6,[r4,#SSEGM_PAR] // load tile image base - str r6,[r3,#BASE2_OFFSET1] // set tile image base - - // set control word of lane 0: shift=FRACT, mask=0..tilebits-1 - ldr r6,RenderTilePersp_Ctrl // load control word - subs r5,r1,#1 // tilebits - 1 - lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position - orrs r6,r5 // add to control word - str r6,[r3,#CTRL_LANE0_OFFSET1] // set control word of lane 0 - - // set control word of lane 1: shift=FRACT-tilebits, mask=tilebits..tilebits*2-1 - subs r6,r1 // FRACT - tilebits - lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift to mask LSB position - orrs r6,r5 // add tilebits to control word - lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift tilebits to mask MSB position - adds r6,r1 // add to control word - str r6,[r3,#CTRL_LANE1_OFFSET1] // set control word of lane 1 - -// R0 ... pointer to data buffer -// R3 ... interpolator base -// R4 ... video segment -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 -// [SP+0] ... number of bits of tile width and height - -// ---- set matrix - - // get pointer to matrix -> R4 - ldr r4,[r4,#SSEGM_PAR2] // get pointer to matrix -> R4 - - // get distance coefficient dist -> R1 - ldr r1,RenderTilePersp_pSioBase // get address of SIO base -> R1 - ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient - -// r4+0 ... m11 -// r4+4 ... m12 -// r4+8 ... m13 -// r4+12 ... m21 -// r4+16 ... m22 -// r4+20 ... m23 - - // set m11 -> R5 base0 - ldr r5,[r4,#0] // load m11 - muls r5,r1 // m11*dist - asrs r5,#FRACT-1 // (m11*dist)>>(FRACT-1) ... 2*delta - str r5,[r3,#BASE0_OFFSET0] // set base0 - str r5,[r3,#BASE0_OFFSET1] // set base0 - asrs r5,#1 // (m11*dist)>>FRACT - - // set m21 -> R6 base1 - ldr r6,[r4,#12] // load m21 - muls r6,r1 // m21*dist - asrs r6,#FRACT-1 // (m21*dist)>>(FRACT-1) ... 2*delta - str r6,[r3,#BASE1_OFFSET0] // set base1 - str r6,[r3,#BASE1_OFFSET1] // set base1 - asrs r6,#1 // (m21*dist)>>FRACT - -// R0 ... pointer to data buffer -// R1 ... distance coefficient -// R3 ... interpolator base -// R4 ... pointer to matrix -// R5 ... m11 -// R6 ... m21 -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 -// [SP+0] ... number of bits of tile width and height - - // set x0*m11 + y0*m12 + m13 -> accum0 - mov r2,lr // start coordinate X0 -> X2 - muls r5,r2 // x0*m11 -> R5 - muls r2,r6 // x0*m21 -> R2 - mov lr,r1 // save distance coefficient -> LR - ldr r6,[r4,#4] // load m12 -> R6 - muls r1,r6 // m12*dist -> R1 - asrs r1,#FRACT // (m12*dist)>>FRACT -> R1 - mov r6,r12 // load coordinate Y0 -> R6 - muls r1,r6 // y0*m12 -> R1 - adds r5,r1 // x0*m11 + y0*m12 -> R5 - ldr r1,[r4,#8] // load m13 -> R1 - adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5 - str r5,[r3,#ACCUM0_OFFSET0] // set accum0 - str r5,[r3,#ACCUM0_OFFSET1] // set accum0 - -// R0 ... pointer to data buffer -// R2 ... x0*m21 -// R3 ... interpolator base -// R4 ... pointer to matrix -// R6 ... current coordinate Y0 -// R7 ... width/4 -// LR ... distance coefficient -// [SP+0] ... number of bits of tile width and height - - // set x0*m21 + y0*m22 + m23 -> accum1 - ldr r1,[r4,#16] // load m22 -> R1 - mov r5,lr // distance coefficient -> R5 - muls r1,r5 // m22*dist - asrs r1,#FRACT // (m22*dist)>>FRACT -> R1 - muls r1,r6 // y0*m22 -> R1 - adds r2,r1 // x0*m21 + y0*m22 -> R2 - ldr r1,[r4,#20] // load m23 -> R1 - adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2 - str r2,[r3,#ACCUM1_OFFSET0] // set accum1 - str r2,[r3,#ACCUM1_OFFSET1] // set accum1 - -// ---- process odd 4-pixel - - // prepare tile bits * 2 - ldr r6,[sp,#0] // get tile bits - lsls r6,#1 // tile bits * 2 - -// R0 ... pointer to destination data buffer -// R1 ... (temporary - pixel accumulator 1) -// R2 ... (temporary - pixel accumulator 2) -// R3 ... interpolator base -// R4 ... (temporary - get pointer to tile map, load tile index) -// R5 ... (temporary - get pointer to pixel, load pixel) -// R6 ... tilebits*2 -// R7 ... width/4 (loop counter) -// [SP+0] ... number of bits of tile width and height - - // check odd 4-pixels - lsrs r7,#1 // width/4/2 - bcc 2f // no odd 4-pixel - - // [9] load 1st pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r1,[r5,r4] // [2] load pixel - lsls r4,r1,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - - // [11] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r4,[r5,r4] // [2] load pixel - lsls r4,#16 // [1] shift 2 bytes left - orrs r1,r4 // [1] add pixel to accumulator - lsls r4,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - - // [2] store 4 pixels - stmia r0!,{r1} // [2] store 4 pixels - - // check number of remaining pixels -2: tst r7,r7 // check number of pixels - beq 8f // end - -// ---- [46 per 8 pixels] inner loop -// R0 ... pointer to destination data buffer -// R1 ... (temporary - pixel accumulator 1) -// R2 ... (temporary - pixel accumulator 2) -// R3 ... interpolator base -// R4 ... (temporary - get pointer to tile map, load tile index) -// R5 ... (temporary - get pointer to pixel, load pixel) -// R6 ... tilebits*2 -// R7 ... width/8 (loop counter) - - // [9] load 1st pixel -6: ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r1,[r5,r4] // [2] load pixel - lsls r4,r1,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - - // [11] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r4,[r5,r4] // [2] load pixel - lsls r4,#16 // [1] shift 2 bytes left - orrs r1,r4 // [1] add pixel to accumulator - lsls r4,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - - // [9] load 1st pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r2,[r5,r4] // [2] load pixel - lsls r4,r2,#8 // [1] shift 1 byte left - orrs r2,r4 // [1] add pixel to accumulator - - // [11] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r4,[r5,r4] // [2] load pixel - lsls r4,#16 // [1] shift 2 bytes left - orrs r2,r4 // [1] add pixel to accumulator - lsls r4,#8 // [1] shift 1 byte left - orrs r2,r4 // [1] add pixel to accumulator - - // [3] store 8 pixels - stmia r0!,{r1,r2} // [3] store 8 pixels - - // [2,3] loop counter - subs r7,#1 // [1] 8-pixel counter - bne 6b // [1,2] next 8-pixels - - // pop registers -8: pop {r3-r7,pc} - - .align 2 -// pointer to SIO base -RenderTilePersp_pSioBase: - .word SIO_BASE // addres of SIO base - -// pointer to Interp0 base -RenderTilePersp_Interp: - .word SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET // addres of interpolator 0 base - -RenderTilePersp_Ctrl: // lane control word - .word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT< R4 - ldr r4,[sp,#24] // load video segment -> R4 - -// R0 ... pointer to data buffer -// R2 ... Y coordinate -// R3 ... remaining width -// R4 ... video segment - - // load horizon offset -> R1, check if use perspective - ldr r6,RenderTilePersp_pSioBase // get address of SIO base -> R6 - ldrh r5,[r4,#SSEGM_WRAPY] // get segment height -> R5 - ldrb r1,[r4,#SSEGM_PAR3+1] // get horizon offset -> R1 - sxtb r1,r1 // signed extension - lsls r1,#2 // horizon * 4, horizon = 0 ? - bne 2f // use perspective - - // not using perspective, start Y coordinate y0 = y - h/2 -> R12 - lsrs r5,#1 // segment height/2 -> R5 - subs r2,r5 // y - h/2 -> R2 - mov r12,r2 // current coordinate Y0 = y - h/2 -> R12 - - // prepare divide result to get 1< R5 - str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL - b 4f - - // using perspective, check ceilling mode -2: bpl 3f // horizon is not negative - subs r2,r5,r2 // negate, y = h - y - subs r2,#1 // y = h - 1 - y - negs r1,r1 // absolute value of horizon - - // prepare current coordinate Y0 = y - h -> R12 -3: subs r7,r2,r5 // y - h = current Y coordinate -> R7 - mov r12,r7 // store current coordinate Y0 -> R12 - - // start calculating distance coefficient dist = FRACTMUL*h/(y + horiz) - lsls r5,#FRACT // segment height * FRACTMUL -> R5 - str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h - adds r2,r1 // horizon + y -> R2 - str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz - -// R0 ... pointer to data buffer -// R3 ... remaining width -// R4 ... video segment -// R12 ... current coordinate Y0 - - // prepare start coordinate X0 = -w/2 -> LR -4: lsrs r5,r3,#1 // width/2 - negs r5,r5 // negate - mov lr,r5 // store start coordinate X0 -> LR - - // prepare number of 4-pixels (loop counter) -> R7 - lsrs r7,r3,#2 // width/4 -> R7 - - // prepare address of interpolator 0 base -> R3 - ldr r3,RenderTilePersp_Interp // get address of interpolator 0 base -> R3 - -// R0 ... pointer to data buffer -// R3 ... interpolator base -// R4 ... video segment -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 - -// ---- setup interpolator 0 to get tile index - - // set tile map base to base2 - ldr r6,[r4,#SSEGM_DATA] // load tile map base - str r6,[r3,#BASE2_OFFSET0] // set tile map base - - // set control word of lane 0: shift=FRACT+tilebits, mask=0..mapwbits-1 - ldr r6,RenderTilePersp_Ctrl // load control word - ldrb r1,[r4,#SSEGM_PAR3] // get tile width and height -> R1 - str r1,[sp,#0] // save tile size -> [SP+0] - adds r6,r1 // FRACT + tilebits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required) - ldrb r2,[r4,#SSEGM_WB] // number of bits of tile map width mapwbits -> R2 - subs r5,r2,#1 // mapwbits - 1 - lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position - orrs r6,r5 // add to control word - str r6,[r3,#CTRL_LANE0_OFFSET0] // set control word of lane 0 - - // set control word of lane 1: shift=FRACT+tilebits-mapwbits, - // mask=mapwbits..mapwbits+maphbits-1 - subs r6,r2 // FRACT + tilebits - mapwbits - lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift mapwbits to mask LSB position - orrs r6,r2 // add mapwbits to control word - ldrb r2,[r4,#SSEGM_WB+1] // number of bits of tile map height maphbits -> R2 - lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift maphbits to mask MSB position - adds r6,r2 // add to control word - str r6,[r3,#CTRL_LANE1_OFFSET0] // set control word of lane 1 - -// ---- setup interpolator 1 to get pixel index - - // set tile image to base2 - ldr r6,[r4,#SSEGM_PAR] // load tile image base - str r6,[r3,#BASE2_OFFSET1] // set tile image base - - // set control word of lane 0: shift=FRACT, mask=0..tilebits-1 - ldr r6,RenderTilePersp_Ctrl // load control word - subs r5,r1,#1 // tilebits - 1 - lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position - orrs r6,r5 // add to control word - str r6,[r3,#CTRL_LANE0_OFFSET1] // set control word of lane 0 - - // set control word of lane 1: shift=FRACT-tilebits, mask=tilebits..tilebits*2-1 - subs r6,r1 // FRACT - tilebits - lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift to mask LSB position - orrs r6,r5 // add tilebits to control word - lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift tilebits to mask MSB position - adds r6,r1 // add to control word - str r6,[r3,#CTRL_LANE1_OFFSET1] // set control word of lane 1 - -// R0 ... pointer to data buffer -// R3 ... interpolator base -// R4 ... video segment -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 -// [SP+0] ... number of bits of tile width and height - -// ---- set matrix - - // get pointer to matrix -> R4 - ldr r4,[r4,#SSEGM_PAR2] // get pointer to matrix -> R4 - - // get distance coefficient dist -> R1 - ldr r1,RenderTilePersp_pSioBase // get address of SIO base -> R1 - ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient - -// r4+0 ... m11 -// r4+4 ... m12 -// r4+8 ... m13 -// r4+12 ... m21 -// r4+16 ... m22 -// r4+20 ... m23 - - // set m11 -> R5 base0 - ldr r5,[r4,#0] // load m11 - muls r5,r1 // m11*dist - asrs r5,#FRACT // (m11*dist)>>FRACT ... delta - lsls r2,r5,#1 // delta*2 - adds r2,r5 // delta*3 - str r2,[r3,#BASE0_OFFSET0] // set base0 - str r2,[r3,#BASE0_OFFSET1] // set base0 - - // set m21 -> R6 base1 - ldr r6,[r4,#12] // load m21 - muls r6,r1 // m21*dist - asrs r6,#FRACT // (m21*dist)>>FRACT ... delta - lsls r2,r6,#1 // delta*2 - adds r2,r6 // delta*3 - str r2,[r3,#BASE1_OFFSET0] // set base1 - str r2,[r3,#BASE1_OFFSET1] // set base1 - -// R0 ... pointer to data buffer -// R1 ... distance coefficient -// R3 ... interpolator base -// R4 ... pointer to matrix -// R5 ... m11 -// R6 ... m21 -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 -// [SP+0] ... number of bits of tile width and height - - // set x0*m11 + y0*m12 + m13 -> accum0 - mov r2,lr // start coordinate X0 -> X2 - muls r5,r2 // x0*m11 -> R5 - muls r2,r6 // x0*m21 -> R2 - mov lr,r1 // save distance coefficient -> LR - ldr r6,[r4,#4] // load m12 -> R6 - muls r1,r6 // m12*dist -> R1 - asrs r1,#FRACT // (m12*dist)>>FRACT -> R1 - mov r6,r12 // load coordinate Y0 -> R6 - muls r1,r6 // y0*m12 -> R1 - adds r5,r1 // x0*m11 + y0*m12 -> R5 - ldr r1,[r4,#8] // load m13 -> R1 - adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5 - str r5,[r3,#ACCUM0_OFFSET0] // set accum0 - str r5,[r3,#ACCUM0_OFFSET1] // set accum0 - -// R0 ... pointer to data buffer -// R2 ... x0*m21 -// R3 ... interpolator base -// R4 ... pointer to matrix -// R6 ... current coordinate Y0 -// R7 ... width/4 -// LR ... distance coefficient -// [SP+0] ... number of bits of tile width and height - - // set x0*m21 + y0*m22 + m23 -> accum1 - ldr r1,[r4,#16] // load m22 -> R1 - mov r5,lr // distance coefficient -> R5 - muls r1,r5 // m22*dist - asrs r1,#FRACT // (m22*dist)>>FRACT -> R1 - muls r1,r6 // y0*m22 -> R1 - adds r2,r1 // x0*m21 + y0*m22 -> R2 - ldr r1,[r4,#20] // load m23 -> R1 - adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2 - str r2,[r3,#ACCUM1_OFFSET0] // set accum1 - str r2,[r3,#ACCUM1_OFFSET1] // set accum1 - -// ---- process odd 4-pixel - - // prepare tile bits * 2 - ldr r6,[sp,#0] // get tile bits - lsls r6,#1 // tile bits * 2 - -// R0 ... pointer to destination data buffer -// R1 ... (temporary - pixel accumulator 1) -// R2 ... (temporary - pixel accumulator 2) -// R3 ... interpolator base -// R4 ... (temporary - get pointer to tile map, load tile index) -// R5 ... (temporary - get pointer to pixel, load pixel) -// R6 ... tilebits*2 -// R7 ... width/4 (loop counter) -// [SP+0] ... number of bits of tile width and height - - // check odd 4-pixels - lsrs r7,#1 // width/4/2 - bcc 2f // no odd 4-pixel - - // load pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r1,[r5,r4] // [2] load pixel - lsls r4,r1,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - lsls r4,r1,#16 // [1] shift 2 bytes left - orrs r1,r4 // [1] add pixel to accumulator - - // [2] store 4 pixels - stmia r0!,{r1} // [2] store 4 pixels - - // check number of remaining pixels -2: tst r7,r7 // check number of pixels - beq 8f // end - -// ---- [37 per 8 pixels] inner loop -// R0 ... pointer to destination data buffer -// R1 ... (temporary - pixel accumulator 1) -// R2 ... (temporary - pixel accumulator 2) -// R3 ... interpolator base -// R4 ... (temporary - get pointer to tile map, load tile index) -// R5 ... (temporary - get pointer to pixel, load pixel) -// R6 ... tilebits*2 -// R7 ... width/8 (loop counter) - - // [9] load 1st pixel -6: ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r1,[r5,r4] // [2] load pixel - lsls r4,r1,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - - // [11] load 2nd pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r4,[r5,r4] // [2] load pixel - lsls r4,#16 // [1] shift 2 bytes left - orrs r1,r4 // [1] add pixel to accumulator - lsls r4,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - - // [11] load pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r2,[r5,r4] // [2] load pixel - lsls r4,r2,#8 // [1] shift 1 byte left - orrs r2,r4 // [1] add pixel to accumulator - lsls r4,r2,#16 // [1] shift 2 bytes left - orrs r2,r4 // [1] add pixel to accumulator - - // [3] store 8 pixels - stmia r0!,{r1,r2} // [3] store 8 pixels - - // [2,3] loop counter - subs r7,#1 // [1] 8-pixel counter - bne 6b // [1,2] next 8-pixels - - // pop registers -8: pop {r3-r7,pc} - - .align 2 -// pointer to SIO base -RenderTilePersp_pSioBase: - .word SIO_BASE // addres of SIO base - -// pointer to Interp0 base -RenderTilePersp_Interp: - .word SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET // addres of interpolator 0 base - -RenderTilePersp_Ctrl: // lane control word - .word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT< R4 - ldr r4,[sp,#24] // load video segment -> R4 - -// R0 ... pointer to data buffer -// R2 ... Y coordinate -// R3 ... remaining width -// R4 ... video segment - - // load horizon offset -> R1, check if use perspective - ldr r6,RenderTilePersp_pSioBase // get address of SIO base -> R6 - ldrh r5,[r4,#SSEGM_WRAPY] // get segment height -> R5 - ldrb r1,[r4,#SSEGM_PAR3+1] // get horizon offset -> R1 - sxtb r1,r1 // signed extension - lsls r1,#2 // horizon * 4, horizon = 0 ? - bne 2f // use perspective - - // not using perspective, start Y coordinate y0 = y - h/2 -> R12 - lsrs r5,#1 // segment height/2 -> R5 - subs r2,r5 // y - h/2 -> R2 - mov r12,r2 // current coordinate Y0 = y - h/2 -> R12 - - // prepare divide result to get 1< R5 - str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL - b 4f - - // using perspective, check ceilling mode -2: bpl 3f // horizon is not negative - subs r2,r5,r2 // negate, y = h - y - subs r2,#1 // y = h - 1 - y - negs r1,r1 // absolute value of horizon - - // prepare current coordinate Y0 = y - h -> R12 -3: subs r7,r2,r5 // y - h = current Y coordinate -> R7 - mov r12,r7 // store current coordinate Y0 -> R12 - - // start calculating distance coefficient dist = FRACTMUL*h/(y + horiz) - lsls r5,#FRACT // segment height * FRACTMUL -> R5 - str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h - adds r2,r1 // horizon + y -> R2 - str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz - -// R0 ... pointer to data buffer -// R3 ... remaining width -// R4 ... video segment -// R12 ... current coordinate Y0 - - // prepare start coordinate X0 = -w/2 -> LR -4: lsrs r5,r3,#1 // width/2 - negs r5,r5 // negate - mov lr,r5 // store start coordinate X0 -> LR - - // prepare number of 4-pixels (loop counter) -> R7 - lsrs r7,r3,#2 // width/4 -> R7 - - // prepare address of interpolator 0 base -> R3 - ldr r3,RenderTilePersp_Interp // get address of interpolator 0 base -> R3 - -// R0 ... pointer to data buffer -// R3 ... interpolator base -// R4 ... video segment -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 - -// ---- setup interpolator 0 to get tile index - - // set tile map base to base2 - ldr r6,[r4,#SSEGM_DATA] // load tile map base - str r6,[r3,#BASE2_OFFSET0] // set tile map base - - // set control word of lane 0: shift=FRACT+tilebits, mask=0..mapwbits-1 - ldr r6,RenderTilePersp_Ctrl // load control word - ldrb r1,[r4,#SSEGM_PAR3] // get tile width and height -> R1 - str r1,[sp,#0] // save tile size -> [SP+0] - adds r6,r1 // FRACT + tilebits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required) - ldrb r2,[r4,#SSEGM_WB] // number of bits of tile map width mapwbits -> R2 - subs r5,r2,#1 // mapwbits - 1 - lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position - orrs r6,r5 // add to control word - str r6,[r3,#CTRL_LANE0_OFFSET0] // set control word of lane 0 - - // set control word of lane 1: shift=FRACT+tilebits-mapwbits, - // mask=mapwbits..mapwbits+maphbits-1 - subs r6,r2 // FRACT + tilebits - mapwbits - lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift mapwbits to mask LSB position - orrs r6,r2 // add mapwbits to control word - ldrb r2,[r4,#SSEGM_WB+1] // number of bits of tile map height maphbits -> R2 - lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift maphbits to mask MSB position - adds r6,r2 // add to control word - str r6,[r3,#CTRL_LANE1_OFFSET0] // set control word of lane 1 - -// ---- setup interpolator 1 to get pixel index - - // set tile image to base2 - ldr r6,[r4,#SSEGM_PAR] // load tile image base - str r6,[r3,#BASE2_OFFSET1] // set tile image base - - // set control word of lane 0: shift=FRACT, mask=0..tilebits-1 - ldr r6,RenderTilePersp_Ctrl // load control word - subs r5,r1,#1 // tilebits - 1 - lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position - orrs r6,r5 // add to control word - str r6,[r3,#CTRL_LANE0_OFFSET1] // set control word of lane 0 - - // set control word of lane 1: shift=FRACT-tilebits, mask=tilebits..tilebits*2-1 - subs r6,r1 // FRACT - tilebits - lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift to mask LSB position - orrs r6,r5 // add tilebits to control word - lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift tilebits to mask MSB position - adds r6,r1 // add to control word - str r6,[r3,#CTRL_LANE1_OFFSET1] // set control word of lane 1 - -// R0 ... pointer to data buffer -// R3 ... interpolator base -// R4 ... video segment -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 -// [SP+0] ... number of bits of tile width and height - -// ---- set matrix - - // get pointer to matrix -> R4 - ldr r4,[r4,#SSEGM_PAR2] // get pointer to matrix -> R4 - - // get distance coefficient dist -> R1 - ldr r1,RenderTilePersp_pSioBase // get address of SIO base -> R1 - ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient - -// r4+0 ... m11 -// r4+4 ... m12 -// r4+8 ... m13 -// r4+12 ... m21 -// r4+16 ... m22 -// r4+20 ... m23 - - // set m11 -> R5 base0 - ldr r5,[r4,#0] // load m11 - muls r5,r1 // m11*dist - asrs r5,#FRACT-2 // (m11*dist)>>(FRACT-2) ... 4*delta - str r5,[r3,#BASE0_OFFSET0] // set base0 - str r5,[r3,#BASE0_OFFSET1] // set base0 - asrs r5,#2 // (m11*dist)>>FRACT - - // set m21 -> R6 base1 - ldr r6,[r4,#12] // load m21 - muls r6,r1 // m21*dist - asrs r6,#FRACT-2 // (m21*dist)>>(FRACT-2) ... 4*delta - str r6,[r3,#BASE1_OFFSET0] // set base1 - str r6,[r3,#BASE1_OFFSET1] // set base1 - asrs r6,#2 // (m21*dist)>>FRACT - -// R0 ... pointer to data buffer -// R1 ... distance coefficient -// R3 ... interpolator base -// R4 ... pointer to matrix -// R5 ... m11 -// R6 ... m21 -// R7 ... width/4 -// LR ... start coordinate X0 -// R12 ... current coordinate Y0 -// [SP+0] ... number of bits of tile width and height - - // set x0*m11 + y0*m12 + m13 -> accum0 - mov r2,lr // start coordinate X0 -> X2 - muls r5,r2 // x0*m11 -> R5 - muls r2,r6 // x0*m21 -> R2 - mov lr,r1 // save distance coefficient -> LR - ldr r6,[r4,#4] // load m12 -> R6 - muls r1,r6 // m12*dist -> R1 - asrs r1,#FRACT // (m12*dist)>>FRACT -> R1 - mov r6,r12 // load coordinate Y0 -> R6 - muls r1,r6 // y0*m12 -> R1 - adds r5,r1 // x0*m11 + y0*m12 -> R5 - ldr r1,[r4,#8] // load m13 -> R1 - adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5 - str r5,[r3,#ACCUM0_OFFSET0] // set accum0 - str r5,[r3,#ACCUM0_OFFSET1] // set accum0 - -// R0 ... pointer to data buffer -// R2 ... x0*m21 -// R3 ... interpolator base -// R4 ... pointer to matrix -// R6 ... current coordinate Y0 -// R7 ... width/4 -// LR ... distance coefficient -// [SP+0] ... number of bits of tile width and height - - // set x0*m21 + y0*m22 + m23 -> accum1 - ldr r1,[r4,#16] // load m22 -> R1 - mov r5,lr // distance coefficient -> R5 - muls r1,r5 // m22*dist - asrs r1,#FRACT // (m22*dist)>>FRACT -> R1 - muls r1,r6 // y0*m22 -> R1 - adds r2,r1 // x0*m21 + y0*m22 -> R2 - ldr r1,[r4,#20] // load m23 -> R1 - adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2 - str r2,[r3,#ACCUM1_OFFSET0] // set accum1 - str r2,[r3,#ACCUM1_OFFSET1] // set accum1 - -// ---- process odd 4-pixel - - // prepare tile bits * 2 - ldr r6,[sp,#0] // get tile bits - lsls r6,#1 // tile bits * 2 - -// R0 ... pointer to destination data buffer -// R1 ... (temporary - pixel accumulator 1) -// R2 ... (temporary - pixel accumulator 2) -// R3 ... interpolator base -// R4 ... (temporary - get pointer to tile map, load tile index) -// R5 ... (temporary - get pointer to pixel, load pixel) -// R6 ... tilebits*2 -// R7 ... width/4 (loop counter) -// [SP+0] ... number of bits of tile width and height - - // check odd 4-pixels - lsrs r7,#1 // width/4/2 - bcc 2f // no odd 4-pixel - - // load pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r1,[r5,r4] // [2] load pixel - lsls r4,r1,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - lsls r4,r1,#16 // [1] shift 2 bytes left - orrs r1,r4 // [1] add pixel to accumulator - - // [2] store 4 pixels - stmia r0!,{r1} // [2] store 4 pixels - - // check number of remaining pixels -2: tst r7,r7 // check number of pixels - beq 8f // end - -// ---- [28 per 8 pixels] inner loop -// R0 ... pointer to destination data buffer -// R1 ... (temporary - pixel accumulator 1) -// R2 ... (temporary - pixel accumulator 2) -// R3 ... interpolator base -// R4 ... (temporary - get pointer to tile map, load tile index) -// R5 ... (temporary - get pointer to pixel, load pixel) -// R6 ... tilebits*2 -// R7 ... width/8 (loop counter) - - // [11] load pixel -6: ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r1,[r5,r4] // [2] load pixel - lsls r4,r1,#8 // [1] shift 1 byte left - orrs r1,r4 // [1] add pixel to accumulator - lsls r4,r1,#16 // [1] shift 2 bytes left - orrs r1,r4 // [1] add pixel to accumulator - - // [11] load pixel - ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map - ldrb r4,[r4,#0] // [2] load tile index - lsls r4,r6 // [1] tile index * tile size - ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image - ldrb r2,[r5,r4] // [2] load pixel - lsls r4,r2,#8 // [1] shift 1 byte left - orrs r2,r4 // [1] add pixel to accumulator - lsls r4,r2,#16 // [1] shift 2 bytes left - orrs r2,r4 // [1] add pixel to accumulator - - // [3] store 8 pixels - stmia r0!,{r1,r2} // [3] store 8 pixels - - // [2,3] loop counter - subs r7,#1 // [1] 8-pixel counter - bne 6b // [1,2] next 8-pixels - - // pop registers -8: pop {r3-r7,pc} - - .align 2 -// pointer to SIO base -RenderTilePersp_pSioBase: - .word SIO_BASE // addres of SIO base - -// pointer to Interp0 base -RenderTilePersp_Interp: - .word SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET // addres of interpolator 0 base - -RenderTilePersp_Ctrl: // lane control word - .word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT<>= 1; VSync = False; // not vsync break; - - case LINE_IMGEVEN1: // interlaced image even 0, 2, 4,..., 1st subframe - y0 = line - CurVmode.vfirst1; - if (CurVmode.dbly) y0 >>= 1; - y0 <<= 1; - VSync = False; // not vsync - break; - - case LINE_IMGEVEN2: // interlaced image even 0, 2, 4,..., 2nd subframe - y0 = line - CurVmode.vfirst2; - if (CurVmode.dbly) y0 >>= 1; - y0 <<= 1; - VSync = False; // not vsync - break; - - case LINE_IMGODD1: // interlaced image odd 1, 3, 5,..., 1st subframe - y0 = line - CurVmode.vfirst1; - if (CurVmode.dbly) y0 >>= 1; - y0 = (y0 << 1) + 1; - VSync = False; // not vsync - break; - - case LINE_IMGODD2: // interlaced image odd 1, 3, 5,..., 2nd subframe - y0 = line - CurVmode.vfirst2; - if (CurVmode.dbly) y0 >>= 1; - y0 = (y0 << 1) + 1; - VSync = False; // not vsync - break; - default: VSync = True; // vsync break; } - // update DMA control channels of overlapped layers - // check if scanline is visible - if (y0 >= 0) - { - // loop overlapped layers - int layer; - for (layer = 1; layer < LAYERS; layer++) - { - // check if this layer is active - if (CtrlBufNext[layer] == NULL) continue; - - // check if this layer screen is active - sLayer* s = &LayerScreen[layer]; - if (!s->on || (s->w <= 0) || (y0 < s->y) || (y0 >= s->y + s->h)) continue; - - // wait for idle state - // IRQ0 comes a few pixels before end of scanline, when DMA_PIO0 is finished. - // We must wait 1 to 2 us to complete layer DMA. Sometimes it can take - // longer - for such cases we must restart both DMA and state machine. - int sm = VGA_SM(layer); - u32 t1 = time_us_32(); - do { - u8 a = *(volatile u8*)&VGA_PIO->sm[sm].addr & 0x1f; - if (a <= CurLayerProg.maxidle+LAYER_OFFSET) break; - } while ((u32)(time_us_32() - t1) < (u32)10); // wait max. 10 us, low resolution can take long time - - // stop DMA channel - dma_channel_abort(VGA_DMA_PIO(layer)); - dma_channel_abort(VGA_DMA_CB(layer)); - dma_channel_abort(VGA_DMA_PIO(layer)); - dma_channel_abort(VGA_DMA_CB(layer)); - - // restart state machine and clear FIFOs - pio_sm_set_enabled(VGA_PIO, sm, false); - pio_sm_clear_fifos(VGA_PIO, sm); - pio_sm_restart(VGA_PIO, sm); - pio_sm_exec(VGA_PIO, sm, pio_encode_jmp(CurLayerProg.idle+LAYER_OFFSET)); - pio_sm_set_enabled(VGA_PIO, sm, true); - - // enter new scanline - pio_sm_exec(VGA_PIO, sm, pio_encode_jmp(CurLayerProg.entry+LAYER_OFFSET)); - - // start DMA - dma_channel_set_read_addr(VGA_DMA_CB(layer), CtrlBufNext[layer], true); - } - } - return bufinx; } -// render scanline buffers -u32* __not_in_flash_func(VgaBufRender)(u32* cbuf, u32* cbuf0, u8* dbuf, int y0) -{ -// ---- render base layer - // HSYNC + back porch - *cbuf++ = 4; // send 4x u32 - *cbuf++ = (u32)LineBufHsBp; // HSYNC + back porch - - // render scanline - // cbuf ... control buffer - // dbuf ... data buffer (pixel data) - // line ... current line 0.. - // pixnum ... total pixels (must be multiple of 4) - cbuf = Render(cbuf, dbuf, y0, CurVmode.width); - - // front porch - *cbuf++ = 1; // send 1x u32 - *cbuf++ = (u32)&LineBufFp; // front porch - -// ---- render overlapped layers - - int layer; - for (layer = 1; layer < LAYERS; layer++) - { - // shift buffers - cbuf0 += CtrlBufSize[layer-1]; - dbuf += LineBufSize[layer-1]; - - CtrlBufNext[layer] = NULL; - - // check if layer is active - int mode = LayerModeInx[layer]; - if (mode == LAYERMODE_BASE) continue; - - // check if this layer screen is active - sLayer* s = &LayerScreen[layer]; - if (!s->on || (s->w <= 0) || (y0 < s->y) || (y0 >= s->y + s->h)) continue; - int y = y0 - s->y; - - // set next control buffer - u32* cbuf2 = cbuf0; - CtrlBufNext[layer] = cbuf2; - - // write init word - u8* dbuf2 = dbuf; - *cbuf2++ = 1; - *cbuf2++ = (u32)dbuf2; - *(u32*)dbuf2 = BYTESWAP(s->init); - dbuf2 += 4; - - // render data - switch(mode) - { - case LAYERMODE_SPRITEKEY: - case LAYERMODE_SPRITEBLACK: - case LAYERMODE_SPRITEWHITE: - { - *cbuf2++ = s->trans; - *cbuf2++ = (u32)dbuf2; - MemSet4((u32*)dbuf2, s->keycol, s->w/4); - RenderSprite(dbuf2, y, s); - } - break; - - case LAYERMODE_FASTSPRITEKEY: - case LAYERMODE_FASTSPRITEBLACK: - case LAYERMODE_FASTSPRITEWHITE: - { - MemSet4((u32*)dbuf2, s->keycol, s->w/4); - cbuf2 = RenderFastSprite(cbuf2, y, s, dbuf2); - } - break; - - case LAYERMODE_PERSPKEY: // layer with key color and image with transformation matrix - case LAYERMODE_PERSPBLACK: // layer with black key color and image with transformation matrix - case LAYERMODE_PERSPWHITE: // layer with white key color and image with transformation matrix - { - int w = s->w; // destination width - int x = s->x; // destination coordinate X - - // underflow left edge - if (x < 0) - { - x = ALIGN4(x+4098) - 4096; // round X to 4-pixels - w += x; // decrease W - x = -x; // start offset of X - } - else - { - // overflow right edge - if (x + w > CurVmode.width) - { - w = CurVmode.width - x; // limit W - } - x = 0; - } - - // align W down - w = ALIGN4(w); - - if (w <= 0) - { - // minimal transparent pixels - *cbuf2++ = 1; - *cbuf2++ = (u32)dbuf2; - *(u32*)dbuf2 = s->keycol; - } - else - { - // decode image - *cbuf2++ = w/4; - *cbuf2++ = (u32)&dbuf2[x]; - RenderPersp(dbuf2, y, s); - } - } - break; - - case LAYERMODE_PERSP2KEY: // layer with key color and image with transformation matrix - case LAYERMODE_PERSP2BLACK: // layer with black key color and image with transformation matrix - case LAYERMODE_PERSP2WHITE: // layer with white key color and image with transformation matrix - { - int w = s->w; // destination width - int x = s->x; // destination coordinate X - - // underflow left edge - if (x < 0) - { - x = ALIGN4(x+4098) - 4096; // round X to 4-pixels - w += x; // decrease W - x = -x; // start offset of X - } - else - { - // overflow right edge - if (x + w > CurVmode.width) - { - w = CurVmode.width - x; // limit W - } - x = 0; - } - - // align W down - w = ALIGN4(w); - - if (w <= 0) - { - // minimal transparent pixels - *cbuf2++ = 1; - *cbuf2++ = (u32)dbuf2; - *(u32*)dbuf2 = s->keycol; - } - else - { - // decode image - *cbuf2++ = w/4; - *cbuf2++ = (u32)&dbuf2[x]; - RenderPersp2(dbuf2, y, s); - } - } - break; - - case LAYERMODE_RLE: - { - // rows indices - u16* row = (u16*)s->par; - - // lengt of the row - int n = row[y+1] - row[y]; - - // set transfer count - *cbuf2++ = n; - - // start new DMA - *cbuf2++ = (u32)&s->img[row[y]*4]; - } - break; - - default: - { - // set transfer count - *cbuf2++ = s->trans; - - // start new DMA - *cbuf2++ = (u32)&s->img[y*s->wb]; - } - break; - } - - // end mark of layer - *cbuf2++ = 0; // end mark - *cbuf2++ = 0; // end mark - } - - return cbuf; -} // VGA DMA handler - called on end of every scanline extern "C" void __not_in_flash_func(VgaLine)() @@ -377,20 +97,16 @@ extern "C" void __not_in_flash_func(VgaLine)() int bufinx = VgaBufProcess(); // prepare buffers to be processed next - u8* dbuf; // data buffer u32* cbuf; // control buffer if (bufinx == 0) { - dbuf = LineBuf1; cbuf = CtrlBuf1; } else { - dbuf = LineBuf2; cbuf = CtrlBuf2; } - CtrlBufNext[0] = cbuf; - u32* cbuf0 = cbuf; // control buffer base + CtrlBufNext = cbuf; // next rendered scanline int line = ScanLine; // current scanline @@ -399,6 +115,16 @@ extern "C" void __not_in_flash_func(VgaLine)() int y0; u8 linetype = ScanlineType[line]; +#ifdef VGA_VSYNC + if (linetype == LINE_VSYNC) + { + gpio_put(VGA_VSYNC, 0); + } + else + { + gpio_put(VGA_VSYNC, 1); + } +#endif switch (linetype) { case LINE_VSYNC: // long vertical sync @@ -406,63 +132,26 @@ extern "C" void __not_in_flash_func(VgaLine)() *cbuf++ = (u32)&LineBufSync[0]; // VSYNC break; - case LINE_VVSYNC: // short vertical + vertical sync - *cbuf++ = 4; // send 4x u32 - *cbuf++ = (u32)&LineBufSync[4]; // VSYNC - break; - - case LINE_VHSYNC: // short vertical + horizontal sync - *cbuf++ = 4; // send 4x u32 - *cbuf++ = (u32)&LineBufSync[6]; // VSYNC + half - break; - - case LINE_HHSYNC: // short horizontal + horizontal sync - *cbuf++ = 4; // send 4x u32 - *cbuf++ = (u32)&LineBufSync[0]; // half + half - break; - - case LINE_HVSYNC: // short horizontal + vertical sync - *cbuf++ = 4; // send 4x u32 - *cbuf++ = (u32)&LineBufSync[2]; // half + VSYNC - break; - case LINE_DARK: // dark line *cbuf++ = 2; // send 2x u32 *cbuf++ = (u32)LineBufDark; // dark break; case LINE_IMG: // progressive image 0, 1, 2,... - y0 = line - CurVmode.vfirst1; + y0 = line - CurVmode.vfirst; if (CurVmode.dbly) y0 >>= 1; - cbuf = VgaBufRender(cbuf, cbuf0, dbuf, y0); - break; - case LINE_IMGEVEN1: // interlaced image even 0, 2, 4,..., 1st subframe - y0 = line - CurVmode.vfirst1; - if (CurVmode.dbly) y0 >>= 1; - y0 <<= 1; - cbuf = VgaBufRender(cbuf, cbuf0, dbuf, y0); - break; + // HSYNC + back porch + *cbuf++ = 4; // send 4x u32 + *cbuf++ = (u32)LineBufHsBp; // HSYNC + back porch - case LINE_IMGEVEN2: // interlaced image even 0, 2, 4,..., 2nd subframe - y0 = line - CurVmode.vfirst2; - if (CurVmode.dbly) y0 >>= 1; - y0 <<= 1; - cbuf = VgaBufRender(cbuf, cbuf0, dbuf, y0); - break; - - case LINE_IMGODD1: // interlaced image odd 1, 3, 5,..., 1st subframe - y0 = line - CurVmode.vfirst1; - if (CurVmode.dbly) y0 >>= 1; - y0 = (y0 << 1) + 1; - cbuf = VgaBufRender(cbuf, cbuf0, dbuf, y0); - break; - - case LINE_IMGODD2: // interlaced image odd 1, 3, 5,..., 2nd subframe - y0 = line - CurVmode.vfirst2; - if (CurVmode.dbly) y0 >>= 1; - y0 = (y0 << 1) + 1; - cbuf = VgaBufRender(cbuf, cbuf0, dbuf, y0); + // image data + *cbuf++ = fbwidth/4; + *cbuf++ = (u32)&framebuffer[y0*fbwidth]; + + // front porch + *cbuf++ = 1; // send 1x u32 + *cbuf++ = (u32)&LineBufFp; // front porch break; } @@ -484,78 +173,72 @@ extern "C" void __not_in_flash_func(VgaLine)() void VgaDmaInit() { dma_channel_config cfg; - int layer; - for (layer = 0; layer < LAYERS; layer++) - { - // layer is not active - if ((layer > 0) && (LayerModeInx[layer] == LAYERMODE_BASE)) continue; -// ==== prepare DMA control channel + // ==== prepare DMA control channel - // prepare DMA default config - cfg = dma_channel_get_default_config(VGA_DMA_CB(layer)); + // prepare DMA default config + cfg = dma_channel_get_default_config(VGA_DMA_CB0); - // increment address on read from memory - channel_config_set_read_increment(&cfg, true); + // increment address on read from memory + channel_config_set_read_increment(&cfg, true); - // increment address on write to DMA port - channel_config_set_write_increment(&cfg, true); + // increment address on write to DMA port + channel_config_set_write_increment(&cfg, true); - // each DMA transfered entry is 32-bits - channel_config_set_transfer_data_size(&cfg, DMA_SIZE_32); + // each DMA transfered entry is 32-bits + channel_config_set_transfer_data_size(&cfg, DMA_SIZE_32); - // write ring - wrap to 8-byte boundary (TRANS_COUNT and READ_ADDR_TRIG of data DMA) - channel_config_set_ring(&cfg, true, 3); + // write ring - wrap to 8-byte boundary (TRANS_COUNT and READ_ADDR_TRIG of data DMA) + channel_config_set_ring(&cfg, true, 3); - // DMA configure - dma_channel_configure( - VGA_DMA_CB(layer), // channel - &cfg, // configuration - &dma_hw->ch[VGA_DMA_PIO(layer)].al3_transfer_count, // write address - &CtrlBuf1[0], // read address - as first, control buffer 1 will be sent out - 2, // number of transfers in u32 - false // do not start yet - ); - -// ==== prepare DMA data channel - - // prepare DMA default config - cfg = dma_channel_get_default_config(VGA_DMA_PIO(layer)); - - // increment address on read from memory - channel_config_set_read_increment(&cfg, true); - - // do not increment address on write to PIO - channel_config_set_write_increment(&cfg, false); - - // each DMA transfered entry is 32-bits - channel_config_set_transfer_data_size(&cfg, DMA_SIZE_32); - - // DMA data request for sending data to PIO - channel_config_set_dreq(&cfg, pio_get_dreq(VGA_PIO, VGA_SM(layer), true)); - - // chain channel to DMA control block - channel_config_set_chain_to(&cfg, VGA_DMA_CB(layer)); - - // raise the IRQ flag when 0 is written to a trigger register (end of chain) - channel_config_set_irq_quiet(&cfg, true); - - // set byte swapping - channel_config_set_bswap(&cfg, true); - - // set high priority - cfg.ctrl |= DMA_CH0_CTRL_TRIG_HIGH_PRIORITY_BITS; - - // DMA configure - dma_channel_configure( - VGA_DMA_PIO(layer), // channel - &cfg, // configuration - &VGA_PIO->txf[VGA_SM(layer)], // write address - NULL, // read address - 0, // number of transfers in u32 - false // do not start immediately + // DMA configure + dma_channel_configure( + VGA_DMA_CB0, // channel + &cfg, // configuration + &dma_hw->ch[VGA_DMA_PIO0].al3_transfer_count, // write address + &CtrlBuf1[0], // read address - as first, control buffer 1 will be sent out + 2, // number of transfers in u32 + false // do not start yet ); - } + + // ==== prepare DMA data channel + + // prepare DMA default config + cfg = dma_channel_get_default_config(VGA_DMA_PIO0); + + // increment address on read from memory + channel_config_set_read_increment(&cfg, true); + + // do not increment address on write to PIO + channel_config_set_write_increment(&cfg, false); + + // each DMA transfered entry is 32-bits + channel_config_set_transfer_data_size(&cfg, DMA_SIZE_32); + + // DMA data request for sending data to PIO + channel_config_set_dreq(&cfg, pio_get_dreq(VGA_PIO, VGA_SM0, true)); + + // chain channel to DMA control block + channel_config_set_chain_to(&cfg, VGA_DMA_CB0); + + // raise the IRQ flag when 0 is written to a trigger register (end of chain) + channel_config_set_irq_quiet(&cfg, true); + + // set byte swapping + channel_config_set_bswap(&cfg, true); + + // set high priority + cfg.ctrl |= DMA_CH0_CTRL_TRIG_HIGH_PRIORITY_BITS; + + // DMA configure + dma_channel_configure( + VGA_DMA_PIO0, // channel + &cfg, // configuration + &VGA_PIO->txf[VGA_SM0], // write address + NULL, // read address + 0, // number of transfers in u32 + false // do not start immediately + ); // ==== initialize IRQ0, raised from base layer 0 @@ -572,8 +255,6 @@ void VgaDmaInit() // initialize VGA PIO void VgaPioInit() { - int i; - // clear PIO instruction memory pio_clear_instruction_memory(VGA_PIO); @@ -591,83 +272,47 @@ void VgaPioInit() prg.origin = BASE_OFFSET; pio_add_program(VGA_PIO, &prg); - // load layer program - if (LayerProgInx != LAYERPROG_BASE) - { - // configure layer program instructions - memcpy(ins, CurLayerProg.ins, CurLayerProg.length*sizeof(uint16_t)); // copy program into buffer - for (i = 0; i < CurLayerProg.extranum; i++) - { - int extra = (int)cpp - CurLayerProg.extra[i*2+1]; - if (extra < 0) extra = 0; - ins[CurLayerProg.extra[i*2]] |= extra << 8; // update waits - } - - // load layer program into PIO's instruction memory - prg.instructions = ins; - prg.length = CurLayerProg.length; - prg.origin = LAYER_OFFSET; - pio_add_program(VGA_PIO, &prg); - } - // connect PIO to the pad - // JMH - //for (i = VGA_GPIO_FIRST; i <= VGA_GPIO_LAST; i++) pio_gpio_init(VGA_PIO, i); - for (i = VGA_GPIO_FIRST; i < VGA_GPIO_LAST; i++) pio_gpio_init(VGA_PIO, i); + for (int i = VGA_GPIO_FIRST; i < VGA_GPIO_LAST; i++) pio_gpio_init(VGA_PIO, i); pio_gpio_init(VGA_PIO, VGA_GPIO_SYNC); +#if VGA_VSYNC + gpio_init(VGA_VSYNC); +#endif // negative HSYNC output if (!CurVmode.psync) gpio_set_outover(VGA_GPIO_SYNC, GPIO_OVERRIDE_INVERT); - int layer; - for (layer = 0; layer < LAYERS; layer++) - { - // layer is not active - if ((layer > 0) && (LayerModeInx[layer] == LAYERMODE_BASE)) continue; + // set pin direction to output + pio_sm_set_consecutive_pindirs(VGA_PIO, VGA_SM0, VGA_GPIO_FIRST, VGA_GPIO_OUTNUM, true); + pio_sm_set_consecutive_pindirs(VGA_PIO, VGA_SM0, VGA_GPIO_SYNC, 1, true); +#if VGA_VSYNC + gpio_set_dir(VGA_VSYNC, GPIO_OUT); +#endif + // get default config + pio_sm_config cfg = pio_get_default_sm_config(); - // set pin direction to output - // JMH - //pio_sm_set_consecutive_pindirs(VGA_PIO, VGA_SM(layer), VGA_GPIO_FIRST, VGA_GPIO_NUM, true); - pio_sm_set_consecutive_pindirs(VGA_PIO, VGA_SM(layer), VGA_GPIO_FIRST, VGA_GPIO_OUTNUM, true); - pio_sm_set_consecutive_pindirs(VGA_PIO, VGA_SM(layer), VGA_GPIO_SYNC, 1, true); + // map state machine's OUT and MOV pins + sm_config_set_out_pins(&cfg, VGA_GPIO_FIRST, VGA_GPIO_OUTNUM); - // get default config - pio_sm_config cfg = pio_get_default_sm_config(); + // join FIFO to send only + sm_config_set_fifo_join(&cfg, PIO_FIFO_JOIN_TX); - // map state machine's OUT and MOV pins - sm_config_set_out_pins(&cfg, LayerFirstPin[layer], LayerNumPin[layer]); + // PIO clock divider + sm_config_set_clkdiv(&cfg, CurVmode.div); - // join FIFO to send only - sm_config_set_fifo_join(&cfg, PIO_FIFO_JOIN_TX); + // shift left, autopull, pull threshold + sm_config_set_out_shift(&cfg, false, true, 32); - // PIO clock divider - sm_config_set_clkdiv(&cfg, CurVmode.div); + // base layer 0 + // set wrap + sm_config_set_wrap(&cfg, vga_wrap_target+BASE_OFFSET, vga_wrap+BASE_OFFSET); - // shift left, autopull, pull threshold - sm_config_set_out_shift(&cfg, false, true, 32); - - // base layer 0 - if (layer == 0) - { - // set wrap - sm_config_set_wrap(&cfg, vga_wrap_target+BASE_OFFSET, vga_wrap+BASE_OFFSET); + // set sideset pins of base layer + sm_config_set_sideset(&cfg, 1, false, false); + sm_config_set_sideset_pins(&cfg, VGA_GPIO_SYNC); - // set sideset pins of base layer - sm_config_set_sideset(&cfg, 1, false, false); - sm_config_set_sideset_pins(&cfg, VGA_GPIO_SYNC); - - // initialize state machine - pio_sm_init(VGA_PIO, VGA_SM0, vga_offset_entry+BASE_OFFSET, &cfg); - } - else - { - // set wrap - sm_config_set_wrap(&cfg, CurLayerProg.wrap_target+LAYER_OFFSET, CurLayerProg.wrap+LAYER_OFFSET); - - // initialize state machine - pio_sm_init(VGA_PIO, VGA_SM(layer), CurLayerProg.idle+LAYER_OFFSET, &cfg); - } - } + // initialize state machine + pio_sm_init(VGA_PIO, VGA_SM0, vga_offset_entry+BASE_OFFSET, &cfg); } // initialize scanline buffers @@ -689,46 +334,25 @@ void VgaBufInit() LineBufDark[0] = BYTESWAP(VGACMD(vga_offset_sync+BASE_OFFSET,CurVmode.hsync-3)); // HSYNC LineBufDark[1] = BYTESWAP(VGADARK(CurVmode.htot-CurVmode.hsync-4,0)); // dark line - // TV mode - if (CurVmode.inter) - { - // vertical synchronization - LineBufSync[0] = BYTESWAP(VGACMD(vga_offset_sync+BASE_OFFSET,CurVmode.hsync/2-3)); // HSYNC - LineBufSync[1] = BYTESWAP(VGADARK(CurVmode.htot/2-CurVmode.hsync/2-4,0)); // dark line - LineBufSync[2] = BYTESWAP(VGACMD(vga_offset_sync+BASE_OFFSET,CurVmode.hsync/2-3)); // HSYNC - LineBufSync[3] = BYTESWAP(VGADARK((CurVmode.htot+1)/2-CurVmode.hsync/2-4,0)); // dark line - - LineBufSync[4] = BYTESWAP(VGACMD(vga_offset_sync+BASE_OFFSET,CurVmode.htot/2-CurVmode.hsync-3)); // invert dark line - LineBufSync[5] = BYTESWAP(VGADARK(CurVmode.hsync-4,0)); // invert HSYNC - LineBufSync[6] = BYTESWAP(VGACMD(vga_offset_sync+BASE_OFFSET,(CurVmode.htot+1)/2-CurVmode.hsync-3)); // invert dark line - LineBufSync[7] = BYTESWAP(VGADARK(CurVmode.hsync-4,0)); // invert HSYNC - - LineBufSync[8] = BYTESWAP(VGACMD(vga_offset_sync+BASE_OFFSET,CurVmode.hsync/2-3)); // HSYNC - LineBufSync[9] = BYTESWAP(VGADARK(CurVmode.htot/2-CurVmode.hsync/2-4,0)); // dark line - - // control blocks - initialize to VSYNC - CtrlBuf1[0] = 4; // send 4x u32 - CtrlBuf1[1] = (u32)&LineBufSync[4]; // VSYNC - - CtrlBuf2[0] = 4; // send 4x u32 - CtrlBuf2[1] = (u32)&LineBufSync[4]; // VSYNC - } - // VGA mode - else - { - // vertical synchronization - // hsync must be min. 4 - LineBufSync[0] = BYTESWAP(VGACMD(vga_offset_sync+BASE_OFFSET,CurVmode.htot-CurVmode.hsync-3)); // invert dark line - LineBufSync[1] = BYTESWAP(VGADARK(CurVmode.hsync-4,0)); // invert HSYNC + // vertical synchronization - // control blocks - initialize to VSYNC - CtrlBuf1[0] = 2; // send 2x u32 - CtrlBuf1[1] = (u32)&LineBufSync[0]; // VSYNC +#ifdef VGA_VSYNC + // if VSYNC line + LineBufSync[0] = BYTESWAP(VGACMD(vga_offset_sync+BASE_OFFSET,CurVmode.hsync-3)); // HSYNC + LineBufSync[1] = BYTESWAP(VGADARK(CurVmode.htot-CurVmode.hsync-4,0)); // dark line +#else + // hsync must be min. 4 + LineBufSync[0] = BYTESWAP(VGACMD(vga_offset_sync+BASE_OFFSET,CurVmode.htot-CurVmode.hsync-3)); // invert dark line + LineBufSync[1] = BYTESWAP(VGADARK(CurVmode.hsync-4,0)); // invert HSYNC +#endif - CtrlBuf2[0] = 2; // send 2x u32 - CtrlBuf2[1] = (u32)&LineBufSync[0]; // VSYNC - } + // control blocks - initialize to VSYNC + CtrlBuf1[0] = 2; // send 2x u32 + CtrlBuf1[1] = (u32)&LineBufSync[0]; // VSYNC + + CtrlBuf2[0] = 2; // send 2x u32 + CtrlBuf2[1] = (u32)&LineBufSync[0]; // VSYNC CtrlBuf1[2] = 0; // stop mark CtrlBuf1[3] = 0; // stop mark @@ -740,16 +364,9 @@ void VgaBufInit() // terminate VGA service void VgaTerm() { - int i; - // abort DMA channels dma_channel_abort(VGA_DMA_PIO0); // pre-abort, could be chaining right now dma_channel_abort(VGA_DMA_CB0); - for (i = 0; i < LAYERS; i++) - { - dma_channel_abort(VGA_DMA_PIO(i)); - dma_channel_abort(VGA_DMA_CB(i)); - } // disable IRQ0 from DMA0 irq_set_enabled(DMA_IRQ_0, false); @@ -765,11 +382,8 @@ void VgaTerm() pio_restart_sm_mask(VGA_PIO, VGA_SMALL); // clear FIFOs - for (i = 0; i < LAYERS; i++) - { - pio_sm_clear_fifos(VGA_PIO, VGA_SM(i)); - CtrlBufNext[i] = NULL; - } + pio_sm_clear_fifos(VGA_PIO, VGA_SM0); + CtrlBufNext = NULL; // clear PIO instruction memory pio_clear_instruction_memory(VGA_PIO); @@ -785,200 +399,40 @@ void ScanlineTypeInit(const sVmode* v) *d++ = LINE_DARK; // progressive mode (VGA 525) - if (!v->inter) - { - // vertical sync (VGA 2) - for (i = v->vsync1; i > 0; i--) *d++ = LINE_VSYNC; + // vertical sync (VGA 2) + for (i = v->vsync; i > 0; i--) *d++ = LINE_VSYNC; - // dark (VGA 33) - for (i = v->vback1; i > 0; i--) *d++ = LINE_DARK; + // dark (VGA 33) + for (i = v->vback; i > 0; i--) *d++ = LINE_DARK; - // image (VGA 480) - for (i = v->vact1; i > 0; i--) *d++ = LINE_IMG; + // image (VGA 480) + for (i = v->vact; i > 0; i--) *d++ = LINE_IMG; - // dark (VGA 10) - for (i = v->vfront1; i > 0; i--) *d++ = LINE_DARK; - } - - // interlaced mode (PAL 625, NTSC 525) - // - frames start with whole VSYNC - else - { - // vertical sync (PAL 2, NTSC 3) - for (i = v->vsync1/2; i > 0; i--) *d++ = LINE_VVSYNC; - - // vertical sync + half sync (PAL 1, NTSC 0) - if ((v->vsync1 & 1) != 0) *d++ = LINE_VHSYNC; - - // half sync (PAL 2, NTSC 3) - for (i = v->vpost1/2; i > 0; i--) *d++ = LINE_HHSYNC; - - // dark (PAL 18+23, NTSC 10+2) - for (i = v->vback1; i > 0; i--) *d++ = LINE_DARK; - - // image 1st sub-frame (PAL 240, NTSC 240) - if (v->odd) - for (i = v->vact1; i > 0; i--) *d++ = LINE_IMGODD1; // odd lines 1, 3, 5, ... (PAL) - else - for (i = v->vact1; i > 0; i--) *d++ = LINE_IMGEVEN1; // even lines 0, 2, 4, ... (NTSC) - - // dark (PAL 24, NTSC 1) - for (i = v->vfront1; i > 0; i--) *d++ = LINE_DARK; - - // half sync (PAL 2, NTSC 3) - for (i = v->vpre1/2; i > 0; i--) *d++ = LINE_HHSYNC; - - // half sync + vertical sync (PAL 1, NTSC 1) - k = v->vpre1 & 1; - if (k != 0) *d++ = LINE_HVSYNC; - - // vertical sync (PAL 2, NTSC 2) - for (i = (v->vsync2 - k)/2; i > 0; i--) *d++ = LINE_VVSYNC; - - // vertical sync + half sync (PAL 0, NTSC 1) - if (((v->vsync2 - k) & 1) != 0) *d++ = LINE_VHSYNC; - - // half sync (PAL 2, NTSC 2) - for (i = v->vpost2/2; i > 0; i--) *d++ = LINE_HHSYNC; - - // dark (PAL 18+23, NTSC 11+2) - for (i = v->vback2; i > 0; i--) *d++ = LINE_DARK; - - // image 2nd sub-frame (PAL 240, NTSC 240) - if (v->odd) - for (i = v->vact2; i > 0; i--) *d++ = LINE_IMGEVEN2; // even lines 0, 2, 4, ... (PAL) - else - for (i = v->vact2; i > 0; i--) *d++ = LINE_IMGODD2; // odd lines 1, 3, 5, ... (NTSC) - - // dark (PAL 24, NTSC 1) - for (i = v->vfront2; i > 0; i--) *d++ = LINE_DARK; - - // half sync (PAL 3, NTSC 3) - for (i = v->vpre2/2; i > 0; i--) *d++ = LINE_HHSYNC; - } -} - -// scanline names -const char* ScanlineName[] = { - "VSYNC", // long vertical sync - "VVSYNC", // short vertical + vertical sync - "VHSYNC", // short vertical + horizontal sync - "HHSYNC", // short horizontal + horizontal sync - "HVSYNC", // short horizontal + vertical sync - "DARK", // dark line - "IMG", // progressive image 0, 1, 2,... - "IMGEVEN1", // interlaced image even 0, 2, 4,..., 1st subframe - "IMGEVEN2", // interlaced image even 0, 2, 4,..., 2nd subframe - "IMGODD1", // interlaced image odd 1, 3, 5,..., 1st subframe - "IMGODD2", // interlaced image odd 1, 3, 5,..., 2nd subframe -}; - -// print table if scanline types -void ScanlineTypePrint(const u8* scan, int lines) -{ - // skip scanline 0 - scan++; - - // load scanline 1 - u8 last = *scan++; - int num = 1; - int line = 1; - - // process other scanlines - int i; - for (i = 2; i <= lines; i++) - { - if ((*scan != last) || (i == lines)) - { - if (num == 1) - printf("%d (1): %s\n", line, line + num - 1, ScanlineName[last]); - else - printf("%d..%d (%d): %s\n", line, line + num - 1, num, ScanlineName[last]); - - last = *scan; - num = 1; - line = i; - } - else - num++; - scan++; - } + // dark (VGA 10) + for (i = v->vfront; i > 0; i--) *d++ = LINE_DARK; } // initialize videomode (returns False on bad configuration) -// - All layer modes must use same layer program (LAYERMODE_BASE = overlapped layers are OFF) -void VgaInit(const sVmode* vmode) +void VgaInit(const sVmode* vmode, u8* buf, int width, int height, int stride) { int i; + framebuffer = buf; + fbwidth = width; + // stop old state VgaTerm(); // initialize scanline type table ScanlineTypeInit(vmode); - // prepare render font pixel mask - for (i = 0; i < 256; i++) - { - // higher 4 bits - u32 m = 0; - if ((i & B7) != 0) m |= 0xff; - if ((i & B6) != 0) m |= 0xff << 8; - if ((i & B5) != 0) m |= 0xff << 16; - if ((i & B4) != 0) m |= 0xff << 24; - RenderTextMask[2*i] = m; - - // lower 4 bits - m = 0; - if ((i & B3) != 0) m |= 0xff; - if ((i & B2) != 0) m |= 0xff << 8; - if ((i & B1) != 0) m |= 0xff << 16; - if ((i & B0) != 0) m |= 0xff << 24; - RenderTextMask[2*i+1] = m; - } - - // emergency check of structure definitions - if ( (SSPRITE_SIZE != sizeof(sSprite)) || - (SLAYER_SIZE != sizeof(sLayer)) || - (SSEGM_SIZE != sizeof(sSegm)) || - (SSTRIP_SIZE != sizeof(sStrip)) || - (SSCREEN_SIZE != sizeof(sScreen))) - { - while (1) {} - } - - // clear buffer with black color - memset(LineBuf0, COL_BLACK, BLACK_MAX); - // save current videomode memcpy(&CurVmode, vmode, sizeof(sVmode)); // initialize parameters ScanLine = 1; // currently processed scanline -// Frame = 0; BufInx = 0; // at first, control buffer 1 will be sent out - CtrlBufNext[0] = CtrlBuf2; - - // initialize base layer - LayerModeInx[0] = LAYERMODE_BASE; - memcpy(&CurLayerMode[0], &LayerMode[LAYERMODE_BASE], sizeof(sLayerMode)); - memset(&LayerScreen[0], 0, sizeof(sLayer)); - - // save layer modes - LayerModeInx[1] = vmode->mode[1]; - LayerModeInx[2] = vmode->mode[2]; - LayerModeInx[3] = vmode->mode[3]; - - LayerMask = B0; // mask of active layers - for (i = 1; i < LAYERS; i++) - { - memcpy(&CurLayerMode[i], &LayerMode[LayerModeInx[i]], sizeof(sLayerMode)); - if (LayerModeInx[i] != LAYERMODE_BASE) LayerMask |= (1 << i); - } - - // get layer program - LayerProgInx = vmode->prog; - memcpy(&CurLayerProg, &LayerProg[LayerProgInx], sizeof(sLayerProg)); + CtrlBufNext = CtrlBuf2; // initialize VGA PIO VgaPioInit(); @@ -996,74 +450,9 @@ void VgaInit(const sVmode* vmode) dma_channel_start(VGA_DMA_CB0); // run state machines - pio_enable_sm_mask_in_sync(VGA_PIO, LayerMask); + pio_enable_sm_mask_in_sync(VGA_PIO, B0); } -const sVmode* volatile VgaVmodeReq = NULL; // request to reinitialize videomode, 1=only stop driver - -void (* volatile Core1Fnc)() = NULL; // core 1 remote function - -// VGA core -void VgaCore() -{ - const sVmode* v; - void (*fnc)(); - while (1) - { - __dmb(); - - // initialize videomode - v = VgaVmodeReq; - if (v != NULL) - { - if ((u32)v == (u32)1) - VgaTerm(); // terminate - else - VgaInit(v); - __dmb(); - VgaVmodeReq = NULL; - } - - // execute remote function - fnc = Core1Fnc; - if (fnc != NULL) - { - fnc(); - __dmb(); - Core1Fnc = NULL; - } - } -} - -// request to initialize VGA videomode, NULL=only stop driver (wait to initialization completes) -void VgaInitReq(const sVmode* vmode) -{ - if (vmode == NULL) vmode = (const sVmode*)1; - __dmb(); - VgaVmodeReq = vmode; - while (VgaVmodeReq != NULL) { __dmb(); } -} - -// execute core 1 remote function -void Core1Exec(void (*fnc)()) -{ - __dmb(); - Core1Fnc = fnc; - __dmb(); -} - -// check if core 1 is busy (executing remote function) -Bool Core1Busy() -{ - __dmb(); - return Core1Fnc != NULL; -} - -// wait if core 1 is busy (executing remote function) -void Core1Wait() -{ - while (Core1Busy()) {} -} // wait for VSync scanline void WaitVSync() diff --git a/MCUME_pico/picovga_t4/vga.h b/MCUME_pico/picovga_t4/vga.h index 3387740..ff5f4b9 100755 --- a/MCUME_pico/picovga_t4/vga.h +++ b/MCUME_pico/picovga_t4/vga.h @@ -1,8 +1,11 @@ - // **************************************************************************** // // VGA output // +// file derived from the PicoVGA project +// https://github.com/Panda381/PicoVGA +// by Miroslav Nemecek +// // **************************************************************************** #ifndef _VGA_H @@ -10,124 +13,34 @@ // scanline type #define LINE_VSYNC 0 // long vertical sync -#define LINE_VVSYNC 1 // short vertical + vertical sync -#define LINE_VHSYNC 2 // short vertical + horizontal sync -#define LINE_HHSYNC 3 // short horizontal + horizontal sync -#define LINE_HVSYNC 4 // short horizontal + vertical sync -#define LINE_DARK 5 // dark line -#define LINE_IMG 6 // progressive image 0, 1, 2,... -#define LINE_IMGEVEN1 7 // interlaced image even 0, 2, 4,..., 1st subframe -#define LINE_IMGEVEN2 8 // interlaced image even 0, 2, 4,..., 2nd subframe -#define LINE_IMGODD1 9 // interlaced image odd 1, 3, 5,..., 1st subframe -#define LINE_IMGODD2 10 // interlaced image odd 1, 3, 5,..., 2nd subframe +#define LINE_DARK 1 // dark line +#define LINE_IMG 2 // progressive image 0, 1, 2,... extern u8 ScanlineType[MAXLINE]; -extern int DispDev; // current display device extern sVmode CurVmode; // copy of current videomode table -//extern int LayerMode; // current layer mode (LAYERMODE_*) extern volatile int ScanLine; // current scan line 1... extern volatile u32 Frame; // frame counter extern volatile int BufInx; // current buffer set (0..1) extern volatile Bool VSync; // current scan line is vsync or dark // line buffers -extern ALIGNED u8 LineBuf1[DBUF_MAX]; // scanline 1 image data -extern ALIGNED u8 LineBuf2[DBUF_MAX]; // scanline 2 image data -extern int LineBufSize[LAYERS_MAX]; // size of data buffers -extern u32 LineBufHsBp[4]; // HSYNC ... back porch-1 ... IRQ command ... image command -extern u32 LineBufFp; // front porch+1 -extern u32 LineBufDark[2]; // HSYNC ... dark line -extern u32 LineBufSync[10]; // vertical synchronization - // interlaced (5x half scanlines): - // 2x half synchronization (HSYNC pulse/2 ... line dark/2) - // 2x vertical synchronization (invert line dark/2 ... invert HSYNC pulse) - // 1x half synchronization (HSYNC pulse/2 ... line dark/2) - // progressive: 1x scanline with vertical synchronization (invert line dark ... invert HSYNC pulse) - -extern ALIGNED u8 LineBuf0[BLACK_MAX]; // line buffer with black color (used to clear rest of scanline) +extern u32 LineBufHsBp[4]; // HSYNC ... back porch-1 ... IRQ command ... image command +extern u32 LineBufFp; // front porch+1 +extern u32 LineBufDark[2]; // HSYNC ... dark line +extern u32 LineBufSync[10]; // vertical synchronization // control buffers extern u32 CtrlBuf1[CBUF_MAX]; // control pairs: u32 count, read address (must be terminated with [0,0]) extern u32 CtrlBuf2[CBUF_MAX]; // control pairs: u32 count, read address (must be terminated with [0,0]) -extern int CtrlBufSize[LAYERS_MAX]; // size of control buffers - -// render font pixel mask -extern u32 RenderTextMask[512]; - -// fill memory buffer with u32 words -// buf ... data buffer, must be 32-bit aligned -// data ... data word to store -// num ... number of 32-bit words (= number of bytes/4) -// Returns new destination address. -extern "C" u32* MemSet4(u32* buf, u32 data, int num); - -// blit scanline using key color -// dst ... destination buffer -// src ... source buffer -// w ... width -// key ... key color -extern "C" void BlitKey(u8* dst, u8* src, int w, u8 key); - -// render layers with sprites LAYERMODE_SPRITE* -// dbuf ... pointer to data buffer -// y ... coordinate of scanline -// scr ... pointer to layer screen structure sLayer -extern "C" void RenderSprite(u8* dbuf, int y, sLayer* scr); - -// render layers with fast sprites LAYERMODE_FASTSPRITE* -// cbuf ... pointer to control buffer -// y ... coordinate of scanline -// scr ... pointer to layer screen structure sLayer -// buf ... pointer to destination data buffer with transparent color -// Output new pointer to control buffer. -extern "C" u32* RenderFastSprite(u32* cbuf, int y, sLayer* scr, u8* buf); - -// render layers with transformation matrix LAYERMODE_PERSP* -// R0 ... dbuf pointer to data buffer -// R1 ... y coordinate of scanline (relative in destination image) -// R2 ... scr pointer to layer screen structure sLayer -extern "C" void RenderPersp(u8* dbuf, int y, sLayer* scr); - -// render layers double pixel with transformation matrix LAYERMODE_PERSP2* -// R0 ... dbuf pointer to data buffer -// R1 ... y coordinate of scanline (relative in destination image) -// R2 ... scr pointer to layer screen structure sLayer -extern "C" void RenderPersp2(u8* dbuf, int y, sLayer* scr); - -// render scanline -// cbuf ... control buffer -// dbuf ... data buffer (pixel data) -// line ... current line 0.. -// pixnum ... total pixels (must be multiple of 4) -// Returns new pointer to control buffer -extern "C" u32* Render(u32* cbuf, u8* dbuf, int line, int pixnum); // initialize scanline type table void ScanlineTypeInit(const sVmode* v); -// print table if scanline types -void ScanlineTypePrint(const u8* scan, int lines); - // initialize videomode (returns False on bad configuration) // - All layer modes must use same layer program (LAYERMODE_BASE = overlapped layers are OFF) -void VgaInit(const sVmode* vmode); //, u8 layer1mode=LAYERMODE_BASE, u8 layer2mode=LAYERMODE_BASE, u8 layer3mode=LAYERMODE_BASE); - -// VGA core -void VgaCore(); - -// request to initialize VGA videomode, NULL=only stop driver (wait to initialization completes) -void VgaInitReq(const sVmode* vmode); - -// execute core 1 remote function -void Core1Exec(void (*fnc)()); - -// check if core 1 is busy (executing remote function) -Bool Core1Busy(); - -// wait if core 1 is busy (executing remote function) -void Core1Wait(); +void VgaInit(const sVmode* vmode, u8* buf, int width, int height, int stride); //, u8 layer1mode=LAYERMODE_BASE, u8 layer2mode=LAYERMODE_BASE, u8 layer3mode=LAYERMODE_BASE); // wait for VSync scanline void WaitVSync(); diff --git a/MCUME_pico/picovga_t4/vga_blitkey.S b/MCUME_pico/picovga_t4/vga_blitkey.S deleted file mode 100755 index d9534d8..0000000 --- a/MCUME_pico/picovga_t4/vga_blitkey.S +++ /dev/null @@ -1,90 +0,0 @@ - -// **************************************************************************** -// -// VGA sprites -// -// **************************************************************************** -// Takes 100 bytes - -#include "define.h" // common definitions of C and ASM - - .syntax unified - .section .time_critical.BlitKey, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -// [6,7] blit macro (4 instructions, 8 bytes) -.macro blitkey n - ldrb r4,[r1,#\n] // [2] load 1 pixel - cmp r4,r3 // [1] is it transparent color? - beq 2f // [1,2] pixel is transparent - strb r4,[r0,#\n] // [2] write 1 pixel -2: -.endm - -// blit scanline using key color -// dst ... destination buffer -// src ... source buffer -// w ... width -// key ... key color -//extern "C" void BlitKey(u8* dst, u8* src, int w, u8 key); - -.thumb_func -.global BlitKey -BlitKey: - - // push registers - push {r4,lr} - -// Registers: -// R0 ... destination buffer -// R1 ... source buffer -// R2 ... width counter -// R3 ... key color -// R4 ... (temporary) - - // save start of destination buffer - mov lr,r0 // start buffer - - // get number of pixels aligned to 8 bytes - lsrs r4,r2,#3 // number of pixels / 8 - lsls r4,#3 // number of pixels aligned to 8 bytes down -> R4 - eors r2,r4 // number of pixels last 3 bits (modulo 8) - - // shift pointers to last 8-byte group - add r0,r4 // shift destination pointer to the end - add r1,r4 // shift source pointer to the end - - // jump to blit rest of pixels in last 8-byte group - adr r4,3f // get address of label '3:' (must be word aligned) - lsls r2,#3 // *8, convert number of pixels to offset of blit macro (1 macro is 8 bytes long) - subs r4,r2 // subtract offset of first valid blit macro - adds r4,#1 // set bit 0 - flag to use thumb instructions - bx r4 // jump into loop - -// ---- [53..61 per loop] blend pixels, speed 6.625..7.625 clock cycles per pixel - -.align 2 // address of label '3:' must be word aligned (32 bits) - - // [2] shift pointers 8 bytes down -1: subs r0,#8 // [1] shift destination pointer by 8 bytes down - subs r1,#8 // [1] shift source pointer by 8 bytes down - - // [48..56] blit 8 pixels (32 instructions) - blitkey 7 // [6,7] blit pixel 7 - blitkey 6 // [6,7] blit pixel 6 - blitkey 5 // [6,7] blit pixel 5 - blitkey 4 // [6,7] blit pixel 4 - blitkey 3 // [6,7] blit pixel 3 - blitkey 2 // [6,7] blit pixel 2 - blitkey 1 // [6,7] blit pixel 1 - blitkey 0 // [6,7] blit pixel 0 - -// this address must be word aligned - - // [2,3] next 8 pixels -3: cmp r0,lr // [1] start address reached? - bhi 1b // [1,2] not start address yet - - // pop registers and return from function -9: pop {r4,pc} diff --git a/MCUME_pico/picovga_t4/vga_config.h b/MCUME_pico/picovga_t4/vga_config.h index f52387c..8c20247 100755 --- a/MCUME_pico/picovga_t4/vga_config.h +++ b/MCUME_pico/picovga_t4/vga_config.h @@ -5,59 +5,23 @@ // // VGA configuration // +// file derived from the PicoVGA project +// https://github.com/Panda381/PicoVGA +// by Miroslav Nemecek +// // **************************************************************************** // === Configuration -#define LAYERS 1 //4 // total layers 1..4 (1 base layer + 3 overlapped layers) -#define SEGMAX 8 // max. number of video segment per video strip (size of 1 sSegm = 28 bytes) -#define STRIPMAX 8 // max. number of video strips (size of 1 sStrip = sSegm size*SEGMAX+4 = 228 bytes) - // size of sScreen = sStrip size*STRIPMAX+4 = 1828 bytes - #define MAXX 320 //640 // max. resolution in X direction (must be power of 4) #define MAXY 240 //480 // max. resolution in Y direction -#define MAXLINE 700 // max. number of scanlines (including sync and dark lines) +#define MAXLINE 525 //700 // max. number of scanlines (including sync and dark lines) // === Scanline render buffers (800 pixels: default size of buffers = 2*4*(800+8+800+24)+800 = 13856 bytes // Requirements by format, base layer 0, 1 wrap X segment: -// GF_GRAPH8 ... control buffer 16 bytes -// GF_TILE8 ... control buffer "width"+8 bytes -// GF_TILE16 ... control buffer "width/2"+8 bytes -// GF_TILE32 ... control buffer "width/4"+8 bytes -// GF_TILE64 ... control buffer "width/8"+8 bytes -// GF_PROGRESS ... control buffer 24 bytes -// other formats: data buffer "width" bytes, control buffer 16 bytes -#define DBUF0_MAX (MAXX+8) // max. size of data buffer of layer 0 -#define CBUF0_MAX ((MAXX+24)/4) // max. size of control buffer of layer 0 +// GF_GRAPH8 ... control buffer 4*4=16 bytes +#define CBUF_MAX 8 //((MAXX+24)/4) // max. size of control buffer of layer 0 -// Requirements by format, overlapped layer 1..3: -// LAYERMODE_SPRITE* ... data buffer "width"+4 bytes, control buffer 24 bytes -// LAYERMODE_FASTSPRITE* ... data buffer "width"+4 bytes, control buffer up to "width*2"+16 bytes -// other formats ... data buffer 4 bytes, control buffer 24 bytes -#define DBUF1_MAX (MAXX+8) // max. size of data buffer of layer 1 -#define CBUF1_MAX ((MAXX+24)/4) // max. size of control buffer of layer 1 - -#define DBUF2_MAX (MAXX+8) // max. size of data buffer of layer 2 -#define CBUF2_MAX ((MAXX+24)/4) // max. size of control buffer of layer 2 - -#define DBUF3_MAX (MAXX+8) // max. size of data buffer of layer 3 -#define CBUF3_MAX ((MAXX+24)/4) // max. size of control buffer of layer 3 - -#if LAYERS==1 -#define DBUF_MAX DBUF0_MAX // max. size of data buffer -#define CBUF_MAX CBUF0_MAX // max. size of control buffer -#elif LAYERS==2 -#define DBUF_MAX (DBUF0_MAX+DBUF1_MAX) // max. size of data buffer -#define CBUF_MAX (CBUF0_MAX+CBUF1_MAX) // max. size of control buffer -#elif LAYERS==3 -#define DBUF_MAX (DBUF0_MAX+DBUF1_MAX+DBUF2_MAX) // max. size of data buffer -#define CBUF_MAX (CBUF0_MAX+CBUF1_MAX+CBUF2_MAX) // max. size of control buffer -#elif LAYERS==4 -#define DBUF_MAX (DBUF0_MAX+DBUF1_MAX+DBUF2_MAX+DBUF3_MAX) // max. size of data buffer -#define CBUF_MAX (CBUF0_MAX+CBUF1_MAX+CBUF2_MAX+CBUF3_MAX) // max. size of control buffer -#else -#error Unsupported number of layers! -#endif // === VGA port pins // GP0 ... VGA B0 blue @@ -75,39 +39,21 @@ #define VGA_GPIO_LAST (VGA_GPIO_FIRST+VGA_GPIO_NUM-1) // last VGA GPIO #define VGA_GPIO_SYNC VGA_SYNCBASE // VGA SYNC GPIO +// === VGA PIO program +#define BASE_OFFSET 17 // offset of base layer program + // VGA PIO and state machines #define VGA_PIO pio0 // VGA PIO #define VGA_SM0 0 // VGA state machine of base layer 0 -#define VGA_SM1 1 // VGA state machine of overlapped layer 1 -#define VGA_SM2 2 // VGA state machine of overlapped layer 2 -#define VGA_SM3 3 // VGA state machine of overlapped layer 3 -#define VGA_SM(layer) (VGA_SM0+(layer)) // VGA state machine of the layer -#if LAYERS==1 +// LAYERS==1 #define VGA_SMALL B0 // mask of all state machines -#elif LAYERS==2 -#define VGA_SMALL (B0+B1) // mask of all state machines -#elif LAYERS==3 -#define VGA_SMALL (B0+B1+B2) // mask of all state machines -#elif LAYERS==4 -#define VGA_SMALL (B0+B1+B2+B3) // mask of all state machines -#else -#error Unsupported number of layers! -#endif + // VGA DMA #define VGA_DMA 2 // VGA DMA base channel #define VGA_DMA_CB0 (VGA_DMA+0) // VGA DMA channel - control block of base layer #define VGA_DMA_PIO0 (VGA_DMA+1) // VGA DMA channel - copy data of base layer to PIO (raises IRQ0 on quiet) -#define VGA_DMA_CB1 (VGA_DMA+2) // VGA DMA channel - control block of overlapped layer 1 -#define VGA_DMA_PIO1 (VGA_DMA+3) // VGA DMA channel - copy data of overlapped layer 1 to PIO -#define VGA_DMA_CB2 (VGA_DMA+4) // VGA DMA channel - control block of overlapped layer 1 -#define VGA_DMA_PIO2 (VGA_DMA+5) // VGA DMA channel - copy data of overlapped layer 2 to PIO -#define VGA_DMA_CB3 (VGA_DMA+6) // VGA DMA channel - control block of overlapped layer 1 -#define VGA_DMA_PIO3 (VGA_DMA+7) // VGA DMA channel - copy data of overlapped layer 3 to PIO - -#define VGA_DMA_CB(layer) (VGA_DMA_CB0+(layer)*2) // VGA DMA control channel of the layer -#define VGA_DMA_PIO(layer) (VGA_DMA_PIO0+(layer)*2) // VGA DMA data channel of the layer #define VGA_DMA_NUM (LAYERS*2) // number of used DMA channels #define VGA_DMA_FIRST VGA_DMA // first used DMA diff --git a/MCUME_pico/picovga_t4/vga_layer.cpp b/MCUME_pico/picovga_t4/vga_layer.cpp deleted file mode 100755 index a281b43..0000000 --- a/MCUME_pico/picovga_t4/vga_layer.cpp +++ /dev/null @@ -1,505 +0,0 @@ - -// **************************************************************************** -// -// VGA layers -// -// **************************************************************************** - -#include "include.h" - -// layer program descriptors -const sLayerProg LayerProg[LAYERPROG_NUM] = { - - // LAYERPROG_BASE base layer - { - .ins=vga_program_instructions, // pointer to program instructions - .prg=&vga_program, // pointer to program descriptor - .length=vga_program.length, // program length (number of instructions) - .wrap_target=vga_wrap_target, // offset of wrap target - .wrap=vga_wrap, // offset of wrap end - .idle=vga_offset_entry, // offset of idle - .entry=vga_offset_entry, // offset of entry - .maxidle=2, // max. offset of idle to detect end of job - .extranum=2, // number of extra offsets - .extra={ // extra offsets, pairs: offset, CPP-correction - vga_offset_extra1, 2, - vga_offset_extra2, 2, - }, - }, - - // LAYERPROG_KEY layer with key color - { - .ins=keylayer_program_instructions, // pointer to program instructions - .prg=&keylayer_program, // pointer to program descriptor - .length=keylayer_program.length, // program length (number of instructions) - .wrap_target=keylayer_wrap_target, // offset of wrap target - .wrap=keylayer_wrap, // offset of wrap end - .idle=keylayer_offset_idle, // offset of idle - .entry=keylayer_offset_entry, // offset of entry - .maxidle=2, // max. offset of idle to detect end of job - .extranum=1, // number of extra offsets - .extra={ // extra offsets, pairs: offset, CPP-correction - keylayer_offset_extra1, 6, - }, - }, - - // LAYERPROG_BLACK layer with black key color - { - .ins=blacklayer_program_instructions, // pointer to program instructions - .prg=&blacklayer_program, // pointer to program descriptor - .length=blacklayer_program.length, // program length (number of instructions) - .wrap_target=blacklayer_wrap_target, // offset of wrap target - .wrap=blacklayer_wrap, // offset of wrap end - .idle=blacklayer_offset_idle, // offset of idle - .entry=blacklayer_offset_entry, // offset of entry - .maxidle=2, // max. offset of idle to detect end of job - .extranum=2, // number of extra offsets - .extra={ // extra offsets, pairs: offset, CPP-correction - blacklayer_offset_extra1, 4, - blacklayer_offset_extra2, 3, - }, - }, - - // LAYERPROG_WHITE layer with white key color - { - .ins=whitelayer_program_instructions, // pointer to program instructions - .prg=&whitelayer_program, // pointer to program descriptor - .length=whitelayer_program.length, // program length (number of instructions) - .wrap_target=whitelayer_wrap_target, // offset of wrap target - .wrap=whitelayer_wrap, // offset of wrap end - .idle=whitelayer_offset_idle, // offset of idle - .entry=whitelayer_offset_entry, // offset of entry - .maxidle=2, // max. offset of idle to detect end of job - .extranum=1, // number of extra offsets - .extra={ // extra offsets, pairs: offset, CPP-correction - whitelayer_offset_extra1, 4, - }, - }, - - // LAYERPROG_MONO layer with mono pattern or simple color - { - .ins=monolayer_program_instructions, // pointer to program instructions - .prg=&monolayer_program, // pointer to program descriptor - .length=monolayer_program.length, // program length (number of instructions) - .wrap_target=monolayer_wrap_target, // offset of wrap target - .wrap=monolayer_wrap, // offset of wrap end - .idle=monolayer_offset_idle, // offset of idle - .entry=monolayer_offset_entry, // offset of entry - .maxidle=2, // max. offset of idle to detect end of job - .extranum=2, // number of extra offsets - .extra={ // extra offsets, pairs: offset, CPP-correction - monolayer_offset_extra1, 4, - monolayer_offset_extra2, 2, - }, - }, - - // LAYERPROG_RLE layer with RLE compression - { - .ins=rlelayer_program_instructions, // pointer to program instructions - .prg=&rlelayer_program, // pointer to program descriptor - .length=rlelayer_program.length, // program length (number of instructions) - .wrap_target=rlelayer_wrap_target, // offset of wrap target - .wrap=rlelayer_wrap, // offset of wrap end - .idle=rlelayer_offset_idle, // offset of idle - .entry=rlelayer_offset_entry, // offset of entry - .maxidle=2, // max. offset of idle to detect end of job - .extranum=7, // number of extra offsets - .extra={ // extra offsets, pairs: offset, CPP-correction - rlelayer_offset_extra1, 1, - rlelayer_offset_extra2, 3, - rlelayer_offset_extra3, 2, - rlelayer_offset_extra4, 2, - rlelayer_offset_extra5, 3, - rlelayer_offset_extra6, 2, - rlelayer_offset_extra7, 3, - }, - }, - -}; - -// current layer program of overlapped layers -u8 LayerProgInx; // index of current layer program (LAYERPROG_*) -sLayerProg CurLayerProg; // copy of current layer program - -// layer mode descriptors -const sLayerMode LayerMode[LAYERMODE_NUM] = { - - // LAYERMODE_BASE base layer - { - .prog=LAYERPROG_BASE, // layer program (LAYERPROG_*) - .mincpp=2, // minimal clock cycles per pixel - .maxcpp=17, // maximal clock cycles per pixel - }, - - // LAYERMODE_KEY layers with key color - { - .prog=LAYERPROG_KEY, // layer program (LAYERPROG_*) - .mincpp=6, // minimal clock cycles per pixel - .maxcpp=37, // maximal clock cycles per pixel - }, - - // LAYERMODE_BLACK layers with black key color - { - .prog=LAYERPROG_BLACK, // layer program (LAYERPROG_*) - .mincpp=4, // minimal clock cycles per pixel - .maxcpp=34, // maximal clock cycles per pixel - }, - - // LAYERMODE_WHITE layers with white key color - { - .prog=LAYERPROG_WHITE, // layer program (LAYERPROG_*) - .mincpp=4, // minimal clock cycles per pixel - .maxcpp=35, // maximal clock cycles per pixel - }, - - // LAYERMODE_MONO layers with mono pattern - { - .prog=LAYERPROG_MONO, // layer program (LAYERPROG_*) - .mincpp=4, // minimal clock cycles per pixel - .maxcpp=35, // maximal clock cycles per pixel - }, - - // LAYERMODE_COLOR layers with simple color - { - .prog=LAYERPROG_MONO, // layer program (LAYERPROG_*) - .mincpp=2, // minimal clock cycles per pixel - .maxcpp=33, // maximal clock cycles per pixel - }, - - // LAYERMODE_RLE layers with RLE compression - { - .prog=LAYERPROG_RLE, // layer program (LAYERPROG_*) - .mincpp=3, // minimal clock cycles per pixel - .maxcpp=32, // maximal clock cycles per pixel - }, - - // LAYERMODE_SPRITEKEY layers with sprites with key color - { - .prog=LAYERPROG_KEY, // layer program (LAYERPROG_*) - .mincpp=6, // minimal clock cycles per pixel - .maxcpp=37, // maximal clock cycles per pixel - }, - - // LAYERMODE_SPRITEBLACK layers with sprites with black key color - { - .prog=LAYERPROG_BLACK, // layer program (LAYERPROG_*) - .mincpp=4, // minimal clock cycles per pixel - .maxcpp=34, // maximal clock cycles per pixel - }, - - // LAYERMODE_SPRITEWHITE layers with sprites with white key color - { - .prog=LAYERPROG_WHITE, // layer program (LAYERPROG_*) - .mincpp=4, // minimal clock cycles per pixel - .maxcpp=35, // maximal clock cycles per pixel - }, - - // LAYERMODE_FASTSPRITEKEY layers with fast sprites with key color - { - .prog=LAYERPROG_KEY, // layer program (LAYERPROG_*) - .mincpp=6, // minimal clock cycles per pixel - .maxcpp=37, // maximal clock cycles per pixel - }, - - // LAYERMODE_FASTSPRITEBLACK layers with fast sprites with black key color - { - .prog=LAYERPROG_BLACK, // layer program (LAYERPROG_*) - .mincpp=4, // minimal clock cycles per pixel - .maxcpp=34, // maximal clock cycles per pixel - }, - - // LAYERMODE_FASTSPRITEWHITE layers with fast sprites with white key color - { - .prog=LAYERPROG_WHITE, // layer program (LAYERPROG_*) - .mincpp=4, // minimal clock cycles per pixel - .maxcpp=35, // maximal clock cycles per pixel - }, - - // LAYERMODE_PERSPKEY layer with key color and image with transformation matrix - { - .prog=LAYERPROG_KEY, // layer program (LAYERPROG_*) - .mincpp=6, // minimal clock cycles per pixel - .maxcpp=37, // maximal clock cycles per pixel - }, - - // LAYERMODE_PERSPBLACK layer with black key color and image with transformation matrix - { - .prog=LAYERPROG_BLACK, // layer program (LAYERPROG_*) - .mincpp=4, // minimal clock cycles per pixel - .maxcpp=34, // maximal clock cycles per pixel - }, - - // LAYERMODE_PERSPWHITE layer with white key color and image with transformation matrix - { - .prog=LAYERPROG_WHITE, // layer program (LAYERPROG_*) - .mincpp=4, // minimal clock cycles per pixel - .maxcpp=35, // maximal clock cycles per pixel - }, - - // LAYERMODE_PERSP2KEY layer with key color and double pixel image with transformation matrix - { - .prog=LAYERPROG_KEY, // layer program (LAYERPROG_*) - .mincpp=6, // minimal clock cycles per pixel - .maxcpp=37, // maximal clock cycles per pixel - }, - - // LAYERMODE_PERSP2BLACK layer with black key color and double pixel image with transformation matrix - { - .prog=LAYERPROG_BLACK, // layer program (LAYERPROG_*) - .mincpp=4, // minimal clock cycles per pixel - .maxcpp=34, // maximal clock cycles per pixel - }, - - // LAYERMODE_PERSP2WHITE layer with white key color and double pixel image with transformation matrix - { - .prog=LAYERPROG_WHITE, // layer program (LAYERPROG_*) - .mincpp=4, // minimal clock cycles per pixel - .maxcpp=35, // maximal clock cycles per pixel - }, -}; - -// current layer mode of layers -u8 LayerModeInx[LAYERS]; // index of current layer mode (LAYERMODE_*) -sLayerMode CurLayerMode[LAYERS]; // copy of current layer mode - -// current layer screens -sLayer LayerScreen[LAYERS]; // layer screens - -u8 LayerMask; // mask of active layers - -// index of first pin of layer (base layer should stay VGA_GPIO_FIRST) -u8 LayerFirstPin[LAYERS_MAX] = { VGA_GPIO_FIRST, VGA_GPIO_FIRST, VGA_GPIO_FIRST, VGA_GPIO_FIRST}; - -// number of pins of overlapped layer (base layer should stay VGA_GPIO_OUTNUM) -u8 LayerNumPin[LAYERS_MAX] = { VGA_GPIO_OUTNUM, VGA_GPIO_OUTNUM, VGA_GPIO_OUTNUM, VGA_GPIO_OUTNUM}; - -// set overlapped layer 1..3 ON -void LayerOn(u8 inx) -{ - __dmb(); - LayerScreen[inx].on = True; - __dmb(); -} - -// set overlapped layer 1..3 OFF -void LayerOff(u8 inx) -{ - __dmb(); - LayerScreen[inx].on = False; - __dmb(); -} - -// set coordinate X of overlapped layer -void LayerSetX(u8 inx, s16 x) -{ - sLayer* lay = &LayerScreen[inx]; - s32 cppx = lay->cpp*x; // initial delay - if (cppx < 0) cppx = 0; - u32 w = lay->w; // image width - u32 init = 0; // init word - - // prepare init word - switch (lay->mode) - { - case LAYERMODE_PERSP2KEY: // layer with key color and double pixel image with transformation matrix - case LAYERMODE_PERSPKEY: // layer with key color and image with transformation matrix - case LAYERMODE_FASTSPRITEKEY: // layer with fast sprites with key color - case LAYERMODE_SPRITEKEY: // layer with sprites with key color - case LAYERMODE_KEY: // layer with key color - init = VGAKEY(cppx, w, (lay->keycol & 0xff)); - break; - - case LAYERMODE_PERSP2BLACK: // layer with black key color and double pixel image with transformation matrix - case LAYERMODE_PERSPBLACK: // layer with black key color and image with transformation matrix - case LAYERMODE_FASTSPRITEBLACK: // layer with fast sprites with black key color - case LAYERMODE_SPRITEBLACK: // layer with sprites with black key color - case LAYERMODE_BLACK: // layer with black key color - init = VGABLACK(cppx, w); - break; - - case LAYERMODE_PERSP2WHITE: // layer with white key color and double pixel image with transformation matrix - case LAYERMODE_PERSPWHITE: // layer with white key color and image with transformation matrix - case LAYERMODE_FASTSPRITEWHITE: // layer with fast sprites with white key color - case LAYERMODE_SPRITEWHITE: // layer with sprites with white key color - case LAYERMODE_WHITE: // layer with white key color - init = VGAWHITE(cppx, w); - break; - - case LAYERMODE_MONO: // layer with mono pattern - init = VGAMONO(cppx, w, (lay->keycol & 0xff)); - break; - - case LAYERMODE_COLOR: // layer with simple color - init = VGACOLOR(cppx, w); - break; - - case LAYERMODE_RLE: // layer with RLE compression - init = VGARLE(cppx); - break; - } - lay->init = init; // init word - lay->x = x; // start X coordinate -} - -// set coordinate Y of overlapped layer -void LayerSetY(u8 inx, s16 y) -{ - sLayer* lay = &LayerScreen[inx]; - lay->y = y; -} - -// set width of image of overlapped layer -// Uses auto pitch wb (full line). Set custom wb after calling this function. -void LayerSetW(u8 inx, u16 w) -{ - sLayer* lay = &LayerScreen[inx]; - lay->w = w; // image width - Bool mono = (lay->mode == LAYERMODE_MONO); - lay->trans = mono ? (((w/8)+3)/4) : (w/4); // transfer count - lay->wb = mono ? (w/8) : w; // width bytes - LayerSetX(inx, lay->x); // update init word -} - -// set height of image of overlapped layer -void LayerSetH(u8 inx, u16 h) -{ - sLayer* lay = &LayerScreen[inx]; - lay->h = h; -} - -// setup overlapped layer 1..3 (not for sprites and not for perspective mode) -// inx ... layer index 1..3 -// img ... pointer to image data -// vmode ... pointer to initialized video configuration -// w ... image width in pixels (must be multiple of 4) -// h ... image height -// col ... key color (needed for LAYERMODE_KEY and LAYERMODE_MONO layer mode) -// par ... additional data (RLE index table, integer transformation matrix) -// Use these functions after layer setup: LayerSetX, LayerSetY, LayerOn -void LayerSetup(u8 inx, const u8* img, const sVmode* vmode, u16 w, u16 h, u8 col /* = 0 */, const void* par /* = NULL */) -{ - LayerOff(inx); // set layer OFF - sLayer* lay = &LayerScreen[inx]; // get pointer to layer - lay->img = img; // pointer to image data - lay->par = par; // additional parameter - lay->keycol = col | ((u16)col << 8) | ((u32)col << 16) | ((u32)col << 24); // key color - lay->x = 0; // X coordinate - lay->y = 0; // Y coordinate - lay->h = h; // height of image - lay->spritenum = 0; // number of sprites - lay->cpp = vmode->cpp; // save clocks per pixel - lay->mode = vmode->mode[inx]; // layer mode - LayerSetW(inx, w); // set width of image, update parameters init, trans and wb -} - -// setup overlapped layer 1..3 for LAYERMODE_PERSP* modes -// inx ... layer index 1..3 -// img ... pointer to source image data (image width and height must be power of 2) -// vmode ... pointer to initialized video configuration -// w ... destination image width in pixels (must be multiple of 4) -// h ... destination image height -// xbits ... number of bits of width of source image -// ybits ... number of bits of height of source image -// horiz ... horizon of perspective projection/4 (0=no perspecitve, <0 ceilling) -// mat ... integer transformation matrix -// col ... key color (needed for LAYERMODE_PERSPKEY layer mode) -// Use these functions after layer setup: LayerSetX, LayerSetY, LayerOn -void LayerPerspSetup(u8 inx, const u8* img, const sVmode* vmode, u16 w, u16 h, u8 xbits, u8 ybits, - s8 horiz, const int* mat, u8 col /* = 0 */) -{ - LayerSetup(inx, img, vmode, w, h, col, mat); - sLayer* lay = &LayerScreen[inx]; // get pointer to layer - lay->xbits = xbits; - lay->ybits = ybits; - lay->horiz = horiz; -} - -// setup overlapped layer 1..3 for LAYERMODE_SPRITE* and LAYERMODE_FASTSPRITE* modes -// inx ... layer index 1..3 -// sprite ... pointer to list of sprites (array of pointers to sprites; sorted by X on LAYERMODE_FASTSPRITE* modes) -// spritenum ... number of sprites in the list (to turn sprite off, you can set its coordinate Y out of the screen) -// vmode ... pointer to initialized video configuration -// x ... start coordinate X of area with sprites -// y ... start coordinate Y of area with sprites -// w ... width of area with sprites (must be multiple of 4) -// h ... height of area with sprites -// col ... key color (needed for LAYERMODE_SPRITEKEY and LAYERMODE_FASTSPRITEKEY layer mode) -// Use functions LayerOn after layer setup. -void LayerSpriteSetup(u8 inx, sSprite** sprite, u16 spritenum, const sVmode* vmode, - s16 x, s16 y, u16 w, u16 h, u8 col /* = 0 */) -{ - LayerSetup(inx, (const u8*)sprite, vmode, w, h, col); - LayerSetX(inx, x); - LayerSetY(inx, y); - sLayer* lay = &LayerScreen[inx]; // get pointer to layer - lay->spritenum = spritenum; -} - -// prepare array of start and length of lines (detects transparent pixels) -// img ... image -// x0 ... array of start of lines -// w0 ... array of length of lines -// w ... sprite width (slow sprite: max. width 255) -// h ... sprite height -// wb ... sprite pitch (bytes between lines) -// col ... key color -// fast ... fast sprite, divide start and length of line by 4 -void SpritePrepLines(const u8* img, u8* x0, u8* w0, u16 w, u16 h, u16 wb, u8 col, Bool fast) -{ - int x1, x2, w2, y; - const u8* d; - - // loop through lines - for (y = 0; y < h; y++) - { - // find start of line - d = &img[y*wb]; - for (x1 = 0; x1 < w; x1++) - { - if (*d != col) break; - d++; - } - - // find end of line - d = &img[y*wb + w - 1]; - for (x2 = w; x2 > x1; x2--) - { - if (*d != col) break; - d--; - } - - // prepare start and length - w2 = x2 - x1; - if (fast) - { - w2 += ((x2 + 3) & ~3) - x2; - x1 /= 4; - w2 = (w2 + 3)/4; - } - if (x1 > 255) x1 = 255; - if (w2 > 255) w2 = 255; - - // store start and length - *x0++ = x1; - *w0++ = w2; - } -} - -// sort fast sprite list by X coordinate -void SortSprite(sSprite** list, int num) -{ - int i; - sSprite* s; - sSprite* s2; - for (i = 0; i < num-1; i++) - { - s = list[i]; - s2 = list[i+1]; - if (s->x > s2->x) - { - list[i] = s2; - list[i+1] = s; - if (i > 0) i -= 2; - } - } -} diff --git a/MCUME_pico/picovga_t4/vga_layer.h b/MCUME_pico/picovga_t4/vga_layer.h deleted file mode 100755 index d0c59ee..0000000 --- a/MCUME_pico/picovga_t4/vga_layer.h +++ /dev/null @@ -1,195 +0,0 @@ - -// **************************************************************************** -// -// VGA layers -// -// **************************************************************************** - -#ifndef _VGA_LAYER_H -#define _VGA_LAYER_H - -// base layer commands -#define VGADARK(num,col) (((u32)(vga_offset_dark+BASE_OFFSET)<<27) | ((u32)(num)<<8) | (u32)(col)) // assemble control word of "dark" command -#define VGACMD(jmp,num) (((u32)(jmp)<<27) | (u32)(num)) // assemble control word - -// --- overlapped layer init word (delay: use number of offset pixels * Vmode.cpp, num: number of pixels) - -// init word of key color layer LAYERPROG_KEY -#define VGAKEY(delay,num,col) (((u32)((delay)+1)<<19) | ((u32)(col)<<11) | (u32)((num)-1)) - -// init word of mono layer LAYERPROG_MONO -#define VGAMONO(delay,num,col) (((u32)((delay)+0)<<20) | ((u32)(col)<<12) | ((u32)((num)-1)<<1) | B0) - -// init word of color layer LAYERPROG_MONO -#define VGACOLOR(delay,num) (((u32)((delay)+2)<<20) | ((u32)0xff<<12) | ((u32)((num)-1)<<1) | 0) - -// init word of black color layer LAYERPROG_BLACK -#define VGABLACK(delay,num) (((u32)((delay)+3)<<16) | (u32)((num)-1)) - -// init word of white color layer LAYERPROG_WHITE -#define VGAWHITE(delay,num) (((u32)((delay)+3)<<16) | (u32)((num)-1)) - -// init word of RLE layer LAYERPROG_RLE -#define VGARLE(delay) ((delay)+1) - -// swap bytes of command -#define BYTESWAP(n) ((((n)&0xff)<<24)|(((n)&0xff00)<<8)|(((n)&0xff0000)>>8)|(((n)&0xff000000)>>24)) - -// align to multiple of 4 -#define ALIGN4(x) ((x) & ~3) - -// layer program descriptor -typedef struct { - const u16* ins; // pointer to program instructions (NULL=layers is OFF) - const struct pio_program* prg; // pointer to program descriptor - u8 length; // program length (number of instructions) - u8 wrap_target; // offset of wrap target - u8 wrap; // offset of wrap end - u8 idle; // offset of idle - u8 entry; // offset of entry - u8 maxidle; // max. offset of idle to detect end of job - u8 extranum; // number of extra offsets - u8 extra[2*16]; // extra offsets, pairs: offset, CPP-correction -} sLayerProg; - -// layer program descriptors -extern const sLayerProg LayerProg[LAYERPROG_NUM]; - -// current layer program of overlapped layers -extern u8 LayerProgInx; // index of current layer program (LAYERPROG_*, LAYERPROG_BASE = overlapped layers are OFF) -extern sLayerProg CurLayerProg; // copy of current layer program - -// layer mode descriptor -typedef struct { - u8 prog; // layer program (LAYERPROG_*) - u8 mincpp; // minimal clock cycles per pixel - u8 maxcpp; // maximal clock cycles per pixel -} sLayerMode; - -// layer mode descriptors -extern const sLayerMode LayerMode[LAYERMODE_NUM]; - -// current layer mode of layers -extern u8 LayerModeInx[LAYERS]; // index of current layer mode (LAYERMODE_*) -extern sLayerMode CurLayerMode[LAYERS]; // copy of current layer mode - -// layer screen descriptor (on change update SLAYER_* in define.h) -typedef struct { - const u8* img; // pointer to image in current layer format, or sprite list - const void* par; // additional parameter (RLE index table, integer transformation matrix) - u32 init; // init word sent on start of scanline (start X coordinate) - u32 keycol; // key color - u16 trans; // trans count - s16 x; // start X coordinate - s16 y; // start Y coordinate - u16 w; // width in pixels - u16 h; // height - u16 wb; // image width in bytes (pitch of lines) - u8 mode; // layer mode - s8 horiz; // horizon of perspective projection/4 (only with LAYERMODE_PERSP* modes, 0=no perspecitve, <0 ceilling) - u8 xbits; // number of bits of width of source image (only with LAYERMODE_PERSP* modes) - u8 ybits; // number of bits of height of source image (only with LAYERMODE_PERSP* modes) - u16 spritenum; // number of sprites - Bool on; // layer is ON - u8 cpp; // current clock pulses per pixel (used to calculate X coordinate) -} sLayer; - -// sprite (on change update SSPRITE_* in define.h) -typedef struct { - u8* img; // SSPRITE_IMG pointer to image data - u8* x0; // SSPRITE_X0 pointer to array of start of lines, or fast sprite start of lines/4 - u8* w0; // SSPRITE_W0 pointer to array of length of lines, or fast sprite length of lines/4 - u32 keycol; // SSPRITE_KEYCOL key color - s16 x; // SSPRITE_X sprite X-coordinate on the screen - s16 y; // SSPRITE_Y sprite Y-coordinate on the screen - u16 w; // SSPRITE_W sprite width (slow sprite: max. width 255) - u16 h; // SSPRITE_H sprite height - u16 wb; // SSPRITE_WB sprite pitch (number of bytes between lines) - u16 res; // ...reserved, structure align -} sSprite; - -// current layer screens -extern sLayer LayerScreen[LAYERS]; // layer screens - -extern u8 LayerMask; // mask of active layers - -// index of first pin of layer (base layer should stay VGA_GPIO_FIRST) -extern u8 LayerFirstPin[LAYERS_MAX]; - -// number of pins of overlapped layer (base layer should stay VGA_GPIO_OUTNUM) -extern u8 LayerNumPin[LAYERS_MAX]; - -// set overlapped layer 1..3 ON -void LayerOn(u8 inx); - -// set overlapped layer 1..3 OFF -void LayerOff(u8 inx); - -// set coordinate X of overlapped layer -void LayerSetX(u8 inx, s16 x); - -// set coordinate Y of overlapped layer -void LayerSetY(u8 inx, s16 y); - -// set width of image of overlapped layer -// Uses auto pitch wb (full line). Set custom wb after calling this function. -void LayerSetW(u8 inx, u16 w); - -// set height of image of overlapped layer -void LayerSetH(u8 inx, u16 h); - -// setup overlapped layer 1..3 (not for sprites and not for perspective mode) -// inx ... layer index 1..3 -// img ... pointer to image data -// vmode ... pointer to initialized video configuration -// w ... image width in pixels (must be multiple of 4) -// h ... image height -// col ... key color (needed for LAYERMODE_KEY and LAYERMODE_MONO layer mode) -// par ... additional data (RLE index table, integer transformation matrix) -// Use these functions after layer setup: LayerSetX, LayerSetY, LayerOn -void LayerSetup(u8 inx, const u8* img, const sVmode* vmode, u16 w, u16 h, u8 col = 0, const void* par = NULL); - -// setup overlapped layer 1..3 for LAYERMODE_PERSP* modes -// inx ... layer index 1..3 -// img ... pointer to source image data (image width and height must be power of 2) -// vmode ... pointer to initialized video configuration -// w ... destination image width in pixels (must be multiple of 4) -// h ... destination image height -// xbits ... number of bits of width of source image -// ybits ... number of bits of height of source image -// horiz ... horizon of perspective projection/4 (0=no perspecitve, <0 ceilling) -// mat ... integer transformation matrix -// col ... key color (needed for LAYERMODE_PERSPKEY layer mode) -// Use these functions after layer setup: LayerSetX, LayerSetY, LayerOn -void LayerPerspSetup(u8 inx, const u8* img, const sVmode* vmode, u16 w, u16 h, u8 xbits, u8 ybits, - s8 horiz, const int* mat, u8 col = 0); - -// setup overlapped layer 1..3 for LAYERMODE_SPRITE* and LAYERMODE_FASTSPRITE* modes -// inx ... layer index 1..3 -// sprite ... pointer to list of sprites (array of pointers to sprites; sorted by X on LAYERMODE_FASTSPRITE* modes) -// spritenum ... number of sprites in the list (to turn sprite off, you can set its coordinate Y out of the screen) -// vmode ... pointer to initialized video configuration -// x ... start coordinate X of area with sprites -// y ... start coordinate Y of area with sprites -// w ... width of area with sprites (must be multiple of 4) -// h ... height of area with sprites -// col ... key color (needed for LAYERMODE_SPRITEKEY and LAYERMODE_FASTSPRITEKEY layer mode) -// Use functions LayerOn after layer setup. -void LayerSpriteSetup(u8 inx, sSprite** sprite, u16 spritenum, const sVmode* vmode, - s16 x, s16 y, u16 w, u16 h, u8 col = 0); - -// prepare array of start and length of lines (detects transparent pixels) -// img ... image -// x0 ... array of start of lines -// w0 ... array of length of lines -// w ... sprite width (slow sprite: max. width 255) -// h ... sprite height -// wb ... sprite pitch (bytes between lines) -// col ... key color -// fast ... fast sprite, divide start and length of line by 4 -void SpritePrepLines(const u8* img, u8* x0, u8* w0, u16 w, u16 h, u16 wb, u8 col, Bool fast); - -// sort fast sprite list by X coordinate -void SortSprite(sSprite** list, int num); - -#endif // _VGA_LAYER_H diff --git a/MCUME_pico/picovga_t4/vga_pal.h b/MCUME_pico/picovga_t4/vga_pal.h deleted file mode 100755 index ff71e40..0000000 --- a/MCUME_pico/picovga_t4/vga_pal.h +++ /dev/null @@ -1,109 +0,0 @@ - -// **************************************************************************** -// -// VGA colors and palettes -// -// **************************************************************************** - -#ifndef _VGA_PAL_H -#define _VGA_PAL_H - -#define MULTICOL(a,b,c,d) ((a)|((b)<<8)|((c)<<16)|((d)<<24)) // multiply color pattern (used in mode GF_COLOR) - -// CGA colors -#define CGACOL_0 0 // 0x000000 black -#define CGACOL_1 2 // 0x0000C3 dark blue -#define CGACOL_2 20 // 0x00C300 dark green -#define CGACOL_3 22 // 0x00C3C3 dark cyan -#define CGACOL_4 160 // 0xC30000 dark red -#define CGACOL_5 162 // 0xC300C3 dark magenta -#define CGACOL_6 168 // 0xC35400 brown -#define CGACOL_7 182 // 0xC3C3C3 light gray -#define CGACOL_8 73 // 0x545454 dark gray -#define CGACOL_9 75 // 0x5454FF light blue -#define CGACOL_10 93 // 0x54FF54 light green -#define CGACOL_11 95 // 0x54FFFF light cyan -#define CGACOL_12 233 // 0xFF5454 light red -#define CGACOL_13 235 // 0xFF54FF light magenta -#define CGACOL_14 253 // 0xFFFF54 yellow -#define CGACOL_15 255 // 0xFFFFFF white - -// ZX Spectrum color -#define ZXCOL_0 0 // 0x000000 black -#define ZXCOL_1 2 // 0x0000C3 dark blue -#define ZXCOL_2 160 // 0xC30000 dark red -#define ZXCOL_3 162 // 0xC300C3 dark magenta -#define ZXCOL_4 20 // 0x00C300 dark green -#define ZXCOL_5 22 // 0x00C3C3 dark cyan -#define ZXCOL_6 180 // 0xC3C300 dark yellow -#define ZXCOL_7 182 // 0xC3C3C3 light gray -#define ZXCOL_8 73 // 0x545454 dark gray -#define ZXCOL_9 3 // 0x0000FF light blue -#define ZXCOL_10 224 // 0xFF0000 light red -#define ZXCOL_11 227 // 0xFF00FF light magenta -#define ZXCOL_12 28 // 0x00FF00 light green -#define ZXCOL_13 31 // 0x00FFFF light cyan -#define ZXCOL_14 252 // 0xFFFF00 yellow -#define ZXCOL_15 255 // 0xFFFFFF white - -// Colors -// GP0 ... B0 ... VGA B0 blue -// GP1 ... B1 ... VGA B1 -// GP2 ... B2 ... VGA G0 green -// GP3 ... B3 ... VGA G1 -// GP4 ... B4 ... VGA G2 -// GP5 ... B5 ... VGA R0 red -// GP6 ... B6 ... VGA R1 -// GP7 ... B7 ... VGA R2 - -#define COL_BLACK 0 - -#define COL_DARKBLUE B0 -#define COL_SEMIBLUE B1 -#define COL_BLUE (B0+B1) -#define COL_MOREBLUE (COL_BLUE+B3+B6) -#define COL_LIGHTBLUE (COL_BLUE+B4+B7) - -#define COL_DARKGREEN B3 -#define COL_SEMIGREEN B4 -#define COL_GREEN (B2+B3+B4) -#define COL_MOREGREEN (COL_GREEN+B0+B6) -#define COL_LIGHTGREEN (COL_GREEN+B1+B7) - -#define COL_DARKRED B6 -#define COL_SEMIRED B7 -#define COL_RED (B5+B6+B7) -#define COL_MORERED (COL_RED+B0+B3) -#define COL_LIGHTRED (COL_RED+B1+B4) - -#define COL_DARKCYAN (B0+B3) -#define COL_SEMICYAN (B1+B4) -#define COL_CYAN (B0+B1+B2+B3+B4) - -#define COL_DARKMAGENTA (B0+B6) -#define COL_SEMIMAGENTA (B1+B7) -#define COL_MAGENTA (B0+B1+B5+B6+B7) - -#define COL_DARKYELLOW (B3+B6) -#define COL_SEMIYELLOW (B4+B7) -#define COL_YELLOW (B2+B3+B4+B5+B6+B7) - -#define COL_GRAY0 0 -#define COL_GRAY1 (B2+B5) -#define COL_GRAY2 (B0+B3+B6) -#define COL_GRAY3 (B0+B2+B3+B5+B6) -#define COL_GRAY4 (B1+B4+B7) -#define COL_GRAY5 (B1+B2+B4+B5+B7) -#define COL_GRAY6 (B0+B1+B3+B4+B6+B7) -#define COL_GRAY7 (B0+B1+B2+B3+B4+B5+B6+B7) - -#define COL_WHITE COL_GRAY7 - -// compose color from RGB -#define COLRGB(r,g,b) ((u8)(((r)&0xe0)|(((g)&0xe0)>>3)|((b)>>6))) - -// default 16-color palettes (CGA colors) -// - do not set "const", to stay in faster RAM -extern u8 DefPal16[16]; - -#endif // _VGA_PAL_H diff --git a/MCUME_pico/picovga_t4/vga_render.S b/MCUME_pico/picovga_t4/vga_render.S deleted file mode 100755 index 643603d..0000000 --- a/MCUME_pico/picovga_t4/vga_render.S +++ /dev/null @@ -1,313 +0,0 @@ - -// **************************************************************************** -// -// VGA render -// -// **************************************************************************** - -#include "define.h" // common definitions of C and ASM - - .syntax unified - .section .time_critical.Render, "ax" - .cpu cortex-m0plus - .thumb // use 16-bit instructions - -.extern pScreen // sScreen* pScreen; // pointer to current video screen -.extern LineBuf0 // u8 LineBuf0[BLACK_MAX]; // line buffer with black color - -// extern "C" u32* Render(u32* cbuf, u8* dbuf, int line, int pixnum); - -// render scanline -// cbuf ... control buffer -// dbuf ... data buffer (pixel data) -// line ... current scanline 0.. -// pixnum ... total pixels (must be multiple of 4) -// Returns new pointer to control buffer - -.thumb_func -.global Render -Render: - - // push registers - push {r4-r7,lr} - - // prepare local variables -// SP+0: input argument of render functions -// SP+4: R0 control buffer -// SP+8: R1 data buffer (pixel data) -// SP+12: R2 current scanline 0.. -// SP+16: R3 total pixels -// SP+20: R4 -// SP+24: R5 -// SP+28: R6 -// SP+32: R7 -// SP+36: LR - - sub sp,#20 - str r0,[sp,#4] // control buffer - str r1,[sp,#8] // data buffer - str r3,[sp,#16] // total pixels - -// ---- prepare pointer to current screen -// sScreen* s = pScreen; -// if (s != NULL) { - - // prepare pointer to current screen - ldr r4,Render_pScreenAddr // pointer to pointer to current video Screen (variable pScreen) - ldr r4,[r4,#0] // pointer to current video Screen - cmp r4,#0 // is pointer valid? - beq Render_Clear // pointer is not valid, clear rest of line (display is OFF) - -// ---- find video strip with current scanline -// int stripnum = s->num; -// sStrip* t = &s->strip[0]; -// for (; stripnum > 0; stripnum--) { - - // loop through video strips - ldrh r5,[r4,#SSCREEN_NUM] // u16 number of video strips - tst r5,r5 // check number of video strips - beq Render_Clear // no video strips, return - adds r4,#SSCREEN_STRIP // pointer to first video strip - -// R2 ... current scanline -// R4 ... pointer to video strip -// R5 ... counter of video strips - -Render_StripLoop: - - // chek if current scanline has been found - // if (line < t->height) { - ldrh r3,[r4,#SSTRIP_HEIGHT] // u16 height of this video strip - cmp r2,r3 // check if current scanline fits into this video strip - blo Render_StripOK // scanline < strip height, this strip is OK - - // subtract video strip height from scanline number (to be relative to start of strip) - // line -= t->height; - subs r2,r3 // subtract strip height from scanline number - - // next video strip - // t++; - // for (; stripnum > 0; stripnum--) - adds r4,#SSTRIP_SIZE // shift pointer to next video strip - subs r5,#1 // counter of video strips - bne Render_StripLoop // next video strip - b Render_Clear // video strip not found - -// ---- process all video segments - -Render_StripOK: - - // prepare first video segment - // sSegm* g = &t->seg[0]; - // int segnum = t->num; - // for (; segnum > 0; segnum--) { - str r2,[sp,#12] // save current scanline - ldrh r5,[r4,#SSTRIP_NUM] // u16 number of video segments - tst r5,r5 // check number of video segments - beq Render_Clear // no video strips, return - adds r4,#SSTRIP_SEG // pointer to first video segment - -// R4 ... pointer to video segment -// R5 ... counter of video segments - -Render_SegmLoop: - - // get number of remaining pixels - ldr r2,[sp,#16] // get remaining pixels - tst r2,r2 // check number of pixels - beq Render_Clear // end of scanline, stop rendering - - // get segment width -> R3 - // int w = g->width; - // if (w > pixnum) w = pixnum; - // if (w > 0) { - ldrh r3,[r4,#SSEGM_WIDTH] // get segment width - cmp r3,r2 // check width - blo 2f // width is OK - mov r3,r2 // limit width by total width -2: tst r3,r3 // check width - beq Render_SegmNext // this segment is invisible, skip it - - // update remaining pixels - // pixnum -= w; - subs r2,r3 // decrease remaining width - str r2,[sp,#16] // store new remaining pixels - - // get Y coordinate -> R2 - // int y = g->offy + line; - ldrh r2,[r4,#SSEGM_OFFY] // get offset at Y direction - sxth r2,r2 // expand to signed - ldr r1,[sp,#12] // get current scanline - add r2,r1 // add Y offset and current scanline - - // double lines - // if (g->dbly) y /= 2; - ldrb r1,[r4,#SSEGM_DBLY] // get dbly flag - tst r1,r1 // is dbly flag set? - beq 2f // dbly flag not set - asrs r2,#1 // Y coordinate / 2 - - // wrap Y coordinate - // int wy = g->wrapy; - // while (y < 0) y += wy; - // while (y >= wy) y -= wy; -2: ldrh r1,[r4,#SSEGM_WRAPY] // get wrapy -3: subs r2,r1 // subtract wrapy - bpl 3b // repeat -4: adds r2,r1 // add wrapy - bmi 4b // repeat - - // get X coordinate -> R1 - // int x = g->offx; -6: ldrh r1,[r4,#SSEGM_OFFX] // get offset at X direction - sxth r1,r1 // expand to signed - - // wrap X coordinate - // int wx = g->wrapx; - // while (x < 0) x += wx; - // while (x >= wx) x -= wx; - ldrh r0,[r4,#SSEGM_WRAPX] // get wrapx -3: subs r1,r0 // subtract wrapx - bpl 3b // repeat -4: adds r1,r0 // add wrapx - bmi 4b // repeat - -// ---- process 1st format group: GF_COLOR - - // get format -> R0 -6: ldrb r0,[r4,#SSEGM_FORM] // get current format - - // serve format GF_COLOR - tst r0,r0 // format GF_COLOR ? - bne 7f // no - - // u32 par = ((y & 1) == 0) ? g->par : g->par2 - lsrs r2,#1 // check bit 0 of Y coordinate - ldr r1,[r4,#SSEGM_PAR] // get par for even line - bcc 2f // even line - ldr r1,[r4,#SSEGM_PAR2] // get par2 for odd line - - // *cbuf++ = w/4; // number of pixels/4 -2: lsrs r2,r3,#2 // width/4 - ldr r6,[sp,#4] // get pointer to control buffer - stmia r6!,{r2} // store width/4 - - // *cbuf++ = (u32)dbuf; // pointer to data buffer - ldr r0,[sp,#8] // get pointer to data buffer - stmia r6!,{r0} // store pointer to data - str r6,[sp,#4] // save new pointer to control buffer - - // dbuf = RenderColor(dbuf, par, w/4); - bl RenderColor - str r0,[sp,#8] // store new pointer to data buffer - b Render_SegmNext - -// ---- process 2nd format group: using control buffer cbuf - - // prepare input argument video segment -> [SP+0] -7: str r4,[sp,#0] // prepare 4th argument - current video segment - - // prepare function addres -> R7 - adr r7,Render_FncAddr // get address of jump table - lsls r6,r0,#2 // format * 4 - ldr r7,[r7,r6] // load function address -> R7 - - // check 2nd format group - cmp r0,#GF_GRP2MAX // check 2nd format group - bhi 2f // > 2nd group - - // cbuf = RenderGraph8(cbuf, x, y, w, g); - ldr r0,[sp,#4] // get pointer to control buffer - blx r7 // call render function - str r0,[sp,#4] // save new pointer to control buffer - b Render_SegmNext - -// ---- process 3rd format group: using data buffer dbuf - - // *cbuf++ = w/4; // number of pixels/4 -2: lsrs r0,r3,#2 // width/4 - ldr r6,[sp,#4] // get pointer to control buffer - stmia r6!,{r0} // store width/4 - - // *cbuf++ = (u32)dbuf; // pointer to data buffer - ldr r0,[sp,#8] // get pointer to data buffer - stmia r6!,{r0} // store pointer to data - str r6,[sp,#4] // save new pointer to control buffer - - // dbuf = RenderColor(dbuf, par, w/4); - blx r7 // call render function - str r0,[sp,#8] // store new pointer to data buffer - -Render_SegmNext: - - // next video segment - adds r4,#SSEGM_SIZE // shift pointer to next video segment - subs r5,#1 // counter of video segments - bne Render_SegmLoop // next video segment - -// ---- clear rest of line, write pointer to control buffer - -Render_Clear: - - // return current control buffer - ldr r0,[sp,#4] // control buffer - - // check if some pixels left - ldr r1,[sp,#16] // number of remaining pixels - lsrs r1,#2 // number of pixels/4 (= number of 4-pixels) - beq 9f // no pixels left - - // write size and address to control buffer - ldr r2,Render_LineBuf0Addr // data buffer with black color - stmia r0!,{r1,r2} // write number of 4-pixels and pointer to data buffer to control buffer - - // pop registers and return (return control buffer in r0) -9: add sp,#20 - pop {r4-r7,pc} - - .align 2 - -// pointer to pointer with current video screen -Render_pScreenAddr: - .word pScreen - -// pointer to buffer with black color -Render_LineBuf0Addr: - .word LineBuf0 - -// poiners to render functions -Render_FncAddr: - // 1st format group - .word RenderColor // GF_COLOR simple color (par=color pattern 4-pixels even line, par2=color pattern 4-pixels odd line) - - // 2nd format group - .word RenderGraph8 // GF_GRAPH8 native 8-bit graphics (X1Y1R2G2B2) - fast, transfers "as is" to PIO - .word RenderTile // GF_TILE tiles - .word RenderTile2 // GF_TILE alternate tiles - .word RenderProgress // GF_PROGRESS horizontal progress indicator - .word RenderGrad1 // render gradient with 1 line GF_GRAD1 - .word RenderGrad2 // render gradient with 2 lines GF_GRAD2 - - // 3rd format group - .word RenderGraph4 // GF_GRAPH4 4-bit graphics - .word RenderGraph2 // GF_GRAPH2 2-bit graphics - .word RenderGraph1 // GF_GRAPH1 1-bit graphics - .word RenderMText // GF_MTEXT 8-pixel mono text - .word RenderAText // GF_ATEXT 8-pixel attribute text, character + 2x4 bit attributes - .word RenderFText // GF_FTEXT 8-pixel foreground color text, character + foreground color - .word RenderCText // GF_CTEXT 8-pixel color text, character + background color + foreground color - .word RenderGText // GF_GTEXT 8-pixel gradient text (par = pointer to 1-bit font, par2 = pointer to color array) - .word RenderDText // GF_DTEXT 8-pixel double gradient text (par = pointer to 1-bit font, par2 = pointer to color array) - .word RenderLevel // GF_LEVEL level graph - .word RenderLevelGrad // GF_LEVELGRAD level gradient graph - .word RenderOscil // GF_OSCIL oscilloscope pixel graph - .word RenderOscLine // GF_OSCLINE oscilloscope line graph - .word RenderPlane2 // GF_PLANE2 4 colors on 2 graphic planes - .word RenderAttrib8 // GF_ATTRIB8 2x4 bit color attribute per 8x8 pixel sample - .word RenderGraph8Mat // GF_GRAPH8MAT 8-bit graphics with 2D matrix transformation - .word RenderGraph8Persp // GF_GRAPH8PERSP 8-bit graphics with perspective projection - .word RenderTilePersp // GF_TILEPERSP tiles with perspective - .word RenderTilePersp15 // GF_TILEPERSP15 tiles with perspective, 1.5 pixels - .word RenderTilePersp2 // GF_TILEPERSP2 tiles with perspective, double pixels - .word RenderTilePersp3 // GF_TILEPERSP3 tiles with perspective, triple pixels - .word RenderTilePersp4 // GF_TILEPERSP4 tiles with perspective, quadruple pixels diff --git a/MCUME_pico/picovga_t4/vga_screen.cpp b/MCUME_pico/picovga_t4/vga_screen.cpp deleted file mode 100755 index 0cd92e8..0000000 --- a/MCUME_pico/picovga_t4/vga_screen.cpp +++ /dev/null @@ -1,707 +0,0 @@ - -// **************************************************************************** -// -// VGA screen layout -// -// **************************************************************************** - -#include "include.h" - -// current video screen -sScreen Screen = { .num = 0 }; // default video screen -sScreen* pScreen = &Screen; // pointer to current video screen - -// clear screen (set 0 strips, does not modify sprites) -void ScreenClear(sScreen* s) -{ - __dmb(); - s->num = 0; - __dmb(); -} - -// add empty strip to the screen (returns pointer to the strip) -sStrip* ScreenAddStrip(sScreen* s, int height) -{ - int n = s->num; - sStrip* t = &s->strip[n]; - t->height = height; - t->num = 0; - __dmb(); - s->num = n + 1; - __dmb(); - return t; -} - -// add empty segment to video strip (returns pointer to the segment and initialises is to defaults) -sSegm* ScreenAddSegm(sStrip* strip, int width) -{ - int n = strip->num; - sSegm* g = &strip->seg[n]; - g->width = width; - g->wb = width; - g->offx = 0; - g->offy = 0; - g->wrapx = width; - g->wrapy = strip->height; - g->data = NULL; - g->form = GF_COLOR; - g->dbly = false; - g->par = 0; - g->par2 = 0; - __dmb(); - strip->num = n + 1; - __dmb(); - return g; -} - -// set video segment to simple color format GF_COLOR -// col1 = color pattern 4-pixels even line (use macro MULTICOL) -// col2 = color pattern 4-pixels odd line (use macro MULTICOL) -void ScreenSegmColor(sSegm* segm, u32 col1, u32 col2) -{ - segm->par = col1; - segm->par2 = col2; - __dmb(); - segm->form = GF_COLOR; - __dmb(); -} - -// set video segment to gradient with 1 line -// data = pointer to data buffer with gradient -// wb = pitch - length of buffer -// To scroll gradient, set virtual dimension wrapx, then shift offx -void ScreenSegmGrad1(sSegm* segm, const void* data, int wb) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->wb = wb; - __dmb(); - segm->form = GF_GRAD1; - __dmb(); -} - -// set video segment to gradient with 2 lines -// data = pointer to data buffer with gradient -// wb = pitch - lenght of buffer -// To scroll gradient, set virtual dimension wrapx, then shift offx -void ScreenSegmGrad2(sSegm* segm, const void* data, int wb) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->wb = wb; - __dmb(); - segm->form = GF_GRAD2; - __dmb(); -} - -// set video segment to native 8-bit graphics (R3G3B2) -// data = pointer to data buffer -// wb = pitch - number of bytes between lines -// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy. -void ScreenSegmGraph8(sSegm* segm, const void* data, int wb) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->wb = wb; - __dmb(); - segm->form = GF_GRAPH8; - __dmb(); -} - -// generate 16-color palette translation table for functions ScreenSegmGraph4 -// trans = pointer to destination palette translation table (u16 trans[256]) -// pal = pointer to source palette of 16 colors (u8 pal[16]) -void GenPal16Trans(u16* trans, const u8* pal) -{ - int i, j; - u16 k; - for (i = 0; i < 256; i++) - { - j = (i >> 4) & 0x0f; - k = pal[j]; - - j = i & 0x0f; - k |= (u16)pal[j] << 8; - - trans[i] = k; - } -} - -// set video segment to 4-bit palette graphics -// data = pointer to data buffer -// trans = pointer to 16-color palette translation table (generated with GenPal16Trans function) -// wb = pitch - number of bytes between lines -// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy. -void ScreenSegmGraph4(sSegm* segm, const void* data, const void* trans, int wb) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->par = (u32)trans; - segm->wb = wb; - __dmb(); - segm->form = GF_GRAPH4; - __dmb(); -} - -// generate palette 4 translation table for functions ScreenSegmGraph2 -// trans = pointer to destination palette translation table (u32 trans[256]) -// pal = pointer to source palette of 4 colors (u8 pal[4]) -void GenPal4Trans(u32* trans, const u8* pal) -{ - int i, j; - u32 k; - for (i = 0; i < 256; i++) - { - j = (i >> 6) & 0x03; - k = pal[j]; - - j = (i >> 4) & 0x03; - k |= (u32)pal[j] << 8; - - j = (i >> 2) & 0x03; - k |= (u32)pal[j] << 16; - - j = i & 0x03; - k |= (u32)pal[j] << 24; - - trans[i] = k; - } -} - -// set video segment to 2-bit palette graphics -// data = pointer to data buffer -// trans = pointer to 4-color palette translation table (generated with GenPal4Trans function) -// wb = pitch - number of bytes between lines -// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy. -void ScreenSegmGraph2(sSegm* segm, const void* data, const void* trans, int wb) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->par = (u32)trans; - segm->wb = wb; - __dmb(); - segm->form = GF_GRAPH2; - __dmb(); -} - -// set video segment to 1-bit palette graphics -// data = pointer to data buffer -// bg = background color -// fg = foreground color -// wb = pitch - number of bytes between lines -// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy. -void ScreenSegmGraph1(sSegm* segm, const void* data, u8 bg, u8 fg, int wb) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->par = bg | ((u32)fg << 8); - segm->wb = wb; - __dmb(); - segm->form = GF_GRAPH1; - __dmb(); -} - -// set video segment to 8-pixel mono text -// data = pointer to text buffer -// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) -// fontheight = font height -// bg = background color -// fg = foreground color -// wb = pitch - number of bytes between text lines -void ScreenSegmMText(sSegm* segm, const void* data, const void* font, u16 fontheight, u8 bg, u8 fg, int wb) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->par = (u32)font; - segm->par2 = bg | ((u32)fg << 8); - segm->par3 = fontheight; - segm->wb = wb; - __dmb(); - segm->form = GF_MTEXT; - __dmb(); -} - -// set video segment to 8-pixel attribute text -// data = pointer to text buffer (character + 2x4 bit attributes) -// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) -// fontheight = font height -// pal = pointer to palette of 16 colors -// wb = pitch - number of bytes between text lines -void ScreenSegmAText(sSegm* segm, const void* data, const void* font, u16 fontheight, const void* pal, int wb) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->par = (u32)font; - segm->par2 = (u32)pal; - segm->par3 = fontheight; - segm->wb = wb; - __dmb(); - segm->form = GF_ATEXT; - __dmb(); -} - -// set video segment to 8-pixel foreground color text -// data = pointer to text buffer (character + foreground color) -// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) -// fontheight = font height -// bg = background color -// wb = pitch - number of bytes between text lines -void ScreenSegmFText(sSegm* segm, const void* data, const void* font, u16 fontheight, u8 bg, int wb) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->par = (u32)font; - segm->par2 = bg; - segm->par3 = fontheight; - segm->wb = wb; - __dmb(); - segm->form = GF_FTEXT; - __dmb(); -} - -// set video segment to 8-pixel color text -// data = pointer to text buffer (character + background color + foreground color) -// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) -// fontheight = font height -// wb = pitch - number of bytes between text lines -void ScreenSegmCText(sSegm* segm, const void* data, const void* font, u16 fontheight, int wb) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->par = (u32)font; - segm->par3 = fontheight; - segm->wb = wb; - __dmb(); - segm->form = GF_CTEXT; - __dmb(); -} - -// set video segment to 8-pixel gradient color text -// data = pointer to text buffer (character + foreground color) -// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) -// fontheight = font height -// bg = background color -// grad = pointer to array of gradient colors -// wb = pitch - number of bytes between text lines -void ScreenSegmGText(sSegm* segm, const void* data, const void* font, u8 fontheight, u8 bg, const void* grad, int wb) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->par = (u32)font; - segm->par3 = bg | (fontheight << 8); - segm->par2 = (u32)grad; - segm->wb = wb; - __dmb(); - segm->form = GF_GTEXT; - __dmb(); -} - -// set video segment to 8-pixel double gradient color text -// data = pointer to text buffer (character + foreground color) -// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) -// fontheight = font height -// bg = background color -// grad = pointer to array of gradient colors -// wb = pitch - number of bytes between text lines -void ScreenSegmDText(sSegm* segm, const void* data, const void* font, u8 fontheight, u8 bg, const void* grad, int wb) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->par = (u32)font; - segm->par3 = bg | (fontheight << 8); - segm->par2 = (u32)grad; - segm->wb = wb; - __dmb(); - segm->form = GF_DTEXT; - __dmb(); -} - -// set video segment to tiles -// data = pointer to tile map buffer (with tile indices) -// tiles = pointer to 1 column of tiles, 1 pixel = 8 bits -// w = tile width (must be multiple of 4) -// h = tile height -// wb = pitch - number of bytes between tile map rows -void ScreenSegmTile(sSegm* segm, const void* data, const void* tiles, int w, int h, int wb) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->par = (u32)tiles; - segm->par2 = (u32)h; - segm->par3 = (u16)w; - segm->wb = wb; - segm->wrapx = (segm->width+w-1)/w*w; - segm->wrapy = (segm->wrapy+h-1)/h*h; - __dmb(); - segm->form = GF_TILE; - __dmb(); -} - -// set video segment to alternate tiles -// data = pointer to tile map buffer (with tile indices) -// tiles = pointer to 1 row of tiles, 1 pixel = 8 bits -// w = tile width (must be multiple of 4) -// h = tile height -// tilewb = tile width bytes (usually tile width * number of tiles) -// wb = pitch - number of bytes between tile map rows -void ScreenSegmTile2(sSegm* segm, const void* data, const void* tiles, int w, int h, int tilewb, int wb) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->par = (u32)tiles; - segm->par2 = (u32)h + ((u32)(u16)tilewb << 16); - segm->par3 = (u16)w; - segm->wb = wb; - segm->wrapx = (segm->width+w-1)/w*w; - segm->wrapy = (segm->wrapy+h-1)/h*h; - __dmb(); - segm->form = GF_TILE2; - __dmb(); -} - -// set video segment to level graph GF_LEVEL -// data = pointer to buffer with line samples 0..255 -// bg = background color -// fg = foreground color -// zero = Y zero level -void ScreenSegmLevel(sSegm* segm, const void* data, u8 bg, u8 fg, u8 zero) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->par = bg | ((u32)fg << 8); - segm->par2 = zero; - __dmb(); - segm->form = GF_LEVEL; - __dmb(); -} - -// set video segment to leve gradient graph GF_LEVELGRAD -// data = pointer to buffer with values 0..255 of 4-pixels in rows -// sample1 = scanline sample < data -// sample2 = scanline sample >= data -void ScreenSegmLevelGrad(sSegm* segm, const void* data, const void* sample1, const void* sample2) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->par = (u32)sample1; - segm->par2 = (u32)sample2; - __dmb(); - segm->form = GF_LEVELGRAD; - __dmb(); -} - -// set video segment to oscilloscope 1-pixel graph GF_OSCIL -// data = pointer to buffer with line samples 0..255 -// bg = background color -// fg = foreground color -// pixh = height of pixels - 1 -void ScreenSegmOscil(sSegm* segm, const void* data, u8 bg, u8 fg, int pixh) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->par = bg | ((u32)fg << 8); - segm->par2 = pixh; - __dmb(); - segm->form = GF_OSCIL; - __dmb(); -} - -// set video segment to oscilloscope line graph GF_OSCLINE -// data = pointer to buffer with line samples 0..255 -// bg = background color -// fg = foreground color -void ScreenSegmOscLine(sSegm* segm, const void* data, u8 bg, u8 fg) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->par = bg | ((u32)fg << 8); - __dmb(); - segm->form = GF_OSCLINE; - __dmb(); -} - -// generate palette 4-planes translation table for function ScreenSegmPlane2 -// trans = pointer to destination palette translation table (u32 trans[256]) -// pal = pointer to source palette of 4 colors (u8 pal[4]) -void GenPal4Plane(u32* trans, const u8* pal) -{ - int i, j; - u32 k; - for (i = 0; i < 256; i++) - { - j = 0; - if ((i & B7) != 0) j |= B1; - if ((i & B3) != 0) j |= B0; - k = pal[j]; - - j = 0; - if ((i & B6) != 0) j |= B1; - if ((i & B2) != 0) j |= B0; - k |= (u32)pal[j] << 8; - - j = 0; - if ((i & B5) != 0) j |= B1; - if ((i & B1) != 0) j |= B0; - k |= (u32)pal[j] << 16; - - j = 0; - if ((i & B4) != 0) j |= B1; - if ((i & B0) != 0) j |= B0; - k |= (u32)pal[j] << 24; - - trans[i] = k; - } -} - -// set video segment to 4-color on 2-planes graphics -// data = pointer to data buffer -// plane = offset of 2nd graphics plane (in bytes), size of one graphics plane -// trans = pointer to 4-color palette translation table (generated with GenPal4Plane function) -// wb = pitch - number of bytes between lines -// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy. -void ScreenSegmPlane2(sSegm* segm, const void* data, int plane, const void* trans, int wb) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->par = plane; - segm->par2 = (u32)trans; - segm->wb = wb; - __dmb(); - segm->form = GF_PLANE2; - __dmb(); -} - -// set video segment to 2x4 bit color attribute per 8x8 pixel sample graphics -// data = pointer to data buffer with mono pixels -// attr = pointer to color attributes -// pal = pointer to 16-color palette table -// wb = pitch - number of bytes between lines -// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy. -void ScreenSegmAttrib8(sSegm* segm, const void* data, const void* attr, const u8* pal, int wb) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->par = (u32)attr; - segm->par2 = (u32)pal; - segm->wb = wb; - __dmb(); - segm->form = GF_ATTRIB8; - __dmb(); -} - -// set video segment to horizontal progress indicator GF_PROGRESS -// data = pointer to buffer with values 0..255 of 4-pixels in rows -// sample1 = scanline sample < data -// sample2 = scanline sample >= data -void ScreenSegmProgress(sSegm* segm, const void* data, const void* sample1, const void* sample2) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->par = (u32)sample1; - segm->par2 = (u32)sample2; - __dmb(); - segm->form = GF_PROGRESS; - __dmb(); -} - -// set video segment to 8-bit graphics with 2D matrix transformation -// data = pointer to image data (width and height of image must be power of 2) -// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) -// xbits = number of bits of image width (image width must be power of 2 and must be = pitch width bytes) -// ybits = number of bits of image height (image height must be power of 2) -// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height -void ScreenSegmGraph8Mat(sSegm* segm, const void* data, const int* mat, u16 xbits, u16 ybits) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->wb = (1<offx = 0; - segm->offy = 0; - segm->wrapx = segm->width; - segm->par = (u32)mat; - segm->par2 = xbits | ((u32)ybits << 16); - __dmb(); - segm->form = GF_GRAPH8MAT; - __dmb(); -} - -// set video segment to 8-bit graphics with perspective projection -// data = pointer to image data (width and height of image must be power of 2) -// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) -// xbits = number of bits of image width (image width must be power of 2 and must be = pitch width bytes) -// ybits = number of bits of image height (image height must be power of 2) -// horiz = horizon offset -// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height -void ScreenSegmGraph8Persp(sSegm* segm, const void* data, const int* mat, u16 xbits, u16 ybits, u16 horiz) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = data; - segm->wb = (1<offx = 0; - segm->offy = 0; - segm->wrapx = segm->width; - segm->par = (u32)mat; - segm->par2 = xbits | ((u32)ybits << 16); - segm->par3 = horiz; - __dmb(); - segm->form = GF_GRAPH8PERSP; - __dmb(); -} - -// set video segment to tiles with perspective -// map = pointer to tile map with tile indices (width and height must be power of 2) -// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2) -// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) -// mapwbits = number of bits of tile map width -// maphbits = number of bits of tile map height -// tilebits = number of bits of tile width and height -// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling) -// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height -void ScreenSegmTilePersp(sSegm* segm, const u8* map, const u8* tiles, const int* mat, - u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = map; - segm->wb = mapwbits | ((u16)maphbits<<8); - segm->offx = 0; - segm->offy = 0; - segm->wrapx = segm->width; - segm->par = (u32)tiles; - segm->par2 = (u32)mat; - segm->par3 = tilebits | ((u16)horizon<<8); - __dmb(); - segm->form = GF_TILEPERSP; - __dmb(); -} - -// set video segment to tiles with perspective, 1.5 pixels -// map = pointer to tile map with tile indices (width and height must be power of 2) -// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2) -// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) -// mapwbits = number of bits of tile map width -// maphbits = number of bits of tile map height -// tilebits = number of bits of tile width and height -// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling) -// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height -void ScreenSegmTilePersp15(sSegm* segm, const u8* map, const u8* tiles, const int* mat, - u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = map; - segm->wb = mapwbits | ((u16)maphbits<<8); - segm->offx = 0; - segm->offy = 0; - segm->wrapx = segm->width; - segm->par = (u32)tiles; - segm->par2 = (u32)mat; - segm->par3 = tilebits | ((u16)horizon<<8); - __dmb(); - segm->form = GF_TILEPERSP15; - __dmb(); -} - -// set video segment to tiles with perspective, double pixels -// map = pointer to tile map with tile indices (width and height must be power of 2) -// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2) -// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) -// mapwbits = number of bits of tile map width -// maphbits = number of bits of tile map height -// tilebits = number of bits of tile width and height -// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling) -// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height -void ScreenSegmTilePersp2(sSegm* segm, const u8* map, const u8* tiles, const int* mat, - u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = map; - segm->wb = mapwbits | ((u16)maphbits<<8); - segm->offx = 0; - segm->offy = 0; - segm->wrapx = segm->width; - segm->par = (u32)tiles; - segm->par2 = (u32)mat; - segm->par3 = tilebits | ((u16)horizon<<8); - __dmb(); - segm->form = GF_TILEPERSP2; - __dmb(); -} - -// set video segment to tiles with perspective, triple pixels -// map = pointer to tile map with tile indices (width and height must be power of 2) -// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2) -// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) -// mapwbits = number of bits of tile map width -// maphbits = number of bits of tile map height -// tilebits = number of bits of tile width and height -// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling) -// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height -void ScreenSegmTilePersp3(sSegm* segm, const u8* map, const u8* tiles, const int* mat, - u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = map; - segm->wb = mapwbits | ((u16)maphbits<<8); - segm->offx = 0; - segm->offy = 0; - segm->wrapx = segm->width; - segm->par = (u32)tiles; - segm->par2 = (u32)mat; - segm->par3 = tilebits | ((u16)horizon<<8); - __dmb(); - segm->form = GF_TILEPERSP3; - __dmb(); -} - -// set video segment to tiles with perspective, quadruple pixels -// map = pointer to tile map with tile indices (width and height must be power of 2) -// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2) -// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) -// mapwbits = number of bits of tile map width -// maphbits = number of bits of tile map height -// tilebits = number of bits of tile width and height -// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling) -// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height -void ScreenSegmTilePersp4(sSegm* segm, const u8* map, const u8* tiles, const int* mat, - u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon) -{ - segm->form = GF_COLOR; - __dmb(); - segm->data = map; - segm->wb = mapwbits | ((u16)maphbits<<8); - segm->offx = 0; - segm->offy = 0; - segm->wrapx = segm->width; - segm->par = (u32)tiles; - segm->par2 = (u32)mat; - segm->par3 = tilebits | ((u16)horizon<<8); - __dmb(); - segm->form = GF_TILEPERSP4; - __dmb(); -} diff --git a/MCUME_pico/picovga_t4/vga_screen.h b/MCUME_pico/picovga_t4/vga_screen.h deleted file mode 100755 index f53ddcc..0000000 --- a/MCUME_pico/picovga_t4/vga_screen.h +++ /dev/null @@ -1,307 +0,0 @@ - -// **************************************************************************** -// -// VGA screen layout -// -// **************************************************************************** - -#ifndef _VGA_SCREEN_H -#define _VGA_SCREEN_H - -// video segment (on change update SSEGM_* in define.h) -typedef struct { - u16 width; // SSEGM_WIDTH width of this video segment in pixels (must be multiple of 4, 0=inactive segment) - u16 wb; // SSEGM_WB pitch - number of bytes between lines - s16 offx; // SSEGM_OFFX display offset at X direction (must be multiple of 4) - s16 offy; // SSEGM_OFFY display offset at Y direction - u16 wrapx; // SSEGM_WRAPX wrap width in X direction (number of pixels, must be multiply of 4 and > 0) - // text modes: wrapx must be multiply of 8 - u16 wrapy; // SSEGM_WRAPY wrap width in Y direction (number of lines, cannot be 0) - const void* data; // SSEGM_DATA pointer to video buffer with image data - u8 form; // SSEGM_FORM graphics format GF_* - bool dbly; // SSEGM_DBLY double Y (2 scanlines per 1 image line) - u16 par3; // SSEGM_PAR3 parameter 3 - u32 par; // SSEGM_PAR parameter 1 - u32 par2; // SSEGM_PAR2 parameter 2 -} sSegm; - -// video strip (on change update SSTRIP_* in define.h) -typedef struct { - u16 height; // SSTRIP_HEIGHT height of this strip in number of scanlines - u16 num; // SSTRIP_NUM number of video segments - sSegm seg[SEGMAX]; // SSTRIP_SEG list of video segments -} sStrip; - -// video screen (on change update SSCREEN_* in define.h) -typedef struct { - u16 num; // SSCREEN_NUM number of video strips - u16 backup; // SSCREEN_BACKUP backup number of video strips during display OFF - sStrip strip[STRIPMAX]; // SSCREEN_STRIP list of video strips -} sScreen; - -// current video screen -extern sScreen Screen; // default video screen -extern sScreen* pScreen; // pointer to current video screen - -// clear screen (set 0 strips, does not modify sprites) -void ScreenClear(sScreen* s); - -// add empty strip to the screen (returns pointer to the strip) -sStrip* ScreenAddStrip(sScreen* s, int height); - -// add empty segment to video strip (returns pointer to the segment and initialises is to defaults) -sSegm* ScreenAddSegm(sStrip* strip, int width); - -// set video segment to simple color format GF_COLOR -// col1 = color pattern 4-pixels even line (use macro MULTICOL) -// col2 = color pattern 4-pixels odd line (use macro MULTICOL) -void ScreenSegmColor(sSegm* segm, u32 col1, u32 col2); - -// set video segment to gradient with 1 line -// data = pointer to data buffer with gradient -// wb = pitch - length of buffer -// To scroll gradient, set virtual dimension wrapx, then shift offx -void ScreenSegmGrad1(sSegm* segm, const void* data, int wb); - -// set video segment to gradient with 2 lines -// data = pointer to data buffer with gradient -// wb = pitch - lenght of buffer -// To scroll gradient, set virtual dimension wrapx, then shift offx -void ScreenSegmGrad2(sSegm* segm, const void* data, int wb); - -// set video segment to native 8-bit graphics (R3G3B2) -// data = pointer to data buffer -// wb = pitch - number of bytes between lines -// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy. -void ScreenSegmGraph8(sSegm* segm, const void* data, int wb); - -// generate 16-color palette translation table -// trans = pointer to destination palette translation table (u16 trans[256]) -// pal = pointer to source palette of 16 colors (u8 pal[16]) -void GenPal16Trans(u16* trans, const u8* pal); - -// set video segment to 4-bit palette graphics -// data = pointer to data buffer -// trans = pointer to 16-color palette translation table (generated with GenPal16Trans function) -// wb = pitch - number of bytes between lines -// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy. -void ScreenSegmGraph4(sSegm* segm, const void* data, const void* trans, int wb); - -// generate palette 4 translation table for function ScreenSegmGraph2 -// trans = pointer to destination palette translation table (u32 trans[256]) -// pal = pointer to source palette of 4 colors (u8 pal[4]) -void GenPal4Trans(u32* trans, const u8* pal); - -// set video segment to 2-bit palette graphics -// data = pointer to data buffer -// trans = pointer to 4-color palette translation table (generated with GenPal4Trans function) -// wb = pitch - number of bytes between lines -// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy. -void ScreenSegmGraph2(sSegm* segm, const void* data, const void* trans, int wb); - -// set video segment to 1-bit palette graphics -// data = pointer to data buffer -// bg = background color -// fg = foreground color -// wb = pitch - number of bytes between lines -// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy. -void ScreenSegmGraph1(sSegm* segm, const void* data, u8 bg, u8 fg, int wb); - -// set video segment to 8-pixel mono text -// data = pointer to text buffer -// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) -// fontheight = font height -// bg = background color -// fg = foreground color -// wb = pitch - number of bytes between text lines -void ScreenSegmMText(sSegm* segm, const void* data, const void* font, u16 fontheight, u8 bg, u8 fg, int wb); - -// set video segment to 8-pixel attribute text -// data = pointer to text buffer (character + 2x4 bit attributes) -// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) -// fontheight = font height -// pal = pointer to palette of 16 colors -// wb = pitch - number of bytes between text lines -void ScreenSegmAText(sSegm* segm, const void* data, const void* font, u16 fontheight, const void* pal, int wb); - -// set video segment to 8-pixel foreground color text -// data = pointer to text buffer (character + foreground color) -// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) -// fontheight = font height -// bg = background color -// wb = pitch - number of bytes between text lines -void ScreenSegmFText(sSegm* segm, const void* data, const void* font, u16 fontheight, u8 bg, int wb); - -// set video segment to 8-pixel color text -// data = pointer to text buffer (character + background color + foreground color) -// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) -// fontheight = font height -// wb = pitch - number of bytes between text lines -void ScreenSegmCText(sSegm* segm, const void* data, const void* font, u16 fontheight, int wb); - -// set video segment to 8-pixel gradient color text -// data = pointer to text buffer (character + foreground color) -// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) -// fontheight = font height -// bg = background color -// grad = pointer to array of gradient colors -// wb = pitch - number of bytes between text lines -void ScreenSegmGText(sSegm* segm, const void* data, const void* font, u8 fontheight, u8 bg, const void* grad, int wb); - -// set video segment to 8-pixel double gradient color text -// data = pointer to text buffer (character + foreground color) -// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) -// fontheight = font height -// bg = background color -// grad = pointer to array of gradient colors -// wb = pitch - number of bytes between text lines -void ScreenSegmDText(sSegm* segm, const void* data, const void* font, u8 fontheight, u8 bg, const void* grad, int wb); - -// set video segment to tiles -// data = pointer to tile map buffer (with tile indices) -// tiles = pointer to 1 column of tiles, 1 pixel = 8 bits -// w = tile width (must be multiple of 4) -// h = tile height -// wb = pitch - number of bytes between tile map rows -void ScreenSegmTile(sSegm* segm, const void* data, const void* tiles, int w, int h, int wb); - -// set video segment to alternate tiles -// data = pointer to tile map buffer (with tile indices) -// tiles = pointer to 1 row of tiles, 1 pixel = 8 bits -// w = tile width (must be multiple of 4) -// h = tile height -// tilewb = tile width bytes (usually tile width * number of tiles) -// wb = pitch - number of bytes between tile map rows -void ScreenSegmTile2(sSegm* segm, const void* data, const void* tiles, int w, int h, int tilewb, int wb); - -// set video segment to level graph GF_LEVEL -// data = pointer to buffer with line samples 0..255 -// zero = Y zero level -// bg = background color -// fg = foreground color -void ScreenSegmLevel(sSegm* segm, const void* data, u8 zero, u8 bg, u8 fg); - -// set video segment to leve gradient graph GF_LEVELGRAD -// data = pointer to buffer with values 0..255 of 4-pixels in rows -// sample1 = scanline sample < data -// sample2 = scanline sample >= data -void ScreenSegmLevelGrad(sSegm* segm, const void* data, const void* sample1, const void* sample2); - -// set video segment to oscilloscope 1-pixel graph GF_OSCIL -// data = pointer to buffer with line samples 0..255 -// bg = background color -// fg = foreground color -// pixh = height of pixels - 1 -void ScreenSegmOscil(sSegm* segm, const void* data, u8 bg, u8 fg, int pixh); - -// set video segment to oscilloscope line graph GF_OSCLINE -// data = pointer to buffer with line samples 0..255 -// bg = background color -// fg = foreground color -void ScreenSegmOscLine(sSegm* segm, const void* data, u8 bg, u8 fg); - -// generate palette 4-color translation table for function ScreenSegmPlane2 -// trans = pointer to destination palette translation table (u32 trans[256]) -// pal = pointer to source palette of 4 colors (u8 pal[4]) -void GenPal4Plane(u32* trans, const u8* pal); - -// set video segment to 4-color on 2-planes graphics -// data = pointer to data buffer -// plane = offset of 2nd graphics plane (in bytes), size of one graphics plane -// trans = pointer to 4-color palette translation table (generated with GenPal4Plane function) -// wb = pitch - number of bytes between lines -// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy. -void ScreenSegmPlane2(sSegm* segm, const void* data, int plane, const void* trans, int wb); - -// set video segment to 2x4 bit color attribute per 8x8 pixel sample graphics -// data = pointer to data buffer with mono pixels -// attr = pointer to color attributes -// pal = pointer to 16-color palette table -// wb = pitch - number of bytes between lines -// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy. -void ScreenSegmAttrib8(sSegm* segm, const void* data, const void* attr, const u8* pal, int wb); - -// set video segment to horizontal progress indicator GF_PROGRESS -// data = pointer to buffer with values 0..255 of 4-pixels in rows -// sample1 = scanline sample < data -// sample2 = scanline sample >= data -void ScreenSegmProgress(sSegm* segm, const void* data, const void* sample1, const void* sample2); - -// set video segment to 8-bit graphics with 2D matrix transformation -// data = pointer to image data (width and height of image must be power of 2) -// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) -// xbits = number of bits of image width (image width must be power of 2 and must be = pitch width bytes) -// ybits = number of bits of image height (image height must be power of 2) -// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height -void ScreenSegmGraph8Mat(sSegm* segm, const void* data, const int* mat, u16 xbits, u16 ybits); - -// set video segment to 8-bit graphics with perspective projection -// data = pointer to image data (width and height of image must be power of 2) -// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) -// xbits = number of bits of image width (image width must be power of 2 and must be = pitch width bytes) -// ybits = number of bits of image height (image height must be power of 2) -// horiz = horizon offset -// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height -void ScreenSegmGraph8Persp(sSegm* segm, const void* data, const int* mat, u16 xbits, u16 ybits, u16 horiz); - -// set video segment to tiles with perspective -// map = pointer to tile map with tile indices (width and height must be power of 2) -// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2) -// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) -// mapwbits = number of bits of tile map width -// maphbits = number of bits of tile map height -// tilebits = number of bits of tile width and height -// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling) -// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height -void ScreenSegmTilePersp(sSegm* segm, const u8* map, const u8* tiles, const int* mat, - u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon); - -// set video segment to tiles with perspective, 1.5 pixels -// map = pointer to tile map with tile indices (width and height must be power of 2) -// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2) -// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) -// mapwbits = number of bits of tile map width -// maphbits = number of bits of tile map height -// tilebits = number of bits of tile width and height -// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling) -// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height -void ScreenSegmTilePersp15(sSegm* segm, const u8* map, const u8* tiles, const int* mat, - u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon); - -// set video segment to tiles with perspective, double pixels -// map = pointer to tile map with tile indices (width and height must be power of 2) -// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2) -// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) -// mapwbits = number of bits of tile map width -// maphbits = number of bits of tile map height -// tilebits = number of bits of tile width and height -// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling) -// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height -void ScreenSegmTilePersp2(sSegm* segm, const u8* map, const u8* tiles, const int* mat, - u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon); - -// set video segment to tiles with perspective, triple pixels -// map = pointer to tile map with tile indices (width and height must be power of 2) -// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2) -// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) -// mapwbits = number of bits of tile map width -// maphbits = number of bits of tile map height -// tilebits = number of bits of tile width and height -// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling) -// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height -void ScreenSegmTilePersp3(sSegm* segm, const u8* map, const u8* tiles, const int* mat, - u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon); - -// set video segment to tiles with perspective, quadruple pixels -// map = pointer to tile map with tile indices (width and height must be power of 2) -// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2) -// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) -// mapwbits = number of bits of tile map width -// maphbits = number of bits of tile map height -// tilebits = number of bits of tile width and height -// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling) -// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height -void ScreenSegmTilePersp4(sSegm* segm, const u8* map, const u8* tiles, const int* mat, - u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon); - -#endif // _VGA_SCREEN_H diff --git a/MCUME_pico/picovga_t4/vga_vmode.cpp b/MCUME_pico/picovga_t4/vga_vmode.cpp index d30351e..4995718 100755 --- a/MCUME_pico/picovga_t4/vga_vmode.cpp +++ b/MCUME_pico/picovga_t4/vga_vmode.cpp @@ -1,40 +1,17 @@ - // **************************************************************************** // // VGA videomodes // +// file derived from the PicoVGA project +// https://github.com/Panda381/PicoVGA +// by Miroslav Nemecek +// // **************************************************************************** #include "include.h" sVmode Vmode; // videomode setup sVgaCfg Cfg; // required configuration -sCanvas Canvas; // canvas of draw box - -// default 16-color palettes (EGA colors) -// - do not set "const", to stay in faster RAM -u8 DefPal16[16] = { - CGACOL_0, // 0 // 0x000000 black - CGACOL_1, // 2 // 0x0000AA dark blue - CGACOL_2, // 20 // 0x00B600 dark green - CGACOL_3, // 22 // 0x00B6AA dark cyan - CGACOL_4, // 160 // 0xB60000 dark red - CGACOL_5, // 162 // 0xB600AA dark magenta - CGACOL_6, // 168 // 0xB64900 brown - CGACOL_7, // 182 // 0xB6B6AA light gray - - CGACOL_8, // 73 // 0x494955 dark gray - CGACOL_9, // 75 // 0x4949FF light blue - CGACOL_10, // 93 // 0x49FF55 light green - CGACOL_11, // 95 // 0x49FFFF light cyan - CGACOL_12, // 233 // 0xFF4955 light red - CGACOL_13, // 235 // 0xFF49FF light magenta - CGACOL_14, // 253 // 0xFFFF55 yellow - CGACOL_15, // 255 // 0xFFFFFF white -}; - -// 16-color palette translation table -u16 Pal16Trans[256]; /* http://martin.hinner.info/vga/pal.html @@ -45,211 +22,9 @@ time 0: - line 3..35: (33) dark - line 36..515: (480) image lines 0..479 - line 516..525: (10) dark - -PAL system (625 lines total): -time 0: -- line 1, 2: (2) vertical sync + vertical sync -- line 3: (1) vertical sync + half sync -- line 4, 5: (2) half sync + half sync -- line 6..23: (18) dark -- line 24..46: (23) dark image -time 46: -- line 47..286: (240) image lines odd 1, 3, 5 ... 479 -- line 287..310: (24) dark image -- line 311..312: (2) half sync + half sync -- line 313: (1) half sync + vertical sync -vsync time 313 (vsync time 312.5): -- line 314..315: (2) vertical sync + vertical sync -- line 316..317: (2) half sync + half sync -- line 318..335: (18) dark -- line 336..358: (23) dark image -time 358 (45.5 from last vsync) -- line 359..598: (240) image lines even 0, 2, ... 478 -- line 599..622: (24) dark image -- line 623..625: (3) half sync + half sync -time 625: - -NTSC system (525 lines total): -time 0, even field: -- line 1..3: (3) vertical sync + vertical sync (6 serration pulses: 27.3 us low, 4.5 us high) -- line 4..6: (3) half sync + half sync (6 equalizing pulses: 2.3 us low, 29.5 us high) -- line 7..16: (10) dark (blanked video: 4.7 us low, 58.9 us high) -- line 17,18: (2) dark image -time 18: -- line 19..258: (240) image lines even 0, 2, ... 478 -- line 259: (1) dark image -- line 260..262: (3) half sync + half sync (7 equalizing pulses) -- line 263: (1) half sync + vertical sync (6 serration pulses) -time 263 (vsync time 262.5): -- line 264,265: (2) vertical sync + vertical sync -- line 266: (1) vertical sync + half sync (5 equalizing pulses) -- line 267..268: (2) half sync + half sync -- line 269..279: (11) dark -- line 280..281: (2) dark image -time 281 (18.5 from last vsync) -- line 282..521: (240) image lines odd 1, 3, 5 ... 479 -- line 522: (1) dark image -- line 523..525: (3) half sync + half sync -time 525: - */ -// === TV videomodes -// TV PAL interlaced 5:4 720x576 (4:3 768x576, 16:9 1024x576) -const sVideo VideoPAL = { - // horizontal (horizontal frequency 15625 Hz, effective sync pulses 16000 Hz) - .htot= 64.00000f, // total scanline in [us] - .hfront= 1.65000f, // H front porch (after image, before HSYNC) in [us] - .hsync= 4.70000f, // H sync pulse in [us] - .hback= 5.70000f, // H back porch (after HSYNC, before image) in [us] - .hfull= 47.36000f, // H full visible in [us] (formally should be 51.95 us) - - // vertical (vertical frequency 50 Hz) - .vtot=625, // total scanlines (both subframes) - .vmax=576, // maximal height - - // subframe 1 - .vsync1=5, // V sync (half-)pulses on subframe 1 - .vpost1=5, // V sync post half-pulses on subframe 1 - .vback1=18+23, // V back porch (after VSYNC, before image) on subframe 1 - .vact1=240, // active visible scanlines, subframe 1 (formally should be 288, 576 total) - .vfront1=24, // V front porch (after image, before VSYNC) on subframe 1 - .vpre1=5, // V sync pre half-pulses on subframe 1 - - // subframe 2 (ignored if not interlaced) - .vsync2=5, // V sync half-pulses on subframe 2 - .vpost2=4, // V sync post half-pulses on subframe 2 - .vback2=18+23, // V back porch (after VSYNC, before image) on subframe 2 - .vact2=240, // active visible scanlines, subframe 2 (formally should be 288, 576 total) - .vfront2=24, // V front porch (after image, before VSYNC) on subframe 2 - .vpre2=6, // V sync pre half-pulses on subframe 2 - - // name - .name = "PAL ", // video timing name (VIDEO_NAME_LEN characters + terminating 0) - - // flags - .inter=True, // interlaced (use subframes) - .psync=False, // positive synchronization - .odd=True, // first sub-frame is odd lines 1, 3, 5,... (PAL) -}; - -// TV PAL progressive 5:4 360x288 (4:3 384x288, 16:9 512x288) -const sVideo VideoPALp = { - // horizontal (horizontal frequency 15625 Hz) - .htot= 64.00000f, // total scanline in [us] - .hfront= 1.65000f, // H front porch (after image, before HSYNC) in [us] - .hsync= 4.70000f, // H sync pulse in [us] - .hback= 5.70000f, // H back porch (after HSYNC, before image) in [us] - .hfull= 47.36000f, // H full visible in [us] (formally should be 51.95 us) - - // vertical (vertical frequency 50 Hz) - .vtot=312, // total scanlines (both subframes) - .vmax=288, // maximal height - - // subframe 1 - .vsync1=2, // V sync (half-)pulses on subframe 1 - .vpost1=0, // V sync post half-pulses on subframe 1 - .vback1=18+23+2, // V back porch (after VSYNC, before image) on subframe 1 - .vact1=240, // active visible scanlines, subframe 1 (formally should be 288, 576 total) - .vfront1=24+3, // V front porch (after image, before VSYNC) on subframe 1 - .vpre1=0, // V sync pre half-pulses on subframe 1 - - // subframe 2 (ignored if not interlaced) - .vsync2=0, // V sync half-pulses on subframe 2 - .vpost2=0, // V sync post half-pulses on subframe 2 - .vback2=0, // V back porch (after VSYNC, before image) on subframe 2 - .vact2=0, // active visible scanlines, subframe 2 (formally should be 288, 576 total) - .vfront2=0, // V front porch (after image, before VSYNC) on subframe 2 - .vpre2=0, // V sync pre half-pulses on subframe 2 - - // name - .name = "PALp ", // video timing name (VIDEO_NAME_LEN characters + terminating 0) - - // flags - .inter=False, // interlaced (use subframes) - .psync=False, // positive synchronization - .odd=True, // first sub-frame is odd lines 1, 3, 5,... (PAL) -}; - -// TV NTSC interlaced 4:3 640x480 (5:4 600x480, 16:9 848x480) -// serration pulses (half vsync): 27.3 us low, 4.5 us high -// equalizing pulses (half hsync): 2.3 us low, 29.5 us high -// blanked video (hsync pulses): 4.7 us low, 58.9 us high -const sVideo VideoNTSC = { - // horizontal (horizontal frequency 15734 Hz, effective sync pulses 16274 Hz) - .htot= 63.55582f, // total scanline in [us] - .hfront= 1.50000f, // H front porch (after image, before HSYNC) in [us] - .hsync= 4.70000f, // H sync pulse in [us] - .hback= 4.50000f, // H back porch (after HSYNC, before image) in [us] - .hfull= 47.03130f, // H full visible in [us] - - // vertical - .vtot=525, // total scanlines (both subframes) - .vmax=480, // maximal height - - // subframe 1 - .vsync1=6, // V sync (half-)pulses on subframe 1 - .vpost1=6, // V sync post half-pulses on subframe 1 - .vback1=10+2, // V back porch (after VSYNC, before image) on subframe 1 - .vact1=240, // active visible scanlines, subframe 1 - .vfront1=1, // V front porch (after image, before VSYNC) on subframe 1 - .vpre1=7, // V sync pre half-pulses on subframe 1 - - // subframe 2 (ignored if not interlaced) - .vsync2=6, // V sync half-pulses on subframe 2 - .vpost2=5, // V sync post half-pulses on subframe 2 - .vback2=11+2, // V back porch (after VSYNC, before image) on subframe 2 - .vact2=240, // active visible scanlines, subframe 2 - .vfront2=1, // V front porch (after image, before VSYNC) on subframe 2 - .vpre2=6, // V sync pre half-pulses on subframe 2 - - // name - .name = "NTSC ", // video timing name (VIDEO_NAME_LEN characters + terminating 0) - - // flags - .inter=True, // interlaced (use subframes) - .psync=False, // positive synchronization - .odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL) -}; - -// TV NTSC progressive 4:3 320x240 (5:4 300x240, 16:9 424x240) -const sVideo VideoNTSCp = { - // horizontal (horizontal frequency 15734 Hz) - .htot= 63.55582f, // total scanline in [us] - .hfront= 1.50000f, // H front porch (after image, before HSYNC) in [us] - .hsync= 4.70000f, // H sync pulse in [us] - .hback= 4.50000f, // H back porch (after HSYNC, before image) in [us] - .hfull= 47.03130f, // H full visible in [us] - - // vertical - .vtot=262, // total scanlines (both subframes) - .vmax=240, // maximal height - - // subframe 1 - .vsync1=3, // V sync (half-)pulses on subframe 1 - .vpost1=0, // V sync post half-pulses on subframe 1 - .vback1=10+2+3, // V back porch (after VSYNC, before image) on subframe 1 - .vact1=240, // active visible scanlines, subframe 1 - .vfront1=1+3, // V front porch (after image, before VSYNC) on subframe 1 - .vpre1=0, // V sync pre half-pulses on subframe 1 - - // subframe 2 (ignored if not interlaced) - .vsync2=0, // V sync half-pulses on subframe 2 - .vpost2=0, // V sync post half-pulses on subframe 2 - .vback2=0, // V back porch (after VSYNC, before image) on subframe 2 - .vact2=0, // active visible scanlines, subframe 2 - .vfront2=0, // V front porch (after image, before VSYNC) on subframe 2 - .vpre2=6, // V sync pre half-pulses on subframe 2 - - // name - .name = "NTSCp", // video timing name (VIDEO_NAME_LEN characters + terminating 0) - - // flags - .inter=False, // interlaced (use subframes) - .psync=False, // positive synchronization - .odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL) -}; // === Monitor videomodes @@ -266,29 +41,16 @@ const sVideo VideoEGA = { .vtot=449, // total scanlines (both subframes) .vmax=400, // maximal height - // subframe 1 - .vsync1=2, // V sync (half-)pulses on subframe 1 - .vpost1=0, // V sync post half-pulses on subframe 1 - .vback1=35, // V back porch (after VSYNC, before image) on subframe 1 - .vact1=400, // active visible scanlines, subframe 1 - .vfront1=12, // V front porch (after image, before VSYNC) on subframe 1 - .vpre1=0, // V sync pre half-pulses on subframe 1 - - // subframe 2 (ignored if not interlaced) - .vsync2=0, // V sync half-pulses on subframe 2 - .vpost2=0, // V sync post half-pulses on subframe 2 - .vback2=0, // V back porch (after VSYNC, before image) on subframe 2 - .vact2=0, // active visible scanlines, subframe 2 - .vfront2=0, // V front porch (after image, before VSYNC) on subframe 2 - .vpre2=0, // V sync pre half-pulses on subframe 2 - - // name - .name = "EGA ", // video timing name (VIDEO_NAME_LEN characters + terminating 0) + // frame + .vsync=2, // V sync (half-)pulses + .vpost=0, // V sync post half-pulses + .vback=35, // V back porch (after VSYNC, before image) + .vact=400, // active visible scanlines + .vfront=12, // V front porch (after image, before VSYNC) + .vpre=0, // V sync pre half-pulses // flags - .inter=False, // interlaced (use subframes) .psync=False, // positive synchronization - .odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL) }; // VGA 4:3 640x480 (16:9 848x480), vert. 60 Hz, hor. 31.4685 kHz, pixel clock 25.175 MHz @@ -304,182 +66,38 @@ const sVideo VideoVGA = { .vtot=525, // total scanlines (both subframes) .vmax=480, // maximal height - // subframe 1 - .vsync1=2, // V sync (half-)pulses on subframe 1 - .vpost1=0, // V sync post half-pulses on subframe 1 - .vback1=33, // V back porch (after VSYNC, before image) on subframe 1 - .vact1=480, // active visible scanlines, subframe 1 - .vfront1=10, // V front porch (after image, before VSYNC) on subframe 1 - .vpre1=0, // V sync pre half-pulses on subframe 1 - - // subframe 2 (ignored if not interlaced) - .vsync2=0, // V sync half-pulses on subframe 2 - .vpost2=0, // V sync post half-pulses on subframe 2 - .vback2=0, // V back porch (after VSYNC, before image) on subframe 2 - .vact2=0, // active visible scanlines, subframe 2 - .vfront2=0, // V front porch (after image, before VSYNC) on subframe 2 - .vpre2=0, // V sync pre half-pulses on subframe 2 - - // name - .name = "VGA ", // video timing name (VIDEO_NAME_LEN characters + terminating 0) + // frame + .vsync=2, // V sync (half-)pulses + .vpost=0, // V sync post half-pulses + .vback=33, // V back porch (after VSYNC, before image) + .vact=480, // active visible scanlines + .vfront=10, // V front porch (after image, before VSYNC) + .vpre=0, // V sync pre half-pulses // flags - .inter=False, // interlaced (use subframes) .psync=False, // positive synchronization - .odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL) }; -// SVGA 4:3 800x600 (16:9 1064x600), vert. 60 Hz, hor. 37.897 kHz, pixel clock 40 MHz -const sVideo VideoSVGA = { - // horizontal - .htot= 26.40000f, // total scanline in [us] (1056 pixels) - .hfront= 1.00000f, // H front porch (after image, before HSYNC) in [us] (40 pixels) - .hsync= 3.20000f, // H sync pulse in [us] (128 pixels) - .hback= 2.20000f, // H back porch (after HSYNC, before image) in [us] (88 pixels) - .hfull= 20.00000f, // H full visible in [us] (800 pixels) - // vertical - .vtot=628, // total scanlines (both subframes) - .vmax=600, // maximal height - - // subframe 1 - .vsync1=4, // V sync (half-)pulses on subframe 1 - .vpost1=0, // V sync post half-pulses on subframe 1 - .vback1=23, // V back porch (after VSYNC, before image) on subframe 1 - .vact1=600, // active visible scanlines, subframe 1 - .vfront1=1, // V front porch (after image, before VSYNC) on subframe 1 - .vpre1=0, // V sync pre half-pulses on subframe 1 - - // subframe 2 (ignored if not interlaced) - .vsync2=0, // V sync half-pulses on subframe 2 - .vpost2=0, // V sync post half-pulses on subframe 2 - .vback2=0, // V back porch (after VSYNC, before image) on subframe 2 - .vact2=0, // active visible scanlines, subframe 2 - .vfront2=0, // V front porch (after image, before VSYNC) on subframe 2 - .vpre2=0, // V sync pre half-pulses on subframe 2 - - // name - .name = "SVGA ", // video timing name (VIDEO_NAME_LEN characters + terminating 0) - - // flags - .inter=False, // interlaced (use subframes) - .psync=True, // positive synchronization - .odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL) +// timings +const sVideo* VideoResTab[DEV_MAX*RES_MAX] = +{ + // DEV_VGA + &VideoEGA, // RES_ZX = 0, // 256x192 + &VideoVGA, // RES_CGA, // 320x200 + &VideoVGA, // RES_QVGA, // 320x240 + &VideoEGA, // RES_EGA, // 528x400 + &VideoVGA, // RES_VGA, // 640x480 }; -// XGA 4:3 1024x768 (16:9 1360x768), vert. 60 Hz, hor. 48.36310 kHz, pixel clock 65 MHz -const sVideo VideoXGA = { - // horizontal - .htot= 20.67692f, // total scanline in [us] (1344 pixels) - .hfront= 0.36923f, // H front porch (after image, before HSYNC) in [us] (24 pixels) - .hsync= 2.09231f, // H sync pulse in [us] (136 pixels) - .hback= 2.46154f, // H back porch (after HSYNC, before image) in [us] (160 pixels) - .hfull= 15.75385f, // H full visible in [us] (1024 pixels) - - // vertical - .vtot=806, // total scanlines (both subframes) - .vmax=768, // maximal height - - // subframe 1 - .vsync1=6, // V sync (half-)pulses on subframe 1 - .vpost1=0, // V sync post half-pulses on subframe 1 - .vback1=29, // V back porch (after VSYNC, before image) on subframe 1 - .vact1=768, // active visible scanlines, subframe 1 - .vfront1=3, // V front porch (after image, before VSYNC) on subframe 1 - .vpre1=0, // V sync pre half-pulses on subframe 1 - - // subframe 2 (ignored if not interlaced) - .vsync2=0, // V sync half-pulses on subframe 2 - .vpost2=0, // V sync post half-pulses on subframe 2 - .vback2=0, // V back porch (after VSYNC, before image) on subframe 2 - .vact2=0, // active visible scanlines, subframe 2 - .vfront2=0, // V front porch (after image, before VSYNC) on subframe 2 - .vpre2=0, // V sync pre half-pulses on subframe 2 - - // name - .name = "XGA ", // video timing name (VIDEO_NAME_LEN characters + terminating 0) - - // flags - .inter=False, // interlaced (use subframes) - .psync=False, // positive synchronization - .odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL) -}; - -// VESA 4:3 1152x864, vert. 60 Hz, hor. 53.697 kHz, pixel clock 81.62 MHz -const sVideo VideoVESA = { - // horizontal - .htot= 18.62289f, // total scanline in [us] (1520 pixels) - .hfront= 0.78412f, // H front porch (after image, before HSYNC) in [us] (64 pixels) - .hsync= 1.47023f, // H sync pulse in [us] (120 pixels) - .hback= 2.25435f, // H back porch (after HSYNC, before image) in [us] (184 pixels) - .hfull= 14.11419f, // H full visible in [us] (1152 pixels) - - // vertical - .vtot=895, // total scanlines (both subframes) - .vmax=864, // maximal height - - // subframe 1 - .vsync1=3, // V sync (half-)pulses on subframe 1 - .vpost1=0, // V sync post half-pulses on subframe 1 - .vback1=27, // V back porch (after VSYNC, before image) on subframe 1 - .vact1=864, // active visible scanlines, subframe 1 - .vfront1=1, // V front porch (after image, before VSYNC) on subframe 1 - .vpre1=0, // V sync pre half-pulses on subframe 1 - - // subframe 2 (ignored if not interlaced) - .vsync2=0, // V sync half-pulses on subframe 2 - .vpost2=0, // V sync post half-pulses on subframe 2 - .vback2=0, // V back porch (after VSYNC, before image) on subframe 2 - .vact2=0, // active visible scanlines, subframe 2 - .vfront2=0, // V front porch (after image, before VSYNC) on subframe 2 - .vpre2=0, // V sync pre half-pulses on subframe 2 - - // name - .name = "VESA ", // video timing name (VIDEO_NAME_LEN characters + terminating 0) - - // flags - .inter=False, // interlaced (use subframes) - .psync=True, // positive synchronization - .odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL) -}; - -// HD 4:3 1280x960, vert. 53 Hz, hor. 51.858 kHz, pixel clock 102.1 MHz -#define HD_SLOW 1.15f -const sVideo VideoHD = { - // horizontal - .htot= 16.76787f*HD_SLOW, // total scanline in [us] (1712 pixels) - .hfront= 0.78355f*HD_SLOW, // H front porch (after image, before HSYNC) in [us] (80 pixels) - .hsync= 1.33203f*HD_SLOW, // H sync pulse in [us] (136 pixels) - .hback= 2.11557f*HD_SLOW, // H back porch (after HSYNC, before image) in [us] (216 pixels) - .hfull= 12.53673f*HD_SLOW, // H full visible in [us] (1280 pixels) - - // vertical - .vtot=994-10, // total scanlines (both subframes) - .vmax=960, // maximal height - - // subframe 1 - .vsync1=3, // V sync (half-)pulses on subframe 1 - .vpost1=0, // V sync post half-pulses on subframe 1 - .vback1=30-10, // V back porch (after VSYNC, before image) on subframe 1 - .vact1=960, // active visible scanlines, subframe 1 - .vfront1=1, // V front porch (after image, before VSYNC) on subframe 1 - .vpre1=0, // V sync pre half-pulses on subframe 1 - - // subframe 2 (ignored if not interlaced) - .vsync2=0, // V sync half-pulses on subframe 2 - .vpost2=0, // V sync post half-pulses on subframe 2 - .vback2=0, // V back porch (after VSYNC, before image) on subframe 2 - .vact2=0, // active visible scanlines, subframe 2 - .vfront2=0, // V front porch (after image, before VSYNC) on subframe 2 - .vpre2=0, // V sync pre half-pulses on subframe 2 - - // name - .name = "HD ", // video timing name (VIDEO_NAME_LEN characters + terminating 0) - - // flags - .inter=False, // interlaced (use subframes) - .psync=False, // positive synchronization - .odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL) +// required resolution width x height +const u16 VideoResReq[RES_MAX*2] = +{ + 256, 192, // RES_ZX = 0, // 256x192 + 320, 200, // RES_CGA, // 320x200 + 320, 240, // RES_QVGA, // 320x240 + 512, 400, // RES_EGA, // 512x400 + 640, 480, // RES_VGA, // 640x480 }; @@ -582,59 +200,23 @@ void VgaCfgDef(sVgaCfg* cfg) cfg->height = 480; // height in lines cfg->wfull = 0; // width of full screen, corresponding to 'hfull' time (0=use 'width' parameter) cfg->video = &VideoVGA; // used video timings - cfg->freq = 250000; //120000; // required minimal system frequency in kHz (real frequency can be higher) + uint freq = clock_get_hz(clk_sys)/1000; + cfg->freq = freq; // required minimal system frequency in kHz (real frequency can be higher) cfg->fmax = 270000; // maximal system frequency in kHz (limit resolution if needed) - cfg->mode[0] = LAYERMODE_BASE; // modes of overlapped layers 0..3 LAYERMODE_* (LAYERMODE_BASE = layer is off) - cfg->mode[1] = LAYERMODE_BASE; // - mode of layer 0 is ignored (always use LAYERMODE_BASE) - cfg->mode[2] = LAYERMODE_BASE; // - all overlapped layers must use same layer program - cfg->mode[3] = LAYERMODE_BASE; cfg->dbly = False; // double in Y direction cfg->lockfreq = False; // lock required frequency, do not change it } -// debug print videomode setup -void VgaPrintCfg(const sVmode* vmode) -{ - printf("width=%u height=%u wfull=%u wmax=%u\n", vmode->width, vmode->height, vmode->wfull, vmode->wmax); - printf("freq=%u vco=%u fbdiv=%u pd1=%u pd2=%u\n", vmode->freq, vmode->vco, vmode->fbdiv, vmode->pd1, vmode->pd2); - printf("div=%u cpp=%u prog=%u mode=%u %u %u %u\n", vmode->div, vmode->cpp, vmode->prog, vmode->mode[0], vmode->mode[1], vmode->mode[2], vmode->mode[3]); - printf("htot=%u hfront=%u hsync=%u hback=%u\n", vmode->htot, vmode->hfront, vmode->hsync, vmode->hback); - printf("vtot=%u vmax=%u\n", vmode->vtot, vmode->vmax); - printf("vsync1=%u vpost1=%u vback1=%u vact1=%u vfront1=%u vpre1=%u vfirst1=%u\n", vmode->vsync1, vmode->vpost1, - vmode->vback1, vmode->vact1, vmode->vfront1, vmode->vpre1, vmode->vfirst1); - printf("vsync2=%u vpost2=%u vback2=%u vact2=%u vfront2=%u vpre2=%u vfirst2=%u\n", vmode->vsync2, vmode->vpost2, - vmode->vback2, vmode->vact2, vmode->vfront2, vmode->vpre2, vmode->vfirst2); - printf("lockfreq=%u dbly=%u inter=%u psync=%u odd=%u\n", vmode->lockfreq, vmode->dbly, vmode->inter, vmode->psync, vmode->odd); -} + // calculate videomode setup // cfg ... required configuration // vmode ... destination videomode setup for driver void VgaCfg(const sVgaCfg* cfg, sVmode* vmode) { - int i; - - // prepare layer program, copy layer modes - u8 prog = LAYERMODE_BASE; - vmode->mode[0] = prog; - for (i = 1; i < LAYERS; i++) - { - if (cfg->mode[i] != LAYERMODE_BASE) prog = LayerMode[cfg->mode[i]].prog; - vmode->mode[i] = cfg->mode[i]; - } - vmode->prog = prog; - // prepare minimal and maximal clocks per pixel - int mincpp = LayerMode[LAYERMODE_BASE].mincpp; - int maxcpp = LayerMode[LAYERMODE_BASE].maxcpp; - int cpp; - for (i = 1; i < LAYERS; i++) - { - cpp = LayerMode[cfg->mode[i]].mincpp; - if (cpp > mincpp) mincpp = cpp; - cpp = LayerMode[cfg->mode[i]].maxcpp; - if (cpp < maxcpp) maxcpp = cpp; - } + int mincpp = 2; + int maxcpp = 17; // prepare full width int w = cfg->width; // required width @@ -649,7 +231,7 @@ void VgaCfg(const sVgaCfg* cfg, sVmode* vmode) // calculate cpp from required frequency (rounded down), limit minimal cpp u32 freq = cfg->freq; - cpp = (int)(freq*hfull/1000/wfull + 0.1f); + int cpp = (int)(freq*hfull/1000/wfull + 0.1f); if (cpp < mincpp) cpp = mincpp; // recalculate frequency if not locked @@ -732,14 +314,6 @@ void VgaCfg(const sVgaCfg* cfg, sVmode* vmode) } htot = hfront + hsync + hback + hwidth; // total state machine clocks per line - - // interliced htot must be even (to enable split to half-sync) - if (v->inter && ((htot & 1) != 0)) - { - htot--; - hfront++; - } - vmode->htot = (u16)htot; // total state machine clocks per line vmode->hfront = (u16)hfront; // H front porch in state machine clocks (min. 2) vmode->hsync = (u16)hsync; // H sync pulse in state machine clocks (min. 4) @@ -752,21 +326,7 @@ void VgaCfg(const sVgaCfg* cfg, sVmode* vmode) if (h > v->vmax) h = v->vmax; // limit height if (cfg->dbly) h &= ~1; // must be even number if double lines - int vact1 = h; // active lines in progress mode - int vact2 = 0; - if (v->inter) // interlaced - { - if (v->odd) // first frame is odd lines - { - vact1 = h/2; - vact2 = (h+1)/2; // if even lines, even frame will have more lines - } - else - { - vact1 = (h+1)/2; // if even lines, even frame will have more lines - vact2 = h/2; - } - } + int vact = h; // active lines in progress mode if (cfg->dbly) h /= 2; // return double lines to single lines vmode->height = h; @@ -774,125 +334,37 @@ void VgaCfg(const sVgaCfg* cfg, sVmode* vmode) // vertical timings vmode->vtot = v->vtot; // total scanlines - vmode->vact1 = vact1; // active scanlines of 1st subframe - int dh = vact1 - v->vact1; // difference - vmode->vsync1 = v->vsync1; // V sync (half-)pulses on subframe 1 - vmode->vpost1 = v->vpost1; // V sync post (half-)pulses on subframe 1 - vmode->vback1 = v->vback1 - dh/2; // V back porch (after VSYNC, before image) on subframe 1 - vmode->vfront1 = v->vfront1 - ((dh < 0) ? (dh-1)/2 : (dh+1)/2); // V front porch (after image, before VSYNC) on subframe 1 - vmode->vpre1 = v->vpre1; // V sync pre (half-)pulses on subframe 1 - - vmode->vact2 = vact2; // active scanlines of 2nd subframe - dh = vact2 - v->vact2; // difference - vmode->vsync2 = v->vsync2; // V sync half-pulses on subframe 2 - vmode->vpost2 = v->vpost2; // V sync post half-pulses on subframe 2 - vmode->vback2 = v->vback2 - dh/2; // V back porch (after VSYNC, before image) on subframe 2 - vmode->vfront2 = v->vfront2 - ((dh < 0) ? (dh-1)/2 : (dh+1)/2); // V front porch (after image, before VSYNC) on subframe 2 - vmode->vpre2 = v->vpre2; // V sync pre half-pulses on subframe 2 + vmode->vact = vact; // active scanlines + int dh = vact - v->vact; // difference + vmode->vsync = v->vsync; // V sync (half-)pulses + vmode->vpost = v->vpost; // V sync post (half-)pulses + vmode->vback = v->vback - dh/2; // V back porch (after VSYNC, before image) + vmode->vfront = v->vfront - ((dh < 0) ? (dh-1)/2 : (dh+1)/2); // V front porch (after image, before VSYNC) + vmode->vpre = v->vpre; // V sync pre (half-)pulses // frequency vmode->hfreq = vmode->freq * 1000.0f / vmode->div / vmode->htot; vmode->vfreq = vmode->hfreq / vmode->vtot; - // name - vmode->name = v->name; // video timing name - // flags vmode->lockfreq = cfg->lockfreq; // lock current frequency, do not change it vmode->dbly = cfg->dbly; // double scanlines - vmode->inter = v->inter; // interlaced (use sub-frames) vmode->psync = v->psync; // positive synchronization - vmode->odd = v->odd; // first sub-frame is odd lines 1, 3, 5,... (PAL) // first active scanline - if (v->inter) - { - // interlaced - vmode->vfirst1 = (vmode->vsync1 + vmode->vpost1)/2 + vmode->vback1 + 1; - vmode->vfirst2 = vmode->vfirst1 + vmode->vact1 + vmode->vfront1 + - (vmode->vpre1 + vmode->vsync2 + vmode->vpost2)/2 + vmode->vback2; - } - else - { - // progressive - vmode->vfirst1 = vmode->vsync1 + vmode->vback1 + 1; - vmode->vfirst2 = 0; - } + vmode->vfirst = vmode->vsync + vmode->vback + 1; } -// timings -const sVideo* VideoResTab[DEV_MAX*RES_MAX] = -{ - // DEV_PAL - &VideoPALp, // RES_ZX = 0, // 256x192 - &VideoPALp, // RES_CGA, // 320x200 - &VideoPALp, // RES_QVGA, // 320x240 - &VideoPAL, // RES_EGA, // 528x400 - &VideoPAL, // RES_VGA, // 640x480 - &VideoPAL, // RES_SVGA, // 800x600 (not for TV device) - &VideoPAL, // RES_XGA, // 1024x768 (not for TV device) - &VideoPAL, // RES_HD, // 1280x960 (not for TV device) - // DEV_NTSC - &VideoNTSCp, // RES_ZX = 0, // 256x192 - &VideoNTSCp, // RES_CGA, // 320x200 - &VideoNTSCp, // RES_QVGA, // 320x240 - &VideoNTSC, // RES_EGA, // 528x400 - &VideoNTSC, // RES_VGA, // 640x480 - &VideoNTSC, // RES_SVGA, // 800x600 (not for TV device) - &VideoNTSC, // RES_XGA, // 1024x768 (not for TV device) - &VideoNTSC, // RES_HD, // 1280x960 (not for TV device) - - // DEV_VGA - &VideoEGA, // RES_ZX = 0, // 256x192 - &VideoVGA, // RES_CGA, // 320x200 - &VideoVGA, // RES_QVGA, // 320x240 - &VideoEGA, // RES_EGA, // 528x400 - &VideoVGA, // RES_VGA, // 640x480 - &VideoSVGA, // RES_SVGA, // 800x600 (not for TV device) - &VideoXGA, // RES_XGA, // 1024x768 (not for TV device) - &VideoHD, // RES_HD, // 1280x960 (not for TV device) -}; - -// required resolution width x height -const u16 VideoResReq[RES_MAX*2] = -{ - 256, 192, // RES_ZX = 0, // 256x192 - 320, 200, // RES_CGA, // 320x200 - 320, 240, // RES_QVGA, // 320x240 - 512, 400, // RES_EGA, // 512x400 - 640, 480, // RES_VGA, // 640x480 - 800, 600, // RES_SVGA, // 800x600 (not for TV device) - 1024, 768, // RES_XGA, // 1024x768 (not for TV device) - 1280, 960, // RES_HD, // 1280x960 (not for TV device) -}; // initialize videomode // dev ... device DEV_* // res ... resolution RES_* -// form ... format FORM_* -// buf ... pointer to frame buffer (must be aligned to 4-bites, use ALIGNED attribute) -// buf2 ...pointer to additional buffer: -// FORM_TILE: pointer to column of tiles 32x32 in 8-bit graphics -// FORM_TEXT: pointer to font 8x16 or 8x8 (size 4 KB or 2 KB, ALIGNED attribute, should be in RAM) -// - copy font to 4KB or 2 KB RAM buffer with ALIGNED attribute -// - text uses color attributes PC_* -// FORM_RLE: pointer to image rows (ALIGNED attribute, should be in RAM) -// JMH -const sVmode* Video(u8 dev, u8 res, u8 form, u8* buf, const void* buf2 /* = FontBoldB8x16 */) +const sVmode* Video(u8 dev, u8 res) { - // stop VGA core - // JMH - //multicore_reset_core1(); - - // run VGA core - // JMH - //multicore_launch_core1(VgaCore); - // prepare timings structure if (dev >= DEV_MAX) dev = DEV_VGA; if (res >= RES_MAX) res = RES_MAX-1; - if (form >= FORM_MAX) form = FORM_MAX-1; const sVideo* v = VideoResTab[dev*RES_MAX + res]; // required resolution @@ -900,121 +372,17 @@ const sVmode* Video(u8 dev, u8 res, u8 form, u8* buf, const void* buf2 /* = Font u16 h = VideoResReq[res*2+1]; if (h > v->vmax) h = v->vmax; - if ((form == FORM_TEXT8) || (form == FORM_MTEXT8)) - { - w = w/8*8; - h = h/8*8; - } - - if ((form == FORM_TEXT16) || (form == FORM_MTEXT16)) - { - w = w/8*8; - h = h/16*16; - } - // setup videomode VgaCfgDef(&Cfg); // get default configuration Cfg.video = v; // video timings Cfg.width = w; // screen width Cfg.height = h; // screen height - if (form == FORM_RLE) Cfg.mode[1] = LAYERMODE_RLE; Cfg.dbly = h <= v->vmax/2; // double scanlines VgaCfg(&Cfg, &Vmode); // calculate videomode setup - // initialize base layer 0 - ScreenClear(pScreen); - sStrip* t = ScreenAddStrip(pScreen, h); - sSegm* g = ScreenAddSegm(t, w); - switch (form) - { - case FORM_8BIT: // 8-bit pixel graphics (up to EGA resolution) - ScreenSegmGraph8(g, buf, w); - Canvas.img = buf; - Canvas.w = w; - Canvas.h = h; - Canvas.wb = w; - Canvas.format = CANVAS_8; - break; - - case FORM_4BIT: // 4-bit pixel graphics (up to SVGA graphics) - GenPal16Trans(Pal16Trans, DefPal16); // generate palette translation table - ScreenSegmGraph4(g, buf, Pal16Trans, w/2); - Canvas.img = buf; - Canvas.w = w; - Canvas.h = h; - Canvas.wb = w/2; - Canvas.format = CANVAS_4; - break; - - case FORM_MONO: // 1-bit pixel graphics - ScreenSegmGraph1(g, buf, COL_BLACK, COL_WHITE, w/8); - Canvas.img = buf; - Canvas.w = w; - Canvas.h = h; - Canvas.wb = w/8; - Canvas.format = CANVAS_1; - break; - - case FORM_TILE8: // 8x8 tiles - ScreenSegmTile(g, buf, buf2, 8, 8, (w+7)/8); - break; - - case FORM_TILE12: // 12x12 tiles - ScreenSegmTile(g, buf, buf2, 12, 12, (w+11)/12); - break; - - case FORM_TILE16: // 16x16 tiles - ScreenSegmTile(g, buf, buf2, 16, 16, (w+15)/16); - break; - - case FORM_TILE24: // 24x24 tiles - ScreenSegmTile(g, buf, buf2, 24, 24, (w+23)/24); - break; - - case FORM_TILE32: // 32x32 tiles - ScreenSegmTile(g, buf, buf2, 32, 32, (w+31)/32); - break; - - case FORM_TILE48: // 48x48 tiles - ScreenSegmTile(g, buf, buf2, 48, 48, (w+47)/48); - break; - - case FORM_TILE64: // 64x64 tiles - ScreenSegmTile(g, buf, buf2, 64, 64, (w+63)/64); - break; - - case FORM_MTEXT8: // mono text with font 8x8 - ScreenSegmMText(g, buf, buf2, 8, COL_BLACK, COL_WHITE, w/8); - break; - - case FORM_MTEXT16: // mono text with font 8x16 - ScreenSegmMText(g, buf, buf2, 16, COL_BLACK, COL_WHITE, w/8); - break; - - case FORM_TEXT8: // attribute text with font 8x8 - ScreenSegmAText(g, buf, buf2, 8, DefPal16, w/8*2); - break; - - case FORM_TEXT16: // attribute text with font 8x16 - ScreenSegmAText(g, buf, buf2, 16, DefPal16, w/8*2); - break; - - case FORM_RLE: // images with RLE compression (on overlapped layer 1) - ScreenSegmColor(g, 0, 0); - LayerSetup(1, buf, &Vmode, w, h, 0, buf2); - LayerOn(1); - break; - } - // initialize system clock set_sys_clock_pll(Vmode.vco*1000, Vmode.pd1, Vmode.pd2); - - - // initialize videomode - // JMH - //VgaInitReq(&Vmode); - return &Vmode; } diff --git a/MCUME_pico/picovga_t4/vga_vmode.h b/MCUME_pico/picovga_t4/vga_vmode.h index a39e6ca..303f286 100755 --- a/MCUME_pico/picovga_t4/vga_vmode.h +++ b/MCUME_pico/picovga_t4/vga_vmode.h @@ -1,15 +1,16 @@ - // **************************************************************************** // // VGA videomodes // +// file derived from the PicoVGA project +// https://github.com/Panda381/PicoVGA +// by Miroslav Nemecek +// // **************************************************************************** #ifndef _VGA_VMODE_H #define _VGA_VMODE_H -#define VIDEO_NAME_LEN 5 // length of video timing name - // video timings typedef struct { // horizontal @@ -23,64 +24,26 @@ typedef struct { u16 vtot; // total scanlines (both subframes) u16 vmax; // maximal height - // subframe 1 - u16 vsync1; // V sync (half-)pulses on subframe 1 - u16 vpost1; // V sync post half-pulses on subframe 1 - u16 vback1; // V back porch (after VSYNC, before image) on subframe 1 - u16 vact1; // active visible scanlines, subframe 1 - u16 vfront1; // V front porch (after image, before VSYNC) on subframe 1 - u16 vpre1; // V sync pre half-pulses on subframe 1 + // frame + u16 vsync; // V sync (half-)pulses + u16 vpost; // V sync post half-pulses + u16 vback; // V back porch (after VSYNC, before image) + u16 vact; // active visible scanlines + u16 vfront; // V front porch (after image, before VSYNC) + u16 vpre; // V sync pre half-pulses - // subframe 2 (ignored if not interlaced) - u16 vsync2; // V sync half-pulses on subframe 2 - u16 vpost2; // V sync post half-pulses on subframe 2 - u16 vback2; // V back porch (after VSYNC, before image) on subframe 2 - u16 vact2; // active visible scanlines, subframe 2 - u16 vfront2; // V front porch (after image, before VSYNC) on subframe 2 - u16 vpre2; // V sync pre half-pulses on subframe 2 - - // name - const char* name; // video timing name (VIDEO_NAME_LEN characters + terminating 0) - - // flags - bool inter; // interlaced (use subframes) bool psync; // positive synchronization - bool odd; // first sub-frame is odd lines 1, 3, 5,... (PAL) } sVideo; -// === TV videomodes - -// TV PAL interlaced 5:4 720x576 (4:3 768x576, 16:9 1024x576) -extern const sVideo VideoPAL; - -// TV PAL progressive 5:4 360x288 (4:3 384x288, 16:9 512x288) -extern const sVideo VideoPALp; - -// TV NTSC interlaced 4:3 640x480 (5:4 600x480, 16:9 848x480) -extern const sVideo VideoNTSC; - -// TV NTSC progressive 4:3 320x240 (5:4 300x240, 16:9 424x240) -extern const sVideo VideoNTSCp; // === Monitor videomodes // EGA 8:5 640x400 (5:4 500x400, 4:3 528x400, 16:9 704x400), vert. 70 Hz, hor. 31.4685 kHz, pixel clock 25.175 MHz extern const sVideo VideoEGA; - // VGA 4:3 640x480 (16:9 848x480), vert. 60 Hz, hor. 31.4685 kHz, pixel clock 25.175 MHz extern const sVideo VideoVGA; -// SVGA 4:3 800x600 (16:9 1064x600), vert. 60 Hz, hor. 37.897 kHz, pixel clock 40 MHz -extern const sVideo VideoSVGA; -// XGA 4:3 1024x768 (16:9 1360x768), vert. 60 Hz, hor. 48.36310 kHz, pixel clock 65 MHz -extern const sVideo VideoXGA; - -// VESA 4:3 1152x864, vert. 60 Hz, hor. 53.697 kHz, pixel clock 81.62 MHz -extern const sVideo VideoVESA; - -// HD 4:3 1280x960, vert. 53 Hz, hor. 51.858 kHz, pixel clock 102.1 MHz -extern const sVideo VideoHD; // required configuration to initialize VGA output typedef struct { @@ -90,9 +53,6 @@ typedef struct { const sVideo* video; // used video timings u32 freq; // required minimal system frequency in kHz (real frequency can be higher) u32 fmax; // maximal system frequency in kHz (limit resolution if needed) - u8 mode[LAYERS_MAX]; // modes of overlapped layers 0..3 LAYERMODE_* (LAYERMODE_BASE = layer is off) - // - mode of layer 0 is ignored (always use LAYERMODE_BASE) - // - all overlapped layers must use same layer program bool dbly; // double in Y direction bool lockfreq; // lock required frequency, do not change it } sVgaCfg; @@ -116,7 +76,6 @@ typedef struct { u16 div; // divide base state machine clock u16 cpp; // state machine clocks per pixel u8 prog; // layer program LAYERPROG_* - u8 mode[LAYERS_MAX]; // mode of layer 0..3 LAYERMODE_* (LAYERMODE_BASE = layer is off or base layer) // horizontal timings u16 htot; // total state machine clocks per line @@ -130,40 +89,24 @@ typedef struct { u16 vmax; // maximal height float vfreq; // vertical frequency in [Hz] - // subframe 1 - u16 vsync1; // V sync (half-)pulses on subframe 1 - u16 vpost1; // V sync post (half-)pulses on subframe 1 - u16 vback1; // V back porch (after VSYNC, before image) on subframe 1 - u16 vact1; // active visible scanlines, subframe 1 - u16 vfront1; // V front porch (after image, before VSYNC) on subframe 1 - u16 vpre1; // V sync pre (half-)pulses on subframe 1 - u16 vfirst1; // first active scanline, subframe 1 - - // subframe 2 (ignored if not interlaced) - u16 vsync2; // V sync half-pulses on subframe 2 - u16 vpost2; // V sync post half-pulses on subframe 2 - u16 vback2; // V back porch (after VSYNC, before image) on subframe 2 - u16 vact2; // active visible scanlines, subframe 2 - u16 vfront2; // V front porch (after image, before VSYNC) on subframe 2 - u16 vpre2; // V sync pre half-pulses on subframe 2 - u16 vfirst2; // first active scanline, subframe 2 - - // name - const char* name; // video timing name (VIDEO_NAME_LEN characters + terminating 0) + // frame + u16 vsync; // V sync (half-)pulses + u16 vpost; // V sync post (half-)pulses + u16 vback; // V back porch (after VSYNC, before image) + u16 vact; // active visible scanlines + u16 vfront; // V front porch (after image, before VSYNC) + u16 vpre; // V sync pre (half-)pulses + u16 vfirst; // first active scanline // flags bool lockfreq; // lock current frequency, do not change it bool dbly; // double scanlines - bool inter; // interlaced (use sub-frames) bool psync; // positive synchronization - bool odd; // first sub-frame is odd lines 1, 3, 5,... (PAL) } sVmode; // output device enum { - DEV_PAL = 0, // PAL TV - DEV_NTSC, // NTSC TV - DEV_VGA, // VGA monitor + DEV_VGA=0, // VGA monitor DEV_MAX }; @@ -175,47 +118,17 @@ enum { RES_QVGA, // 320x240 RES_EGA, // 512x400 RES_VGA, // 640x480 - RES_SVGA, // 800x600 (not for TV device) - RES_XGA, // 1024x768 (not for TV device) - RES_HD, // 1280x960 (not for TV device) RES_MAX }; -// graphics formats -enum { - FORM_8BIT = 0, // 8-bit pixel graphics (up to EGA resolution) - FORM_4BIT, // 4-bit pixel graphics (up to SVGA graphics) - FORM_MONO, // 1-bit pixel graphics - FORM_TILE8, // 8x8 tiles - FORM_TILE12, // 12x12 tiles - FORM_TILE16, // 16x16 tiles - FORM_TILE24, // 24x24 tiles - FORM_TILE32, // 32x32 tiles - FORM_TILE48, // 48x48 tiles - FORM_TILE64, // 64x64 tiles - FORM_MTEXT8, // mono text with font 8x8 - FORM_MTEXT16, // mono text with font 8x16 - FORM_TEXT8, // attribute text with font 8x8 - FORM_TEXT16, // attribute text with font 8x16 - FORM_RLE, // images with RLE compression (on overlapped layer 1) - - FORM_MAX -}; extern sVmode Vmode; // videomode setup extern sVgaCfg Cfg; // required configuration -extern sCanvas Canvas; // canvas of draw box - -// 16-color palette translation table -extern u16 Pal16Trans[256]; // initialize default VGA configuration void VgaCfgDef(sVgaCfg* cfg); -// debug print videomode setup -void VgaPrintCfg(const sVmode* vmode); - // calculate videomode setup // cfg ... required configuration // vmode ... destination videomode setup for driver @@ -224,15 +137,6 @@ void VgaCfg(const sVgaCfg* cfg, sVmode* vmode); // initialize videomode // dev ... device DEV_* // res ... resolution RES_* -// form ... format FORM_* -// buf ... pointer to frame buffer (must be aligned to 4-bites, use ALIGNED attribute) -// buf2 ...pointer to additional buffer: -// FORM_TILE*: pointer to column of tiles 32x32 in 8-bit graphics -// FORM_TEXT: pointer to font 8x16 or 8x8 (size 4 KB or 2 KB, ALIGNED attribute, should be in RAM) -// - copy font to 4KB or 2 KB RAM buffer with ALIGNED attribute -// - text uses color attributes PC_* -// FORM_RLE: pointer to image rows (ALIGNED attribute, should be in RAM) -// JMH -const sVmode* Video(u8 dev, u8 res, u8 form, u8* buf, const void* buf2 = NULL); +const sVmode* Video(u8 dev, u8 res); #endif // _VGA_VMODE_H diff --git a/MCUME_pico/testvga/testvga.cpp b/MCUME_pico/testvga/testvga.cpp index 4a07042..5564c1d 100644 --- a/MCUME_pico/testvga/testvga.cpp +++ b/MCUME_pico/testvga/testvga.cpp @@ -20,15 +20,15 @@ static char * digits = "0123456789"; static uint8_t pix = 0; int main(void) { - vreg_set_voltage(VREG_VOLTAGE_1_05); +// vreg_set_voltage(VREG_VOLTAGE_1_05); // set_sys_clock_khz(125000, true); // set_sys_clock_khz(150000, true); // set_sys_clock_khz(133000, true); // set_sys_clock_khz(200000, true); +// set_sys_clock_khz(210000, true); + set_sys_clock_khz(230000, true); // set_sys_clock_khz(225000, true); - set_sys_clock_khz(252000, true); - - +// set_sys_clock_khz(250000, true); stdio_init_all(); printf("start\n"); @@ -56,7 +56,6 @@ int main(void) { buf[2] = digits[r3]; vga.drawText(4*8,8,buf,BLUE,LIGHT_BLUE,false); - while (true) { //tft.fillScreenNoDma( pix++ ); vga.waitSync();