diff --git a/MCUME_pico/CMakeLists.txt b/MCUME_pico/CMakeLists.txt index cee186c..093436c 100644 --- a/MCUME_pico/CMakeLists.txt +++ b/MCUME_pico/CMakeLists.txt @@ -18,6 +18,7 @@ add_subdirectory(FatFs_SPI build) include_directories(config) include_directories(vga_t4) +#include_directories(picovga_t4) include_directories(tft_t) include_directories(psram) @@ -250,6 +251,47 @@ set(VGA_T4_SOURCES vga_t4/scanvideo.c ) +set(PICOVGA_T4_SOURCES + picovga_t4/VGA_t4.cpp + picovga_t4/vga.cpp + picovga_t4/vga_vmode.cpp + picovga_t4/vga_layer.cpp + picovga_t4/vga_screen.cpp + picovga_t4/vga_render.S + picovga_t4/vga_blitkey.S + picovga_t4/render/vga_atext.S + picovga_t4/render/vga_attrib8.S + picovga_t4/render/vga_color.S + picovga_t4/render/vga_ctext.S + picovga_t4/render/vga_dtext.S + picovga_t4/render/vga_fastsprite.S + picovga_t4/render/vga_ftext.S + picovga_t4/render/vga_graph1.S + picovga_t4/render/vga_graph2.S + picovga_t4/render/vga_graph4.S + picovga_t4/render/vga_graph8.S + picovga_t4/render/vga_graph8mat.S + picovga_t4/render/vga_graph8persp.S + picovga_t4/render/vga_gtext.S + picovga_t4/render/vga_level.S + picovga_t4/render/vga_levelgrad.S + picovga_t4/render/vga_mtext.S + picovga_t4/render/vga_oscil.S + picovga_t4/render/vga_oscline.S + picovga_t4/render/vga_persp.S + picovga_t4/render/vga_persp2.S + picovga_t4/render/vga_plane2.S + picovga_t4/render/vga_progress.S + picovga_t4/render/vga_sprite.S + picovga_t4/render/vga_tile.S + picovga_t4/render/vga_tile2.S + picovga_t4/render/vga_tilepersp.S + picovga_t4/render/vga_tilepersp15.S + picovga_t4/render/vga_tilepersp2.S + picovga_t4/render/vga_tilepersp3.S + picovga_t4/render/vga_tilepersp4.S + ) + set(PSRAM_SOURCES psram/psram_t.cpp ) @@ -271,9 +313,9 @@ set(TESTVGA_SOURCES testvga/testvga.cpp ) -set(TESTTFT_SOURCES - testtft/testtft.cpp - testtft/emuapi.cpp +set(TESTKEYMAX_SOURCES + testkeymax/testkeymax.cpp + testkeymax/emuapi.cpp ) set(TESTPSRAM_SOURCES @@ -281,10 +323,12 @@ set(TESTPSRAM_SOURCES 
testpsram/emuapi.cpp ) -#add_compile_definitions(OVERRULE_WIDTH=320 OVERRULE_HEIGHT=192) +# Vic20,ZX81,ZX Spectrum, Colem +add_compile_definitions(OVERRULE_WIDTH=320 OVERRULE_HEIGHT=192) + add_executable(mcume # ${GFXENGINE_SOURCES} - ${PICO20_SOURCES} +# ${PICO20_SOURCES} # ${PICO64_SOURCES} # ${PICO81_SOURCES} # ${PICO800_SOURCES} @@ -297,16 +341,18 @@ add_executable(mcume # ${PICONOFRENDO_SOURCES} # ${PICOSND_SOURCES} # ${TESTIO_SOURCES} -# ${TESTVGA_SOURCES} -# ${TESTTFT_SOURCES} + ${TESTVGA_SOURCES} +# ${TESTKEYMAX_SOURCES} # ${TESTPSRAM_SOURCES} ${PSRAM_SOURCES} - ${VGA_T4_SOURCES} +# ${VGA_T4_SOURCES} + ${PICOVGA_T4_SOURCES} ${TFT_T_SOURCES} ) -pico_generate_pio_header(mcume ${CMAKE_CURRENT_LIST_DIR}/vga_t4/timing.pio) -pico_generate_pio_header(mcume ${CMAKE_CURRENT_LIST_DIR}/vga_t4/scanvideo.pio) +#pico_generate_pio_header(mcume ${CMAKE_CURRENT_LIST_DIR}/vga_t4/timing.pio) +#pico_generate_pio_header(mcume ${CMAKE_CURRENT_LIST_DIR}/vga_t4/scanvideo.pio) +pico_generate_pio_header(mcume ${CMAKE_CURRENT_LIST_DIR}/picovga_t4/picovga.pio) target_link_libraries(mcume pico_multicore diff --git a/MCUME_pico/bin/PICORETROVGA/mcume_pico20.uf2 b/MCUME_pico/bin/PICORETROVGA/mcume_pico20.uf2 new file mode 100644 index 0000000..400b3d0 Binary files /dev/null and b/MCUME_pico/bin/PICORETROVGA/mcume_pico20.uf2 differ diff --git a/MCUME_pico/bin/PICORETROVGA/mcume_pico64.uf2 b/MCUME_pico/bin/PICORETROVGA/mcume_pico64.uf2 new file mode 100644 index 0000000..0b2dbad Binary files /dev/null and b/MCUME_pico/bin/PICORETROVGA/mcume_pico64.uf2 differ diff --git a/MCUME_pico/bin/PICORETROVGA/mcume_pico800.uf2 b/MCUME_pico/bin/PICORETROVGA/mcume_pico800.uf2 new file mode 100644 index 0000000..abd06cf Binary files /dev/null and b/MCUME_pico/bin/PICORETROVGA/mcume_pico800.uf2 differ diff --git a/MCUME_pico/bin/PICORETROVGA/mcume_pico81.uf2 b/MCUME_pico/bin/PICORETROVGA/mcume_pico81.uf2 new file mode 100644 index 0000000..ece3223 Binary files /dev/null and 
b/MCUME_pico/bin/PICORETROVGA/mcume_pico81.uf2 differ diff --git a/MCUME_pico/bin/PICORETROVGA/mcume_picocolem.uf2 b/MCUME_pico/bin/PICORETROVGA/mcume_picocolem.uf2 new file mode 100644 index 0000000..0cdb4b5 Binary files /dev/null and b/MCUME_pico/bin/PICORETROVGA/mcume_picocolem.uf2 differ diff --git a/MCUME_pico/bin/PICORETROVGA/mcume_picoo2em.uf2 b/MCUME_pico/bin/PICORETROVGA/mcume_picoo2em.uf2 new file mode 100644 index 0000000..d663c18 Binary files /dev/null and b/MCUME_pico/bin/PICORETROVGA/mcume_picoo2em.uf2 differ diff --git a/MCUME_pico/bin/PICORETROVGA/mcume_picospeccy.uf2 b/MCUME_pico/bin/PICORETROVGA/mcume_picospeccy.uf2 new file mode 100644 index 0000000..5c8d67e Binary files /dev/null and b/MCUME_pico/bin/PICORETROVGA/mcume_picospeccy.uf2 differ diff --git a/MCUME_pico/bin/PICORETROVGA/mcume_picovcs.uf2 b/MCUME_pico/bin/PICORETROVGA/mcume_picovcs.uf2 new file mode 100644 index 0000000..6f8a222 Binary files /dev/null and b/MCUME_pico/bin/PICORETROVGA/mcume_picovcs.uf2 differ diff --git a/MCUME_pico/bin/PICORETROVGA/mcume_testkeymax.uf2 b/MCUME_pico/bin/PICORETROVGA/mcume_testkeymax.uf2 new file mode 100644 index 0000000..0c12e39 Binary files /dev/null and b/MCUME_pico/bin/PICORETROVGA/mcume_testkeymax.uf2 differ diff --git a/MCUME_pico/config/iopins.h b/MCUME_pico/config/iopins.h index 89d41ca..ecd4bbb 100644 --- a/MCUME_pico/config/iopins.h +++ b/MCUME_pico/config/iopins.h @@ -51,16 +51,24 @@ #else +#if (defined(PICOMPUTER) && defined(USE_VGA) ) // Speaker -#define AUDIO_PIN 0 //28 - +#define AUDIO_PIN 9 // VGA -/* -2-9 RRRGGGBB -10-11 VSYNC and HSYNC -*/ +/* RRRGGGBB + CSYNC */ +#define VGA_COLORBASE 0 +#define VGA_SYNCBASE 8 +#else +// Speaker +#define AUDIO_PIN 0 +// VGA +/* RRRGGGBB + VSYNC and HSYNC */ #define VGA_COLORBASE 2 #define VGA_SYNCBASE 14 +#endif + // TFT #define TFT_SPIREG spi0 @@ -104,11 +112,40 @@ #ifdef PICOMPUTER +#if defined(USE_VGA) // Keyboard matrix //Cols (out) -//1,2,3,4,5,14 +#define KCOLOUT1 20 +#define KCOLOUT2 21 
+#define KCOLOUT3 22 +#define KCOLOUT4 26 +#define KCOLOUT5 27 +#define KCOLOUT6 28 //Rows (in) -//6,9,15,8,7,22 +#define KROWIN1 14 +#define KROWIN2 15 +#define KROWIN3 16 +#define KROWIN4 17 +#define KROWIN5 18 +#define KROWIN6 19 +#else +// Keyboard matrix +//Cols (out) +#define KCOLOUT1 1 +#define KCOLOUT2 2 +#define KCOLOUT3 3 +#define KCOLOUT4 4 +#define KCOLOUT5 5 +#define KCOLOUT6 14 +//Rows (in) +#define KROWIN1 6 +#define KROWIN2 9 +#define KROWIN3 15 +#define KROWIN4 8 +#define KROWIN5 7 +#define KROWIN6 22 +#endif + #define KLED 25 #else diff --git a/MCUME_pico/config/platform_config.h b/MCUME_pico/config/platform_config.h index a99b91b..a024aaf 100644 --- a/MCUME_pico/config/platform_config.h +++ b/MCUME_pico/config/platform_config.h @@ -7,9 +7,10 @@ //#define PICOMPUTER 1 -#define PICOMPUTERMAX 1 +//#define PICOMPUTERMAX 1 +//#define PICORETROVGA 1 //#define MCUME_REV1 1 -//#define MCUME_REV2 1 +#define MCUME_REV2 1 #ifdef PICOMPUTER //#define SWAP_ALT_DEL 1 @@ -30,6 +31,16 @@ #define PICOMPUTER 1 #endif +#ifdef PICORETROVGA +#undef LOHRES +#undef FLIP_SCREEN +#undef ST7789 +#define USE_VGA 1 +#define INVX 1 +#define HAS_SND 1 +#define PICOMPUTER 1 +#endif + #ifdef MCUME_REV1 #define USE_VGA 1 #define INVX 1 diff --git a/MCUME_pico/pico20/emuapi.cpp b/MCUME_pico/pico20/emuapi.cpp index bcf2fac..122a71e 100644 --- a/MCUME_pico/pico20/emuapi.cpp +++ b/MCUME_pico/pico20/emuapi.cpp @@ -492,7 +492,7 @@ int emu_ReadKeys(void) #ifdef PICOMPUTER keymatrix_hitrow = -1; unsigned char row; - unsigned short cols[6]={1,2,3,4,5,14}; + unsigned short cols[6]={KCOLOUT1,KCOLOUT2,KCOLOUT3,KCOLOUT4,KCOLOUT5,KCOLOUT6}; unsigned char keymatrixtmp[6]; for (int i=0;i<6;i++){ @@ -503,15 +503,15 @@ int emu_ReadKeys(void) //__asm volatile ("nop\n"); // 4-8ns #endif row=0; - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(8) ? 0 : 0x02); - row |= (gpio_get(6) ? 
0 : 0x04); - row |= (gpio_get(15) ? 0 : 0x08); - row |= (gpio_get(7) ? 0 : 0x10); - row |= (gpio_get(22) ? 0 : 0x20); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN4) ? 0 : 0x02); + row |= (gpio_get(KROWIN1) ? 0 : 0x04); + row |= (gpio_get(KROWIN3) ? 0 : 0x08); + row |= (gpio_get(KROWIN5) ? 0 : 0x10); + row |= (gpio_get(KROWIN6) ? 0 : 0x20); //gpio_set_dir(cols[i], GPIO_OUT); gpio_put(cols[i], 1); gpio_set_dir(cols[i], GPIO_IN); @@ -528,15 +528,15 @@ int emu_ReadKeys(void) //__asm volatile ("nop\n"); // 4-8ns #endif row=0; - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(8) ? 0 : 0x02); - row |= (gpio_get(6) ? 0 : 0x04); - row |= (gpio_get(15) ? 0 : 0x08); - row |= (gpio_get(7) ? 0 : 0x10); - row |= (gpio_get(22) ? 0 : 0x20); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN4) ? 0 : 0x02); + row |= (gpio_get(KROWIN1) ? 0 : 0x04); + row |= (gpio_get(KROWIN3) ? 0 : 0x08); + row |= (gpio_get(KROWIN5) ? 0 : 0x10); + row |= (gpio_get(KROWIN6) ? 0 : 0x20); //gpio_set_dir(cols[i], GPIO_OUT); gpio_put(cols[i], 1); gpio_set_dir(cols[i], GPIO_IN); @@ -552,15 +552,15 @@ int emu_ReadKeys(void) //__asm volatile ("nop\n"); // 4-8ns #endif row=0; - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(8) ? 0 : 0x02); - row |= (gpio_get(6) ? 0 : 0x04); - row |= (gpio_get(15) ? 0 : 0x08); - row |= (gpio_get(7) ? 0 : 0x10); - row |= (gpio_get(22) ? 0 : 0x20); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 
0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN4) ? 0 : 0x02); + row |= (gpio_get(KROWIN1) ? 0 : 0x04); + row |= (gpio_get(KROWIN3) ? 0 : 0x08); + row |= (gpio_get(KROWIN5) ? 0 : 0x10); + row |= (gpio_get(KROWIN6) ? 0 : 0x20); //gpio_set_dir(cols[i], GPIO_OUT); gpio_put(cols[i], 1); gpio_set_dir(cols[i], GPIO_IN); @@ -850,57 +850,57 @@ void emu_InitJoysticks(void) { gpio_put(KLED, 1); // Output (rows) - gpio_init(1); - gpio_init(2); - gpio_init(3); - gpio_init(4); - gpio_init(5); - gpio_init(14); - gpio_set_dir(1, GPIO_OUT); - gpio_set_dir(2, GPIO_OUT); - gpio_set_dir(3, GPIO_OUT); - gpio_set_dir(4, GPIO_OUT); - gpio_set_dir(5, GPIO_OUT); - gpio_set_dir(14, GPIO_OUT); - gpio_put(1, 1); - gpio_put(2, 1); - gpio_put(3, 1); - gpio_put(4, 1); - gpio_put(5, 1); - gpio_put(14, 1); + gpio_init(KCOLOUT1); + gpio_init(KCOLOUT2); + gpio_init(KCOLOUT3); + gpio_init(KCOLOUT4); + gpio_init(KCOLOUT5); + gpio_init(KCOLOUT6); + gpio_set_dir(KCOLOUT1, GPIO_OUT); + gpio_set_dir(KCOLOUT2, GPIO_OUT); + gpio_set_dir(KCOLOUT3, GPIO_OUT); + gpio_set_dir(KCOLOUT4, GPIO_OUT); + gpio_set_dir(KCOLOUT5, GPIO_OUT); + gpio_set_dir(KCOLOUT6, GPIO_OUT); + gpio_put(KCOLOUT1, 1); + gpio_put(KCOLOUT2, 1); + gpio_put(KCOLOUT3, 1); + gpio_put(KCOLOUT4, 1); + gpio_put(KCOLOUT5, 1); + gpio_put(KCOLOUT6, 1); // but set as input floating when not used! 
- gpio_set_dir(1, GPIO_IN); - gpio_set_dir(2, GPIO_IN); - gpio_set_dir(3, GPIO_IN); - gpio_set_dir(4, GPIO_IN); - gpio_set_dir(5, GPIO_IN); - gpio_set_dir(14, GPIO_IN); - gpio_disable_pulls(1); - gpio_disable_pulls(2); - gpio_disable_pulls(3); - gpio_disable_pulls(4); - gpio_disable_pulls(5); - gpio_disable_pulls(14); + gpio_set_dir(KCOLOUT1, GPIO_IN); + gpio_set_dir(KCOLOUT2, GPIO_IN); + gpio_set_dir(KCOLOUT3, GPIO_IN); + gpio_set_dir(KCOLOUT4, GPIO_IN); + gpio_set_dir(KCOLOUT5, GPIO_IN); + gpio_set_dir(KCOLOUT6, GPIO_IN); + gpio_disable_pulls(KCOLOUT1); + gpio_disable_pulls(KCOLOUT2); + gpio_disable_pulls(KCOLOUT3); + gpio_disable_pulls(KCOLOUT4); + gpio_disable_pulls(KCOLOUT5); + gpio_disable_pulls(KCOLOUT6); // Input pins (cols) - gpio_init(6); - gpio_init(9); - gpio_init(15); - gpio_init(8); - gpio_init(7); - gpio_init(22); - gpio_set_dir(6,GPIO_IN); - gpio_set_dir(9,GPIO_IN); - gpio_set_dir(15,GPIO_IN); - gpio_set_dir(8,GPIO_IN); - gpio_set_dir(7,GPIO_IN); - gpio_set_dir(22,GPIO_IN); - gpio_pull_up(6); - gpio_pull_up(9); - gpio_pull_up(15); - gpio_pull_up(8); - gpio_pull_up(7); - gpio_pull_up(22); + gpio_init(KROWIN1); + gpio_init(KROWIN2); + gpio_init(KROWIN3); + gpio_init(KROWIN4); + gpio_init(KROWIN5); + gpio_init(KROWIN6); + gpio_set_dir(KROWIN1,GPIO_IN); + gpio_set_dir(KROWIN2,GPIO_IN); + gpio_set_dir(KROWIN3,GPIO_IN); + gpio_set_dir(KROWIN4,GPIO_IN); + gpio_set_dir(KROWIN5,GPIO_IN); + gpio_set_dir(KROWIN6,GPIO_IN); + gpio_pull_up(KROWIN1); + gpio_pull_up(KROWIN2); + gpio_pull_up(KROWIN3); + gpio_pull_up(KROWIN4); + gpio_pull_up(KROWIN5); + gpio_pull_up(KROWIN6); #endif } @@ -1247,7 +1247,9 @@ void emu_init(void) if (emu_ReadKeys() & MASK_JOY2_UP) { #ifdef PICOMPUTERMAX +#ifndef USE_VGA tft.flipscreen(true); +#endif #else tft.flipscreen(true); #endif @@ -1255,7 +1257,9 @@ void emu_init(void) else { #ifdef PICOMPUTERMAX +#ifndef USE_VGA tft.flipscreen(false); +#endif #else tft.flipscreen(false); #endif diff --git a/MCUME_pico/pico5200/emuapi.cpp 
b/MCUME_pico/pico5200/emuapi.cpp index facbc89..122a71e 100644 --- a/MCUME_pico/pico5200/emuapi.cpp +++ b/MCUME_pico/pico5200/emuapi.cpp @@ -13,11 +13,19 @@ extern "C" { #include "iopins.h" } +#if (defined(ILI9341) || defined(ST7789)) && defined(USE_VGA) +// Dual display config, initialize TFT +#include "tft_t_dma.h" +static TFT_T_DMA tft; +#else +// Non Dual display config #ifdef USE_VGA #include "vga_t_dma.h" #else #include "tft_t_dma.h" #endif +extern TFT_T_DMA tft; +#endif #define MAX_FILES 64 @@ -39,7 +47,7 @@ extern "C" { #define MENU_VGA_XOFFSET (MENU_FILE_XOFFSET+MENU_FILE_W+8) #define MENU_VGA_YOFFSET (MENU_VBAR_YOFFSET+MENU_FILE_H-32-37) -extern TFT_T_DMA tft; + static char romspath[64]; static int nbFiles=0; @@ -142,8 +150,175 @@ void emu_Free(void * pt) free(pt); } +void emu_drawText(unsigned short x, unsigned short y, const char * text, unsigned short fgcolor, unsigned short bgcolor, int doublesize) +{ + tft.drawText(x, y, text, fgcolor, bgcolor, doublesize?true:false); +} +/******************************** + * OSKB handling +********************************/ +#if (defined(ILI9341) || defined(ST7789)) && defined(USE_VGA) +// On screen keyboard position +#define KXOFF 28 //64 +#define KYOFF 96 +#define KWIDTH 11 //22 +#define KHEIGHT 3 + +static bool oskbOn = false; +static int cxpos = 0; +static int cypos = 0; +static int oskbMap = 0; +static uint16_t oskbBLastState = 0; + +static void lineOSKB2(int kxoff, int kyoff, char * str, int row) +{ + char c[2] = {'A',0}; + const char * cpt = str; + for (int i=0; i.,SP ", 2); + if (oskbMap == 0) { + lineOSKB(KXOFF,KYOFF, keylables_map1_0, 0); + lineOSKB(KXOFF,KYOFF, keylables_map1_1, 1); + lineOSKB(KXOFF,KYOFF, keylables_map1_2, 2); + } + else if (oskbMap == 1) { + lineOSKB(KXOFF,KYOFF, keylables_map2_0, 0); + lineOSKB(KXOFF,KYOFF, keylables_map2_1, 1); + lineOSKB(KXOFF,KYOFF, keylables_map2_2, 2); + } + else { + lineOSKB(KXOFF,KYOFF, keylables_map3_0, 0); + lineOSKB(KXOFF,KYOFF, keylables_map3_1, 1); + 
lineOSKB(KXOFF,KYOFF, keylables_map3_2, 2); + } +} + +void toggleOskb(bool forceoff) { + if (forceoff) oskbOn=true; + if (oskbOn) { + oskbOn = false; + tft.fillScreenNoDma(RGBVAL16(0x00,0x00,0x00)); + tft.drawTextNoDma(0,32, "Press USER2 to toggle onscreen keyboard.", RGBVAL16(0xff,0xff,0xff), RGBVAL16(0x00,0x00,0x00), true); + } else { + oskbOn = true; + tft.fillScreenNoDma(RGBVAL16(0x00,0x00,0x00)); + tft.drawTextNoDma(0,32, " Press USER2 to exit onscreen keyboard. ", RGBVAL16(0xff,0xff,0xff), RGBVAL16(0x00,0x00,0x00), true); + tft.drawTextNoDma(0,64, " (USER1 to toggle between keymaps) ", RGBVAL16(0x00,0xff,0xff), RGBVAL16(0x00,0x00,0xff), true); + tft.drawRectNoDma(KXOFF,KYOFF, 22*8, 3*16, RGBVAL16(0x00,0x00,0xFF)); + drawOskb(); + } +} + +static int handleOskb(void) +{ + int retval = 0; + + uint16_t bClick = bLastState & ~oskbBLastState; + oskbBLastState = bLastState; + /* + static const char * digits = "0123456789ABCDEF"; + char buf[5] = {0,0,0,0,0}; + int val = bClick; + buf[0] = digits[(val>>12)&0xf]; + buf[1] = digits[(val>>8)&0xf]; + buf[2] = digits[(val>>4)&0xf]; + buf[3] = digits[val&0xf]; + tft.drawTextNoDma(0,KYOFF+ 64,buf,RGBVAL16(0x00,0x00,0x00),RGBVAL16(0xFF,0xFF,0xFF),1); + */ + if (bClick & MASK_KEY_USER2) + { + toggleOskb(false); + } + if (oskbOn) + { + bool updated = true; + if (bClick & MASK_KEY_USER1) + { + oskbMap += 1; + if (oskbMap == 3) oskbMap = 0; + } + else if (bClick & MASK_JOY2_LEFT) + { + cxpos++; + if (cxpos >= KWIDTH) cxpos = 0; + } + else if (bClick & MASK_JOY2_RIGHT) + { + cxpos--; + if (cxpos < 0) cxpos = KWIDTH-1; + } + else if (bClick & MASK_JOY2_DOWN) + { + cypos++; + if (cypos >= KHEIGHT) cypos = 0; + } + else if (bClick & MASK_JOY2_UP) + { + cypos--; + if (cypos < 0) cypos = KHEIGHT-1; + } + else if (oskbBLastState & MASK_JOY2_BTN) + { + retval = cypos*KWIDTH+cxpos+1; + if (retval) { + retval--; + //if (retval & 1) retval = key_map2[retval>>1]; + //else retval = key_map1[retval>>1]; + if (oskbMap == 0) { + retval = 
key_map1[retval]; + } + else if (oskbMap == 1) { + retval = key_map2[retval]; + } + else { + retval = key_map3[retval]; + } + //if (retval) { toggleOskb(true); updated=false; }; + } + } + else { + updated=false; + } + if (updated) drawOskb(); + } + + return retval; +} +#endif + /******************************** * Input and keyboard ********************************/ @@ -317,45 +492,100 @@ int emu_ReadKeys(void) #ifdef PICOMPUTER keymatrix_hitrow = -1; unsigned char row; - unsigned short cols[6]={1,2,3,4,5,14}; + unsigned short cols[6]={KCOLOUT1,KCOLOUT2,KCOLOUT3,KCOLOUT4,KCOLOUT5,KCOLOUT6}; + unsigned char keymatrixtmp[6]; + for (int i=0;i<6;i++){ -// gpio_set_dir(cols[i], GPIO_OUT); + gpio_set_dir(cols[i], GPIO_OUT); gpio_put(cols[i], 0); #ifdef SWAP_ALT_DEL sleep_us(1); //__asm volatile ("nop\n"); // 4-8ns -#endif +#endif row=0; - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(8) ? 0 : 0x02); - row |= (gpio_get(6) ? 0 : 0x04); - row |= (gpio_get(15) ? 0 : 0x08); - row |= (gpio_get(7) ? 0 : 0x10); - row |= (gpio_get(22) ? 0 : 0x20); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN4) ? 0 : 0x02); + row |= (gpio_get(KROWIN1) ? 0 : 0x04); + row |= (gpio_get(KROWIN3) ? 0 : 0x08); + row |= (gpio_get(KROWIN5) ? 0 : 0x10); + row |= (gpio_get(KROWIN6) ? 0 : 0x20); + //gpio_set_dir(cols[i], GPIO_OUT); gpio_put(cols[i], 1); -// gpio_set_dir(cols[i], GPIO_IN); - keymatrix[i]=row; + gpio_set_dir(cols[i], GPIO_IN); + gpio_disable_pulls(cols[i]); + keymatrixtmp[i] = row; } +#ifdef MULTI_DEBOUNCE + for (int i=0;i<6;i++){ + gpio_set_dir(cols[i], GPIO_OUT); + gpio_put(cols[i], 0); +#ifdef SWAP_ALT_DEL + sleep_us(1); + //__asm volatile ("nop\n"); // 4-8ns +#endif + row=0; + row |= (gpio_get(KROWIN2) ? 
0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN4) ? 0 : 0x02); + row |= (gpio_get(KROWIN1) ? 0 : 0x04); + row |= (gpio_get(KROWIN3) ? 0 : 0x08); + row |= (gpio_get(KROWIN5) ? 0 : 0x10); + row |= (gpio_get(KROWIN6) ? 0 : 0x20); + //gpio_set_dir(cols[i], GPIO_OUT); + gpio_put(cols[i], 1); + gpio_set_dir(cols[i], GPIO_IN); + gpio_disable_pulls(cols[i]); + keymatrixtmp[i] |= row; + } + + for (int i=0;i<6;i++){ + gpio_set_dir(cols[i], GPIO_OUT); + gpio_put(cols[i], 0); +#ifdef SWAP_ALT_DEL + sleep_us(1); + //__asm volatile ("nop\n"); // 4-8ns +#endif + row=0; + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN4) ? 0 : 0x02); + row |= (gpio_get(KROWIN1) ? 0 : 0x04); + row |= (gpio_get(KROWIN3) ? 0 : 0x08); + row |= (gpio_get(KROWIN5) ? 0 : 0x10); + row |= (gpio_get(KROWIN6) ? 
0 : 0x20); + //gpio_set_dir(cols[i], GPIO_OUT); + gpio_put(cols[i], 1); + gpio_set_dir(cols[i], GPIO_IN); + gpio_disable_pulls(cols[i]); + keymatrixtmp[i] |= row; + } +#endif + #ifdef SWAP_ALT_DEL // Swap ALT and DEL - unsigned char alt = keymatrix[0] & 0x02; - unsigned char del = keymatrix[5] & 0x20; - keymatrix[0] &= ~0x02; - keymatrix[5] &= ~0x20; - if (alt) keymatrix[5] |= 0x20; - if (del) keymatrix[0] |= 0x02; + unsigned char alt = keymatrixtmp[0] & 0x02; + unsigned char del = keymatrixtmp[5] & 0x20; + keymatrixtmp[0] &= ~0x02; + keymatrixtmp[5] &= ~0x20; + if (alt) keymatrixtmp[5] |= 0x20; + if (del) keymatrixtmp[0] |= 0x02; #endif bool alt_pressed=false; - if ( keymatrix[5] & 0x20 ) {alt_pressed=true; keymatrix[5] &= ~0x20;}; + if ( keymatrixtmp[5] & 0x20 ) {alt_pressed=true; keymatrixtmp[5] &= ~0x20;}; for (int i=0;i<6;i++){ - row = keymatrix[i]; + row = keymatrixtmp[i]; if (row) keymatrix_hitrow=i; + keymatrix[i] = row; } //6,9,15,8,7,22 @@ -399,7 +629,8 @@ int emu_ReadKeys(void) hundred_ms_cnt += 1; // 2 if (hundred_ms_cnt >= 2) { - hundred_ms_cnt = 0; + hundred_ms_cnt = 0; + /* if ( (time_ms-keypress_t_ms) < 500) { if (key_alt == false) @@ -411,13 +642,14 @@ int emu_ReadKeys(void) key_alt = false; } } + */ } } } else { // Keep press if (hundred_ms_cnt == 1) { - if ((to_ms_since_boot (get_absolute_time())-keypress_t_ms) > 1000) + if ((to_ms_since_boot (get_absolute_time())-keypress_t_ms) > 2000) { if (key_alt == false) { @@ -460,6 +692,12 @@ int emu_ReadKeys(void) || (retval & MASK_KEY_USER4 ) ) { } + +#if (defined(ILI9341) || defined(ST7789)) && defined(USE_VGA) + if (oskbOn) { + retval |= MASK_OSKB; + } +#endif return (retval); } @@ -487,8 +725,6 @@ int emu_ReadI2CKeyboard(void) { } if (keymatrix_hitrow >=0 ) { unsigned short match = ((unsigned short)keymatrix_hitrow<<8) | keymatrix[keymatrix_hitrow]; - //if ( (match == 0x002 ) ) return 0; // shift or fn - //if (match < 0x100 ) match = match & ~0x002; // ignore shift key for (int i=0; i>12)&0xf]; + 
buf[1] = digits[(val>>8)&0xf]; + buf[2] = digits[(val>>4)&0xf]; + buf[3] = digits[val&0xf]; + tft.drawTextNoDma(0,KYOFF+ 64,buf,RGBVAL16(0x00,0x00,0x00),RGBVAL16(0xFF,0xFF,0xFF),1); + */ if (bClick & MASK_KEY_USER2) { toggleOskb(false); @@ -296,7 +306,7 @@ static int handleOskb(void) else { retval = key_map3[retval]; } - if (retval) { toggleOskb(true); updated=false; }; + //if (retval) { toggleOskb(true); updated=false; }; } } else { @@ -305,17 +315,6 @@ static int handleOskb(void) if (updated) drawOskb(); } - /* - static const char * digits = "0123456789ABCDEF"; - char buf[5] = {0,0,0,0,0}; - int val = retval; - buf[0] = digits[(val>>12)&0xf]; - buf[1] = digits[(val>>8)&0xf]; - buf[2] = digits[(val>>4)&0xf]; - buf[3] = digits[val&0xf]; - tft.drawTextNoDma(0,0,buf,RGBVAL16(0x00,0x00,0x00),RGBVAL16(0xFF,0xFF,0xFF),1); - */ - return retval; } #endif @@ -493,7 +492,7 @@ int emu_ReadKeys(void) #ifdef PICOMPUTER keymatrix_hitrow = -1; unsigned char row; - unsigned short cols[6]={1,2,3,4,5,14}; + unsigned short cols[6]={KCOLOUT1,KCOLOUT2,KCOLOUT3,KCOLOUT4,KCOLOUT5,KCOLOUT6}; unsigned char keymatrixtmp[6]; for (int i=0;i<6;i++){ @@ -504,15 +503,15 @@ int emu_ReadKeys(void) //__asm volatile ("nop\n"); // 4-8ns #endif row=0; - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(8) ? 0 : 0x02); - row |= (gpio_get(6) ? 0 : 0x04); - row |= (gpio_get(15) ? 0 : 0x08); - row |= (gpio_get(7) ? 0 : 0x10); - row |= (gpio_get(22) ? 0 : 0x20); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN4) ? 0 : 0x02); + row |= (gpio_get(KROWIN1) ? 0 : 0x04); + row |= (gpio_get(KROWIN3) ? 0 : 0x08); + row |= (gpio_get(KROWIN5) ? 0 : 0x10); + row |= (gpio_get(KROWIN6) ? 
0 : 0x20); //gpio_set_dir(cols[i], GPIO_OUT); gpio_put(cols[i], 1); gpio_set_dir(cols[i], GPIO_IN); @@ -529,15 +528,15 @@ int emu_ReadKeys(void) //__asm volatile ("nop\n"); // 4-8ns #endif row=0; - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(8) ? 0 : 0x02); - row |= (gpio_get(6) ? 0 : 0x04); - row |= (gpio_get(15) ? 0 : 0x08); - row |= (gpio_get(7) ? 0 : 0x10); - row |= (gpio_get(22) ? 0 : 0x20); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN4) ? 0 : 0x02); + row |= (gpio_get(KROWIN1) ? 0 : 0x04); + row |= (gpio_get(KROWIN3) ? 0 : 0x08); + row |= (gpio_get(KROWIN5) ? 0 : 0x10); + row |= (gpio_get(KROWIN6) ? 0 : 0x20); //gpio_set_dir(cols[i], GPIO_OUT); gpio_put(cols[i], 1); gpio_set_dir(cols[i], GPIO_IN); @@ -553,15 +552,15 @@ int emu_ReadKeys(void) //__asm volatile ("nop\n"); // 4-8ns #endif row=0; - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(8) ? 0 : 0x02); - row |= (gpio_get(6) ? 0 : 0x04); - row |= (gpio_get(15) ? 0 : 0x08); - row |= (gpio_get(7) ? 0 : 0x10); - row |= (gpio_get(22) ? 0 : 0x20); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN4) ? 0 : 0x02); + row |= (gpio_get(KROWIN1) ? 0 : 0x04); + row |= (gpio_get(KROWIN3) ? 0 : 0x08); + row |= (gpio_get(KROWIN5) ? 0 : 0x10); + row |= (gpio_get(KROWIN6) ? 
0 : 0x20); //gpio_set_dir(cols[i], GPIO_OUT); gpio_put(cols[i], 1); gpio_set_dir(cols[i], GPIO_IN); @@ -851,57 +850,57 @@ void emu_InitJoysticks(void) { gpio_put(KLED, 1); // Output (rows) - gpio_init(1); - gpio_init(2); - gpio_init(3); - gpio_init(4); - gpio_init(5); - gpio_init(14); - gpio_set_dir(1, GPIO_OUT); - gpio_set_dir(2, GPIO_OUT); - gpio_set_dir(3, GPIO_OUT); - gpio_set_dir(4, GPIO_OUT); - gpio_set_dir(5, GPIO_OUT); - gpio_set_dir(14, GPIO_OUT); - gpio_put(1, 1); - gpio_put(2, 1); - gpio_put(3, 1); - gpio_put(4, 1); - gpio_put(5, 1); - gpio_put(14, 1); + gpio_init(KCOLOUT1); + gpio_init(KCOLOUT2); + gpio_init(KCOLOUT3); + gpio_init(KCOLOUT4); + gpio_init(KCOLOUT5); + gpio_init(KCOLOUT6); + gpio_set_dir(KCOLOUT1, GPIO_OUT); + gpio_set_dir(KCOLOUT2, GPIO_OUT); + gpio_set_dir(KCOLOUT3, GPIO_OUT); + gpio_set_dir(KCOLOUT4, GPIO_OUT); + gpio_set_dir(KCOLOUT5, GPIO_OUT); + gpio_set_dir(KCOLOUT6, GPIO_OUT); + gpio_put(KCOLOUT1, 1); + gpio_put(KCOLOUT2, 1); + gpio_put(KCOLOUT3, 1); + gpio_put(KCOLOUT4, 1); + gpio_put(KCOLOUT5, 1); + gpio_put(KCOLOUT6, 1); // but set as input floating when not used! 
- gpio_set_dir(1, GPIO_IN); - gpio_set_dir(2, GPIO_IN); - gpio_set_dir(3, GPIO_IN); - gpio_set_dir(4, GPIO_IN); - gpio_set_dir(5, GPIO_IN); - gpio_set_dir(14, GPIO_IN); - gpio_disable_pulls(1); - gpio_disable_pulls(2); - gpio_disable_pulls(3); - gpio_disable_pulls(4); - gpio_disable_pulls(5); - gpio_disable_pulls(14); + gpio_set_dir(KCOLOUT1, GPIO_IN); + gpio_set_dir(KCOLOUT2, GPIO_IN); + gpio_set_dir(KCOLOUT3, GPIO_IN); + gpio_set_dir(KCOLOUT4, GPIO_IN); + gpio_set_dir(KCOLOUT5, GPIO_IN); + gpio_set_dir(KCOLOUT6, GPIO_IN); + gpio_disable_pulls(KCOLOUT1); + gpio_disable_pulls(KCOLOUT2); + gpio_disable_pulls(KCOLOUT3); + gpio_disable_pulls(KCOLOUT4); + gpio_disable_pulls(KCOLOUT5); + gpio_disable_pulls(KCOLOUT6); // Input pins (cols) - gpio_init(6); - gpio_init(9); - gpio_init(15); - gpio_init(8); - gpio_init(7); - gpio_init(22); - gpio_set_dir(6,GPIO_IN); - gpio_set_dir(9,GPIO_IN); - gpio_set_dir(15,GPIO_IN); - gpio_set_dir(8,GPIO_IN); - gpio_set_dir(7,GPIO_IN); - gpio_set_dir(22,GPIO_IN); - gpio_pull_up(6); - gpio_pull_up(9); - gpio_pull_up(15); - gpio_pull_up(8); - gpio_pull_up(7); - gpio_pull_up(22); + gpio_init(KROWIN1); + gpio_init(KROWIN2); + gpio_init(KROWIN3); + gpio_init(KROWIN4); + gpio_init(KROWIN5); + gpio_init(KROWIN6); + gpio_set_dir(KROWIN1,GPIO_IN); + gpio_set_dir(KROWIN2,GPIO_IN); + gpio_set_dir(KROWIN3,GPIO_IN); + gpio_set_dir(KROWIN4,GPIO_IN); + gpio_set_dir(KROWIN5,GPIO_IN); + gpio_set_dir(KROWIN6,GPIO_IN); + gpio_pull_up(KROWIN1); + gpio_pull_up(KROWIN2); + gpio_pull_up(KROWIN3); + gpio_pull_up(KROWIN4); + gpio_pull_up(KROWIN5); + gpio_pull_up(KROWIN6); #endif } @@ -1248,7 +1247,9 @@ void emu_init(void) if (emu_ReadKeys() & MASK_JOY2_UP) { #ifdef PICOMPUTERMAX +#ifndef USE_VGA tft.flipscreen(true); +#endif #else tft.flipscreen(true); #endif @@ -1256,7 +1257,9 @@ void emu_init(void) else { #ifdef PICOMPUTERMAX +#ifndef USE_VGA tft.flipscreen(false); +#endif #else tft.flipscreen(false); #endif diff --git a/MCUME_pico/picosnd/emuapi.cpp 
b/MCUME_pico/picosnd/emuapi.cpp index facbc89..122a71e 100644 --- a/MCUME_pico/picosnd/emuapi.cpp +++ b/MCUME_pico/picosnd/emuapi.cpp @@ -13,11 +13,19 @@ extern "C" { #include "iopins.h" } +#if (defined(ILI9341) || defined(ST7789)) && defined(USE_VGA) +// Dual display config, initialize TFT +#include "tft_t_dma.h" +static TFT_T_DMA tft; +#else +// Non Dual display config #ifdef USE_VGA #include "vga_t_dma.h" #else #include "tft_t_dma.h" #endif +extern TFT_T_DMA tft; +#endif #define MAX_FILES 64 @@ -39,7 +47,7 @@ extern "C" { #define MENU_VGA_XOFFSET (MENU_FILE_XOFFSET+MENU_FILE_W+8) #define MENU_VGA_YOFFSET (MENU_VBAR_YOFFSET+MENU_FILE_H-32-37) -extern TFT_T_DMA tft; + static char romspath[64]; static int nbFiles=0; @@ -142,8 +150,175 @@ void emu_Free(void * pt) free(pt); } +void emu_drawText(unsigned short x, unsigned short y, const char * text, unsigned short fgcolor, unsigned short bgcolor, int doublesize) +{ + tft.drawText(x, y, text, fgcolor, bgcolor, doublesize?true:false); +} +/******************************** + * OSKB handling +********************************/ +#if (defined(ILI9341) || defined(ST7789)) && defined(USE_VGA) +// On screen keyboard position +#define KXOFF 28 //64 +#define KYOFF 96 +#define KWIDTH 11 //22 +#define KHEIGHT 3 + +static bool oskbOn = false; +static int cxpos = 0; +static int cypos = 0; +static int oskbMap = 0; +static uint16_t oskbBLastState = 0; + +static void lineOSKB2(int kxoff, int kyoff, char * str, int row) +{ + char c[2] = {'A',0}; + const char * cpt = str; + for (int i=0; i.,SP ", 2); + if (oskbMap == 0) { + lineOSKB(KXOFF,KYOFF, keylables_map1_0, 0); + lineOSKB(KXOFF,KYOFF, keylables_map1_1, 1); + lineOSKB(KXOFF,KYOFF, keylables_map1_2, 2); + } + else if (oskbMap == 1) { + lineOSKB(KXOFF,KYOFF, keylables_map2_0, 0); + lineOSKB(KXOFF,KYOFF, keylables_map2_1, 1); + lineOSKB(KXOFF,KYOFF, keylables_map2_2, 2); + } + else { + lineOSKB(KXOFF,KYOFF, keylables_map3_0, 0); + lineOSKB(KXOFF,KYOFF, keylables_map3_1, 1); + 
lineOSKB(KXOFF,KYOFF, keylables_map3_2, 2); + } +} + +void toggleOskb(bool forceoff) { + if (forceoff) oskbOn=true; + if (oskbOn) { + oskbOn = false; + tft.fillScreenNoDma(RGBVAL16(0x00,0x00,0x00)); + tft.drawTextNoDma(0,32, "Press USER2 to toggle onscreen keyboard.", RGBVAL16(0xff,0xff,0xff), RGBVAL16(0x00,0x00,0x00), true); + } else { + oskbOn = true; + tft.fillScreenNoDma(RGBVAL16(0x00,0x00,0x00)); + tft.drawTextNoDma(0,32, " Press USER2 to exit onscreen keyboard. ", RGBVAL16(0xff,0xff,0xff), RGBVAL16(0x00,0x00,0x00), true); + tft.drawTextNoDma(0,64, " (USER1 to toggle between keymaps) ", RGBVAL16(0x00,0xff,0xff), RGBVAL16(0x00,0x00,0xff), true); + tft.drawRectNoDma(KXOFF,KYOFF, 22*8, 3*16, RGBVAL16(0x00,0x00,0xFF)); + drawOskb(); + } +} + +static int handleOskb(void) +{ + int retval = 0; + + uint16_t bClick = bLastState & ~oskbBLastState; + oskbBLastState = bLastState; + /* + static const char * digits = "0123456789ABCDEF"; + char buf[5] = {0,0,0,0,0}; + int val = bClick; + buf[0] = digits[(val>>12)&0xf]; + buf[1] = digits[(val>>8)&0xf]; + buf[2] = digits[(val>>4)&0xf]; + buf[3] = digits[val&0xf]; + tft.drawTextNoDma(0,KYOFF+ 64,buf,RGBVAL16(0x00,0x00,0x00),RGBVAL16(0xFF,0xFF,0xFF),1); + */ + if (bClick & MASK_KEY_USER2) + { + toggleOskb(false); + } + if (oskbOn) + { + bool updated = true; + if (bClick & MASK_KEY_USER1) + { + oskbMap += 1; + if (oskbMap == 3) oskbMap = 0; + } + else if (bClick & MASK_JOY2_LEFT) + { + cxpos++; + if (cxpos >= KWIDTH) cxpos = 0; + } + else if (bClick & MASK_JOY2_RIGHT) + { + cxpos--; + if (cxpos < 0) cxpos = KWIDTH-1; + } + else if (bClick & MASK_JOY2_DOWN) + { + cypos++; + if (cypos >= KHEIGHT) cypos = 0; + } + else if (bClick & MASK_JOY2_UP) + { + cypos--; + if (cypos < 0) cypos = KHEIGHT-1; + } + else if (oskbBLastState & MASK_JOY2_BTN) + { + retval = cypos*KWIDTH+cxpos+1; + if (retval) { + retval--; + //if (retval & 1) retval = key_map2[retval>>1]; + //else retval = key_map1[retval>>1]; + if (oskbMap == 0) { + retval = 
key_map1[retval]; + } + else if (oskbMap == 1) { + retval = key_map2[retval]; + } + else { + retval = key_map3[retval]; + } + //if (retval) { toggleOskb(true); updated=false; }; + } + } + else { + updated=false; + } + if (updated) drawOskb(); + } + + return retval; +} +#endif + /******************************** * Input and keyboard ********************************/ @@ -317,45 +492,100 @@ int emu_ReadKeys(void) #ifdef PICOMPUTER keymatrix_hitrow = -1; unsigned char row; - unsigned short cols[6]={1,2,3,4,5,14}; + unsigned short cols[6]={KCOLOUT1,KCOLOUT2,KCOLOUT3,KCOLOUT4,KCOLOUT5,KCOLOUT6}; + unsigned char keymatrixtmp[6]; + for (int i=0;i<6;i++){ -// gpio_set_dir(cols[i], GPIO_OUT); + gpio_set_dir(cols[i], GPIO_OUT); gpio_put(cols[i], 0); #ifdef SWAP_ALT_DEL sleep_us(1); //__asm volatile ("nop\n"); // 4-8ns -#endif +#endif row=0; - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(8) ? 0 : 0x02); - row |= (gpio_get(6) ? 0 : 0x04); - row |= (gpio_get(15) ? 0 : 0x08); - row |= (gpio_get(7) ? 0 : 0x10); - row |= (gpio_get(22) ? 0 : 0x20); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN4) ? 0 : 0x02); + row |= (gpio_get(KROWIN1) ? 0 : 0x04); + row |= (gpio_get(KROWIN3) ? 0 : 0x08); + row |= (gpio_get(KROWIN5) ? 0 : 0x10); + row |= (gpio_get(KROWIN6) ? 0 : 0x20); + //gpio_set_dir(cols[i], GPIO_OUT); gpio_put(cols[i], 1); -// gpio_set_dir(cols[i], GPIO_IN); - keymatrix[i]=row; + gpio_set_dir(cols[i], GPIO_IN); + gpio_disable_pulls(cols[i]); + keymatrixtmp[i] = row; } +#ifdef MULTI_DEBOUNCE + for (int i=0;i<6;i++){ + gpio_set_dir(cols[i], GPIO_OUT); + gpio_put(cols[i], 0); +#ifdef SWAP_ALT_DEL + sleep_us(1); + //__asm volatile ("nop\n"); // 4-8ns +#endif + row=0; + row |= (gpio_get(KROWIN2) ? 
0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN4) ? 0 : 0x02); + row |= (gpio_get(KROWIN1) ? 0 : 0x04); + row |= (gpio_get(KROWIN3) ? 0 : 0x08); + row |= (gpio_get(KROWIN5) ? 0 : 0x10); + row |= (gpio_get(KROWIN6) ? 0 : 0x20); + //gpio_set_dir(cols[i], GPIO_OUT); + gpio_put(cols[i], 1); + gpio_set_dir(cols[i], GPIO_IN); + gpio_disable_pulls(cols[i]); + keymatrixtmp[i] |= row; + } + + for (int i=0;i<6;i++){ + gpio_set_dir(cols[i], GPIO_OUT); + gpio_put(cols[i], 0); +#ifdef SWAP_ALT_DEL + sleep_us(1); + //__asm volatile ("nop\n"); // 4-8ns +#endif + row=0; + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN4) ? 0 : 0x02); + row |= (gpio_get(KROWIN1) ? 0 : 0x04); + row |= (gpio_get(KROWIN3) ? 0 : 0x08); + row |= (gpio_get(KROWIN5) ? 0 : 0x10); + row |= (gpio_get(KROWIN6) ? 
0 : 0x20); + //gpio_set_dir(cols[i], GPIO_OUT); + gpio_put(cols[i], 1); + gpio_set_dir(cols[i], GPIO_IN); + gpio_disable_pulls(cols[i]); + keymatrixtmp[i] |= row; + } +#endif + #ifdef SWAP_ALT_DEL // Swap ALT and DEL - unsigned char alt = keymatrix[0] & 0x02; - unsigned char del = keymatrix[5] & 0x20; - keymatrix[0] &= ~0x02; - keymatrix[5] &= ~0x20; - if (alt) keymatrix[5] |= 0x20; - if (del) keymatrix[0] |= 0x02; + unsigned char alt = keymatrixtmp[0] & 0x02; + unsigned char del = keymatrixtmp[5] & 0x20; + keymatrixtmp[0] &= ~0x02; + keymatrixtmp[5] &= ~0x20; + if (alt) keymatrixtmp[5] |= 0x20; + if (del) keymatrixtmp[0] |= 0x02; #endif bool alt_pressed=false; - if ( keymatrix[5] & 0x20 ) {alt_pressed=true; keymatrix[5] &= ~0x20;}; + if ( keymatrixtmp[5] & 0x20 ) {alt_pressed=true; keymatrixtmp[5] &= ~0x20;}; for (int i=0;i<6;i++){ - row = keymatrix[i]; + row = keymatrixtmp[i]; if (row) keymatrix_hitrow=i; + keymatrix[i] = row; } //6,9,15,8,7,22 @@ -399,7 +629,8 @@ int emu_ReadKeys(void) hundred_ms_cnt += 1; // 2 if (hundred_ms_cnt >= 2) { - hundred_ms_cnt = 0; + hundred_ms_cnt = 0; + /* if ( (time_ms-keypress_t_ms) < 500) { if (key_alt == false) @@ -411,13 +642,14 @@ int emu_ReadKeys(void) key_alt = false; } } + */ } } } else { // Keep press if (hundred_ms_cnt == 1) { - if ((to_ms_since_boot (get_absolute_time())-keypress_t_ms) > 1000) + if ((to_ms_since_boot (get_absolute_time())-keypress_t_ms) > 2000) { if (key_alt == false) { @@ -460,6 +692,12 @@ int emu_ReadKeys(void) || (retval & MASK_KEY_USER4 ) ) { } + +#if (defined(ILI9341) || defined(ST7789)) && defined(USE_VGA) + if (oskbOn) { + retval |= MASK_OSKB; + } +#endif return (retval); } @@ -487,8 +725,6 @@ int emu_ReadI2CKeyboard(void) { } if (keymatrix_hitrow >=0 ) { unsigned short match = ((unsigned short)keymatrix_hitrow<<8) | keymatrix[keymatrix_hitrow]; - //if ( (match == 0x002 ) ) return 0; // shift or fn - //if (match < 0x100 ) match = match & ~0x002; // ignore shift key for (int i=0; i>12)&0xf]; + 
buf[1] = digits[(val>>8)&0xf]; + buf[2] = digits[(val>>4)&0xf]; + buf[3] = digits[val&0xf]; + tft.drawTextNoDma(0,KYOFF+ 64,buf,RGBVAL16(0x00,0x00,0x00),RGBVAL16(0xFF,0xFF,0xFF),1); + */ if (bClick & MASK_KEY_USER2) { toggleOskb(false); @@ -296,7 +306,7 @@ static int handleOskb(void) else { retval = key_map3[retval]; } - //if (retval=2) { toggleOskb(true); updated=false; }; + //if (retval) { toggleOskb(true); updated=false; }; } } else { @@ -305,17 +315,6 @@ static int handleOskb(void) if (updated) drawOskb(); } - /* - static const char * digits = "0123456789ABCDEF"; - char buf[5] = {0,0,0,0,0}; - int val = retval; - buf[0] = digits[(val>>12)&0xf]; - buf[1] = digits[(val>>8)&0xf]; - buf[2] = digits[(val>>4)&0xf]; - buf[3] = digits[val&0xf]; - tft.drawTextNoDma(0,0,buf,RGBVAL16(0x00,0x00,0x00),RGBVAL16(0xFF,0xFF,0xFF),1); - */ - return retval; } #endif @@ -493,7 +492,7 @@ int emu_ReadKeys(void) #ifdef PICOMPUTER keymatrix_hitrow = -1; unsigned char row; - unsigned short cols[6]={1,2,3,4,5,14}; + unsigned short cols[6]={KCOLOUT1,KCOLOUT2,KCOLOUT3,KCOLOUT4,KCOLOUT5,KCOLOUT6}; unsigned char keymatrixtmp[6]; for (int i=0;i<6;i++){ @@ -504,15 +503,15 @@ int emu_ReadKeys(void) //__asm volatile ("nop\n"); // 4-8ns #endif row=0; - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(8) ? 0 : 0x02); - row |= (gpio_get(6) ? 0 : 0x04); - row |= (gpio_get(15) ? 0 : 0x08); - row |= (gpio_get(7) ? 0 : 0x10); - row |= (gpio_get(22) ? 0 : 0x20); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN4) ? 0 : 0x02); + row |= (gpio_get(KROWIN1) ? 0 : 0x04); + row |= (gpio_get(KROWIN3) ? 0 : 0x08); + row |= (gpio_get(KROWIN5) ? 0 : 0x10); + row |= (gpio_get(KROWIN6) ? 
0 : 0x20); //gpio_set_dir(cols[i], GPIO_OUT); gpio_put(cols[i], 1); gpio_set_dir(cols[i], GPIO_IN); @@ -529,15 +528,15 @@ int emu_ReadKeys(void) //__asm volatile ("nop\n"); // 4-8ns #endif row=0; - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(8) ? 0 : 0x02); - row |= (gpio_get(6) ? 0 : 0x04); - row |= (gpio_get(15) ? 0 : 0x08); - row |= (gpio_get(7) ? 0 : 0x10); - row |= (gpio_get(22) ? 0 : 0x20); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN4) ? 0 : 0x02); + row |= (gpio_get(KROWIN1) ? 0 : 0x04); + row |= (gpio_get(KROWIN3) ? 0 : 0x08); + row |= (gpio_get(KROWIN5) ? 0 : 0x10); + row |= (gpio_get(KROWIN6) ? 0 : 0x20); //gpio_set_dir(cols[i], GPIO_OUT); gpio_put(cols[i], 1); gpio_set_dir(cols[i], GPIO_IN); @@ -553,15 +552,15 @@ int emu_ReadKeys(void) //__asm volatile ("nop\n"); // 4-8ns #endif row=0; - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(8) ? 0 : 0x02); - row |= (gpio_get(6) ? 0 : 0x04); - row |= (gpio_get(15) ? 0 : 0x08); - row |= (gpio_get(7) ? 0 : 0x10); - row |= (gpio_get(22) ? 0 : 0x20); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN4) ? 0 : 0x02); + row |= (gpio_get(KROWIN1) ? 0 : 0x04); + row |= (gpio_get(KROWIN3) ? 0 : 0x08); + row |= (gpio_get(KROWIN5) ? 0 : 0x10); + row |= (gpio_get(KROWIN6) ? 
0 : 0x20); //gpio_set_dir(cols[i], GPIO_OUT); gpio_put(cols[i], 1); gpio_set_dir(cols[i], GPIO_IN); @@ -851,57 +850,57 @@ void emu_InitJoysticks(void) { gpio_put(KLED, 1); // Output (rows) - gpio_init(1); - gpio_init(2); - gpio_init(3); - gpio_init(4); - gpio_init(5); - gpio_init(14); - gpio_set_dir(1, GPIO_OUT); - gpio_set_dir(2, GPIO_OUT); - gpio_set_dir(3, GPIO_OUT); - gpio_set_dir(4, GPIO_OUT); - gpio_set_dir(5, GPIO_OUT); - gpio_set_dir(14, GPIO_OUT); - gpio_put(1, 1); - gpio_put(2, 1); - gpio_put(3, 1); - gpio_put(4, 1); - gpio_put(5, 1); - gpio_put(14, 1); + gpio_init(KCOLOUT1); + gpio_init(KCOLOUT2); + gpio_init(KCOLOUT3); + gpio_init(KCOLOUT4); + gpio_init(KCOLOUT5); + gpio_init(KCOLOUT6); + gpio_set_dir(KCOLOUT1, GPIO_OUT); + gpio_set_dir(KCOLOUT2, GPIO_OUT); + gpio_set_dir(KCOLOUT3, GPIO_OUT); + gpio_set_dir(KCOLOUT4, GPIO_OUT); + gpio_set_dir(KCOLOUT5, GPIO_OUT); + gpio_set_dir(KCOLOUT6, GPIO_OUT); + gpio_put(KCOLOUT1, 1); + gpio_put(KCOLOUT2, 1); + gpio_put(KCOLOUT3, 1); + gpio_put(KCOLOUT4, 1); + gpio_put(KCOLOUT5, 1); + gpio_put(KCOLOUT6, 1); // but set as input floating when not used! 
- gpio_set_dir(1, GPIO_IN); - gpio_set_dir(2, GPIO_IN); - gpio_set_dir(3, GPIO_IN); - gpio_set_dir(4, GPIO_IN); - gpio_set_dir(5, GPIO_IN); - gpio_set_dir(14, GPIO_IN); - gpio_disable_pulls(1); - gpio_disable_pulls(2); - gpio_disable_pulls(3); - gpio_disable_pulls(4); - gpio_disable_pulls(5); - gpio_disable_pulls(14); + gpio_set_dir(KCOLOUT1, GPIO_IN); + gpio_set_dir(KCOLOUT2, GPIO_IN); + gpio_set_dir(KCOLOUT3, GPIO_IN); + gpio_set_dir(KCOLOUT4, GPIO_IN); + gpio_set_dir(KCOLOUT5, GPIO_IN); + gpio_set_dir(KCOLOUT6, GPIO_IN); + gpio_disable_pulls(KCOLOUT1); + gpio_disable_pulls(KCOLOUT2); + gpio_disable_pulls(KCOLOUT3); + gpio_disable_pulls(KCOLOUT4); + gpio_disable_pulls(KCOLOUT5); + gpio_disable_pulls(KCOLOUT6); // Input pins (cols) - gpio_init(6); - gpio_init(9); - gpio_init(15); - gpio_init(8); - gpio_init(7); - gpio_init(22); - gpio_set_dir(6,GPIO_IN); - gpio_set_dir(9,GPIO_IN); - gpio_set_dir(15,GPIO_IN); - gpio_set_dir(8,GPIO_IN); - gpio_set_dir(7,GPIO_IN); - gpio_set_dir(22,GPIO_IN); - gpio_pull_up(6); - gpio_pull_up(9); - gpio_pull_up(15); - gpio_pull_up(8); - gpio_pull_up(7); - gpio_pull_up(22); + gpio_init(KROWIN1); + gpio_init(KROWIN2); + gpio_init(KROWIN3); + gpio_init(KROWIN4); + gpio_init(KROWIN5); + gpio_init(KROWIN6); + gpio_set_dir(KROWIN1,GPIO_IN); + gpio_set_dir(KROWIN2,GPIO_IN); + gpio_set_dir(KROWIN3,GPIO_IN); + gpio_set_dir(KROWIN4,GPIO_IN); + gpio_set_dir(KROWIN5,GPIO_IN); + gpio_set_dir(KROWIN6,GPIO_IN); + gpio_pull_up(KROWIN1); + gpio_pull_up(KROWIN2); + gpio_pull_up(KROWIN3); + gpio_pull_up(KROWIN4); + gpio_pull_up(KROWIN5); + gpio_pull_up(KROWIN6); #endif } @@ -1248,7 +1247,9 @@ void emu_init(void) if (emu_ReadKeys() & MASK_JOY2_UP) { #ifdef PICOMPUTERMAX +#ifndef USE_VGA tft.flipscreen(true); +#endif #else tft.flipscreen(true); #endif @@ -1256,7 +1257,9 @@ void emu_init(void) else { #ifdef PICOMPUTERMAX +#ifndef USE_VGA tft.flipscreen(false); +#endif #else tft.flipscreen(false); #endif diff --git a/MCUME_pico/picovga_t4/VGA_font8x8.h 
b/MCUME_pico/picovga_t4/VGA_font8x8.h new file mode 100644 index 0000000..8a556f3 --- /dev/null +++ b/MCUME_pico/picovga_t4/VGA_font8x8.h @@ -0,0 +1,148 @@ + +// Font: c64_lower.64c + +const unsigned char font8x8[128][8] = +{ + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0000 (nul) + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0001 + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0002 + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0003 + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0004 + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0005 + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0006 + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0007 + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0008 + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0009 + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+000A + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+000B + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+000C + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+000D + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+000E + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+000F + + { 0x7f, 0x41, 0x41, 0x41, 0x41, 0x41, 0x7f, 0x00 }, // Space // 0x10 + { 0x00, 0x27, 0x31, 0x27, 0x21, 0x71, 0x00, 0x00 }, // F1 // 0x11 + { 0x00, 0x77, 0x41, 0x77, 0x11, 0x71, 0x00, 0x00 }, // F2 + { 0x00, 0x77, 0x41, 0x77, 0x41, 0x71, 0x00, 0x00 }, // F3 + { 0x00, 0x17, 0x51, 0x77, 0x41, 0x41, 0x00, 0x00 }, // F4 + { 0x00, 0x77, 0x11, 0x77, 0x41, 0x71, 0x00, 0x00 }, // F5 + { 0x00, 0x77, 0x11, 0x77, 0x51, 0x71, 0x00, 0x00 }, // F6 + { 0x00, 0x77, 0x41, 0x47, 0x41, 0x41, 0x00, 0x00 }, // F7 + { 0x00, 0x77, 0x51, 0x77, 0x51, 0x71, 0x00, 0x00 }, // F8 // 0x18 + { 0x00, 0x00, 0x20, 0x24, 0x3e, 0x04, 0x00, 0x00 }, // Return // 0x19 + { 0x00, 0x59, 0x4b, 0x5b, 0x4b, 0xd9, 0x00, 0x00 }, // Del // 0x1A + //{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00}, // U+0010 + //{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0011 + //{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0012 + //{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0013 + //{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0014 + //{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0015 + //{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0016 + //{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0017 + //{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0018 + //{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0019 + //{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+001A + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+001B + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+001C + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+001D + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+001E + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+001F + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0020 (space) + { 0x18, 0x3C, 0x3C, 0x18, 0x18, 0x00, 0x18, 0x00}, // U+0021 (!) + { 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0022 (") + { 0x36, 0x36, 0x7F, 0x36, 0x7F, 0x36, 0x36, 0x00}, // U+0023 (#) + { 0x0C, 0x3E, 0x03, 0x1E, 0x30, 0x1F, 0x0C, 0x00}, // U+0024 ($) + { 0x00, 0x63, 0x33, 0x18, 0x0C, 0x66, 0x63, 0x00}, // U+0025 (%) + { 0x1C, 0x36, 0x1C, 0x6E, 0x3B, 0x33, 0x6E, 0x00}, // U+0026 (&) + { 0x06, 0x06, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0027 (') + { 0x18, 0x0C, 0x06, 0x06, 0x06, 0x0C, 0x18, 0x00}, // U+0028 (() + { 0x06, 0x0C, 0x18, 0x18, 0x18, 0x0C, 0x06, 0x00}, // U+0029 ()) + { 0x00, 0x66, 0x3C, 0xFF, 0x3C, 0x66, 0x00, 0x00}, // U+002A (*) + { 0x00, 0x0C, 0x0C, 0x3F, 0x0C, 0x0C, 0x00, 0x00}, // U+002B (+) + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x06}, // U+002C (,) + { 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x00}, // U+002D (-) + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x00}, // U+002E (.) 
+ { 0x60, 0x30, 0x18, 0x0C, 0x06, 0x03, 0x01, 0x00}, // U+002F (/) + { 0x3E, 0x63, 0x73, 0x7B, 0x6F, 0x67, 0x3E, 0x00}, // U+0030 (0) + { 0x0C, 0x0E, 0x0C, 0x0C, 0x0C, 0x0C, 0x3F, 0x00}, // U+0031 (1) + { 0x1E, 0x33, 0x30, 0x1C, 0x06, 0x33, 0x3F, 0x00}, // U+0032 (2) + { 0x1E, 0x33, 0x30, 0x1C, 0x30, 0x33, 0x1E, 0x00}, // U+0033 (3) + { 0x38, 0x3C, 0x36, 0x33, 0x7F, 0x30, 0x78, 0x00}, // U+0034 (4) + { 0x3F, 0x03, 0x1F, 0x30, 0x30, 0x33, 0x1E, 0x00}, // U+0035 (5) + { 0x1C, 0x06, 0x03, 0x1F, 0x33, 0x33, 0x1E, 0x00}, // U+0036 (6) + { 0x3F, 0x33, 0x30, 0x18, 0x0C, 0x0C, 0x0C, 0x00}, // U+0037 (7) + { 0x1E, 0x33, 0x33, 0x1E, 0x33, 0x33, 0x1E, 0x00}, // U+0038 (8) + { 0x1E, 0x33, 0x33, 0x3E, 0x30, 0x18, 0x0E, 0x00}, // U+0039 (9) + { 0x00, 0x0C, 0x0C, 0x00, 0x00, 0x0C, 0x0C, 0x00}, // U+003A (:) + { 0x00, 0x0C, 0x0C, 0x00, 0x00, 0x0C, 0x0C, 0x06}, // U+003B (//) + { 0x18, 0x0C, 0x06, 0x03, 0x06, 0x0C, 0x18, 0x00}, // U+003C (<) + { 0x00, 0x00, 0x3F, 0x00, 0x00, 0x3F, 0x00, 0x00}, // U+003D (=) + { 0x06, 0x0C, 0x18, 0x30, 0x18, 0x0C, 0x06, 0x00}, // U+003E (>) + { 0x1E, 0x33, 0x30, 0x18, 0x0C, 0x00, 0x0C, 0x00}, // U+003F (?) 
+ { 0x3E, 0x63, 0x7B, 0x7B, 0x7B, 0x03, 0x1E, 0x00}, // U+0040 (@) + { 0x0C, 0x1E, 0x33, 0x33, 0x3F, 0x33, 0x33, 0x00}, // U+0041 (A) + { 0x3F, 0x66, 0x66, 0x3E, 0x66, 0x66, 0x3F, 0x00}, // U+0042 (B) + { 0x3C, 0x66, 0x03, 0x03, 0x03, 0x66, 0x3C, 0x00}, // U+0043 (C) + { 0x1F, 0x36, 0x66, 0x66, 0x66, 0x36, 0x1F, 0x00}, // U+0044 (D) + { 0x7F, 0x46, 0x16, 0x1E, 0x16, 0x46, 0x7F, 0x00}, // U+0045 (E) + { 0x7F, 0x46, 0x16, 0x1E, 0x16, 0x06, 0x0F, 0x00}, // U+0046 (F) + { 0x3C, 0x66, 0x03, 0x03, 0x73, 0x66, 0x7C, 0x00}, // U+0047 (G) + { 0x33, 0x33, 0x33, 0x3F, 0x33, 0x33, 0x33, 0x00}, // U+0048 (H) + { 0x1E, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+0049 (I) + { 0x78, 0x30, 0x30, 0x30, 0x33, 0x33, 0x1E, 0x00}, // U+004A (J) + { 0x67, 0x66, 0x36, 0x1E, 0x36, 0x66, 0x67, 0x00}, // U+004B (K) + { 0x0F, 0x06, 0x06, 0x06, 0x46, 0x66, 0x7F, 0x00}, // U+004C (L) + { 0x63, 0x77, 0x7F, 0x7F, 0x6B, 0x63, 0x63, 0x00}, // U+004D (M) + { 0x63, 0x67, 0x6F, 0x7B, 0x73, 0x63, 0x63, 0x00}, // U+004E (N) + { 0x1C, 0x36, 0x63, 0x63, 0x63, 0x36, 0x1C, 0x00}, // U+004F (O) + { 0x3F, 0x66, 0x66, 0x3E, 0x06, 0x06, 0x0F, 0x00}, // U+0050 (P) + { 0x1E, 0x33, 0x33, 0x33, 0x3B, 0x1E, 0x38, 0x00}, // U+0051 (Q) + { 0x3F, 0x66, 0x66, 0x3E, 0x36, 0x66, 0x67, 0x00}, // U+0052 (R) + { 0x1E, 0x33, 0x07, 0x0E, 0x38, 0x33, 0x1E, 0x00}, // U+0053 (S) + { 0x3F, 0x2D, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+0054 (T) + { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x3F, 0x00}, // U+0055 (U) + { 0x33, 0x33, 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x00}, // U+0056 (V) + { 0x63, 0x63, 0x63, 0x6B, 0x7F, 0x77, 0x63, 0x00}, // U+0057 (W) + { 0x63, 0x63, 0x36, 0x1C, 0x1C, 0x36, 0x63, 0x00}, // U+0058 (X) + { 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x0C, 0x1E, 0x00}, // U+0059 (Y) + { 0x7F, 0x63, 0x31, 0x18, 0x4C, 0x66, 0x7F, 0x00}, // U+005A (Z) + { 0x1E, 0x06, 0x06, 0x06, 0x06, 0x06, 0x1E, 0x00}, // U+005B ([) + { 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0x40, 0x00}, // U+005C (\) + { 0x1E, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1E, 0x00}, // 
U+005D (]) + { 0x08, 0x1C, 0x36, 0x63, 0x00, 0x00, 0x00, 0x00}, // U+005E (^) + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF}, // U+005F (_) + { 0x0C, 0x0C, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0060 (`) + { 0x00, 0x00, 0x1E, 0x30, 0x3E, 0x33, 0x6E, 0x00}, // U+0061 (a) + { 0x07, 0x06, 0x06, 0x3E, 0x66, 0x66, 0x3B, 0x00}, // U+0062 (b) + { 0x00, 0x00, 0x1E, 0x33, 0x03, 0x33, 0x1E, 0x00}, // U+0063 (c) + { 0x38, 0x30, 0x30, 0x3e, 0x33, 0x33, 0x6E, 0x00}, // U+0064 (d) + { 0x00, 0x00, 0x1E, 0x33, 0x3f, 0x03, 0x1E, 0x00}, // U+0065 (e) + { 0x1C, 0x36, 0x06, 0x0f, 0x06, 0x06, 0x0F, 0x00}, // U+0066 (f) + { 0x00, 0x00, 0x6E, 0x33, 0x33, 0x3E, 0x30, 0x1F}, // U+0067 (g) + { 0x07, 0x06, 0x36, 0x6E, 0x66, 0x66, 0x67, 0x00}, // U+0068 (h) + { 0x0C, 0x00, 0x0E, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+0069 (i) + { 0x30, 0x00, 0x30, 0x30, 0x30, 0x33, 0x33, 0x1E}, // U+006A (j) + { 0x07, 0x06, 0x66, 0x36, 0x1E, 0x36, 0x67, 0x00}, // U+006B (k) + { 0x0E, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+006C (l) + { 0x00, 0x00, 0x33, 0x7F, 0x7F, 0x6B, 0x63, 0x00}, // U+006D (m) + { 0x00, 0x00, 0x1F, 0x33, 0x33, 0x33, 0x33, 0x00}, // U+006E (n) + { 0x00, 0x00, 0x1E, 0x33, 0x33, 0x33, 0x1E, 0x00}, // U+006F (o) + { 0x00, 0x00, 0x3B, 0x66, 0x66, 0x3E, 0x06, 0x0F}, // U+0070 (p) + { 0x00, 0x00, 0x6E, 0x33, 0x33, 0x3E, 0x30, 0x78}, // U+0071 (q) + { 0x00, 0x00, 0x3B, 0x6E, 0x66, 0x06, 0x0F, 0x00}, // U+0072 (r) + { 0x00, 0x00, 0x3E, 0x03, 0x1E, 0x30, 0x1F, 0x00}, // U+0073 (s) + { 0x08, 0x0C, 0x3E, 0x0C, 0x0C, 0x2C, 0x18, 0x00}, // U+0074 (t) + { 0x00, 0x00, 0x33, 0x33, 0x33, 0x33, 0x6E, 0x00}, // U+0075 (u) + { 0x00, 0x00, 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x00}, // U+0076 (v) + { 0x00, 0x00, 0x63, 0x6B, 0x7F, 0x7F, 0x36, 0x00}, // U+0077 (w) + { 0x00, 0x00, 0x63, 0x36, 0x1C, 0x36, 0x63, 0x00}, // U+0078 (x) + { 0x00, 0x00, 0x33, 0x33, 0x33, 0x3E, 0x30, 0x1F}, // U+0079 (y) + { 0x00, 0x00, 0x3F, 0x19, 0x0C, 0x26, 0x3F, 0x00}, // U+007A (z) + { 0x38, 0x0C, 0x0C, 0x07, 0x0C, 0x0C, 0x38, 
0x00}, // U+007B ({) + { 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x00}, // U+007C (|) + { 0x07, 0x0C, 0x0C, 0x38, 0x0C, 0x0C, 0x07, 0x00}, // U+007D (}) + { 0x6E, 0x3B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+007E (~) + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} // U+007F +}; + + diff --git a/MCUME_pico/picovga_t4/VGA_t4.cpp b/MCUME_pico/picovga_t4/VGA_t4.cpp new file mode 100755 index 0000000..a5666ae --- /dev/null +++ b/MCUME_pico/picovga_t4/VGA_t4.cpp @@ -0,0 +1,1689 @@ +/* + This file is part of VGA_t4 library. + Just to make picovga compatible with VGA_t4 API + + VGA_t4 library is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Copyright (C) 2020 J-M Harvengt +*/ + +#include "platform_config.h" +#include "iopins.h" + +#ifdef USE_VGA + +#include "VGA_t4.h" +#include "VGA_font8x8.h" +#include "include.h" + +#define R16(rgb) ((rgb>>8)&0xf8) +#define G16(rgb) ((rgb>>3)&0xfc) +#define B16(rgb) ((rgb<<3)&0xf8) + +// 8 bits 320x240 frame buffer => 64K +static vga_pixel * visible_framebuffer = NULL; +static vga_pixel * framebuffer = NULL; +static vga_pixel * fb0 = NULL; +static vga_pixel * fb1 = NULL; + +static int fb_width; +static int fb_height; +static int fb_stride; + + +PolyDef PolySet; // will contain a polygon data + +static const sVmode* vmode=NULL; +static const sVmode* volatile VgaVmodeReq = NULL; // request to reinitialize videomode, 1=only stop driver + + +static semaphore_t core1_initted; +static void core1_func(); + +#define RGBVAL16(r,g,b) ( (((b>>3)&0x1f)<<11) | (((g>>2)&0x3f)<<5) | (((r>>3)&0x1f)<<0) ) + +static void core1_sio_irq(); + +static void VgaInitReql(const sVmode* vmode) +{ + if (vmode == NULL) vmode = (const sVmode*)1; + __dmb(); + VgaVmodeReq = vmode; + while (VgaVmodeReq != NULL) { __dmb(); } +} + +static void core1_func() +{ + const sVmode* v; 
+ + multicore_fifo_clear_irq(); + irq_set_exclusive_handler(SIO_IRQ_PROC1,core1_sio_irq); + //irq_set_priority (SIO_IRQ_PROC1, 129); + irq_set_enabled(SIO_IRQ_PROC1,true); + + sem_release(&core1_initted); + + while (true) + { + __dmb(); + + // initialize videomode + v = VgaVmodeReq; + if (v != NULL) + { + if ((u32)v == (u32)1) { + //VgaTerm(); // terminate + } + else + VgaInit(v); + __dmb(); + VgaVmodeReq = NULL; + } + } + + /* + v = VgaVmodeReq; + VgaInit(v); + + while (true) { + tight_loop_contents(); + } + */ +} + +VGA_T4::VGA_T4() +{ +} + +void VGA_T4::tweak_video(int shiftdelta, int numdelta, int denomdelta) +{ +} + +// display VGA image +vga_error_t VGA_T4::begin(vga_mode_t mode) +{ + switch(mode) { + case VGA_MODE_320x240: + fb_width = 320; + fb_height = 240; + fb_stride = fb_width; + break; + case VGA_MODE_352x240: + break; + case VGA_MODE_400x240: + break; + } + + /* initialize gfx buffer */ + if (fb0 == NULL) { + void *mallocpt = malloc(fb_stride*fb_height*sizeof(vga_pixel)+4); + fb0 = (vga_pixel *)((void*)((intptr_t)mallocpt & ~3)); + } + + visible_framebuffer = fb0; + framebuffer = fb0; + for (uint i = 0; i < fb_height*fb_width; i++) { + framebuffer[i] = VGA_RGB(rand() % 255,rand() % 255,rand() % 255); + } + + // create a semaphore to be posted when audio init is complete + sem_init(&core1_initted, 0, 1); + + multicore_launch_core1(core1_func); + vmode = Video(DEV_VGA, RES_QVGA, FORM_8BIT, framebuffer); + VgaInitReql(vmode); + + // wait for initialization of audio to be complete + sem_acquire_blocking(&core1_initted); + + return(VGA_OK); +} + +void VGA_T4::end() +{ +} + +void VGA_T4::debug() +{ +} + +// retrieve size of the frame buffer +int VGA_T4::get_frame_buffer_size(int *width, int *height) +{ + if (width != nullptr) *width = fb_width; + if (height != nullptr) *height = fb_height; + return fb_stride; +} + +void VGA_T4::waitSync() +{ + WaitVSync(); +} + +void VGA_T4::waitLine(int line) +{ +// while (currentLine != line) {}; +} + +void 
VGA_T4::clear(vga_pixel color) { + int i,j; + for (j=0; j=0) && (x<=fb_width) && (y>=0) && (y<=fb_height)) + framebuffer[y*fb_stride+x] = color; +} + +vga_pixel VGA_T4::getPixel(int x, int y){ + return(framebuffer[y*fb_stride+x]); +} + +vga_pixel * VGA_T4::getLineBuffer(int j) { + return (&framebuffer[j*fb_stride]); +} + +void VGA_T4::drawRect(int16_t x, int16_t y, int16_t w, int16_t h, vga_pixel color) { + int i,j,l=y; + for (j=0; j> 1; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + l++; + } + dst=&framebuffer[l*fb_stride+x]; + bits = *charpt++; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + l++; + } + x +=8; + } +} + +void VGA_T4::drawSprite(int16_t x, int16_t y, const int16_t *bitmap) { + drawSprite(x,y,bitmap, 0,0,0,0); +} + +void VGA_T4::drawSprite(int16_t x, int16_t y, const int16_t *bitmap, uint16_t arx, uint16_t ary, uint16_t arw, uint16_t arh) +{ + int bmp_offx = 0; + int bmp_offy = 0; + int16_t *bmp_ptr; + + int w =*bitmap++; + int h = *bitmap++; + + + if ( (arw == 
0) || (arh == 0) ) { + // no crop window + arx = x; + ary = y; + arw = w; + arh = h; + } + else { + if ( (x>(arx+arw)) || ((x+w)(ary+arh)) || ((y+h) arx) && (x<(arx+arw)) ) { + arw = arw - (x-arx); + arx = arx + (x-arx); + } else { + bmp_offx = arx; + } + if ( ((x+w) > arx) && ((x+w)<(arx+arw)) ) { + arw -= (arx+arw-x-w); + } + if ( (y > ary) && (y<(ary+arh)) ) { + arh = arh - (y-ary); + ary = ary + (y-ary); + } else { + bmp_offy = ary; + } + if ( ((y+h) > ary) && ((y+h)<(ary+arh)) ) { + arh -= (ary+arh-y-h); + } + } + + + int l=ary; + bitmap = bitmap + bmp_offy*w + bmp_offx; + for (int row=0;row 2) ) y += (fb_height-height)/2; + vga_pixel * dst=&framebuffer[y*fb_stride]; + if (width > fb_width) { +#ifdef TFT_LINEARINT + int delta = (width/(width-fb_width))-1; + int pos = delta; + for (int i=0; i> 8]]; + pos +=step; + } +#endif + } + else if ((width*2) == fb_width) { + for (int i=0; i 2) ) y += (fb_height-height)/2; + uint8_t * dst=&framebuffer[y*fb_stride]; + if (width > fb_width) { + int step = ((width << 8)/fb_width); + int pos = 0; + for (int i=0; i> 8]; + pos +=step; + } + } + else if ((width*2) == fb_width) { + for (int i=0; i 2) ) y += (fb_height-height)/2; + uint8_t * dst=&framebuffer[y*fb_stride]; + if (width > fb_width) { + int step = ((width << 8)/fb_width); + int pos = 0; + for (int i=0; i> 8]; + *dst++ = VGA_RGB(R16(pix),G16(pix),B16(pix)); + pos +=step; + } + } + else if ((width*2) == fb_width) { + for (int i=0; i 2) ) { + ysrc += (fb_height-height)/2; + ydst += (fb_height-height)/2; + } + uint8_t * src=&framebuffer[ysrc*fb_stride]; + uint8_t * dst=&framebuffer[ydst*fb_stride]; + memcpy(dst,src,width); +} + + +//-------------------------------------------------------------- +// Draw a line between 2 points +// x1,y1 : 1st point +// x2,y2 : 2nd point +// Color : 16bits color +//-------------------------------------------------------------- +void VGA_T4::drawline(int16_t x1, int16_t y1, int16_t x2, int16_t y2, vga_pixel color){ + uint8_t yLonger = 0; + 
int incrementVal, endVal; + int shortLen = y2-y1; + int longLen = x2-x1; + int decInc; + int j = 0, i = 0; + + if(ABS(shortLen) > ABS(longLen)) { + int swap = shortLen; + shortLen = longLen; + longLen = swap; + yLonger = 1; + } + + endVal = longLen; + + if(longLen < 0) { + incrementVal = -1; + longLen = -longLen; + endVal--; + } else { + incrementVal = 1; + endVal++; + } + + if(longLen == 0) + decInc = 0; + else + decInc = (shortLen << 16) / longLen; + + if(yLonger) { + for(i = 0;i != endVal;i += incrementVal) { + drawPixel(x1 + (j >> 16),y1 + i,color); + j += decInc; + } + } else { + for(i = 0;i != endVal;i += incrementVal) { + drawPixel(x1 + i,y1 + (j >> 16),color); + j += decInc; + } + } +} + +//-------------------------------------------------------------- +// Draw a horizontal line +// x1,y1 : starting point +// lenght : lenght in pixels +// color : 16bits color +//-------------------------------------------------------------- +void VGA_T4::draw_h_line(int16_t x, int16_t y, int16_t lenght, vga_pixel color){ + drawline(x , y , x + lenght , y , color); +} + +//-------------------------------------------------------------- +// Draw a vertical line +// x1,y1 : starting point +// lenght : lenght in pixels +// color : 16bits color +//-------------------------------------------------------------- +void VGA_T4::draw_v_line(int16_t x, int16_t y, int16_t lenght, vga_pixel color){ + drawline(x , y , x , y + lenght , color); +} + +//-------------------------------------------------------------- +// Draw a circle. +// x, y - center of circle. +// r - radius. +// color - color of the circle. 
+//-------------------------------------------------------------- +void VGA_T4::drawcircle(int16_t x, int16_t y, int16_t radius, vga_pixel color){ + int16_t a, b, P; + + a = 0; + b = radius; + P = 1 - radius; + + do { + drawPixel(a+x, b+y, color); + drawPixel(b+x, a+y, color); + drawPixel(x-a, b+y, color); + drawPixel(x-b, a+y, color); + drawPixel(b+x, y-a, color); + drawPixel(a+x, y-b, color); + drawPixel(x-a, y-b, color); + drawPixel(x-b, y-a, color); + + if(P < 0) + P+= 3 + 2*a++; + else + P+= 5 + 2*(a++ - b--); + } while(a <= b); +} + +//-------------------------------------------------------------- +// Displays a full circle. +// x : specifies the X position +// y : specifies the Y position +// radius : specifies the Circle Radius +// fillcolor : specifies the Circle Fill Color +// bordercolor: specifies the Circle Border Color +//-------------------------------------------------------------- +void VGA_T4::drawfilledcircle(int16_t x, int16_t y, int16_t radius, vga_pixel fillcolor, vga_pixel bordercolor){ + int32_t D; /* Decision Variable */ + uint32_t CurX;/* Current X Value */ + uint32_t CurY;/* Current Y Value */ + + D = 3 - (radius << 1); + + CurX = 0; + CurY = radius; + + while (CurX <= CurY) + { + if(CurY > 0) + { + draw_v_line(x - CurX, y - CurY, 2*CurY, fillcolor); + draw_v_line(x + CurX, y - CurY, 2*CurY, fillcolor); + } + + if(CurX > 0) + { + draw_v_line(x - CurY, y - CurX, 2*CurX, fillcolor); + draw_v_line(x + CurY, y - CurX, 2*CurX, fillcolor); + } + if (D < 0) + { + D += (CurX << 2) + 6; + } + else + { + D += ((CurX - CurY) << 2) + 10; + CurY--; + } + CurX++; + } + + drawcircle(x, y, radius,bordercolor); +} + +//-------------------------------------------------------------- +// Displays an Ellipse. +// cx: specifies the X position +// cy: specifies the Y position +// radius1: minor radius of ellipse. +// radius2: major radius of ellipse. +// color: specifies the Color to use for draw the Border from the Ellipse. 
+//-------------------------------------------------------------- +void VGA_T4::drawellipse(int16_t cx, int16_t cy, int16_t radius1, int16_t radius2, vga_pixel color){ + int x = -radius1, y = 0, err = 2-2*radius1, e2; + float K = 0, rad1 = 0, rad2 = 0; + + rad1 = radius1; + rad2 = radius2; + + if (radius1 > radius2) + { + do { + K = (float)(rad1/rad2); + drawPixel(cx-x,cy+(uint16_t)(y/K),color); + drawPixel(cx+x,cy+(uint16_t)(y/K),color); + drawPixel(cx+x,cy-(uint16_t)(y/K),color); + drawPixel(cx-x,cy-(uint16_t)(y/K),color); + + e2 = err; + if (e2 <= y) { + err += ++y*2+1; + if (-x == y && e2 <= x) e2 = 0; + } + if (e2 > x) err += ++x*2+1; + } + while (x <= 0); + } + else + { + y = -radius2; + x = 0; + do { + K = (float)(rad2/rad1); + drawPixel(cx-(uint16_t)(x/K),cy+y,color); + drawPixel(cx+(uint16_t)(x/K),cy+y,color); + drawPixel(cx+(uint16_t)(x/K),cy-y,color); + drawPixel(cx-(uint16_t)(x/K),cy-y,color); + + e2 = err; + if (e2 <= x) { + err += ++x*2+1; + if (-y == x && e2 <= y) e2 = 0; + } + if (e2 > y) err += ++y*2+1; + } + while (y <= 0); + } +} + +// Draw a filled ellipse. +// cx: specifies the X position +// cy: specifies the Y position +// radius1: minor radius of ellipse. +// radius2: major radius of ellipse. +// fillcolor : specifies the Color to use for Fill the Ellipse. +// bordercolor: specifies the Color to use for draw the Border from the Ellipse. 
+void VGA_T4::drawfilledellipse(int16_t cx, int16_t cy, int16_t radius1, int16_t radius2, vga_pixel fillcolor, vga_pixel bordercolor){ + int x = -radius1, y = 0, err = 2-2*radius1, e2; + float K = 0, rad1 = 0, rad2 = 0; + + rad1 = radius1; + rad2 = radius2; + + if (radius1 > radius2) + { + do + { + K = (float)(rad1/rad2); + draw_v_line((cx+x), (cy-(uint16_t)(y/K)), (2*(uint16_t)(y/K) + 1) , fillcolor); + draw_v_line((cx-x), (cy-(uint16_t)(y/K)), (2*(uint16_t)(y/K) + 1) , fillcolor); + + e2 = err; + if (e2 <= y) + { + err += ++y*2+1; + if (-x == y && e2 <= x) e2 = 0; + } + if (e2 > x) err += ++x*2+1; + + } + while (x <= 0); + } + else + { + y = -radius2; + x = 0; + do + { + K = (float)(rad2/rad1); + draw_h_line((cx-(uint16_t)(x/K)), (cy+y), (2*(uint16_t)(x/K) + 1) , fillcolor); + draw_h_line((cx-(uint16_t)(x/K)), (cy-y), (2*(uint16_t)(x/K) + 1) , fillcolor); + + e2 = err; + if (e2 <= x) + { + err += ++x*2+1; + if (-y == x && e2 <= y) e2 = 0; + } + if (e2 > y) err += ++y*2+1; + } + while (y <= 0); + } + drawellipse(cx,cy,radius1,radius2,bordercolor); +} + +//-------------------------------------------------------------- +// Draw a Triangle. +// ax,ay, bx,by, cx,cy - the triangle points. +// color - color of the triangle. +//-------------------------------------------------------------- +void VGA_T4::drawtriangle(int16_t ax, int16_t ay, int16_t bx, int16_t by, int16_t cx, int16_t cy, vga_pixel color){ + drawline(ax , ay , bx , by , color); + drawline(bx , by , cx , cy , color); + drawline(cx , cy , ax , ay , color); +} + +//-------------------------------------------------------------- +// Draw a Filled Triangle. +// ax,ay, bx,by, cx,cy - the triangle points. +// fillcolor - specifies the Color to use for Fill the triangle. +// bordercolor - specifies the Color to use for draw the Border from the triangle. 
+//-------------------------------------------------------------- +void VGA_T4::drawfilledtriangle(int16_t ax, int16_t ay, int16_t bx, int16_t by, int16_t cx, int16_t cy, vga_pixel fillcolor, vga_pixel bordercolor){ + float ma, mb, mc ; //'gradient of the lines + float start, finish ; //'draw a line from start to finish! + float tempspace ; //'temporary storage for swapping values... + double x1,x2,x3 ; + double y1,y2,y3 ; + int16_t n ; + + //' need to sort out ay, by and cy into order.. highest to lowest + //' + if(ay < by) + { + //'swap x's + tempspace = ax; + ax = bx; + bx = tempspace; + + //'swap y's + tempspace = ay; + ay = by; + by = tempspace; + } + + if(ay < cy) + { + //'swap x's + tempspace = ax; + ax = cx; + cx = tempspace; + + //'swap y's + tempspace = ay; + ay = cy; + cy = tempspace; + } + + if(by < cy) + { + //'swap x's + tempspace = bx; + bx = cx; + cx = tempspace; + + //'swap y's + tempspace = by; + by = cy; + cy = tempspace; + } + + //' Finally - copy the values in order... + + x1 = ax; x2 = bx; x3 = cx; + y1 = ay; y2 = by; y3 = cy; + + //'bodge if y coordinates are the same + if(y1 == y2) y2 = y2 + 0.01; + if(y2 == y3) y3 = y3 + 0.01; + if(y1 == y3) y3 = y3 + 0.01; + + ma = (x1 - x2) / (y1 - y2); + mb = (x3 - x2) / (y2 - y3); + mc = (x3 - x1) / (y1 - y3); + + //'from y1 to y2 + for(n = 0;n >= (y2 - y1);n--) + { + start = n * mc; + finish = n * ma; + drawline((int16_t)(x1 - start), (int16_t)(n + y1), (int16_t)(x1 + finish), (int16_t)(n + y1), fillcolor); + } + + + //'and from y2 to y3 + + for(n = 0;n >= (y3 - y2);n--) + { + start = n * mc; + finish = n * mb; + drawline((int16_t)(x1 - start - ((y2 - y1) * mc)), (int16_t)(n + y2), (int16_t)(x2 - finish), (int16_t)(n + y2), fillcolor); + } + + // draw the border color triangle + drawtriangle(ax,ay,bx,by,cx,cy,bordercolor); +} + + +//-------------------------------------------------------------- +// Displays a Rectangle at a given Angle. +// centerx : specifies the center of the Rectangle. 
+// centery +// w,h : specifies the size of the Rectangle. +// angle : specifies the angle for drawing the rectangle +// color : specifies the Color to use for Fill the Rectangle. +//-------------------------------------------------------------- +void VGA_T4::drawquad(int16_t centerx, int16_t centery, int16_t w, int16_t h, int16_t angle, vga_pixel color){ + int16_t px[4],py[4]; + float l; + float raddeg = 3.14159 / 180; + float w2 = w / 2.0; + float h2 = h / 2.0; + float vec = (w2*w2)+(h2*h2); + float w2l; + float pangle[4]; + + l = sqrtf(vec); + w2l = w2 / l; + pangle[0] = acosf(w2l) / raddeg; + pangle[1] = 180.0 - (acosf(w2l) / raddeg); + pangle[2] = 180.0 + (acosf(w2l) / raddeg); + pangle[3] = 360.0 - (acosf(w2l) / raddeg); + px[0] = (int16_t)(calcco[((int16_t)(pangle[0]) + angle) % 360] * l + centerx); + py[0] = (int16_t)(calcsi[((int16_t)(pangle[0]) + angle) % 360] * l + centery); + px[1] = (int16_t)(calcco[((int16_t)(pangle[1]) + angle) % 360] * l + centerx); + py[1] = (int16_t)(calcsi[((int16_t)(pangle[1]) + angle) % 360] * l + centery); + px[2] = (int16_t)(calcco[((int16_t)(pangle[2]) + angle) % 360] * l + centerx); + py[2] = (int16_t)(calcsi[((int16_t)(pangle[2]) + angle) % 360] * l + centery); + px[3] = (int16_t)(calcco[((int16_t)(pangle[3]) + angle) % 360] * l + centerx); + py[3] = (int16_t)(calcsi[((int16_t)(pangle[3]) + angle) % 360] * l + centery); + // here we draw the quad + drawline(px[0],py[0],px[1],py[1],color); + drawline(px[1],py[1],px[2],py[2],color); + drawline(px[2],py[2],px[3],py[3],color); + drawline(px[3],py[3],px[0],py[0],color); +} + +//-------------------------------------------------------------- +// Displays a filled Rectangle at a given Angle. +// centerx : specifies the center of the Rectangle. +// centery +// w,h : specifies the size of the Rectangle. +// angle : specifies the angle for drawing the rectangle +// fillcolor : specifies the Color to use for Fill the Rectangle. 
+// bordercolor : specifies the Color to use for draw the Border from the Rectangle. +//-------------------------------------------------------------- +void VGA_T4::drawfilledquad(int16_t centerx, int16_t centery, int16_t w, int16_t h, int16_t angle, vga_pixel fillcolor, vga_pixel bordercolor){ + int16_t px[4],py[4]; + float l; + float raddeg = 3.14159 / 180; + float w2 = w / 2.0; + float h2 = h / 2.0; + float vec = (w2*w2)+(h2*h2); + float w2l; + float pangle[4]; + + l = sqrtf(vec); + w2l = w2 / l; + pangle[0] = acosf(w2l) / raddeg; + pangle[1] = 180.0 - (acosf(w2l) / raddeg); + pangle[2] = 180.0 + (acosf(w2l) / raddeg); + pangle[3] = 360.0 - (acosf(w2l) / raddeg); + px[0] = (int16_t)(calcco[((int16_t)(pangle[0]) + angle) % 360] * l + centerx); + py[0] = (int16_t)(calcsi[((int16_t)(pangle[0]) + angle) % 360] * l + centery); + px[1] = (int16_t)(calcco[((int16_t)(pangle[1]) + angle) % 360] * l + centerx); + py[1] = (int16_t)(calcsi[((int16_t)(pangle[1]) + angle) % 360] * l + centery); + px[2] = (int16_t)(calcco[((int16_t)(pangle[2]) + angle) % 360] * l + centerx); + py[2] = (int16_t)(calcsi[((int16_t)(pangle[2]) + angle) % 360] * l + centery); + px[3] = (int16_t)(calcco[((int16_t)(pangle[3]) + angle) % 360] * l + centerx); + py[3] = (int16_t)(calcsi[((int16_t)(pangle[3]) + angle) % 360] * l + centery); + // We draw 2 filled triangle for made the quad + // To be uniform we have to use only the Fillcolor + drawfilledtriangle(px[0],py[0],px[1],py[1],px[2],py[2],fillcolor,fillcolor); + drawfilledtriangle(px[2],py[2],px[3],py[3],px[0],py[0],fillcolor,fillcolor); + // here we draw the BorderColor from the quad + drawline(px[0],py[0],px[1],py[1],bordercolor); + drawline(px[1],py[1],px[2],py[2],bordercolor); + drawline(px[2],py[2],px[3],py[3],bordercolor); + drawline(px[3],py[3],px[0],py[0],bordercolor); +} + +//-------------------------------------------------------------- +// Displays a Polygon. +// centerx : are specified with PolySet.Center.x and y. 
+// centery +// cx : Translate the polygon in x direction +// cy : Translate the polygon in y direction +// bordercolor : specifies the Color to use for draw the Border from the polygon. +// polygon points : are specified with PolySet.Pts[n].x and y +// After the last polygon point , set PolySet.Pts[n + 1].x to 10000 +// Max number of point for the polygon is set by MaxPolyPoint previously defined. +//-------------------------------------------------------------- +void VGA_T4::drawpolygon(int16_t cx, int16_t cy, vga_pixel bordercolor){ + uint8_t n = 1; + while((PolySet.Pts[n].x < 10000) && (n < MaxPolyPoint)){ + drawline(PolySet.Pts[n].x + cx, + PolySet.Pts[n].y + cy, + PolySet.Pts[n - 1].x + cx , + PolySet.Pts[n - 1].y + cy, + bordercolor); + n++; + } + // close the polygon + drawline(PolySet.Pts[0].x + cx, + PolySet.Pts[0].y + cy, + PolySet.Pts[n - 1].x + cx, + PolySet.Pts[n - 1].y + cy, + bordercolor); +} + +//-------------------------------------------------------------- +// Displays a filled Polygon. +// centerx : are specified with PolySet.Center.x and y. +// centery +// cx : Translate the polygon in x direction +// cy : Translate the polygon in y direction +// fillcolor : specifies the Color to use for filling the polygon. +// bordercolor : specifies the Color to use for draw the Border from the polygon. +// polygon points : are specified with PolySet.Pts[n].x and y +// After the last polygon point , set PolySet.Pts[n + 1].x to 10000 +// Max number of point for the polygon is set by MaxPolyPoint previously defined. 
+//-------------------------------------------------------------- +void VGA_T4::drawfullpolygon(int16_t cx, int16_t cy, vga_pixel fillcolor, vga_pixel bordercolor){ + int n,i,j,k,dy,dx; + int y,temp; + int a[MaxPolyPoint][2],xi[MaxPolyPoint]; + float slope[MaxPolyPoint]; + + n = 0; + + while((PolySet.Pts[n].x < 10000) && (n < MaxPolyPoint)){ + a[n][0] = PolySet.Pts[n].x; + a[n][1] = PolySet.Pts[n].y; + n++; + } + + a[n][0]=PolySet.Pts[0].x; + a[n][1]=PolySet.Pts[0].y; + + for(i=0;iy))|| + ((a[i][1]>y)&&(a[i+1][1]<=y))) + { + xi[k]=(int)(a[i][0]+slope[i]*(y-a[i][1])); + k++; + } + } + + for(j=0;jxi[i+1]) + { + temp=xi[i]; + xi[i]=xi[i+1]; + xi[i+1]=temp; + } + } + + for(i=0;i= (fb_width-hscr_mask)) return; + if ((y + SPRITES_H) <= 0) return; + if (y >= fb_height) return; + + vga_pixel * src=&spritesbuffer[index*SPRITES_W*SPRITES_H]; + int i,j; + vga_pixel pix; + for (j=0; j (fb_width-hscr_mask)) || ((y+j) < 0) || ((y+j) >= fb_height) ) dst++; + else *dst++ = pix; + } + } +} + + +static void drawTile(unsigned char tile, int x, int y) { + vga_pixel * src=&tilesbuffer[tile*TILES_W*TILES_H]; + int i,j; + for (j=0; j (fb_width-hscr_mask)) *dst++=0; + else + *dst++ = pix; + } + } +} + +static void drawTransTile(unsigned char tile, int x, int y) { + vga_pixel * src=&tilesbuffer[tile*TILES_W*TILES_H]; + vga_pixel pix; + int i,j; + for (j=0; j (fb_width-hscr_mask)) src++; + else + if ((pix=*src++)) *dst++ = pix; + else *dst++; + } + } +} + + + +static void tileText(unsigned char index, int16_t x, int16_t y, const char * text, vga_pixel fgcolor, vga_pixel bgcolor, vga_pixel *dstbuffer, int dstwidth, int dstheight) { + vga_pixel c; + vga_pixel * dst; + + while ((c = *text++)) { + const unsigned char * charpt=&font8x8[c][0]; + int l=y; + for (int i=0;i<8;i++) + { + unsigned char bits; + dst=&dstbuffer[(index*dstheight+l)*dstwidth+x]; + bits = *charpt++; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else 
*dst++=bgcolor; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else *dst++=bgcolor; + l++; + } + x +=8; + } +} + +static void tileTextOverlay(int16_t x, int16_t y, const char * text, vga_pixel fgcolor) { + vga_pixel c; + vga_pixel * dst; + + while ((c = *text++)) { + const unsigned char * charpt=&font8x8[c][0]; + int l=y; + for (int i=0;i<8;i++) + { + unsigned char bits; + dst=&framebuffer[+l*fb_stride+x]; + bits = *charpt++; + if (bits&0x01) *dst++=fgcolor; + else dst++; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else dst++; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else dst++; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else dst++; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else dst++; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else dst++; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else dst++; + bits = bits >> 1; + if (bits&0x01) *dst++=fgcolor; + else dst++; + l++; + } + x +=8; + } +} +static const char * hex = "0123456789ABCDEF"; + +void VGA_T4::begin_gfxengine(int nblayers, int nbtiles, int nbsprites) +{ + // Try double buffering + if (fb1 == NULL) { + void *mallocpt = malloc(fb_stride*fb_height*sizeof(vga_pixel)+4); + fb1 = (vga_pixel *)((void*)((intptr_t)mallocpt & ~3)); + } + if (fb1 != NULL) { + framebuffer = fb1; + } + + + nb_layers = nblayers; + nb_tiles = nbtiles; + nb_sprites = nbsprites; + + if (spritesbuffer == NULL) spritesbuffer = (vga_pixel*)malloc(SPRITES_W*SPRITES_H*sizeof(vga_pixel)*nb_sprites); + if (tilesbuffer == NULL) tilesbuffer = 
(vga_pixel*)malloc(TILES_W*TILES_H*sizeof(vga_pixel)*nb_tiles); + if (tilesram == NULL) tilesram = (unsigned char*)malloc(TILES_COLS*TILES_ROWS*nb_layers); + if (spritesdata == NULL) spritesdata = (Sprite_t *)malloc(SPRITES_MAX*sizeof(Sprite_t)); + + memset((void*)spritesbuffer,0, SPRITES_W*SPRITES_H*sizeof(vga_pixel)*nb_sprites); + memset((void*)tilesbuffer,0, TILES_W*TILES_H*sizeof(vga_pixel)*nb_tiles); + memset((void*)tilesram,0,TILES_COLS*TILES_ROWS*nb_layers); + + /* Random test tiles */ + char numhex[3]; + for (int i=0; i>4) & 0xf]; + numhex[1] = hex[i & 0xf]; + numhex[2] = 0; + if (TILES_W == 16 )tileText(i, 0, 0, numhex, VGA_RGB(0xff,0xff,0xff), VGA_RGB(0x40,0x40,0x40), tilesbuffer,TILES_W,TILES_H); + } + } + /* Random test sprites */ + for (int i=0; i>4) & 0xf]; + numhex[1] = hex[i & 0xf]; + numhex[2] = 0; + tileText(i, 0, 0, numhex, VGA_RGB(0xff,0xff,0x00), VGA_RGB(0x00,0x00,0x00),spritesbuffer,SPRITES_W,SPRITES_H); + } + } +} + + +void VGA_T4::run_gfxengine() +{ + waitSync(); + + if (fb1 != NULL) { + if (visible_framebuffer == fb0) { + visible_framebuffer = fb1; + framebuffer = fb0; + } + else { + visible_framebuffer = fb0; + framebuffer = fb1; + } + } + + unsigned char * tilept; + + // Layer 0 + for (int j=0; j=hscr_beg[0]) && (j<=hscr_end[0]) ) { + int modcol = (hscr[0] >> TILES_HBITS) % TILES_COLS; + for (int i=0; i 1) { + int lcount = 1; + while (lcount < nb_layers) { + for (int j=0; j=hscr_beg[lcount]) && (j<=hscr_end[lcount]) ) { + int modcol = (hscr[lcount] >> TILES_HBITS) % TILES_COLS; + for (int i=0; i> 8); + cnt = cnt & (sampleBufferSize*2-1); + + if (cnt == 0) { + fillfirsthalf = false; + //irq_set_pending(RTC_IRQ+1); + multicore_fifo_push_blocking(0); + } + else if (cnt == sampleBufferSize) { + fillfirsthalf = true; + //irq_set_pending(RTC_IRQ+1); + multicore_fifo_push_blocking(0); + } +} + +static void core1_sio_irq() { + irq_clear(SIO_IRQ_PROC1); + while(multicore_fifo_rvalid()) { + uint16_t raw = multicore_fifo_pop_blocking(); + 
SOFTWARE_isr(); + } + multicore_fifo_clear_irq(); +} + + +void VGA_T4::begin_audio(int samplesize, void (*callback)(short * stream, int len)) +{ + fillsamples = callback; + i2s_tx_buffer = (uint32_t*)malloc(samplesize*sizeof(uint32_t)); + + if (i2s_tx_buffer == NULL) { + printf("sound buffer could not be allocated!!!!!\n"); + return; + } + memset((void*)i2s_tx_buffer,0, samplesize*sizeof(uint32_t)); + printf("sound buffer allocated\n"); + + i2s_tx_buffer16 = (short*)i2s_tx_buffer; + + sampleBufferSize = samplesize; + + gpio_set_function(AUDIO_PIN, GPIO_FUNC_PWM); + int audio_pin_slice = pwm_gpio_to_slice_num(AUDIO_PIN); + // Setup PWM interrupt to fire when PWM cycle is complete + pwm_clear_irq(audio_pin_slice); + pwm_set_irq_enabled(audio_pin_slice, true); + irq_set_exclusive_handler(PWM_IRQ_WRAP, AUDIO_isr); + irq_set_priority (PWM_IRQ_WRAP, 128); + irq_set_enabled(PWM_IRQ_WRAP, true); + + //irq_set_exclusive_handler(RTC_IRQ+1,SOFTWARE_isr); + //irq_set_priority (RTC_IRQ+1, 120); + //irq_set_enabled(RTC_IRQ+1,true); + + + // Setup PWM for audio output + pwm_config config = pwm_get_default_config(); +// pwm_config_set_clkdiv(&config, 5.5f); + pwm_config_set_clkdiv(&config, 50.0f); + pwm_config_set_wrap(&config, 254); + pwm_init(audio_pin_slice, &config, true); + + pwm_set_gpio_level(AUDIO_PIN, 0); + printf("sound initialized\n"); +} + +void VGA_T4::end_audio() +{ + if (i2s_tx_buffer != NULL) { + free(i2s_tx_buffer); + } +} + +#endif + + diff --git a/MCUME_pico/picovga_t4/VGA_t4.h b/MCUME_pico/picovga_t4/VGA_t4.h new file mode 100755 index 0000000..0d21ef3 --- /dev/null +++ b/MCUME_pico/picovga_t4/VGA_t4.h @@ -0,0 +1,262 @@ +/* + This file is part of VGA_t4 library. + + VGA_t4 library is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + Copyright (C) 2020 J-M Harvengt + + Inspired from the original Teensy3 uVGA library of Eric PREVOTEAU. + QTIMER/FlexIO code based on Teensy4 examples of KurtE, Manitou and easone + from the Teensy4 forum (https://forum.pjrc.com) +*/ + +#ifndef _VGA_T4_H +#define _VGA_T4_H + + +#include +#include +#include "pico.h" + + +// Enable debug info (requires serial initialization) +//#define DEBUG + +typedef uint8_t vga_pixel; +#define VGA_RGB(r,g,b) ( (((r>>5)&0x07)<<5) | (((g>>5)&0x07)<<2) | (((b>>6)&0x3)<<0) ) + + +typedef enum vga_mode_t +{ + VGA_MODE_320x240 = 0, + VGA_MODE_352x240 = 1, + VGA_MODE_400x240 = 2, +} vga_mode_t; + + +typedef enum vga_error_t +{ + VGA_OK = 0, + VGA_ERROR = -1 +} vga_error_t; + +#define MaxPolyPoint 100 + +#define AUDIO_SAMPLE_BUFFER_SIZE 256 + +// 2D point structure +typedef struct { + int16_t x; // X Coordinate on screen + int16_t y; // Y Coordinate on screen +}Point2D; + +// Polygon structure +typedef struct { + Point2D Center; // Polygon Center (point where the polygon can rotate arround) + Point2D Pts[MaxPolyPoint]; // Points for the polygon +}PolyDef; + + +#define DEFAULT_VSYNC_PIN 8 + +#ifndef ABS +#define ABS(X) ((X) > 0 ? (X) : -(X)) +#endif + +extern PolyDef PolySet; // polygon data to declare in c file + + +// Precomputed sinus and cosinus table from 0 to 359 degrees +// The tables are in Degrees not in Radian ! 
+const float calcsi[360]={ + 0.000001 , // 0 + 0.01745239 , 0.03489947 , 0.05233591 , 0.06975641 , 0.08715567 , 0.1045284 , 0.1218692 , 0.139173 , 0.1564343 , 0.173648 , // 1 à 10 + 0.1908088 , 0.2079115 , 0.2249509 , 0.2419217 , 0.2588188 , 0.2756371 , 0.2923715 , 0.3090167 , 0.3255679 , 0.3420198 , // 11 à 20 + 0.3583677 , 0.3746063 , 0.3907308 , 0.4067363 , 0.4226179 , 0.4383708 , 0.4539901 , 0.4694712 , 0.4848093 , 0.4999996 , // 21 à 30 + 0.5150377 , 0.5299189 , 0.5446386 , 0.5591925 , 0.573576 , 0.5877848 , 0.6018146 , 0.615661 , 0.62932 , 0.6427872 , // 31 à 40 + 0.6560586 , 0.6691301 , 0.6819978 , 0.6946579 , 0.7071063 , 0.7193394 , 0.7313532 , 0.7431444 , 0.7547091 , 0.7660439 , // 41 à 50 + 0.7771455 , 0.7880103 , 0.798635 , 0.8090165 , 0.8191515 , 0.8290371 , 0.8386701 , 0.8480476 , 0.8571668 , 0.8660249 , // 51 à 60 + 0.8746193 , 0.8829472 , 0.8910061 , 0.8987936 , 0.9063074 , 0.913545 , 0.9205045 , 0.9271835 , 0.9335801 , 0.9396922 , // 61 à 70 + 0.9455183 , 0.9510562 , 0.9563044 , 0.9612614 , 0.9659255 , 0.9702954 , 0.9743698 , 0.9781474 , 0.981627 , 0.9848075 , // 71 à 80 + 0.9876881 , 0.9902679 , 0.992546 , 0.9945218 , 0.9961946 , 0.9975639 , 0.9986295 , 0.9993908 , 0.9998476 , 0.99999 , // 81 à 90 + 0.9998477 , 0.9993909 , 0.9986296 , 0.9975642 , 0.9961948 , 0.994522 , 0.9925463 , 0.9902682 , 0.9876886 , 0.984808 , // 91 à 100 + 0.9816275 , 0.9781479 , 0.9743704 , 0.9702961 , 0.9659262 , 0.9612621 , 0.9563052 , 0.9510571 , 0.9455191 , 0.9396932 , // 101 à 110 + 0.933581 , 0.9271844 , 0.9205055 , 0.9135461 , 0.9063086 , 0.8987948 , 0.8910073 , 0.8829485 , 0.8746206 , 0.8660263 , // 111 à 120 + 0.8571682 , 0.8480491 , 0.8386716 , 0.8290385 , 0.8191531 , 0.8090182 , 0.7986366 , 0.7880119 , 0.7771472 , 0.7660457 , // 121 à 130 + 0.7547108 , 0.7431462 , 0.7313551 , 0.7193412 , 0.7071083 , 0.6946598 , 0.6819999 , 0.6691321 , 0.6560606 , 0.6427892 , // 131 à 140 + 0.629322 , 0.6156631 , 0.6018168 , 0.5877869 , 0.5735782 , 0.5591948 , 0.5446408 , 0.5299212 
, 0.5150401 , 0.5000019 , // 141 à 150 + 0.4848116 , 0.4694737 , 0.4539925 , 0.4383733 , 0.4226205 , 0.4067387 , 0.3907333 , 0.3746087 , 0.3583702 , 0.3420225 , // 151 à 160 + 0.3255703 , 0.3090193 , 0.2923741 , 0.2756396 , 0.2588214 , 0.2419244 , 0.2249534 , 0.2079142 , 0.1908116 , 0.1736506 , // 161 à 170 + 0.156437 , 0.1391758 , 0.1218719 , 0.1045311 , 0.08715825 , 0.06975908 , 0.05233867 , 0.03490207 , 0.01745508 , 0.0277 , // 171 à 180 + -0.01744977 , -0.03489676 , -0.05233313 , -0.06975379 , -0.08715296 , -0.1045256 , -0.1218666 , -0.1391703 , -0.1564316 , -0.1736454 ,// 181 à 190 + -0.1908061 , -0.207909 , -0.2249483 , -0.241919 , -0.2588163 , -0.2756345 , -0.2923688 , -0.3090142 , -0.3255653 , -0.3420173 , // 191 à 200 + -0.3583652 , -0.3746038 , -0.3907282 , -0.4067339 , -0.4226155 , -0.4383683 , -0.4539878 , -0.4694688 , -0.4848068 , -0.4999973 , // 201 à 210 + -0.5150353 , -0.5299166 , -0.5446364 , -0.5591902 , -0.5735739 , -0.5877826 , -0.6018124 , -0.615659 , -0.6293178 , -0.642785 , // 211 à 220 + -0.6560566 , -0.6691281 , -0.6819958 , -0.694656 , -0.7071043 , -0.7193374 , -0.7313514 , -0.7431425 , -0.7547074 , -0.7660421 , // 221 à 230 + -0.7771439 , -0.7880087 , -0.7986334 , -0.8090149 , -0.8191499 , -0.8290355 , -0.8386687 , -0.8480463 , -0.8571655 , -0.8660236 , // 231 à 240 + -0.8746178 , -0.882946 , -0.8910049 , -0.8987925 , -0.9063062 , -0.9135439 , -0.9205033 , -0.9271825 , -0.9335791 , -0.9396913 , // 241 à 250 + -0.9455173 , -0.9510553 , -0.9563036 , -0.9612607 , -0.9659248 , -0.9702948 , -0.9743692 , -0.9781467 , -0.9816265 , -0.9848071 , // 251 à 260 + -0.9876878 , -0.9902675 , -0.9925456 , -0.9945215 , -0.9961944 , -0.9975638 , -0.9986293 , -0.9993907 , -0.9998476 , -0.99999 , // 261 à 270 + -0.9998478 , -0.9993909 , -0.9986298 , -0.9975643 , -0.9961951 , -0.9945223 , -0.9925466 , -0.9902686 , -0.987689 , -0.9848085 , // 271 à 280 + -0.981628 , -0.9781484 , -0.974371 , -0.9702968 , -0.965927 , -0.9612629 , -0.9563061 , -0.9510578 , 
-0.9455199 , -0.9396941 , // 281 à 290 + -0.933582 , -0.9271856 , -0.9205065 , -0.9135472 , -0.9063097 , -0.898796 , -0.8910086 , -0.8829498 , -0.8746218 , -0.8660276 , // 291 à 300 + -0.8571696 , -0.8480505 , -0.8386731 , -0.8290402 , -0.8191546 , -0.8090196 , -0.7986383 , -0.7880136 , -0.777149 , -0.7660476 , // 301 à 310 + -0.7547125 , -0.7431479 , -0.7313569 , -0.7193431 , -0.7071103 , -0.6946616 , -0.6820017 , -0.6691341 , -0.6560627 , -0.6427914 , // 311 à 320 + -0.6293243 , -0.6156651 , -0.6018188 , -0.5877892 , -0.5735805 , -0.5591971 , -0.5446434 , -0.5299233 , -0.5150422 , -0.5000043 , // 321 à 330 + -0.484814 , -0.4694761 , -0.4539948 , -0.4383755 , -0.4226228 , -0.4067413 , -0.3907359 , -0.3746115 , -0.3583725 , -0.3420248 , // 331 à 340 + -0.325573 , -0.3090219 , -0.2923768 , -0.2756425 , -0.2588239 , -0.2419269 , -0.2249561 , -0.2079169 , -0.1908143 , -0.1736531 , // 341 à 350 + -0.1564395 , -0.1391783 , -0.1218746 , -0.1045339 , -0.08716125 , -0.06976161 , -0.0523412 , -0.03490484 , -0.01745785 }; // 351 à 359 + +const float calcco[360]={ + 0.99999 , // 0 + 0.9998477 , 0.9993908 , 0.9986295 , 0.9975641 , 0.9961947 , 0.9945219 , 0.9925462 , 0.9902681 , 0.9876884 , 0.9848078 , // 1 à 10 + 0.9816272 , 0.9781477 , 0.9743701 , 0.9702958 , 0.9659259 , 0.9612617 , 0.9563049 , 0.9510566 , 0.9455186 , 0.9396928 , // 11 à 20 + 0.9335806 , 0.927184 , 0.920505 , 0.9135456 , 0.906308 , 0.8987943 , 0.8910067 , 0.8829478 , 0.8746199 , 0.8660256 , // 21 à 30 + 0.8571675 , 0.8480483 , 0.8386709 , 0.8290379 , 0.8191524 , 0.8090173 , 0.7986359 , 0.7880111 , 0.7771463 , 0.7660448 , // 31 à 40 + 0.75471 , 0.7431452 , 0.7313541 , 0.7193403 , 0.7071072 , 0.6946589 , 0.6819989 , 0.6691311 , 0.6560596 , 0.6427882 , // 41 à 50 + 0.629321 , 0.6156621 , 0.6018156 , 0.5877859 , 0.5735771 , 0.5591936 , 0.5446398 , 0.52992 , 0.5150389 , 0.5000008 , // 51 à 60 + 0.4848104 , 0.4694724 , 0.4539914 , 0.438372 , 0.4226191 , 0.4067376 , 0.3907321 , 0.3746075 , 0.3583689 , 0.3420211 , // 
61 à 70 + 0.3255692 , 0.309018 , 0.2923728 , 0.2756384 , 0.2588201 , 0.241923 , 0.2249522 , 0.2079128 , 0.1908101 , 0.1736494 , // 71 à 80 + 0.1564357 , 0.1391743 , 0.1218706 , 0.1045297 , 0.08715699 , 0.06975782 , 0.05233728 , 0.0349008 , 0.01745369 , 0.0138 , // 81 à 90 + -0.01745104 , -0.03489815 , -0.05233451 , -0.06975505 , -0.08715434 , -0.1045271 , -0.1218679 , -0.1391717 , -0.156433 , -0.1736467 ,// 91 à 100 + -0.1908075 , -0.2079102 , -0.2249495 , -0.2419204 , -0.2588175 , -0.2756359 , -0.2923701 , -0.3090155 , -0.3255666 , -0.3420185 , // 101 à 110 + -0.3583664 , -0.3746051 , -0.3907295 , -0.4067351 , -0.4226166 , -0.4383696 , -0.4539889 , -0.4694699 , -0.4848081 , -0.4999984 , // 111 à 120 + -0.5150366 , -0.5299177 , -0.5446375 , -0.5591914 , -0.5735749 , -0.5877837 , -0.6018136 , -0.6156599 , -0.6293188 , -0.6427862 , // 121 à 130 + -0.6560575 , -0.669129 , -0.6819969 , -0.6946569 , -0.7071053 , -0.7193384 , -0.7313522 , -0.7431435 , -0.7547083 , -0.7660431 , // 131 à 140 + -0.7771447 , -0.7880094 , -0.7986342 , -0.8090158 , -0.8191508 , -0.8290363 , -0.8386694 , -0.8480469 , -0.8571661 , -0.8660243 , // 141 à 150 + -0.8746186 , -0.8829465 , -0.8910055 , -0.898793 , -0.9063068 , -0.9135445 , -0.9205039 , -0.927183 , -0.9335796 , -0.9396918 , // 151 à 160 + -0.9455178 , -0.9510558 , -0.956304 , -0.9612611 , -0.9659252 , -0.9702951 , -0.9743695 , -0.978147 , -0.9816267 , -0.9848073 , // 161 à 170 + -0.9876879 , -0.9902677 , -0.9925459 , -0.9945216 , -0.9961945 , -0.9975639 , -0.9986294 , -0.9993907 , -0.9998476 , -0.99999 , // 171 à 180 + -0.9998477 , -0.9993909 , -0.9986297 , -0.9975642 , -0.9961949 , -0.9945222 , -0.9925465 , -0.9902685 , -0.9876888 , -0.9848083 , // 181 à 190 + -0.9816277 , -0.9781482 , -0.9743707 , -0.9702965 , -0.9659266 , -0.9612625 , -0.9563056 , -0.9510574 , -0.9455196 , -0.9396937 , // 191 à 200 + -0.9335815 , -0.927185 , -0.9205061 , -0.9135467 , -0.9063091 , -0.8987955 , -0.8910079 , -0.8829491 , -0.8746213 , -0.866027 , // 201 
à 210 + -0.857169 , -0.8480497 , -0.8386723 , -0.8290394 , -0.8191538 , -0.8090189 , -0.7986375 , -0.7880127 , -0.7771481 , -0.7660466 , // 211 à 220 + -0.7547117 , -0.743147 , -0.731356 , -0.7193421 , -0.7071092 , -0.6946609 , -0.6820008 , -0.6691331 , -0.6560616 , -0.6427905 , // 221 à 230 + -0.6293229 , -0.6156641 , -0.6018178 , -0.5877882 , -0.5735794 , -0.5591961 , -0.5446419 , -0.5299222 , -0.5150412 , -0.5000032 , // 231 à 240 + -0.4848129 , -0.4694746 , -0.4539936 , -0.4383744 , -0.4226216 , -0.4067401 , -0.3907347 , -0.3746099 , -0.3583714 , -0.3420237 , // 241 à 250 + -0.3255718 , -0.3090207 , -0.2923756 , -0.2756409 , -0.2588227 , -0.2419256 , -0.2249549 , -0.2079156 , -0.1908126 , -0.1736519 , // 251 à 260 + -0.1564383 , -0.139177 , -0.1218734 , -0.1045326 , -0.08715951 , -0.06976035 , -0.05233994 , -0.03490358 , -0.01745659 , -0.0427 , // 261 à 270 + 0.01744851 , 0.0348955 , 0.05233186 , 0.06975229 , 0.08715146 , 0.1045246 , 0.1218654 , 0.139169 , 0.1564303 , 0.1736439 , // 271 à 280 + 0.1908047 , 0.2079078 , 0.224947 , 0.2419178 , 0.2588149 , 0.2756331 , 0.2923674 , 0.309013 , 0.3255641 , 0.3420161 , // 281 à 290 + 0.3583638 , 0.3746024 , 0.3907273 , 0.4067327 , 0.4226143 , 0.4383671 , 0.4539864 , 0.4694674 , 0.4848059 , 0.4999962 , // 291 à 300 + 0.5150342 , 0.5299154 , 0.5446351 , 0.559189 , 0.5735728 , 0.5877816 , 0.6018113 , 0.6156578 , 0.6293167 , 0.6427839 , // 301 à 310 + 0.6560556 , 0.6691272 , 0.6819949 , 0.6946549 , 0.7071033 , 0.7193366 , 0.7313506 , 0.7431416 , 0.7547064 , 0.7660413 , // 311 à 320 + 0.7771428 , 0.7880079 , 0.7986327 , 0.8090141 , 0.8191492 , 0.8290347 , 0.8386678 , 0.8480456 , 0.8571648 , 0.8660229 , // 321 à 330 + 0.8746172 , 0.8829452 , 0.8910043 , 0.8987919 , 0.9063057 , 0.9135434 , 0.9205029 , 0.9271819 , 0.9335786 , 0.9396909 , // 331 à 340 + 0.9455169 , 0.9510549 , 0.9563032 , 0.9612602 , 0.9659245 , 0.9702945 , 0.9743689 , 0.9781465 , 0.9816261 , 0.9848069 , // 341 à 350 + 0.9876875 , 0.9902673 , 0.9925455 , 
0.9945213 , 0.9961942 , 0.9975637 , 0.9986292 , 0.9993906 , 0.9998476 }; // 351 à 359 + + +class VGA_T4 +{ +public: + + VGA_T4(); + + // display VGA image + vga_error_t begin(vga_mode_t mode); + void begin_audio(int samplesize, void (*callback)(short * stream, int len)); + void end(); + void end_audio(); + void debug(); + void tweak_video(int shiftdelta, int numdelta, int denomdelta); + + // retrieve real size of the frame buffer + int get_frame_buffer_size(int *width, int *height); + + // wait next Vsync + void waitSync(); + void waitLine(int line); + + // ========================================================= + // graphic primitives + // ========================================================= + + void clear(vga_pixel col) ; + void drawPixel(int x, int y, vga_pixel color); + vga_pixel getPixel(int x, int y); + vga_pixel * getLineBuffer(int j); + void drawRect(int16_t x, int16_t y, int16_t w, int16_t h, vga_pixel color); + void drawText(int16_t x, int16_t y, const char * text, vga_pixel fgcolor, vga_pixel bgcolor, bool doublesize); + void drawSprite(int16_t x, int16_t y, const int16_t *bitmap); + void drawSprite(int16_t x, int16_t y, const int16_t *bitmap, uint16_t croparx, uint16_t cropary, uint16_t croparw, uint16_t croparh); + void writeScreen(const vga_pixel *pcolors); + void writeLine(int width, int height, int y, vga_pixel *buf); + void writeLine(int width, int height, int stride, uint8_t *buffer, vga_pixel *palette); + void writeLine16(int width, int height, int y, uint16_t *buf); + void writeScreen(int width, int height, int stride, uint8_t *buffer, vga_pixel *palette); + void copyLine(int width, int height, int ysrc, int ydst); + + // ************************************** GFX API extension from darthvader ****************************************************** + void drawline(int16_t x1, int16_t y1, int16_t x2, int16_t y2, vga_pixel color); + void draw_h_line(int16_t x1, int16_t y1, int16_t lenght, vga_pixel color); + void draw_v_line(int16_t x1, 
int16_t y1, int16_t lenght, vga_pixel color); + void drawcircle(int16_t x, int16_t y, int16_t radius, vga_pixel color); + void drawfilledcircle(int16_t x, int16_t y, int16_t radius, vga_pixel fillcolor, vga_pixel bordercolor); + void drawellipse(int16_t cx, int16_t cy, int16_t radius1, int16_t radius2, vga_pixel color); + void drawfilledellipse(int16_t cx, int16_t cy, int16_t radius1, int16_t radius2, vga_pixel fillcolor, vga_pixel bordercolor); + void drawtriangle(int16_t ax, int16_t ay, int16_t bx, int16_t by, int16_t cx, int16_t cy, vga_pixel color); + void drawfilledtriangle(int16_t ax, int16_t ay, int16_t bx, int16_t by, int16_t cx, int16_t cy, vga_pixel fillcolor, vga_pixel bordercolor); + void drawquad(int16_t centerx, int16_t centery, int16_t w, int16_t h, int16_t angle, vga_pixel color); + void drawfilledquad(int16_t centerx, int16_t centery, int16_t w, int16_t h, int16_t angle, vga_pixel fillcolor, vga_pixel bordercolor); + void drawpolygon(int16_t cx, int16_t cy, vga_pixel bordercolor); + void drawfullpolygon(int16_t cx, int16_t cy, vga_pixel fillcolor, vga_pixel bordercolor); + void drawrotatepolygon(int16_t cx, int16_t cy, int16_t Angle, vga_pixel fillcolor, vga_pixel bordercolor, uint8_t filled); + // ******************************************************************************************************************************* + + + // ========================================================= + // Game engine + // ========================================================= + + #define TILES_MAX_LAYERS 2 + + // 16x16 pixels tiles or 8x8 if USE_8PIXTILES is set + //#define USE_8PIXTILES 1 + #ifdef USE_8PIXTILES + #define TILES_COLS 40 + #define TILES_ROWS 30 + #define TILES_W 8 + #define TILES_H 8 + #define TILES_HBITS 3 + #define TILES_HMASK 0x7 + #else + #define TILES_COLS 20 + #define TILES_ROWS 15 + #define TILES_W 16 + #define TILES_H 16 + #define TILES_HBITS 4 + #define TILES_HMASK 0xf + #endif + + // 32 sprites 16x32 or max 64 16x16 (not larger!!!) 
+ #define SPRITES_MAX 32 + #define SPRITES_W 16 + #define SPRITES_H 32 + + void begin_gfxengine(int nblayers, int nbtiles, int nbsprites); + void run_gfxengine(); + void tile_data(unsigned char index, vga_pixel * data, int len); + void sprite_data(unsigned char index, vga_pixel * data, int len); + void sprite(int id , int x, int y, unsigned char index); + void sprite_hide(int id); + void tile_draw(int layer, int x, int y, unsigned char index); + void tile_draw_row(int layer, int x, int y, unsigned char * data, int len); + void tile_draw_col(int layer, int x, int y, unsigned char * data, int len); + void set_hscroll(int layer, int rowbeg, int rowend, int mask); + void set_vscroll(int layer, int colbeg, int colend, int mask); + void hscroll(int layer, int value); + void vscroll(int layer, int value); + + +private: + static uint8_t _vsync_pin; +}; + + +#endif + + diff --git a/MCUME_pico/picovga_t4/canvas.h b/MCUME_pico/picovga_t4/canvas.h new file mode 100755 index 0000000..fff6ef2 --- /dev/null +++ b/MCUME_pico/picovga_t4/canvas.h @@ -0,0 +1,142 @@ + +// **************************************************************************** +// +// Canvas +// +// **************************************************************************** + +#ifndef _CANVAS_H +#define _CANVAS_H + +#define DRAW_HWINTER 1 // 1=use hardware interpolator to draw images + +// canvas format +// Note: do not use enum, symbols could not be used by the preprocessor +#define CANVAS_8 0 // 8-bit pixels +#define CANVAS_4 1 // 4-bit pixels +#define CANVAS_2 2 // 2-bit pixels +#define CANVAS_1 3 // 1-bit pixels +#define CANVAS_PLANE2 4 // 4 colors on 2 planes +#define CANVAS_ATTRIB8 5 // 2x4 bit color attributes per 8x8 pixel sample + // draw functions: bit 0..3 = draw color + // bit 4 = draw color is background color + +// canvas descriptor +typedef struct { + u8* img; // image data + u8* img2; // image data 2 (2nd plane of CANVAS_PLANE2, attributes of CANVAS_ATTRIB8) + int w; // width + int h; // height + 
int wb; // pitch (bytes between lines) + u8 format; // canvas format CANVAS_* +} sCanvas; + +// Draw rectangle +void DrawRect(sCanvas* canvas, int x, int y, int w, int h, u8 col); + +// Draw frame +void DrawFrame(sCanvas* canvas, int x, int y, int w, int h, u8 col); + +// clear canvas (fill with black color) +void DrawClear(sCanvas* canvas); + +// Draw point +void DrawPoint(sCanvas* canvas, int x, int y, u8 col); + +// Draw line +void DrawLine(sCanvas* canvas, int x1, int y1, int x2, int y2, u8 col); + +// Draw filled circle +// x0, y0 ... coordinate of center +// r ... radius +// col ... color +// col with CANVAS_ATTRIB8 format: bit 0..3 = draw color, bit 4 = draw color is background color +// mask ... mask of used octants (0xff = 255 = draw whole circle) +// . B2|B1 . +// B3 . | . B0 +// ------o------ +// B4 . | . B7 +// . B5|B6 . +void DrawFillCircle(sCanvas* canvas, int x0, int y0, int r, u8 col, u8 mask=0xff); + +// Draw circle +// x0, y0 ... coordinate of center +// r ... radius +// col ... color +// col with CANVAS_ATTRIB8 format: bit 0..3 = draw color, bit 4 = draw color is background color +// mask ... mask of used octants (0xff = 255 = draw whole circle) +// . B2|B1 . +// B3 . | . B0 +// ------o------ +// B4 . | . B7 +// . B5|B6 . 
+void DrawCircle(sCanvas* canvas, int x0, int y0, int r, u8 col, u8 mask=0xff); + +// Draw text (transparent background) +// font = pointer to 1-bit font +void DrawText(sCanvas* canvas, const char* text, int x, int y, u8 col, + const void* font, int fontheight=8, int scalex=1, int scaley=1); + +// Draw text with background +// font = pointer to 1-bit font +void DrawTextBg(sCanvas* canvas, const char* text, int x, int y, u8 col, u8 bgcol, + const void* font, int fontheight=8, int scalex=1, int scaley=1); + +// Draw image +void DrawImg(sCanvas* canvas, sCanvas* src, int xd, int yd, int xs, int ys, int w, int h); + +// Draw image with transparency (source and destination must have same format, col = transparency key color) +// CANVAS_ATTRIB8 format replaced by DrawImg function +void DrawBlit(sCanvas* canvas, sCanvas* src, int xd, int yd, int xs, int ys, int w, int h, u8 col); + +// DrawImgMat mode +enum { + DRAWIMG_WRAP, // wrap image + DRAWIMG_NOBORDER, // no border (transparent border) + DRAWIMG_CLAMP, // clamp image (use last pixel as border) + DRAWING_COLOR, // color border + DRAWIMG_TRANSP, // transparent image with key color + DRAWIMG_PERSP, // perspective floor +}; + +// draw 8-bit image with 2D transformation matrix +// canvas ... destination canvas +// src ... source canvas with image +// x ... destination coordinate X +// y ... destination coordinate Y +// w ... destination width +// h ... destination height +// m ... transformation matrix (should be prepared using PrepDrawImg or PrepDrawPersp function) +// mode ... draw mode DRAWIMG_* +// color ... key or border color +// Note to wrap and perspective mode: Width and height of source image must be power of 2! +void DrawImgMat(sCanvas* canvas, const sCanvas* src, int x, int y, int w, int h, + const class cMat2Df* m, u8 mode, u8 color); + +// draw tile map using perspective projection +// canvas ... destination canvas +// src ... 
source canvas with column of 8-bit square tiles (width = tile size, must be power of 2) +// map ... byte map of tile indices +// mapwbits ... number of bits of map width (number of tiles; width must be power of 2) +// maphbits ... number of bits of map height (number of tiles; height must be power of 2) +// tilebits ... number of bits of tile size (e.g. 5 = tile 32x32 pixel) +// x ... destination coordinate X +// y ... destination coordinate Y +// w ... destination width +// h ... destination height +// mat ... transformation matrix (should be prepared using PrepDrawPersp function) +// horizon ... horizon offset (0=do not use perspective projection) +void DrawTileMap(sCanvas* canvas, const sCanvas* src, const u8* map, int mapwbits, int maphbits, + int tilebits, int x, int y, int w, int h, const cMat2Df* mat, u8 horizon); + +// draw image line interpolated +// canvas = destination canvas (8-bit pixel format) +// src = source canvas (source image in 8-bit pixel format) +// xd,yd = destination coordinates +// xs,ys = source coordinates +// wd = destination width +// ws = source width +// Overflow in X direction is not checked! +void DrawImgLine(sCanvas* canvas, sCanvas* src, int xd, int yd, int xs, int ys, int wd, int ws); + +#endif // _CANVAS_H diff --git a/MCUME_pico/picovga_t4/define.h b/MCUME_pico/picovga_t4/define.h new file mode 100755 index 0000000..a88759c --- /dev/null +++ b/MCUME_pico/picovga_t4/define.h @@ -0,0 +1,198 @@ + +// **************************************************************************** +// +// VGA common definitions of C and ASM +// +// **************************************************************************** + +#include "vga_config.h" // VGA configuration + +#define LAYERS_MAX 4 // max. 
number of layers (should be 4) + +#define BLACK_MAX MAXX // size of buffer with black color (used to clear rest of unused line) + +// VGA PIO program +#define BASE_OFFSET 17 // offset of base layer program +#define LAYER_OFFSET 0 // offset of overlapped layer program + +// layer program +#define LAYERPROG_BASE 0 // program of base layer (overlapped layers are OFF) +#define LAYERPROG_KEY 1 // layer with key color +#define LAYERPROG_BLACK 2 // layer with black key color +#define LAYERPROG_WHITE 3 // layer with white key color +#define LAYERPROG_MONO 4 // layer with mono pattern or simple color +#define LAYERPROG_RLE 5 // layer with RLE compression + +#define LAYERPROG_NUM 6 // number of layer programs + +// layer mode (CPP = clock cycles per pixel) +// Control buffer: 16 bytes +// Data buffer: 4 bytes +// fast sprites can be up Control buffer: width*2 bytes +// sprites Data buffer: width bytes +#define LAYERMODE_BASE 0 // base layer +#define LAYERMODE_KEY 1 // layer with key color +#define LAYERMODE_BLACK 2 // layer with black key color +#define LAYERMODE_WHITE 3 // layer with white key color +#define LAYERMODE_MONO 4 // layer with mono pattern +#define LAYERMODE_COLOR 5 // layer with simple color +#define LAYERMODE_RLE 6 // layer with RLE compression +#define LAYERMODE_SPRITEKEY 7 // layer with sprites with key color +#define LAYERMODE_SPRITEBLACK 8 // layer with sprites with black key color +#define LAYERMODE_SPRITEWHITE 9 // layer with sprites with white key color +#define LAYERMODE_FASTSPRITEKEY 10 // layer with fast sprites with key color +#define LAYERMODE_FASTSPRITEBLACK 11 // layer with fast sprites with black key color +#define LAYERMODE_FASTSPRITEWHITE 12 // layer with fast sprites with white key color +#define LAYERMODE_PERSPKEY 13 // layer with key color and image with transformation matrix +#define LAYERMODE_PERSPBLACK 14 // layer with black key color and image with transformation matrix +#define LAYERMODE_PERSPWHITE 15 // layer with white key color and 
image with transformation matrix +#define LAYERMODE_PERSP2KEY 16 // layer with key color and double-pixel image with transformation matrix +#define LAYERMODE_PERSP2BLACK 17 // layer with black key color and double-pixel image with transformation matrix +#define LAYERMODE_PERSP2WHITE 18 // layer with white key color and double-pixel image with transformation matrix + +#define LAYERMODE_NUM 19 // number of overlapped layer modes + +// Structure of sprite sSprite (on change update structure sSprite in vga_layer.h) +#define SSPRITE_IMG 0 // u8* img; // pointer to image data +#define SSPRITE_X0 4 // u8* x0; // pointer to pixel offset of start of lines/4 (used with fast sprites) +#define SSPRITE_W0 8 // u8* w0; // pointer to pixel length of length of lines/4 (used with fast sprites) +#define SSPRITE_KEYCOL 12 // u32 keycol; // key color +#define SSPRITE_X 16 // s16 x; // sprite X-coordinate on the screen +#define SSPRITE_Y 18 // s16 y; // sprite Y-coordinate on the screen +#define SSPRITE_W 20 // u16 w; // sprite width +#define SSPRITE_H 22 // u16 h; // sprite height +#define SSPRITE_WB 24 // u16 wb; // sprite pitch (number of bytes between lines) + // u16 res; // ...reserved, structure align +#define SSPRITE_SIZE 28 // size of sSprite structure + +// Structure of layer screen sLayer (on change update structure sLayer in vga_layer.h) +#define SLAYER_IMG 0 // const u8* img; // pointer to image in current layer format, or sprite list +#define SLAYER_PAR 4 // const void* par; // additional parameter (RLE index table, transformation matrix) +#define SLAYER_INIT 8 // u32 init; // init word sent on start of scanline +#define SLAYER_KEYCOL 12 // u32 keycol; // key color +#define SLAYER_TRANS 16 // u16 trans; // trans count +#define SLAYER_X 18 // s16 x; // start X coordinate +#define SLAYER_Y 20 // s16 y; // start Y coordinate +#define SLAYER_W 22 // u16 w; // width in pixels +#define SLAYER_H 24 // u16 h; // height +#define SLAYER_WB 26 // u16 wb; // image width in bytes 
(pitch of lines) +#define SLAYER_MODE 28 // u8 mode; // layer mode +#define SLAYER_HORIZ 29 // s8 horiz; // horizon of perspective projection/4 (only with LAYERMODE_PERSP* modes, 0=no perspective, <0 ceiling) +#define SLAYER_XBITS 30 // u8 xbits; // number of bits of width of source image (only with LAYERMODE_PERSP* modes) +#define SLAYER_YBITS 31 // u8 ybits; // number of bits of height of source image (only with LAYERMODE_PERSP* modes) +#define SLAYER_SPRITENUM 32 // u16 spritenum; // number of sprites +#define SLAYER_ON 34 // Bool on; // layer is ON +#define SLAYER_CPP 35 // u8 cpp; // current clock pulses per pixel (used to calculate X coordinate) +#define SLAYER_SIZE 36 // size of sLayer structure + +// Structure of video segment sSegm (on change update structure sSegm in vga_screen.h) +#define SSEGM_WIDTH 0 // u16 width; // width of this video segment in pixels (must be multiple of 4, 0=inactive segment) +#define SSEGM_WB 2 // u16 wb; // pitch - number of bytes between lines +#define SSEGM_OFFX 4 // s16 offx; // display offset at X direction (must be multiple of 4) +#define SSEGM_OFFY 6 // s16 offy; // display offset at Y direction +#define SSEGM_WRAPX 8 // u16 wrapx; // wrap width in X direction (number of pixels, must be multiple of 4 and > 0) + // text modes: wrapx must be multiple of 8 +#define SSEGM_WRAPY 10 // u16 wrapy; // wrap width in Y direction (number of lines, cannot be 0) +#define SSEGM_DATA 12 // const void* data; // pointer to video buffer with image data +#define SSEGM_FORM 16 // u8 form; // graphics format GF_* +#define SSEGM_DBLY 17 // bool dbly; // double Y (2 scanlines per 1 image line) +#define SSEGM_PAR3 18 // u16 par3; // parameter 3 +#define SSEGM_PAR 20 // u32 par; // parameter 1: color, pointer to palettes, tile source, font +#define SSEGM_PAR2 24 // u32 par2; // parameter 2 +#define SSEGM_SIZE 28 // size of sSegm structure + +// Structure of video strip sStrip (on change update structure sStrip in vga_screen.h) +#define
SSTRIP_HEIGHT 0 // u16 height; // height of this strip in number of scanlines +#define SSTRIP_NUM 2 // u16 num; // number of video segments +#define SSTRIP_SEG 4 // sSegm seg[SEGMAX]; +#define SSTRIP_SIZE (4+SSEGM_SIZE*SEGMAX) // size of sStrip structure (= 4 + 28*8 = 228 bytes) + +// Structure of video screen sScreen (on change update structure sScreen in vga_screen.h) +#define SSCREEN_NUM 0 // u16 num; // number of video strips +#define SSCREEN_BACKUP 2 // u16 num_backup; // backup number of video strips during display OFF +#define SSCREEN_STRIP 4 // sStrip strip[STRIPMAX]; // list of video strips +#define SSCREEN_SIZE (4+SSTRIP_SIZE*STRIPMAX) // size of sScreen structure (= 4 + 228*8 = 1828 bytes) + +// --- graphics formats +// There are 3 groups of formats - separated due internal reasons, do not mix them. + +// 1st group of formats - rendered specially +#define GF_COLOR 0 // simple color (par=color pattern 4-pixels even line, par2=color pattern 4-pixels odd line) +// Data buffer: width bytes (320 pixels: 320 bytes) +// Control buffer: 8 bytes + +// 2nd group of formats - rendering into control buffer cbuf +#define GF_GRAPH8 1 // native 8-bit graphics (X1Y1R2G2B2) - fast, transfers "as is" to PIO + // (num = number of pixels/4 = number of bytes/4) +// Control buffer: 8 bytes (320 pixels: 8 bytes) +#define GF_TILE 2 // tiles (par = tile table with one column of tiles, + // par2 = tile height, par3 = tile width as multiple of 4) +// Control buffer: width/tile width*8 bytes (320 pixels of 32x32: 80 bytes) +#define GF_TILE2 3 // alternate tiles (par = tile table with one row of tiles, + // par2 = LOW tile height, HIGH tile width bytes, + // par3 = tile width as multiple of 4) +// Control buffer: width/tile width*8 bytes (320 pixels of 32x32: 80 bytes) +#define GF_PROGRESS 4 // horizontal progress indicator (data = values 0..255 of 4-pixels in rows, + // par = scanline gradient < data, par2 = scanline gradient >= data) +// Control buffer: 16 bytes +#define GF_GRAD1 
5 // gradient with 1 line +// Control buffer: 8 bytes (320 pixels: 8 bytes) +#define GF_GRAD2 6 // gradient with 2 lines +// Control buffer: 8 bytes (320 pixels: 8 bytes) + +#define GF_GRP2MIN GF_GRAPH8 // 2nd group minimal format +#define GF_GRP2MAX GF_GRAD2 // 2nd group maximal format + +// 3rd group of formats - rendering into data buffer dbuf +// Control buffer: 8 bytes +// Data buffer: width bytes +#define GF_GRAPH4 7 // 4-bit graphics (num = number of pixels/4 = number of bytes/2; + // par = pointer to 16-color palette translation table) +#define GF_GRAPH2 8 // 2-bit graphics (num = number of pixels/4 = number of bytes, + // par = pointer to 4-color palette translation table) +#define GF_GRAPH1 9 // 1-bit graphics (num = number of pixels/8 = number of bytes, + // par = 2 colors of palettes) +#define GF_MTEXT 10 // 8-pixel mono text (num = number of characters, font is 8-bit width, + // par = pointer to 1-bit font, par2 = 2 colors of palettes) +#define GF_ATEXT 11 // 8-pixel attribute text, character + 2x4 bit attributes + // (num = number of characters, font is 8-bit width, + // par = pointer to 1-bit font, par2 = pointer to 16 colors of palettes) +#define GF_FTEXT 12 // 8-pixel foreground color text, character + foreground color + // (num = number of characters, font is 8-bit width, + // par = pointer to 1-bit font, par2 = background color) +#define GF_CTEXT 13 // 8-pixel color text, character + background color + foreground color + // (num = number of characters, font is 8-bit width, + // par = pointer to 1-bit font) +#define GF_GTEXT 14 // 8-pixel gradient text (par = pointer to 1-bit font, par2 = pointer to color array) +#define GF_DTEXT 15 // 8-pixel double gradient text (par = pointer to 1-bit font, par2 = pointer to color array) +#define GF_LEVEL 16 // level graph (data=samples 0..255, par = 2 colors of palettes, par2 = Y zero level 0..255) +#define GF_LEVELGRAD 17 // level gradient graph (data = samples 0..255, par = scanline gradient < data, par2 = 
scanline gradient >= data) +#define GF_OSCIL 18 // oscilloscope pixel graph (data=samples 0..255, par = 2 colors of palettes, par2 = height of pixels - 1) +#define GF_OSCLINE 19 // oscilloscope line graph (data=samples 0..255, par = 2 colors of palettes) +#define GF_PLANE2 20 // 4 colors on 2 graphic planes (data=graphic, par=offset of 2nd graphic plane, + // par2 = pointer to 4-color palette translation table) +#define GF_ATTRIB8 21 // 2x4 bit color attribute per 8x8 pixel sample (data=mono graphic, par=offset of color attributes, + // par2 = pointer to 16-color palette table) +#define GF_GRAPH8MAT 22 // 8-bit graphics with 2D matrix transformation, using hardware interpolator inter1 (inter1 state is not saved during interrupt) + // (data=image, par=pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL)), + // par2 LOW=number of bits of image width, par2 HIGH=number of bits of image height) +#define GF_GRAPH8PERSP 23 // 8-bit graphics with perspective, using hardware interpolator inter1 (inter1 state is not saved during interrupt) + // (data=image, par=pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL)), + // par2 LOW=number of bits of image width, par2 HIGH=number of bits of image height, + // par3=horizon offset) +#define GF_TILEPERSP 24 // tiles with perspective, using hardware interpolators inter0 and inter1 (their state is not saved during interrupt) + // (data=tile map, par=one column of tiles, par2=pointer to integer matrix, + // wb LOW=number of bits of map width, wb HIGH=number of bits of map height, + // par3 LOW=number of bits of tile size, par3 HIGH=horizon offset/4 or 0=no perspective or <0=ceiling, + // wrapy=segment height) +#define GF_TILEPERSP15 25 // tiles with perspective, 1.5 pixels (parameters as GF_TILEPERSP) +#define GF_TILEPERSP2 26 // tiles with perspective, double pixels (parameters as GF_TILEPERSP) +#define GF_TILEPERSP3 27 // tiles with perspective, triple pixels (parameters as GF_TILEPERSP) +#define
GF_TILEPERSP4 28 // tiles with perspective, quadruple pixels (parameters as GF_TILEPERSP) + +#define GF_GRP3MIN GF_GRAPH4 // 3rd group minimal format +#define GF_GRP3MAX GF_TILEPERSP4 // 3rd group maximal format + + +#define FRACT 12 // number of bits of fractional part of fractint number (use max. 13, min. 8) +#define FRACTMUL (1<>5)&0x07)<<5) | (((g>>5)&0x07)<<2) | (((b>>6)&0x3)<<0) ) + + +// system includes +#include + +// SDK includes +#include "pico.h" +#include "pico/stdlib.h" +#include "pico/multicore.h" +#include "pico/sync.h" +#include "pico/platform.h" +#include "pico/sem.h" +#include "hardware/clocks.h" +#include "hardware/dma.h" +#include "hardware/gpio.h" +#include "hardware/pio.h" +#include "hardware/irq.h" +#include "hardware/divider.h" +#include "hardware/structs/bus_ctrl.h" +#include "pico/binary_info.h" +#include "pico/printf.h" +#include "pico/float.h" +#include "pico/int64_ops.h" + + +// PicoVGA includes +#include "define.h" // common definitions of C and ASM +#include "canvas.h" // canvas +#include "vga_vmode.h" // VGA videomodes +#include "vga_layer.h" // VGA layers +#include "vga_screen.h" // VGA screen layout +#include "vga_pal.h" // VGA palette +#include "vga.h" // VGA output +#include "picovga.pio.h" // PIO + diff --git a/MCUME_pico/picovga_t4/picovga.pio b/MCUME_pico/picovga_t4/picovga.pio new file mode 100644 index 0000000..fac181f --- /dev/null +++ b/MCUME_pico/picovga_t4/picovga.pio @@ -0,0 +1,277 @@ + +; ============================================================================ +; VGA output - base layer (15 instructions) +; ============================================================================ +; Control word of "dark" command (left shift): +; - bit 0..7 (8 bits) output color (set to 0 if not used) +; - bit 8..26 (19 bits) loop counter N +; - bit 27..31 (5 bits) jump address +; Control word of other commands (left shift): +; - bit 0..26 (27 bits) loop counter N +; - bit 27..31 (5 bits) jump address +; Clocks per pixel:
minimum 2, maximum 17. + +.program vga +.side_set 1 ; SYNC output (no opt, wait can be max. 15) +.origin 17 ; must load at offset 17 (BASE_OFF) + +; ===== [3 instructions] SYNC pulse, N=delay in clock cycles - 3 + +public sync: + out x,27 side 1 ; [1] get length of SYNC pulse - 3, start of SYNC pulse +sync_loop: + jmp x--,sync_loop side 1 ; [N+1] loop +public entry: + out pc,5 side 1 ; [1] get next control word and jump to function + +; ===== [4 instructions] DARK (or color) pulse, N=delay in clock cycles - 4 +; Sets color output at time +1 + +public dark: + out x,19 side 0 ; [1] get length of delay pulse - 4, start of delay pulse + out pins,8 side 0 ; [1] dark output (or color) +dark_loop: + jmp x--,dark_loop side 0 ; [N+1] loop + out pc,5 side 0 ; [1] get next control word and jump to function + +; ===== [4 instructions] layer synchronisation (delay 9 clock cycles) +; Output first pixel at time +9 after IRQ + +public irqset: + irq clear 4 side 0 ; [1] clear IRQ4 flag + out null,27 side 0 ; [1] destroy command parameter + irq set 4 side 0 [5] ; [6] set IRQ flag +.wrap_target + out pc,5 side 0 ; [1] get next control word and jump to function + +; ===== [4 instructions] output pixels at CPP clock, N=number of pixels-2 (number of pixels must be multiple of 4) +; Output first pixel at time +1 +; Missing 2 clock cycles after last pixel + +public output: + out x,27 side 0 ; [1] get number of pixels-2 +output_loop: +public extra1: + out pins,8 side 0 [0] ; [1+CPP-2] output pixels (set extra wait CPP-2) + jmp x--,output_loop side 0 ; [1] loop +public extra2: + out pins,8 side 0 [0] ; [1+CPP-2] output pixels (set extra wait CPP-2) + ; missing 1 extra clock cycles - add it to front porch + ; wrap jump to instruction out pc,5 +.wrap + +; ============================================================================ +; VGA output - layer with key color (13 instructions) +; ============================================================================ +; Control word (left shift): 
+; - bit 0..10 (11 bits) number of pixels - 1 (number of pixels must be multiple of 4) +; - bit 11..18 (8 bits) key color +; - bit 19..31 (13 bits) start delay D = clock cycles - 7 between irq and first pixel +; Clocks per pixel: minimum 6, maximum 37. + +.program keylayer +.origin 0 ; must load at offset 0 (LAYER_OFF) + + ; idle wait +.wrap_target +public idle: + pull block ; [1] idle wait + +public entry: + wait 0 irq 4 ; [1] wait for IRQ sync goes 0 + out x,13 ; [1] get length of delay - 7 +layer_wait: + jmp x--,layer_wait ; [1] delay loop + out y,8 ; [1] get key color + out x,11 ; [1] get number of pixels-1 +layer_loop: + mov isr,x ; [1] save pixel counter into ISR + out x,8 ; [1] get output pixel + jmp x!=y,layer_2 ; [1] jump if pixel is not transparent + jmp layer_3 ; [1] jump to end of loop +layer_2: + mov pins,x ; [1] output pixel to pins +layer_3: +public extra1: + mov x,isr [0] ; [1+CPP-6] return pixel counter (set extra wait CPP-6) + jmp x--,layer_loop ; [1] loop next pixel + ; wrap jump to idle +.wrap + +; ============================================================================ +; VGA output - layer with black key color (11 instructions) +; ============================================================================ +; Control word (left shift): +; - bit 0..15 (16 bits) number of pixels - 1 (number of pixels must be multiple of 4) +; - bit 16..31 (16 bits) start delay D = clock cycles - 5 between irq and first pixel +; Cannot display black pixel (it is used as transparency) +; Clocks per pixel: minimum 4, maximum 34. 
+ +.program blacklayer +.origin 0 ; must load at offset 0 (LAYER_OFF) + + ; idle wait +.wrap_target +public idle: + pull block ; [1] idle wait + +public entry: + wait 0 irq 4 ; [1] wait for IRQ sync goes 0 + out x,16 ; [1] get length of delay - 5 +layer_wait: + jmp x--,layer_wait ; [1] delay loop + out x,16 ; [1] get number of pixels-1 +layer_loop: + out y,8 ; [1] get output pixel + jmp !y,layer_2 ; [1] jump if pixel is transparent (color = 0) + mov pins,y ; [1] output pixel to pins +public extra1: + jmp x--,layer_loop [0] ; [1+CPP-4] loop next pixel (set extra wait CPP-4) + jmp idle ; [1] go idle +layer_2: +public extra2: + jmp x--,layer_loop [0] ; [1+CPP-3] loop next pixel (set extra wait CPP-3) + ; wrap jump to idle +.wrap + +; ============================================================================ +; VGA output - layer with white key color (10 instructions) +; ============================================================================ +; Control word (left shift): +; - bit 0..15 (16 bits) number of pixels - 1 (number of pixels must be multiple of 4) +; - bit 16..31 (16 bits) start delay D = clock cycles - 5 between irq and first pixel +; Cannot display white pixel (it is used as transparency). Source pixels must be incremented + 1. +; Clocks per pixel: minimum 4, maximum 35. 
+ +.program whitelayer +.origin 0 ; must load at offset 0 (LAYER_OFF) + + ; idle wait +.wrap_target +public idle: + pull block ; [1] idle wait + +public entry: + wait 0 irq 4 ; [1] wait for IRQ sync goes 0 + out x,16 ; [1] get length of delay - 5 +layer_wait: + jmp x--,layer_wait ; [1] delay loop + out x,16 ; [1] get number of pixels-1 +layer_loop: + out y,8 ; [1] get output pixel + jmp y--,layer_2 ; [1] jump if pixel is not transparent (color != 0) + jmp layer_3 ; [1] jump to end of loop +layer_2: + mov pins,y ; [1] output pixel to pins +public extra1: +layer_3: + jmp x--,layer_loop [0] ; [1+CPP-4] loop next pixel (set extra wait CPP-4) + ; wrap jump to idle +.wrap + +; ============================================================================ +; VGA output - layer with mono or color pattern (16 instructions) +; ============================================================================ +; Control word (left shift): +; - bit 0 (1 bit) flag 0=use color opaque mode, 1=use mono transparent mode +; - bit 1..11 (11 bits) number of pixels - 1 (number of pixels must be multiple of 32 in mono, or 4 in color) +; - bit 12..19 (8 bits) key color +; - bit 20..31 (12 bits) start delay D = clock cycles - 8 between irq and first mono pixel, or 6 for color pixel +; Mono, clocks per pixel: minimum 4, maximum 35. +; Color, clocks per pixel: minimum 2, maximum 33.
+ +.program monolayer +.origin 0 ; must load at offset 0 (LAYER_OFF) + +.wrap_target +public idle: + pull block ; [1] idle wait + +public entry: + wait 0 irq 4 ; [1] wait for IRQ sync goes 0 + out x,12 ; [1] get length of delay - 8 (or 6 in color) +layer_wait: + jmp x--,layer_wait ; [1] delay loop + out isr,8 ; [1] get key color + out y,11 ; [1] get number of pixels-1 + out x,1 ; [1] get mode flag + jmp !x,layer_color ; [1] 0=use color mode +layer_loop: + out x,1 ; [1] get one bit + jmp !x,layer_out ; [1] bit=0, output pixel + jmp layer_skip ; [1] jump to end of loop +layer_out: + mov pins,isr ; [1] output pixel +layer_skip: +public extra1: + jmp y--,layer_loop [0] ; [1+CPP-4] loop next pixel (set extra wait CPP-4) + jmp idle + +layer_color: + out pins,8 +public extra2: + jmp y--,layer_color [0] ; [1+CPP-2] loop next pixel (set extra wait CPP-2) + ; wrap jump to idle +.wrap + +; ============================================================================ +; VGA output - layer with RLE compression (17 instructions) +; ============================================================================ +; Input is left shifted with byte-swap (lower byte comes first) +; Requires 3 clock cycles per pixel. +; Clocks per pixel: minimum 3, maximum 32. 
+
+.program rlelayer
+.origin 0		; must load at offset 0 (LAYER_OFF)
+
+	; [1 instruction] idle wait (tokens: {8} ignored, {8} 'idle' command)
+public idle:
+	out pc,8			; [1] idle wait
+
+	; [4 instructions] start
+public entry:
+	wait 0 irq 4			; [1] wait until IRQ 4 (sync) is 0
+	out x,32 [2]			; [3] get length of delay - 7
+entry_wait:
+	jmp x--,entry_wait		; [1] delay
+	jmp raw_next			; [1] go fetch the first token
+
+	; [1 instruction] skip N+2 (2..257) pixels (tokens: {8} N = number of pixels - 2, {8} 'skip' command)
+public skip:
+public extra1:
+	jmp x--,skip [0]		; [1+CPP-1] wait (set extra wait CPP-1)
+
+	; [1 instruction] skip 1 pixel (tokens: {8} ignored, {8} 'skip1' command)
+public skip1:
+public extra2:
+	jmp raw_next [0]		; [1+CPP-3] jump (set extra wait CPP-3)
+
+	; [4 instructions] repeat N+3 (3..258) pixels (tokens: {8} pixel to repeat, {8} 'run' command, {8} N = number of pixels - 3)
+public run:
+public extra3:
+	mov pins,x [0]			; [1+CPP-2] output pixel (set extra wait CPP-2)
+	out y,8				; [1] get counter N
+run_loop:
+public extra4:
+	mov pins,x [0]			; [1+CPP-2] output pixel (set extra wait CPP-2)
+	jmp y--,run_loop		; [1] next pixel
+
+	; [1 instruction] output 1 RAW pixel (tokens: {8} pixel, {8} 'raw1' command)
+public raw1:
+public extra5:
+	mov pins,x [0]			; [1+CPP-3] output pixel (set extra wait CPP-3)
+.wrap_target
+raw_next:
+	out x,8				; [1] get first token byte -> X (counter N or pixel, depending on the command)
+	out pc,8			; [1] jump to the command given by the next token byte
+
+	; [5 instructions] output N+2 (2..257) RAW pixels (tokens: {8} N = number of pixels - 2, {8} 'raw' command, {(N+2)*8} pixels)
+public raw:		; 14:
+raw_loop:
+public extra6:
+	out pins,8 [0]			; [1+CPP-2] output pixel (set extra wait CPP-2)
+	jmp x--,raw_loop		; [1] loop next pixel
+public extra7:
+	out pins,8 [0]			; [1+CPP-3] output pixel (set extra wait CPP-3)
+	; wrap jump to raw_next
+.wrap
diff --git a/MCUME_pico/picovga_t4/render/vga_atext.S b/MCUME_pico/picovga_t4/render/vga_atext.S
new file mode 100755
index 0000000..038f096
--- /dev/null
+++ b/MCUME_pico/picovga_t4/render/vga_atext.S
@@ -0,0 +1,362 @@
+
+// ****************************************************************************
+//
+// VGA render GF_ATEXT
+//
+// ****************************************************************************
+// u32 par SSEGM_PAR pointer to the font
+// u32 par2 SSEGM_PAR2 pointer to 16 colors of palettes
+// u16 par3 font height
+
+#include "../define.h"		// common definitions of C and ASM
+#include "hardware/regs/sio.h"	// registers of hardware divider
+#include "hardware/regs/addressmap.h" // SIO base address
+
+	.syntax unified
+	.section .time_critical.Render, "ax"
+	.cpu cortex-m0plus
+	.thumb			// use 16-bit instructions
+
+// render font pixel mask
+.extern RenderTextMask		// u32 RenderTextMask[512];
+
+// extern "C" u8* RenderAText(u8* dbuf, int x, int y, int w, sSegm* segm)
+
+// render 8-pixel attribute text GF_ATEXT
+// R0 ... destination data buffer
+// R1 ... start X coordinate (in pixels, must be multiple of 4)
+// R2 ... start Y coordinate (in graphics lines)
+// R3 ... width to display (must be multiple of 4 and > 0)
+// [stack] ... segm video segment sSegm
+// Output new pointer to destination data buffer.
+// 320 pixels takes 11.9 us on 151 MHz.
+
+.thumb_func
+.global RenderAText
+RenderAText:
+
+	// push registers
+	push {r1-r7,lr}
+	mov r4,r8
+	push {r4}
+
+// Stack content:
+// SP+0: R8
+// SP+4: R1 start X coordinate
+// SP+8: R2 start Y coordinate (later: base pointer to text data row)
+// SP+12: R3 width to display
+// SP+16: R4
+// SP+20: R5
+// SP+24: R6
+// SP+28: R7
+// SP+32: LR
+// SP+36: video segment (later: wrap width in X direction)
+
+	// get pointer to video segment -> R4
+	ldr r4,[sp,#36]		// load video segment -> R4
+
+	// start divide Y/font height
+	ldr r6,RenderAText_pSioBase // get address of SIO base -> R6
+	str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate
+	ldrh r2,[r4,#SSEGM_PAR3] // font height -> R2
+	str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height
+
+// - now we must wait at least 8 clock cycles to get result of division
+
+	// [6] get wrap width -> [SP+36]
+	ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width
+	movs r7,#3		// [1] mask to align to 32-bit
+	bics r5,r7		// [1] align wrap
+	str r5,[sp,#36]		// [2] save wrap width
+
+	// [1] align X coordinate to 32-bit
+	bics r1,r7		// [1]
+
+	// [3] align remaining width
+	bics r3,r7		// [1]
+	str r3,[sp,#12]		// [2] save new width
+
+	// load result of division Y/font_height -> R5 Y relative at row, R2 Y row
+	// Note: QUOTIENT must be read last
+	ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row
+	ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R2, index of row
+
+	// pointer to font line -> R3
+	lsls r5,#8		// multiply Y relative * 256 (1 font line is 256 bytes long)
+	ldr r3,[r4,#SSEGM_PAR]	// get pointer to font
+	add r3,r5		// line offset + font base -> pointer to current font line R3
+
+	// base pointer to text data (without X) -> [SP+8], R2
+	ldrh r5,[r4,#SSEGM_WB]	// get pitch of rows
+	muls r2,r5		// Y * WB -> offset of row in text buffer
+	ldr r5,[r4,#SSEGM_DATA]	// pointer to data
+	add r2,r5		// base address of text buffer
+	str r2,[sp,#8]		// save pointer to text buffer
+
+	// prepare pointer to text data with X -> R2 (1 position is 1 character + 1 attributes)
+	lsrs r6,r1,#3		// convert X to character index (1 character is 8 pixels width)
+	add r2,r6		// add index
+	add r2,r6		// add index*2, pointer to source text buffer -> R2
+
+	// prepare pointer to palettes -> R8
+	ldr r5,[r4,#SSEGM_PAR2]	// get pointer to palette table -> R5
+	mov r8,r5		// save pointer to palette table
+
+	// prepare pointer to conversion table -> LR
+	ldr r5,RenderAText_Addr	// get pointer to conversion table -> R5
+	mov lr,r5		// conversion table -> LR
+
+// ---- render 2nd half of first character
+// R0 ... pointer to destination data buffer
+// R1 ... start X coordinate
+// R2 ... pointer to source text buffer
+// R3 ... pointer to font line
+// R4 ... background color (expanded to 32-bit)
+// R5 ... (temporary)
+// R6 ... foreground color (expanded to 32-bit)
+// R7 ... (temporary)
+// R8 ... pointer to palette table
+// LR ... pointer to conversion table
+// [SP+8] ... base pointer to text data (without X)
+// [SP+12] ... remaining width
+// [SP+36] ... wrap width
+
+	// check bit 2 of X coordinate - check if image starts with 2nd half of first character
+	lsls r6,r1,#29		// check bit 2 of X coordinate (shifted into the sign bit)
+	bpl 2f			// bit 2 not set, starting even 4-pixels
+
+	// [6] load background color -> R4
+	ldrb r6,[r2,#1]		// [2] load color attributes -> R6
+	mov r5,r8		// [1] get palette table -> R5
+	lsrs r4,r6,#4		// [1] prepare index of background color
+	ldrb r4,[r5,r4]		// [2] load background color
+
+	// [4] load foreground color -> R6
+	lsls r6,#28		// [1] isolate lower 4 bits
+	lsrs r6,#28		// [1] mask lower 4 bits
+	ldrb r6,[r5,r6]		// [2] load foreground color
+
+	// [4] expand background color to 32-bit -> R4
+	lsls r5,r4,#8		// [1] shift background color << 8
+	orrs r5,r4		// [1] color expanded to 16 bits
+	lsls r4,r5,#16		// [1] shift 16-bit color << 16
+	orrs r4,r5		// [1] color expanded to 32 bits
+
+	// [4] expand foreground color to 32-bit -> R6
+	lsls r5,r6,#8		// [1] shift foreground color << 8
+	orrs r5,r6		// [1] color expanded to 16 bits
+	lsls r6,r5,#16		// [1] shift 16-bit color << 16
+	orrs r6,r5		// [1] color expanded to 32 bits
+
+	// [1] XOR foreground and background color -> R6
+	eors r6,r4		// [1] XOR foreground color with background color
+
+	// [5] load font sample -> R5
+	ldrb r5,[r2,#0]		// [2] load character from source text buffer -> R5
+	ldrb r5,[r3,r5]		// [2] load font sample -> R5
+	adds r2,#2		// [1] shift pointer to source text buffer
+
+	// [2] prepare conversion table -> R5
+	lsls r5,#3		// [1] multiply font sample * 8
+	add r5,lr		// [1] add pointer to conversion table
+
+	// [6] convert second 4 pixels (lower 4 bits)
+	ldr r7,[r5,#4]		// [2] load mask for lower 4 bits
+	ands r7,r6		// [1] mask foreground color
+	eors r7,r4		// [1] combine with background color
+	stmia r0!,{r7}		// [2] store second 4 pixels
+
+	// shift X coordinate
+	adds r1,#4		// shift X coordinate
+
+	// check end of segment
+	ldr r7,[sp,#36]		// load wrap width
+	cmp r1,r7		// end of segment?
+	blo 1f			// no, X is still below wrap width
+	movs r1,#0		// reset X coordinate
+	ldr r2,[sp,#8]		// get base pointer to text data -> R2
+
+	// shift remaining width
+1:	ldr r7,[sp,#12]		// get remaining width
+	subs r7,#4		// shift width
+	str r7,[sp,#12]		// save new width
+
+	// prepare wrap width - start X -> R7
+2:	ldr r7,[sp,#36]		// load wrap width
+	subs r7,r1		// pixels remaining to end of segment
+
+// ---- start outer loop, render one part of segment
+// Outer loop variables (* prepared before outer loop):
+// R0 ... *pointer to destination data buffer
+// R1 ... number of characters to generate in one part of segment
+// R2 ... *pointer to source text buffer
+// R3 ... *pointer to font line
+// R4 ... background color (expanded to 32-bit)
+// R5 ... (temporary)
+// R6 ... foreground color (expanded to 32-bit)
+// R7 ... *wrap width of this segment, later: temporary
+// R8 ... *pointer to palette table
+// LR ... *pointer to conversion table
+// [SP+8] ... *base pointer to text data (without X)
+// [SP+12] ... *remaining width
+// [SP+36] ... *wrap width
+
+RenderAText_OutLoop:
+
+	// limit wrap width by total width -> R7
+	ldr r6,[sp,#12]		// get remaining width
+	cmp r7,r6		// compare with wrap width
+	bls 2f			// width is OK
+	mov r7,r6		// limit wrap width
+
+	// check if remain whole characters
+2:	cmp r7,#8		// check number of remaining pixels
+	bhs 5f			// enough characters remain
+
+	// check if 1st part of last character remains
+	cmp r7,#4		// check 1st part of last character
+	blo 3f			// all done
+
+// ---- render 1st part of last character
+
+RenderAText_Last:
+
+	// [6] load background color -> R4
+	ldrb r6,[r2,#1]		// [2] load color attributes -> R6
+	mov r5,r8		// [1] get palette table -> R5
+	lsrs r4,r6,#4		// [1] prepare index of background color
+	ldrb r4,[r5,r4]		// [2] load background color
+
+	// [4] load foreground color -> R6
+	lsls r6,#28		// [1] isolate lower 4 bits
+	lsrs r6,#28		// [1] mask lower 4 bits
+	ldrb r6,[r5,r6]		// [2] load foreground color
+
+	// [4] expand background color to 32-bit -> R4
+	lsls r5,r4,#8		// [1] shift background color << 8
+	orrs r5,r4		// [1] color expanded to 16 bits
+	lsls r4,r5,#16		// [1] shift 16-bit color << 16
+	orrs r4,r5		// [1] color expanded to 32 bits
+
+	// [4] expand foreground color to 32-bit -> R6
+	lsls r5,r6,#8		// [1] shift foreground color << 8
+	orrs r5,r6		// [1] color expanded to 16 bits
+	lsls r6,r5,#16		// [1] shift 16-bit color << 16
+	orrs r6,r5		// [1] color expanded to 32 bits
+
+	// [1] XOR foreground and background color -> R6
+	eors r6,r4		// [1] XOR foreground color with background color
+
+	// [5] load font sample -> R5
+	ldrb r5,[r2,#0]		// [2] load character from source text buffer -> R5
+	ldrb r5,[r3,r5]		// [2] load font sample -> R5
+	adds r2,#2		// [1] shift pointer to source text buffer
+
+	// [2] prepare conversion table -> R5
+	lsls r5,#3		// [1] multiply font sample * 8
+	add r5,lr		// [1] add pointer to conversion table
+
+	// [6] convert first 4 pixels (higher 4 bits)
+	ldr r1,[r5,#0]		// [2] load mask for higher 4 bits
+	ands r1,r6		// [1] mask foreground color
+	eors r1,r4		// [1] combine with background color
+	stmia r0!,{r1}		// [2] store first 4 pixels
+
+	// check if continue with next segment
+	ldr r2,[sp,#8]		// get base pointer to text data -> R2
+	cmp r7,#4
+	bhi RenderAText_OutLoop
+
+	// pop registers and return
+3:	pop {r4}
+	mov r8,r4
+	pop {r1-r7,pc}
+
+// ---- prepare to render whole characters
+
+	// prepare number of whole characters to render -> R1
+5:	lsrs r1,r7,#2		// shift to get number of characters*2
+	lsls r5,r1,#2		// shift back to get number of pixels, rounded down -> R5
+	subs r6,r5		// get remaining width
+	str r6,[sp,#12]		// save new remaining width
+	subs r1,#1		// number of characters*2 - 1
+
+// ---- [41*N-1] start inner loop, render characters in one part of segment
+// Inner loop variables (* prepared before inner loop):
+// R0 ... *pointer to destination data buffer
+// R1 ... *number of characters to generate*2 - 1 (loop counter)
+// R2 ... *pointer to source text buffer
+// R3 ... *pointer to font line
+// R4 ... background color (expanded to 32-bit)
+// R5 ... font sample
+// R6 ... foreground color (expanded to 32-bit)
+// R7 ... (temporary)
+// R8 ... *pointer to palette table
+// LR ... *pointer to conversion table
+
+RenderAText_InLoop:
+
+	// [6] load background color -> R4
+	ldrb r6,[r2,#1]		// [2] load color attributes -> R6
+	mov r5,r8		// [1] get palette table -> R5
+	lsrs r4,r6,#4		// [1] prepare index of background color
+	ldrb r4,[r5,r4]		// [2] load background color
+
+	// [4] load foreground color -> R6
+	lsls r6,#28		// [1] isolate lower 4 bits
+	lsrs r6,#28		// [1] mask lower 4 bits
+	ldrb r6,[r5,r6]		// [2] load foreground color
+
+	// [4] expand background color to 32-bit -> R4
+	lsls r5,r4,#8		// [1] shift background color << 8
+	orrs r5,r4		// [1] color expanded to 16 bits
+	lsls r4,r5,#16		// [1] shift 16-bit color << 16
+	orrs r4,r5		// [1] color expanded to 32 bits
+
+	// [4] expand foreground color to 32-bit -> R6
+	lsls r5,r6,#8		// [1] shift foreground color << 8
+	orrs r5,r6		// [1] color expanded to 16 bits
+	lsls r6,r5,#16		// [1] shift 16-bit color << 16
+	orrs r6,r5		// [1] color expanded to 32 bits
+
+	// [1] XOR foreground and background color -> R6
+	eors r6,r4		// [1] XOR foreground color with background color
+
+	// [5] load font sample -> R5
+	ldrb r5,[r2,#0]		// [2] load character from source text buffer -> R5
+	ldrb r5,[r3,r5]		// [2] load font sample -> R5
+	adds r2,#2		// [1] shift pointer to source text buffer
+
+	// [2] prepare conversion table -> R5
+	lsls r5,#3		// [1] multiply font sample * 8
+	add r5,lr		// [1] add pointer to conversion table
+
+	// [6] convert first 4 pixels (higher 4 bits)
+	ldr r7,[r5,#0]		// [2] load mask for higher 4 bits
+	ands r7,r6		// [1] mask foreground color
+	eors r7,r4		// [1] combine with background color
+	stmia r0!,{r7}		// [2] store first 4 pixels
+
+	// [6] convert second 4 pixels (lower 4 bits)
+	ldr r7,[r5,#4]		// [2] load mask for lower 4 bits
+	ands r7,r6		// [1] mask foreground color
+	eors r7,r4		// [1] combine with background color
+	stmia r0!,{r7}		// [2] store second 4 pixels
+
+	// [2,3] loop counter
+	subs r1,#2		// [1] shift loop counter
+	bhi RenderAText_InLoop	// [1,2] > 0, render next whole character
+
+// ---- end inner loop, continue with last character, or start new part
+
+	// continue to outer loop
+	ldr r7,[sp,#36]		// load wrap width (LDR does not change the flags set by SUBS above)
+	beq RenderAText_Last	// counter reached exactly 0: render 1st half of last character
+	ldr r2,[sp,#8]		// get base pointer to text data -> R2
+	b RenderAText_OutLoop	// go back to outer loop
+
+	.align 2
+RenderAText_Addr:
+	.word RenderTextMask
+RenderAText_pSioBase:
+	.word SIO_BASE		// address of SIO base
diff --git a/MCUME_pico/picovga_t4/render/vga_attrib8.S b/MCUME_pico/picovga_t4/render/vga_attrib8.S
new file mode 100755
index 0000000..78cd5f9
--- /dev/null
+++ b/MCUME_pico/picovga_t4/render/vga_attrib8.S
@@ -0,0 +1,346 @@
+
+// ****************************************************************************
+//
+// VGA render GF_ATTRIB8
+//
+// ****************************************************************************
+
+#include "../define.h"		// common definitions of C and ASM
+
+	.syntax unified
+	.section .time_critical.Render, "ax"
+	.cpu cortex-m0plus
+	.thumb			// use 16-bit instructions
+
+// render font pixel mask
+.extern RenderTextMask		// u32 RenderTextMask[512];
+
+// extern "C" u8* RenderAttrib8(u8* dbuf, int x, int y, int w, sSegm* segm)
+
+// render 8-pixel attribute text GF_ATTRIB8
+// R0 ... destination data buffer
+// R1 ... start X coordinate (in pixels, must be multiple of 4)
+// R2 ... start Y coordinate (in graphics lines)
+// R3 ... width to display (must be multiple of 4 and > 0)
+// [stack] ... segm video segment sSegm
+// Output new pointer to destination data buffer.
+// 320 pixels takes 11 us on 151 MHz.
+
+.thumb_func
+.global RenderAttrib8
+RenderAttrib8:
+
+	// push registers
+	push {r2-r7,lr}
+	mov r4,r8
+	push {r4}
+
+// Input variables and stack content:
+// R1 ... 
start X coordinate
+// SP+0: R8
+// SP+4: R2 start Y coordinate (later: base pointer to pixel data row)
+// SP+8: R3 width to display
+// SP+12: R4
+// SP+16: R5
+// SP+20: R6
+// SP+24: R7
+// SP+28: LR
+// SP+32: video segment (later: wrap width in X direction)
+
+	// get pointer to video segment -> R4
+	ldr r4,[sp,#32]		// load video segment -> R4
+
+	// get wrap width -> [SP+32]
+	ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width
+	movs r7,#3		// mask to align to 32-bit
+	bics r5,r7		// align wrap
+	str r5,[sp,#32]		// save wrap width
+
+	// align X coordinate to 32-bit -> R1
+	bics r1,r7		// clear low 2 bits of X
+
+	// align remaining width -> [SP+8]
+	bics r3,r7		// width
+	str r3,[sp,#8]		// save new width
+
+	// base pointer to attributes (without X) -> R3
+	lsrs r3,r2,#3		// delete low 3 bits of Y coordinate -> row index
+	ldrh r5,[r4,#SSEGM_WB]	// get pitch of rows
+	muls r3,r5		// (Y/8) * WB -> offset of row in attribute buffer
+	ldr r7,[r4,#SSEGM_PAR]	// pointer to attributes
+	add r3,r7		// base address of attributes -> R3
+
+	// base pointer to pixel data (without X) -> [SP+4], R2
+	muls r2,r5		// Y * WB -> offset of row in pixel data buffer
+	ldr r5,[r4,#SSEGM_DATA]	// pointer to data
+	add r2,r5		// base address of pixel data row
+	str r2,[sp,#4]		// save base pointer to pixel data row
+
+	// offset of attributes -> R3
+	subs r3,r2		// offset of attributes, relative to source pixel data row
+
+	// prepare pointer to pixel data with X -> R2 (1 position is 1 byte of 8 pixels, attribute is read at the same index via offset R3)
+	lsrs r6,r1,#3		// convert X to character index (1 character is 8 pixels width)
+	add r2,r6		// add index, pointer to source text buffer -> R2
+
+	// prepare pointer to palettes -> R8
+	ldr r5,[r4,#SSEGM_PAR2]	// get pointer to palette table -> R5
+	mov r8,r5		// save pointer to palette table
+
+	// prepare pointer to conversion table -> LR
+	ldr r5,RenderAttrib8_Addr // get pointer to conversion table -> R5
+	mov lr,r5		// conversion table -> LR
+
+// ---- render 2nd half of first character
+// R0 ... pointer to destination data buffer
+// R1 ... start X coordinate
+// R2 ... pointer to source text buffer
+// R3 ... offset of attributes (relative to source text buffer)
+// R4 ... background color (expanded to 32-bit)
+// R5 ... (temporary)
+// R6 ... foreground color (expanded to 32-bit)
+// R7 ... (temporary)
+// R8 ... pointer to palette table
+// LR ... pointer to conversion table
+// [SP+4] ... base pointer to pixel data (without X)
+// [SP+8] ... remaining width
+// [SP+32] ... wrap width
+
+	// check bit 2 of X coordinate - check if image starts with 2nd half of first character
+	lsls r6,r1,#29		// check bit 2 of X coordinate (shifted into the sign bit)
+	bpl 2f			// bit 2 not set, starting even 4-pixels
+
+	// [6] load background color -> R4
+	ldrb r6,[r2,r3]		// [2] load color attributes -> R6
+	mov r5,r8		// [1] get palette table -> R5
+	lsrs r4,r6,#4		// [1] prepare index of background color
+	ldrb r4,[r5,r4]		// [2] load background color -> R4
+
+	// [4] load foreground color -> R6
+	lsls r6,#28		// [1] isolate lower 4 bits
+	lsrs r6,#28		// [1] mask lower 4 bits
+	ldrb r6,[r5,r6]		// [2] load foreground color -> R6
+
+	// [4] expand background color to 32-bit -> R4
+	lsls r5,r4,#8		// [1] shift background color << 8
+	orrs r5,r4		// [1] color expanded to 16 bits
+	lsls r4,r5,#16		// [1] shift 16-bit color << 16
+	orrs r4,r5		// [1] color expanded to 32 bits
+
+	// [4] expand foreground color to 32-bit -> R6
+	lsls r5,r6,#8		// [1] shift foreground color << 8
+	orrs r5,r6		// [1] color expanded to 16 bits
+	lsls r6,r5,#16		// [1] shift 16-bit color << 16
+	orrs r6,r5		// [1] color expanded to 32 bits
+
+	// [1] XOR foreground and background color -> R6
+	eors r6,r4		// [1] XOR foreground color with background color
+
+	// [3] load pixel sample -> R5
+	ldrb r5,[r2,#0]		// [2] load pixels from source buffer -> R5
+	adds r2,#1		// [1] shift pointer to source buffer
+
+	// [2] prepare conversion table -> R5
+	lsls r5,#3		// [1] multiply pixel sample * 8
+	add r5,lr		// [1] add pointer to conversion table
+
+	// [6] convert second 4 pixels (lower 4 bits)
+	ldr r7,[r5,#4]		// [2] load mask for lower 4 bits
+	ands r7,r6		// [1] mask foreground color
+	eors r7,r4		// [1] combine with background color
+	stmia r0!,{r7}		// [2] store second 4 pixels
+
+	// shift X coordinate
+	adds r1,#4		// shift X coordinate
+
+	// check end of segment
+	ldr r7,[sp,#32]		// load wrap width
+	cmp r1,r7		// end of segment?
+	blo 1f			// no, X is still below wrap width
+	movs r1,#0		// reset X coordinate
+	ldr r2,[sp,#4]		// get base pointer to pixel data -> R2
+
+	// shift remaining width
+1:	ldr r7,[sp,#8]		// get remaining width
+	subs r7,#4		// shift width
+	str r7,[sp,#8]		// save new width
+
+	// prepare wrap width - start X -> R7
+2:	ldr r7,[sp,#32]		// load wrap width
+	subs r7,r1		// pixels remaining to end of segment
+
+// ---- start outer loop, render one part of segment
+// Outer loop variables (* prepared before outer loop):
+// R0 ... *pointer to destination data buffer
+// R1 ... number of characters to generate in one part of segment
+// R2 ... *pointer to source text buffer
+// R3 ... *offset of attributes (relative to source text buffer)
+// R4 ... background color (expanded to 32-bit)
+// R5 ... (temporary)
+// R6 ... foreground color (expanded to 32-bit)
+// R7 ... *wrap width of this segment, later: temporary
+// R8 ... *pointer to palette table
+// LR ... *pointer to conversion table
+// [SP+4] ... *base pointer to pixel data (without X)
+// [SP+8] ... *remaining width
+// [SP+32] ... *wrap width
+
+RenderAttrib8_OutLoop:
+
+	// limit wrap width by total width -> R7
+	ldr r6,[sp,#8]		// get remaining width
+	cmp r7,r6		// compare with wrap width
+	bls 2f			// width is OK
+	mov r7,r6		// limit wrap width
+
+	// check if remain whole characters
+2:	cmp r7,#8		// check number of remaining pixels
+	bhs 5f			// enough characters remain
+
+	// check if 1st part of last character remains
+	cmp r7,#4		// check 1st part of last character
+	blo 3f			// all done
+
+// ---- render 1st part of last character
+
+RenderAttrib8_Last:
+
+	// [6] load background color -> R4
+	ldrb r6,[r2,r3]		// [2] load color attributes -> R6
+	mov r5,r8		// [1] get palette table -> R5
+	lsrs r4,r6,#4		// [1] prepare index of background color
+	ldrb r4,[r5,r4]		// [2] load background color -> R4
+
+	// [4] load foreground color -> R6
+	lsls r6,#28		// [1] isolate lower 4 bits
+	lsrs r6,#28		// [1] mask lower 4 bits
+	ldrb r6,[r5,r6]		// [2] load foreground color -> R6
+
+	// [4] expand background color to 32-bit -> R4
+	lsls r5,r4,#8		// [1] shift background color << 8
+	orrs r5,r4		// [1] color expanded to 16 bits
+	lsls r4,r5,#16		// [1] shift 16-bit color << 16
+	orrs r4,r5		// [1] color expanded to 32 bits
+
+	// [4] expand foreground color to 32-bit -> R6
+	lsls r5,r6,#8		// [1] shift foreground color << 8
+	orrs r5,r6		// [1] color expanded to 16 bits
+	lsls r6,r5,#16		// [1] shift 16-bit color << 16
+	orrs r6,r5		// [1] color expanded to 32 bits
+
+	// [1] XOR foreground and background color -> R6
+	eors r6,r4		// [1] XOR foreground color with background color
+
+	// [3] load pixel sample -> R5
+	ldrb r5,[r2,#0]		// [2] load pixels from source buffer -> R5
+	adds r2,#1		// [1] shift pointer to source buffer
+
+	// [2] prepare conversion table -> R5
+	lsls r5,#3		// [1] multiply pixel sample * 8
+	add r5,lr		// [1] add pointer to conversion table
+
+	// [6] convert first 4 pixels (higher 4 bits)
+	ldr r1,[r5,#0]		// [2] load mask for higher 4 bits
+	ands r1,r6		// [1] mask foreground color
+	eors r1,r4		// [1] combine with background color
+	stmia r0!,{r1}		// [2] store first 4 pixels
+
+	// check if continue with next segment
+	ldr r2,[sp,#4]		// get base pointer to pixel data -> R2
+	cmp r7,#4
+	bhi RenderAttrib8_OutLoop
+
+	// pop registers and return
+3:	pop {r4}
+	mov r8,r4
+	pop {r2-r7,pc}
+
+// ---- prepare to render whole characters
+
+	// prepare number of whole characters to render -> R1
+5:	lsrs r1,r7,#2		// shift to get number of characters*2
+	lsls r5,r1,#2		// shift back to get number of pixels, rounded down -> R5
+	subs r6,r5		// get remaining width
+	str r6,[sp,#8]		// save new remaining width
+	subs r1,#1		// number of characters*2 - 1
+
+// ---- [38*N-1] start inner loop, render characters in one part of segment
+// Inner loop variables (* prepared before inner loop):
+// R0 ... *pointer to destination data buffer
+// R1 ... *number of characters to generate*2 - 1 (loop counter)
+// R2 ... *pointer to source text buffer
+// R3 ... *offset of attributes (relative to source text buffer)
+// R4 ... background color (expanded to 32-bit)
+// R5 ... (temporary)
+// R6 ... foreground color (expanded to 32-bit)
+// R7 ... (temporary)
+// R8 ... *pointer to palette table
+// LR ... *pointer to conversion table
+// [SP+4] ... *base pointer to pixel data (without X)
+// [SP+8] ... *remaining width
+// [SP+32] ... *wrap width
+
+RenderAttrib8_InLoop:
+
+	// [6] load background color -> R4
+	ldrb r6,[r2,r3]		// [2] load color attributes -> R6
+	mov r5,r8		// [1] get palette table -> R5
+	lsrs r4,r6,#4		// [1] prepare index of background color
+	ldrb r4,[r5,r4]		// [2] load background color -> R4
+
+	// [4] load foreground color -> R6
+	lsls r6,#28		// [1] isolate lower 4 bits
+	lsrs r6,#28		// [1] mask lower 4 bits
+	ldrb r6,[r5,r6]		// [2] load foreground color -> R6
+
+	// [4] expand background color to 32-bit -> R4
+	lsls r5,r4,#8		// [1] shift background color << 8
+	orrs r5,r4		// [1] color expanded to 16 bits
+	lsls r4,r5,#16		// [1] shift 16-bit color << 16
+	orrs r4,r5		// [1] color expanded to 32 bits
+
+	// [4] expand foreground color to 32-bit -> R6
+	lsls r5,r6,#8		// [1] shift foreground color << 8
+	orrs r5,r6		// [1] color expanded to 16 bits
+	lsls r6,r5,#16		// [1] shift 16-bit color << 16
+	orrs r6,r5		// [1] color expanded to 32 bits
+
+	// [1] XOR foreground and background color -> R6
+	eors r6,r4		// [1] XOR foreground color with background color
+
+	// [3] load pixel sample -> R7
+	ldrb r7,[r2,#0]		// [2] load pixels from source buffer -> R7
+	adds r2,#1		// [1] shift pointer to source buffer
+
+	// [2] prepare conversion table -> R7
+	lsls r7,#3		// [1] multiply sample * 8
+	add r7,lr		// [1] add pointer to conversion table
+
+	// [4] convert first 4 pixels (higher 4 bits)
+	ldr r5,[r7,#0]		// [2] load mask for higher 4 bits
+	ands r5,r6		// [1] mask foreground color
+	eors r5,r4		// [1] combine with background color
+
+	// [4] convert second 4 pixels (lower 4 bits)
+	ldr r7,[r7,#4]		// [2] load mask for lower 4 bits
+	ands r7,r6		// [1] mask foreground color
+	eors r7,r4		// [1] combine with background color
+
+	// [3] write pixels
+	stmia r0!,{r5,r7}	// [3] store 8 pixels
+
+	// [2,3] loop counter
+	subs r1,#2		// [1] shift loop counter
+	bhi RenderAttrib8_InLoop // [1,2] > 0, render next whole character
+
+// ---- end inner loop, continue with last character, or start new part
+
+	// continue to outer loop
+	ldr r7,[sp,#32]		// load wrap width (LDR does not change the flags set by SUBS above)
+	beq RenderAttrib8_Last	// counter reached exactly 0: render 1st half of last character
+	ldr r2,[sp,#4]		// get base pointer to pixel data -> R2
+	b RenderAttrib8_OutLoop	// go back to outer loop
+
+	.align 2
+RenderAttrib8_Addr:
+	.word RenderTextMask
diff --git a/MCUME_pico/picovga_t4/render/vga_color.S b/MCUME_pico/picovga_t4/render/vga_color.S
new file mode 100755
index 0000000..5493f41
--- /dev/null
+++ b/MCUME_pico/picovga_t4/render/vga_color.S
@@ -0,0 +1,89 @@
+
+// ****************************************************************************
+//
+// VGA render GF_COLOR
+//
+// ****************************************************************************
+
+#include "../define.h"		// common definitions of C and ASM
+
+	.syntax unified
+	.section .time_critical.Render, "ax"
+	.cpu cortex-m0plus
+	.thumb			// use 16-bit instructions
+
+// extern "C" u8* RenderColor(u8* dbuf, u32 color, int w);
+
+// render color GF_COLOR
+// R0 ... pointer to destination data buffer
+// R1 ... color pattern 4-pixels
+// R2 ... width of this segment in multiples of 4 pixels (=width in pixels/4)
+// Output new pointer to destination data buffer.
+// 320 pixels takes 1.1 us on 151 MHz.
+// - using only small transfer (24 pixels per loop) takes 1.22 us
+// - using only single transfer (4 pixels per loop) takes 2.91 us
+// - memset takes 1.42 us
+
+.thumb_func
+.global RenderColor
+RenderColor:
+
+// fill memory buffer with u32 words
+// buf ... data buffer, must be 32-bit aligned
+// data ... data word to store
+// num ... number of 32-bit words (= number of bytes/4)
+// Returns new destination address.
+// extern "C" u32* MemSet4(u32* buf, u32 data, int num);
+
+.thumb_func
+.global MemSet4
+MemSet4:
+
+	// push registers
+	push {r4,r5,r6,r7,lr}
+
+	// duplicate color pattern
+	mov r3,r1
+	mov r4,r1
+	mov r5,r1
+	mov r6,r1
+	mov r7,r1
+
+	// go to big transfer
+	b 3f			// enter the loop at its counter check
+
+// ---- [38 per loop] big transfer 120 pixels, speed 0.317 clk per pixel
+
+	// [38] store 30 words (=120 pixels)
+2:	stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels
+	stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels
+	stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels
+	stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels
+	stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels
+3:	subs r2,#30		// [1] decrement number of words
+	bge 2b			// [1,2] loop next 30 words
+	adds r2,#30		// [1] restore
+
+	// go to small transfer
+	b 6f			// enter the loop at its counter check
+
+// ---- [10 per loop] small transfer 24 pixels, speed 0.417 clk per pixel
+
+	// [10] store 6 words (=24 pixels)
+4:	stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels
+6:	subs r2,#6		// [1] decrement number of words
+	bge 4b			// [1,2] loop next 6 words
+	adds r2,#6		// [1] restore
+
+	// go to single transfer
+	b 8f			// enter the loop at its counter check
+
+// ---- [5 per loop] single transfer 4 pixels, speed 1.25 clk per pixel
+
+	// [4,5] store 1 word (=4 pixels)
+7:	stmia r0!,{r1}		// [2] 1 word, 4 pixels
+8:	subs r2,#1		// [1] loop counter
+	bge 7b			// [1,2] next word
+
+	// pop registers
+	pop {r4,r5,r6,r7,pc}	// restore registers and return; R0 was advanced by the stores above
diff --git a/MCUME_pico/picovga_t4/render/vga_ctext.S b/MCUME_pico/picovga_t4/render/vga_ctext.S
new file mode 100755
index 0000000..4dca38a
--- /dev/null
+++ b/MCUME_pico/picovga_t4/render/vga_ctext.S
@@ -0,0 +1,335 @@
+
+// ****************************************************************************
+//
+// VGA render GF_CTEXT
+//
+// ****************************************************************************
+// u32 par SSEGM_PAR pointer to the font
+// u16 par3 font height
+
+#include "../define.h"		// common definitions of C and ASM
+#include "hardware/regs/sio.h"	// 
registers of hardware divider +#include "hardware/regs/addressmap.h" // SIO base address + + .syntax unified + .section .time_critical.Render, "ax" + .cpu cortex-m0plus + .thumb // use 16-bit instructions + +// render font pixel mask +.extern RenderTextMask // u32 RenderTextMask[512]; + +// extern "C" u8* RenderCText(u8* dbuf, int x, int y, int w, sSegm* segm) + +// render 8-pixel color text GF_CTEXT +// R0 ... destination data buffer +// R1 ... start X coordinate (in pixels, must be multiple of 4) +// R2 ... start Y coordinate (in graphics lines) +// R3 ... width to display (must be multiple of 4 and > 0) +// [stack] ... segm video segment sSegm +// Output new pointer to destination data buffer. +// 320 pixels takes 10.4 us on 151 MHz. + +.thumb_func +.global RenderCText +RenderCText: + + // push registers + push {r1-r7,lr} + +// Stack content: +// SP+0: R1 start X coordinate +// SP+4: R2 start Y coordinate (later: base pointer to text data row) +// SP+8: R3 width to display +// SP+12: R4 +// SP+16: R5 +// SP+20: R6 +// SP+24: R7 +// SP+28: LR +// SP+32: video segment (later: wrap width in X direction) + + // get pointer to video segment -> R4 + ldr r4,[sp,#32] // load video segment -> R4 + + // start divide Y/font height + ldr r6,RenderCText_pSioBase // get address of SIO base -> R6 + str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate + ldrh r2,[r4,#SSEGM_PAR3] // font height -> R2 + str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height + +// - now we must wait at least 8 clock cycles to get result of division + + // [6] get wrap width -> [SP+32] + ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width + movs r7,#3 // [1] mask to align to 32-bit + bics r5,r7 // [1] align wrap + str r5,[sp,#32] // [2] save wrap width + + // [1] align X coordinate to 32-bit + bics r1,r7 // [1] + + // [3] align remaining width + bics r3,r7 // [1] + str r3,[sp,#8] // [2] save new width + + // load result of division Y/font_height -> R5 Y relative at row, R2 Y
row + // Note: QUOTIENT must be read last + ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row + ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R2, index of row + + // pointer to font line -> R3 + lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long) + ldr r3,[r4,#SSEGM_PAR] // get pointer to font + add r3,r5 // line offset + font base -> pointer to current font line R3 + + // base pointer to text data (without X) -> [SP+4], R2 + ldrh r5,[r4,#SSEGM_WB] // get pitch of rows + muls r2,r5 // Y * WB -> offset of row in text buffer + ldr r5,[r4,#SSEGM_DATA] // pointer to data + add r2,r5 // base address of text buffer + str r2,[sp,#4] // save pointer to text buffer + + // prepare pointer to text data with X -> R2 (1 position is 1 character + 1 background + 1 foreground) + lsrs r6,r1,#3 // convert X to character index (1 character is 8 pixels width) + add r2,r6 // add index + add r2,r6 // add index*2 + add r2,r6 // add index*3, pointer to source text buffer -> R2 + + // prepare pointer to conversion table -> LR + ldr r5,RenderCText_Addr // get pointer to conversion table -> R5 + mov lr,r5 // conversion table -> LR + +// ---- render 2nd half of first character +// R0 ... pointer to destination data buffer +// R1 ... start X coordinate +// R2 ... pointer to source text buffer +// R3 ... pointer to font line +// R4 ... background color (expanded to 32-bit) +// R5 ... (temporary) +// R6 ... foreground color (expanded to 32-bit) +// R7 ... (temporary) +// LR ... pointer to conversion table +// [SP+4] ... base pointer to text data (without X) +// [SP+8] ... remaining width +// [SP+32] ...
wrap width + + // check bit 2 of X coordinate - check if image starts with 2nd half of first character + lsls r6,r1,#29 // check bit 2 of X coordinate + bpl 2f // bit 2 not set, starting even 4-pixels + + // [4] load font sample -> R5 + ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 + ldrb r5,[r3,r5] // [2] load font sample -> R5 + + // [2] load background color -> R4 + ldrb r4,[r2,#1] // [2] load background color from source text buffer + + // [4] expand background color to 32-bit -> R4 + lsls r7,r4,#8 // [1] shift background color << 8 + orrs r7,r4 // [1] color expanded to 16 bits + lsls r4,r7,#16 // [1] shift 16-bit color << 16 + orrs r4,r7 // [1] color expanded to 32 bits + + // [3] load foreground color -> R6 + ldrb r6,[r2,#2] // [2] load foreground color from source text buffer -> R6 + adds r2,#3 // [1] shift pointer to source text buffer + + // [4] expand foreground color to 32-bit -> R6 + lsls r7,r6,#8 // [1] shift foreground color << 8 + orrs r7,r6 // [1] color expanded to 16 bits + lsls r6,r7,#16 // [1] shift 16-bit color << 16 + orrs r6,r7 // [1] color expanded to 32 bits + + // [1] XOR foreground and background color -> R6 + eors r6,r4 // [1] XOR foreground color with background color + + // [2] prepare conversion table -> R5 + lsls r5,#3 // [1] multiply font sample * 8 + add r5,lr // [1] add pointer to conversion table + + // [6] convert second 4 pixels (lower 4 bits) + ldr r7,[r5,#4] // [2] load mask for lower 4 bits + ands r7,r6 // [1] mask foreground color + eors r7,r4 // [1] combine with background color + stmia r0!,{r7} // [2] store second 4 pixels + + // shift X coordinate + adds r1,#4 // shift X coordinate + + // check end of segment + ldr r7,[sp,#32] // load wrap width + cmp r1,r7 // end of segment?
+ blo 1f + movs r1,#0 // reset X coordinate + ldr r2,[sp,#4] // get base pointer to text data -> R2 + + // shift remaining width +1: ldr r7,[sp,#8] // get remaining width + subs r7,#4 // shift width + str r7,[sp,#8] // save new width + + // prepare wrap width - start X -> R7 +2: ldr r7,[sp,#32] // load wrap width + subs r7,r1 // pixels remaining to end of segment + +// ---- start outer loop, render one part of segment +// Outer loop variables (* prepared before outer loop): +// R0 ... *pointer to destination data buffer +// R1 ... number of characters to generate in one part of segment +// R2 ... *pointer to source text buffer +// R3 ... *pointer to font line +// R4 ... background color (expanded to 32-bit) +// R5 ... (temporary) +// R6 ... foreground color (expanded to 32-bit) +// R7 ... *wrap width of this segment, later: temporary +// LR ... *pointer to conversion table +// [SP+4] ... *base pointer to text data (without X) +// [SP+8] ... *remaining width +// [SP+32] ... *wrap width + +RenderCText_OutLoop: + + // limit wrap width by total width -> R7 + ldr r6,[sp,#8] // get remaining width + cmp r7,r6 // compare with wrap width + bls 2f // width is OK + mov r7,r6 // limit wrap width + + // check if remain whole characters +2: cmp r7,#8 // check number of remaining pixels + bhs 5f // enough characters remain + + // check if 1st part of last character remains + cmp r7,#4 // check 1st part of last character + blo 3f // all done + +// ---- render 1st part of last character + +RenderCText_Last: + + // [4] load font sample -> R5 + ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 + ldrb r5,[r3,r5] // [2] load font sample -> R5 + + // [2] load background color -> R4 + ldrb r4,[r2,#1] // [2] load background color from source text buffer + + // [4] expand background color to 32-bit -> R4 + lsls r1,r4,#8 // [1] shift background color << 8 + orrs r1,r4 // [1] color expanded to 16 bits + lsls r4,r1,#16 // [1] shift 16-bit color << 16 + orrs r4,r1 // [1]
color expanded to 32 bits + + // [3] load foreground color -> R6 + ldrb r6,[r2,#2] // [2] load foreground color from source text buffer -> R6 + adds r2,#3 // [1] shift pointer to source text buffer + + // [4] expand foreground color to 32-bit + lsls r1,r6,#8 // [1] shift foreground color << 8 + orrs r1,r6 // [1] color expanded to 16 bits + lsls r6,r1,#16 // [1] shift 16-bit color << 16 + orrs r6,r1 // [1] color expanded to 32 bits + + // [1] XOR foreground and background color -> R6 + eors r6,r4 // [1] XOR foreground color with background color + + // [2] prepare conversion table -> R5 + lsls r5,#3 // [1] multiply font sample * 8 + add r5,lr // [1] add pointer to conversion table + + // [6] convert first 4 pixels (higher 4 bits) + ldr r1,[r5,#0] // [2] load mask for higher 4 bits + ands r1,r6 // [1] mask foreground color + eors r1,r4 // [1] combine with background color + stmia r0!,{r1} // [2] store first 4 pixels + + // check if continue with next segment + ldr r2,[sp,#4] // get base pointer to text data -> R2 + cmp r7,#4 // R7 = wrap width when entered from the inner loop, or the remaining 4..7 pixels when entered from the outer loop + bhi RenderCText_OutLoop // more than the 4 pixels just rendered -> continue with next part of segment + + // pop registers and return +3: pop {r1-r7,pc} + +// ---- prepare to render whole characters + + // prepare number of whole characters to render -> R1 +5: lsrs r1,r7,#2 // shift to get number of characters*2 + lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5 + subs r6,r5 // get remaining width + str r6,[sp,#8] // save new remaining width + subs r1,#1 // number of characters*2 - 1 + +// ---- [35*N-1] start inner loop, render characters in one part of segment +// Inner loop variables (* prepared before inner loop): +// R0 ... *pointer to destination data buffer +// R1 ... *number of characters to generate*2 - 1 (loop counter) +// R2 ... *pointer to source text buffer +// R3 ... *pointer to font line +// R4 ... background color (expanded to 32-bit) +// R5 ... font sample +// R6 ... foreground color (expanded to 32-bit) +// R7 ... (temporary) +// LR ...
*pointer to conversion table + +RenderCText_InLoop: + + // [4] load font sample -> R5 + ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 + ldrb r5,[r3,r5] // [2] load font sample -> R5 + + // [2] load background color -> R4 + ldrb r4,[r2,#1] // [2] load background color from source text buffer + + // [4] expand background color to 32-bit -> R4 + lsls r7,r4,#8 // [1] shift background color << 8 + orrs r7,r4 // [1] color expanded to 16 bits + lsls r4,r7,#16 // [1] shift 16-bit color << 16 + orrs r4,r7 // [1] color expanded to 32 bits + + // [3] load foreground color -> R6 + ldrb r6,[r2,#2] // [2] load foreground color from source text buffer -> R6 + adds r2,#3 // [1] shift pointer to source text buffer + + // [4] expand foreground color to 32-bit + lsls r7,r6,#8 // [1] shift foreground color << 8 + orrs r7,r6 // [1] color expanded to 16 bits + lsls r6,r7,#16 // [1] shift 16-bit color << 16 + orrs r6,r7 // [1] color expanded to 32 bits + + // [1] XOR foreground and background color -> R6 + eors r6,r4 // [1] XOR foreground color with background color + + // [2] prepare conversion table -> R5 + lsls r5,#3 // [1] multiply font sample * 8 + add r5,lr // [1] add pointer to conversion table + + // [6] convert first 4 pixels (higher 4 bits) + ldr r7,[r5,#0] // [2] load mask for higher 4 bits + ands r7,r6 // [1] mask foreground color + eors r7,r4 // [1] combine with background color + stmia r0!,{r7} // [2] store first 4 pixels + + // [6] convert second 4 pixels (lower 4 bits) + ldr r7,[r5,#4] // [2] load mask for lower 4 bits + ands r7,r6 // [1] mask foreground color + eors r7,r4 // [1] combine with background color + stmia r0!,{r7} // [2] store second 4 pixels + + // [2,3] loop counter + subs r1,#2 // [1] shift loop counter + bhi RenderCText_InLoop // [1,2] > 0, render next whole character + +// ---- end inner loop, continue with last character, or start new part + + // continue to outer loop + ldr r7,[sp,#32] // load wrap width (LDR leaves the flags of SUBS above intact) + beq RenderCText_Last //
counter reached exactly 0: render 1st half of last character + ldr r2,[sp,#4] // get base pointer to text data -> R2 + b RenderCText_OutLoop // go back to outer loop + + .align 2 +RenderCText_Addr: + .word RenderTextMask +RenderCText_pSioBase: + .word SIO_BASE // address of SIO base diff --git a/MCUME_pico/picovga_t4/render/vga_dtext.S b/MCUME_pico/picovga_t4/render/vga_dtext.S new file mode 100755 index 0000000..bffe9c9 --- /dev/null +++ b/MCUME_pico/picovga_t4/render/vga_dtext.S @@ -0,0 +1,436 @@ + +// **************************************************************************** +// +// VGA render GF_DTEXT +// +// **************************************************************************** +// u32 par SSEGM_PAR pointer to the font +// u32 par2 SSEGM_PAR2 pointer to font gradient +// u16 par3 LOW background color, HIGH font height + +#include "../define.h" // common definitions of C and ASM +#include "hardware/regs/sio.h" // registers of hardware divider +#include "hardware/regs/addressmap.h" // SIO base address + + .syntax unified + .section .time_critical.Render, "ax" + .cpu cortex-m0plus + .thumb // use 16-bit instructions + +// render font pixel mask +.extern RenderTextMask // u32 RenderTextMask[512]; + +// extern "C" u8* RenderDText(u8* dbuf, int x, int y, int w, sSegm* segm) + +// render 8-pixel double gradient color text GF_DTEXT +// R0 ... destination data buffer +// R1 ... start X coordinate (in pixels, must be multiple of 4) +// R2 ... start Y coordinate (in graphics lines) +// R3 ... width to display (must be multiple of 4 and > 0) +// [stack] ... segm video segment sSegm +// Output new pointer to destination data buffer. +// 320 pixels takes us on 151 MHz.
+ +.thumb_func +.global RenderDText +RenderDText: + +// Stack content: +// SP+0: R8 +// SP+4: R1 start X coordinate (later: base pointer to gradient array) +// SP+8: R2 start Y coordinate (later: base pointer to text data row) +// SP+12: R3 width to display +// SP+16: R4 +// SP+20: R5 +// SP+24: R6 +// SP+28: R7 +// SP+32: LR +// SP+36: video segment (later: wrap width in X direction) + + // push registers + push {r1-r7,lr} + mov r4,r8 + push {r4} + + // get pointer to video segment -> R4 + ldr r4,[sp,#36] // load video segment -> R4 + + // start divide Y/font height + ldr r6,RenderDText_pSioBase // get address of SIO base -> R6 + str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate + ldrb r2,[r4,#SSEGM_PAR3+1] // font height -> R2 + str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height + +// - now we must wait at least 8 clock cycles to get result of division + + // [6] get wrap width -> [SP+36] + ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width + movs r7,#3 // [1] mask to align to 32-bit + bics r5,r7 // [1] align wrap + str r5,[sp,#36] // [2] save wrap width + + // [1] align X coordinate to 32-bit + bics r1,r7 // [1] + + // [3] align remaining width + bics r3,r7 // [1] + str r3,[sp,#12] // [2] save new width + + // load result of division Y/font_height -> R5 Y relative at row, R2 Y row + // Note: QUOTIENT must be read last + ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row + ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R2, index of row + + // pointer to font line -> R3 + lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long) + ldr r3,[r4,#SSEGM_PAR] // get pointer to font + add r3,r5 // line offset + font base -> pointer to current font line R3 + mov r8,r3 // keep pointer to font line in R8 (R3 is reused for the gradient) + + // base pointer to text data (without X) -> [SP+8], R2 + ldrh r5,[r4,#SSEGM_WB] // get pitch of rows + muls r2,r5 // Y * WB -> offset of row in text buffer + ldr r5,[r4,#SSEGM_DATA] // pointer
to data + add r2,r5 // base address of text buffer + str r2,[sp,#8] // save pointer to text buffer + + // base pointer to gradient array -> [SP+4], R3 + ldr r3,[r4,#SSEGM_PAR2] // pointer to gradient array + str r3,[sp,#4] // save pointer to gradient array + + // prepare pointer to text data with X -> R2 and pointer to gradient with X -> R3 + lsrs r6,r1,#4 // convert X to character index (1 character is 16 pixels width) + add r2,r6 // pointer to source text buffer -> R2 + // gradient pointer must start at the 1st entry of the character: the partial-character code below always reads both entries + lsls r6,#3 // character index * 8 (1 character uses 2 gradient entries of 4 bytes) + add r3,r6 // pointer to source gradient array -> R3 + + // prepare background color, expand to 32 bits -> R4 + ldrb r4,[r4,#SSEGM_PAR3] // load background color + lsls r5,r4,#8 // shift background color << 8 + orrs r5,r4 // color expanded to 16 bits + lsls r4,r5,#16 // shift 16-bit color << 16 + orrs r4,r5 // color expanded to 32 bits + + // prepare pointer to conversion table -> LR + ldr r5,RenderDText_Addr // get pointer to conversion table -> R5 + mov lr,r5 // conversion table -> LR + +// ---- render 2nd half of first character +// R0 ... pointer to destination data buffer +// R1 ... start X coordinate +// R2 ... pointer to source text buffer +// R3 ... pointer to gradient array +// R4 ... background color (expanded to 32-bit) +// R5..R7 ... (temporary) +// R8 ... pointer to font line +// LR ... pointer to conversion table +// [SP+4] ... base pointer to gradient array +// [SP+8] ... base pointer to text data (without X) +// [SP+12] ... remaining width +// [SP+36] ...
wrap width + + // check if X is aligned + lsls r6,r1,#(32-4) // check if X is aligned + beq 2f // X is aligned to a character, no partial first character + + // shift X coordinate + lsrs r5,r6,#(32-4) // [1] X pixel offset in last character -> R5 + movs r6,#16 // character width + subs r6,r5 // pixels remain + adds r1,r6 // shift X coordinate (align to next character) + ldr r7,[sp,#12] + subs r7,r6 // shift width + str r7,[sp,#12] + + push {r1} + + // [6] load font sample -> R7 + ldrb r7,[r2,#0] // [2] load character from source text buffer -> R7 + adds r2,#1 // [1] shift pointer to source text buffer + add r7,r8 // [1] pointer to font line + ldrb r7,[r7] // [2] load font sample -> R7 + + // [3] load foreground color, XOR with background -> R6 + ldmia r3!,{r6} // [2] load foreground color from gradient buffer + eors r6,r4 // [1] XOR foreground color with background color + + // [2] prepare conversion table -> R7 + lsls r7,#3 // [1] multiply font sample * 8 + add r7,lr // [1] add pointer to conversion table + + // [4] convert first 4 pixels (higher 4 bits) -> R1 + ldr r1,[r7,#0] // [2] load mask for higher 4 bits + ands r1,r6 // [1] mask foreground color + eors r1,r4 // [1] combine with background color + + cmp r5,#4 // check start position of X + bhi 3f // > 4 + + // [11] store pixels 4..7 of the character (2 font pixels, each doubled) + lsrs r1,#16 // [1] + strb r1,[r0,#0] // [2] + strb r1,[r0,#1] // [2] + lsrs r1,#8 // [1] + strb r1,[r0,#2] // [2] + strb r1,[r0,#3] // [2] + adds r0,#4 // [1] + + // [3] load foreground color, XOR with background -> R6 +3: ldmia r3!,{r6} // [2] load foreground color from gradient buffer + eors r6,r4 // [1] XOR foreground color with background color + + // [4] convert second 4 pixels (lower 4 bits) + ldr r1,[r7,#4] // [2] load mask for lower 4 bits + ands r1,r6 // [1] mask foreground color + eors r1,r4 // [1] combine with background color + + // store 8 pixels + cmp r5,#8 // check start position of X + bhi 4f // > 8 + + strb r1,[r0,#0] // [2] + strb r1,[r0,#1] // [2] + lsrs r1,#8 // [1] + strb r1,[r0,#2] // [2] + strb
r1,[r0,#3] // [2] + lsls r1,#8 // shift back so that the code at label 4 finds the upper 2 bytes in place + adds r0,#4 + +4: lsrs r1,#16 // [1] + strb r1,[r0,#0] // [2] + strb r1,[r0,#1] // [2] + lsrs r1,#8 // [1] + strb r1,[r0,#2] // [2] + strb r1,[r0,#3] // [2] + adds r0,#4 // [1] + + pop {r1} + + // check end of segment + ldr r7,[sp,#36] // load wrap width + cmp r1,r7 // end of segment? + blo 2f + movs r1,#0 // reset X coordinate + ldr r2,[sp,#8] // get base pointer to text data -> R2 + ldr r3,[sp,#4] // get base pointer to gradient array -> R3 + + // prepare wrap width - start X -> R7 +2: ldr r7,[sp,#36] // load wrap width + subs r7,r1 // pixels remaining to end of segment + +// ---- start outer loop, render one part of segment +// Outer loop variables (* prepared before outer loop): +// R0 ... *pointer to destination data buffer +// R1 ... number of characters to generate in one part of segment +// R2 ... *pointer to source text buffer +// R3 ... *pointer to gradient array +// R4 ... *background color (expanded to 32-bit) +// R5 ... (temporary) +// R6 ... (temporary) +// R7 ... *wrap width of this segment, later: temporary +// R8 ... *pointer to font line +// LR ... *pointer to conversion table +// [SP+4] ... base pointer to gradient array +// [SP+8] ... base pointer to text data (without X) +// [SP+12] ... remaining width +// [SP+36] ...
wrap width + +RenderDText_OutLoop: + + // limit wrap width by total width -> R7 + ldr r6,[sp,#12] // get remaining width + cmp r7,r6 // compare with wrap width + bls 2f // width is OK + mov r7,r6 // limit wrap width + + // check if remain whole characters +2: cmp r7,#16 // check number of remaining pixels + bhs 5f // enough characters remain + + // check if 1st part of last character remains + cmp r7,#4 // check 1st part of last character + blo 3f // all done + mov r1,r7 // width to render + +// ---- render 1st part of last character + +RenderDText_Last: + + push {r7} + + // [6] load font sample -> R7 + ldrb r7,[r2,#0] // [2] load character from source text buffer -> R7 + adds r2,#1 // [1] shift pointer to source text buffer + add r7,r8 // [1] pointer to font line + ldrb r7,[r7] // [2] load font sample -> R7 + + // [3] load foreground color, XOR with background -> R6 + ldmia r3!,{r6} // [2] load foreground color from gradient buffer + eors r6,r4 // [1] XOR foreground color with background color + + // [2] prepare conversion table -> R7 + lsls r7,#3 // [1] multiply font sample * 8 + add r7,lr // [1] add pointer to conversion table + + // [4] convert first 4 pixels (higher 4 bits) -> R5 + ldr r5,[r7,#0] // [2] load mask for higher 4 bits + ands r5,r6 // [1] mask foreground color + eors r5,r4 // [1] combine with background color + + // [20] store 8 pixels + strb r5,[r0,#0] // [2] + strb r5,[r0,#1] // [2] + lsrs r5,#8 // [1] + strb r5,[r0,#2] // [2] + strb r5,[r0,#3] // [2] + adds r0,#4 // [1] + + cmp r1,#4 + bls 4f + + lsrs r5,#8 // [1] + strb r5,[r0,#0] // [2] + strb r5,[r0,#1] // [2] + lsrs r5,#8 // [1] + strb r5,[r0,#2] // [2] + strb r5,[r0,#3] // [2] + adds r0,#4 // [1] + + cmp r1,#8 + bls 4f + + // [3] load foreground color, XOR with background -> R6 + ldmia r3!,{r6} // [2] load foreground color from gradient buffer + eors r6,r4 // [1] XOR foreground color with background color + + // [4] convert second 4 pixels (lower 4 bits) + ldr r7,[r7,#4] // [2] load mask
for lower 4 bits + ands r7,r6 // [1] mask foreground color + eors r7,r4 // [1] combine with background color + + // [20] store 8 pixels + strb r7,[r0,#0] // [2] + strb r7,[r0,#1] // [2] + lsrs r7,#8 // [1] + strb r7,[r0,#2] // [2] + strb r7,[r0,#3] // [2] + adds r0,#4 // [1] + + // check if continue with next segment +4: pop {r7} + + ldr r2,[sp,#8] // get base pointer to text data -> R2 + ldr r3,[sp,#4] // get base pointer to gradient array -> R3 + cmp r7,#16 + bhs RenderDText_OutLoop + + // pop registers and return +3: pop {r4} + mov r8,r4 + pop {r1-r7,pc} + +// ---- prepare to render whole characters + + // prepare number of whole characters to render -> R1 +5: lsrs r1,r7,#2 // shift to get number of characters*2 + lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5 + subs r6,r5 // get remaining width + str r6,[sp,#12] // save new remaining width + subs r1,#3 // number of characters*2 - 3 + +// ---- [65*N-1] start inner loop, render characters in one part of segment +// Inner loop variables (* prepared before inner loop): +// R0 ... *pointer to destination data buffer +// R1 ... *number of characters to generate*2 - 3 (loop counter) +// R2 ... *pointer to source text buffer +// R3 ... *pointer to gradient array +// R4 ... *background color (expanded to 32-bit) +// R5 ... (temporary) +// R6 ... foreground color +// R7 ... font sample +// R8 ... *pointer to font line +// LR ... *pointer to conversion table +// [SP+4] ... base pointer to gradient array +// [SP+8] ... base pointer to text data (without X) +// [SP+12] ... remaining width +// [SP+36] ...
wrap width + +RenderDText_InLoop: + + // [6] load font sample -> R7 + ldrb r7,[r2,#0] // [2] load character from source text buffer -> R7 + adds r2,#1 // [1] shift pointer to source text buffer + add r7,r8 // [1] pointer to font line + ldrb r7,[r7] // [2] load font sample -> R7 + + // [3] load foreground color, XOR with background -> R6 + ldmia r3!,{r6} // [2] load foreground color from gradient buffer + eors r6,r4 // [1] XOR foreground color with background color + + // [2] prepare conversion table -> R7 + lsls r7,#3 // [1] multiply font sample * 8 + add r7,lr // [1] add pointer to conversion table + + // [4] convert first 4 pixels (higher 4 bits) -> R5 + ldr r5,[r7,#0] // [2] load mask for higher 4 bits + ands r5,r6 // [1] mask foreground color + eors r5,r4 // [1] combine with background color + + // [20] store 8 pixels + strb r5,[r0,#0] // [2] + strb r5,[r0,#1] // [2] + lsrs r5,#8 // [1] + strb r5,[r0,#2] // [2] + strb r5,[r0,#3] // [2] + lsrs r5,#8 // [1] + strb r5,[r0,#4] // [2] + strb r5,[r0,#5] // [2] + lsrs r5,#8 // [1] + strb r5,[r0,#6] // [2] + strb r5,[r0,#7] // [2] + adds r0,#8 // [1] + + // [3] load foreground color, XOR with background -> R6 + ldmia r3!,{r6} // [2] load foreground color from gradient buffer + eors r6,r4 // [1] XOR foreground color with background color + + // [4] convert second 4 pixels (lower 4 bits) + ldr r7,[r7,#4] // [2] load mask for lower 4 bits + ands r7,r6 // [1] mask foreground color + eors r7,r4 // [1] combine with background color + + // [20] store 8 pixels + strb r7,[r0,#0] // [2] + strb r7,[r0,#1] // [2] + lsrs r7,#8 // [1] + strb r7,[r0,#2] // [2] + strb r7,[r0,#3] // [2] + lsrs r7,#8 // [1] + strb r7,[r0,#4] // [2] + strb r7,[r0,#5] // [2] + lsrs r7,#8 // [1] + strb r7,[r0,#6] // [2] + strb r7,[r0,#7] // [2] + adds r0,#8 // [1] + + // [2,3] loop counter + subs r1,#4 // [1] shift loop counter + bhi RenderDText_InLoop // [1,2] > 0, render next whole character + +// ---- end inner loop, continue with last character, or
start new part + + // continue to outer loop + ldr r7,[sp,#36] // load wrap width + adds r1,#3 // return size of last tile + lsls r1,#2 // convert back to pixels + bne RenderDText_Last // render 1st half of last character + ldr r2,[sp,#8] // get base pointer to text data -> R2 + ldr r3,[sp,#4] // get base pointer to gradient array -> R3 + b RenderDText_OutLoop // go back to outer loop + + .align 2 +RenderDText_Addr: + .word RenderTextMask +RenderDText_pSioBase: + .word SIO_BASE // address of SIO base diff --git a/MCUME_pico/picovga_t4/render/vga_fastsprite.S b/MCUME_pico/picovga_t4/render/vga_fastsprite.S new file mode 100755 index 0000000..dfced9a --- /dev/null +++ b/MCUME_pico/picovga_t4/render/vga_fastsprite.S @@ -0,0 +1,160 @@ + +// **************************************************************************** +// +// VGA render LAYERMODE_FASTSPRITE* +// +// **************************************************************************** + +#include "../define.h" // common definitions of C and ASM + + .syntax unified + .section .time_critical.Render, "ax" + .cpu cortex-m0plus + .thumb // use 16-bit instructions + +// extern "C" u32* RenderFastSprite(u32* cbuf, int y, sLayer* scr, u8* buf) + +// render layers with fast sprites LAYERMODE_FASTSPRITE* +// R0 ... cbuf pointer to control buffer +// R1 ... y coordinate of scanline +// R2 ... scr pointer to layer screen structure sLayer +// R3 ... buf pointer to destination data buffer with transparent color +// Output new pointer to control buffer. + +.thumb_func +.global RenderFastSprite +RenderFastSprite: + + // push registers + push {r1-r7,lr} + +// Stack content and input variables: +// R0 cbuf pointer to control buffer +// SP+0: R1 Y coordinate of scanline +// SP+4: R2 scr pointer to layer screen structure sLayer, later: num number of sprites +// SP+8: R3 buf pointer to data buffer with transparent color +// SP+12: R4 +// SP+16: R5 +// SP+20: R6 +// SP+24: R7 +// SP+28: LR + +// Variables: +// R0 ...
pointer to destination control buffer +// R1 ... X0 absolute coordinate counted from start +// R2 ... W layer screen width +// R3 ... s pointer to current sprite, later: absolute X coordinate of start of sprite +// R4 ... Y2 coordinate relative to sprite base, later: s->img[Y2*WB] address of sprite line +// R5 ... relative X2 coordinate of sprite segment +// R6 ... W2 width of sprite segment +// R7 ... (temporary) +// LR ... spr pointer to list of sprites +// [SP+0] ... (R1) Y coordinate of scanline +// [SP+4] ... (R2) num number of sprites (loop counter) +// [SP+8] ... (R3) buf pointer to data buffer with transparent color + + // load pointer to list of sprites -> LR + ldr r7,[r2,#SLAYER_IMG] + mov lr,r7 + + // load number of sprites -> [SP+4] + ldrh r7,[r2,#SLAYER_SPRITENUM] + str r7,[sp,#4] + + // load screen width -> R2 + ldrh r2,[r2,#SLAYER_W] + + // reset absolute coordinate X0 -> R1 + movs r1,#0 // R1 <- 0 + + // count number of sprites, end if num = 0 +2: ldr r7,[sp,#4] // get number of sprites + subs r7,#1 // decrement number of sprites + blo 8f // no other sprites (count was 0, SUBS borrowed) + str r7,[sp,#4] // save new number of sprites + + // get pointer to next sprite -> R3 + mov r7,lr // pointer to list of sprites -> R7 + ldmia r7!,{r3} // pointer to sprite -> R3 + mov lr,r7 // save new pointer to list of sprites -> LR + + // prepare Y2 coordinate relative to sprite base -> R4 + ldrh r7,[r3,#SSPRITE_Y] // get Y coordinate of the sprite -> R7 + sxth r7,r7 // signed extend Y2 + ldr r4,[sp,#0] // Y coordinate of scanline -> R4 + subs r4,r7 // relative coordinate Y2 = Y - s->y + + // check if Y2 coordinate is valid + bmi 2b // Y2 < 0, go next sprite + ldrh r7,[r3,#SSPRITE_H] // get sprite height + cmp r4,r7 // check sprite height + bge 2b // Y2 >= s->h, go next sprite + + // get relative start X2 coordinate of this line segment -> R5 + ldr r7,[r3,#SSPRITE_X0] // get table of X0 of lines + ldrb r5,[r7,r4] // get X2 coordinate -> R5 + lsls r5,#2 // convert X2 coordinate to byte offset
+ + // get width W2 of this line segment -> R6 + ldr r7,[r3,#SSPRITE_W0] // get table of W0 of lines + ldrb r6,[r7,r4] // get W2 width -> R6 + lsls r6,#2 // convert W2 width to bytes + + // get address of sprite line s->img[Y2*s->wb] -> R4 + ldrh r7,[r3,#SSPRITE_WB] // get sprite pitch s->wb + muls r4,r4,r7 // sprite offset Y2*s->wb + ldr r7,[r3,#SSPRITE_IMG] // get sprite image + add r4,r7 // line address -> R4 + + // get absolute X coordinate of start of line -> R3 + ldrh r3,[r3,#SSPRITE_X] // get sprite X coordinate -> R3 + sxth r3,r3 // signed extend X + adds r3,r3,r5 // s->X + X2, X coordinate of start of line -> R3 + + // check if sprite coordinate X lies below current X0 coordinate + subs r7,r1,r3 // difference X0 - X -> R7 + ble 3f // X0 <= X, sprite does not lie below current X0 + + // sprite correction + adds r5,r7 // X2 += X0 - X + subs r6,r7 // W2 -= X0 - X + mov r3,r1 // X = X0 + + // check line length W2 +3: subs r7,r2,r3 // W - X -> R7 + cmp r6,r7 // compare W2 with W - X + ble 4f // W2 <= W - X, length is OK + mov r6,r7 // limit segment width W2 -> R6 + + // align to word +4: movs r7,#3 // mask to word + bics r3,r7 // align X + bics r5,r7 // align X2 + bics r6,r7 // align W2 + ble 2b // no W2 left (W2 <= 0); N,Z come from BICS, V is still the one of the CMP above + + // decode space before sprite + subs r7,r3,r1 // X - X0 -> R7 + ble 5f // no space left before sprite + lsrs r7,#2 // number of words (X - X0)/4 + stmia r0!,{r7} // write number of words + ldr r7,[sp,#8] // pointer to data buffer -> R7 + stmia r0!,{r7} // write address + mov r1,r3 // shift X0 + + // write sprite line +5: adds r7,r4,r5 // address of pixel &s->img[y2*s->wb+x2] -> R7 + lsrs r4,r6,#2 // W2/4 line length -> R4 + stmia r0!,{r4,r7} // write sprite length and address + adds r1,r6 // add X0 += W2 + b 2b // next sprite + + // clear rest of scanline +8: subs r2,r1 // subtract W - X0 + bls 9f // no pixels left + lsrs r2,#2 // (W - X0)/4 + ldr r3,[sp,#8] // pointer to data buffer -> R3 + stmia r0!,{r2,r3} // write number of pixels and
address + + // pop registers and return +9: pop {r1-r7,pc} diff --git a/MCUME_pico/picovga_t4/render/vga_ftext.S b/MCUME_pico/picovga_t4/render/vga_ftext.S new file mode 100755 index 0000000..90713a0 --- /dev/null +++ b/MCUME_pico/picovga_t4/render/vga_ftext.S @@ -0,0 +1,313 @@ + +// **************************************************************************** +// +// VGA render GF_FTEXT +// +// **************************************************************************** +// u32 par SSEGM_PAR pointer to the font +// u32 par2 SSEGM_PAR2 background color +// u16 par3 font height + +#include "../define.h" // common definitions of C and ASM +#include "hardware/regs/sio.h" // registers of hardware divider +#include "hardware/regs/addressmap.h" // SIO base address + + .syntax unified + .section .time_critical.Render, "ax" + .cpu cortex-m0plus + .thumb // use 16-bit instructions + +// render font pixel mask +.extern RenderTextMask // u32 RenderTextMask[512]; + +// extern "C" u8* RenderFText(u8* dbuf, int x, int y, int w, sSegm* segm) + +// render 8-pixel foreground color text GF_FTEXT +// R0 ... destination data buffer +// R1 ... start X coordinate (in pixels, must be multiple of 4) +// R2 ... start Y coordinate (in graphics lines) +// R3 ... width to display (must be multiple of 4 and > 0) +// [stack] ... segm video segment sSegm +// Output new pointer to destination data buffer. +// 320 pixels takes 8.7 us on 151 MHz.
+ +.thumb_func +.global RenderFText +RenderFText: + + // push registers + push {r1-r7,lr} + +// Stack content: +// SP+0: R1 start X coordinate +// SP+4: R2 start Y coordinate (later: base pointer to text data row) +// SP+8: R3 width to display +// SP+12: R4 +// SP+16: R5 +// SP+20: R6 +// SP+24: R7 +// SP+28: LR +// SP+32: video segment (later: wrap width in X direction) + + // get pointer to video segment -> R4 + ldr r4,[sp,#32] // load video segment -> R4 + + // start divide Y/font height + ldr r6,RenderFText_pSioBase // get address of SIO base -> R6 + str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate + ldrh r2,[r4,#SSEGM_PAR3] // font height -> R2 + str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height + +// - now we must wait at least 8 clock cycles to get result of division + + // [6] get wrap width -> [SP+32] + ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width + movs r7,#3 // [1] mask to align to 32-bit + bics r5,r7 // [1] align wrap + str r5,[sp,#32] // [2] save wrap width + + // [1] align X coordinate to 32-bit + bics r1,r7 // [1] + + // [3] align remaining width + bics r3,r7 // [1] + str r3,[sp,#8] // [2] save new width + + // load result of division Y/font_height -> R5 Y relative at row, R2 Y row + // Note: QUOTIENT must be read last + ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row + ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient -> R2, index of row + + // pointer to font line -> R3 + lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long) + ldr r3,[r4,#SSEGM_PAR] // get pointer to font + add r3,r5 // line offset + font base -> pointer to current font line R3 + + // base pointer to text data (without X) -> [SP+4], R2 + ldrh r5,[r4,#SSEGM_WB] // get pitch of rows + muls r2,r5 // Y * WB -> offset of row in text buffer + ldr r5,[r4,#SSEGM_DATA] // pointer to data + add r2,r5 // base address of text buffer + str r2,[sp,#4] // save pointer to text
buffer + + // prepare pointer to text data with X -> R2 + lsrs r6,r1,#3 // convert X to character index (1 character is 8 pixels width) + lsls r6,#1 // convert to character offset (1 position is: 1 character + 1 color) + add r2,r6 // pointer to source text buffer -> R2 + + // prepare background color, expand to 32 bits -> R4 + ldrb r4,[r4,#SSEGM_PAR2] // load background color + lsls r5,r4,#8 // shift background color << 8 + orrs r5,r4 // color expanded to 16 bits + lsls r4,r5,#16 // shift 16-bit color << 16 + orrs r4,r5 // color expanded to 32 bits + + // prepare pointer to conversion table -> LR + ldr r5,RenderFText_Addr // get pointer to conversion table -> R5 + mov lr,r5 // conversion table -> LR + +// ---- render 2nd half of first character +// R0 ... pointer to destination data buffer +// R1 ... start X coordinate +// R2 ... pointer to source text buffer +// R3 ... pointer to font line +// R4 ... background color (expanded to 32-bit) +// R5..R7 ... (temporary) +// LR ... pointer to conversion table +// [SP+4] ... base pointer to text data (without X) +// [SP+8] ... remaining width +// [SP+32] ...
wrap width + + // check bit 2 of X coordinate - check if image starts with 2nd half of first character + lsls r6,r1,#29 // move bit 2 of X coordinate into bit 31 (N flag) + bpl 2f // bit 2 not set, starting even 4-pixels + + // [4] load font sample -> R5 + ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 + ldrb r5,[r3,r5] // [2] load font sample -> R5 + + // [3] load foreground color -> R6 + ldrb r6,[r2,#1] // [2] load foreground color from source text buffer -> R6 + adds r2,#2 // [1] shift pointer to source text buffer + + // [4] expand foreground color to 32-bit -> R6 + lsls r7,r6,#8 // [1] shift foreground color << 8 + orrs r7,r6 // [1] color expanded to 16 bits + lsls r6,r7,#16 // [1] shift 16-bit color << 16 + orrs r6,r7 // [1] color expanded to 32 bits + + // [1] XOR foreground and background color -> R6 + eors r6,r4 // [1] XOR foreground color with background color + + // [2] prepare conversion table -> R5 + lsls r5,#3 // [1] multiply font sample * 8 + add r5,lr // [1] add pointer to conversion table + + // [6] convert second 4 pixels (lower 4 bits) + ldr r7,[r5,#4] // [2] load mask for lower 4 bits + ands r7,r6 // [1] mask foreground color + eors r7,r4 // [1] combine with background color + stmia r0!,{r7} // [2] store second 4 pixels + + // shift X coordinate + adds r1,#4 // shift X coordinate + + // check end of segment + ldr r7,[sp,#32] // load wrap width + cmp r1,r7 // end of segment? + blo 1f // X < wrap width, no wrap needed + movs r1,#0 // reset X coordinate + ldr r2,[sp,#4] // get base pointer to text data -> R2 + + // shift remaining width +1: ldr r7,[sp,#8] // get remaining width + subs r7,#4 // shift width + str r7,[sp,#8] // save new width + + // prepare wrap width - start X -> R7 +2: ldr r7,[sp,#32] // load wrap width + subs r7,r1 // pixels remaining to end of segment + +// ---- start outer loop, render one part of segment +// Outer loop variables (* prepared before outer loop): +// R0 ... *pointer to destination data buffer +// R1 ...
number of characters to generate in one part of segment +// R2 ... *pointer to source text buffer +// R3 ... *pointer to font line +// R4 ... *background color (expanded to 32-bit) +// R5 ... (temporary) +// R6 ... (temporary) +// R7 ... *wrap width of this segment, later: temporary +// LR ... *pointer to conversion table +// [SP+4] ... *base pointer to text data (without X) +// [SP+8] ... *remaining width +// [SP+32] ... *wrap width + +RenderFText_OutLoop: + + // limit wrap width by total width -> R7 + ldr r6,[sp,#8] // get remaining width + cmp r7,r6 // compare with wrap width + bls 2f // width is OK + mov r7,r6 // limit wrap width + + // check if remain whole characters +2: cmp r7,#8 // check number of remaining pixels + bhs 5f // enough characters remain + + // check if 1st part of last character remains + cmp r7,#4 // check 1st part of last character + blo 3f // all done + +// ---- render 1st part of last character + +RenderFText_Last: + + // [4] load font sample -> R5 + ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 + ldrb r5,[r3,r5] // [2] load font sample -> R5 + + // [3] load foreground color -> R6 + ldrb r6,[r2,#1] // [2] load foreground color from source text buffer -> R6 + adds r2,#2 // [1] shift pointer to source text buffer + + // [4] expand foreground color to 32-bit (R1 is used as temporary here) + lsls r1,r6,#8 // [1] shift foreground color << 8 + orrs r1,r6 // [1] color expanded to 16 bits + lsls r6,r1,#16 // [1] shift 16-bit color << 16 + orrs r6,r1 // [1] color expanded to 32 bits + + // [1] XOR foreground and background color -> R6 + eors r6,r4 // [1] XOR foreground color with background color + + // [2] prepare conversion table -> R5 + lsls r5,#3 // [1] multiply font sample * 8 + add r5,lr // [1] add pointer to conversion table + + // [6] convert first 4 pixels (higher 4 bits) + ldr r1,[r5,#0] // [2] load mask for higher 4 bits + ands r1,r6 // [1] mask foreground color + eors r1,r4 // [1] combine with background color + stmia r0!,{r1} // [2] store first
4 pixels + + // check if continue with next segment + ldr r2,[sp,#4] // get base pointer to text data -> R2 + cmp r7,#4 // R7 = wrap width (entered from inner loop) or part width below 8 (entered from outer loop) + bhi RenderFText_OutLoop // continue only if R7 > 4; NOTE(review): on outer-loop entry this returns without re-checking remaining width [SP+8] - confirm intended + + // pop registers and return +3: pop {r1-r7,pc} + +// ---- prepare to render whole characters + + // prepare number of whole characters to render -> R1 +5: lsrs r1,r7,#2 // shift to get number of characters*2 + lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5 + subs r6,r5 // get remaining width + str r6,[sp,#8] // save new remaining width + subs r1,#1 // number of characters*2 - 1 + +// ---- [29*N-1] start inner loop, render characters in one part of segment +// Inner loop variables (* prepared before inner loop): +// R0 ... *pointer to destination data buffer +// R1 ... *number of characters to generate*2 - 1 (loop counter) +// R2 ... *pointer to source text buffer +// R3 ... *pointer to font line +// R4 ... *background color (expanded to 32-bit) +// R5 ... font sample +// R6 ... foreground color (expanded to 32-bit) +// R7 ... (temporary) +// LR ...
*pointer to conversion table + +RenderFText_InLoop: + + // [4] load font sample -> R5 + ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 + ldrb r5,[r3,r5] // [2] load font sample -> R5 + + // [3] load foreground color -> R6 + ldrb r6,[r2,#1] // [2] load foreground color from source text buffer -> R6 + adds r2,#2 // [1] shift pointer to source text buffer + + // [4] expand foreground color to 32-bit + lsls r7,r6,#8 // [1] shift foreground color << 8 + orrs r7,r6 // [1] color expanded to 16 bits + lsls r6,r7,#16 // [1] shift 16-bit color << 16 + orrs r6,r7 // [1] color expanded to 32 bits + + // [1] XOR foreground and background color -> R6 + eors r6,r4 // [1] XOR foreground color with background color + + // [2] prepare conversion table -> R5 + lsls r5,#3 // [1] multiply font sample * 8 + add r5,lr // [1] add pointer to conversion table + + // [6] convert first 4 pixels (higher 4 bits) + ldr r7,[r5,#0] // [2] load mask for higher 4 bits + ands r7,r6 // [1] mask foreground color + eors r7,r4 // [1] combine with background color + stmia r0!,{r7} // [2] store first 4 pixels + + // [6] convert second 4 pixels (lower 4 bits) + ldr r7,[r5,#4] // [2] load mask for lower 4 bits + ands r7,r6 // [1] mask foreground color + eors r7,r4 // [1] combine with background color + stmia r0!,{r7} // [2] store second 4 pixels + + // [2,3] loop counter + subs r1,#2 // [1] shift loop counter + bhi RenderFText_InLoop // [1,2] > 0, render next whole character + +// ---- end inner loop, continue with last character, or start new part + + // continue to outer loop + ldr r7,[sp,#32] // load wrap width (LDR does not change flags, BEQ below still tests the SUBS result) + beq RenderFText_Last // counter reached exactly 0: render 1st half of last character + ldr r2,[sp,#4] // get base pointer to text data -> R2 + b RenderFText_OutLoop // go back to outer loop + + .align 2 +RenderFText_Addr: + .word RenderTextMask +RenderFText_pSioBase: + .word SIO_BASE // address of SIO base diff --git a/MCUME_pico/picovga_t4/render/vga_graph1.S b/MCUME_pico/picovga_t4/render/vga_graph1.S
new file mode 100755 index 0000000..8f558a5 --- /dev/null +++ b/MCUME_pico/picovga_t4/render/vga_graph1.S @@ -0,0 +1,258 @@ + +// **************************************************************************** +// +// VGA render GF_GRAPH1 +// +// **************************************************************************** + +#include "../define.h" // common definitions of C and ASM + + .syntax unified + .section .time_critical.Render, "ax" + .cpu cortex-m0plus + .thumb // use 16-bit instructions + +// render font pixel mask +.extern RenderTextMask // u32 RenderTextMask[512]; + +// extern "C" u8* RenderGraph1(u8* dbuf, int x, int y, int w, sSegm* segm); + +// render 1-bit palette graphics GF_GRAPH1 +// dbuf ... destination data buffer +// x ... start X coordinate (must be multiple of 4) +// y ... start Y coordinate +// w ... width of this segment (must be multiple of 4) +// segm ... video segment +// Output new dbuf pointer. +// 320 pixels takes 6 us on 151 MHz. + +.thumb_func +.global RenderGraph1 +RenderGraph1: + + // push registers + push {r3-r7,lr} + +// Input registers and stack content: +// R0 ... destination data buffer +// R1 ... start X coordinate +// R2 ...
start Y coordinate +// SP+0: R3 width to display +// SP+4: R4 +// SP+8: R5 +// SP+12: R6 +// SP+16: R7 +// SP+20: LR +// SP+24: video segment (later: wrap width in X direction) + + // get pointer to video segment -> R4 + ldr r4,[sp,#24] // load video segment -> R4 + + // get wrap width -> [SP+24] + ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width + movs r7,#3 // mask to align to 32-bit + bics r5,r7 // align wrap + str r5,[sp,#24] // save wrap width + + // align X coordinate to 32-bit -> R1 + bics r1,r7 + + // align remaining width -> [SP+0] + bics r3,r7 + str r3,[sp,#0] // save new width + + // base pointer to image data (without X) -> LR + ldrh r5,[r4,#SSEGM_WB] // get pitch of lines + muls r2,r5 // Y * WB -> offset of row in image buffer + ldr r5,[r4,#SSEGM_DATA] // pointer to data + add r2,r5 // base address of image buffer + mov lr,r2 // save pointer to image buffer + + // prepare pointer to image data with X -> R2 + lsrs r2,r1,#3 // convert X to byte index (1 byte holds 8 pixels) + add r2,lr // pointer to source image buffer -> R2 + + // prepare foreground color, expand to 32-bit -> R6 + ldrb r6,[r4,#SSEGM_PAR+1] // load foreground color + lsls r7,r6,#8 // [1] shift foreground color << 8 + orrs r7,r6 // [1] color expanded to 16 bits + lsls r6,r7,#16 // [1] shift 16-bit color << 16 + orrs r6,r7 // [1] color expanded to 32 bits + + // prepare background color, expand to 32 bits -> R4 + ldrb r4,[r4,#SSEGM_PAR] // load background color + lsls r5,r4,#8 // shift background color << 8 + orrs r5,r4 // color expanded to 16 bits + lsls r4,r5,#16 // shift 16-bit color << 16 + orrs r4,r5 // color expanded to 32 bits + + // [1] XOR foreground and background color -> R6 + eors r6,r4 // [1] XOR foreground color with background color + + // prepare pointer to conversion table -> R3 + ldr r3,RenderGraph1_Addr // get pointer to conversion table -> R3 + +// ---- render 2nd half of first image byte +// R0 ... pointer to destination data buffer +// R1 ...
start X coordinate +// R2 ... current pointer to image buffer +// R3 ... pointer to conversion table +// R4 ... background color (expanded to 32-bit) +// R5 ... (temporary) +// R6 ... foreground color XOR background color (expanded to 32-bit) +// R7 ... (temporary) +// LR ... base pointer to image data (without X) +// [SP+0] ... remaining width +// [SP+24] ... wrap width + + // check bit 2 of X coordinate - check if image starts with 2nd half of first image byte + lsls r5,r1,#29 // move bit 2 of X coordinate into bit 31 (N flag) + bpl 2f // bit 2 not set, starting even 4-pixels + + // [3] load image sample -> R5 + ldrb r5,[r2,#0] // [2] load image sample -> R5 + adds r2,#1 // [1] shift pointer to image buffer + + // [2] prepare conversion table -> R5 + lsls r5,#3 // [1] multiply image sample * 8 + add r5,r3 // [1] add pointer to conversion table + + // [6] convert second 4 pixels (lower 4 bits) + ldr r7,[r5,#4] // [2] load mask for lower 4 bits + ands r7,r6 // [1] mask foreground color + eors r7,r4 // [1] combine with background color + stmia r0!,{r7} // [2] store second 4 pixels + + // shift X coordinate + adds r1,#4 // shift X coordinate + + // check end of segment + ldr r7,[sp,#24] // load wrap width + cmp r1,r7 // X=end of segment? + blo 1f // X < wrap width, no wrap needed + movs r1,#0 // reset X coordinate + mov r2,lr // get base pointer to image data -> R2 + + // shift remaining width +1: ldr r7,[sp,#0] // get remaining width + subs r7,#4 // shift width + str r7,[sp,#0] // save new width + + // prepare wrap width - start X -> R7 +2: ldr r7,[sp,#24] // load wrap width + subs r7,r1 // pixels remaining to end of segment + +// ---- start outer loop, render one part of segment +// Outer loop variables (* prepared before outer loop): +// R0 ... *pointer to destination data buffer +// R1 ... number of image bytes to generate in one part of segment +// R2 ... *current pointer to image buffer +// R3 ... *pointer to conversion table +// R4 ... *background color (expanded to 32-bit) +// R5 ... (temporary) +// R6 ...
*foreground color XOR background color (expanded to 32-bit) +// R7 ... *wrap width of this segment, later: temporary +// LR ... *base pointer to image data (without X) +// [SP+0] ... *remaining width +// [SP+24] ... *wrap width + +RenderGraph1_OutLoop: + + // limit wrap width by total width -> R7 + ldr r5,[sp,#0] // get remaining width + cmp r7,r5 // compare with wrap width + bls 2f // width is OK + mov r7,r5 // limit wrap width + + // check if whole image bytes (8 pixels) remain +2: cmp r7,#8 // check number of remaining pixels + bhs 5f // enough pixels remain for a whole byte + + // check if 1st part of last image byte remains + cmp r7,#4 // check 1st part of last image byte + blo 3f // all done + +// ---- render 1st part of last image byte + +RenderGraph1_Last: + + // [3] load image sample -> R5 + ldrb r5,[r2,#0] // [2] load image sample -> R5 + adds r2,#1 // [1] shift pointer to image buffer + + // [2] prepare conversion table -> R5 + lsls r5,#3 // [1] multiply image sample * 8 + add r5,r3 // [1] add pointer to conversion table + + // [6] convert first 4 pixels (higher 4 bits) + ldr r1,[r5,#0] // [2] load mask for higher 4 bits + ands r1,r6 // [1] mask foreground color + eors r1,r4 // [1] combine with background color + stmia r0!,{r1} // [2] store first 4 pixels + + // check if continue with next segment + mov r2,lr // get base pointer to image data -> R2 + cmp r7,#4 // R7 = wrap width (entered from inner loop) or part width below 8 (entered from outer loop) + bhi RenderGraph1_OutLoop // continue only if R7 > 4; NOTE(review): on outer-loop entry this returns without re-checking remaining width [SP+0] - confirm intended + + // pop registers and return +3: pop {r3-r7,pc} + +// ---- prepare to render whole image bytes + + // prepare number of whole image bytes to render -> R1 +5: lsrs r1,r7,#2 // shift to get number of image bytes*2 + lsls r7,r1,#2 // shift back to get number of pixels, rounded down -> R7 + subs r5,r7 // get remaining width + str r5,[sp,#0] // save new remaining width + subs r1,#1 // number of image bytes*2 - 1 + +// ---- [20*N-1] start inner loop, render image bytes in one part of segment +// Inner loop variables (* prepared before inner loop): +// R0 ... *pointer to destination data buffer +// R1 ...
*number of image bytes to generate*2 - 1 (loop counter) +// R2 ... *current pointer to image buffer +// R3 ... *pointer to conversion table +// R4 ... *background color (expanded to 32-bit) +// R5 ... image sample +// R6 ... *foreground color XOR background color (expanded to 32-bit) +// R7 ... (temporary) +// LR ... *base pointer to image data (without X) + +RenderGraph1_InLoop: + + // [3] load image sample -> R5 + ldrb r5,[r2,#0] // [2] load image sample -> R5 + adds r2,#1 // [1] shift pointer to image buffer + + // [2] prepare conversion table -> R5 + lsls r5,#3 // [1] multiply image sample * 8 + add r5,r3 // [1] add pointer to conversion table + + // [6] convert first 4 pixels (higher 4 bits) + ldr r7,[r5,#0] // [2] load mask for higher 4 bits + ands r7,r6 // [1] mask foreground color + eors r7,r4 // [1] combine with background color + stmia r0!,{r7} // [2] store first 4 pixels + + // [6] convert second 4 pixels (lower 4 bits) + ldr r7,[r5,#4] // [2] load mask for lower 4 bits + ands r7,r6 // [1] mask foreground color + eors r7,r4 // [1] combine with background color + stmia r0!,{r7} // [2] store second 4 pixels + + // [2,3] loop counter + subs r1,#2 // [1] shift loop counter + bhi RenderGraph1_InLoop // [1,2] > 0, render next whole image byte + +// ---- end inner loop, continue with last image byte, or start new part + + // continue to outer loop + ldr r7,[sp,#24] // load wrap width (LDR does not change flags, BEQ below still tests the SUBS result) + beq RenderGraph1_Last // counter reached exactly 0: render 1st half of last image byte + mov r2,lr // get base pointer to image data -> R2 + b RenderGraph1_OutLoop // go back to outer loop + + .align 2 +RenderGraph1_Addr: + .word RenderTextMask diff --git a/MCUME_pico/picovga_t4/render/vga_graph2.S b/MCUME_pico/picovga_t4/render/vga_graph2.S new file mode 100755 index 0000000..205f844 --- /dev/null +++ b/MCUME_pico/picovga_t4/render/vga_graph2.S @@ -0,0 +1,173 @@ + +// **************************************************************************** +// +// VGA render GF_GRAPH2 +// +//
**************************************************************************** + +#include "../define.h" // common definitions of C and ASM + + .syntax unified + .section .time_critical.Render, "ax" + .cpu cortex-m0plus + .thumb // use 16-bit instructions + +// extern "C" u8* RenderGraph2(u8* dbuf, int x, int y, int w, sSegm* segm); + +// render 2-bit palette graphics GF_GRAPH2 +// R0 ... destination data buffer +// R1 ... start X coordinate (must be multiple of 4) +// R2 ... start Y coordinate +// R3 ... width of this segment (must be multiple of 4) +// [stack] ... segm video segment +// Output new dbuf pointer. +// 320 pixels takes 5 us on 151 MHz. + +.thumb_func +.global RenderGraph2 +RenderGraph2: + + // push registers + push {r3-r7,lr} + +// Input registers and stack content: +// R0 ... destination data buffer +// R1 ... start X coordinate +// R2 ... start Y coordinate +// SP+0: R3 ... width to display (remaining width) +// SP+4: R4 +// SP+8: R5 +// SP+12: R6 +// SP+16: R7 +// SP+20: LR +// SP+24: video segment + + // get pointer to video segment -> R4 + ldr r4,[sp,#24] // load video segment -> R4 + + // get wrap width -> R7 + ldrh r7,[r4,#SSEGM_WRAPX] // get wrap width + movs r6,#3 // mask to align to 32-bit + bics r7,r6 // align wrap + + // align X coordinate to 32-bit -> R1 + bics r1,r6 + + // align remaining width -> [SP+0] + bics r3,r6 + str r3,[sp,#0] // save new width + + // base pointer to image data (without X) -> LR, R2 + ldrh r5,[r4,#SSEGM_WB] // get pitch of rows + muls r2,r5 // Y * WB -> offset of row in image buffer + ldr r5,[r4,#SSEGM_DATA] // pointer to data + add r2,r5 // base address of image buffer + mov lr,r2 // save pointer to image buffer + + // prepare pointer to image data with X -> R2 + lsrs r6,r1,#2 // convert X to byte index (1 byte holds 4 pixels) + add r2,r6 // add index, pointer to source image buffer -> R2 + + // prepare pointer to palette translation table -> R3 + ldr r3,[r4,#SSEGM_PAR] // get pointer to palette translation
table -> R3 + + // prepare wrap width - start X -> R6 + subs r6,r7,r1 // pixels remaining to end of segment + +// ---- start outer loop, render one part of segment +// Outer loop variables (* prepared before outer loop): +// R0 ... *pointer to destination data buffer +// R1 ... number of 4-pixels to generate in one part of segment +// R2 ... *pointer to source image buffer +// R3 ... *pointer to palette translation table +// R4 ... (temporary) +// R5 ... (temporary) +// R6 ... part width +// R7 ... *wrap width +// LR ... *base pointer to image data (without X) +// [SP+0] ... width to display + +RenderGraph2_OutLoop: + + // limit part width by remaining width -> R6 + ldr r4,[sp,#0] // get remaining width + cmp r6,r4 // compare part width with remaining width + bls 2f // width is OK + mov r6,r4 // limit part width + + // check number of pixels +2: cmp r6,#4 // check number of remaining pixels + bhs 5f // at least one 4-pixel group remains + + // pop registers and return + pop {r3-r7,pc} + +// ---- prepare to render whole 4-pixel groups + + // prepare number of 4-pixels to render -> R1 +5: lsrs r1,r6,#2 // shift to get number of 4-pixels + lsls r6,r1,#2 // shift back to get number of pixels, rounded down -> R6 + subs r4,r6 // get remaining width + str r4,[sp,#0] // save new remaining width + +// ---- generate odd 4-pixel group + + // [2,3] check odd 4-pixel group + lsrs r1,#1 // [1] halve counter -> number of 8-pixel pairs, carry = odd 4-pixel group + bcc RenderGraph2_InLoop // [1,2] no odd 4-pixel group + + // [3] load image sample -> R4 + ldrb r4,[r2,#0] // [2] load image sample + adds r2,#1 // [1] increase pointer to image data + + // [5] write 4 pixels + lsls r4,#2 // [1] index*4 + ldr r5,[r3,r4] // [2] load colors + stmia r0!,{r5} // [2] write pixels + + // [2,3] check end of data + tst r1,r1 // [1] check counter + beq RenderGraph2_EndLoop // [1,2] end + +// ---- [17*N-1] start inner loop, render pixels in one part of segment +// Inner loop variables (* prepared before inner loop): +// R0 ... *pointer to destination data buffer +// R1 ...
*number of 8-pixel pairs to generate (loop counter) +// R2 ... *pointer to source image buffer +// R3 ... *pointer to palette translation table +// R4 ... image sample +// R5 ... output pixels +// R6 ... output pixels +// R7 ... *wrap width +// LR ... *base pointer to image data (without X) + +RenderGraph2_InLoop: + + // [2] load image sample -> R4 + ldrb r4,[r2,#0] // [2] load image sample + + // [3] prepare 4 pixels + lsls r4,#2 // [1] index*4 + ldr r5,[r3,r4] // [2] load colors + + // [3] load image sample -> R4 + ldrb r4,[r2,#1] // [2] load image sample + adds r2,#2 // [1] increase pointer to image data + + // [6] prepare and write next 4 pixels + lsls r4,#2 // [1] index*4 + ldr r6,[r3,r4] // [2] load colors + stmia r0!,{r5,r6} // [3] write 8 pixels + + // [2,3] loop counter + subs r1,#1 // [1] loop counter + bne RenderGraph2_InLoop // [1,2] next step + +// ---- end inner loop, start new part + +RenderGraph2_EndLoop: + + // continue to outer loop + mov r6,r7 // load wrap width -> R6 + mov r2,lr // get base pointer to image data -> R2 + b RenderGraph2_OutLoop // go back to outer loop diff --git a/MCUME_pico/picovga_t4/render/vga_graph4.S b/MCUME_pico/picovga_t4/render/vga_graph4.S new file mode 100755 index 0000000..a063b4c --- /dev/null +++ b/MCUME_pico/picovga_t4/render/vga_graph4.S @@ -0,0 +1,214 @@ + +// **************************************************************************** +// +// VGA render GF_GRAPH4 +// +// **************************************************************************** + +#include "../define.h" // common definitions of C and ASM + + .syntax unified + .section .time_critical.Render, "ax" + .cpu cortex-m0plus + .thumb // use 16-bit instructions + +// extern "C" u8* RenderGraph4(u8* dbuf, int x, int y, int w, sSegm* segm); + +// render 4-bit palette graphics GF_GRAPH4 +// R0 ... destination data buffer +// R1 ... start X coordinate (must be multiple of 4) +// R2 ... start Y coordinate +// R3 ...
width of this segment (must be multiple of 4) +// [stack] ... segm video segment +// Output new dbuf pointer. +// 320 pixels takes 8.8 us on 151 MHz. + +.thumb_func +.global RenderGraph4 +RenderGraph4: + + // push registers + push {r3-r7,lr} + +// Input registers and stack content: +// R0 ... destination data buffer +// R1 ... start X coordinate +// R2 ... start Y coordinate +// SP+0: R3 ... width to display (remaining width) +// SP+4: R4 +// SP+8: R5 +// SP+12: R6 +// SP+16: R7 +// SP+20: LR +// SP+24: video segment (later: wrap width in X direction) + + // get pointer to video segment -> R4 + ldr r4,[sp,#24] // load video segment -> R4 + + // get wrap width -> [SP+24] + ldrh r7,[r4,#SSEGM_WRAPX] // get wrap width + movs r6,#3 // mask to align to 32-bit + bics r7,r6 // align wrap + str r7,[sp,#24] // save wrap width + + // align X coordinate to 32-bit -> R1 + bics r1,r6 + + // align remaining width -> [SP+0] + bics r3,r6 + str r3,[sp,#0] // save new width + + // base pointer to image data (without X) -> LR, R2 + ldrh r5,[r4,#SSEGM_WB] // get pitch of rows + muls r2,r5 // Y * WB -> offset of row in image buffer + ldr r5,[r4,#SSEGM_DATA] // pointer to data + add r2,r5 // base address of image buffer + mov lr,r2 // save pointer to image buffer + + // prepare pointer to image data with X -> R2 + lsrs r6,r1,#1 // convert X to byte index (1 byte holds 2 pixels) + add r2,r6 // add index, pointer to source image buffer -> R2 + + // prepare pointer to palette translation table -> R3 + ldr r3,[r4,#SSEGM_PAR] // get pointer to palette translation table -> R3 + + // prepare wrap width - start X -> R6 + ldr r6,[sp,#24] // load wrap width + subs r6,r1 // pixels remaining to end of segment + +// ---- start outer loop, render one part of segment +// Outer loop variables (* prepared before outer loop): +// R0 ... *pointer to destination data buffer +// R1 ... number of 4-pixels to generate in one part of segment +// R2 ... *pointer to source image buffer +// R3 ...
*pointer to palette translation table +// R4 ... (temporary) +// R5 ... (temporary) +// R6 ... part width +// R7 ... (temporary) +// LR ... *base pointer to image data (without X) +// [SP+0] ... width to display +// [SP+24] ... wrap width + +RenderGraph4_OutLoop: + + // limit part width by remaining width -> R6 + ldr r4,[sp,#0] // get remaining width + cmp r6,r4 // compare part width with remaining width + bls 2f // width is OK + mov r6,r4 // limit part width + + // check number of pixels +2: cmp r6,#4 // check number of remaining pixels + bhs 5f // at least one 4-pixel group remains + + // pop registers and return + pop {r3-r7,pc} + +// ---- prepare to render whole 4-pixel groups + + // prepare number of 4-pixels to render -> R1 +5: lsrs r1,r6,#2 // shift to get number of 4-pixels + lsls r6,r1,#2 // shift back to get number of pixels, rounded down -> R6 + subs r4,r6 // get remaining width + str r4,[sp,#0] // save new remaining width + +// ---- generate odd 4-pixel group + + // [2,3] check odd 4-pixel group + lsrs r1,#1 // [1] halve counter -> number of 8-pixel groups, carry = odd 4-pixel group + bcc RenderGraph4_InLoop // [1,2] no odd 4-pixel group + + // [2] load image sample -> R4 + ldrb r4,[r2,#0] // [2] load image sample + + // [3] prepare 1st and 2nd pixel -> R5 + lsls r4,#1 // [1] index*2 + ldrh r5,[r3,r4] // [2] load 2 pixels + + // [3] load image sample -> R4 + ldrb r4,[r2,#1] // [2] load image sample + adds r2,#2 // [1] increase pointer to image data + + // [3] prepare 3rd and 4th pixel -> R6 + lsls r4,#1 // [1] index*2 + ldrh r6,[r3,r4] // [2] load 2 pixels + + // [2] compose pixels -> R5 + lsls r6,#16 // [1] shift 3rd and 4th pixels + orrs r5,r6 // [1] compose pixels + + // [2] write pixels + stmia r0!,{r5} // [2] write 4 pixels + + // [2,3] check end of data + tst r1,r1 // [1] check counter + beq RenderGraph4_EndLoop // [1,2] end + +// ---- [31*N-1] start inner loop, render pixels in one part of segment +// Inner loop variables (* prepared before inner loop): +// R0 ... *pointer to destination data buffer +// R1 ...
*number of 8-pixel groups to generate (loop counter) +// R2 ... *pointer to source image buffer +// R3 ... *pointer to palette translation table +// R4 ... image sample +// R5 ... output pixels +// R6 ... output pixels +// R7 ... output pixels +// LR ... *base pointer to image data (without X) +// [SP+24] ... wrap width + +RenderGraph4_InLoop: + + // [2] load image sample -> R4 + ldrb r4,[r2,#0] // [2] load image sample + + // [3] prepare 1st and 2nd pixel -> R5 + lsls r4,#1 // [1] index*2 + ldrh r5,[r3,r4] // [2] load 2 pixels + + // [2] load image sample -> R4 + ldrb r4,[r2,#1] // [2] load image sample + + // [3] prepare 3rd and 4th pixel -> R6 + lsls r4,#1 // [1] index*2 + ldrh r6,[r3,r4] // [2] load 2 pixels + + // [2] compose pixels -> R5 + lsls r6,#16 // [1] shift 3rd and 4th pixels + orrs r5,r6 // [1] compose pixels + + // [2] load image sample -> R4 + ldrb r4,[r2,#2] // [2] load image sample + + // [3] prepare 5th and 6th pixel -> R6 + lsls r4,#1 // [1] index*2 + ldrh r6,[r3,r4] // [2] load 2 pixels + + // [3] load image sample -> R4 + ldrb r4,[r2,#3] // [2] load image sample + adds r2,#4 // [1] increase pointer to image data + + // [3] prepare 7th and 8th pixel -> R7 + lsls r4,#1 // [1] index*2 + ldrh r7,[r3,r4] // [2] load 2 pixels + + // [2] compose pixels -> R6 + lsls r7,#16 // [1] shift 7th and 8th pixels + orrs r6,r7 // [1] compose pixels + + // [3] write pixels + stmia r0!,{r5,r6} // [3] write 8 pixels + + // [2,3] loop counter + subs r1,#1 // [1] loop counter + bne RenderGraph4_InLoop // [1,2] next step + +// ---- end inner loop, start new part + +RenderGraph4_EndLoop: + + // continue to outer loop + ldr r6,[sp,#24] // load wrap width -> R6 + mov r2,lr // get base pointer to image data -> R2 + b RenderGraph4_OutLoop // go back to outer loop diff --git a/MCUME_pico/picovga_t4/render/vga_graph8.S b/MCUME_pico/picovga_t4/render/vga_graph8.S new file mode 100755 index 0000000..e31e3d2 --- /dev/null +++ b/MCUME_pico/picovga_t4/render/vga_graph8.S @@ -0,0 +1,134
@@ + +// **************************************************************************** +// +// VGA render GF_GRAPH8 +// +// **************************************************************************** + +#include "../define.h" // common definitions of C and ASM + + .syntax unified + .section .time_critical.Render, "ax" + .cpu cortex-m0plus + .thumb // use 16-bit instructions + +// extern "C" u32* RenderGrad1(u32* cbuf, int x, int y, int w, sSegm* segm); + +// render gradient with 1 line GF_GRAD1 +// R0 ... pointer to control buffer +// R1 ... start X coordinate (in pixels, must be multiple of 4) +// R2 ... start Y coordinate (in graphics lines), will be ignored and substituted with 0 +// R3 ... width to display (must be multiple of 4 and > 0) +// [stack] ... segm video segment sSegm +// Output new pointer to control buffer. +// 320 pixels takes 0.45 us on 151 MHz. + +.thumb_func +.global RenderGrad1 +RenderGrad1: + movs r2,#0 // force Y = 0 +// (no return here: execution falls through into RenderGrad2 and then RenderGraph8) + +// extern "C" u32* RenderGrad2(u32* cbuf, int x, int y, int w, sSegm* segm); + +// render gradient with 2 lines GF_GRAD2 +// R0 ... pointer to control buffer +// R1 ... start X coordinate (in pixels, must be multiple of 4) +// R2 ... start Y coordinate (in graphics lines), will be masked to values 0 and 1 +// R3 ... width to display (must be multiple of 4 and > 0) +// [stack] ... segm video segment sSegm +// Output new pointer to control buffer. +// 320 pixels takes 0.45 us on 151 MHz. + +.thumb_func +.global RenderGrad2 +RenderGrad2: + lsls r2,#31 // move bit 0 of Y up to bit 31, dropping all other bits + lsrs r2,#31 // move it back down -> Y is now 0 or 1 +// (no return here: execution falls through into RenderGraph8) + +// extern "C" u32* RenderGraph8(u32* cbuf, int x, int y, int w, sSegm* segm); + +// render native 8-bit graphics GF_GRAPH8 +// R0 ... pointer to control buffer +// R1 ... start X coordinate (in pixels, must be multiple of 4) +// R2 ... start Y coordinate (in graphics lines) +// R3 ... width to display (must be multiple of 4 and > 0) +// [stack] ... segm video segment sSegm +// Output new pointer to control buffer. +// 320 pixels takes 0.45 us on 151 MHz. 
+// No pixels are copied here: for every part of the line a pair {width/4, source pointer} is written to the control buffer. +.thumb_func +.global RenderGraph8 +RenderGraph8: + + // push registers + push {r4-r7,lr} + +// Stack content: +// SP+0: R4 +// SP+4: R5 +// SP+8: R6 +// SP+12: R7 +// SP+16: LR +// SP+20: video segment + +// Variables: +// R0 ... pointer to control buffer +// R1 ... X coordinate, later: width of one segment +// R2 ... Y coordinate, later: current pointer to data buffer +// R3 ... remaining width +// R4 ... base pointer to data buffer +// R5 ... (temporary) +// R6 ... (temporary) +// R7 ... wrap width + + // get pointer to video segment -> R4 + ldr r4,[sp,#20] // load video segment -> R4 + + // get wrap width -> R7 + ldrh r7,[r4,#SSEGM_WRAPX] // get wrap width + movs r6,#3 // mask to align to 32-bit + bics r7,r6 // align wrap + + // align X coordinate to 32-bit -> R1 + bics r1,r6 // clear low 2 bits of X + + // align remaining width -> R3 + bics r3,r6 // clear low 2 bits of width + + // base pointer to data buffer (without X) -> R4 + ldrh r5,[r4,#SSEGM_WB] // get pitch of rows + muls r2,r5 // Y * WB -> offset in data buffer + ldr r5,[r4,#SSEGM_DATA] // pointer to data + adds r4,r2,r5 // base address of data buffer -> R4 + + // prepare current pointer to image data with X -> R2 + adds r2,r4,r1 // pointer to source data buffer -> R2 + + // prepare wrap width - start X -> R1 + subs r1,r7,r1 // pixels remaining to end of segment + + // check remaining width +2: tst r3,r3 // check remaining width + beq 6f // end of data + + // limit wrap width by total width -> R1 + cmp r1,r3 // compare part width (R1) with remaining width (R3) + bls 4f // part width <= remaining width (unsigned), width is OK + mov r1,r3 // limit part width to remaining width + + // decrease remaining width (NOTE(review): if the aligned wrap width in R7 is 0, R1 is 0 on every later pass and R3 never reaches 0 - presumably callers guarantee wrapx >= 4, confirm) +4: subs r3,r1 // subtract from remaining width + + // save control block + lsrs r1,#2 // width / 4 + stm r0!,{r1,r2} // store pair {width/4, source pointer} to control buffer, R0 advances by 8 bytes + + // continue to next loop + mov r1,r7 // load wrap width -> R1 + mov r2,r4 // get base pointer to data buffer (row start, without X) -> R2 + b 2b // go next loop + + // pop registers and return (R0 = new pointer to control buffer) +6: pop {r4-r7,pc} diff --git a/MCUME_pico/picovga_t4/render/vga_graph8mat.S 
b/MCUME_pico/picovga_t4/render/vga_graph8mat.S new file mode 100755 index 0000000..4e07bb4 --- /dev/null +++ b/MCUME_pico/picovga_t4/render/vga_graph8mat.S @@ -0,0 +1,310 @@ + +// **************************************************************************** +// +// VGA render GF_GRAPH8MAT +// +// **************************************************************************** +// data ... image data +// par ... pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL)) +// par2 ... LOW=number of bits of image width, HIGH=number of bits of image height +// image width must be max. 4096 (= 1< LR + lsrs r1,r3,#1 // width/2 + negs r1,r1 // negate + mov lr,r1 // store start coordinate X0 -> LR + + // prepare number of 4-pixels (loop counter) -> R7 + lsrs r7,r3,#2 // width/4 -> R7 + + // get pointer to video segment -> R4 + ldr r4,[sp,#20] // load video segment -> R4 + + // prepare current coordinate Y0 = -h/2 + y -> R12 + ldrh r1,[r4,#SSEGM_WRAPY] // get segment height -> R1 + lsrs r1,#1 // height/2 + negs r1,r1 // negate + adds r1,r2 // add current Y coordinate + mov r12,r1 // store current coordinate Y0 -> R12 + + // get number of bits of image width "xbits" -> R1 + ldrh r1,[r4,#SSEGM_PAR2] // number of bits of image width -> R1 + + // get number of bits of image height "ybits" -> R2 + ldrh r2,[r4,#SSEGM_PAR2+2] // number of bits of image height -> R2 + + // prepare address of interpolator base -> R3 + ldr r3,RenderGraph8Mat_Interp // get address of interpolator base -> R3 + +// R0 ... pointer to data buffer +// R1 ... number of bits of image width xbits +// R2 ... number of bits of image height ybits +// R3 ... interpolator base +// R4 ... video segment +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... 
current coordinate Y0 + +// ---- setup interpolator + + // set image base to base2 + ldr r6,[r4,#SSEGM_DATA] // load image base + str r6,[r3,#BASE2_OFFSET] // set image base + + // set control word of lane 1 - add raw lane base back to accumulator, shift "FRACT-xbits", mask xbits...xbits+ybits-1 + ldr r6,RenderGraph8Mat_Ctrl // load control word + subs r6,r1 // FRACT - xbits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required) + lsls r5,r1,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift xbits to mask LSB position -> R5 + orrs r6,r5 // add xbits to control word + subs r1,#1 // xbits - 1 -> R1 + adds r5,r1,r2 // xbits-1+ybits -> R5 + lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to MSB mask position + orrs r6,r5 // add to control word + str r6,[r3,#CTRL_LANE1_OFFSET] // set control word of lane 1 + +// R0 ... pointer to data buffer +// R1 ... image width xbits-1 +// R3 ... interpolator base +// R4 ... video segment +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... current coordinate Y0 + + // set control word of lane 0 - add raw lane base back to accumulator, shift "FRACT", mask 0..xbits-1 + ldr r6,RenderGraph8Mat_Ctrl // load control word + lsls r1,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift xbits-1 to mask MSB position + orrs r6,r1 // add to control word + str r6,[r3,#CTRL_LANE0_OFFSET] // set control word of lane 0 + +// R0 ... pointer to data buffer +// R3 ... interpolator base +// R4 ... video segment +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... current coordinate Y0 + +// ---- set matrix + + // get pointer to matrix -> R4 + ldr r4,[r4,#SSEGM_PAR] // get pointer to matrix -> R4 + +// r4+0 ... m11 +// r4+4 ... m12 +// r4+8 ... m13 +// r4+12 ... m21 +// r4+16 ... m22 +// r4+20 ... m23 + + // set m11 -> R5 base0 + ldr r5,[r4,#0] // load m11 + str r5,[r3,#BASE0_OFFSET] // set base0 + + // set m21 -> R6 base1 + ldr r6,[r4,#12] // load m21 + str r6,[r3,#BASE1_OFFSET] // set base1 + +// R0 ... pointer to data buffer +// R3 ... 
interpolator base +// R4 ... pointer to matrix +// R5 ... m11 +// R6 ... m21 +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... current coordinate Y0 + + // set x0*m11 + y0*m12 + m13 -> accum0 + mov r2,lr // start coordinate X0 -> X2 + muls r5,r2 // x0*m11 -> R5 + muls r2,r6 // x0*m21 -> R2 + ldr r1,[r4,#4] // load m12 -> R1 + mov r6,r12 // load coordinate Y0 -> R6 + muls r1,r6 // y0*m12 -> R1 + adds r5,r1 // x0*m11 + y0*m12 -> R5 + ldr r1,[r4,#8] // load m13 -> R1 + adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5 + str r5,[r3,#ACCUM0_OFFSET] // set accum0 + +// R0 ... pointer to data buffer +// R2 ... x0*m21 +// R3 ... interpolator base +// R4 ... pointer to matrix +// R6 ... current coordinate Y0 +// R7 ... width/4 + + // set x0*m21 + y0*m22 + m23 -> accum1 + ldr r1,[r4,#16] // load m22 -> R1 + muls r1,r6 // y0*m22 -> R1 + adds r2,r1 // x0*m21 + y0*m22 -> R2 + ldr r1,[r4,#20] // load m23 -> R1 + adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2 + str r2,[r3,#ACCUM1_OFFSET] // set accum1 + +// ---- process odd 4-pixel + +// R0 ... pointer to destination data buffer +// R3 ... interpolator base +// R4 ... (temporary - get pointer to pixel) +// R5 ... (temporary - load pixel) +// R6 ... (temporary - pixel accumulator) +// R7 ... 
width/4 (loop counter) + + lsrs r7,#1 // width/4/2 + bcc 2f // no odd 4-pixel + + // [3] load 1st pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r6,[r4,#0] // [2] load pixel + + // [5] load 2nd pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r5,[r4,#0] // [2] load pixel + lsls r5,#8 // [1] shift 1 byte left + orrs r6,r5 // [1] add pixel to accumulator + + // [5] load 3rd pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r5,[r4,#0] // [2] load pixel + lsls r5,#16 // [1] shift 2 bytes left + orrs r6,r5 // [1] add pixel to accumulator + + // [5] load 4th pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r5,[r4,#0] // [2] load pixel + lsls r5,#24 // [1] shift 3 bytes left + orrs r6,r5 // [1] add pixel to accumulator + + // [2] store 4 pixels + stmia r0!,{r6} // [2] store 4 pixels + + // check number of remaining pixels +2: tst r7,r7 // check number of pixels + beq 8f // end + +// ---- [42 per 8 pixels] inner loop +// R0 ... pointer to destination data buffer +// R1 ... (temporary - pixel accumulator 1) +// R2 ... (temporary - pixel accumulator 2) +// R3 ... interpolator base +// R4 ... (temporary - get pointer to pixel, load pixel) +// R7 ... 
width/8 (loop counter) + + // [3] load 1st pixel +6: ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r1,[r4,#0] // [2] load pixel + + // [5] load 2nd pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r4,[r4,#0] // [2] load pixel + lsls r4,#8 // [1] shift 1 byte left + orrs r1,r4 // [1] add pixel to accumulator + + // [5] load 3rd pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r4,[r4,#0] // [2] load pixel + lsls r4,#16 // [1] shift 2 bytes left + orrs r1,r4 // [1] add pixel to accumulator + + // [5] load 4th pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r4,[r4,#0] // [2] load pixel + lsls r4,#24 // [1] shift 3 bytes left + orrs r1,r4 // [1] add pixel to accumulator + + // [3] load 1st pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r2,[r4,#0] // [2] load pixel + + // [5] load 2nd pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r4,[r4,#0] // [2] load pixel + lsls r4,#8 // [1] shift 1 byte left + orrs r2,r4 // [1] add pixel to accumulator + + // [5] load 3rd pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r4,[r4,#0] // [2] load pixel + lsls r4,#16 // [1] shift 2 bytes left + orrs r2,r4 // [1] add pixel to accumulator + + // [5] load 4th pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r4,[r4,#0] // [2] load pixel + lsls r4,#24 // [1] shift 3 bytes left + orrs r2,r4 // [1] add pixel to accumulator + + // [3] store 8 pixels + stmia r0!,{r1,r2} // [3] store 8 pixels + + // [2,3] loop counter + subs r7,#1 // [1] 8-pixel counter + bne 6b // [1,2] next 8-pixels + + // pop registers +8: pop {r4-r7,pc} + + .align 2 +// pointer to Interp1 base +RenderGraph8Mat_Interp: + .word SIO_BASE+SIO_INTERP1_ACCUM0_OFFSET // addres of interpolator base + +RenderGraph8Mat_Ctrl: // lane control word + .word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT< R4 + ldr r4,[sp,#20] // load video segment -> R4 + + // prepare current coordinate Y0 = y - h -> R12 + ldrh 
r1,[r4,#SSEGM_WRAPY] // get segment height -> R1 + subs r2,r1 // y - h = current Y coordinate + mov r12,r2 // store current coordinate Y0 -> R12 + + // start calculating distance coefficient dist = FRACTMUL*h/(y - h + horiz + 1) + lsls r6,r1,#FRACT // segment height * FRACTMUL -> R6 + ldr r5,RenderGraph8Persp_pSioBase // get address of SIO base -> R5 + str r6,[r5,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h + ldrh r6,[r4,#SSEGM_PAR3] // horizon offset -> R6 + adds r2,r1 // y = current Y coordinate + adds r6,r2 // horizon + y -> R6 + adds r6,#1 // horizon + y + 1 -> R6 + str r6,[r5,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz + 1 + + // prepare start coordinate X0 = -w/2 -> LR + lsrs r5,r3,#1 // width/2 + negs r5,r5 // negate + mov lr,r5 // store start coordinate X0 -> LR + + // prepare number of 4-pixels (loop counter) -> R7 + lsrs r7,r3,#2 // width/4 -> R7 + + // get number of bits of image width "xbits" -> R1 + ldrh r1,[r4,#SSEGM_PAR2] // number of bits of image width -> R1 + + // get number of bits of image height "ybits" -> R2 + ldrh r2,[r4,#SSEGM_PAR2+2] // number of bits of image height -> R2 + + // prepare address of interpolator base -> R3 + ldr r3,RenderGraph8Persp_Interp // get address of interpolator base -> R3 + +// R0 ... pointer to data buffer +// R1 ... number of bits of image width xbits +// R2 ... number of bits of image height ybits +// R3 ... interpolator base +// R4 ... video segment +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... 
current coordinate Y0 + +// ---- setup interpolator + + // set image base to base2 + ldr r6,[r4,#SSEGM_DATA] // load image base + str r6,[r3,#BASE2_OFFSET] // set image base + + // set control word of lane 1 - add raw lane base back to accumulator, shift "FRACT-xbits", mask xbits...xbits+ybits-1 + ldr r6,RenderGraph8Persp_Ctrl // load control word + subs r6,r1 // FRACT - xbits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required) + lsls r5,r1,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift xbits to mask LSB position -> R5 + orrs r6,r5 // add xbits to control word + subs r1,#1 // xbits - 1 -> R1 + adds r5,r1,r2 // xbits-1+ybits -> R5 + lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to MSB mask position + orrs r6,r5 // add to control word + str r6,[r3,#CTRL_LANE1_OFFSET] // set control word of lane 1 + +// R0 ... pointer to data buffer +// R1 ... image width xbits-1 +// R3 ... interpolator base +// R4 ... video segment +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... current coordinate Y0 + + // set control word of lane 0 - add raw lane base back to accumulator, shift "FRACT", mask 0..xbits-1 + ldr r6,RenderGraph8Persp_Ctrl // load control word + lsls r1,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift xbits-1 to mask MSB position + orrs r6,r1 // add to control word + str r6,[r3,#CTRL_LANE0_OFFSET] // set control word of lane 0 + +// R0 ... pointer to data buffer +// R3 ... interpolator base +// R4 ... video segment +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... current coordinate Y0 + +// ---- set matrix + + // get pointer to matrix -> R4 + ldr r4,[r4,#SSEGM_PAR] // get pointer to matrix -> R4 + + // get distance coefficient dist -> R1 + ldr r1,RenderGraph8Persp_pSioBase // get address of SIO base -> R1 + ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient + +// r4+0 ... m11 +// r4+4 ... m12 +// r4+8 ... m13 +// r4+12 ... m21 +// r4+16 ... m22 +// r4+20 ... 
m23 + + // set m11 -> R5 base0 + ldr r5,[r4,#0] // load m11 + muls r5,r1 // m11*dist + asrs r5,#FRACT // (m11*dist)>>FRACT + str r5,[r3,#BASE0_OFFSET] // set base0 + + // set m21 -> R6 base1 + ldr r6,[r4,#12] // load m21 + muls r6,r1 // m11*dist + asrs r6,#FRACT // (m11*dist)>>FRACT + str r6,[r3,#BASE1_OFFSET] // set base1 + +// R0 ... pointer to data buffer +// R1 ... distance coefficient +// R3 ... interpolator base +// R4 ... pointer to matrix +// R5 ... m11 +// R6 ... m21 +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... current coordinate Y0 + + // set x0*m11 + y0*m12 + m13 -> accum0 + mov r2,lr // start coordinate X0 -> X2 + muls r5,r2 // x0*m11 -> R5 + muls r2,r6 // x0*m21 -> R2 + mov lr,r1 // save distance coefficient -> LR + ldr r6,[r4,#4] // load m12 -> R6 + muls r1,r6 // m12*dist + asrs r1,#FRACT // (m12*dist)>>FRACT + mov r6,r12 // load coordinate Y0 -> R6 + muls r1,r6 // y0*m12 -> R1 + adds r5,r1 // x0*m11 + y0*m12 -> R5 + ldr r1,[r4,#8] // load m13 -> R1 + adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5 + str r5,[r3,#ACCUM0_OFFSET] // set accum0 + +// R0 ... pointer to data buffer +// R2 ... x0*m21 +// R3 ... interpolator base +// R4 ... pointer to matrix +// R6 ... current coordinate Y0 +// R7 ... width/4 +// LR ... distance coefficient + + // set x0*m21 + y0*m22 + m23 -> accum1 + ldr r1,[r4,#16] // load m22 -> R1 + mov r5,lr // distance coefficient -> R5 + muls r1,r5 // m22*dist + asrs r1,#FRACT // (m22*dist)>>FRACT + muls r1,r6 // y0*m22 -> R1 + adds r2,r1 // x0*m21 + y0*m22 -> R2 + ldr r1,[r4,#20] // load m23 -> R1 + adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2 + str r2,[r3,#ACCUM1_OFFSET] // set accum1 + +// ---- process odd 4-pixel + +// R0 ... pointer to destination data buffer +// R3 ... interpolator base +// R4 ... (temporary - get pointer to pixel) +// R5 ... (temporary - load pixel) +// R6 ... (temporary - pixel accumulator) +// R7 ... 
width/4 (loop counter) + + lsrs r7,#1 // width/4/2 + bcc 2f // no odd 4-pixel + + // [3] load 1st pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r6,[r4,#0] // [2] load pixel + + // [5] load 2nd pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r5,[r4,#0] // [2] load pixel + lsls r5,#8 // [1] shift 1 byte left + orrs r6,r5 // [1] add pixel to accumulator + + // [5] load 3rd pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r5,[r4,#0] // [2] load pixel + lsls r5,#16 // [1] shift 2 bytes left + orrs r6,r5 // [1] add pixel to accumulator + + // [5] load 4th pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r5,[r4,#0] // [2] load pixel + lsls r5,#24 // [1] shift 3 bytes left + orrs r6,r5 // [1] add pixel to accumulator + + // [2] store 4 pixels + stmia r0!,{r6} // [2] store 4 pixels + + // check number of remaining pixels +2: tst r7,r7 // check number of pixels + beq 8f // end + +// ---- [42 per 8 pixels] inner loop +// R0 ... pointer to destination data buffer +// R1 ... (temporary - pixel accumulator 1) +// R2 ... (temporary - pixel accumulator 2) +// R3 ... interpolator base +// R4 ... (temporary - get pointer to pixel, load pixel) +// R7 ... 
width/8 (loop counter) + + // [3] load 1st pixel +6: ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r1,[r4,#0] // [2] load pixel + + // [5] load 2nd pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r4,[r4,#0] // [2] load pixel + lsls r4,#8 // [1] shift 1 byte left + orrs r1,r4 // [1] add pixel to accumulator + + // [5] load 3rd pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r4,[r4,#0] // [2] load pixel + lsls r4,#16 // [1] shift 2 bytes left + orrs r1,r4 // [1] add pixel to accumulator + + // [5] load 4th pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r4,[r4,#0] // [2] load pixel + lsls r4,#24 // [1] shift 3 bytes left + orrs r1,r4 // [1] add pixel to accumulator + + // [3] load 1st pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r2,[r4,#0] // [2] load pixel + + // [5] load 2nd pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r4,[r4,#0] // [2] load pixel + lsls r4,#8 // [1] shift 1 byte left + orrs r2,r4 // [1] add pixel to accumulator + + // [5] load 3rd pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r4,[r4,#0] // [2] load pixel + lsls r4,#16 // [1] shift 2 bytes left + orrs r2,r4 // [1] add pixel to accumulator + + // [5] load 4th pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r4,[r4,#0] // [2] load pixel + lsls r4,#24 // [1] shift 3 bytes left + orrs r2,r4 // [1] add pixel to accumulator + + // [3] store 8 pixels + stmia r0!,{r1,r2} // [3] store 8 pixels + + // [2,3] loop counter + subs r7,#1 // [1] 8-pixel counter + bne 6b // [1,2] next 8-pixels + + // pop registers +8: pop {r4-r7,pc} + + .align 2 +// pointer to SIO base +RenderGraph8Persp_pSioBase: + .word SIO_BASE // addres of SIO base + +// pointer to Interp1 base +RenderGraph8Persp_Interp: + .word SIO_BASE+SIO_INTERP1_ACCUM0_OFFSET // addres of interpolator base + +RenderGraph8Persp_Ctrl: // lane control word + .word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT< 0) +// [stack] 
... segm video segment sSegm +// Output new pointer to destination data buffer. +// 320 pixels takes 8.3 us on 151 MHz. +// Summary: renders text (1 character = 8 pixels) with foreground colors read from the gradient array over a single background color. +.thumb_func +.global RenderGText +RenderGText: + + // push registers + push {r1-r7,lr} + mov r4,r8 // copy R8 to a low register so it can be pushed + push {r4} // save R8 + +// Stack content: +// SP+0: R8 +// SP+4: R1 start X coordinate (later: base pointer to gradient array) +// SP+8: R2 start Y coordinate (later: base pointer to text data row) +// SP+12: R3 width to display +// SP+16: R4 +// SP+20: R5 +// SP+24: R6 +// SP+28: R7 +// SP+32: LR +// SP+36: video segment (later: wrap width in X direction) + + // get pointer to video segment -> R4 + ldr r4,[sp,#36] // load video segment -> R4 + + // start divide Y/font height + ldr r6,RenderGText_pSioBase // get address of SIO base -> R6 + str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate + ldrh r2,[r4,#SSEGM_PAR3] // font height -> R2 + str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height + +// - now we must wait at least 8 clock cycles to get result of division + + // [6] get wrap width -> [SP+36] + ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width + movs r7,#3 // [1] mask to align to 32-bit + bics r5,r7 // [1] align wrap + str r5,[sp,#36] // [2] save wrap width + + // [1] align X coordinate to 32-bit + bics r1,r7 // [1] + + // [3] align remaining width + bics r3,r7 // [1] + str r3,[sp,#12] // [2] save new width + + // load result of division Y/font_height -> R5 Y relative at row, R2 Y row + // Note: QUOTIENT must be read last + ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row + ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R2, index of row + + // pointer to font line -> R8 + lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long) + ldr r3,[r4,#SSEGM_PAR] // get pointer to font + add r3,r5 // line offset + font base -> pointer to current font line R3 + mov r8,r3 // keep pointer to current font line in R8 + + // base pointer to text data (without X) -> [SP+8], R2 + ldrh 
r5,[r4,#SSEGM_WB] // get pitch of rows + muls r2,r5 // Y * WB -> offset of row in text buffer + ldr r5,[r4,#SSEGM_DATA] // pointer to data + add r2,r5 // base address of text buffer + str r2,[sp,#8] // save pointer to text buffer + + // base pointer to gradient array -> [SP+4], R3 + ldr r3,[r4,#SSEGM_PAR2] // pointer to gradient array + str r3,[sp,#4] // save pointer to gradient array + + // prepare pointers to gradient array and text data with X -> R3, R2 + add r3,r1 // pointer to source gradient array + lsrs r6,r1,#3 // convert X to character index (1 character is 8 pixels width) + add r2,r6 // pointer to source text buffer -> R2 + + // prepare background color, expand to 32 bits -> R4 + ldrb r4,[r4,#SSEGM_PAR3] // load background color (NOTE(review): same SSEGM_PAR3 offset as the font height halfword read above - confirm this is the intended field) + lsls r5,r4,#8 // shift background color << 8 + orrs r5,r4 // color expanded to 16 bits + lsls r4,r5,#16 // shift 16-bit color << 16 + orrs r4,r5 // color expanded to 32 bits + + // prepare pointer to conversion table -> LR + ldr r5,RenderGText_Addr // get pointer to conversion table -> R5 + mov lr,r5 // conversion table -> LR + +// ---- render 2nd half of first character +// R0 ... pointer to destination data buffer +// R1 ... start X coordinate +// R2 ... pointer to source text buffer +// R3 ... pointer to gradient array +// R4 ... background color (expanded to 32-bit) +// R5..R7 ... (temporary) +// R8 ... pointer to font line +// LR ... pointer to conversion table +// [SP+4] ... base pointer to gradient array +// [SP+8] ... base pointer to text data (without X) +// [SP+12] ... remaining width +// [SP+36] ... 
wrap width + + // check bit 2 of X coordinate - check if image starts with 2nd half of first character + lsls r6,r1,#29 // move bit 2 of X coordinate to bit 31 (sets N flag) + bpl 2f // bit 2 not set, starting even 4-pixels + + // [6] load font sample -> R5 + ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 + adds r2,#1 // [1] shift pointer to source text buffer + add r5,r8 // [1] pointer to font line + ldrb r5,[r5] // [2] load font sample -> R5 + + // [3] load foreground color, XOR with background -> R6 + ldmia r3!,{r6} // [2] load foreground color from gradient buffer + eors r6,r4 // [1] XOR foreground color with background color + + // [2] prepare conversion table -> R5 + lsls r5,#3 // [1] multiply font sample * 8 + add r5,lr // [1] add pointer to conversion table + + // [6] convert second 4 pixels (lower 4 bits) + ldr r7,[r5,#4] // [2] load mask for lower 4 bits + ands r7,r6 // [1] mask foreground color + eors r7,r4 // [1] combine with background color + stmia r0!,{r7} // [2] store second 4 pixels + + // shift X coordinate + adds r1,#4 // shift X coordinate + + // check end of segment + ldr r7,[sp,#36] // load wrap width + cmp r1,r7 // end of segment? + blo 1f // X < wrap width, no wrap needed + movs r1,#0 // reset X coordinate + ldr r2,[sp,#8] // get base pointer to text data -> R2 + ldr r3,[sp,#4] // get base pointer to gradient array -> R3 + + // shift remaining width +1: ldr r7,[sp,#12] // get remaining width + subs r7,#4 // shift width + str r7,[sp,#12] // save new width + + // prepare wrap width - start X -> R7 +2: ldr r7,[sp,#36] // load wrap width + subs r7,r1 // pixels remaining to end of segment + +// ---- start outer loop, render one part of segment +// Outer loop variables (* prepared before outer loop): +// R0 ... *pointer to destination data buffer +// R1 ... number of characters to generate in one part of segment +// R2 ... *pointer to source text buffer +// R3 ... *pointer to gradient array +// R4 ... *background color (expanded to 32-bit) +// R5 ... (temporary) +// R6 ... 
(temporary) +// R7 ... *wrap width of this segment, later: temporary +// R8 ... *pointer to font line +// LR ... *pointer to conversion table +// [SP+4] ... base pointer to gradient array +// [SP+8] ... base pointer to text data (without X) +// [SP+12] ... remaining width +// [SP+36] ... wrap width + +RenderGText_OutLoop: + + // limit wrap width by total width -> R7 + ldr r6,[sp,#12] // get remaining width + cmp r7,r6 // compare part width (R7) with remaining width (R6) + bls 2f // width is OK + mov r7,r6 // limit part width to remaining width + + // check if remain whole characters +2: cmp r7,#8 // check number of remaining pixels + bhs 5f // enough characters remain + + // check if 1st part of last character remains + cmp r7,#4 // check 1st part of last character + blo 3f // all done + +// ---- render 1st part of last character + +RenderGText_Last: + + // [6] load font sample -> R5 + ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 + adds r2,#1 // [1] shift pointer to source text buffer + add r5,r8 // [1] pointer to font line + ldrb r5,[r5] // [2] load font sample -> R5 + + // [3] load foreground color, XOR with background -> R6 + ldmia r3!,{r6} // [2] load foreground color from gradient buffer + eors r6,r4 // [1] XOR foreground color with background color + + // [2] prepare conversion table -> R5 + lsls r5,#3 // [1] multiply font sample * 8 + add r5,lr // [1] add pointer to conversion table + + // [6] convert first 4 pixels (higher 4 bits) + ldr r1,[r5,#0] // [2] load mask for higher 4 bits + ands r1,r6 // [1] mask foreground color + eors r1,r4 // [1] combine with background color + stmia r0!,{r1} // [2] store first 4 pixels + + // check if continue with next segment (NOTE(review): when this point is reached from the outer loop R7 is below 8, presumably exactly 4, so the function returns below even if [SP+12] still holds more width - confirm intended) + ldr r2,[sp,#8] // get base pointer to text data -> R2 + ldr r3,[sp,#4] // get base pointer to gradient array -> R3 + cmp r7,#4 // R7 > 4 only when it was reloaded with the wrap width after the inner loop + bhi RenderGText_OutLoop // R7 > 4, continue with next part of segment + + // pop registers and return (R8 is restored through R4, R0 = new pointer to destination data buffer) +3: pop {r4} // saved R8 -> R4 + mov r8,r4 // restore R8 + pop {r1-r7,pc} + +// ---- prepare to render whole characters + + // prepare number of whole characters to render 
-> R1 +5: lsrs r1,r7,#2 // width/4 = number of 4-pixel halves = number of characters*2 + lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5 + subs r6,r5 // remaining width (R6, loaded in outer loop) minus pixels of this part + str r6,[sp,#12] // save new remaining width + subs r1,#1 // number of characters*2 - 1 + +// ---- [28*N-1] start inner loop, render characters in one part of segment +// Inner loop variables (* prepared before inner loop): +// R0 ... *pointer to destination data buffer +// R1 ... *number of characters to generate*2 - 1 (loop counter) +// R2 ... *pointer to source text buffer +// R3 ... *pointer to gradient array +// R4 ... *background color (expanded to 32-bit) +// R5 ... (temporary) +// R6 ... foreground color +// R7 ... font sample +// R8 ... *pointer to font line +// LR ... *pointer to conversion table +// [SP+4] ... base pointer to gradient array +// [SP+8] ... base pointer to text data (without X) +// [SP+12] ... remaining width +// [SP+36] ... wrap width + +RenderGText_InLoop: + + // [6] load font sample -> R7 + ldrb r7,[r2,#0] // [2] load character from source text buffer -> R7 + adds r2,#1 // [1] shift pointer to source text buffer + add r7,r8 // [1] pointer to font line + ldrb r7,[r7] // [2] load font sample -> R7 + + // [3] load foreground color, XOR with background -> R6 + ldmia r3!,{r6} // [2] load foreground color from gradient buffer + eors r6,r4 // [1] XOR foreground color with background color + + // [2] prepare conversion table -> R7 + lsls r7,#3 // [1] multiply font sample * 8 + add r7,lr // [1] add pointer to conversion table + + // [4] convert first 4 pixels (higher 4 bits) -> R5 + ldr r5,[r7,#0] // [2] load mask for higher 4 bits + ands r5,r6 // [1] mask foreground color + eors r5,r4 // [1] combine with background color + + // [3] load foreground color, XOR with background -> R6 + ldmia r3!,{r6} // [2] load foreground color from gradient buffer + eors r6,r4 // [1] XOR foreground color with background color + + // [7] convert and store second 4 pixels (lower 4 bits) + ldr 
r7,[r7,#4] // [2] load mask for lower 4 bits + ands r7,r6 // [1] mask foreground color + eors r7,r4 // [1] combine with background color + stmia r0!,{r5,r7} // [3] store 8 pixels + + // [2,3] loop counter + subs r1,#2 // [1] loop counter - 2 (one whole character = two 4-pixel halves) + bhi RenderGText_InLoop // [1,2] > 0, render next whole character + +// ---- end inner loop, continue with last character, or start new part + + // continue to outer loop + ldr r7,[sp,#36] // load wrap width (flags from the subs above are still used by the beq below) + beq RenderGText_Last // loop counter ended exactly at 0: render 1st half of last character + ldr r2,[sp,#8] // get base pointer to text data -> R2 + ldr r3,[sp,#4] // get base pointer to gradient array -> R3 + b RenderGText_OutLoop // go back to outer loop + + .align 2 +RenderGText_Addr: + .word RenderTextMask // address of conversion table (loaded to LR above) +RenderGText_pSioBase: + .word SIO_BASE // address of SIO base diff --git a/MCUME_pico/picovga_t4/render/vga_level.S b/MCUME_pico/picovga_t4/render/vga_level.S new file mode 100755 index 0000000..7057556 --- /dev/null +++ b/MCUME_pico/picovga_t4/render/vga_level.S @@ -0,0 +1,431 @@ + +// **************************************************************************** +// +// VGA render GF_LEVEL +// +// **************************************************************************** + +#include "../define.h" // common definitions of C and ASM + + .syntax unified + .section .time_critical.Render, "ax" + .cpu cortex-m0plus + .thumb // use 16-bit instructions + +// render font pixel mask +.extern RenderTextMask // u32 RenderTextMask[512]; + +// extern "C" u8* RenderLevel(u8* dbuf, int x, int y, int w, sSegm* segm); + +// render level graph GF_LEVEL +// dbuf ... destination data buffer +// x ... start X coordinate (must be multiple of 4) +// y ... start Y coordinate +// w ... width of this segment (must be multiple of 4) +// segm ... video segment +// Output new dbuf pointer. +// 320 pixels takes 14 us on 151 MHz. + +.thumb_func +.global RenderLevel +RenderLevel: + + // push registers + push {r1-r7,lr} + +// Input registers and stack content: +// R0 ... 
pointer to testination data buffer +// SP+0: R1 start X coordinate (later: zero level) +// SP+4: R2 start Y coordinate (later: base pointer to sample data) +// SP+8: R3 width to display +// SP+12: R4 +// SP+16: R5 +// SP+20: R6 +// SP+24: R7 +// SP+28: LR +// SP+32: video segment (later: wrap width in X direction) + + // get pointer to video segment -> R4 + ldr r4,[sp,#32] // load video segment -> R4 + + // get wrap width -> [SP+32] + ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width + movs r7,#3 // mask to align to 32-bit + bics r5,r7 // align wrap + str r5,[sp,#32] // save wrap width + + // align X coordinate to 32-bit -> R1 + bics r1,r7 + + // align remaining width -> [SP+8] + bics r3,r7 + str r3,[sp,#8] // save new width + + // current Y in direction from bottom to up -> R5 + ldrh r5,[r4,#SSEGM_WRAPY] // get wrap height + subs r5,#1 // wrapy - 1 + subs r5,r2 // subtract Y, get Y relative to bottom -> R5 + + // get zero level -> [SP+0] + ldrb r3,[r4,#SSEGM_PAR2] // get zero level + str r3,[sp,#0] // save zero level + + // base pointer to sample data (without X) -> [SP+4], R2 + ldr r2,[r4,#SSEGM_DATA] // pointer to sample data + str r2,[sp,#4] // save pointer to sample buffer + + // prepare pointer to sample data with X -> R2 + add r2,r1 // pointer to source sample buffer -> R2 + + // prepare foreground color, expand to 32-bit -> R6 + ldrb r6,[r4,#SSEGM_PAR+1] // load foreground color + lsls r3,r6,#8 // [1] shift foreground color << 8 + orrs r3,r6 // [1] color expanded to 16 bits + lsls r6,r3,#16 // [1] shift 16-bit color << 16 + orrs r6,r3 // [1] color expanded to 32 bits + + // prepare background color, expand to 32 bits -> R4 + ldrb r4,[r4,#SSEGM_PAR] // load background color + lsls r3,r4,#8 // shift background color << 8 + orrs r3,r4 // color expanded to 16 bits + lsls r4,r3,#16 // shift 16-bit color << 16 + orrs r4,r3 // color expanded to 32 bits + + // [1] XOR foreground and background color -> R6 + eors r6,r4 // [1] XOR foreground color with background color + + 
// prepare pointer to conversion table -> LR + ldr r3,RenderLevel_Addr // get pointer to conversion table -> R5 + mov lr,r3 // conversion table -> LR + + // prepare wrap width - start X -> R7 + ldr r7,[sp,#32] // load wrap width + subs r7,r1 // pixels remaining to end of segment + + // last 4-pixels + cmp r7,#4 + bhi RenderLevel_OutLoop + ldr r7,[sp,#32] // load wrap width + b RenderLevel_Last // render last 4-pixels of first segment + +// ---- start outer loop, render one part of segment +// Outer loop variables (* prepared before outer loop): +// R0 ... *pointer to destination data buffer +// R1 ... number of 4-pixels to generate in one part of segment +// R2 ... *pointer to source sample buffer +// R3 ... remaining width, later: (temporary) +// R4 ... *background color (expanded to 32-bit) +// R5 ... *current line Y (in direction from bottom to up) +// R6 ... *foreground color (expanded to 32-bit) +// R7 ... *wrap width of this segment, later: (temporary) +// LR ... *pointer to conversion table +// [SP+0] ... *zero level +// [SP+4] ... *base pointer to sample data (without X) +// [SP+8] ... *remaining width +// [SP+32] ... 
*wrap width + +RenderLevel_OutLoop: + + // limit wrap width by total width -> R7 + ldr r3,[sp,#8] // get remaining width + cmp r7,r3 // compare with wrap width + bls 2f // width is OK + mov r7,r3 // limit wrap width + + // check number of pixels +2: cmp r7,#8 // check number of remaining pixels + bhs 5f // enough pixels remain to render 8-pixels + + // check last 4-pixels + cmp r7,#4 // check last 4-pixels + blo 3f // all done + +// ---- render last 4 pixels + +RenderLevel_Last: + + // check half of graph + ldr r3,[sp,#0] // get zero level + cmp r5,r3 // check current line + blo RenderLevel_Last2 // bottom half of graph + +// ---- top half + + // [1] clear sample accumulator + movs r1,#0 // [1] clear sample accumulator + + // [4] get sample 0 + ldrb r3,[r2,#0] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 + + // [4] get sample 1 + ldrb r3,[r2,#1] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 + + // [4] get sample 2 + ldrb r3,[r2,#2] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 + + // [4] get sample 3 + ldrb r3,[r2,#3] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 + adds r2,#4 // [1] shift pointer to source buffer + + // [2] prepare conversion table -> R1 + lsls r1,#3 // [1] multiply sample * 8 + add r1,lr // [1] add pointer to conversion table + + // [7] convert 4 pixels (lower 4 bits) + ldr r1,[r1,#4] // [2] load mask for lower 4 bits + ands r1,r6 // [1] mask foreground color + eors r1,r4 // [1] combine with background color + stmia r0!,{r1} // [3] store 4 pixels + + b 7f + +// ---- bottom half + +RenderLevel_Last2: + + // [1] clear sample accumulator + movs 
r1,#0 // [1] clear sample accumulator + + // [4] get sample 0 + ldrb r3,[r2,#0] // [2] get data sample -> R3 + cmp r5,r3 // [1] compare sample with current line + adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 + + // [4] get sample 1 + ldrb r3,[r2,#1] // [2] get data sample -> R3 + cmp r5,r3 // [1] compare sample with current line + adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 + + // [4] get sample 2 + ldrb r3,[r2,#2] // [2] get data sample -> R3 + cmp r5,r3 // [1] compare sample with current line + adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 + + // [4] get sample 3 + ldrb r3,[r2,#3] // [2] get data sample -> R3 + cmp r5,r3 // [1] compare sample with current line + adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 + adds r2,#4 // [1] shift pointer to source buffer + + // [2] prepare conversion table -> R1 + lsls r1,#3 // [1] multiply sample * 8 + add r1,lr // [1] add pointer to conversion table + + // [7] convert 4 pixels (lower 4 bits) + ldr r1,[r1,#4] // [2] load mask for lower 4 bits + ands r1,r6 // [1] mask foreground color + eors r1,r4 // [1] combine with background color + stmia r0!,{r1} // [3] store 4 pixels + + + // check if continue with next segment +7: ldr r2,[sp,#4] // get base pointer to sample data -> R2 + cmp r7,#4 + bhi RenderLevel_OutLoop + + // pop registers and return +3: pop {r1-r7,pc} + +// ---- prepare to render 8-pixels + + // prepare number of whole 4-pixels to render -> R1 +5: lsrs r1,r7,#2 // shift width to get number of 4-pixels + lsls r7,r1,#2 // shift back to get number of pixels, rounded down -> R7 + subs r3,r7 // get remaining width + str r3,[sp,#8] // save new remaining width + subs r1,#1 // number of 4-pixels - 1 + + // check half of graph + ldr r3,[sp,#0] // get zero level + cmp r5,r3 // check current line + blo RenderLevel_InLoopBot // bottom half of graph + +// ---- [50*N-1] start inner loop, render in one part of segment - top half of graph +// Inner loop 
variables (* prepared before inner loop): +// R0 ... *pointer to destination data buffer +// R1 ... *number of 4-pixels to generate*2 - 1 (loop counter) +// R2 ... *pointer to source sample buffer +// R3 ... sample +// R4 ... *background color (expanded to 32-bit) +// R5 ... *current line Y (in direction from bottom to up) +// R6 ... *foreground color (expanded to 32-bit) +// R7 ... sample accumulator, conversion table +// LR ... *pointer to conversion table +// [SP+0] ... *zero level +// [SP+4] ... *base pointer to sample data (without X) +// [SP+8] ... *remaining width +// [SP+32] ... *wrap width + +RenderLevel_InLoopTop: // render 8 pixels in one loop step, top half of graph + + // [1] clear sample accumulator + movs r7,#0 // [1] clear sample accumulator + + // [4] get sample 0 + ldrb r3,[r2,#0] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [4] get sample 1 + ldrb r3,[r2,#1] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [4] get sample 2 + ldrb r3,[r2,#2] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [4] get sample 3 + ldrb r3,[r2,#3] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [4] get sample 4 + ldrb r3,[r2,#4] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [4] get sample 5 + ldrb r3,[r2,#5] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [4] get sample 6 + ldrb r3,[r2,#6] // [2] get data sample -> R3 + cmp r3,r5 // [1] 
compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [5] get sample 7 + ldrb r3,[r2,#7] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + adds r2,#8 // [1] shift pointer to source buffer + + // [2] prepare conversion table -> R7 + lsls r7,#3 // [1] multiply sample * 8 + add r7,lr // [1] add pointer to conversion table + + // [4] convert first 4 pixels (higher 4 bits) + ldr r3,[r7,#0] // [2] load mask for higher 4 bits + ands r3,r6 // [1] mask foreground color + eors r3,r4 // [1] combine with background color + + // [7] convert second 4 pixels (lower 4 bits) + ldr r7,[r7,#4] // [2] load mask for lower 4 bits + ands r7,r6 // [1] mask foreground color + eors r7,r4 // [1] combine with background color + stmia r0!,{r3,r7} // [3] store second 4 pixels + + // [2,3] loop counter + subs r1,#2 // [1] shift loop counter + bhi RenderLevel_InLoopTop // [1,2] > 0, render next whole 8-pixels + +// ---- end inner loop, continue with last 4-pixels, or start new part + + // continue to outer loop + ldr r7,[sp,#32] // load wrap width +8: beq RenderLevel_Last // render last 4-pixels + ldr r2,[sp,#4] // get base pointer to sample data -> R2 + b RenderLevel_OutLoop // go back to outer loop + +// ---- [50*N-1] start inner loop, render in one part of segment - bottom half of graph +// Inner loop variables (* prepared before inner loop): +// R0 ... *pointer to destination data buffer +// R1 ... *number of 4-pixels to generate*2 - 1 (loop counter) +// R2 ... *pointer to source sample buffer +// R3 ... sample +// R4 ... *background color (expanded to 32-bit) +// R5 ... *current line Y (in direction from bottom to up) +// R6 ... *foreground color (expanded to 32-bit) +// R7 ... sample accumulator, conversion table +// LR ... *pointer to conversion table +// [SP+0] ... *zero level +// [SP+4] ... 
*base pointer to sample data (without X) +// [SP+8] ... *remaining width +// [SP+32] ... *wrap width + +RenderLevel_InLoopBot: // render 8 pixels in one loop step, bottom half of graph + + // [1] clear sample accumulator + movs r7,#0 // [1] clear sample accumulator + + // [4] get sample 0 + ldrb r3,[r2,#0] // [2] get data sample -> R3 + cmp r5,r3 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [4] get sample 1 + ldrb r3,[r2,#1] // [2] get data sample -> R3 + cmp r5,r3 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [4] get sample 2 + ldrb r3,[r2,#2] // [2] get data sample -> R3 + cmp r5,r3 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [4] get sample 3 + ldrb r3,[r2,#3] // [2] get data sample -> R3 + cmp r5,r3 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [4] get sample 4 + ldrb r3,[r2,#4] // [2] get data sample -> R3 + cmp r5,r3 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [4] get sample 5 + ldrb r3,[r2,#5] // [2] get data sample -> R3 + cmp r5,r3 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [4] get sample 6 + ldrb r3,[r2,#6] // [2] get data sample -> R3 + cmp r5,r3 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [5] get sample 7 + ldrb r3,[r2,#7] // [2] get data sample -> R3 + cmp r5,r3 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + adds r2,#8 // [1] shift pointer to source buffer + + // [2] prepare conversion table -> R7 + lsls r7,#3 // [1] multiply sample * 8 + add r7,lr // [1] add pointer to conversion table + + // [4] convert first 4 
pixels (higher 4 bits) + ldr r3,[r7,#0] // [2] load mask for higher 4 bits + ands r3,r6 // [1] mask foreground color + eors r3,r4 // [1] combine with background color + + // [7] convert second 4 pixels (lower 4 bits) + ldr r7,[r7,#4] // [2] load mask for lower 4 bits + ands r7,r6 // [1] mask foreground color + eors r7,r4 // [1] combine with background color + stmia r0!,{r3,r7} // [3] store second 4 pixels + + // [2,3] loop counter + subs r1,#2 // [1] shift loop counter + bhi RenderLevel_InLoopBot // [1,2] > 0, render next whole 8-pixels + +// ---- end inner loop, continue with last 4-pixels, or start new part + + // continue to outer loop + ldr r7,[sp,#32] // load wrap width + beq 8b // render last 4-pixels + ldr r2,[sp,#4] // get base pointer to sample data -> R2 + b RenderLevel_OutLoop // go back to outer loop + + .align 2 +RenderLevel_Addr: + .word RenderTextMask diff --git a/MCUME_pico/picovga_t4/render/vga_levelgrad.S b/MCUME_pico/picovga_t4/render/vga_levelgrad.S new file mode 100755 index 0000000..f2671e6 --- /dev/null +++ b/MCUME_pico/picovga_t4/render/vga_levelgrad.S @@ -0,0 +1,287 @@ + +// **************************************************************************** +// +// VGA render GF_LEVELGRAD +// +// **************************************************************************** + +#include "../define.h" // common definitions of C and ASM + + .syntax unified + .section .time_critical.Render, "ax" + .cpu cortex-m0plus + .thumb // use 16-bit instructions + +// render font pixel mask +.extern RenderTextMask // u32 RenderTextMask[512]; + +// extern "C" u8* RenderLevelGrad(u8* dbuf, int x, int y, int w, sSegm* segm); + +// render level gradient graph GF_LEVELGRAD +// dbuf ... destination data buffer +// x ... start X coordinate (must be multiple of 4) +// y ... start Y coordinate +// w ... width of this segment (must be multiple of 4) +// segm ... video segment +// Output new dbuf pointer. +// 320 pixels takes 14 us on 151 MHz. 
+ +.thumb_func +.global RenderLevelGrad +RenderLevelGrad: + + // push registers (R0 is not saved: it carries the destination pointer and is returned advanced) + push {r1-r7,lr} + +// Input registers and stack content: +// R0 ... pointer to destination data buffer +// SP+0: R1 start X coordinate +// SP+4: R2 start Y coordinate (later: base pointer to sample data) +// SP+8: R3 width to display (later: remaining width) +// SP+12: R4 +// SP+16: R5 +// SP+20: R6 +// SP+24: R7 +// SP+28: LR +// SP+32: video segment (later: wrap width in X direction) + + // get pointer to video segment -> R4 + ldr r4,[sp,#32] // load video segment -> R4 + + // get wrap width -> [SP+32] + ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width + movs r7,#3 // mask to align to 32-bit + bics r5,r7 // align wrap + str r5,[sp,#32] // save wrap width + + // align X coordinate to 32-bit -> R1 + bics r1,r7 + + // align remaining width -> [SP+8] + bics r3,r7 + str r3,[sp,#8] // save new width + + // current Y in direction from bottom to up -> R5 + ldrh r5,[r4,#SSEGM_WRAPY] // get wrap height + subs r5,#1 // wrapy - 1 + subs r5,r2 // subtract Y, get Y relative to bottom -> R5 + + // base pointer to sample data (without X) -> [SP+4], R2 + ldr r2,[r4,#SSEGM_DATA] // pointer to sample data + str r2,[sp,#4] // save pointer to sample buffer + + // prepare pointer to sample data with X -> R2 + add r2,r1 // pointer to source sample buffer -> R2 + + // prepare foreground color, expand to 32-bit -> R6 + ldr r6,[r4,#SSEGM_PAR] // pointer to gradient 1 (table of foreground colors) + ldrb r6,[r6,r5] // load foreground color of this line (table indexed by R5) + lsls r3,r6,#8 // [1] shift foreground color << 8 + orrs r3,r6 // [1] color expanded to 16 bits + lsls r6,r3,#16 // [1] shift 16-bit color << 16 + orrs r6,r3 // [1] color expanded to 32 bits + + // prepare background color, expand to 32 bits -> R4 + ldr r4,[r4,#SSEGM_PAR2] // pointer to gradient 2 (table of background colors) + ldrb r4,[r4,r5] // load background color of this line (table indexed by R5) + lsls r3,r4,#8 // shift background color << 8 + orrs r3,r4 // color expanded to 16 bits + lsls r4,r3,#16 // shift 16-bit color << 16 + orrs r4,r3 // color expanded to 32 bits + + // [1] XOR
foreground and background color -> R6 + eors r6,r4 // [1] XOR foreground color with background color (R6 = fg ^ bg, so (mask & R6) ^ R4 yields fg or bg per byte) + + // prepare pointer to conversion table -> LR + ldr r3,RenderLevelGrad_Addr // get pointer to conversion table -> R3 + mov lr,r3 // conversion table -> LR + + // prepare wrap width - start X -> R7 + ldr r7,[sp,#32] // load wrap width + subs r7,r1 // pixels remaining to end of segment + + // at most 4 pixels remain before the wrap point: render them via the last 4-pixels path + cmp r7,#4 + bhi RenderLevelGrad_OutLoop + ldr r7,[sp,#32] // load wrap width + b RenderLevelGrad_Last // render last 4-pixels of first segment + +// ---- start outer loop, render one part of segment +// Outer loop variables (* prepared before outer loop): +// R0 ... *pointer to destination data buffer +// R1 ... number of 4-pixels to generate in one part of segment +// R2 ... *pointer to source sample buffer +// R3 ... remaining width, later: (temporary) +// R4 ... *background color (expanded to 32-bit) +// R5 ... *current line Y (in direction from bottom to up) +// R6 ... *foreground color XOR background color (expanded to 32-bit) +// R7 ... *wrap width of this segment, later: (temporary) +// LR ... *pointer to conversion table +// [SP+0] ... (saved R1, not read by this function) +// [SP+4] ... *base pointer to sample data (without X) +// [SP+8] ... *remaining width +// [SP+32] ...
*wrap width + +RenderLevelGrad_OutLoop: + + // limit wrap width by total width -> R7 + ldr r3,[sp,#8] // get remaining width + cmp r7,r3 // compare with wrap width + bls 2f // width is OK + mov r7,r3 // limit wrap width + + // check number of pixels +2: cmp r7,#8 // check number of remaining pixels + bhs 5f // enough pixels remain to render 8-pixels + + // check last 4-pixels + cmp r7,#4 // check last 4-pixels + blo 3f // all done + +// ---- render last 4 pixels + +RenderLevelGrad_Last: + + // [1] clear sample accumulator + movs r1,#0 // [1] clear sample accumulator + + // [4] get sample 0 (CMP sets carry when sample >= current line) + ldrb r3,[r2,#0] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 + + // [4] get sample 1 + ldrb r3,[r2,#1] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 + + // [4] get sample 2 + ldrb r3,[r2,#2] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 + + // [4] get sample 3 + ldrb r3,[r2,#3] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 + adds r2,#4 // [1] shift pointer to source buffer + + // [2] prepare conversion table -> R1 + lsls r1,#3 // [1] multiply accumulated bits * 8 (table entries are 8 bytes: two 32-bit masks) + add r1,lr // [1] add pointer to conversion table + + // [7] convert 4 pixels (lower 4 bits) + ldr r1,[r1,#4] // [2] load mask for lower 4 bits + ands r1,r6 // [1] mask foreground color + eors r1,r4 // [1] combine with background color + stmia r0!,{r1} // [3] store 4 pixels + + // check if continue with next segment +7: ldr r2,[sp,#4] // get base pointer to sample data -> R2 + cmp r7,#4 + bhi RenderLevelGrad_OutLoop // R7 > 4 (unsigned): back to outer loop, else fall through and return + + // pop registers and return (R0 is not popped, it holds the new destination pointer) +3: pop {r1-r7,pc} + +// ---- prepare to render 8-pixels + + // prepare number
of whole 4-pixels to render -> R1 +5: lsrs r1,r7,#2 // shift width to get number of 4-pixels + lsls r7,r1,#2 // shift back to get number of pixels, rounded down -> R7 + subs r3,r7 // get remaining width + str r3,[sp,#8] // save new remaining width + subs r1,#1 // number of 4-pixels - 1 + +// ---- [50*N-1] start inner loop, render in one part of segment +// Inner loop variables (* prepared before inner loop): +// R0 ... *pointer to destination data buffer +// R1 ... *number of 4-pixels to generate - 1 (loop counter) +// R2 ... *pointer to source sample buffer +// R3 ... sample +// R4 ... *background color (expanded to 32-bit) +// R5 ... *current line Y (in direction from bottom to up) +// R6 ... *foreground color XOR background color (expanded to 32-bit) +// R7 ... sample accumulator, conversion table +// LR ... *pointer to conversion table +// [SP+0] ... (saved R1, not read by this function) +// [SP+4] ... *base pointer to sample data (without X) +// [SP+8] ... *remaining width +// [SP+32] ... *wrap width + +RenderLevelGrad_InLoopTop: // render 8 pixels in one loop step (no top/bottom split in this renderer) + + // [1] clear sample accumulator + movs r7,#0 // [1] clear sample accumulator + + // [4] get sample 0 + ldrb r3,[r2,#0] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [4] get sample 1 + ldrb r3,[r2,#1] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [4] get sample 2 + ldrb r3,[r2,#2] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [4] get sample 3 + ldrb r3,[r2,#3] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [4] get sample 4 + ldrb r3,[r2,#4] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with
current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [4] get sample 5 + ldrb r3,[r2,#5] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [4] get sample 6 + ldrb r3,[r2,#6] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [5] get sample 7 + ldrb r3,[r2,#7] // [2] get data sample -> R3 + cmp r3,r5 // [1] compare sample with current line + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + adds r2,#8 // [1] shift pointer to source buffer + + // [2] prepare conversion table -> R7 + lsls r7,#3 // [1] multiply accumulated bits * 8 (table entries are 8 bytes: two 32-bit masks) + add r7,lr // [1] add pointer to conversion table + + // [4] convert first 4 pixels (higher 4 bits) + ldr r3,[r7,#0] // [2] load mask for higher 4 bits + ands r3,r6 // [1] mask foreground color + eors r3,r4 // [1] combine with background color + + // [7] convert second 4 pixels (lower 4 bits) + ldr r7,[r7,#4] // [2] load mask for lower 4 bits + ands r7,r6 // [1] mask foreground color + eors r7,r4 // [1] combine with background color + stmia r0!,{r3,r7} // [3] store all 8 pixels (both words) + + // [2,3] loop counter + subs r1,#2 // [1] shift loop counter + bhi RenderLevelGrad_InLoopTop // [1,2] > 0, render next whole 8-pixels + +// ---- end inner loop, continue with last 4-pixels, or start new part + + // continue to outer loop + ldr r7,[sp,#32] // load wrap width (LDR does not change the flags set by SUBS above) +8: beq RenderLevelGrad_Last // counter reached exactly 0: one 4-pixel group is left, render last 4-pixels + ldr r2,[sp,#4] // get base pointer to sample data -> R2 + b RenderLevelGrad_OutLoop // go back to outer loop + + .align 2 +RenderLevelGrad_Addr: + .word RenderTextMask diff --git a/MCUME_pico/picovga_t4/render/vga_mtext.S b/MCUME_pico/picovga_t4/render/vga_mtext.S new file mode 100755 index 0000000..b2abc7d --- /dev/null +++ b/MCUME_pico/picovga_t4/render/vga_mtext.S @@ -0,0 +1,288 @@ + +//
**************************************************************************** +// +// VGA render GF_MTEXT +// +// **************************************************************************** +// u32 par SSEGM_PAR pointer to the font +// u32 par2 SSEGM_PAR2 LOW background color, HIGH foreground color +// u16 par3 font height + +#include "../define.h" // common definitions of C and ASM +#include "hardware/regs/sio.h" // registers of hardware divider +#include "hardware/regs/addressmap.h" // SIO base address + + .syntax unified + .section .time_critical.Render, "ax" + .cpu cortex-m0plus + .thumb // use 16-bit instructions + +// render font pixel mask +.extern RenderTextMask // u32 RenderTextMask[512]; + +// extern "C" u8* RenderMText(u8* dbuf, int x, int y, int w, sSegm* segm) + +// render 8-pixel mono text GF_MTEXT +// R0 ... destination data buffer +// R1 ... start X coordinate (in pixels, must be multiple of 4) +// R2 ... start Y coordinate (in graphics lines) +// R3 ... width to display (must be multiple of 4 and > 0) +// [stack] ... segm video segment sSegm +// Output new pointer to destination data buffer. +// 320 pixels takes 6.9 us on 151 MHz. 
+ +.thumb_func +.global RenderMText +RenderMText: + + // push registers (R0 is not saved: it carries the destination pointer and is returned advanced) + push {r1-r7,lr} + +// Stack content: +// SP+0: R1 start X coordinate +// SP+4: R2 start Y coordinate (later: base pointer to text data row) +// SP+8: R3 width to display (later: remaining width) +// SP+12: R4 +// SP+16: R5 +// SP+20: R6 +// SP+24: R7 +// SP+28: LR +// SP+32: video segment (later: wrap width in X direction) + + // get pointer to video segment -> R4 + ldr r4,[sp,#32] // load video segment -> R4 + + // start divide Y/font height + ldr r6,RenderMText_pSioBase // get address of SIO base -> R6 + str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate + ldrh r2,[r4,#SSEGM_PAR3] // font height -> R2 + str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height + +// - now we must wait at least 8 clock cycles to get result of division + + // [6] get wrap width -> [SP+32] + ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width + movs r7,#3 // [1] mask to align to 32-bit + bics r5,r7 // [1] align wrap + str r5,[sp,#32] // [2] save wrap width + + // [1] align X coordinate to 32-bit + bics r1,r7 // [1] + + // [3] align remaining width + bics r3,r7 // [1] + str r3,[sp,#8] // [2] save new width + + // load result of division Y/font_height -> R5 Y relative at row, R2 Y row + // Note: QUOTIENT must be read last + ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row + ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R2, index of row + + // pointer to font line -> R3 + lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long) + ldr r3,[r4,#SSEGM_PAR] // get pointer to font + add r3,r5 // line offset + font base -> pointer to current font line R3 + + // base pointer to text data (without X) -> [SP+4], R2 + ldrh r5,[r4,#SSEGM_WB] // get pitch of rows + muls r2,r5 // Y * WB -> offset of row in text buffer + ldr r5,[r4,#SSEGM_DATA] // pointer to data + add r2,r5 // base address of text buffer + str r2,[sp,#4] // save pointer to text
buffer + + // prepare pointer to text data with X -> R2 + lsrs r6,r1,#3 // convert X to character index (1 character is 8 pixels width) + add r2,r6 // pointer to source text buffer -> R2 + + // prepare foreground color, expand to 32-bit -> R6 + ldrb r6,[r4,#SSEGM_PAR2+1] // load foreground color (second byte of par2) + lsls r7,r6,#8 // [1] shift foreground color << 8 + orrs r7,r6 // [1] color expanded to 16 bits + lsls r6,r7,#16 // [1] shift 16-bit color << 16 + orrs r6,r7 // [1] color expanded to 32 bits + + // prepare background color, expand to 32 bits -> R4 + ldrb r4,[r4,#SSEGM_PAR2] // load background color (first byte of par2) + lsls r5,r4,#8 // shift background color << 8 + orrs r5,r4 // color expanded to 16 bits + lsls r4,r5,#16 // shift 16-bit color << 16 + orrs r4,r5 // color expanded to 32 bits + + // [1] XOR foreground and background color -> R6 + eors r6,r4 // [1] XOR foreground color with background color (R6 = fg ^ bg, so (mask & R6) ^ R4 yields fg or bg per byte) + + // prepare pointer to conversion table -> LR + ldr r5,RenderMText_Addr // get pointer to conversion table -> R5 + mov lr,r5 // conversion table -> LR + +// ---- render 2nd half of first character +// R0 ... pointer to destination data buffer +// R1 ... start X coordinate +// R2 ... pointer to source text buffer +// R3 ... pointer to font line +// R4 ... background color (expanded to 32-bit) +// R5 ... (temporary) +// R6 ... foreground color XOR background color (expanded to 32-bit) +// R7 ... (temporary) +// LR ... pointer to conversion table +// [SP+4] ... base pointer to text data (without X) +// [SP+8] ... remaining width +// [SP+32] ...
wrap width + + // check bit 2 of X coordinate - check if image starts with 2nd half of first character + lsls r5,r1,#29 // move bit 2 of X coordinate into bit 31 (N flag) + bpl 2f // bit 2 not set, starting even 4-pixels + + // [5] load font sample -> R5 + ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 + ldrb r5,[r3,r5] // [2] load font sample -> R5 + adds r2,#1 // [1] shift pointer to source text buffer + + // [2] prepare conversion table -> R5 + lsls r5,#3 // [1] multiply font sample * 8 + add r5,lr // [1] add pointer to conversion table + + // [6] convert second 4 pixels (lower 4 bits) + ldr r7,[r5,#4] // [2] load mask for lower 4 bits + ands r7,r6 // [1] mask foreground color + eors r7,r4 // [1] combine with background color + stmia r0!,{r7} // [2] store second 4 pixels + + // shift X coordinate + adds r1,#4 // shift X coordinate + + // check end of segment + ldr r7,[sp,#32] // load wrap width + cmp r1,r7 // end of segment? + blo 1f + movs r1,#0 // reset X coordinate + ldr r2,[sp,#4] // get base pointer to text data -> R2 + + // shift remaining width +1: ldr r7,[sp,#8] // get remaining width + subs r7,#4 // shift width + str r7,[sp,#8] // save new width + + // prepare wrap width - start X -> R7 +2: ldr r7,[sp,#32] // load wrap width + subs r7,r1 // pixels remaining to end of segment + +// ---- start outer loop, render one part of segment +// Outer loop variables (* prepared before outer loop): +// R0 ... *pointer to destination data buffer +// R1 ... number of characters to generate in one part of segment +// R2 ... *pointer to source text buffer +// R3 ... *pointer to font line +// R4 ... *background color (expanded to 32-bit) +// R5 ... (temporary) +// R6 ... *foreground color XOR background color (expanded to 32-bit) +// R7 ... *wrap width of this segment, later: temporary +// LR ... *pointer to conversion table +// [SP+4] ... *base pointer to text data (without X) +// [SP+8] ... *remaining width +// [SP+32] ...
*wrap width + +RenderMText_OutLoop: + + // limit wrap width by total width -> R7 + ldr r5,[sp,#8] // get remaining width + cmp r7,r5 // compare with wrap width + bls 2f // width is OK + mov r7,r5 // limit wrap width + + // check if remain whole characters +2: cmp r7,#8 // check number of remaining pixels + bhs 5f // enough characters remain + + // check if 1st part of last character remains + cmp r7,#4 // check 1st part of last character + blo 3f // all done + +// ---- render 1st part of last character + +RenderMText_Last: + + // [5] load font sample -> R5 + ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 + ldrb r5,[r3,r5] // [2] load font sample -> R5 + adds r2,#1 // [1] shift pointer to source text buffer + + // [2] prepare conversion table -> R5 + lsls r5,#3 // [1] multiply font sample * 8 + add r5,lr // [1] add pointer to conversion table + + // [6] convert first 4 pixels (higher 4 bits) + ldr r1,[r5,#0] // [2] load mask for higher 4 bits + ands r1,r6 // [1] mask foreground color + eors r1,r4 // [1] combine with background color + stmia r0!,{r1} // [2] store first 4 pixels + + // check if continue with next segment + ldr r2,[sp,#4] // get base pointer to text data -> R2 + cmp r7,#4 + bhi RenderMText_OutLoop // R7 > 4 (unsigned): back to outer loop, else fall through and return + + // pop registers and return (R0 is not popped, it holds the new destination pointer) +3: pop {r1-r7,pc} + +// ---- prepare to render whole characters + + // prepare number of whole characters to render -> R1 +5: lsrs r1,r7,#2 // shift to get number of characters*2 + lsls r7,r1,#2 // shift back to get number of pixels, rounded down -> R7 + subs r5,r7 // get remaining width + str r5,[sp,#8] // save new remaining width + subs r1,#1 // number of characters*2 - 1 + +// ---- [22*N-1] start inner loop, render characters in one part of segment +// Inner loop variables (* prepared before inner loop): +// R0 ... *pointer to destination data buffer +// R1 ... *number of characters to generate*2 - 1 (loop counter) +// R2 ... *pointer to source text buffer +// R3 ... *pointer to font line +// R4 ...
*background color (expanded to 32-bit) +// R5 ... font sample +// R6 ... *foreground color XOR background color (expanded to 32-bit) +// R7 ... (temporary) +// LR ... *pointer to conversion table + +RenderMText_InLoop: + + // [5] load font sample -> R5 + ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5 + ldrb r5,[r3,r5] // [2] load font sample -> R5 + adds r2,#1 // [1] shift pointer to source text buffer + + // [2] prepare conversion table -> R5 + lsls r5,#3 // [1] multiply font sample * 8 + add r5,lr // [1] add pointer to conversion table + + // [6] convert first 4 pixels (higher 4 bits) + ldr r7,[r5,#0] // [2] load mask for higher 4 bits + ands r7,r6 // [1] mask foreground color + eors r7,r4 // [1] combine with background color + stmia r0!,{r7} // [2] store first 4 pixels + + // [6] convert second 4 pixels (lower 4 bits) + ldr r7,[r5,#4] // [2] load mask for lower 4 bits + ands r7,r6 // [1] mask foreground color + eors r7,r4 // [1] combine with background color + stmia r0!,{r7} // [2] store second 4 pixels + + // [2,3] loop counter + subs r1,#2 // [1] shift loop counter + bhi RenderMText_InLoop // [1,2] > 0, render next whole character + +// ---- end inner loop, continue with last character, or start new part + + // continue to outer loop + ldr r7,[sp,#32] // load wrap width (LDR does not change the flags set by SUBS above) + beq RenderMText_Last // counter reached exactly 0: render 1st half of last character + ldr r2,[sp,#4] // get base pointer to text data -> R2 + b RenderMText_OutLoop // go back to outer loop + + .align 2 +RenderMText_Addr: + .word RenderTextMask +RenderMText_pSioBase: + .word SIO_BASE // address of SIO base diff --git a/MCUME_pico/picovga_t4/render/vga_oscil.S b/MCUME_pico/picovga_t4/render/vga_oscil.S new file mode 100755 index 0000000..f3312e0 --- /dev/null +++ b/MCUME_pico/picovga_t4/render/vga_oscil.S @@ -0,0 +1,297 @@ + +// **************************************************************************** +// +// VGA render GF_OSCIL +// +// **************************************************************************** +
+#include "../define.h" // common definitions of C and ASM + + .syntax unified + .section .time_critical.Render, "ax" + .cpu cortex-m0plus + .thumb // use 16-bit instructions + +// render font pixel mask +.extern RenderTextMask // u32 RenderTextMask[512]; + +// extern "C" u8* RenderOscil(u8* dbuf, int x, int y, int w, sSegm* segm); + +// render one scanline of oscilloscope graph GF_OSCIL: one byte sample is read per X; the pixel bit is set when (sample - line counted from bottom), taken as unsigned, is <= pixel height; set bits presumably select the foreground color through RenderTextMask (table contents are not visible here - confirm) +// dbuf ... destination data buffer +// x ... start X coordinate (must be multiple of 4; the low 2 bits are masked off) +// y ... start Y coordinate +// w ... width of this segment (must be multiple of 4; the low 2 bits are masked off) +// segm ... video segment +// Output new dbuf pointer. +// 320 pixels takes 16.6 us on 151 MHz. + +.thumb_func +.global RenderOscil +RenderOscil: + + // push registers + push {r1-r7,lr} + +// Input registers and stack content: +// R0 ... pointer to destination data buffer +// SP+0: R1 start X coordinate +// SP+4: R2 start Y coordinate (later: base pointer to sample data) +// SP+8: R3 width to display +// SP+12: R4 +// SP+16: R5 +// SP+20: R6 +// SP+24: R7 +// SP+28: LR +// SP+32: video segment (later: wrap width in X direction) + + // get pointer to video segment -> R4 + ldr r4,[sp,#32] // load video segment -> R4 + + // get wrap width -> [SP+32] + ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width + movs r7,#3 // mask to align to 32-bit + bics r5,r7 // align wrap + str r5,[sp,#32] // save wrap width + + // align X coordinate to 32-bit -> R1 + bics r1,r7 + + // align remaining width -> [SP+8] + bics r3,r7 + str r3,[sp,#8] // save new width + + // current Y in direction from bottom to up -> R5 + ldrh r5,[r4,#SSEGM_WRAPY] // get wrap height + subs r5,#1 // wrapy - 1 + subs r5,r2 // subtract Y, get Y relative to bottom -> R5 + + // get pixel height -> LR + ldrb r3,[r4,#SSEGM_PAR2] // get pixel height + mov lr,r3 // pixel height -> LR + + // base pointer to sample data (without X) -> [SP+4], R2 + ldr r2,[r4,#SSEGM_DATA] // pointer to sample data + str r2,[sp,#4] // save pointer to sample buffer + + // prepare pointer to sample data 
with X -> R2 + add r2,r1 // pointer to source sample buffer -> R2 + + // prepare foreground color, expand to 32-bit -> R6 + ldrb r6,[r4,#SSEGM_PAR+1] // load foreground color + lsls r3,r6,#8 // [1] shift foreground color << 8 + orrs r3,r6 // [1] color expanded to 16 bits + lsls r6,r3,#16 // [1] shift 16-bit color << 16 + orrs r6,r3 // [1] color expanded to 32 bits + + // prepare background color, expand to 32 bits -> R4 + ldrb r4,[r4,#SSEGM_PAR] // load background color + lsls r3,r4,#8 // shift background color << 8 + orrs r3,r4 // color expanded to 16 bits + lsls r4,r3,#16 // shift 16-bit color << 16 + orrs r4,r3 // color expanded to 32 bits + + // [1] XOR foreground and background color -> R6 + eors r6,r4 // [1] XOR foreground color with background color, so that (mask AND R6) XOR R4 later yields foreground where the mask is set and background elsewhere + + // prepare wrap width - start X -> R7 + ldr r7,[sp,#32] // load wrap width + subs r7,r1 // pixels remaining to end of segment + + // last 4-pixels + cmp r7,#4 + bhi RenderOscil_OutLoop + ldr r7,[sp,#32] // load wrap width + b RenderOscil_Last // render last 4-pixels of first segment + +// ---- start outer loop, render one part of segment +// Outer loop variables (* prepared before outer loop): +// R0 ... *pointer to destination data buffer +// R1 ... number of 4-pixels to generate in one part of segment +// R2 ... *pointer to source sample buffer +// R3 ... remaining width, later: (temporary) +// R4 ... *background color (expanded to 32-bit) +// R5 ... *current line Y (in direction from bottom to up) +// R6 ... *foreground color XOR background color (expanded to 32-bit) +// R7 ... *wrap width of this segment, later: (temporary) +// LR ... *pixel height +// [SP+4] ... *base pointer to sample data (without X) +// [SP+8] ... *remaining width +// [SP+32] ... 
*wrap width + +RenderOscil_OutLoop: + + // limit wrap width by total width -> R7 + ldr r3,[sp,#8] // get remaining width + cmp r7,r3 // compare with wrap width + bls 2f // width is OK + mov r7,r3 // limit wrap width + + // check number of pixels +2: cmp r7,#8 // check number of remaining pixels + bhs 5f // enough pixels remain to render 8-pixels + + // check last 4-pixels + cmp r7,#4 // check last 4-pixels + blo 3f // all done + +// ---- render last 4 pixels + +RenderOscil_Last: + + // [1] clear sample accumulator + movs r1,#0 // [1] clear sample accumulator + + // [5] get sample 0 + ldrb r3,[r2,#0] // [2] get data sample -> R3 + subs r3,r5 // [1] distance from current line + cmp lr,r3 // [1] compare with pixel height + adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 + + // [5] get sample 1 + ldrb r3,[r2,#1] // [2] get data sample -> R3 + subs r3,r5 // [1] distance from current line + cmp lr,r3 // [1] compare with pixel height + adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 + + // [5] get sample 2 + ldrb r3,[r2,#2] // [2] get data sample -> R3 + subs r3,r5 // [1] distance from current line + cmp lr,r3 // [1] compare with pixel height + adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 + + // [5] get sample 3 + ldrb r3,[r2,#3] // [2] get data sample -> R3 + subs r3,r5 // [1] distance from current line + cmp lr,r3 // [1] compare with pixel height + adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1 + adds r2,#4 // [1] shift pointer to source buffer + + // [4] prepare conversion table -> R1 + lsls r1,#3 // [1] multiply sample * 8 + ldr r3,RenderOscil_Addr // [2] get pointer to conversion table -> R3 + add r1,r3 // [1] add pointer to conversion table + + // [7] convert 4 pixels (lower 4 bits) + ldr r1,[r1,#4] // [2] load mask for lower 4 bits (only 4 bits were accumulated, so the second word of the table entry is used) + ands r1,r6 // [1] mask foreground color + eors r1,r4 // [1] combine with background color + stmia r0!,{r1} // [3] store 4 pixels + + // check if continue with next 
segment + ldr r2,[sp,#4] // get base pointer to sample data -> R2 + cmp r7,#4 + bhi RenderOscil_OutLoop + + // pop registers and return +3: pop {r1-r7,pc} + +// ---- prepare to render 8-pixels + + // prepare number of whole 4-pixels to render -> R1 +5: lsrs r1,r7,#2 // shift width to get number of 4-pixels + lsls r7,r1,#2 // shift back to get number of pixels, rounded down -> R7 + subs r3,r7 // get remaining width + str r3,[sp,#8] // save new remaining width + subs r1,#1 // number of 4-pixels - 1 + +// ---- [50*N-1] start inner loop, render in one part of segment +// Inner loop variables (* prepared before inner loop): +// R0 ... *pointer to destination data buffer +// R1 ... *number of 4-pixels to generate - 1 (loop counter, decremented by 2 per 8-pixel step) +// R2 ... *pointer to source sample buffer +// R3 ... sample +// R4 ... *background color (expanded to 32-bit) +// R5 ... *current line Y (in direction from bottom to up) +// R6 ... *foreground color XOR background color (expanded to 32-bit) +// R7 ... sample accumulator, conversion table +// LR ... *pixel height +// [SP+4] ... *base pointer to sample data (without X) +// [SP+8] ... *remaining width +// [SP+32] ... 
*wrap width + +RenderOscil_InLoop: // render 8 pixels in one loop step, top half of graph + + // [1] clear sample accumulator + movs r7,#0 // [1] clear sample accumulator + + // [5] get sample 0 + ldrb r3,[r2,#0] // [2] get data sample -> R3 + subs r3,r5 // [1] distance from current line + cmp lr,r3 // [1] compare with pixel height + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [5] get sample 1 + ldrb r3,[r2,#1] // [2] get data sample -> R3 + subs r3,r5 // [1] distance from current line + cmp lr,r3 // [1] compare with pixel height + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [5] get sample 2 + ldrb r3,[r2,#2] // [2] get data sample -> R3 + subs r3,r5 // [1] distance from current line + cmp lr,r3 // [1] compare with pixel height + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [5] get sample 3 + ldrb r3,[r2,#3] // [2] get data sample -> R3 + subs r3,r5 // [1] distance from current line + cmp lr,r3 // [1] compare with pixel height + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [5] get sample 4 + ldrb r3,[r2,#4] // [2] get data sample -> R3 + subs r3,r5 // [1] distance from current line + cmp lr,r3 // [1] compare with pixel height + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [5] get sample 5 + ldrb r3,[r2,#5] // [2] get data sample -> R3 + subs r3,r5 // [1] distance from current line + cmp lr,r3 // [1] compare with pixel height + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [5] get sample 6 + ldrb r3,[r2,#6] // [2] get data sample -> R3 + subs r3,r5 // [1] distance from current line + cmp lr,r3 // [1] compare with pixel height + adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7 + + // [6] get sample 7 + ldrb r3,[r2,#7] // [2] get data sample -> R3 + subs r3,r5 // [1] distance from current line + cmp lr,r3 // [1] compare with pixel height + adcs r7,r7 // [1] shift carry of comparison to 
accumulator -> R7 + adds r2,#8 // [1] shift pointer to source buffer + + // [4] prepare conversion table -> R7 + lsls r7,#3 // [1] multiply sample * 8 + ldr r3,RenderOscil_Addr // [2] get pointer to conversion table -> R3 + add r7,r3 // [1] add pointer to conversion table + + // [4] convert first 4 pixels (higher 4 bits) + ldr r3,[r7,#0] // [2] load mask for higher 4 bits + ands r3,r6 // [1] mask foreground color + eors r3,r4 // [1] combine with background color + + // [7] convert second 4 pixels (lower 4 bits) + ldr r7,[r7,#4] // [2] load mask for lower 4 bits + ands r7,r6 // [1] mask foreground color + eors r7,r4 // [1] combine with background color + stmia r0!,{r3,r7} // [3] store all 8 pixels (first and second 4-pixel words) + + // [2,3] loop counter + subs r1,#2 // [1] shift loop counter + bhi RenderOscil_InLoop // [1,2] > 0, render next whole 8-pixels + +// ---- end inner loop, continue with last 4-pixels, or start new part + + // continue to outer loop + ldr r7,[sp,#32] // load wrap width (LDR does not alter the flags left by SUBS above) +8: beq RenderOscil_Last // counter reached exactly 0: one 4-pixel group remains, render last 4-pixels + ldr r2,[sp,#4] // get base pointer to sample data -> R2 + b RenderOscil_OutLoop // go back to outer loop + + .align 2 +RenderOscil_Addr: + .word RenderTextMask diff --git a/MCUME_pico/picovga_t4/render/vga_oscline.S b/MCUME_pico/picovga_t4/render/vga_oscline.S new file mode 100755 index 0000000..978c539 --- /dev/null +++ b/MCUME_pico/picovga_t4/render/vga_oscline.S @@ -0,0 +1,190 @@ + +// **************************************************************************** +// +// VGA render GF_OSCLINE +// +// **************************************************************************** + +#include "../define.h" // common definitions of C and ASM + + .syntax unified + .section .time_critical.Render, "ax" + .cpu cortex-m0plus + .thumb // use 16-bit instructions + +// render font pixel mask +.extern RenderTextMask // u32 RenderTextMask[512]; + +// extern "C" u8* RenderOscLine(u8* dbuf, int x, int y, int w, sSegm* segm); + +// render oscilloscope graph GF_OSCLINE 
+// dbuf ... destination data buffer +// x ... start X coordinate (must be multiple of 4; the code halves it, one sample covers 2 pixels) +// y ... start Y coordinate +// w ... width of this segment (must be multiple of 4; the code halves it, one sample covers 2 pixels) +// segm ... video segment +// Output new dbuf pointer. +// 320 pixels takes 21.5 us on 151 MHz. + +.thumb_func +.global RenderOscLine +RenderOscLine: + + // push registers + push {r2-r7,lr} + +// Input registers and stack content: +// R0 ... pointer to destination data buffer +// R1 ... start X coordinate +// SP+0: R2 start Y coordinate (later: base pointer to sample data) +// SP+4: R3 width to display +// SP+8: R4 +// SP+12: R5 +// SP+16: R6 +// SP+20: R7 +// SP+24: LR +// SP+28: video segment (later: wrap width in X direction) + + // get pointer to video segment -> R4 + ldr r4,[sp,#28] // load video segment -> R4 + + // get wrap width/2 -> [SP+28] + ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width + lsrs r5,#1 // wrap width / 2 + str r5,[sp,#28] // save wrap width + + // X coordinate/2 -> R1 + lsrs r1,#1 + + // remaining width/2 -> [SP+4] + lsrs r3,#1 + str r3,[sp,#4] // save new width + + // current Y in direction from bottom to up -> LR + ldrh r5,[r4,#SSEGM_WRAPY] // get wrap height + subs r5,#1 // wrapy - 1 + subs r5,r2 // subtract Y, get Y relative to bottom -> R5 + mov lr,r5 + + // base pointer to sample data (without X) -> [SP+0], R2 + ldr r2,[r4,#SSEGM_DATA] // pointer to sample data + str r2,[sp,#0] // save pointer to sample buffer + + // prepare pointer to sample data with X -> R2 + add r2,r1 // pointer to source sample buffer -> R2 + + // prepare foreground color -> R6 (8-bit color duplicated into both bytes of a halfword = 2 pixels) + ldrb r6,[r4,#SSEGM_PAR+1] // load foreground color + lsls r7,r6,#8 + orrs r6,r7 + + // prepare background color -> R4 (8-bit color duplicated into both bytes of a halfword = 2 pixels) + ldrb r4,[r4,#SSEGM_PAR] // load background color + lsls r7,r4,#8 + orrs r4,r7 + + // prepare wrap width - start X -> R1 + ldr r7,[sp,#28] // load wrap width + subs r1,r7,r1 // pixels remaining to end of segment + +// ---- start outer loop, render one part of segment +// Outer loop variables (* 
prepared before outer loop): +// R0 ... *pointer to destination data buffer +// R1 ... *wrap width of this segment, later: number of pixels to generate in one part of segment +// R2 ... *pointer to source sample buffer +// R3 ... remaining width, later: (temporary) +// R4 ... *background color +// R5 ... (temporary) +// R6 ... *foreground color +// R7 ... (temporary) +// LR ... *current line Y (in direction from bottom to up) +// [SP+0] ... *base pointer to sample data (without X) +// [SP+4] ... *remaining width +// [SP+28] ... *wrap width + +RenderOscLine_OutLoop: + + // limit wrap width by total width -> R1 + ldr r3,[sp,#4] // get remaining width + cmp r1,r3 // compare with wrap width + bls 2f // width is OK + mov r1,r3 // limit wrap width + + // check number of pixels +2: cmp r1,#0 // check number of remaining pixels + beq RenderOscLine_Stop // stop + subs r3,r1 // get remaining width + str r3,[sp,#4] // save new remaining width + +// ---- start inner loop, render in one part of segment +// Inner loop variables (* prepared before inner loop): +// R0 ... *pointer to destination data buffer +// R1 ... *number of pixels to generate (loop counter) +// R2 ... *pointer to source sample buffer +// R3 ... sample +// R4 ... *background color +// R5 ... previous sample +// R6 ... *foreground color +// R7 ... current color +// LR ... *current line Y (in direction from bottom to up) +// [SP+0] ... *base pointer to sample data (without X) +// [SP+4] ... *remaining width +// [SP+28] ... 
*wrap width + + ldrb r5,[r2,#0] // [2] prepare previous sample -> R5 (first sample of this part is used as its own predecessor) + +RenderOscLine_InLoop: // render one sample (one halfword = 2 pixels) in one loop step + + // [3] get sample + ldrb r3,[r2,#0] // [2] get data sample -> R3 + adds r2,#1 // [1] increment pointer + + // [1] preset to background color + mov r7,r4 // [1] preset to background color + + // [3..8] (sample > previous sample) AND (sample > line) AND (line > previous sample) - display pixel + cmp r3,lr // [1] compare sample with line + beq 4f // [1,2] (sample == line), true, display pixel everytime + blo 2f // [1,2] (sample < line), false + cmp r3,r5 // [1] compare sample with previous sample + bls 2f // [1,2] (sample <= previous), false + cmp lr,r5 // [1] compare line with previous sample + bhi 4f // [1,2] (line > previous), true + + // [3..7] (sample < previous sample) AND (sample < line) AND (line < previous sample) - display pixel +2: cmp r3,r5 // [1] compare sample with previous sample + bhs 6f // [1,2] (sample >= previous), false + cmp r3,lr // [1] compare sample with line + bhs 6f // [1,2] (sample >= line), false + cmp lr,r5 // [1] compare line with previous sample + bhs 6f // [1,2] (line >= previous), false + + // [1] use foreground color +4: mov r7,r6 // [1] use foreground color + + // [3] write 2 pixels +6: strh r7,[r0,#0] // [2] write pixel + adds r0,#2 // [1] increment pointer + + // [1] save previous sample + mov r5,r3 // [1] current sample becomes the previous sample of the next step + + // [2,3] loop counter + subs r1,#1 // [1] shift loop counter + bne RenderOscLine_InLoop // [1,2] render next pixel + +// ---- end inner loop, start new part + + // continue to outer loop + ldr r1,[sp,#28] // load wrap width + ldr r2,[sp,#0] // get base pointer to sample data -> R2 + b RenderOscLine_OutLoop // go back to outer loop + +RenderOscLine_Stop: + + // pop registers and return + pop {r2-r7,pc} + + .align 2 +RenderOscLine_Addr: + .word RenderTextMask diff --git a/MCUME_pico/picovga_t4/render/vga_persp.S b/MCUME_pico/picovga_t4/render/vga_persp.S new file mode 100755 
index 0000000..4056bfd --- /dev/null +++ b/MCUME_pico/picovga_t4/render/vga_persp.S @@ -0,0 +1,360 @@ + +// **************************************************************************** +// +// VGA render LAYERMODE_PERSP* +// +// **************************************************************************** +// img ... (const u8*) SLAYER_IMG image data +// par ... (const void*) SLAYER_PAR pointer to 6 matrix integer parameters m11,m12..m23 +// horiz ... (s8) SLAYER_HORIZ horizon offset/4 (0=no perspecitve, <0 ceilling) +// xbits ... (u8) SLAYER_XBITS number of bits of image width +// ybits ... (u8) SLAYER_YBITS number of bits of image height +// w ... (u16) SLAYER_W destination width +// h ... (u16) SLAYER_H destination height + +#include "../define.h" // common definitions of C and ASM +#include "hardware/regs/sio.h" // registers of hardware divider +#include "hardware/regs/addressmap.h" // SIO base address + +#define ACCUM0_OFFSET 0 +#define ACCUM1_OFFSET 4 +#define BASE0_OFFSET 8 +#define BASE1_OFFSET 12 +#define BASE2_OFFSET 16 +#define POP_LANE0_OFFSET 20 +#define POP_LANE1_OFFSET 24 +#define POP_FULL_OFFSET 28 +#define PEEK_LANE0_OFFSET 32 +#define PEEK_LANE1_OFFSET 36 +#define PEEK_FULL_OFFSET 40 +#define CTRL_LANE0_OFFSET 44 +#define CTRL_LANE1_OFFSET 48 +#define ACCUM0_ADD_OFFSET 52 +#define ACCUM1_ADD_OFFSET 56 +#define BASE_1AND0_OFFSET 60 + + .syntax unified + .section .time_critical.Render, "ax" + .cpu cortex-m0plus + .thumb // use 16-bit instructions + +// extern "C" void RenderPersp(u8* dbuf, int y, sLayer* scr) + +// render layers with transformatio matrix LAYERMODE_PERSP* +// R0 ... dbuf pointer to data buffer +// R1 ... y coordinate of scanline (relative in destination image) +// R2 ... 
scr pointer to layer screen structure sLayer + +.thumb_func +.global RenderPersp +RenderPersp: + + // push registers + push {r4-r7,lr} + +// Stack content and input variables: +// R0 dbuf pointer to data buffer +// R1 Y coordinate of scanline +// R2 scr pointer to layer screen structure sLayer +// R3 +// SP+0: R4 +// SP+4: R5 +// SP+8: R6 +// SP+12: R7 +// SP+16: LR + +// R0 ... pointer to destination data buffer +// R1 ... Y coordinate +// R2 ... sLayer + + // load horizon offset -> R4, check if use perspective + ldr r6,RenderPersp_pSioBase // get address of SIO base -> R6 + ldrh r5,[r2,#SLAYER_H] // get destination height -> R5 + ldrb r4,[r2,#SLAYER_HORIZ] // get horizon offset -> R4 + sxtb r4,r4 // signed extension + lsls r4,#2 // horizon * 4, horizon = 0 ? + bne 2f // use perspective + + // not using perspective, start Y coordinate y0 = y - h/2 -> R12 + lsrs r5,#1 // destination height/2 -> R5 + subs r1,r5 // y - h/2 -> R1 + mov r12,r1 // current coordinate Y0 = y - h/2 -> R12 + + // prepare divide result to get 1< R5 + str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL + b 4f + + // using perspective, check ceilling mode +2: bpl 3f // horizon is not negative + subs r1,r5,r1 // negate, y = h - y + subs r1,#1 // y = h - 1 - y + negs r4,r4 // absolute value of horizon + + // prepare current coordinate Y0 = y - h -> R12 +3: subs r7,r1,r5 // y - h = current Y coordinate -> R7 + mov r12,r7 // store current coordinate Y0 -> R12 + + // start calculating distance coefficient dist = FRACTMUL*h/(y + horiz) + lsls r5,#FRACT // destination height * FRACTMUL -> R5 + str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h + adds r1,r4 // horizon + y -> R2 + str r1,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz + +// R0 ... pointer to destination data buffer +// R2 ... sLayer +// R12 ... 
current coordinate Y0 + + // prepare start coordinate X0 = -w/2 -> LR +4: ldrh r3,[r2,#SLAYER_W] // get destination width -> R3 + lsrs r5,r3,#1 // width/2 + negs r5,r5 // negate + mov lr,r5 // store start coordinate X0 -> LR + + // prepare number of 4-pixels (loop counter) -> R7 + lsrs r7,r3,#2 // width/4 -> R7 + + // get number of bits of image width "xbits" -> R1 + ldrb r1,[r2,#SLAYER_XBITS] // number of bits of image width -> R1 + + // get number of bits of image height "ybits" -> R4 + ldrb r4,[r2,#SLAYER_YBITS] // number of bits of image height -> R4 + + // prepare address of interpolator base -> R3 + ldr r3,RenderPersp_Interp // get address of interpolator base -> R3 + +// R0 ... pointer to destination data buffer +// R1 ... number of bits of image width xbits +// R2 ... sLayer +// R3 ... interpolator base +// R4 ... number of bits of image height ybits +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... current coordinate Y0 + +// ---- setup interpolator + + // set image base to base2 + ldr r6,[r2,#SLAYER_IMG] // load image base + str r6,[r3,#BASE2_OFFSET] // set image base + + // set control word of lane 1 - add raw lane base back to accumulator, shift "FRACT-xbits", mask xbits...xbits+ybits-1 + ldr r6,RenderPersp_Ctrl // load control word + subs r6,r1 // FRACT - xbits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required) + lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift xbits to mask LSB position -> R5 + orrs r6,r5 // add xbits to control word + subs r1,#1 // xbits - 1 -> R1 + adds r5,r1,r4 // xbits-1+ybits -> R5 + lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to MSB mask position + orrs r6,r5 // add to control word + str r6,[r3,#CTRL_LANE1_OFFSET] // set control word of lane 1 + +// R0 ... pointer to destination data buffer +// R1 ... image width xbits-1 +// R2 ... sLayer +// R3 ... interpolator base +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... 
current coordinate Y0 + + // set control word of lane 0 - add raw lane base back to accumulator, shift "FRACT", mask 0..xbits-1 + ldr r6,RenderPersp_Ctrl // load control word + lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift xbits-1 to mask MSB position + orrs r6,r1 // add to control word + str r6,[r3,#CTRL_LANE0_OFFSET] // set control word of lane 0 + +// R0 ... pointer to destination data buffer +// R2 ... sLayer +// R3 ... interpolator base +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... current coordinate Y0 + +// ---- set matrix + + // get pointer to matrix -> R4 + ldr r4,[r2,#SLAYER_PAR] // get pointer to matrix -> R4 + + // get distance coefficient dist -> R1 + ldr r1,RenderPersp_pSioBase // get address of SIO base -> R1 + ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient + +// r4+0 ... m11 +// r4+4 ... m12 +// r4+8 ... m13 +// r4+12 ... m21 +// r4+16 ... m22 +// r4+20 ... m23 + + // set m11 -> R5 base0 + ldr r5,[r4,#0] // load m11 + muls r5,r1 // m11*dist + asrs r5,#FRACT // (m11*dist)>>FRACT + str r5,[r3,#BASE0_OFFSET] // set base0 + + // set m21 -> R6 base1 + ldr r6,[r4,#12] // load m21 + muls r6,r1 // m21*dist + asrs r6,#FRACT // (m21*dist)>>FRACT + str r6,[r3,#BASE1_OFFSET] // set base1 + +// R0 ... pointer to destination data buffer +// R1 ... distance coefficient +// R3 ... interpolator base +// R4 ... pointer to matrix +// R5 ... m11 +// R6 ... m21 +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... 
current coordinate Y0 + + // set x0*m11 + y0*m12 + m13 -> accum0 + mov r2,lr // start coordinate X0 -> X2 + muls r5,r2 // x0*m11 -> R5 + muls r2,r6 // x0*m21 -> R2 + mov lr,r1 // save distance coefficient -> LR + ldr r6,[r4,#4] // load m12 -> R6 + muls r1,r6 // m12*dist -> R1 + asrs r1,#FRACT // (m12*dist)>>FRACT -> R1 + mov r6,r12 // load coordinate Y0 -> R6 + muls r1,r6 // y0*m12 -> R1 + adds r5,r1 // x0*m11 + y0*m12 -> R5 + ldr r1,[r4,#8] // load m13 -> R1 + adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5 + str r5,[r3,#ACCUM0_OFFSET] // set accum0 + +// R0 ... pointer to destination data buffer +// R2 ... x0*m21 +// R3 ... interpolator base +// R4 ... pointer to matrix +// R6 ... current coordinate Y0 +// R7 ... width/4 +// LR ... distance coefficient + + // set x0*m21 + y0*m22 + m23 -> accum1 + ldr r1,[r4,#16] // load m22 -> R1 + mov r5,lr // distance coefficient -> R5 + muls r1,r5 // m22*dist + asrs r1,#FRACT // (m22*dist)>>FRACT -> R1 + muls r1,r6 // y0*m22 -> R1 + adds r2,r1 // x0*m21 + y0*m22 -> R2 + ldr r1,[r4,#20] // load m23 -> R1 + adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2 + str r2,[r3,#ACCUM1_OFFSET] // set accum1 + +// ---- process odd 4-pixel + +// R0 ... pointer to destination data buffer +// R3 ... interpolator base +// R4 ... (temporary - get pointer to pixel) +// R5 ... (temporary - load pixel) +// R6 ... (temporary - pixel accumulator) +// R7 ... 
width/4 (loop counter) + + lsrs r7,#1 // width/4/2 + bcc 2f // no odd 4-pixel + + // [3] load 1st pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r6,[r4,#0] // [2] load pixel + + // [5] load 2nd pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r5,[r4,#0] // [2] load pixel + lsls r5,#8 // [1] shift 1 byte left + orrs r6,r5 // [1] add pixel to accumulator + + // [5] load 3rd pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r5,[r4,#0] // [2] load pixel + lsls r5,#16 // [1] shift 2 bytes left + orrs r6,r5 // [1] add pixel to accumulator + + // [5] load 4th pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r5,[r4,#0] // [2] load pixel + lsls r5,#24 // [1] shift 3 bytes left + orrs r6,r5 // [1] add pixel to accumulator + + // [2] store 4 pixels + stmia r0!,{r6} // [2] store 4 pixels + + // check number of remaining pixels +2: tst r7,r7 // check number of pixels + beq 8f // end + +// ---- [42 per 8 pixels] inner loop +// R0 ... pointer to destination data buffer +// R1 ... (temporary - pixel accumulator 1) +// R2 ... (temporary - pixel accumulator 2) +// R3 ... interpolator base +// R4 ... (temporary - get pointer to pixel, load pixel) +// R7 ... 
width/8 (loop counter) + + // [3] load 1st pixel +6: ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r1,[r4,#0] // [2] load pixel + + // [5] load 2nd pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r4,[r4,#0] // [2] load pixel + lsls r4,#8 // [1] shift 1 byte left + orrs r1,r4 // [1] add pixel to accumulator + + // [5] load 3rd pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r4,[r4,#0] // [2] load pixel + lsls r4,#16 // [1] shift 2 bytes left + orrs r1,r4 // [1] add pixel to accumulator + + // [5] load 4th pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r4,[r4,#0] // [2] load pixel + lsls r4,#24 // [1] shift 3 bytes left + orrs r1,r4 // [1] add pixel to accumulator + + // [3] load 1st pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r2,[r4,#0] // [2] load pixel + + // [5] load 2nd pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r4,[r4,#0] // [2] load pixel + lsls r4,#8 // [1] shift 1 byte left + orrs r2,r4 // [1] add pixel to accumulator + + // [5] load 3rd pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r4,[r4,#0] // [2] load pixel + lsls r4,#16 // [1] shift 2 bytes left + orrs r2,r4 // [1] add pixel to accumulator + + // [5] load 4th pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r4,[r4,#0] // [2] load pixel + lsls r4,#24 // [1] shift 3 bytes left + orrs r2,r4 // [1] add pixel to accumulator + + // [3] store 8 pixels + stmia r0!,{r1,r2} // [3] store 8 pixels + + // [2,3] loop counter + subs r7,#1 // [1] 8-pixel counter + bne 6b // [1,2] next 8-pixels + + // pop registers +8: pop {r4-r7,pc} + + .align 2 +// pointer to SIO base +RenderPersp_pSioBase: + .word SIO_BASE // addres of SIO base + +// pointer to Interp1 base +RenderPersp_Interp: + .word SIO_BASE+SIO_INTERP1_ACCUM0_OFFSET // addres of interpolator base + +RenderPersp_Ctrl: // lane control word + .word SIO_INTERP1_CTRL_LANE0_ADD_RAW_BITS | (FRACT< R4, check if use perspective + ldr 
r6,RenderPersp_pSioBase // get address of SIO base -> R6 + ldrh r5,[r2,#SLAYER_H] // get destination height -> R5 + ldrb r4,[r2,#SLAYER_HORIZ] // get horizon offset -> R4 + sxtb r4,r4 // signed extension + lsls r4,#2 // horizon * 4, horizon = 0 ? + bne 2f // use perspective + + // not using perspective, start Y coordinate y0 = y - h/2 -> R12 + lsrs r5,#1 // destination height/2 -> R5 + subs r1,r5 // y - h/2 -> R1 + mov r12,r1 // current coordinate Y0 = y - h/2 -> R12 + + // prepare divide result to get 1< R5 + str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL + b 4f + + // using perspective, check ceilling mode +2: bpl 3f // horizon is not negative + subs r1,r5,r1 // negate, y = h - y + subs r1,#1 // y = h - 1 - y + negs r4,r4 // absolute value of horizon + + // prepare current coordinate Y0 = y - h -> R12 +3: subs r7,r1,r5 // y - h = current Y coordinate -> R7 + mov r12,r7 // store current coordinate Y0 -> R12 + + // start calculating distance coefficient dist = FRACTMUL*h/(y + horiz) + lsls r5,#FRACT // destination height * FRACTMUL -> R5 + str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h + adds r1,r4 // horizon + y -> R2 + str r1,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz + +// R0 ... pointer to destination data buffer +// R2 ... sLayer +// R12 ... 
current coordinate Y0 + + // prepare start coordinate X0 = -w/2 -> LR +4: ldrh r3,[r2,#SLAYER_W] // get destination width -> R3 + lsrs r5,r3,#1 // width/2 + negs r5,r5 // negate + mov lr,r5 // store start coordinate X0 -> LR + + // prepare number of 4-pixels (loop counter) -> R7 + lsrs r7,r3,#2 // width/4 -> R7 + + // get number of bits of image width "xbits" -> R1 + ldrb r1,[r2,#SLAYER_XBITS] // number of bits of image width -> R1 + + // get number of bits of image height "ybits" -> R4 + ldrb r4,[r2,#SLAYER_YBITS] // number of bits of image height -> R4 + + // prepare address of interpolator base -> R3 + ldr r3,RenderPersp_Interp // get address of interpolator base -> R3 + +// R0 ... pointer to destination data buffer +// R1 ... number of bits of image width xbits +// R2 ... sLayer +// R3 ... interpolator base +// R4 ... number of bits of image height ybits +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... current coordinate Y0 + +// ---- setup interpolator + + // set image base to base2 + ldr r6,[r2,#SLAYER_IMG] // load image base + str r6,[r3,#BASE2_OFFSET] // set image base + + // set control word of lane 1 - add raw lane base back to accumulator, shift "FRACT-xbits", mask xbits...xbits+ybits-1 + ldr r6,RenderPersp_Ctrl // load control word + subs r6,r1 // FRACT - xbits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required) + lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift xbits to mask LSB position -> R5 + orrs r6,r5 // add xbits to control word + subs r1,#1 // xbits - 1 -> R1 + adds r5,r1,r4 // xbits-1+ybits -> R5 + lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to MSB mask position + orrs r6,r5 // add to control word + str r6,[r3,#CTRL_LANE1_OFFSET] // set control word of lane 1 + +// R0 ... pointer to destination data buffer +// R1 ... image width xbits-1 +// R2 ... sLayer +// R3 ... interpolator base +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... 
current coordinate Y0 + + // set control word of lane 0 - add raw lane base back to accumulator, shift "FRACT", mask 0..xbits-1 + ldr r6,RenderPersp_Ctrl // load control word + lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift xbits-1 to mask MSB position + orrs r6,r1 // add to control word + str r6,[r3,#CTRL_LANE0_OFFSET] // set control word of lane 0 + +// R0 ... pointer to destination data buffer +// R2 ... sLayer +// R3 ... interpolator base +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... current coordinate Y0 + +// ---- set matrix + + // get pointer to matrix -> R4 + ldr r4,[r2,#SLAYER_PAR] // get pointer to matrix -> R4 + + // get distance coefficient dist -> R1 + ldr r1,RenderPersp_pSioBase // get address of SIO base -> R1 + ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient + +// r4+0 ... m11 +// r4+4 ... m12 +// r4+8 ... m13 +// r4+12 ... m21 +// r4+16 ... m22 +// r4+20 ... m23 + + // set m11 -> R5 base0 + ldr r5,[r4,#0] // load m11 + muls r5,r1 // m11*dist + asrs r5,#FRACT-1 // (m11*dist)>>(FRACT-1) ... 2*delta + str r5,[r3,#BASE0_OFFSET] // set base0 + asrs r5,#1 // (m11*dist)>>FRACT + + // set m21 -> R6 base1 + ldr r6,[r4,#12] // load m21 + muls r6,r1 // m21*dist + asrs r6,#FRACT-1 // (m21*dist)>>(FRACT-1) ... 2*delta + str r6,[r3,#BASE1_OFFSET] // set base1 + asrs r6,#1 // (m21*dist)>>FRACT + +// R0 ... pointer to destination data buffer +// R1 ... distance coefficient +// R3 ... interpolator base +// R4 ... pointer to matrix +// R5 ... m11 +// R6 ... m21 +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... 
current coordinate Y0 + + // set x0*m11 + y0*m12 + m13 -> accum0 + mov r2,lr // start coordinate X0 -> X2 + muls r5,r2 // x0*m11 -> R5 + muls r2,r6 // x0*m21 -> R2 + mov lr,r1 // save distance coefficient -> LR + ldr r6,[r4,#4] // load m12 -> R6 + muls r1,r6 // m12*dist -> R1 + asrs r1,#FRACT // (m12*dist)>>FRACT -> R1 + mov r6,r12 // load coordinate Y0 -> R6 + muls r1,r6 // y0*m12 -> R1 + adds r5,r1 // x0*m11 + y0*m12 -> R5 + ldr r1,[r4,#8] // load m13 -> R1 + adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5 + str r5,[r3,#ACCUM0_OFFSET] // set accum0 + +// R0 ... pointer to destination data buffer +// R2 ... x0*m21 +// R3 ... interpolator base +// R4 ... pointer to matrix +// R6 ... current coordinate Y0 +// R7 ... width/4 +// LR ... distance coefficient + + // set x0*m21 + y0*m22 + m23 -> accum1 + ldr r1,[r4,#16] // load m22 -> R1 + mov r5,lr // distance coefficient -> R5 + muls r1,r5 // m22*dist + asrs r1,#FRACT // (m22*dist)>>FRACT -> R1 + muls r1,r6 // y0*m22 -> R1 + adds r2,r1 // x0*m21 + y0*m22 -> R2 + ldr r1,[r4,#20] // load m23 -> R1 + adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2 + str r2,[r3,#ACCUM1_OFFSET] // set accum1 + +// ---- process odd 4-pixel + +// R0 ... pointer to destination data buffer +// R1 ... (temporary - pixel accumulator 1) +// R3 ... interpolator base +// R4 ... (temporary - get pointer to pixel, load pixel) +// R7 ... 
width/4 (loop counter) + + lsrs r7,#1 // width/4/2 + bcc 2f // no odd 4-pixel + + // [5] load 1st pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r1,[r4,#0] // [2] load pixel + lsls r4,r1,#8 // [1] shift 1 byte left + orrs r1,r4 // [1] add pixel to accumulator + + // [7] load 2nd pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r4,[r4,#0] // [2] load pixel + lsls r4,#16 // [1] shift 2 bytes left + orrs r1,r4 // [1] add pixel to accumulator + lsls r4,#8 // [1] shift 1 byte left + orrs r1,r4 // [1] add pixel to accumulator + + // [2] store 4 pixels + stmia r0!,{r1} // [2] store 4 pixels + + // check number of remaining pixels +2: tst r7,r7 // check number of pixels + beq 8f // end + +// ---- [30 per 8 pixels] inner loop +// R0 ... pointer to destination data buffer +// R1 ... (temporary - pixel accumulator 1) +// R2 ... (temporary - pixel accumulator 2) +// R3 ... interpolator base +// R4 ... (temporary - get pointer to pixel, load pixel) +// R7 ... width/8 (loop counter) + + // [5] load 1st pixel +6: ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r1,[r4,#0] // [2] load pixel + lsls r4,r1,#8 // [1] shift 1 byte left + orrs r1,r4 // [1] add pixel to accumulator + + // [7] load 2nd pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r4,[r4,#0] // [2] load pixel + lsls r4,#16 // [1] shift 2 bytes left + orrs r1,r4 // [1] add pixel to accumulator + lsls r4,#8 // [1] shift 1 byte left + orrs r1,r4 // [1] add pixel to accumulator + + // [5] load 1st pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r2,[r4,#0] // [2] load pixel + lsls r4,r2,#8 // [1] shift 1 byte left + orrs r2,r4 // [1] add pixel to accumulator + + // [7] load 2nd pixel + ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value + ldrb r4,[r4,#0] // [2] load pixel + lsls r4,#16 // [1] shift 2 bytes left + orrs r2,r4 // [1] add pixel to accumulator + lsls r4,#8 // [1] shift 1 byte left + orrs r2,r4 // [1] add pixel to accumulator + + // [3] 
store 8 pixels + stmia r0!,{r1,r2} // [3] store 8 pixels + + // [2,3] loop counter + subs r7,#1 // [1] 8-pixel counter + bne 6b // [1,2] next 8-pixels + + // pop registers +8: pop {r4-r7,pc} + + .align 2 +// pointer to SIO base +RenderPersp_pSioBase: + .word SIO_BASE // addres of SIO base + +// pointer to Interp1 base +RenderPersp_Interp: + .word SIO_BASE+SIO_INTERP1_ACCUM0_OFFSET // addres of interpolator base + +RenderPersp_Ctrl: // lane control word + .word SIO_INTERP1_CTRL_LANE0_ADD_RAW_BITS | (FRACT< R4 + ldr r4,[sp,#24] // load video segment -> R4 + + // get wrap width -> [SP+24] + ldrh r7,[r4,#SSEGM_WRAPX] // get wrap width + movs r6,#3 // mask to align to 32-bit + bics r7,r6 // align wrap + str r7,[sp,#24] // save wrap width + + // align X coordinate to 32-bit -> R1 + bics r1,r6 + + // align remaining width -> [SP+0] + bics r3,r6 + str r3,[sp,#0] // save new width + + // base pointer to image data (without X) -> LR, R2 + ldrh r5,[r4,#SSEGM_WB] // get pitch of rows + muls r2,r5 // Y * WB -> offset of row in image buffer + ldr r5,[r4,#SSEGM_DATA] // pointer to data + add r2,r5 // base address of image buffer + mov lr,r2 // save pointer to image buffer + + // prepare pointer to image data with X -> R2 + lsrs r6,r1,#3 // convert X to 8-pixel offset + add r2,r6 // pointer to source image buffer -> R2 + + // prepare size of one plane -> R3 + ldr r3,[r4,#SSEGM_PAR] // get size of one plane -> R3 + + // prepare pointer to palette translation table -> R7 + ldr r7,[r4,#SSEGM_PAR2] // get pointer to palette translation table -> R7 + +// ---- render 2nd half of first 8-pixel +// R0 ... pointer to destination data buffer +// R1 ... start X coordinate +// R2 ... pointer to source image data +// R3 ... size of one plane (= offset of plane 1 from plane 0) +// R4 ... (temporary) +// R5 ... (temporary) +// R6 ... (temporary) +// R7 ... *pointer to palette translation table +// LR ... *base pointer to image data (without X) +// [SP+0] ... *remaining width +// [SP+24] ... 
*wrap width + + // check bit 2 of X coordinate - check if image starts with 2nd half of first 8-pixel + lsls r5,r1,#29 // check bit 2 of X coordinate + bpl 2f // bit 2 not set, starting even 4-pixels + + // [5] load samples -> R5, R6 + ldrb r5,[r2,#0] // [2] load sample from plane 1 + ldrb r6,[r2,r3] // [2] load sample from plane 2 + adds r2,#1 // [1] increase pointer + + // [5] compose samples LOW -> R5 + lsls r6,#28 // [1] isolate low 4 bits from sample 2 + lsrs r6,#22 // [1] shift to bit position 6 + lsls r5,#28 // [1] isolate low 4 bit from sample 1 + lsrs r5,#26 // [1] shift to bit position 2 + orrs r5,r6 // [1] compose samples + + // [5] write pixels + ldr r5,[r7,r5] // [2] load colors + stmia r0!,{r5} // [3] write pixels + + // shift X coordinate + adds r1,#4 // shift X coordinate + + // check end of segment + ldr r6,[sp,#24] // load wrap width + cmp r1,r6 // X=end of segment? + blo 1f + movs r1,#0 // reset X coordinate + mov r2,lr // get base pointer to image data -> R2 + + // shift remaining width +1: ldr r6,[sp,#0] // get remaining width + subs r6,#4 // shift width + str r6,[sp,#0] // save new width + + // prepare wrap width - start X -> R6 +2: ldr r6,[sp,#24] // load wrap width + subs r6,r1 // pixels remaining to end of segment + +// ---- start outer loop, render one part of segment +// Outer loop variables (* prepared before outer loop): +// R0 ... *pointer to destination data buffer +// R1 ... number of 4-pixels - 1 to generate in one part of segment +// R2 ... *pointer to source image data +// R3 ... *size of one plane (= offset of plane 1 from plane 0) +// R4 ... (temporary) +// R5 ... (temporary) +// R6 ... part width +// R7 ... *pointer to palette translation table +// LR ... *base pointer to image data (without X) +// [SP+0] ... *remaining width +// [SP+24] ... 
*wrap width + +RenderPlane2_OutLoop: + + // limit wrap width by total width -> R7 + ldr r4,[sp,#0] // get remaining width + cmp r6,r4 // compare with wrap width + bls 2f // width is OK + mov r6,r4 // limit wrap width + + // check number of pixels +2: cmp r6,#8 // check number of remaining pixels + bhs 5f // enough 8-pixels remain + + // check if 1st part of last 8-pixel remains + cmp r6,#4 // check number of pixels + blo 3f // all done + +// ---- render 1st part of last 8-pixel + +RenderPlane2_Last: + + // [5] load samples -> R5, R4 + ldrb r5,[r2,#0] // [2] load sample from plane 1 + ldrb r4,[r2,r3] // [2] load sample from plane 2 + adds r2,#1 // [1] increase pointer + + // [5] compose samples HIGH -> R4 + lsrs r4,#4 // [1] isolate high 4 bits from sample 2 + lsls r4,#8 // [1] shift left + orrs r4,r5 // [1] compose sample 2 with sample 1 + lsrs r4,#4 // [1] isolate high 4 bits from sample 1 + lsls r4,#2 // [1] 2 shifts to get index*4 + + // [4] write pixels + ldr r4,[r7,r4] // [2] load colors + stmia r0!,{r4} // [2] write pixels + + // check if continue with next segment + mov r2,lr // get base pointer to image data -> R2 + cmp r6,#4 + bhi RenderPlane2_OutLoop + + // pop registers and return +3: pop {r3-r7,pc} + +// ---- prepare to render whole 8-pixels + + // prepare number of 4-pixels to render -> R1 +5: lsrs r1,r6,#2 // shift to get number of 4-pixels + lsls r6,r1,#2 // shift back to get number of pixels, rounded down -> R6 + subs r4,r6 // get remaining width + str r4,[sp,#0] // save new remaining width + subs r1,#1 // number of 4-pixels - 1 + +// ---- [25*N-1] start inner loop, render whole 8-pixels in one part of segment +// Inner loop variables (* prepared before inner loop): +// R0 ... *pointer to destination data buffer +// R1 ... *number of 4-pixels - 1 to generate (loop counter) +// R2 ... *pointer to source image data +// R3 ... *size of one plane (= offset of plane 1 from plane 0) +// R4 ... output sample +// R5 ... sample from plane 1 +// R6 ... 
sample from plane 2 +// R7 ... *pointer to palette translation table +// LR ... *base pointer to image data (without X) +// [SP+0] ... *remaining width +// [SP+24] ... *wrap width + +RenderPlane2_InLoop: + + // [5] load samples -> R5, R6 + ldrb r5,[r2,#0] // [2] load sample from plane 1 + ldrb r6,[r2,r3] // [2] load sample from plane 2 + adds r2,#1 // [1] increase pointer + + // [5] compose samples HIGH -> R4 + lsrs r4,r6,#4 // [1] isolate high 4 bits from sample 2 + lsls r4,#8 // [1] shift left + orrs r4,r5 // [1] compose sample 2 with sample 1 + lsrs r4,#4 // [1] isolate high 4 bits from sample 1 + lsls r4,#2 // [1] 2 shifts to get index*4 + + // [2] prepare first 4 pixels + ldr r4,[r7,r4] // [2] load colors + + // [5] compose samples LOW -> R5 + lsls r6,#28 // [1] isolate low 4 bits from sample 2 + lsrs r6,#22 // [1] shift to bit position 6 + lsls r5,#28 // [1] isolate low 4 bit from sample 1 + lsrs r5,#26 // [1] shift to bit position 2 + orrs r5,r6 // [1] compose samples + + // [5] write pixels + ldr r5,[r7,r5] // [2] load colors + stmia r0!,{r4,r5} // [3] write pixels + + // [2,3] loop counter + subs r1,#2 // [1] loop counter + bhi RenderPlane2_InLoop // [1,2] > 0, next step + +// ---- end inner loop + +RenderPlane2_EndLoop: + + // continue to outer loop + ldr r6,[sp,#24] // load wrap width -> R6 + beq RenderPlane2_Last // render 1st half of last 8-pixels + mov r2,lr // get base pointer to image data -> R2 + b RenderPlane2_OutLoop // go back to outer loop diff --git a/MCUME_pico/picovga_t4/render/vga_progress.S b/MCUME_pico/picovga_t4/render/vga_progress.S new file mode 100755 index 0000000..4ec2b0e --- /dev/null +++ b/MCUME_pico/picovga_t4/render/vga_progress.S @@ -0,0 +1,123 @@ + +// **************************************************************************** +// +// VGA render GF_PROGRESS +// +// **************************************************************************** + +#include "../define.h" // common definitions of C and ASM + + .syntax unified + 
.section .time_critical.Render, "ax" + .cpu cortex-m0plus + .thumb // use 16-bit instructions + +// extern "C" u32* RenderProgress(u32* cbuf, int x, int y, int w, sSegm* segm); + +// render horizontal progress indicator GF_PROGRESS +// R0 ... pointer to control buffer +// R1 ... start X coordinate (in pixels, must be multiple of 4) +// R2 ... start Y coordinate (in graphics lines) +// R3 ... width to display (must be multiple of 4 and > 0) +// [stack] ... segm video segment sSegm +// Output new pointer to control buffer. +// 320 pixels takes 0.5 us on 151 MHz. + +.thumb_func +.global RenderProgress +RenderProgress: + + // push registers + push {r4-r7,lr} + +// Stack content: +// SP+0: R4 +// SP+4: R5 +// SP+8: R6 +// SP+12: R7 +// SP+16: LR +// SP+20: video segment + +// Variables: +// R0 ... pointer to control buffer +// R1 ... X coordinate/4 +// R2 ... data sample +// R3 ... remaining width +// R4 ... gradient buffer 1 +// R5 ... gradient buffer 2 +// R6 ... (temporary) +// R7 ... current wrap width +// LR ... 
wrap width + + // get pointer to video segment -> R4 + ldr r4,[sp,#20] // load video segment -> R4 + + // prepare X coordinate/4 -> R1 + lsrs r1,#2 // X coordinate/4 -> R1 + + // load data sample -> R2 + ldr r5,[r4,#SSEGM_DATA] // pointer to data + ldrb r2,[r5,r2] // load data sample -> R2 + + // prepare remaining width/4 -> R3 + lsrs r3,#2 // width/4 -> R3 + + // get wrap width/4 -> LR + ldrh r7,[r4,#SSEGM_WRAPX] // get wrap width + lsrs r7,#2 // wrap width/4 -> R7 + mov lr,r7 + + // prepare gradient buffers -> R4, R5 + ldr r5,[r4,#SSEGM_PAR2] // gradient buffer 2 -> R5 + ldr r4,[r4,#SSEGM_PAR] // gradient buffer 1 -> R4 + + // check remaining width +2: tst r3,r3 // check remaining width + beq 9f // end of data + + // prepare wrap width - start X -> R7 + mov r7,lr // wrap width + subs r7,r1 // pixels remaining to end of segment + + // limit wrap width by total width -> R7 + cmp r7,r3 // compare with wrap width + bls 4f // width is OK + mov r7,r3 // limit wrap width + + // decrease remaining width +4: subs r3,r7 // subtract from remaining width + + // first part visible if x < data + cmp r1,r2 + bhs 6f // x >= data + + // width of this part + subs r6,r2,r1 // width <- data - x + + // limit width + cmp r6,r7 // check width + bls 5f // width is OK + mov r6,r7 // limit width +5: subs r7,r6 // decrease width + + // save control block with 1st part +5: stm r0!,{r6} // write width + adds r6,r4,r1 // gradient address at offset x + stm r0!,{r6} // write address + mov r1,r2 // X <- data + + // check if some width remain +6: tst r7,r7 // check with of this part + beq 7f // end of segment + + // save control block width 2nd part + stm r0!,{r7} // write width + adds r6,r5,r1 // gradient address at offset x + stm r0!,{r6} // write address + + // reset X +7: movs r1,#0 // reset X + b 2b // next segment + + // pop registers and return +9: pop {r4-r7,pc} diff --git a/MCUME_pico/picovga_t4/render/vga_sprite.S b/MCUME_pico/picovga_t4/render/vga_sprite.S new file mode 100755 index 
0000000..c8723f4 --- /dev/null +++ b/MCUME_pico/picovga_t4/render/vga_sprite.S @@ -0,0 +1,164 @@ + +// **************************************************************************** +// +// VGA render LAYERMODE_SPRITE* +// +// **************************************************************************** + +#include "../define.h" // common definitions of C and ASM + + .syntax unified + .section .time_critical.Render, "ax" + .cpu cortex-m0plus + .thumb // use 16-bit instructions + +// extern "C" void RenderSprite(u8* dbuf, int y, sLayer* scr) + +// render layers with sprites LAYERMODE_SPRITE* +// R0 ... dbuf pointer to data buffer +// R1 ... y coordinate of scanline +// R2 ... scr pointer to layer screen structure sLayer + +.thumb_func +.global RenderSprite +RenderSprite: + + // push registers + push {r4-r7,lr} + +// Stack content and input variables: +// R0 dbuf pointer to data buffer +// R1 Y coordinate of scanline +// R2 scr pointer to layer screen structure sLayer, later: num number of sprites +// R3 +// SP+0: R4 +// SP+4: R5 +// SP+8: R6 +// SP+12: R7 +// SP+16: LR + +// Variables: +// R0 ... dbuf pointer to data buffer, later: dbuf[x] destination address +// R1 ... Y coordinate of scanline, later: Y2 coordinate relative to sprite base, later: s->img[Y2*WB+X2] address of sprite line +// R2 ... num number of sprites (loop counter), later: W2 width of sprite segment +// R3 ... s pointer to current sprite, later: col key color +// R4 ... (temporary), later: absolute X coordinate of start of line +// R5 ... relative X2 coordinate of sprite segment +// R6 ... W layer screen width +// R7 ... spr pointer to list of sprites +// LR + + // load pointer to list of sprites -> R7 + ldr r7,[r2,#SLAYER_IMG] + + // load screen width -> R6 + ldrh r6,[r2,#SLAYER_W] + + // load number of sprites -> R2 + ldrh r2,[r2,#SLAYER_SPRITENUM] + + // count number of sprites, end if num = 0 +2: subs r2,#1 // decrement number of sprites + blo 9f // no other sprites + +// R0 ... 
dbuf pointer to data buffer +// R1 ... Y coordinate of scanline +// R2 ... num number of sprites (loop counter) +// R3 ... +// R4 ... +// R5 ... +// R6 ... W layer screen width +// R7 ... spr pointer to list of sprites + + // push registers + push {r0-r2} // push resiters R0..R2 + + // get pointer to next sprite -> R3 + ldmia r7!,{r3} // pointer to sprite -> R3 +// R3 ... s pointer to current sprite + + // prepare Y2 coordinate relative to sprite base -> R1 + ldrh r4,[r3,#SSPRITE_Y] // get Y coordinate of the sprite -> R4 + sxth r4,r4 // signed extend Y2 + subs r1,r1,r4 // relative coordinate Y2 = Y - s->y +// R1 ... Y2 coordinate relative to sprite base + + // check if Y2 coordinate is valid + bmi 8f // Y2 < 0, go next sprite + ldrh r4,[r3,#SSPRITE_H] // get sprite height + cmp r1,r4 // check sprite height + bge 8f // Y2 >= s->h, go next sprite + + // prepare relative start X2 coordinate of this line segment -> R5 + ldr r4,[r3,#SSPRITE_X0] // get table of X0 of lines + ldrb r5,[r4,r1] // get X2 coordinate s->x0[y2] -> R5 + // lsls r5,#2 // convert X2 coordinate to byte offset +// R5 ... relative X2 coordinate of sprite segment + + // get width W2 of this line segment -> R2 + ldr r4,[r3,#SSPRITE_W0] // get table of W0 of lines + ldrb r2,[r4,r1] // get W2 width s->w0[y2] -> R2 + // lsls r2,#2 // convert W2 width to bytes +// R2 ... W2 width of sprite segment + + // get address of sprite line s->img[Y2*s->wb] -> R1 + ldrh r4,[r3,#SSPRITE_WB] // get sprite pitch w->wb + muls r1,r1,r4 // sprite offset Y2*s->wb + ldr r4,[r3,#SSPRITE_IMG] // get sprite image + add r1,r4 // line address -> R1 +// R1 ... s->img[Y2*WB] address of sprite line + + // get absolute X coordinate of start of line -> R4 + ldrh r4,[r3,#SSPRITE_X] // get sprite X coordinate -> R4 + sxth r4,r4 // signed extend X +// R4 ... absolute X coordinate of start of line + + // get key color -> R3 + ldrb r3,[r3,#SSPRITE_KEYCOL] // get key color -> R3 +// R3 ... 
col key color + + // check if X coordinate >= 0 + adds r4,r4,r5 // s->X + X2, X coordinate of start of line -> R4 + bpl 3f // X >= 0, sprite does not lie below start + + // sprite correction + subs r5,r4 // X2 -= X + adds r2,r4 // W2 += X + movs r4,#0 // X = 0 + + // shift source address -> R1 +3: adds r1,r5 // add X2 +// R1 ... s->img[Y2*WB+X2] address of sprite line +// R5 ... + + // check line length W2 + subs r5,r6,r4 // W - X -> R5 + cmp r2,r5 // compare W2 with W - X + ble 4f // W2 <= W - X, length is OK + mov r2,r5 // limit segment width W2 -> R2 + + // check width W2 +4: tst r2,r2 // check W2 + ble 8f // no W2 left (W2 <= 0) + + // shift destination address + adds r0,r4 + +// R0 ... dbuf pointer to data buffer +// R1 ... s->img[Y2*WB+X2] address of sprite line +// R2 ... W2 width of sprite segment +// R3 ... col key color +// R4 ... +// R5 ... +// R6 ... W layer screen width +// R7 ... spr pointer to list of sprites + + // blit sprite line + bl BlitKey // blit sprite line + + // pop registers and continue loop +8: pop {r0-r2} // pop registers R0..R2 + b 2b // continue loop + + // pop registers and return +9: pop {r4-r7,pc} diff --git a/MCUME_pico/picovga_t4/render/vga_tile.S b/MCUME_pico/picovga_t4/render/vga_tile.S new file mode 100755 index 0000000..23539b0 --- /dev/null +++ b/MCUME_pico/picovga_t4/render/vga_tile.S @@ -0,0 +1,431 @@ + +// **************************************************************************** +// +// VGA render GF_TILE +// +// **************************************************************************** +// u16 par3; // SSEGM_PAR3 tile width (must be multiple of 4) +// u32 par; // SSEGM_PAR tile table with one column of tiles +// u32 par2; // SSEGM_PAR2 tile height + +#include "../define.h" // common definitions of C and ASM +#include "hardware/regs/sio.h" // registers of hardware divider +#include "hardware/regs/addressmap.h" // SIO base address + + .syntax unified + .section .time_critical.Render, "ax" + .cpu cortex-m0plus + .thumb 
// use 16-bit instructions + +// extern "C" u32* RenderTile(u32* cbuf, int x, int y, int w, sSegm* segm); + +// render tiles GF_TILE +// cbuf ... destination control buffer +// x ... start X coordinate (must be multiple of 4) +// y ... start Y coordinate +// w ... width of this segment (must be multiple of 4) +// segm ... video segment +// Output new cbuf pointer. +// 320 pixels takes on 151 MHz: tiles 8x8 3.5 us, tile 16x16 2 us, tiles 32x32 1.3 us, tiles 64x64 0.9 us. + +.thumb_func +.global RenderTile +RenderTile: + + // push registers + push {r1-r7,lr} + +// Input registers and stack content: +// R0 ... destination control buffer +// SP+0: R1 ... X coordinate +// SP+4: R2 ... Y coordinate +// SP+8: R3 ... width to display +// SP+12: R4 +// SP+16: R5 +// SP+20: R6 +// SP+24: R7 +// SP+28: LR +// SP+32: video segment + + // get pointer to video segment -> R4 + ldr r4,[sp,#32] // load video segment -> R4 + +// R0 ... pointer to destination control buffer +// R1 ... X coordinate +// R2 ... Y coordinate +// R3 ... remaining width +// R4 ... sSegm* + + // start divide Y/tile_height + ldr r5,RenderTile_pSioBase // get address of SIO base -> R5 + str r2,[r5,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate + ldr r2,[r4,#SSEGM_PAR2] // tile height -> R2 + str r2,[r5,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, tile height + +// - now we must wait at least 8 clock cycles to get result of division + +// R0 ... pointer to destination control buffer +// R1 ... X coordinate +// R2 ... tile height +// R3 ... remaining width +// R4 ... sSegm* +// R5 ... SIO_BASE + + // [6] get wrap width -> [SP+0] + ldrh r7,[r4,#SSEGM_WRAPX] // [2] get wrap width + movs r6,#3 // [1] mask to align to 32-bit + bics r7,r6 // [1] align wrap + str r7,[sp,#0] // [2] save wrap width + +// R0 ... pointer to destination control buffer +// R1 ... X coordinate +// R2 ... tile height +// R3 ... remaining width +// R4 ... sSegm* +// R5 ... SIO_BASE +// R6 ... align mask #3 +// [SP+0] ... 
wrap width + + // [1] align X coordinate to 32-bit -> R1 + bics r1,r6 // [1] align X + +// R0 ... pointer to destination control buffer +// R1 ... X coordinate +// R2 ... tile height +// R3 ... remaining width +// R4 ... sSegm* +// R5 ... SIO_BASE +// R6 ... align mask #3 +// [SP+0] ... wrap width + + // [3] align remaining width -> [SP+4] + bics r3,r6 // [1] align width + str r3,[sp,#4] // [2] store aligned width to [SP+4] + +// R0 ... pointer to destination control buffer +// R1 ... X coordinate +// R2 ... tile height +// R4 ... sSegm* +// R5 ... SIO_BASE +// [SP+0] ... wrap width +// [SP+4] ... remaining width + + // [4] prepare tile width -> [SP+8], R3 + ldrh r3,[r4,#SSEGM_PAR3] // [2] get tile width -> R3 + str r3,[sp,#8] // [2] save tile width -> [SP+8] + +// R0 ... pointer to destination control buffer +// R1 ... X coordinate +// R2 ... tile height +// R3 ... tile width +// R4 ... sSegm* +// R5 ... SIO_BASE +// [SP+0] ... wrap width +// [SP+4] ... remaining width +// [SP+8] ... tile width + + // load result of division Y/tile_height -> R6 Y relative at row, R7 Y row + // Note: QUOTIENT must be read last + ldr r6,[r5,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R6, Y coordinate relative to current row + ldr r7,[r5,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R7, index of row + +// R0 ... pointer to destination control buffer +// R1 ... X coordinate +// R2 ... tile height +// R3 ... tile width +// R4 ... sSegm* +// R5 ... SIO_BASE +// R6 ... Y relative at row +// R7 ... Y row index +// [SP+0] ... wrap width +// [SP+4] ... remaining width +// [SP+8] ... tile width + + // start divide X/tile_width + str r1,[r5,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, X coordinate + str r3,[r5,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, tile width + +// - now we must wait at least 8 clock cycles to get result of division + +// R0 ... pointer to destination control buffer +// R1 ... X coordinate +// R2 ... tile height +// R3 ... tile width +// R4 ... 
sSegm* +// R5 ... SIO_BASE +// R6 ... Y relative at row +// R7 ... Y row index +// [SP+0] ... wrap width +// [SP+4] ... remaining width +// [SP+8] ... tile width + + // [1] prepare tile size -> R2 + muls r2,r3 // [1] tile height*width -> size R2 + +// R0 ... pointer to destination control buffer +// R1 ... X coordinate +// R2 ... tile size +// R3 +// R4 ... sSegm* +// R5 ... SIO_BASE +// R6 ... Y relative at row +// R7 ... Y row index +// [SP+0] ... wrap width +// [SP+4] ... remaining width +// [SP+8] ... tile width + + // [7] base pointer to source data buffer (without X) -> LR, R7 + ldrh r3,[r4,#SSEGM_WB] // [2] get pitch of rows -> R3 + muls r7,r3 // [1] pitch * row (Y * WB) -> offset of row in data buffer + ldr r3,[r4,#SSEGM_DATA] // [2] pointer to data -> R3 + adds r7,r3 // [1] base address of data buffer + mov lr,r7 // [1] save base address + +// R0 ... pointer to destination control buffer +// R1 ... X coordinate +// R2 ... tile size +// R3 +// R4 ... sSegm* +// R5 ... SIO_BASE +// R6 ... Y relative at row +// R7 ... base address of data buffer (without X) +// LR ... base address of data buffer (without X) +// [SP+0] ... wrap width +// [SP+4] ... remaining width +// [SP+8] ... tile width + + // [6] tile base address -> R4 + ldr r3,[sp,#8] // [2] tile width + muls r6,r3 // [1] tile width * Y relative to row -> tile line offset R6 + ldr r4,[r4,#SSEGM_PAR] // [2] pointer to tiles + adds r4,r6 // [1] tile base address -> R4 + +// R0 ... pointer to destination control buffer +// R1 ... X coordinate +// R2 ... tile size +// R3 ... tile width +// R4 ... tile base address +// R5 ... SIO_BASE +// R6 +// R7 ... base address of data buffer (without X) +// LR ... base address of data buffer (without X) +// [SP+0] ... wrap width +// [SP+4] ... remaining width +// [SP+8] ... 
tile width + + // load result of division X/tile_width -> R6 X pixel relative, R5 tile position + // Note: QUOTIENT must be read last + ldr r6,[r5,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R6, X pixel relative in tile + ldr r5,[r5,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R5, tile position + +// R0 ... pointer to destination control buffer +// R1 ... X coordinate +// R2 ... tile size +// R3 ... tile width +// R4 ... tile base address +// R5 ... tile position +// R6 ... X pixel relative in tile +// R7 ... base address of data buffer (without X) +// LR ... base address of data buffer (without X) +// [SP+0] ... wrap width +// [SP+4] ... remaining width +// [SP+8] ... tile width + + // prepare current pointer to source data buffer with X -> R7 + adds r7,r5 // tile source address -> R7 + +// R0 ... pointer to destination control buffer +// R1 ... X coordinate +// R2 ... tile size +// R3 ... tile width +// R4 ... tile base address +// R5 +// R6 ... X pixel relative in tile +// R7 ... pointer to source data buffer (with X) +// LR ... base address of data buffer (without X) +// [SP+0] ... wrap width +// [SP+4] ... remaining width +// [SP+8] ... 
tile width + +// ---- render rest of first tile + + // check if X is tile-aligned + tst r6,r6 // check tile align + beq 2f // X is tile aligned + + // shift X coordinate + subs r5,r3,r6 // pixels remain in current tile -> R5 + adds r1,r5 // shift X coordinate (align to next tile) + + // shift remaining width + ldr r3,[sp,#4] // get remaining width + subs r3,r5 // shift width + str r3,[sp,#4] // store remaining width + + // write number of 4-pixels + lsrs r5,#2 // number of 4-pixels + stmia r0!,{r5} // save width + + // load tile index -> R3 + ldrb r3,[r7,#0] // [2] load tile index + adds r7,#1 // [1] increase tile address + + // write tile addres + muls r3,r2 // tile index * tile size = tile offset + add r3,r4 // [1] add tile base address + add r3,r6 // [1] shift to tile start + stmia r0!,{r3} // [3] save pointer + + // check end of segment + ldr r3,[sp,#0] // get wrap width + cmp r1,r3 // check end of segment + blo 2f // not end of segment + movs r1,#0 // reset X coordinate + mov r7,lr // get base pointer to tile data + + // prepare wrap width - start X -> R5 +2: ldr r3,[sp,#0] // get wrap width + subs r5,r3,r1 // pixels remaining to end of segment + ldr r3,[sp,#4] // total remaining width -> R3 + +// ---- start outer loop, render one part of segment +// Outer loop variables (* prepared before outer loop): +// R0 ... *pointer to destination control buffer +// R1 ... +// R2 ... *tile size +// R3 ... *total remaining width +// R4 ... *tile base address +// R5 ... *wrap width of this segment +// R6 ... +// R7 ... *pointer to source data buffer +// LR ... *base address of data buffer (without X) +// [SP+0] ... wrap width +// [SP+4] ... remaining width +// [SP+8] ... 
tile width + +RenderTile_OutLoop: + + // limit wrap width by total width -> R5 + cmp r5,r3 // compare wrap width with total width + bls 2f // width is OK + mov r5,r3 // limit wrap width + + // check if remain whole tile +2: ldr r1,[sp,#8] // get tile width -> R1 + cmp r5,r1 // check number of remaining pixels + bhs 5f // remain whole tiles + + // check if start of last tile remains + cmp r5,#4 // check start of last tile + blo 3f // all done + mov r1,r5 // width to render + +// ---- render start of last tile +// R0 ... *pointer to destination control buffer +// R1 ... *width to render in this segment +// R2 ... *tile size +// R3 ... *total remaining width +// R4 ... *tile base address +// R5 ... *wrap width of this segment +// R6 ... +// R7 ... *pointer to source data buffer (with X) +// LR ... *base address of data buffer (without X) +// [SP+0] ... wrap width +// [SP+4] ... remaining width +// [SP+8] ... tile width + +RenderTile_Last: + + // save width + lsrs r6,r1,#2 // number of 4-pixels + stmia r0!,{r6} // save width + + // load tile index -> R6 + ldrb r6,[r7,#0] // [2] load tile index + adds r7,#1 // [1] increase tile index + + // save tile addres + muls r6,r2 // multiply tile index * tile size + add r6,r4 // [1] add tile base address + stmia r0!,{r6} // [3] save pointer + + // check if continue with next segment + mov r7,lr // get base pointer to tile data + ldr r6,[sp,#8] // get tile width -> R6 + cmp r5,r6 // whole tile remains? + bhs RenderTile_OutLoop // render next segment + + // pop registers and return +3: pop {r1-r7,pc} + +// ---- prepare to render whole tiles +// R0 ... pointer to destination control buffer +// R1 +// R2 ... tile size +// R3 ... total remaining width +// R4 ... tile base address +// R5 ... width of this segment +// R6 +// R7 ... pointer to source data buffer (with X) +// LR ... base address of data buffer (without X) +// [SP+0] ... wrap width +// [SP+4] ... remaining width +// [SP+8] ... 
tile width + + // prepare number of 4-pixels to render -> R1 +5: lsrs r1,r5,#2 // shift to get number of tiles in multiply of 4-pixels -> R1 + lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5 + subs r3,r5 // update remaining width -> R3 + + ldr r5,[sp,#8] // get tile width -> R5 + lsrs r5,#2 // tile width/4 -> R5 + subs r1,r5 // number of 4-pixels - width/4 + adds r1,#1 // number of 4-pixels - (width/4-1) + +// ---- [11*N-1] start inner loop, render in one part of segment +// Inner loop variables (* prepared before inner loop): +// R0 ... *pointer to destination control buffer +// R1 ... *number of 4-pixels to generate - 1 (loop counter) +// R2 ... *tile size +// R3 ... *total remaining width +// R4 ... *tile base address +// R5 ... *tile width/4 +// R6 ... (temporary) +// R7 ... *pointer to source data buffer (with X) +// LR ... base address of data buffer (without X) +// [SP+0] ... wrap width +// [SP+4] ... remaining width +// [SP+8] ... tile width + +RenderTile_InLoop: + + // [3] load tile index -> R6 + ldrb r6,[r7,#0] // [2] load tile index + adds r7,#1 // [1] increase tile index + + // [2] get tile addres + muls r6,r2 // [1] multiply tile index * tile size + add r6,r4 // [1] add tile base address + + // [3] save control block + stmia r0!,{r5,r6} // [3] save width and pointer + + // [2,3] loop + subs r1,r5 // [1] shift loop counter, subtract tile width/4 + bhi RenderTile_InLoop // [1,2] > 0, render next whole tile + +// ---- end inner loop, continue with last tile, or start new part + + // continue to outer loop + adds r1,r5 // return size of last tile + subs r1,#1 // add "tile size/4 - 1" + ldr r5,[sp,#0] // load wrap width -> R5 + lsls r1,#2 // convert back to pixels + bne RenderTile_Last // render 1st half of last tile + mov r7,lr // get base pointer to tile data -> R7 + b RenderTile_OutLoop // go back to outer loop + + .align 2 +// pointer to SIO base +RenderTile_pSioBase: + .word SIO_BASE // addres of SIO base diff --git 
a/MCUME_pico/picovga_t4/render/vga_tile2.S b/MCUME_pico/picovga_t4/render/vga_tile2.S
new file mode 100755
index 0000000..7e4db00
--- /dev/null
+++ b/MCUME_pico/picovga_t4/render/vga_tile2.S
@@ -0,0 +1,376 @@
+
+// ****************************************************************************
+//
+// VGA render GF_TILE2
+//
+// ****************************************************************************
+// u16 par3; // SSEGM_PAR3 tile width (must be multiple of 4)
+// u32 par; // SSEGM_PAR tile table; the code addresses tile N at byte offset N * tile width within a line (NOTE(review): this comment used to say "one column of tiles", which does not match that addressing - confirm layout)
+// u32 par2; // SSEGM_PAR2 LOW tile height, HIGH tile width bytes (used by the code as the byte pitch between lines of the tile table)
+
+#include "../define.h" // common definitions of C and ASM
+#include "hardware/regs/sio.h" // registers of hardware divider
+#include "hardware/regs/addressmap.h" // SIO base address
+
+	.syntax unified
+	.section .time_critical.Render, "ax"
+	.cpu cortex-m0plus
+	.thumb		// use 16-bit instructions
+
+// extern "C" u32* RenderTile2(u32* cbuf, int x, int y, int w, sSegm* segm);
+
+// render tiles GF_TILE2: writes (width in 4-pixel units, source address) pairs into the control buffer
+// cbuf ... destination control buffer
+// x ... start X coordinate (low 2 bits are cleared, i.e. rounded down to a multiple of 4)
+// y ... start Y coordinate
+// w ... width of this segment (low 2 bits are cleared, i.e. rounded down to a multiple of 4)
+// segm ... video segment (5th argument, read from the caller's stack)
+// Output: R0 = new cbuf pointer (advanced past the entries written here).
+// Rendering 320 pixels at 151 MHz takes: tiles 8x8 3.5 us, tiles 16x16 2 us, tiles 32x32 1.3 us, tiles 64x64 0.9 us.
+
+.thumb_func
+.global RenderTile2
+RenderTile2:
+
+	// push registers (the stack slots of R2 and R3 are reused below as locals [SP+0] and [SP+4])
+	push	{r2-r7,lr}
+
+// Input registers and stack content:
+// R0 ... destination control buffer
+// R1 ... X coordinate
+// SP+0: R2 ... Y coordinate (slot later overwritten with wrap width)
+// SP+4: R3 ... width to display (slot later overwritten with remaining width)
+// SP+8: R4
+// SP+12: R5
+// SP+16: R6
+// SP+20: R7
+// SP+24: LR
+// SP+28: video segment (5th argument, pushed by the caller)
+
+	// get pointer to video segment -> R4
+	ldr	r4,[sp,#28]	// load video segment -> R4
+
+// R0 ... pointer to destination control buffer
+// R1 ... X coordinate
+// R2 ... Y coordinate
+// R3 ... remaining width
+// R4 ... sSegm*
+
+	// start divide Y/tile_height on the SIO hardware divider
+	ldr	r5,RenderTile_pSioBase	// get address of SIO base -> R5
+	str	r2,[r5,#SIO_DIV_UDIVIDEND_OFFSET]	// store dividend, Y coordinate
+	ldrh	r2,[r4,#SSEGM_PAR2]	// tile height -> R2
+	str	r2,[r5,#SIO_DIV_UDIVISOR_OFFSET]	// store divisor, tile height
+
+// - now we must wait at least 8 clock cycles to get result of division (the bracketed numbers below appear to be cycle counts of the work done meanwhile)
+
+// R0 ... pointer to destination control buffer
+// R1 ... X coordinate
+// R3 ... remaining width
+// R4 ... sSegm*
+// R5 ... SIO_BASE
+
+	// [6] get wrap width -> [SP+0]
+	ldrh	r7,[r4,#SSEGM_WRAPX]	// [2] get wrap width
+	movs	r6,#3	// [1] mask of the low 2 bits (align to a multiple of 4)
+	bics	r7,r6	// [1] align wrap width down to a multiple of 4
+	str	r7,[sp,#0]	// [2] save wrap width
+
+// R0 ... pointer to destination control buffer
+// R1 ... X coordinate
+// R3 ... remaining width
+// R4 ... sSegm*
+// R5 ... SIO_BASE
+// R6 ... align mask #3
+// [SP+0] ... wrap width
+
+	// [1] align X coordinate down to a multiple of 4 -> R1
+	bics	r1,r6	// [1] align X
+
+// R0 ... pointer to destination control buffer
+// R1 ... X coordinate
+// R3 ... remaining width
+// R4 ... sSegm*
+// R5 ... SIO_BASE
+// R6 ... align mask #3
+// [SP+0] ... wrap width
+
+	// [3] align remaining width down to a multiple of 4 -> [SP+4]
+	bics	r3,r6	// [1] align width
+	str	r3,[sp,#4]	// [2] store aligned width to [SP+4]
+
+// R0 ... pointer to destination control buffer
+// R1 ... X coordinate
+// R4 ... sSegm*
+// R5 ... SIO_BASE
+// [SP+0] ... wrap width
+// [SP+4] ... remaining width
+
+	// [2] prepare tile width -> R3
+	ldrh	r3,[r4,#SSEGM_PAR3]	// [2] get tile width -> R3
+
+// R0 ... pointer to destination control buffer
+// R1 ... X coordinate
+// R3 ... tile width
+// R4 ... sSegm*
+// R5 ... SIO_BASE
+// [SP+0] ... wrap width
+// [SP+4] ... remaining width
+
+	// load result of division Y/tile_height -> R6 Y relative at row, R7 Y row
+	// Note: QUOTIENT must be read last
+	ldr	r6,[r5,#SIO_DIV_REMAINDER_OFFSET]	// get remainder of result -> R6, Y coordinate relative to current row
+	ldr	r7,[r5,#SIO_DIV_QUOTIENT_OFFSET]	// get quotient -> R7, index of row
+
+// R0 ... pointer to destination control buffer
+// R1 ... X coordinate
+// R3 ... tile width
+// R4 ... sSegm*
+// R5 ... SIO_BASE
+// R6 ... Y relative at row
+// R7 ... Y row index
+// [SP+0] ... wrap width
+// [SP+4] ... remaining width
+
+	// start divide X/tile_width
+	str	r1,[r5,#SIO_DIV_UDIVIDEND_OFFSET]	// store dividend, X coordinate
+	str	r3,[r5,#SIO_DIV_UDIVISOR_OFFSET]	// store divisor, tile width
+
+// - now we must wait at least 8 clock cycles to get result of division
+
+// R0 ... pointer to destination control buffer
+// R1 ... X coordinate
+// R3 ... tile width
+// R4 ... sSegm*
+// R5 ... SIO_BASE
+// R6 ... Y relative at row
+// R7 ... Y row index
+// [SP+0] ... wrap width
+// [SP+4] ... remaining width
+
+	// [7] base pointer to source data buffer (without X) -> LR, R7
+	ldrh	r2,[r4,#SSEGM_WB]	// [2] get pitch of rows -> R2
+	muls	r7,r2	// [1] pitch * row (Y * WB) -> offset of row in data buffer
+	ldr	r2,[r4,#SSEGM_DATA]	// [2] pointer to data -> R2
+	adds	r7,r2	// [1] base address of data buffer
+	mov	lr,r7	// [1] save base address (LR is used as a spare register until the final pop)
+
+// R0 ... pointer to destination control buffer
+// R1 ... X coordinate
+// R3 ... tile width
+// R4 ... sSegm*
+// R5 ... SIO_BASE
+// R6 ... Y relative at row
+// R7 ... base address of data buffer (without X)
+// LR ... base address of data buffer (without X)
+// [SP+0] ... wrap width
+// [SP+4] ... remaining width
+
+	// [6] tile base address -> R4
+	ldrh	r2,[r4,#SSEGM_PAR2+2]	// [2] tile width bytes (high half of par2) -> R2
+	muls	r6,r2	// [1] tile width bytes * Y relative to row -> tile line offset R6
+	ldr	r4,[r4,#SSEGM_PAR]	// [2] pointer to tiles (R4 no longer holds sSegm* after this)
+	adds	r4,r6	// [1] tile base address -> R4
+
+// R0 ... pointer to destination control buffer
+// R1 ... X coordinate
+// R3 ... tile width
+// R4 ... tile base address
+// R5 ... SIO_BASE
+// R7 ... base address of data buffer (without X)
+// LR ... base address of data buffer (without X)
+// [SP+0] ... wrap width
+// [SP+4] ... remaining width
+
+	// load result of division X/tile_width -> R6 X pixel relative, R5 tile position
+	// Note: QUOTIENT must be read last
+	ldr	r6,[r5,#SIO_DIV_REMAINDER_OFFSET]	// get remainder of result -> R6, X pixel relative in tile
+	ldr	r5,[r5,#SIO_DIV_QUOTIENT_OFFSET]	// get quotient -> R5, tile position
+
+// R0 ... pointer to destination control buffer
+// R1 ... X coordinate
+// R3 ... tile width
+// R4 ... tile base address
+// R5 ... tile position
+// R6 ... X pixel relative in tile
+// R7 ... base address of data buffer (without X)
+// LR ... base address of data buffer (without X)
+// [SP+0] ... wrap width
+// [SP+4] ... remaining width
+
+	// prepare current pointer to source data buffer with X -> R7
+	adds	r7,r5	// tile source address -> R7
+
+// R0 ... pointer to destination control buffer
+// R1 ... X coordinate
+// R3 ... tile width
+// R4 ... tile base address
+// R6 ... X pixel relative in tile
+// R7 ... pointer to source data buffer (with X)
+// LR ... base address of data buffer (without X)
+// [SP+0] ... wrap width
+// [SP+4] ... remaining width
+
+// ---- render rest of first tile (only when X does not start on a tile boundary)
+
+	// check if X is tile-aligned
+	tst	r6,r6	// check tile align (remainder == 0 ?)
+	beq	2f	// X is tile aligned
+
+	// shift X coordinate
+	subs	r5,r3,r6	// pixels remain in current tile -> R5
+	adds	r1,r5	// shift X coordinate (align to next tile)
+
+	// shift remaining width
+	ldr	r2,[sp,#4]	// get remaining width
+	subs	r2,r5	// shift width
+	str	r2,[sp,#4]	// store remaining width
+
+	// write number of 4-pixels
+	lsrs	r5,#2	// number of 4-pixels
+	stmia	r0!,{r5}	// save width
+
+	// load tile index -> R2
+	ldrb	r2,[r7,#0]	// [2] load tile index
+	adds	r7,#1	// [1] advance tile map pointer to the next tile index
+
+	// write tile address
+	muls	r2,r3	// tile index * tile width = tile offset
+	add	r2,r4	// [1] add tile base address
+	add	r2,r6	// [1] add X offset inside the tile (start in the middle of the tile)
+	stmia	r0!,{r2}	// [3] save pointer
+
+	// check end of segment
+	ldr	r2,[sp,#0]	// get wrap width
+	cmp	r1,r2	// check end of segment
+	blo	2f	// not end of segment
+	movs	r1,#0	// reset X coordinate
+	mov	r7,lr	// get base pointer to tile data
+
+	// prepare wrap width - start X -> R5
+2:	ldr	r2,[sp,#0]	// get wrap width
+	subs	r5,r2,r1	// pixels remaining to end of segment
+	ldr	r2,[sp,#4]	// total remaining width -> R2
+
+// ---- start outer loop, render one part of segment
+// R0 ... pointer to destination control buffer
+// R2 ... total remaining width
+// R3 ... tile width
+// R4 ... tile base address
+// R5 ... wrap width of this segment
+// R7 ... pointer to source data buffer
+// LR ... base address of data buffer (without X)
+// [SP+0] ... wrap width
+
+RenderTile_OutLoop:
+
+	// limit wrap width by total width -> R5
+	cmp	r5,r2	// compare wrap width with total width
+	bls	2f	// width is OK
+	mov	r5,r2	// limit wrap width
+
+	// check whether at least one whole tile remains
+2:	cmp	r5,r3	// check number of remaining pixels
+	bhs	5f	// remain whole tiles
+
+	// check if start of last tile remains
+	cmp	r5,#4	// check start of last tile (at least one 4-pixel group)
+	blo	3f	// all done
+	mov	r1,r5	// width to render
+
+// ---- render start of last tile
+// R0 ... pointer to destination control buffer
+// R1 ... width to render in this segment
+// R2 ... total remaining width
+// R3 ... tile width
+// R4 ... tile base address
+// R5 ... wrap width of this segment
+// R7 ... pointer to source data buffer (with X)
+// LR ... base address of data buffer (without X)
+// [SP+0] ... wrap width
+
+RenderTile_Last:
+
+	// save width
+	lsrs	r6,r1,#2	// number of 4-pixels
+	stmia	r0!,{r6}	// save width
+
+	// load tile index -> R6
+	ldrb	r6,[r7,#0]	// [2] load tile index
+	adds	r7,#1	// [1] advance tile map pointer (overwritten by the reload from LR below)
+
+	// save tile address
+	muls	r6,r3	// multiply tile index * tile width
+	add	r6,r4	// [1] add tile base address
+	stmia	r0!,{r6}	// [3] save pointer
+
+	// check if continue with next segment
+	mov	r7,lr	// get base pointer to tile data
+	cmp	r5,r3	// whole tile remains?
+	bhs	RenderTile_OutLoop	// render next segment
+
+	// pop registers and return (R0 holds the new cbuf pointer; R2/R3 receive the scratch slots)
+3:	pop	{r2-r7,pc}
+
+// ---- prepare to render whole tiles
+// R0 ... pointer to destination control buffer
+// R2 ... total remaining width
+// R3 ... tile width
+// R4 ... tile base address
+// R5 ... width of this segment
+// R7 ... pointer to source data buffer (with X)
+// LR ... base address of data buffer (without X)
+// [SP+0] ... wrap width
+
+	// prepare number of 4-pixels to render -> R1
+5:	lsrs	r1,r5,#2	// width of this part in 4-pixel units -> R1
+	lsls	r5,r1,#2	// shift back to get number of pixels, rounded down -> R5
+	subs	r2,r5	// update remaining width -> R2
+
+	lsrs	r5,r3,#2	// tile width/4 -> R5
+	subs	r1,r5	// number of 4-pixels - width/4
+	adds	r1,#1	// number of 4-pixels - (width/4-1); the +1 bias lets "bhi" below also take the case of exactly one tile left
+
+// ---- [11*N-1] start inner loop, render in one part of segment
+// R0 ... pointer to destination control buffer
+// R1 ... number of 4-pixels to generate - (tile width/4 - 1) (biased loop counter)
+// R2 ... total remaining width
+// R3 ... tile width
+// R4 ... tile base address
+// R5 ... tile width/4
+// R7 ... pointer to source data buffer (with X)
+// LR ... base address of data buffer (without X)
+// [SP+0] ... wrap width
+
+RenderTile_InLoop:
+
+	// [3] load tile index -> R6
+	ldrb	r6,[r7,#0]	// [2] load tile index
+	adds	r7,#1	// [1] advance tile map pointer to the next tile index
+
+	// [2] get tile address
+	muls	r6,r3	// [1] multiply tile index * tile width
+	add	r6,r4	// [1] add tile base address
+
+	// [3] save control block
+	stmia	r0!,{r5,r6}	// [3] save width and pointer
+
+	// [2,3] loop
+	subs	r1,r5	// [1] shift loop counter, subtract tile width/4
+	bhi	RenderTile_InLoop	// [1,2] > 0, render next whole tile
+
+// ---- end inner loop, continue with last tile, or start new part
+
+	// continue to outer loop
+	adds	r1,r5	// undo the last subtraction of tile width/4
+	subs	r1,#1	// remove the +1 bias -> number of leftover 4-pixel groups (partial last tile)
+	ldr	r5,[sp,#0]	// load wrap width -> R5
+	lsls	r1,#2	// convert back to pixels (sets Z when nothing is left over)
+	bne	RenderTile_Last	// render start of last tile
+	mov	r7,lr	// get base pointer to tile data -> R7
+	b	RenderTile_OutLoop	// go back to outer loop
+
+	.align 2
+// pointer to SIO base
+RenderTile_pSioBase:
+	.word	SIO_BASE	// address of SIO base
diff --git a/MCUME_pico/picovga_t4/render/vga_tilepersp.S b/MCUME_pico/picovga_t4/render/vga_tilepersp.S
new file mode 100755
index 0000000..8b9a720
--- /dev/null
+++ 
b/MCUME_pico/picovga_t4/render/vga_tilepersp.S @@ -0,0 +1,450 @@ + +// **************************************************************************** +// +// VGA render GF_TILEPERSP +// +// **************************************************************************** +// data ... tile map +// par ... column of tile images +// par2 ... pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL)) +// par3 ... LOW8=number of bits of tile width and height, HIGH8=horizon offset +// wb ... LOW8=number of bits of tile map width, HIGH8=number of bits of tile map height +// wrapy ... segment height + +#include "../define.h" // common definitions of C and ASM +#include "hardware/regs/sio.h" // registers of hardware divider +#include "hardware/regs/addressmap.h" // SIO base address + +#define ACCUM0_OFFSET0 0 +#define ACCUM1_OFFSET0 4 +#define BASE0_OFFSET0 8 +#define BASE1_OFFSET0 12 +#define BASE2_OFFSET0 16 +#define POP_LANE0_OFFSET0 20 +#define POP_LANE1_OFFSET0 24 +#define POP_FULL_OFFSET0 28 +#define PEEK_LANE0_OFFSET0 32 +#define PEEK_LANE1_OFFSET0 36 +#define PEEK_FULL_OFFSET0 40 +#define CTRL_LANE0_OFFSET0 44 +#define CTRL_LANE1_OFFSET0 48 +#define ACCUM0_ADD_OFFSET0 52 +#define ACCUM1_ADD_OFFSET0 56 +#define BASE_1AND0_OFFSET0 60 + +#define ACCUM0_OFFSET1 64 +#define ACCUM1_OFFSET1 68 +#define BASE0_OFFSET1 72 +#define BASE1_OFFSET1 76 +#define BASE2_OFFSET1 80 +#define POP_LANE0_OFFSET1 84 +#define POP_LANE1_OFFSET1 88 +#define POP_FULL_OFFSET1 92 +#define PEEK_LANE0_OFFSET1 96 +#define PEEK_LANE1_OFFSET1 100 +#define PEEK_FULL_OFFSET1 104 +#define CTRL_LANE0_OFFSET1 108 +#define CTRL_LANE1_OFFSET1 112 +#define ACCUM0_ADD_OFFSET1 116 +#define ACCUM1_ADD_OFFSET1 120 +#define BASE_1AND0_OFFSET1 124 + + .syntax unified + .section .time_critical.Render, "ax" + .cpu cortex-m0plus + .thumb // use 16-bit instructions + +// extern "C" u32* RenderTilePersp(u32* cbuf, int x, int y, int w, sSegm* segm); + +// render tiles with perspective GF_TILEPERSP +// using 
hardware interpolator inter0 and inter1 (their state is not saved during interrup) +// R0 ... pointer to destination data buffer +// R1 ... start X coordinate (not used) +// R2 ... start Y coordinate (in graphics lines) +// R3 ... width to display (must be multiple of 4) +// [stack] ... segm video segment sSegm +// Output new pointer to data buffer. +// 320 pixels takes ?? us on 151 MHz. + +.thumb_func +.global RenderTilePersp +RenderTilePersp: + +// Input registers and stack: +// R0 ... pointer to destination data buffer +// R1 ... X coordinate (not used) +// R2 ... Y coordinate +// SP+0: R3 ... remaining width +// SP+4: R4 +// SP+8: R5 +// SP+12: R6 +// SP+16: R7 +// SP+20: LR +// SP+24: video segment + + // push registers + push {r3-r7,lr} + +// ---- prepare registers + + // get pointer to video segment -> R4 + ldr r4,[sp,#24] // load video segment -> R4 + +// R0 ... pointer to data buffer +// R2 ... Y coordinate +// R3 ... remaining width +// R4 ... video segment + + // load horizon offset -> R1, check if use perspective + ldr r6,RenderTilePersp_pSioBase // get address of SIO base -> R6 + ldrh r5,[r4,#SSEGM_WRAPY] // get segment height -> R5 + ldrb r1,[r4,#SSEGM_PAR3+1] // get horizon offset -> R1 + sxtb r1,r1 // signed extension + lsls r1,#2 // horizon * 4, horizon = 0 ? 
+ bne 2f // use perspective + + // not using perspective, start Y coordinate y0 = y - h/2 -> R12 + lsrs r5,#1 // segment height/2 -> R5 + subs r2,r5 // y - h/2 -> R2 + mov r12,r2 // current coordinate Y0 = y - h/2 -> R12 + + // prepare divide result to get 1< R5 + str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL + b 4f + + // using perspective, check ceilling mode +2: bpl 3f // horizon is not negative + subs r2,r5,r2 // negate, y = h - y + subs r2,#1 // y = h - 1 - y + negs r1,r1 // absolute value of horizon + + // prepare current coordinate Y0 = y - h -> R12 +3: subs r7,r2,r5 // y - h = current Y coordinate -> R7 + mov r12,r7 // store current coordinate Y0 -> R12 + + // start calculating distance coefficient dist = FRACTMUL*h/(y + horiz) + lsls r5,#FRACT // segment height * FRACTMUL -> R5 + str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h + adds r2,r1 // horizon + y -> R2 + str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz + +// R0 ... pointer to data buffer +// R3 ... remaining width +// R4 ... video segment +// R12 ... current coordinate Y0 + + // prepare start coordinate X0 = -w/2 -> LR +4: lsrs r5,r3,#1 // width/2 + negs r5,r5 // negate + mov lr,r5 // store start coordinate X0 -> LR + + // prepare number of 4-pixels (loop counter) -> R7 + lsrs r7,r3,#2 // width/4 -> R7 + + // prepare address of interpolator 0 base -> R3 + ldr r3,RenderTilePersp_Interp // get address of interpolator 0 base -> R3 + +// R0 ... pointer to data buffer +// R3 ... interpolator base +// R4 ... video segment +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... 
current coordinate Y0 + +// ---- setup interpolator 0 to get tile index + + // set tile map base to base2 + ldr r6,[r4,#SSEGM_DATA] // load tile map base + str r6,[r3,#BASE2_OFFSET0] // set tile map base + + // set control word of lane 0: shift=FRACT+tilebits, mask=0..mapwbits-1 + ldr r6,RenderTilePersp_Ctrl // load control word + ldrb r1,[r4,#SSEGM_PAR3] // get tile width and height -> R1 + str r1,[sp,#0] // save tile size -> [SP+0] + adds r6,r1 // FRACT + tilebits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required) + ldrb r2,[r4,#SSEGM_WB] // number of bits of tile map width mapwbits -> R2 + subs r5,r2,#1 // mapwbits - 1 + lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position + orrs r6,r5 // add to control word + str r6,[r3,#CTRL_LANE0_OFFSET0] // set control word of lane 0 + + // set control word of lane 1: shift=FRACT+tilebits-mapwbits, + // mask=mapwbits..mapwbits+maphbits-1 + subs r6,r2 // FRACT + tilebits - mapwbits + lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift mapwbits to mask LSB position + orrs r6,r2 // add mapwbits to control word + ldrb r2,[r4,#SSEGM_WB+1] // number of bits of tile map height maphbits -> R2 + lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift maphbits to mask MSB position + adds r6,r2 // add to control word + str r6,[r3,#CTRL_LANE1_OFFSET0] // set control word of lane 1 + +// ---- setup interpolator 1 to get pixel index + + // set tile image to base2 + ldr r6,[r4,#SSEGM_PAR] // load tile image base + str r6,[r3,#BASE2_OFFSET1] // set tile image base + + // set control word of lane 0: shift=FRACT, mask=0..tilebits-1 + ldr r6,RenderTilePersp_Ctrl // load control word + subs r5,r1,#1 // tilebits - 1 + lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position + orrs r6,r5 // add to control word + str r6,[r3,#CTRL_LANE0_OFFSET1] // set control word of lane 0 + + // set control word of lane 1: shift=FRACT-tilebits, mask=tilebits..tilebits*2-1 + subs r6,r1 // FRACT - tilebits + lsls 
r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift to mask LSB position + orrs r6,r5 // add tilebits to control word + lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift tilebits to mask MSB position + adds r6,r1 // add to control word + str r6,[r3,#CTRL_LANE1_OFFSET1] // set control word of lane 1 + +// R0 ... pointer to data buffer +// R3 ... interpolator base +// R4 ... video segment +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... current coordinate Y0 +// [SP+0] ... number of bits of tile width and height + +// ---- set matrix + + // get pointer to matrix -> R4 + ldr r4,[r4,#SSEGM_PAR2] // get pointer to matrix -> R4 + + // get distance coefficient dist -> R1 + ldr r1,RenderTilePersp_pSioBase // get address of SIO base -> R1 + ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient + +// r4+0 ... m11 +// r4+4 ... m12 +// r4+8 ... m13 +// r4+12 ... m21 +// r4+16 ... m22 +// r4+20 ... m23 + + // set m11 -> R5 base0 + ldr r5,[r4,#0] // load m11 + muls r5,r1 // m11*dist + asrs r5,#FRACT // (m11*dist)>>FRACT + str r5,[r3,#BASE0_OFFSET0] // set base0 + str r5,[r3,#BASE0_OFFSET1] // set base0 + + // set m21 -> R6 base1 + ldr r6,[r4,#12] // load m21 + muls r6,r1 // m21*dist + asrs r6,#FRACT // (m21*dist)>>FRACT + str r6,[r3,#BASE1_OFFSET0] // set base1 + str r6,[r3,#BASE1_OFFSET1] // set base1 + +// R0 ... pointer to data buffer +// R1 ... distance coefficient +// R3 ... interpolator base +// R4 ... pointer to matrix +// R5 ... m11 +// R6 ... m21 +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... current coordinate Y0 +// [SP+0] ... 
number of bits of tile width and height + + // set x0*m11 + y0*m12 + m13 -> accum0 + mov r2,lr // start coordinate X0 -> X2 + muls r5,r2 // x0*m11 -> R5 + muls r2,r6 // x0*m21 -> R2 + mov lr,r1 // save distance coefficient -> LR + ldr r6,[r4,#4] // load m12 -> R6 + muls r1,r6 // m12*dist -> R1 + asrs r1,#FRACT // (m12*dist)>>FRACT -> R1 + mov r6,r12 // load coordinate Y0 -> R6 + muls r1,r6 // y0*m12 -> R1 + adds r5,r1 // x0*m11 + y0*m12 -> R5 + ldr r1,[r4,#8] // load m13 -> R1 + adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5 + str r5,[r3,#ACCUM0_OFFSET0] // set accum0 + str r5,[r3,#ACCUM0_OFFSET1] // set accum0 + +// R0 ... pointer to data buffer +// R2 ... x0*m21 +// R3 ... interpolator base +// R4 ... pointer to matrix +// R6 ... current coordinate Y0 +// R7 ... width/4 +// LR ... distance coefficient +// [SP+0] ... number of bits of tile width and height + + // set x0*m21 + y0*m22 + m23 -> accum1 + ldr r1,[r4,#16] // load m22 -> R1 + mov r5,lr // distance coefficient -> R5 + muls r1,r5 // m22*dist + asrs r1,#FRACT // (m22*dist)>>FRACT -> R1 + muls r1,r6 // y0*m22 -> R1 + adds r2,r1 // x0*m21 + y0*m22 -> R2 + ldr r1,[r4,#20] // load m23 -> R1 + adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2 + str r2,[r3,#ACCUM1_OFFSET0] // set accum1 + str r2,[r3,#ACCUM1_OFFSET1] // set accum1 + +// ---- process odd 4-pixel + + // prepare tile bits * 2 + ldr r6,[sp,#0] // get tile bits + lsls r6,#1 // tile bits * 2 + +// R0 ... pointer to destination data buffer +// R1 ... (temporary - pixel accumulator 1) +// R2 ... (temporary - pixel accumulator 2) +// R3 ... interpolator base +// R4 ... (temporary - get pointer to tile map, load tile index) +// R5 ... (temporary - get pointer to pixel, load pixel) +// R6 ... tilebits*2 +// R7 ... width/4 (loop counter) +// [SP+0] ... 
number of bits of tile width and height + + // check odd 4-pixels + lsrs r7,#1 // width/4/2 + bcc 2f // no odd 4-pixel + + // [7] load 1st pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r1,[r5,r4] // [2] load pixel + + // [9] load 2nd pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r4,[r5,r4] // [2] load pixel + lsls r4,#8 // [1] shift 1 byte left + orrs r1,r4 // [1] add pixel to accumulator + + // [9] load 3rd pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r4,[r5,r4] // [2] load pixel + lsls r4,#16 // [1] shift 2 bytes left + orrs r1,r4 // [1] add pixel to accumulator + + // [9] load 4th pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r4,[r5,r4] // [2] load pixel + lsls r4,#24 // [1] shift 3 bytes left + orrs r1,r4 // [1] add pixel to accumulator + + // [2] store 4 pixels + stmia r0!,{r1} // [2] store 4 pixels + + // check number of remaining pixels +2: tst r7,r7 // check number of pixels + beq 8f // end + +// ---- [74 per 8 pixels] inner loop +// R0 ... pointer to destination data buffer +// R1 ... (temporary - pixel accumulator 1) +// R2 ... (temporary - pixel accumulator 2) +// R3 ... interpolator base +// R4 ... (temporary - get pointer to tile map, load tile index) +// R5 ... (temporary - get pointer to pixel, load pixel) +// R6 ... tilebits*2 +// R7 ... 
width/8 (loop counter) + + // [7] load 1st pixel +6: ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r1,[r5,r4] // [2] load pixel + + // [9] load 2nd pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r4,[r5,r4] // [2] load pixel + lsls r4,#8 // [1] shift 1 byte left + orrs r1,r4 // [1] add pixel to accumulator + + // [9] load 3rd pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r4,[r5,r4] // [2] load pixel + lsls r4,#16 // [1] shift 2 bytes left + orrs r1,r4 // [1] add pixel to accumulator + + // [9] load 4th pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r4,[r5,r4] // [2] load pixel + lsls r4,#24 // [1] shift 3 bytes left + orrs r1,r4 // [1] add pixel to accumulator + + // [7] load 1st pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r2,[r5,r4] // [2] load pixel + + // [9] load 2nd pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r4,[r5,r4] // [2] load pixel + lsls r4,#8 // [1] shift 1 byte left + orrs r2,r4 // [1] add pixel to 
accumulator + + // [9] load 3rd pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r4,[r5,r4] // [2] load pixel + lsls r4,#16 // [1] shift 2 bytes left + orrs r2,r4 // [1] add pixel to accumulator + + // [9] load 4th pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r4,[r5,r4] // [2] load pixel + lsls r4,#24 // [1] shift 3 bytes left + orrs r2,r4 // [1] add pixel to accumulator + + // [3] store 8 pixels + stmia r0!,{r1,r2} // [3] store 8 pixels + + // [2,3] loop counter + subs r7,#1 // [1] 8-pixel counter + bne 6b // [1,2] next 8-pixels + + // pop registers +8: pop {r3-r7,pc} + + .align 2 +// pointer to SIO base +RenderTilePersp_pSioBase: + .word SIO_BASE // addres of SIO base + +// pointer to Interp0 base +RenderTilePersp_Interp: + .word SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET // addres of interpolator 0 base + +RenderTilePersp_Ctrl: // lane control word + .word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT< R4 + ldr r4,[sp,#24] // load video segment -> R4 + +// R0 ... pointer to data buffer +// R2 ... Y coordinate +// R3 ... remaining width +// R4 ... video segment + + // load horizon offset -> R1, check if use perspective + ldr r6,RenderTilePersp_pSioBase // get address of SIO base -> R6 + ldrh r5,[r4,#SSEGM_WRAPY] // get segment height -> R5 + ldrb r1,[r4,#SSEGM_PAR3+1] // get horizon offset -> R1 + sxtb r1,r1 // signed extension + lsls r1,#2 // horizon * 4, horizon = 0 ? 
+ bne 2f // use perspective + + // not using perspective, start Y coordinate y0 = y - h/2 -> R12 + lsrs r5,#1 // segment height/2 -> R5 + subs r2,r5 // y - h/2 -> R2 + mov r12,r2 // current coordinate Y0 = y - h/2 -> R12 + + // prepare divide result to get 1< R5 + str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL + b 4f + + // using perspective, check ceilling mode +2: bpl 3f // horizon is not negative + subs r2,r5,r2 // negate, y = h - y + subs r2,#1 // y = h - 1 - y + negs r1,r1 // absolute value of horizon + + // prepare current coordinate Y0 = y - h -> R12 +3: subs r7,r2,r5 // y - h = current Y coordinate -> R7 + mov r12,r7 // store current coordinate Y0 -> R12 + + // start calculating distance coefficient dist = FRACTMUL*h/(y + horiz) + lsls r5,#FRACT // segment height * FRACTMUL -> R5 + str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h + adds r2,r1 // horizon + y -> R2 + str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz + +// R0 ... pointer to data buffer +// R3 ... remaining width +// R4 ... video segment +// R12 ... current coordinate Y0 + + // prepare start coordinate X0 = -w/2 -> LR +4: lsrs r5,r3,#1 // width/2 + negs r5,r5 // negate + mov lr,r5 // store start coordinate X0 -> LR + + // prepare number of 4-pixels (loop counter) -> R7 + lsrs r7,r3,#2 // width/4 -> R7 + + // prepare address of interpolator 0 base -> R3 + ldr r3,RenderTilePersp_Interp // get address of interpolator 0 base -> R3 + +// R0 ... pointer to data buffer +// R3 ... interpolator base +// R4 ... video segment +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... 
current coordinate Y0 + +// ---- setup interpolator 0 to get tile index + + // set tile map base to base2 + ldr r6,[r4,#SSEGM_DATA] // load tile map base + str r6,[r3,#BASE2_OFFSET0] // set tile map base + + // set control word of lane 0: shift=FRACT+tilebits, mask=0..mapwbits-1 + ldr r6,RenderTilePersp_Ctrl // load control word + ldrb r1,[r4,#SSEGM_PAR3] // get tile width and height -> R1 + str r1,[sp,#0] // save tile size -> [SP+0] + adds r6,r1 // FRACT + tilebits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required) + ldrb r2,[r4,#SSEGM_WB] // number of bits of tile map width mapwbits -> R2 + subs r5,r2,#1 // mapwbits - 1 + lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position + orrs r6,r5 // add to control word + str r6,[r3,#CTRL_LANE0_OFFSET0] // set control word of lane 0 + + // set control word of lane 1: shift=FRACT+tilebits-mapwbits, + // mask=mapwbits..mapwbits+maphbits-1 + subs r6,r2 // FRACT + tilebits - mapwbits + lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift mapwbits to mask LSB position + orrs r6,r2 // add mapwbits to control word + ldrb r2,[r4,#SSEGM_WB+1] // number of bits of tile map height maphbits -> R2 + lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift maphbits to mask MSB position + adds r6,r2 // add to control word + str r6,[r3,#CTRL_LANE1_OFFSET0] // set control word of lane 1 + +// ---- setup interpolator 1 to get pixel index + + // set tile image to base2 + ldr r6,[r4,#SSEGM_PAR] // load tile image base + str r6,[r3,#BASE2_OFFSET1] // set tile image base + + // set control word of lane 0: shift=FRACT, mask=0..tilebits-1 + ldr r6,RenderTilePersp_Ctrl // load control word + subs r5,r1,#1 // tilebits - 1 + lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position + orrs r6,r5 // add to control word + str r6,[r3,#CTRL_LANE0_OFFSET1] // set control word of lane 0 + + // set control word of lane 1: shift=FRACT-tilebits, mask=tilebits..tilebits*2-1 + subs r6,r1 // FRACT - tilebits + lsls 
r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift to mask LSB position + orrs r6,r5 // add tilebits to control word + lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift tilebits to mask MSB position + adds r6,r1 // add to control word + str r6,[r3,#CTRL_LANE1_OFFSET1] // set control word of lane 1 + +// R0 ... pointer to data buffer +// R3 ... interpolator base +// R4 ... video segment +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... current coordinate Y0 +// [SP+0] ... number of bits of tile width and height + +// ---- set matrix + + // get pointer to matrix -> R4 + ldr r4,[r4,#SSEGM_PAR2] // get pointer to matrix -> R4 + + // get distance coefficient dist -> R1 + ldr r1,RenderTilePersp_pSioBase // get address of SIO base -> R1 + ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient + +// r4+0 ... m11 +// r4+4 ... m12 +// r4+8 ... m13 +// r4+12 ... m21 +// r4+16 ... m22 +// r4+20 ... m23 + + // set m11 -> R5 base0 + ldr r5,[r4,#0] // load m11 + muls r5,r1 // m11*dist + asrs r5,#FRACT // (m11*dist)>>FRACT + asrs r2,r5,#1 // delta/2 + adds r2,r5 // delta*1.5 + str r2,[r3,#BASE0_OFFSET0] // set base0 + str r2,[r3,#BASE0_OFFSET1] // set base0 + + // set m21 -> R6 base1 + ldr r6,[r4,#12] // load m21 + muls r6,r1 // m21*dist + asrs r6,#FRACT // (m21*dist)>>FRACT + asrs r2,r6,#1 // delta/2 + adds r2,r6 // delta*1.5 + str r2,[r3,#BASE1_OFFSET0] // set base1 + str r2,[r3,#BASE1_OFFSET1] // set base1 + +// R0 ... pointer to data buffer +// R1 ... distance coefficient +// R3 ... interpolator base +// R4 ... pointer to matrix +// R5 ... m11 +// R6 ... m21 +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... current coordinate Y0 +// [SP+0] ... 
number of bits of tile width and height + + // set x0*m11 + y0*m12 + m13 -> accum0 + mov r2,lr // start coordinate X0 -> X2 + muls r5,r2 // x0*m11 -> R5 + muls r2,r6 // x0*m21 -> R2 + mov lr,r1 // save distance coefficient -> LR + ldr r6,[r4,#4] // load m12 -> R6 + muls r1,r6 // m12*dist -> R1 + asrs r1,#FRACT // (m12*dist)>>FRACT -> R1 + mov r6,r12 // load coordinate Y0 -> R6 + muls r1,r6 // y0*m12 -> R1 + adds r5,r1 // x0*m11 + y0*m12 -> R5 + ldr r1,[r4,#8] // load m13 -> R1 + adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5 + str r5,[r3,#ACCUM0_OFFSET0] // set accum0 + str r5,[r3,#ACCUM0_OFFSET1] // set accum0 + +// R0 ... pointer to data buffer +// R2 ... x0*m21 +// R3 ... interpolator base +// R4 ... pointer to matrix +// R6 ... current coordinate Y0 +// R7 ... width/4 +// LR ... distance coefficient +// [SP+0] ... number of bits of tile width and height + + // set x0*m21 + y0*m22 + m23 -> accum1 + ldr r1,[r4,#16] // load m22 -> R1 + mov r5,lr // distance coefficient -> R5 + muls r1,r5 // m22*dist + asrs r1,#FRACT // (m22*dist)>>FRACT -> R1 + muls r1,r6 // y0*m22 -> R1 + adds r2,r1 // x0*m21 + y0*m22 -> R2 + ldr r1,[r4,#20] // load m23 -> R1 + adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2 + str r2,[r3,#ACCUM1_OFFSET0] // set accum1 + str r2,[r3,#ACCUM1_OFFSET1] // set accum1 + +// ---- process odd 4-pixel + + // prepare tile bits * 2 + ldr r6,[sp,#0] // get tile bits + lsls r6,#1 // tile bits * 2 + +// R0 ... pointer to destination data buffer +// R1 ... (temporary - pixel accumulator 1) +// R2 ... (temporary - pixel accumulator 2) +// R3 ... interpolator base +// R4 ... (temporary - get pointer to tile map, load tile index) +// R5 ... (temporary - get pointer to pixel, load pixel) +// R6 ... tilebits*2 +// R7 ... width/4 (loop counter) +// [SP+0] ... 
number of bits of tile width and height + + // check odd 4-pixels + lsrs r7,#1 // width/4/2 + bcc 2f // no odd 4-pixel + + // [7] load 1st pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r1,[r5,r4] // [2] load pixel + + // [9] load 2nd pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r4,[r5,r4] // [2] load pixel + lsls r4,#8 // [1] shift 1 byte left + orrs r1,r4 // [1] add pixel to accumulator + + // [11] load 3rd pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r4,[r5,r4] // [2] load pixel + lsls r4,#16 // [1] shift 2 bytes left + orrs r1,r4 // [1] add pixel to accumulator + lsls r4,#8 // [1] shift 1 byte left + orrs r1,r4 // [1] add pixel to accumulator + + // [2] store 4 pixels + stmia r0!,{r1} // [2] store 4 pixels + + // check number of remaining pixels +2: tst r7,r7 // check number of pixels + beq 8f // end + +// ---- [60 per 8 pixels] inner loop +// R0 ... pointer to destination data buffer +// R1 ... (temporary - pixel accumulator 1) +// R2 ... (temporary - pixel accumulator 2) +// R3 ... interpolator base +// R4 ... (temporary - get pointer to tile map, load tile index) +// R5 ... (temporary - get pointer to pixel, load pixel) +// R6 ... tilebits*2 +// R7 ... 
width/8 (loop counter) + + // [7] load 1st pixel +6: ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r1,[r5,r4] // [2] load pixel + + // [9] load 2nd pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r4,[r5,r4] // [2] load pixel + lsls r4,#8 // [1] shift 1 byte left + orrs r1,r4 // [1] add pixel to accumulator + + // [11] load 3rd pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r4,[r5,r4] // [2] load pixel + lsls r4,#16 // [1] shift 2 bytes left + orrs r1,r4 // [1] add pixel to accumulator + lsls r4,#8 // [1] shift 1 byte left + orrs r1,r4 // [1] add pixel to accumulator + + // [7] load 1st pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r2,[r5,r4] // [2] load pixel + + // [9] load 2nd pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r4,[r5,r4] // [2] load pixel + lsls r4,#8 // [1] shift 1 byte left + orrs r2,r4 // [1] add pixel to accumulator + + // [11] load 3rd pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r4,[r5,r4] // [2] load 
pixel + lsls r4,#16 // [1] shift 2 bytes left + orrs r2,r4 // [1] add pixel to accumulator + lsls r4,#8 // [1] shift 1 byte left + orrs r2,r4 // [1] add pixel to accumulator + + // [3] store 8 pixels + stmia r0!,{r1,r2} // [3] store 8 pixels + + // [2,3] loop counter + subs r7,#1 // [1] 8-pixel counter + bne 6b // [1,2] next 8-pixels + + // pop registers +8: pop {r3-r7,pc} + + .align 2 +// pointer to SIO base +RenderTilePersp_pSioBase: + .word SIO_BASE // addres of SIO base + +// pointer to Interp0 base +RenderTilePersp_Interp: + .word SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET // addres of interpolator 0 base + +RenderTilePersp_Ctrl: // lane control word + .word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT< R4 + ldr r4,[sp,#24] // load video segment -> R4 + +// R0 ... pointer to data buffer +// R2 ... Y coordinate +// R3 ... remaining width +// R4 ... video segment + + // load horizon offset -> R1, check if use perspective + ldr r6,RenderTilePersp_pSioBase // get address of SIO base -> R6 + ldrh r5,[r4,#SSEGM_WRAPY] // get segment height -> R5 + ldrb r1,[r4,#SSEGM_PAR3+1] // get horizon offset -> R1 + sxtb r1,r1 // signed extension + lsls r1,#2 // horizon * 4, horizon = 0 ? 
+ bne 2f // use perspective + + // not using perspective, start Y coordinate y0 = y - h/2 -> R12 + lsrs r5,#1 // segment height/2 -> R5 + subs r2,r5 // y - h/2 -> R2 + mov r12,r2 // current coordinate Y0 = y - h/2 -> R12 + + // prepare divide result to get 1< R5 + str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL + b 4f + + // using perspective, check ceiling mode +2: bpl 3f // horizon is not negative (flags still from the lsls of horizon*4) + subs r2,r5,r2 // negate, y = h - y + subs r2,#1 // y = h - 1 - y + negs r1,r1 // absolute value of horizon*4 + + // prepare current coordinate Y0 = y - h -> R12 +3: subs r7,r2,r5 // y - h = current Y coordinate -> R7 + mov r12,r7 // store current coordinate Y0 -> R12 + + // start calculating distance coefficient dist = FRACTMUL*h/(y + horiz) + lsls r5,#FRACT // segment height * FRACTMUL -> R5 + str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h + adds r2,r1 // horizon + y -> R2 + str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz + +// R0 ... pointer to data buffer +// R3 ... remaining width +// R4 ... video segment +// R12 ... 
current coordinate Y0 + +// ---- setup interpolator 0 to get tile index + + // set tile map base to base2 + ldr r6,[r4,#SSEGM_DATA] // load tile map base + str r6,[r3,#BASE2_OFFSET0] // set tile map base + + // set control word of lane 0: shift=FRACT+tilebits, mask=0..mapwbits-1 + ldr r6,RenderTilePersp_Ctrl // load control word + ldrb r1,[r4,#SSEGM_PAR3] // get number of bits of tile width and height (tilebits) -> R1 + str r1,[sp,#0] // save tilebits -> [SP+0] + adds r6,r1 // FRACT + tilebits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required) + ldrb r2,[r4,#SSEGM_WB] // number of bits of tile map width mapwbits -> R2 + subs r5,r2,#1 // mapwbits - 1 + lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position + orrs r6,r5 // add to control word + str r6,[r3,#CTRL_LANE0_OFFSET0] // set control word of lane 0 + + // set control word of lane 1: shift=FRACT+tilebits-mapwbits, + // mask=mapwbits..mapwbits+maphbits-1 + subs r6,r2 // FRACT + tilebits - mapwbits + lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift mapwbits to mask LSB position + orrs r6,r2 // add mapwbits to control word + ldrb r2,[r4,#SSEGM_WB+1] // number of bits of tile map height maphbits -> R2 + lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift maphbits to mask MSB position + adds r6,r2 // add to control word (mask MSB field already holds mapwbits-1 from lane 0) + str r6,[r3,#CTRL_LANE1_OFFSET0] // set control word of lane 1 + +// ---- setup interpolator 1 to get pixel index + + // set tile image to base2 + ldr r6,[r4,#SSEGM_PAR] // load tile image base + str r6,[r3,#BASE2_OFFSET1] // set tile image base + + // set control word of lane 0: shift=FRACT, mask=0..tilebits-1 + ldr r6,RenderTilePersp_Ctrl // load control word + subs r5,r1,#1 // tilebits - 1 + lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position + orrs r6,r5 // add to control word + str r6,[r3,#CTRL_LANE0_OFFSET1] // set control word of lane 0 + + // set control word of lane 1: shift=FRACT-tilebits, mask=tilebits..tilebits*2-1 + subs r6,r1 // FRACT - tilebits + lsls 
r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift to mask LSB position + orrs r6,r5 // add tilebits to control word + lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift tilebits to mask MSB position + adds r6,r1 // add to control word (mask MSB field already holds tilebits-1 from lane 0) + str r6,[r3,#CTRL_LANE1_OFFSET1] // set control word of lane 1 + +// R0 ... pointer to data buffer +// R3 ... interpolator base +// R4 ... video segment +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... current coordinate Y0 +// [SP+0] ... number of bits of tile width and height + +// ---- set matrix + + // get pointer to matrix -> R4 + ldr r4,[r4,#SSEGM_PAR2] // get pointer to matrix -> R4 + + // get distance coefficient dist -> R1 + ldr r1,RenderTilePersp_pSioBase // get address of SIO base -> R1 + ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient -> R1, distance coefficient + +// r4+0 ... m11 +// r4+4 ... m12 +// r4+8 ... m13 +// r4+12 ... m21 +// r4+16 ... m22 +// r4+20 ... m23 + + // set m11 -> R5 base0 + ldr r5,[r4,#0] // load m11 + muls r5,r1 // m11*dist + asrs r5,#FRACT-1 // (m11*dist)>>(FRACT-1) ... 2*delta + str r5,[r3,#BASE0_OFFSET0] // set base0 of interpolator 0 + str r5,[r3,#BASE0_OFFSET1] // set base0 of interpolator 1 + asrs r5,#1 // (m11*dist)>>FRACT + + // set m21 -> R6 base1 + ldr r6,[r4,#12] // load m21 + muls r6,r1 // m21*dist + asrs r6,#FRACT-1 // (m21*dist)>>(FRACT-1) ... 2*delta + str r6,[r3,#BASE1_OFFSET0] // set base1 of interpolator 0 + str r6,[r3,#BASE1_OFFSET1] // set base1 of interpolator 1 + asrs r6,#1 // (m21*dist)>>FRACT + +// R0 ... pointer to data buffer +// R1 ... distance coefficient +// R3 ... interpolator base +// R4 ... pointer to matrix +// R5 ... (m11*dist)>>FRACT +// R6 ... (m21*dist)>>FRACT +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... current coordinate Y0 +// [SP+0] ... 
number of bits of tile width and height + + // set x0*m11 + y0*m12 + m13 -> accum0 + mov r2,lr // start coordinate X0 -> R2 + muls r5,r2 // x0*m11 -> R5 + muls r2,r6 // x0*m21 -> R2 + mov lr,r1 // save distance coefficient -> LR + ldr r6,[r4,#4] // load m12 -> R6 + muls r1,r6 // m12*dist -> R1 + asrs r1,#FRACT // (m12*dist)>>FRACT -> R1 + mov r6,r12 // load coordinate Y0 -> R6 + muls r1,r6 // y0*m12 -> R1 + adds r5,r1 // x0*m11 + y0*m12 -> R5 + ldr r1,[r4,#8] // load m13 -> R1 + adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5 + str r5,[r3,#ACCUM0_OFFSET0] // set accum0 of interpolator 0 + str r5,[r3,#ACCUM0_OFFSET1] // set accum0 of interpolator 1 + +// R0 ... pointer to data buffer +// R2 ... x0*m21 +// R3 ... interpolator base +// R4 ... pointer to matrix +// R6 ... current coordinate Y0 +// R7 ... width/4 +// LR ... distance coefficient +// [SP+0] ... number of bits of tile width and height + + // set x0*m21 + y0*m22 + m23 -> accum1 + ldr r1,[r4,#16] // load m22 -> R1 + mov r5,lr // distance coefficient -> R5 + muls r1,r5 // m22*dist + asrs r1,#FRACT // (m22*dist)>>FRACT -> R1 + muls r1,r6 // y0*m22 -> R1 + adds r2,r1 // x0*m21 + y0*m22 -> R2 + ldr r1,[r4,#20] // load m23 -> R1 + adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2 + str r2,[r3,#ACCUM1_OFFSET0] // set accum1 of interpolator 0 + str r2,[r3,#ACCUM1_OFFSET1] // set accum1 of interpolator 1 + +// ---- process odd 4-pixel + + // prepare tile bits * 2 + ldr r6,[sp,#0] // get tile bits + lsls r6,#1 // tile bits * 2 + +// R0 ... pointer to destination data buffer +// R1 ... (temporary - pixel accumulator 1) +// R2 ... (temporary - pixel accumulator 2) +// R3 ... interpolator base +// R4 ... (temporary - get pointer to tile map, load tile index) +// R5 ... (temporary - get pointer to pixel, load pixel) +// R6 ... tilebits*2 +// R7 ... width/4 (loop counter) +// [SP+0] ... 
number of bits of tile width and height + + // check odd 4-pixels + lsrs r7,#1 // width/4/2 -> R7, carry = odd 4-pixel group present + bcc 2f // no odd 4-pixel + + // [9] load 1st pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r1,[r5,r4] // [2] load pixel + lsls r4,r1,#8 // [1] shift 1 byte left + orrs r1,r4 // [1] add pixel to accumulator (same pixel in bytes 0 and 1) + + // [11] load 2nd pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r4,[r5,r4] // [2] load pixel + lsls r4,#16 // [1] shift 2 bytes left + orrs r1,r4 // [1] add pixel to accumulator (byte 2) + lsls r4,#8 // [1] shift 1 more byte left + orrs r1,r4 // [1] add same pixel to accumulator again (byte 3) + + // [2] store 4 pixels + stmia r0!,{r1} // [2] store 4 pixels + + // check number of remaining pixels +2: tst r7,r7 // check number of remaining 8-pixel groups + beq 8f // end + +// ---- [46 per 8 pixels] inner loop +// R0 ... pointer to destination data buffer +// R1 ... (temporary - pixel accumulator 1) +// R2 ... (temporary - pixel accumulator 2) +// R3 ... interpolator base +// R4 ... (temporary - get pointer to tile map, load tile index) +// R5 ... (temporary - get pointer to pixel, load pixel) +// R6 ... tilebits*2 +// R7 ... 
width/8 (loop counter) + + // [9] load 1st pixel +6: ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r1,[r5,r4] // [2] load pixel + lsls r4,r1,#8 // [1] shift 1 byte left + orrs r1,r4 // [1] add pixel to accumulator + + // [11] load 2nd pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r4,[r5,r4] // [2] load pixel + lsls r4,#16 // [1] shift 2 bytes left + orrs r1,r4 // [1] add pixel to accumulator + lsls r4,#8 // [1] shift 1 byte left + orrs r1,r4 // [1] add pixel to accumulator + + // [9] load 3rd pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r2,[r5,r4] // [2] load pixel + lsls r4,r2,#8 // [1] shift 1 byte left + orrs r2,r4 // [1] add pixel to accumulator + + // [11] load 4th pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r4,[r5,r4] // [2] load pixel + lsls r4,#16 // [1] shift 2 bytes left + orrs r2,r4 // [1] add pixel to accumulator + lsls r4,#8 // [1] shift 1 byte left + orrs r2,r4 // [1] add pixel to accumulator + + // [3] store 8 pixels + stmia r0!,{r1,r2} // [3] store 8 pixels + + // [2,3] loop counter + subs r7,#1 // [1] 8-pixel counter + bne 6b // [1,2] next 8-pixels + + // pop registers and return (PC is restored from the stack) +8: pop {r3-r7,pc} + + .align 2 +// pointer to SIO base +RenderTilePersp_pSioBase: + .word SIO_BASE // address of SIO base + +// pointer to Interp0 base +RenderTilePersp_Interp: + .word 
SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET // addres of interpolator 0 base + +RenderTilePersp_Ctrl: // lane control word + .word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT< R4 + ldr r4,[sp,#24] // load video segment -> R4 + +// R0 ... pointer to data buffer +// R2 ... Y coordinate +// R3 ... remaining width +// R4 ... video segment + + // load horizon offset -> R1, check if use perspective + ldr r6,RenderTilePersp_pSioBase // get address of SIO base -> R6 + ldrh r5,[r4,#SSEGM_WRAPY] // get segment height -> R5 + ldrb r1,[r4,#SSEGM_PAR3+1] // get horizon offset -> R1 + sxtb r1,r1 // signed extension + lsls r1,#2 // horizon * 4, horizon = 0 ? + bne 2f // use perspective + + // not using perspective, start Y coordinate y0 = y - h/2 -> R12 + lsrs r5,#1 // segment height/2 -> R5 + subs r2,r5 // y - h/2 -> R2 + mov r12,r2 // current coordinate Y0 = y - h/2 -> R12 + + // prepare divide result to get 1< R5 + str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL + b 4f + + // using perspective, check ceilling mode +2: bpl 3f // horizon is not negative + subs r2,r5,r2 // negate, y = h - y + subs r2,#1 // y = h - 1 - y + negs r1,r1 // absolute value of horizon + + // prepare current coordinate Y0 = y - h -> R12 +3: subs r7,r2,r5 // y - h = current Y coordinate -> R7 + mov r12,r7 // store current coordinate Y0 -> R12 + + // start calculating distance coefficient dist = FRACTMUL*h/(y + horiz) + lsls r5,#FRACT // segment height * FRACTMUL -> R5 + str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h + adds r2,r1 // horizon + y -> R2 + str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz + +// R0 ... pointer to data buffer +// R3 ... remaining width +// R4 ... video segment +// R12 ... 
current coordinate Y0 + + // prepare start coordinate X0 = -w/2 -> LR +4: lsrs r5,r3,#1 // width/2 + negs r5,r5 // negate + mov lr,r5 // store start coordinate X0 -> LR + + // prepare number of 4-pixels (loop counter) -> R7 + lsrs r7,r3,#2 // width/4 -> R7 + + // prepare address of interpolator 0 base -> R3 + ldr r3,RenderTilePersp_Interp // get address of interpolator 0 base -> R3 + +// R0 ... pointer to data buffer +// R3 ... interpolator base +// R4 ... video segment +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... current coordinate Y0 + +// ---- setup interpolator 0 to get tile index + + // set tile map base to base2 + ldr r6,[r4,#SSEGM_DATA] // load tile map base + str r6,[r3,#BASE2_OFFSET0] // set tile map base + + // set control word of lane 0: shift=FRACT+tilebits, mask=0..mapwbits-1 + ldr r6,RenderTilePersp_Ctrl // load control word + ldrb r1,[r4,#SSEGM_PAR3] // get number of bits of tile width and height (tilebits) -> R1 + str r1,[sp,#0] // save tilebits -> [SP+0] + adds r6,r1 // FRACT + tilebits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required) + ldrb r2,[r4,#SSEGM_WB] // number of bits of tile map width mapwbits -> R2 + subs r5,r2,#1 // mapwbits - 1 + lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position + orrs r6,r5 // add to control word + str r6,[r3,#CTRL_LANE0_OFFSET0] // set control word of lane 0 + + // set control word of lane 1: shift=FRACT+tilebits-mapwbits, + // mask=mapwbits..mapwbits+maphbits-1 + subs r6,r2 // FRACT + tilebits - mapwbits + lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift mapwbits to mask LSB position + orrs r6,r2 // add mapwbits to control word + ldrb r2,[r4,#SSEGM_WB+1] // number of bits of tile map height maphbits -> R2 + lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift maphbits to mask MSB position + adds r6,r2 // add to control word (mask MSB field already holds mapwbits-1 from lane 0) + str r6,[r3,#CTRL_LANE1_OFFSET0] // set control word of lane 1 + +// ---- setup interpolator 1 to get pixel index + + // set tile image to base2 + ldr 
r6,[r4,#SSEGM_PAR] // load tile image base + str r6,[r3,#BASE2_OFFSET1] // set tile image base + + // set control word of lane 0: shift=FRACT, mask=0..tilebits-1 + ldr r6,RenderTilePersp_Ctrl // load control word + subs r5,r1,#1 // tilebits - 1 + lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position + orrs r6,r5 // add to control word + str r6,[r3,#CTRL_LANE0_OFFSET1] // set control word of lane 0 + + // set control word of lane 1: shift=FRACT-tilebits, mask=tilebits..tilebits*2-1 + subs r6,r1 // FRACT - tilebits + lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift to mask LSB position + orrs r6,r5 // add tilebits to control word + lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift tilebits to mask MSB position + adds r6,r1 // add to control word (mask MSB field already holds tilebits-1 from lane 0) + str r6,[r3,#CTRL_LANE1_OFFSET1] // set control word of lane 1 + +// R0 ... pointer to data buffer +// R3 ... interpolator base +// R4 ... video segment +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... current coordinate Y0 +// [SP+0] ... number of bits of tile width and height + +// ---- set matrix + + // get pointer to matrix -> R4 + ldr r4,[r4,#SSEGM_PAR2] // get pointer to matrix -> R4 + + // get distance coefficient dist -> R1 + ldr r1,RenderTilePersp_pSioBase // get address of SIO base -> R1 + ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient -> R1, distance coefficient + +// r4+0 ... m11 +// r4+4 ... m12 +// r4+8 ... m13 +// r4+12 ... m21 +// r4+16 ... m22 +// r4+20 ... m23 + + // set m11 -> R5 base0 + ldr r5,[r4,#0] // load m11 + muls r5,r1 // m11*dist + asrs r5,#FRACT // (m11*dist)>>FRACT ... delta + lsls r2,r5,#1 // delta*2 + adds r2,r5 // delta*3 + str r2,[r3,#BASE0_OFFSET0] // set base0 of interpolator 0 + str r2,[r3,#BASE0_OFFSET1] // set base0 of interpolator 1 + + // set m21 -> R6 base1 + ldr r6,[r4,#12] // load m21 + muls r6,r1 // m21*dist + asrs r6,#FRACT // (m21*dist)>>FRACT ... 
delta + lsls r2,r6,#1 // delta*2 + adds r2,r6 // delta*3 + str r2,[r3,#BASE1_OFFSET0] // set base1 of interpolator 0 + str r2,[r3,#BASE1_OFFSET1] // set base1 of interpolator 1 + +// R0 ... pointer to data buffer +// R1 ... distance coefficient +// R3 ... interpolator base +// R4 ... pointer to matrix +// R5 ... (m11*dist)>>FRACT +// R6 ... (m21*dist)>>FRACT +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... current coordinate Y0 +// [SP+0] ... number of bits of tile width and height + + // set x0*m11 + y0*m12 + m13 -> accum0 + mov r2,lr // start coordinate X0 -> R2 + muls r5,r2 // x0*m11 -> R5 + muls r2,r6 // x0*m21 -> R2 + mov lr,r1 // save distance coefficient -> LR + ldr r6,[r4,#4] // load m12 -> R6 + muls r1,r6 // m12*dist -> R1 + asrs r1,#FRACT // (m12*dist)>>FRACT -> R1 + mov r6,r12 // load coordinate Y0 -> R6 + muls r1,r6 // y0*m12 -> R1 + adds r5,r1 // x0*m11 + y0*m12 -> R5 + ldr r1,[r4,#8] // load m13 -> R1 + adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5 + str r5,[r3,#ACCUM0_OFFSET0] // set accum0 of interpolator 0 + str r5,[r3,#ACCUM0_OFFSET1] // set accum0 of interpolator 1 + +// R0 ... pointer to data buffer +// R2 ... x0*m21 +// R3 ... interpolator base +// R4 ... pointer to matrix +// R6 ... current coordinate Y0 +// R7 ... width/4 +// LR ... distance coefficient +// [SP+0] ... number of bits of tile width and height + + // set x0*m21 + y0*m22 + m23 -> accum1 + ldr r1,[r4,#16] // load m22 -> R1 + mov r5,lr // distance coefficient -> R5 + muls r1,r5 // m22*dist + asrs r1,#FRACT // (m22*dist)>>FRACT -> R1 + muls r1,r6 // y0*m22 -> R1 + adds r2,r1 // x0*m21 + y0*m22 -> R2 + ldr r1,[r4,#20] // load m23 -> R1 + adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2 + str r2,[r3,#ACCUM1_OFFSET0] // set accum1 of interpolator 0 + str r2,[r3,#ACCUM1_OFFSET1] // set accum1 of interpolator 1 + +// ---- process odd 4-pixel + + // prepare tile bits * 2 + ldr r6,[sp,#0] // get tile bits + lsls r6,#1 // tile bits * 2 + +// R0 ... pointer to destination data buffer +// R1 ... (temporary - pixel accumulator 1) +// R2 ... (temporary - pixel accumulator 2) +// R3 ... interpolator base +// R4 ... 
(temporary - get pointer to tile map, load tile index) +// R5 ... (temporary - get pointer to pixel, load pixel) +// R6 ... tilebits*2 +// R7 ... width/4 (loop counter) +// [SP+0] ... number of bits of tile width and height + + // check odd 4-pixels + lsrs r7,#1 // width/4/2 -> R7, carry = odd 4-pixel group present + bcc 2f // no odd 4-pixel + + // [11] load pixel, replicated into all 4 bytes + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r1,[r5,r4] // [2] load pixel + lsls r4,r1,#8 // [1] shift 1 byte left + orrs r1,r4 // [1] add pixel to accumulator (same pixel in bytes 0 and 1) + lsls r4,r1,#16 // [1] shift the pixel pair 2 bytes left + orrs r1,r4 // [1] add to accumulator (same pixel in all 4 bytes) + + // [2] store 4 pixels + stmia r0!,{r1} // [2] store 4 pixels + + // check number of remaining pixels +2: tst r7,r7 // check number of remaining 8-pixel groups + beq 8f // end + +// ---- [37 per 8 pixels] inner loop +// R0 ... pointer to destination data buffer +// R1 ... (temporary - pixel accumulator 1) +// R2 ... (temporary - pixel accumulator 2) +// R3 ... interpolator base +// R4 ... (temporary - get pointer to tile map, load tile index) +// R5 ... (temporary - get pointer to pixel, load pixel) +// R6 ... tilebits*2 +// R7 ... 
width/8 (loop counter) + + // [9] load 1st pixel +6: ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r1,[r5,r4] // [2] load pixel + lsls r4,r1,#8 // [1] shift 1 byte left + orrs r1,r4 // [1] add pixel to accumulator + + // [11] load 2nd pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r4,[r5,r4] // [2] load pixel + lsls r4,#16 // [1] shift 2 bytes left + orrs r1,r4 // [1] add pixel to accumulator + lsls r4,#8 // [1] shift 1 byte left + orrs r1,r4 // [1] add pixel to accumulator + + // [11] load pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r2,[r5,r4] // [2] load pixel + lsls r4,r2,#8 // [1] shift 1 byte left + orrs r2,r4 // [1] add pixel to accumulator + lsls r4,r2,#16 // [1] shift 2 bytes left + orrs r2,r4 // [1] add pixel to accumulator + + // [3] store 8 pixels + stmia r0!,{r1,r2} // [3] store 8 pixels + + // [2,3] loop counter + subs r7,#1 // [1] 8-pixel counter + bne 6b // [1,2] next 8-pixels + + // pop registers +8: pop {r3-r7,pc} + + .align 2 +// pointer to SIO base +RenderTilePersp_pSioBase: + .word SIO_BASE // addres of SIO base + +// pointer to Interp0 base +RenderTilePersp_Interp: + .word SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET // addres of interpolator 0 base + +RenderTilePersp_Ctrl: // lane control word + .word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT< R4 + ldr r4,[sp,#24] // load video segment -> R4 + +// R0 ... pointer to data buffer +// R2 ... Y coordinate +// R3 ... remaining width +// R4 ... 
video segment + + // load horizon offset -> R1, check if use perspective + ldr r6,RenderTilePersp_pSioBase // get address of SIO base -> R6 + ldrh r5,[r4,#SSEGM_WRAPY] // get segment height -> R5 + ldrb r1,[r4,#SSEGM_PAR3+1] // get horizon offset -> R1 + sxtb r1,r1 // signed extension + lsls r1,#2 // horizon * 4, horizon = 0 ? + bne 2f // use perspective + + // not using perspective, start Y coordinate y0 = y - h/2 -> R12 + lsrs r5,#1 // segment height/2 -> R5 + subs r2,r5 // y - h/2 -> R2 + mov r12,r2 // current coordinate Y0 = y - h/2 -> R12 + + // prepare divide result to get 1< R5 + str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL + b 4f + + // using perspective, check ceilling mode +2: bpl 3f // horizon is not negative + subs r2,r5,r2 // negate, y = h - y + subs r2,#1 // y = h - 1 - y + negs r1,r1 // absolute value of horizon + + // prepare current coordinate Y0 = y - h -> R12 +3: subs r7,r2,r5 // y - h = current Y coordinate -> R7 + mov r12,r7 // store current coordinate Y0 -> R12 + + // start calculating distance coefficient dist = FRACTMUL*h/(y + horiz) + lsls r5,#FRACT // segment height * FRACTMUL -> R5 + str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h + adds r2,r1 // horizon + y -> R2 + str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz + +// R0 ... pointer to data buffer +// R3 ... remaining width +// R4 ... video segment +// R12 ... current coordinate Y0 + + // prepare start coordinate X0 = -w/2 -> LR +4: lsrs r5,r3,#1 // width/2 + negs r5,r5 // negate + mov lr,r5 // store start coordinate X0 -> LR + + // prepare number of 4-pixels (loop counter) -> R7 + lsrs r7,r3,#2 // width/4 -> R7 + + // prepare address of interpolator 0 base -> R3 + ldr r3,RenderTilePersp_Interp // get address of interpolator 0 base -> R3 + +// R0 ... pointer to data buffer +// R3 ... interpolator base +// R4 ... video segment +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... 
current coordinate Y0 + +// ---- setup interpolator 0 to get tile index + + // set tile map base to base2 + ldr r6,[r4,#SSEGM_DATA] // load tile map base + str r6,[r3,#BASE2_OFFSET0] // set tile map base + + // set control word of lane 0: shift=FRACT+tilebits, mask=0..mapwbits-1 + ldr r6,RenderTilePersp_Ctrl // load control word + ldrb r1,[r4,#SSEGM_PAR3] // get tile width and height -> R1 + str r1,[sp,#0] // save tile size -> [SP+0] + adds r6,r1 // FRACT + tilebits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required) + ldrb r2,[r4,#SSEGM_WB] // number of bits of tile map width mapwbits -> R2 + subs r5,r2,#1 // mapwbits - 1 + lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position + orrs r6,r5 // add to control word + str r6,[r3,#CTRL_LANE0_OFFSET0] // set control word of lane 0 + + // set control word of lane 1: shift=FRACT+tilebits-mapwbits, + // mask=mapwbits..mapwbits+maphbits-1 + subs r6,r2 // FRACT + tilebits - mapwbits + lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift mapwbits to mask LSB position + orrs r6,r2 // add mapwbits to control word + ldrb r2,[r4,#SSEGM_WB+1] // number of bits of tile map height maphbits -> R2 + lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift maphbits to mask MSB position + adds r6,r2 // add to control word + str r6,[r3,#CTRL_LANE1_OFFSET0] // set control word of lane 1 + +// ---- setup interpolator 1 to get pixel index + + // set tile image to base2 + ldr r6,[r4,#SSEGM_PAR] // load tile image base + str r6,[r3,#BASE2_OFFSET1] // set tile image base + + // set control word of lane 0: shift=FRACT, mask=0..tilebits-1 + ldr r6,RenderTilePersp_Ctrl // load control word + subs r5,r1,#1 // tilebits - 1 + lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position + orrs r6,r5 // add to control word + str r6,[r3,#CTRL_LANE0_OFFSET1] // set control word of lane 0 + + // set control word of lane 1: shift=FRACT-tilebits, mask=tilebits..tilebits*2-1 + subs r6,r1 // FRACT - tilebits + lsls 
r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift to mask LSB position + orrs r6,r5 // add tilebits to control word + lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift tilebits to mask MSB position + adds r6,r1 // add to control word + str r6,[r3,#CTRL_LANE1_OFFSET1] // set control word of lane 1 + +// R0 ... pointer to data buffer +// R3 ... interpolator base +// R4 ... video segment +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... current coordinate Y0 +// [SP+0] ... number of bits of tile width and height + +// ---- set matrix + + // get pointer to matrix -> R4 + ldr r4,[r4,#SSEGM_PAR2] // get pointer to matrix -> R4 + + // get distance coefficient dist -> R1 + ldr r1,RenderTilePersp_pSioBase // get address of SIO base -> R1 + ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient + +// r4+0 ... m11 +// r4+4 ... m12 +// r4+8 ... m13 +// r4+12 ... m21 +// r4+16 ... m22 +// r4+20 ... m23 + + // set m11 -> R5 base0 + ldr r5,[r4,#0] // load m11 + muls r5,r1 // m11*dist + asrs r5,#FRACT-2 // (m11*dist)>>(FRACT-2) ... 4*delta + str r5,[r3,#BASE0_OFFSET0] // set base0 + str r5,[r3,#BASE0_OFFSET1] // set base0 + asrs r5,#2 // (m11*dist)>>FRACT + + // set m21 -> R6 base1 + ldr r6,[r4,#12] // load m21 + muls r6,r1 // m21*dist + asrs r6,#FRACT-2 // (m21*dist)>>(FRACT-2) ... 4*delta + str r6,[r3,#BASE1_OFFSET0] // set base1 + str r6,[r3,#BASE1_OFFSET1] // set base1 + asrs r6,#2 // (m21*dist)>>FRACT + +// R0 ... pointer to data buffer +// R1 ... distance coefficient +// R3 ... interpolator base +// R4 ... pointer to matrix +// R5 ... m11 +// R6 ... m21 +// R7 ... width/4 +// LR ... start coordinate X0 +// R12 ... current coordinate Y0 +// [SP+0] ... 
number of bits of tile width and height + + // set x0*m11 + y0*m12 + m13 -> accum0 + mov r2,lr // start coordinate X0 -> X2 + muls r5,r2 // x0*m11 -> R5 + muls r2,r6 // x0*m21 -> R2 + mov lr,r1 // save distance coefficient -> LR + ldr r6,[r4,#4] // load m12 -> R6 + muls r1,r6 // m12*dist -> R1 + asrs r1,#FRACT // (m12*dist)>>FRACT -> R1 + mov r6,r12 // load coordinate Y0 -> R6 + muls r1,r6 // y0*m12 -> R1 + adds r5,r1 // x0*m11 + y0*m12 -> R5 + ldr r1,[r4,#8] // load m13 -> R1 + adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5 + str r5,[r3,#ACCUM0_OFFSET0] // set accum0 + str r5,[r3,#ACCUM0_OFFSET1] // set accum0 + +// R0 ... pointer to data buffer +// R2 ... x0*m21 +// R3 ... interpolator base +// R4 ... pointer to matrix +// R6 ... current coordinate Y0 +// R7 ... width/4 +// LR ... distance coefficient +// [SP+0] ... number of bits of tile width and height + + // set x0*m21 + y0*m22 + m23 -> accum1 + ldr r1,[r4,#16] // load m22 -> R1 + mov r5,lr // distance coefficient -> R5 + muls r1,r5 // m22*dist + asrs r1,#FRACT // (m22*dist)>>FRACT -> R1 + muls r1,r6 // y0*m22 -> R1 + adds r2,r1 // x0*m21 + y0*m22 -> R2 + ldr r1,[r4,#20] // load m23 -> R1 + adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2 + str r2,[r3,#ACCUM1_OFFSET0] // set accum1 + str r2,[r3,#ACCUM1_OFFSET1] // set accum1 + +// ---- process odd 4-pixel + + // prepare tile bits * 2 + ldr r6,[sp,#0] // get tile bits + lsls r6,#1 // tile bits * 2 + +// R0 ... pointer to destination data buffer +// R1 ... (temporary - pixel accumulator 1) +// R2 ... (temporary - pixel accumulator 2) +// R3 ... interpolator base +// R4 ... (temporary - get pointer to tile map, load tile index) +// R5 ... (temporary - get pointer to pixel, load pixel) +// R6 ... tilebits*2 +// R7 ... width/4 (loop counter) +// [SP+0] ... 
number of bits of tile width and height + + // check odd 4-pixels + lsrs r7,#1 // width/4/2 + bcc 2f // no odd 4-pixel + + // load pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r1,[r5,r4] // [2] load pixel + lsls r4,r1,#8 // [1] shift 1 byte left + orrs r1,r4 // [1] add pixel to accumulator + lsls r4,r1,#16 // [1] shift 2 bytes left + orrs r1,r4 // [1] add pixel to accumulator + + // [2] store 4 pixels + stmia r0!,{r1} // [2] store 4 pixels + + // check number of remaining pixels +2: tst r7,r7 // check number of pixels + beq 8f // end + +// ---- [28 per 8 pixels] inner loop +// R0 ... pointer to destination data buffer +// R1 ... (temporary - pixel accumulator 1) +// R2 ... (temporary - pixel accumulator 2) +// R3 ... interpolator base +// R4 ... (temporary - get pointer to tile map, load tile index) +// R5 ... (temporary - get pointer to pixel, load pixel) +// R6 ... tilebits*2 +// R7 ... 
width/8 (loop counter) + + // [11] load pixel +6: ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r1,[r5,r4] // [2] load pixel + lsls r4,r1,#8 // [1] shift 1 byte left + orrs r1,r4 // [1] add pixel to accumulator + lsls r4,r1,#16 // [1] shift 2 bytes left + orrs r1,r4 // [1] add pixel to accumulator + + // [11] load pixel + ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map + ldrb r4,[r4,#0] // [2] load tile index + lsls r4,r6 // [1] tile index * tile size + ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image + ldrb r2,[r5,r4] // [2] load pixel + lsls r4,r2,#8 // [1] shift 1 byte left + orrs r2,r4 // [1] add pixel to accumulator + lsls r4,r2,#16 // [1] shift 2 bytes left + orrs r2,r4 // [1] add pixel to accumulator + + // [3] store 8 pixels + stmia r0!,{r1,r2} // [3] store 8 pixels + + // [2,3] loop counter + subs r7,#1 // [1] 8-pixel counter + bne 6b // [1,2] next 8-pixels + + // pop registers +8: pop {r3-r7,pc} + + .align 2 +// pointer to SIO base +RenderTilePersp_pSioBase: + .word SIO_BASE // addres of SIO base + +// pointer to Interp0 base +RenderTilePersp_Interp: + .word SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET // addres of interpolator 0 base + +RenderTilePersp_Ctrl: // lane control word + .word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT<ints0 = (1u << VGA_DMA_PIO0); + + // switch current buffer index + // BufInx = 0 running CtrlBuf1 and preparing CtrlBuf2, BufInx = 1 running CtrlBuf2 and preparing CtrlBuf1 + // bufinx = 0 was running CtrlBuf1, will run CtrlBuf2, will process CtrlBuf1 + int bufinx = BufInx; + BufInx = bufinx ^ 1; + + // update DMA control channels of base layer, and run it + dma_channel_set_read_addr(VGA_DMA_CB0, CtrlBufNext[0], true); + + // save integer divider state + hw_divider_save_state(&DividerState); + + // increment scanline + int line = ScanLine; // 
current scanline + line++; // new current scanline + if (line > CurVmode.vtot) // last scanline? + { + Frame++; // increment frame counter + line = 1; // restart scanline + } + ScanLine = line; // store new scanline + + int y0 = -1; + u8 linetype = ScanlineType[line]; + switch (linetype) + { + case LINE_IMG: // progressive image 0, 1, 2,... + y0 = line - CurVmode.vfirst1; + if (CurVmode.dbly) y0 >>= 1; + VSync = False; // not vsync + break; + + case LINE_IMGEVEN1: // interlaced image even 0, 2, 4,..., 1st subframe + y0 = line - CurVmode.vfirst1; + if (CurVmode.dbly) y0 >>= 1; + y0 <<= 1; + VSync = False; // not vsync + break; + + case LINE_IMGEVEN2: // interlaced image even 0, 2, 4,..., 2nd subframe + y0 = line - CurVmode.vfirst2; + if (CurVmode.dbly) y0 >>= 1; + y0 <<= 1; + VSync = False; // not vsync + break; + + case LINE_IMGODD1: // interlaced image odd 1, 3, 5,..., 1st subframe + y0 = line - CurVmode.vfirst1; + if (CurVmode.dbly) y0 >>= 1; + y0 = (y0 << 1) + 1; + VSync = False; // not vsync + break; + + case LINE_IMGODD2: // interlaced image odd 1, 3, 5,..., 2nd subframe + y0 = line - CurVmode.vfirst2; + if (CurVmode.dbly) y0 >>= 1; + y0 = (y0 << 1) + 1; + VSync = False; // not vsync + break; + + default: + VSync = True; // vsync + break; + } + + // update DMA control channels of overlapped layers + // check if scanline is visible + if (y0 >= 0) + { + // loop overlapped layers + int layer; + for (layer = 1; layer < LAYERS; layer++) + { + // check if this layer is active + if (CtrlBufNext[layer] == NULL) continue; + + // check if this layer screen is active + sLayer* s = &LayerScreen[layer]; + if (!s->on || (s->w <= 0) || (y0 < s->y) || (y0 >= s->y + s->h)) continue; + + // wait for idle state + // IRQ0 comes a few pixels before end of scanline, when DMA_PIO0 is finished. + // We must wait 1 to 2 us to complete layer DMA. Sometimes it can take + // longer - for such cases we must restart both DMA and state machine. 
			int sm = VGA_SM(layer);
			u32 t1 = time_us_32();
			do {
				u8 a = *(volatile u8*)&VGA_PIO->sm[sm].addr & 0x1f;
				if (a <= CurLayerProg.maxidle+LAYER_OFFSET) break;
			} while ((u32)(time_us_32() - t1) < (u32)10); // wait max. 10 us, low resolution can take long time

			// stop DMA channel
			// NOTE(review): both channels are aborted twice in alternation - presumably to
			// catch a chain trigger that fires between the two aborts; confirm against the SDK.
			dma_channel_abort(VGA_DMA_PIO(layer));
			dma_channel_abort(VGA_DMA_CB(layer));
			dma_channel_abort(VGA_DMA_PIO(layer));
			dma_channel_abort(VGA_DMA_CB(layer));

			// restart state machine and clear FIFOs
			pio_sm_set_enabled(VGA_PIO, sm, false);
			pio_sm_clear_fifos(VGA_PIO, sm);
			pio_sm_restart(VGA_PIO, sm);
			pio_sm_exec(VGA_PIO, sm, pio_encode_jmp(CurLayerProg.idle+LAYER_OFFSET));
			pio_sm_set_enabled(VGA_PIO, sm, true);

			// enter new scanline
			pio_sm_exec(VGA_PIO, sm, pio_encode_jmp(CurLayerProg.entry+LAYER_OFFSET));

			// start DMA
			dma_channel_set_read_addr(VGA_DMA_CB(layer), CtrlBufNext[layer], true);
		}
	}

	return bufinx;
}

// render scanline buffers
//  cbuf ... write position in the base layer control buffer (pairs: u32 count, read address)
//  cbuf0 ... start of the control buffer set; the control buffer of every overlapped
//            layer is found by advancing it by CtrlBufSize[] of the preceding layers
//  dbuf ... start of the data buffer set; advanced per layer by LineBufSize[]
//  y0 ... image line to render
// Returns the new write position in the base layer control buffer. The end mark of
// the base layer is NOT written here (the caller appends it); the end mark of every
// rendered overlapped layer is written here.
// Side effect: CtrlBufNext[layer] of every overlapped layer is set to its control
// buffer when the layer has something to show on line y0, otherwise to NULL.
u32* __not_in_flash_func(VgaBufRender)(u32* cbuf, u32* cbuf0, u8* dbuf, int y0)
{
// ---- render base layer

	// HSYNC + back porch
	*cbuf++ = 4; // send 4x u32
	*cbuf++ = (u32)LineBufHsBp; // HSYNC + back porch

	// render scanline
	//  cbuf ... control buffer
	//  dbuf ... data buffer (pixel data)
	//  line ... current line 0..
	//  pixnum ... total pixels (must be multiple of 4)
	cbuf = Render(cbuf, dbuf, y0, CurVmode.width);

	// front porch
	*cbuf++ = 1; // send 1x u32
	*cbuf++ = (u32)&LineBufFp; // front porch

// ---- render overlapped layers

	int layer;
	for (layer = 1; layer < LAYERS; layer++)
	{
		// shift buffers (to the buffers of this layer)
		cbuf0 += CtrlBufSize[layer-1];
		dbuf += LineBufSize[layer-1];

		// layer has nothing to send on this line, unless proven otherwise below
		CtrlBufNext[layer] = NULL;

		// check if layer is active
		int mode = LayerModeInx[layer];
		if (mode == LAYERMODE_BASE) continue;

		// check if this layer screen is active (switched on, not empty, line y0 inside its rows)
		sLayer* s = &LayerScreen[layer];
		if (!s->on || (s->w <= 0) || (y0 < s->y) || (y0 >= s->y + s->h)) continue;
		int y = y0 - s->y; // line relative to the top of the layer

		// set next control buffer
		u32* cbuf2 = cbuf0;
		CtrlBufNext[layer] = cbuf2;

		// write init word (first 4 bytes of the layer data buffer, sent as 1x u32)
		u8* dbuf2 = dbuf;
		*cbuf2++ = 1;
		*cbuf2++ = (u32)dbuf2;
		*(u32*)dbuf2 = BYTESWAP(s->init);
		dbuf2 += 4;

		// render data
		switch(mode)
		{
		case LAYERMODE_SPRITEKEY:
		case LAYERMODE_SPRITEBLACK:
		case LAYERMODE_SPRITEWHITE:
			{
				// send s->trans words of a line pre-filled with key color, sprites drawn over it
				*cbuf2++ = s->trans;
				*cbuf2++ = (u32)dbuf2;
				MemSet4((u32*)dbuf2, s->keycol, s->w/4);
				RenderSprite(dbuf2, y, s);
			}
			break;

		case LAYERMODE_FASTSPRITEKEY:
		case LAYERMODE_FASTSPRITEBLACK:
		case LAYERMODE_FASTSPRITEWHITE:
			{
				// fast sprites write their own control pairs and return the new position
				MemSet4((u32*)dbuf2, s->keycol, s->w/4);
				cbuf2 = RenderFastSprite(cbuf2, y, s, dbuf2);
			}
			break;

		case LAYERMODE_PERSPKEY:	// layer with key color and image with transformation matrix
		case LAYERMODE_PERSPBLACK:	// layer with black key color and image with transformation matrix
		case LAYERMODE_PERSPWHITE:	// layer with white key color and image with transformation matrix
			{
				int w = s->w; // destination width
				int x = s->x; // destination coordinate X

				// underflow left edge
				if (x < 0)
				{
					x = ALIGN4(x+4098) - 4096; // round X to 4-pixels
					w += x; // decrease W
					x = -x; // start offset of X
				}
				else
				{
					// overflow right edge
					if (x + w > CurVmode.width)
					{
						w = CurVmode.width - x; // limit W
					}
					x = 0;
				}

				// align W down
				w = ALIGN4(w);

				if (w <= 0)
				{
					// minimal transparent pixels
					*cbuf2++ = 1;
					*cbuf2++ = (u32)dbuf2;
					*(u32*)dbuf2 = s->keycol;
				}
				else
				{
					// decode image
					*cbuf2++ = w/4;
					*cbuf2++ = (u32)&dbuf2[x];
					RenderPersp(dbuf2, y, s);
				}
			}
			break;

		case LAYERMODE_PERSP2KEY:	// layer with key color and image with transformation matrix
		case LAYERMODE_PERSP2BLACK:	// layer with black key color and image with transformation matrix
		case LAYERMODE_PERSP2WHITE:	// layer with white key color and image with transformation matrix
			{
				// same clipping as the LAYERMODE_PERSP* case above, rendered by RenderPersp2
				int w = s->w; // destination width
				int x = s->x; // destination coordinate X

				// underflow left edge
				if (x < 0)
				{
					x = ALIGN4(x+4098) - 4096; // round X to 4-pixels
					w += x; // decrease W
					x = -x; // start offset of X
				}
				else
				{
					// overflow right edge
					if (x + w > CurVmode.width)
					{
						w = CurVmode.width - x; // limit W
					}
					x = 0;
				}

				// align W down
				w = ALIGN4(w);

				if (w <= 0)
				{
					// minimal transparent pixels
					*cbuf2++ = 1;
					*cbuf2++ = (u32)dbuf2;
					*(u32*)dbuf2 = s->keycol;
				}
				else
				{
					// decode image
					*cbuf2++ = w/4;
					*cbuf2++ = (u32)&dbuf2[x];
					RenderPersp2(dbuf2, y, s);
				}
			}
			break;

		case LAYERMODE_RLE:
			{
				// rows indices
				u16* row = (u16*)s->par;

				// length of the row
				int n = row[y+1] - row[y];

				// set transfer count
				*cbuf2++ = n;

				// start new DMA
				*cbuf2++ = (u32)&s->img[row[y]*4];
			}
			break;

		default:
			{
				// set transfer count
				*cbuf2++ = s->trans;

				// start new DMA (image row y, s->wb bytes per row)
				*cbuf2++ = (u32)&s->img[y*s->wb];
			}
			break;
		}

		// end mark of layer
		*cbuf2++ = 0; // end mark
		*cbuf2++ = 0; // end mark
	}

	return cbuf;
}

// VGA DMA handler - called on end of every scanline
// Calls VgaBufProcess() to switch buffers and advance ScanLine, then builds the
// control list of the other buffer set for the scanline following ScanLine and
// terminates it with the [0,0] end mark. The integer divider state saved by
// VgaBufProcess() is restored before returning.
extern "C" void __not_in_flash_func(VgaLine)()
{
	// process scanline buffers (will save integer divider state into DividerState)
	int bufinx = VgaBufProcess();

	// prepare buffers to be processed next
	u8* dbuf; // data buffer
	u32* cbuf; // control buffer
	if (bufinx == 0)
	{
		dbuf = LineBuf1;
		cbuf = CtrlBuf1;
	}
	else
	{
		dbuf = LineBuf2;
		cbuf = CtrlBuf2;
	}
	CtrlBufNext[0] = cbuf;
	u32* cbuf0 = cbuf; // control buffer base

	// next rendered scanline
	int line = ScanLine; // current scanline
	line++; 		// next line to render
	if (line > CurVmode.vtot) line = 1;
	int y0; // image line; assigned and used only in the LINE_IMG* cases

	u8 linetype = ScanlineType[line];
	switch (linetype)
	{
	case LINE_VSYNC:	// long vertical sync
		*cbuf++ = 2; // send 2x u32
		*cbuf++ = (u32)&LineBufSync[0]; // VSYNC
		break;

	case LINE_VVSYNC:	// short vertical + vertical sync
		*cbuf++ = 4; // send 4x u32
		*cbuf++ = (u32)&LineBufSync[4]; // VSYNC
		break;

	case LINE_VHSYNC:	// short vertical + horizontal sync
		*cbuf++ = 4; // send 4x u32
		*cbuf++ = (u32)&LineBufSync[6]; // VSYNC + half
		break;

	case LINE_HHSYNC:	// short horizontal + horizontal sync
		*cbuf++ = 4; // send 4x u32
		*cbuf++ = (u32)&LineBufSync[0]; // half + half
		break;

	case LINE_HVSYNC:	// short horizontal + vertical sync
		*cbuf++ = 4; // send 4x u32
		*cbuf++ = (u32)&LineBufSync[2]; // half + VSYNC
		break;

	case LINE_DARK:		// dark line
		*cbuf++ = 2; // send 2x u32
		*cbuf++ = (u32)LineBufDark; // dark
		break;

	case LINE_IMG:		// progressive image 0, 1, 2,...
		y0 = line - CurVmode.vfirst1;
		if (CurVmode.dbly) y0 >>= 1;
		cbuf = VgaBufRender(cbuf, cbuf0, dbuf, y0);
		break;

	case LINE_IMGEVEN1:	// interlaced image even 0, 2, 4,..., 1st subframe
		y0 = line - CurVmode.vfirst1;
		if (CurVmode.dbly) y0 >>= 1;
		y0 <<= 1;
		cbuf = VgaBufRender(cbuf, cbuf0, dbuf, y0);
		break;

	case LINE_IMGEVEN2:	// interlaced image even 0, 2, 4,..., 2nd subframe
		y0 = line - CurVmode.vfirst2;
		if (CurVmode.dbly) y0 >>= 1;
		y0 <<= 1;
		cbuf = VgaBufRender(cbuf, cbuf0, dbuf, y0);
		break;

	case LINE_IMGODD1:	// interlaced image odd 1, 3, 5,..., 1st subframe
		y0 = line - CurVmode.vfirst1;
		if (CurVmode.dbly) y0 >>= 1;
		y0 = (y0 << 1) + 1;
		cbuf = VgaBufRender(cbuf, cbuf0, dbuf, y0);
		break;

	case LINE_IMGODD2:	// interlaced image odd 1, 3, 5,..., 2nd subframe
		y0 = line - CurVmode.vfirst2;
		if (CurVmode.dbly) y0 >>= 1;
		y0 = (y0 << 1) + 1;
		cbuf = VgaBufRender(cbuf, cbuf0, dbuf, y0);
		break;
	}

	// terminate the base layer control list
	*cbuf++ = 0; // end mark
	*cbuf++ = 0; // end mark

	// restore integer divider state
	hw_divider_restore_state(&DividerState);
}

// initialize VGA DMA
// control blocks aliases:
//                  +0x0        +0x4          +0x8          +0xC (Trigger)
// 0x00 (alias 0):  READ_ADDR   WRITE_ADDR    TRANS_COUNT   CTRL_TRIG
// 0x10 (alias 1):  CTRL        READ_ADDR     WRITE_ADDR    TRANS_COUNT_TRIG
// 0x20 (alias 2):  CTRL        TRANS_COUNT   READ_ADDR     WRITE_ADDR_TRIG
// 0x30 (alias 3):  CTRL        WRITE_ADDR    TRANS_COUNT   READ_ADDR_TRIG ... !
+ +void VgaDmaInit() +{ + dma_channel_config cfg; + int layer; + for (layer = 0; layer < LAYERS; layer++) + { + // layer is not active + if ((layer > 0) && (LayerModeInx[layer] == LAYERMODE_BASE)) continue; + +// ==== prepare DMA control channel + + // prepare DMA default config + cfg = dma_channel_get_default_config(VGA_DMA_CB(layer)); + + // increment address on read from memory + channel_config_set_read_increment(&cfg, true); + + // increment address on write to DMA port + channel_config_set_write_increment(&cfg, true); + + // each DMA transfered entry is 32-bits + channel_config_set_transfer_data_size(&cfg, DMA_SIZE_32); + + // write ring - wrap to 8-byte boundary (TRANS_COUNT and READ_ADDR_TRIG of data DMA) + channel_config_set_ring(&cfg, true, 3); + + // DMA configure + dma_channel_configure( + VGA_DMA_CB(layer), // channel + &cfg, // configuration + &dma_hw->ch[VGA_DMA_PIO(layer)].al3_transfer_count, // write address + &CtrlBuf1[0], // read address - as first, control buffer 1 will be sent out + 2, // number of transfers in u32 + false // do not start yet + ); + +// ==== prepare DMA data channel + + // prepare DMA default config + cfg = dma_channel_get_default_config(VGA_DMA_PIO(layer)); + + // increment address on read from memory + channel_config_set_read_increment(&cfg, true); + + // do not increment address on write to PIO + channel_config_set_write_increment(&cfg, false); + + // each DMA transfered entry is 32-bits + channel_config_set_transfer_data_size(&cfg, DMA_SIZE_32); + + // DMA data request for sending data to PIO + channel_config_set_dreq(&cfg, pio_get_dreq(VGA_PIO, VGA_SM(layer), true)); + + // chain channel to DMA control block + channel_config_set_chain_to(&cfg, VGA_DMA_CB(layer)); + + // raise the IRQ flag when 0 is written to a trigger register (end of chain) + channel_config_set_irq_quiet(&cfg, true); + + // set byte swapping + channel_config_set_bswap(&cfg, true); + + // set high priority + cfg.ctrl |= 
DMA_CH0_CTRL_TRIG_HIGH_PRIORITY_BITS; + + // DMA configure + dma_channel_configure( + VGA_DMA_PIO(layer), // channel + &cfg, // configuration + &VGA_PIO->txf[VGA_SM(layer)], // write address + NULL, // read address + 0, // number of transfers in u32 + false // do not start immediately + ); + } + +// ==== initialize IRQ0, raised from base layer 0 + + // enable DMA channel IRQ0 + dma_channel_set_irq0_enabled(VGA_DMA_PIO0, true); + + // set DMA IRQ handler + irq_set_exclusive_handler(DMA_IRQ_0, VgaLine); + + // set highest IRQ priority + irq_set_priority(DMA_IRQ_0, 0); +} + +// initialize VGA PIO +void VgaPioInit() +{ + int i; + + // clear PIO instruction memory + pio_clear_instruction_memory(VGA_PIO); + + // configure main program instructions + uint16_t ins[32]; // temporary buffer of program instructions + memcpy(ins, &vga_program_instructions, vga_program.length*sizeof(uint16_t)); // copy program into buffer + u16 cpp = (u16)CurVmode.cpp; // number of clocks per pixel + ins[vga_offset_extra1] |= (cpp-2) << 8; // update waits + ins[vga_offset_extra2] |= (cpp-2) << 8; // update waits + + // load main program into PIO's instruction memory + struct pio_program prg; + prg.instructions = ins; + prg.length = vga_program.length; + prg.origin = BASE_OFFSET; + pio_add_program(VGA_PIO, &prg); + + // load layer program + if (LayerProgInx != LAYERPROG_BASE) + { + // configure layer program instructions + memcpy(ins, CurLayerProg.ins, CurLayerProg.length*sizeof(uint16_t)); // copy program into buffer + for (i = 0; i < CurLayerProg.extranum; i++) + { + int extra = (int)cpp - CurLayerProg.extra[i*2+1]; + if (extra < 0) extra = 0; + ins[CurLayerProg.extra[i*2]] |= extra << 8; // update waits + } + + // load layer program into PIO's instruction memory + prg.instructions = ins; + prg.length = CurLayerProg.length; + prg.origin = LAYER_OFFSET; + pio_add_program(VGA_PIO, &prg); + } + + // connect PIO to the pad + // JMH + //for (i = VGA_GPIO_FIRST; i <= VGA_GPIO_LAST; i++) 
pio_gpio_init(VGA_PIO, i); + for (i = VGA_GPIO_FIRST; i < VGA_GPIO_LAST; i++) pio_gpio_init(VGA_PIO, i); + pio_gpio_init(VGA_PIO, VGA_GPIO_SYNC); + + // negative HSYNC output + if (!CurVmode.psync) gpio_set_outover(VGA_GPIO_SYNC, GPIO_OVERRIDE_INVERT); + + int layer; + for (layer = 0; layer < LAYERS; layer++) + { + // layer is not active + if ((layer > 0) && (LayerModeInx[layer] == LAYERMODE_BASE)) continue; + + // set pin direction to output + // JMH + //pio_sm_set_consecutive_pindirs(VGA_PIO, VGA_SM(layer), VGA_GPIO_FIRST, VGA_GPIO_NUM, true); + pio_sm_set_consecutive_pindirs(VGA_PIO, VGA_SM(layer), VGA_GPIO_FIRST, VGA_GPIO_OUTNUM, true); + pio_sm_set_consecutive_pindirs(VGA_PIO, VGA_SM(layer), VGA_GPIO_SYNC, 1, true); + + // get default config + pio_sm_config cfg = pio_get_default_sm_config(); + + // map state machine's OUT and MOV pins + sm_config_set_out_pins(&cfg, LayerFirstPin[layer], LayerNumPin[layer]); + + // join FIFO to send only + sm_config_set_fifo_join(&cfg, PIO_FIFO_JOIN_TX); + + // PIO clock divider + sm_config_set_clkdiv(&cfg, CurVmode.div); + + // shift left, autopull, pull threshold + sm_config_set_out_shift(&cfg, false, true, 32); + + // base layer 0 + if (layer == 0) + { + // set wrap + sm_config_set_wrap(&cfg, vga_wrap_target+BASE_OFFSET, vga_wrap+BASE_OFFSET); + + // set sideset pins of base layer + sm_config_set_sideset(&cfg, 1, false, false); + sm_config_set_sideset_pins(&cfg, VGA_GPIO_SYNC); + + // initialize state machine + pio_sm_init(VGA_PIO, VGA_SM0, vga_offset_entry+BASE_OFFSET, &cfg); + } + else + { + // set wrap + sm_config_set_wrap(&cfg, CurLayerProg.wrap_target+LAYER_OFFSET, CurLayerProg.wrap+LAYER_OFFSET); + + // initialize state machine + pio_sm_init(VGA_PIO, VGA_SM(layer), CurLayerProg.idle+LAYER_OFFSET, &cfg); + } + } +} + +// initialize scanline buffers +void VgaBufInit() +{ + // init HSYNC..back porch buffer + // hsync must be min. 3 + // hback must be min. 
13 + LineBufHsBp[0] = BYTESWAP(VGACMD(vga_offset_sync+BASE_OFFSET,CurVmode.hsync-3)); // HSYNC + LineBufHsBp[1] = BYTESWAP(VGADARK(CurVmode.hback-4-1-9,0)); // back porch - 1 - 9 + LineBufHsBp[2] = BYTESWAP(VGACMD(vga_offset_irqset+BASE_OFFSET,0)); // IRQ command (takes 9 clock cycles) + LineBufHsBp[3] = BYTESWAP(VGACMD(vga_offset_output+BASE_OFFSET, CurVmode.width - 2)); // missing 2 clock cycles after last pixel + + // init front porch buffer + // hfront must be min. 4 + LineBufFp = BYTESWAP(VGADARK(CurVmode.hfront-4,0)); // front porch + + // init dark line + LineBufDark[0] = BYTESWAP(VGACMD(vga_offset_sync+BASE_OFFSET,CurVmode.hsync-3)); // HSYNC + LineBufDark[1] = BYTESWAP(VGADARK(CurVmode.htot-CurVmode.hsync-4,0)); // dark line + + // TV mode + if (CurVmode.inter) + { + // vertical synchronization + LineBufSync[0] = BYTESWAP(VGACMD(vga_offset_sync+BASE_OFFSET,CurVmode.hsync/2-3)); // HSYNC + LineBufSync[1] = BYTESWAP(VGADARK(CurVmode.htot/2-CurVmode.hsync/2-4,0)); // dark line + LineBufSync[2] = BYTESWAP(VGACMD(vga_offset_sync+BASE_OFFSET,CurVmode.hsync/2-3)); // HSYNC + LineBufSync[3] = BYTESWAP(VGADARK((CurVmode.htot+1)/2-CurVmode.hsync/2-4,0)); // dark line + + LineBufSync[4] = BYTESWAP(VGACMD(vga_offset_sync+BASE_OFFSET,CurVmode.htot/2-CurVmode.hsync-3)); // invert dark line + LineBufSync[5] = BYTESWAP(VGADARK(CurVmode.hsync-4,0)); // invert HSYNC + LineBufSync[6] = BYTESWAP(VGACMD(vga_offset_sync+BASE_OFFSET,(CurVmode.htot+1)/2-CurVmode.hsync-3)); // invert dark line + LineBufSync[7] = BYTESWAP(VGADARK(CurVmode.hsync-4,0)); // invert HSYNC + + LineBufSync[8] = BYTESWAP(VGACMD(vga_offset_sync+BASE_OFFSET,CurVmode.hsync/2-3)); // HSYNC + LineBufSync[9] = BYTESWAP(VGADARK(CurVmode.htot/2-CurVmode.hsync/2-4,0)); // dark line + + // control blocks - initialize to VSYNC + CtrlBuf1[0] = 4; // send 4x u32 + CtrlBuf1[1] = (u32)&LineBufSync[4]; // VSYNC + + CtrlBuf2[0] = 4; // send 4x u32 + CtrlBuf2[1] = (u32)&LineBufSync[4]; // VSYNC + } + + // VGA mode + else + 
{ + // vertical synchronization + // hsync must be min. 4 + LineBufSync[0] = BYTESWAP(VGACMD(vga_offset_sync+BASE_OFFSET,CurVmode.htot-CurVmode.hsync-3)); // invert dark line + LineBufSync[1] = BYTESWAP(VGADARK(CurVmode.hsync-4,0)); // invert HSYNC + + // control blocks - initialize to VSYNC + CtrlBuf1[0] = 2; // send 2x u32 + CtrlBuf1[1] = (u32)&LineBufSync[0]; // VSYNC + + CtrlBuf2[0] = 2; // send 2x u32 + CtrlBuf2[1] = (u32)&LineBufSync[0]; // VSYNC + } + + CtrlBuf1[2] = 0; // stop mark + CtrlBuf1[3] = 0; // stop mark + + CtrlBuf2[2] = 0; // stop mark + CtrlBuf2[3] = 0; // stop mark +} + +// terminate VGA service +void VgaTerm() +{ + int i; + + // abort DMA channels + dma_channel_abort(VGA_DMA_PIO0); // pre-abort, could be chaining right now + dma_channel_abort(VGA_DMA_CB0); + for (i = 0; i < LAYERS; i++) + { + dma_channel_abort(VGA_DMA_PIO(i)); + dma_channel_abort(VGA_DMA_CB(i)); + } + + // disable IRQ0 from DMA0 + irq_set_enabled(DMA_IRQ_0, false); + dma_channel_set_irq0_enabled(VGA_DMA_PIO0, false); + + // Clear the interrupt request for DMA control channel + dma_hw->ints0 = (1u << VGA_DMA_PIO0); + + // stop all state machines + pio_set_sm_mask_enabled(VGA_PIO, VGA_SMALL, false); + + // restart state machine + pio_restart_sm_mask(VGA_PIO, VGA_SMALL); + + // clear FIFOs + for (i = 0; i < LAYERS; i++) + { + pio_sm_clear_fifos(VGA_PIO, VGA_SM(i)); + CtrlBufNext[i] = NULL; + } + + // clear PIO instruction memory + pio_clear_instruction_memory(VGA_PIO); +} + +// initialize scanline type table +void ScanlineTypeInit(const sVmode* v) +{ + u8* d = ScanlineType; + int i, k; + + // line 0 is not used + *d++ = LINE_DARK; + + // progressive mode (VGA 525) + if (!v->inter) + { + // vertical sync (VGA 2) + for (i = v->vsync1; i > 0; i--) *d++ = LINE_VSYNC; + + // dark (VGA 33) + for (i = v->vback1; i > 0; i--) *d++ = LINE_DARK; + + // image (VGA 480) + for (i = v->vact1; i > 0; i--) *d++ = LINE_IMG; + + // dark (VGA 10) + for (i = v->vfront1; i > 0; i--) *d++ = LINE_DARK; 
+ } + + // interlaced mode (PAL 625, NTSC 525) + // - frames start with whole VSYNC + else + { + // vertical sync (PAL 2, NTSC 3) + for (i = v->vsync1/2; i > 0; i--) *d++ = LINE_VVSYNC; + + // vertical sync + half sync (PAL 1, NTSC 0) + if ((v->vsync1 & 1) != 0) *d++ = LINE_VHSYNC; + + // half sync (PAL 2, NTSC 3) + for (i = v->vpost1/2; i > 0; i--) *d++ = LINE_HHSYNC; + + // dark (PAL 18+23, NTSC 10+2) + for (i = v->vback1; i > 0; i--) *d++ = LINE_DARK; + + // image 1st sub-frame (PAL 240, NTSC 240) + if (v->odd) + for (i = v->vact1; i > 0; i--) *d++ = LINE_IMGODD1; // odd lines 1, 3, 5, ... (PAL) + else + for (i = v->vact1; i > 0; i--) *d++ = LINE_IMGEVEN1; // even lines 0, 2, 4, ... (NTSC) + + // dark (PAL 24, NTSC 1) + for (i = v->vfront1; i > 0; i--) *d++ = LINE_DARK; + + // half sync (PAL 2, NTSC 3) + for (i = v->vpre1/2; i > 0; i--) *d++ = LINE_HHSYNC; + + // half sync + vertical sync (PAL 1, NTSC 1) + k = v->vpre1 & 1; + if (k != 0) *d++ = LINE_HVSYNC; + + // vertical sync (PAL 2, NTSC 2) + for (i = (v->vsync2 - k)/2; i > 0; i--) *d++ = LINE_VVSYNC; + + // vertical sync + half sync (PAL 0, NTSC 1) + if (((v->vsync2 - k) & 1) != 0) *d++ = LINE_VHSYNC; + + // half sync (PAL 2, NTSC 2) + for (i = v->vpost2/2; i > 0; i--) *d++ = LINE_HHSYNC; + + // dark (PAL 18+23, NTSC 11+2) + for (i = v->vback2; i > 0; i--) *d++ = LINE_DARK; + + // image 2nd sub-frame (PAL 240, NTSC 240) + if (v->odd) + for (i = v->vact2; i > 0; i--) *d++ = LINE_IMGEVEN2; // even lines 0, 2, 4, ... (PAL) + else + for (i = v->vact2; i > 0; i--) *d++ = LINE_IMGODD2; // odd lines 1, 3, 5, ... 
(NTSC) + + // dark (PAL 24, NTSC 1) + for (i = v->vfront2; i > 0; i--) *d++ = LINE_DARK; + + // half sync (PAL 3, NTSC 3) + for (i = v->vpre2/2; i > 0; i--) *d++ = LINE_HHSYNC; + } +} + +// scanline names +const char* ScanlineName[] = { + "VSYNC", // long vertical sync + "VVSYNC", // short vertical + vertical sync + "VHSYNC", // short vertical + horizontal sync + "HHSYNC", // short horizontal + horizontal sync + "HVSYNC", // short horizontal + vertical sync + "DARK", // dark line + "IMG", // progressive image 0, 1, 2,... + "IMGEVEN1", // interlaced image even 0, 2, 4,..., 1st subframe + "IMGEVEN2", // interlaced image even 0, 2, 4,..., 2nd subframe + "IMGODD1", // interlaced image odd 1, 3, 5,..., 1st subframe + "IMGODD2", // interlaced image odd 1, 3, 5,..., 2nd subframe +}; + +// print table if scanline types +void ScanlineTypePrint(const u8* scan, int lines) +{ + // skip scanline 0 + scan++; + + // load scanline 1 + u8 last = *scan++; + int num = 1; + int line = 1; + + // process other scanlines + int i; + for (i = 2; i <= lines; i++) + { + if ((*scan != last) || (i == lines)) + { + if (num == 1) + printf("%d (1): %s\n", line, line + num - 1, ScanlineName[last]); + else + printf("%d..%d (%d): %s\n", line, line + num - 1, num, ScanlineName[last]); + + last = *scan; + num = 1; + line = i; + } + else + num++; + scan++; + } +} + +// initialize videomode (returns False on bad configuration) +// - All layer modes must use same layer program (LAYERMODE_BASE = overlapped layers are OFF) +void VgaInit(const sVmode* vmode) +{ + int i; + + // stop old state + VgaTerm(); + + // initialize scanline type table + ScanlineTypeInit(vmode); + + // prepare render font pixel mask + for (i = 0; i < 256; i++) + { + // higher 4 bits + u32 m = 0; + if ((i & B7) != 0) m |= 0xff; + if ((i & B6) != 0) m |= 0xff << 8; + if ((i & B5) != 0) m |= 0xff << 16; + if ((i & B4) != 0) m |= 0xff << 24; + RenderTextMask[2*i] = m; + + // lower 4 bits + m = 0; + if ((i & B3) != 0) m |= 0xff; + if ((i & 
B2) != 0) m |= 0xff << 8; + if ((i & B1) != 0) m |= 0xff << 16; + if ((i & B0) != 0) m |= 0xff << 24; + RenderTextMask[2*i+1] = m; + } + + // emergency check of structure definitions + if ( (SSPRITE_SIZE != sizeof(sSprite)) || + (SLAYER_SIZE != sizeof(sLayer)) || + (SSEGM_SIZE != sizeof(sSegm)) || + (SSTRIP_SIZE != sizeof(sStrip)) || + (SSCREEN_SIZE != sizeof(sScreen))) + { + while (1) {} + } + + // clear buffer with black color + memset(LineBuf0, COL_BLACK, BLACK_MAX); + + // save current videomode + memcpy(&CurVmode, vmode, sizeof(sVmode)); + + // initialize parameters + ScanLine = 1; // currently processed scanline +// Frame = 0; + BufInx = 0; // at first, control buffer 1 will be sent out + CtrlBufNext[0] = CtrlBuf2; + + // initialize base layer + LayerModeInx[0] = LAYERMODE_BASE; + memcpy(&CurLayerMode[0], &LayerMode[LAYERMODE_BASE], sizeof(sLayerMode)); + memset(&LayerScreen[0], 0, sizeof(sLayer)); + + // save layer modes + LayerModeInx[1] = vmode->mode[1]; + LayerModeInx[2] = vmode->mode[2]; + LayerModeInx[3] = vmode->mode[3]; + + LayerMask = B0; // mask of active layers + for (i = 1; i < LAYERS; i++) + { + memcpy(&CurLayerMode[i], &LayerMode[LayerModeInx[i]], sizeof(sLayerMode)); + if (LayerModeInx[i] != LAYERMODE_BASE) LayerMask |= (1 << i); + } + + // get layer program + LayerProgInx = vmode->prog; + memcpy(&CurLayerProg, &LayerProg[LayerProgInx], sizeof(sLayerProg)); + + // initialize VGA PIO + VgaPioInit(); + + // initialize scanline buffers + VgaBufInit(); + + // initialize DMA + VgaDmaInit(); + + // enable DMA IRQ + irq_set_enabled(DMA_IRQ_0, true); + + // start DMA with base layer 0 + dma_channel_start(VGA_DMA_CB0); + + // run state machines + pio_enable_sm_mask_in_sync(VGA_PIO, LayerMask); +} + +const sVmode* volatile VgaVmodeReq = NULL; // request to reinitialize videomode, 1=only stop driver + +void (* volatile Core1Fnc)() = NULL; // core 1 remote function + +// VGA core +void VgaCore() +{ + const sVmode* v; + void (*fnc)(); + while (1) + { + 
__dmb(); + + // initialize videomode + v = VgaVmodeReq; + if (v != NULL) + { + if ((u32)v == (u32)1) + VgaTerm(); // terminate + else + VgaInit(v); + __dmb(); + VgaVmodeReq = NULL; + } + + // execute remote function + fnc = Core1Fnc; + if (fnc != NULL) + { + fnc(); + __dmb(); + Core1Fnc = NULL; + } + } +} + +// request to initialize VGA videomode, NULL=only stop driver (wait to initialization completes) +void VgaInitReq(const sVmode* vmode) +{ + if (vmode == NULL) vmode = (const sVmode*)1; + __dmb(); + VgaVmodeReq = vmode; + while (VgaVmodeReq != NULL) { __dmb(); } +} + +// execute core 1 remote function +void Core1Exec(void (*fnc)()) +{ + __dmb(); + Core1Fnc = fnc; + __dmb(); +} + +// check if core 1 is busy (executing remote function) +Bool Core1Busy() +{ + __dmb(); + return Core1Fnc != NULL; +} + +// wait if core 1 is busy (executing remote function) +void Core1Wait() +{ + while (Core1Busy()) {} +} + +// wait for VSync scanline +void WaitVSync() +{ + // wait for end of VSync + while (VSync) { __dmb(); } + + // wait for start of VSync + while (!VSync) { __dmb(); } +} diff --git a/MCUME_pico/picovga_t4/vga.h b/MCUME_pico/picovga_t4/vga.h new file mode 100755 index 0000000..3387740 --- /dev/null +++ b/MCUME_pico/picovga_t4/vga.h @@ -0,0 +1,135 @@ + +// **************************************************************************** +// +// VGA output +// +// **************************************************************************** + +#ifndef _VGA_H +#define _VGA_H + +// scanline type +#define LINE_VSYNC 0 // long vertical sync +#define LINE_VVSYNC 1 // short vertical + vertical sync +#define LINE_VHSYNC 2 // short vertical + horizontal sync +#define LINE_HHSYNC 3 // short horizontal + horizontal sync +#define LINE_HVSYNC 4 // short horizontal + vertical sync +#define LINE_DARK 5 // dark line +#define LINE_IMG 6 // progressive image 0, 1, 2,... 
+#define LINE_IMGEVEN1 7 // interlaced image even 0, 2, 4,..., 1st subframe +#define LINE_IMGEVEN2 8 // interlaced image even 0, 2, 4,..., 2nd subframe +#define LINE_IMGODD1 9 // interlaced image odd 1, 3, 5,..., 1st subframe +#define LINE_IMGODD2 10 // interlaced image odd 1, 3, 5,..., 2nd subframe + +extern u8 ScanlineType[MAXLINE]; + +extern int DispDev; // current display device +extern sVmode CurVmode; // copy of current videomode table +//extern int LayerMode; // current layer mode (LAYERMODE_*) +extern volatile int ScanLine; // current scan line 1... +extern volatile u32 Frame; // frame counter +extern volatile int BufInx; // current buffer set (0..1) +extern volatile Bool VSync; // current scan line is vsync or dark + +// line buffers +extern ALIGNED u8 LineBuf1[DBUF_MAX]; // scanline 1 image data +extern ALIGNED u8 LineBuf2[DBUF_MAX]; // scanline 2 image data +extern int LineBufSize[LAYERS_MAX]; // size of data buffers +extern u32 LineBufHsBp[4]; // HSYNC ... back porch-1 ... IRQ command ... image command +extern u32 LineBufFp; // front porch+1 +extern u32 LineBufDark[2]; // HSYNC ... dark line +extern u32 LineBufSync[10]; // vertical synchronization + // interlaced (5x half scanlines): + // 2x half synchronization (HSYNC pulse/2 ... line dark/2) + // 2x vertical synchronization (invert line dark/2 ... invert HSYNC pulse) + // 1x half synchronization (HSYNC pulse/2 ... line dark/2) + // progressive: 1x scanline with vertical synchronization (invert line dark ... 
invert HSYNC pulse) + +extern ALIGNED u8 LineBuf0[BLACK_MAX]; // line buffer with black color (used to clear rest of scanline) + +// control buffers +extern u32 CtrlBuf1[CBUF_MAX]; // control pairs: u32 count, read address (must be terminated with [0,0]) +extern u32 CtrlBuf2[CBUF_MAX]; // control pairs: u32 count, read address (must be terminated with [0,0]) + +extern int CtrlBufSize[LAYERS_MAX]; // size of control buffers + +// render font pixel mask +extern u32 RenderTextMask[512]; + +// fill memory buffer with u32 words +// buf ... data buffer, must be 32-bit aligned +// data ... data word to store +// num ... number of 32-bit words (= number of bytes/4) +// Returns new destination address. +extern "C" u32* MemSet4(u32* buf, u32 data, int num); + +// blit scanline using key color +// dst ... destination buffer +// src ... source buffer +// w ... width +// key ... key color +extern "C" void BlitKey(u8* dst, u8* src, int w, u8 key); + +// render layers with sprites LAYERMODE_SPRITE* +// dbuf ... pointer to data buffer +// y ... coordinate of scanline +// scr ... pointer to layer screen structure sLayer +extern "C" void RenderSprite(u8* dbuf, int y, sLayer* scr); + +// render layers with fast sprites LAYERMODE_FASTSPRITE* +// cbuf ... pointer to control buffer +// y ... coordinate of scanline +// scr ... pointer to layer screen structure sLayer +// buf ... pointer to destination data buffer with transparent color +// Output new pointer to control buffer. +extern "C" u32* RenderFastSprite(u32* cbuf, int y, sLayer* scr, u8* buf); + +// render layers with transformation matrix LAYERMODE_PERSP* +// R0 ... dbuf pointer to data buffer +// R1 ... y coordinate of scanline (relative in destination image) +// R2 ... scr pointer to layer screen structure sLayer +extern "C" void RenderPersp(u8* dbuf, int y, sLayer* scr); + +// render layers double pixel with transformation matrix LAYERMODE_PERSP2* +// R0 ... dbuf pointer to data buffer +// R1 ... 
y coordinate of scanline (relative in destination image) +// R2 ... scr pointer to layer screen structure sLayer +extern "C" void RenderPersp2(u8* dbuf, int y, sLayer* scr); + +// render scanline +// cbuf ... control buffer +// dbuf ... data buffer (pixel data) +// line ... current line 0.. +// pixnum ... total pixels (must be multiple of 4) +// Returns new pointer to control buffer +extern "C" u32* Render(u32* cbuf, u8* dbuf, int line, int pixnum); + +// initialize scanline type table +void ScanlineTypeInit(const sVmode* v); + +// print table of scanline types +void ScanlineTypePrint(const u8* scan, int lines); + +// initialize videomode (NOTE: declared void - no False/True status is returned) +// - All layer modes must use same layer program (LAYERMODE_BASE = overlapped layers are OFF) +void VgaInit(const sVmode* vmode); //, u8 layer1mode=LAYERMODE_BASE, u8 layer2mode=LAYERMODE_BASE, u8 layer3mode=LAYERMODE_BASE); + +// VGA core +void VgaCore(); + +// request to initialize VGA videomode, NULL=only stop driver (waits until initialization completes) +void VgaInitReq(const sVmode* vmode); + +// execute core 1 remote function +void Core1Exec(void (*fnc)()); + +// check if core 1 is busy (executing remote function) +Bool Core1Busy(); + +// wait while core 1 is busy (executing remote function) +void Core1Wait(); + +// wait for VSync scanline +void WaitVSync(); + +#endif // _VGA_H diff --git a/MCUME_pico/picovga_t4/vga_blitkey.S b/MCUME_pico/picovga_t4/vga_blitkey.S new file mode 100755 index 0000000..d9534d8 --- /dev/null +++ b/MCUME_pico/picovga_t4/vga_blitkey.S @@ -0,0 +1,90 @@ + +// **************************************************************************** +// +// VGA sprites +// +// **************************************************************************** +// Takes 100 bytes + +#include "define.h" // common definitions of C and ASM + + .syntax unified + .section .time_critical.BlitKey, "ax" + .cpu cortex-m0plus + .thumb // use 16-bit instructions + +// [6,7] blit macro (4 instructions, 
8 bytes) +.macro blitkey n + ldrb r4,[r1,#\n] // [2] load 1 pixel + cmp r4,r3 // [1] is it transparent color? + beq 2f // [1,2] pixel is transparent + strb r4,[r0,#\n] // [2] write 1 pixel +2: +.endm + +// blit scanline using key color +// dst ... destination buffer +// src ... source buffer +// w ... width +// key ... key color +//extern "C" void BlitKey(u8* dst, u8* src, int w, u8 key); + +.thumb_func +.global BlitKey +BlitKey: + + // push registers + push {r4,lr} + +// Registers: +// R0 ... destination buffer +// R1 ... source buffer +// R2 ... width counter +// R3 ... key color +// R4 ... (temporary) + + // save start of destination buffer + mov lr,r0 // start buffer + + // get number of pixels aligned to 8 bytes + lsrs r4,r2,#3 // number of pixels / 8 + lsls r4,#3 // number of pixels aligned to 8 bytes down -> R4 + eors r2,r4 // number of pixels last 3 bits (modulo 8) + + // shift pointers to last 8-byte group + add r0,r4 // shift destination pointer to the end + add r1,r4 // shift source pointer to the end + + // jump to blit rest of pixels in last 8-byte group + adr r4,3f // get address of label '3:' (must be word aligned) + lsls r2,#3 // *8, convert number of pixels to offset of blit macro (1 macro is 8 bytes long) + subs r4,r2 // subtract offset of first valid blit macro + adds r4,#1 // set bit 0 - flag to use thumb instructions + bx r4 // jump into loop + +// ---- [53..61 per loop] blend pixels, speed 6.625..7.625 clock cycles per pixel + +.align 2 // address of label '3:' must be word aligned (32 bits) + + // [2] shift pointers 8 bytes down +1: subs r0,#8 // [1] shift destination pointer by 8 bytes down + subs r1,#8 // [1] shift source pointer by 8 bytes down + + // [48..56] blit 8 pixels (32 instructions) + blitkey 7 // [6,7] blit pixel 7 + blitkey 6 // [6,7] blit pixel 6 + blitkey 5 // [6,7] blit pixel 5 + blitkey 4 // [6,7] blit pixel 4 + blitkey 3 // [6,7] blit pixel 3 + blitkey 2 // [6,7] blit pixel 2 + blitkey 1 // [6,7] blit pixel 1 + blitkey 0 // 
[6,7] blit pixel 0 + +// this address must be word aligned + + // [2,3] next 8 pixels +3: cmp r0,lr // [1] start address reached? + bhi 1b // [1,2] not start address yet + + // pop registers and return from function +9: pop {r4,pc} diff --git a/MCUME_pico/picovga_t4/vga_config.h b/MCUME_pico/picovga_t4/vga_config.h new file mode 100755 index 0000000..f52387c --- /dev/null +++ b/MCUME_pico/picovga_t4/vga_config.h @@ -0,0 +1,114 @@ +#include "platform_config.h" +#include "iopins.h" + +// **************************************************************************** +// +// VGA configuration +// +// **************************************************************************** + +// === Configuration +#define LAYERS 1 //4 // total layers 1..4 (1 base layer + 3 overlapped layers) +#define SEGMAX 8 // max. number of video segments per video strip (size of 1 sSegm = 28 bytes) +#define STRIPMAX 8 // max. number of video strips (size of 1 sStrip = sSegm size*SEGMAX+4 = 228 bytes) + // size of sScreen = sStrip size*STRIPMAX+4 = 1828 bytes + +#define MAXX 320 //640 // max. resolution in X direction (must be multiple of 4) +#define MAXY 240 //480 // max. resolution in Y direction + +#define MAXLINE 700 // max. number of scanlines (including sync and dark lines) + +// === Scanline render buffers (800 pixels: default size of buffers = 2*4*(800+8+800+24)+800 = 13856 bytes +// Requirements by format, base layer 0, 1 wrap X segment: +// GF_GRAPH8 ... control buffer 16 bytes +// GF_TILE8 ... control buffer "width"+8 bytes +// GF_TILE16 ... control buffer "width/2"+8 bytes +// GF_TILE32 ... control buffer "width/4"+8 bytes +// GF_TILE64 ... control buffer "width/8"+8 bytes +// GF_PROGRESS ... control buffer 24 bytes +// other formats: data buffer "width" bytes, control buffer 16 bytes +#define DBUF0_MAX (MAXX+8) // max. size of data buffer of layer 0 +#define CBUF0_MAX ((MAXX+24)/4) // max. 
size of control buffer of layer 0 + +// Requirements by format, overlapped layer 1..3: +// LAYERMODE_SPRITE* ... data buffer "width"+4 bytes, control buffer 24 bytes +// LAYERMODE_FASTSPRITE* ... data buffer "width"+4 bytes, control buffer up to "width*2"+16 bytes +// other formats ... data buffer 4 bytes, control buffer 24 bytes +#define DBUF1_MAX (MAXX+8) // max. size of data buffer of layer 1 +#define CBUF1_MAX ((MAXX+24)/4) // max. size of control buffer of layer 1 + +#define DBUF2_MAX (MAXX+8) // max. size of data buffer of layer 2 +#define CBUF2_MAX ((MAXX+24)/4) // max. size of control buffer of layer 2 + +#define DBUF3_MAX (MAXX+8) // max. size of data buffer of layer 3 +#define CBUF3_MAX ((MAXX+24)/4) // max. size of control buffer of layer 3 + +#if LAYERS==1 +#define DBUF_MAX DBUF0_MAX // max. size of data buffer +#define CBUF_MAX CBUF0_MAX // max. size of control buffer +#elif LAYERS==2 +#define DBUF_MAX (DBUF0_MAX+DBUF1_MAX) // max. size of data buffer +#define CBUF_MAX (CBUF0_MAX+CBUF1_MAX) // max. size of control buffer +#elif LAYERS==3 +#define DBUF_MAX (DBUF0_MAX+DBUF1_MAX+DBUF2_MAX) // max. size of data buffer +#define CBUF_MAX (CBUF0_MAX+CBUF1_MAX+CBUF2_MAX) // max. size of control buffer +#elif LAYERS==4 +#define DBUF_MAX (DBUF0_MAX+DBUF1_MAX+DBUF2_MAX+DBUF3_MAX) // max. size of data buffer +#define CBUF_MAX (CBUF0_MAX+CBUF1_MAX+CBUF2_MAX+CBUF3_MAX) // max. size of control buffer +#else +#error Unsupported number of layers! +#endif + +// === VGA port pins +// GP0 ... VGA B0 blue +// GP1 ... VGA B1 +// GP2 ... VGA G0 green +// GP3 ... VGA G1 +// GP4 ... VGA G2 +// GP5 ... VGA R0 red +// GP6 ... VGA R1 +// GP7 ... VGA R2 +// GP8 ... 
VGA SYNC synchronization (inverted: negative SYNC=LOW=0x80, BLACK=HIGH=0x00) +#define VGA_GPIO_FIRST VGA_COLORBASE // first VGA GPIO +#define VGA_GPIO_NUM 9 // number of VGA GPIOs, including HSYNC and VSYNC +#define VGA_GPIO_OUTNUM 8 // number of VGA color GPIOs, without HSYNC and VSYNC +#define VGA_GPIO_LAST (VGA_GPIO_FIRST+VGA_GPIO_NUM-1) // last VGA GPIO +#define VGA_GPIO_SYNC VGA_SYNCBASE // VGA SYNC GPIO + +// VGA PIO and state machines +#define VGA_PIO pio0 // VGA PIO +#define VGA_SM0 0 // VGA state machine of base layer 0 +#define VGA_SM1 1 // VGA state machine of overlapped layer 1 +#define VGA_SM2 2 // VGA state machine of overlapped layer 2 +#define VGA_SM3 3 // VGA state machine of overlapped layer 3 +#define VGA_SM(layer) (VGA_SM0+(layer)) // VGA state machine of the layer + +#if LAYERS==1 +#define VGA_SMALL B0 // mask of all state machines +#elif LAYERS==2 +#define VGA_SMALL (B0+B1) // mask of all state machines +#elif LAYERS==3 +#define VGA_SMALL (B0+B1+B2) // mask of all state machines +#elif LAYERS==4 +#define VGA_SMALL (B0+B1+B2+B3) // mask of all state machines +#else +#error Unsupported number of layers! 
+#endif + +// VGA DMA +#define VGA_DMA 2 // VGA DMA base channel +#define VGA_DMA_CB0 (VGA_DMA+0) // VGA DMA channel - control block of base layer +#define VGA_DMA_PIO0 (VGA_DMA+1) // VGA DMA channel - copy data of base layer to PIO (raises IRQ0 on quiet) +#define VGA_DMA_CB1 (VGA_DMA+2) // VGA DMA channel - control block of overlapped layer 1 +#define VGA_DMA_PIO1 (VGA_DMA+3) // VGA DMA channel - copy data of overlapped layer 1 to PIO +#define VGA_DMA_CB2 (VGA_DMA+4) // VGA DMA channel - control block of overlapped layer 2 +#define VGA_DMA_PIO2 (VGA_DMA+5) // VGA DMA channel - copy data of overlapped layer 2 to PIO +#define VGA_DMA_CB3 (VGA_DMA+6) // VGA DMA channel - control block of overlapped layer 3 +#define VGA_DMA_PIO3 (VGA_DMA+7) // VGA DMA channel - copy data of overlapped layer 3 to PIO + +#define VGA_DMA_CB(layer) (VGA_DMA_CB0+(layer)*2) // VGA DMA control channel of the layer +#define VGA_DMA_PIO(layer) (VGA_DMA_PIO0+(layer)*2) // VGA DMA data channel of the layer + +#define VGA_DMA_NUM (LAYERS*2) // number of used DMA channels +#define VGA_DMA_FIRST VGA_DMA // first used DMA +#define VGA_DMA_LAST (VGA_DMA_FIRST+VGA_DMA_NUM-1) // last used DMA diff --git a/MCUME_pico/picovga_t4/vga_layer.cpp b/MCUME_pico/picovga_t4/vga_layer.cpp new file mode 100755 index 0000000..a281b43 --- /dev/null +++ b/MCUME_pico/picovga_t4/vga_layer.cpp @@ -0,0 +1,505 @@ + +// **************************************************************************** +// +// VGA layers +// +// **************************************************************************** + +#include "include.h" + +// layer program descriptors +const sLayerProg LayerProg[LAYERPROG_NUM] = { + + // LAYERPROG_BASE base layer + { + .ins=vga_program_instructions, // pointer to program instructions + .prg=&vga_program, // pointer to program descriptor + .length=vga_program.length, // program length (number of instructions) + .wrap_target=vga_wrap_target, // offset of wrap target + .wrap=vga_wrap, // offset of wrap end + 
.idle=vga_offset_entry, // offset of idle + .entry=vga_offset_entry, // offset of entry + .maxidle=2, // max. offset of idle to detect end of job + .extranum=2, // number of extra offsets + .extra={ // extra offsets, pairs: offset, CPP-correction + vga_offset_extra1, 2, + vga_offset_extra2, 2, + }, + }, + + // LAYERPROG_KEY layer with key color + { + .ins=keylayer_program_instructions, // pointer to program instructions + .prg=&keylayer_program, // pointer to program descriptor + .length=keylayer_program.length, // program length (number of instructions) + .wrap_target=keylayer_wrap_target, // offset of wrap target + .wrap=keylayer_wrap, // offset of wrap end + .idle=keylayer_offset_idle, // offset of idle + .entry=keylayer_offset_entry, // offset of entry + .maxidle=2, // max. offset of idle to detect end of job + .extranum=1, // number of extra offsets + .extra={ // extra offsets, pairs: offset, CPP-correction + keylayer_offset_extra1, 6, + }, + }, + + // LAYERPROG_BLACK layer with black key color + { + .ins=blacklayer_program_instructions, // pointer to program instructions + .prg=&blacklayer_program, // pointer to program descriptor + .length=blacklayer_program.length, // program length (number of instructions) + .wrap_target=blacklayer_wrap_target, // offset of wrap target + .wrap=blacklayer_wrap, // offset of wrap end + .idle=blacklayer_offset_idle, // offset of idle + .entry=blacklayer_offset_entry, // offset of entry + .maxidle=2, // max. 
offset of idle to detect end of job + .extranum=2, // number of extra offsets + .extra={ // extra offsets, pairs: offset, CPP-correction + blacklayer_offset_extra1, 4, + blacklayer_offset_extra2, 3, + }, + }, + + // LAYERPROG_WHITE layer with white key color + { + .ins=whitelayer_program_instructions, // pointer to program instructions + .prg=&whitelayer_program, // pointer to program descriptor + .length=whitelayer_program.length, // program length (number of instructions) + .wrap_target=whitelayer_wrap_target, // offset of wrap target + .wrap=whitelayer_wrap, // offset of wrap end + .idle=whitelayer_offset_idle, // offset of idle + .entry=whitelayer_offset_entry, // offset of entry + .maxidle=2, // max. offset of idle to detect end of job + .extranum=1, // number of extra offsets + .extra={ // extra offsets, pairs: offset, CPP-correction + whitelayer_offset_extra1, 4, + }, + }, + + // LAYERPROG_MONO layer with mono pattern or simple color + { + .ins=monolayer_program_instructions, // pointer to program instructions + .prg=&monolayer_program, // pointer to program descriptor + .length=monolayer_program.length, // program length (number of instructions) + .wrap_target=monolayer_wrap_target, // offset of wrap target + .wrap=monolayer_wrap, // offset of wrap end + .idle=monolayer_offset_idle, // offset of idle + .entry=monolayer_offset_entry, // offset of entry + .maxidle=2, // max. 
offset of idle to detect end of job + .extranum=2, // number of extra offsets + .extra={ // extra offsets, pairs: offset, CPP-correction + monolayer_offset_extra1, 4, + monolayer_offset_extra2, 2, + }, + }, + + // LAYERPROG_RLE layer with RLE compression + { + .ins=rlelayer_program_instructions, // pointer to program instructions + .prg=&rlelayer_program, // pointer to program descriptor + .length=rlelayer_program.length, // program length (number of instructions) + .wrap_target=rlelayer_wrap_target, // offset of wrap target + .wrap=rlelayer_wrap, // offset of wrap end + .idle=rlelayer_offset_idle, // offset of idle + .entry=rlelayer_offset_entry, // offset of entry + .maxidle=2, // max. offset of idle to detect end of job + .extranum=7, // number of extra offsets + .extra={ // extra offsets, pairs: offset, CPP-correction + rlelayer_offset_extra1, 1, + rlelayer_offset_extra2, 3, + rlelayer_offset_extra3, 2, + rlelayer_offset_extra4, 2, + rlelayer_offset_extra5, 3, + rlelayer_offset_extra6, 2, + rlelayer_offset_extra7, 3, + }, + }, + +}; + +// current layer program of overlapped layers +u8 LayerProgInx; // index of current layer program (LAYERPROG_*) +sLayerProg CurLayerProg; // copy of current layer program + +// layer mode descriptors +const sLayerMode LayerMode[LAYERMODE_NUM] = { + + // LAYERMODE_BASE base layer + { + .prog=LAYERPROG_BASE, // layer program (LAYERPROG_*) + .mincpp=2, // minimal clock cycles per pixel + .maxcpp=17, // maximal clock cycles per pixel + }, + + // LAYERMODE_KEY layers with key color + { + .prog=LAYERPROG_KEY, // layer program (LAYERPROG_*) + .mincpp=6, // minimal clock cycles per pixel + .maxcpp=37, // maximal clock cycles per pixel + }, + + // LAYERMODE_BLACK layers with black key color + { + .prog=LAYERPROG_BLACK, // layer program (LAYERPROG_*) + .mincpp=4, // minimal clock cycles per pixel + .maxcpp=34, // maximal clock cycles per pixel + }, + + // LAYERMODE_WHITE layers with white key color + { + .prog=LAYERPROG_WHITE, // layer 
program (LAYERPROG_*) + .mincpp=4, // minimal clock cycles per pixel + .maxcpp=35, // maximal clock cycles per pixel + }, + + // LAYERMODE_MONO layers with mono pattern + { + .prog=LAYERPROG_MONO, // layer program (LAYERPROG_*) + .mincpp=4, // minimal clock cycles per pixel + .maxcpp=35, // maximal clock cycles per pixel + }, + + // LAYERMODE_COLOR layers with simple color + { + .prog=LAYERPROG_MONO, // layer program (LAYERPROG_*) + .mincpp=2, // minimal clock cycles per pixel + .maxcpp=33, // maximal clock cycles per pixel + }, + + // LAYERMODE_RLE layers with RLE compression + { + .prog=LAYERPROG_RLE, // layer program (LAYERPROG_*) + .mincpp=3, // minimal clock cycles per pixel + .maxcpp=32, // maximal clock cycles per pixel + }, + + // LAYERMODE_SPRITEKEY layers with sprites with key color + { + .prog=LAYERPROG_KEY, // layer program (LAYERPROG_*) + .mincpp=6, // minimal clock cycles per pixel + .maxcpp=37, // maximal clock cycles per pixel + }, + + // LAYERMODE_SPRITEBLACK layers with sprites with black key color + { + .prog=LAYERPROG_BLACK, // layer program (LAYERPROG_*) + .mincpp=4, // minimal clock cycles per pixel + .maxcpp=34, // maximal clock cycles per pixel + }, + + // LAYERMODE_SPRITEWHITE layers with sprites with white key color + { + .prog=LAYERPROG_WHITE, // layer program (LAYERPROG_*) + .mincpp=4, // minimal clock cycles per pixel + .maxcpp=35, // maximal clock cycles per pixel + }, + + // LAYERMODE_FASTSPRITEKEY layers with fast sprites with key color + { + .prog=LAYERPROG_KEY, // layer program (LAYERPROG_*) + .mincpp=6, // minimal clock cycles per pixel + .maxcpp=37, // maximal clock cycles per pixel + }, + + // LAYERMODE_FASTSPRITEBLACK layers with fast sprites with black key color + { + .prog=LAYERPROG_BLACK, // layer program (LAYERPROG_*) + .mincpp=4, // minimal clock cycles per pixel + .maxcpp=34, // maximal clock cycles per pixel + }, + + // LAYERMODE_FASTSPRITEWHITE layers with fast sprites with white key color + { + .prog=LAYERPROG_WHITE, 
// layer program (LAYERPROG_*) + .mincpp=4, // minimal clock cycles per pixel + .maxcpp=35, // maximal clock cycles per pixel + }, + + // LAYERMODE_PERSPKEY layer with key color and image with transformation matrix + { + .prog=LAYERPROG_KEY, // layer program (LAYERPROG_*) + .mincpp=6, // minimal clock cycles per pixel + .maxcpp=37, // maximal clock cycles per pixel + }, + + // LAYERMODE_PERSPBLACK layer with black key color and image with transformation matrix + { + .prog=LAYERPROG_BLACK, // layer program (LAYERPROG_*) + .mincpp=4, // minimal clock cycles per pixel + .maxcpp=34, // maximal clock cycles per pixel + }, + + // LAYERMODE_PERSPWHITE layer with white key color and image with transformation matrix + { + .prog=LAYERPROG_WHITE, // layer program (LAYERPROG_*) + .mincpp=4, // minimal clock cycles per pixel + .maxcpp=35, // maximal clock cycles per pixel + }, + + // LAYERMODE_PERSP2KEY layer with key color and double pixel image with transformation matrix + { + .prog=LAYERPROG_KEY, // layer program (LAYERPROG_*) + .mincpp=6, // minimal clock cycles per pixel + .maxcpp=37, // maximal clock cycles per pixel + }, + + // LAYERMODE_PERSP2BLACK layer with black key color and double pixel image with transformation matrix + { + .prog=LAYERPROG_BLACK, // layer program (LAYERPROG_*) + .mincpp=4, // minimal clock cycles per pixel + .maxcpp=34, // maximal clock cycles per pixel + }, + + // LAYERMODE_PERSP2WHITE layer with white key color and double pixel image with transformation matrix + { + .prog=LAYERPROG_WHITE, // layer program (LAYERPROG_*) + .mincpp=4, // minimal clock cycles per pixel + .maxcpp=35, // maximal clock cycles per pixel + }, +}; + +// current layer mode of layers +u8 LayerModeInx[LAYERS]; // index of current layer mode (LAYERMODE_*) +sLayerMode CurLayerMode[LAYERS]; // copy of current layer mode + +// current layer screens +sLayer LayerScreen[LAYERS]; // layer screens + +u8 LayerMask; // mask of active layers + +// index of first pin of layer (base 
layer should stay VGA_GPIO_FIRST) +u8 LayerFirstPin[LAYERS_MAX] = { VGA_GPIO_FIRST, VGA_GPIO_FIRST, VGA_GPIO_FIRST, VGA_GPIO_FIRST}; + +// number of pins of overlapped layer (base layer should stay VGA_GPIO_OUTNUM) +u8 LayerNumPin[LAYERS_MAX] = { VGA_GPIO_OUTNUM, VGA_GPIO_OUTNUM, VGA_GPIO_OUTNUM, VGA_GPIO_OUTNUM}; + +// set overlapped layer 1..3 ON +void LayerOn(u8 inx) +{ + __dmb(); + LayerScreen[inx].on = True; + __dmb(); +} + +// set overlapped layer 1..3 OFF +void LayerOff(u8 inx) +{ + __dmb(); + LayerScreen[inx].on = False; + __dmb(); +} + +// set coordinate X of overlapped layer +void LayerSetX(u8 inx, s16 x) +{ + sLayer* lay = &LayerScreen[inx]; + s32 cppx = lay->cpp*x; // initial delay + if (cppx < 0) cppx = 0; + u32 w = lay->w; // image width + u32 init = 0; // init word + + // prepare init word + switch (lay->mode) + { + case LAYERMODE_PERSP2KEY: // layer with key color and double pixel image with transformation matrix + case LAYERMODE_PERSPKEY: // layer with key color and image with transformation matrix + case LAYERMODE_FASTSPRITEKEY: // layer with fast sprites with key color + case LAYERMODE_SPRITEKEY: // layer with sprites with key color + case LAYERMODE_KEY: // layer with key color + init = VGAKEY(cppx, w, (lay->keycol & 0xff)); + break; + + case LAYERMODE_PERSP2BLACK: // layer with black key color and double pixel image with transformation matrix + case LAYERMODE_PERSPBLACK: // layer with black key color and image with transformation matrix + case LAYERMODE_FASTSPRITEBLACK: // layer with fast sprites with black key color + case LAYERMODE_SPRITEBLACK: // layer with sprites with black key color + case LAYERMODE_BLACK: // layer with black key color + init = VGABLACK(cppx, w); + break; + + case LAYERMODE_PERSP2WHITE: // layer with white key color and double pixel image with transformation matrix + case LAYERMODE_PERSPWHITE: // layer with white key color and image with transformation matrix + case LAYERMODE_FASTSPRITEWHITE: // layer with fast sprites 
with white key color + case LAYERMODE_SPRITEWHITE: // layer with sprites with white key color + case LAYERMODE_WHITE: // layer with white key color + init = VGAWHITE(cppx, w); + break; + + case LAYERMODE_MONO: // layer with mono pattern + init = VGAMONO(cppx, w, (lay->keycol & 0xff)); + break; + + case LAYERMODE_COLOR: // layer with simple color + init = VGACOLOR(cppx, w); + break; + + case LAYERMODE_RLE: // layer with RLE compression + init = VGARLE(cppx); + break; + } + lay->init = init; // init word + lay->x = x; // start X coordinate +} + +// set coordinate Y of overlapped layer +void LayerSetY(u8 inx, s16 y) +{ + sLayer* lay = &LayerScreen[inx]; + lay->y = y; +} + +// set width of image of overlapped layer +// Uses auto pitch wb (full line). Set custom wb after calling this function. +void LayerSetW(u8 inx, u16 w) +{ + sLayer* lay = &LayerScreen[inx]; + lay->w = w; // image width + Bool mono = (lay->mode == LAYERMODE_MONO); + lay->trans = mono ? (((w/8)+3)/4) : (w/4); // transfer count + lay->wb = mono ? (w/8) : w; // width bytes + LayerSetX(inx, lay->x); // update init word +} + +// set height of image of overlapped layer +void LayerSetH(u8 inx, u16 h) +{ + sLayer* lay = &LayerScreen[inx]; + lay->h = h; +} + +// setup overlapped layer 1..3 (not for sprites and not for perspective mode) +// inx ... layer index 1..3 +// img ... pointer to image data +// vmode ... pointer to initialized video configuration +// w ... image width in pixels (must be multiple of 4) +// h ... image height +// col ... key color (needed for LAYERMODE_KEY and LAYERMODE_MONO layer mode) +// par ... 
additional data (RLE index table, integer transformation matrix) +// Use these functions after layer setup: LayerSetX, LayerSetY, LayerOn +void LayerSetup(u8 inx, const u8* img, const sVmode* vmode, u16 w, u16 h, u8 col /* = 0 */, const void* par /* = NULL */) +{ + LayerOff(inx); // set layer OFF + sLayer* lay = &LayerScreen[inx]; // get pointer to layer + lay->img = img; // pointer to image data + lay->par = par; // additional parameter + lay->keycol = col | ((u16)col << 8) | ((u32)col << 16) | ((u32)col << 24); // key color + lay->x = 0; // X coordinate + lay->y = 0; // Y coordinate + lay->h = h; // height of image + lay->spritenum = 0; // number of sprites + lay->cpp = vmode->cpp; // save clocks per pixel + lay->mode = vmode->mode[inx]; // layer mode + LayerSetW(inx, w); // set width of image, update parameters init, trans and wb +} + +// setup overlapped layer 1..3 for LAYERMODE_PERSP* modes +// inx ... layer index 1..3 +// img ... pointer to source image data (image width and height must be power of 2) +// vmode ... pointer to initialized video configuration +// w ... destination image width in pixels (must be multiple of 4) +// h ... destination image height +// xbits ... number of bits of width of source image +// ybits ... number of bits of height of source image +// horiz ... horizon of perspective projection/4 (0=no perspecitve, <0 ceilling) +// mat ... integer transformation matrix +// col ... key color (needed for LAYERMODE_PERSPKEY layer mode) +// Use these functions after layer setup: LayerSetX, LayerSetY, LayerOn +void LayerPerspSetup(u8 inx, const u8* img, const sVmode* vmode, u16 w, u16 h, u8 xbits, u8 ybits, + s8 horiz, const int* mat, u8 col /* = 0 */) +{ + LayerSetup(inx, img, vmode, w, h, col, mat); + sLayer* lay = &LayerScreen[inx]; // get pointer to layer + lay->xbits = xbits; + lay->ybits = ybits; + lay->horiz = horiz; +} + +// setup overlapped layer 1..3 for LAYERMODE_SPRITE* and LAYERMODE_FASTSPRITE* modes +// inx ... 
layer index 1..3 +// sprite ... pointer to list of sprites (array of pointers to sprites; sorted by X on LAYERMODE_FASTSPRITE* modes) +// spritenum ... number of sprites in the list (to turn sprite off, you can set its coordinate Y out of the screen) +// vmode ... pointer to initialized video configuration +// x ... start coordinate X of area with sprites +// y ... start coordinate Y of area with sprites +// w ... width of area with sprites (must be multiple of 4) +// h ... height of area with sprites +// col ... key color (needed for LAYERMODE_SPRITEKEY and LAYERMODE_FASTSPRITEKEY layer mode) +// Use functions LayerOn after layer setup. +void LayerSpriteSetup(u8 inx, sSprite** sprite, u16 spritenum, const sVmode* vmode, + s16 x, s16 y, u16 w, u16 h, u8 col /* = 0 */) +{ + LayerSetup(inx, (const u8*)sprite, vmode, w, h, col); + LayerSetX(inx, x); + LayerSetY(inx, y); + sLayer* lay = &LayerScreen[inx]; // get pointer to layer + lay->spritenum = spritenum; +} + +// prepare array of start and length of lines (detects transparent pixels) +// img ... image +// x0 ... array of start of lines +// w0 ... array of length of lines +// w ... sprite width (slow sprite: max. width 255) +// h ... sprite height +// wb ... sprite pitch (bytes between lines) +// col ... key color +// fast ... 
fast sprite, divide start and length of line by 4 +void SpritePrepLines(const u8* img, u8* x0, u8* w0, u16 w, u16 h, u16 wb, u8 col, Bool fast) +{ + int x1, x2, w2, y; + const u8* d; + + // loop through lines + for (y = 0; y < h; y++) + { + // find start of line + d = &img[y*wb]; + for (x1 = 0; x1 < w; x1++) + { + if (*d != col) break; + d++; + } + + // find end of line + d = &img[y*wb + w - 1]; + for (x2 = w; x2 > x1; x2--) + { + if (*d != col) break; + d--; + } + + // prepare start and length + w2 = x2 - x1; + if (fast) + { + w2 += ((x2 + 3) & ~3) - x2; + x1 /= 4; + w2 = (w2 + 3)/4; + } + if (x1 > 255) x1 = 255; + if (w2 > 255) w2 = 255; + + // store start and length + *x0++ = x1; + *w0++ = w2; + } +} + +// sort fast sprite list by X coordinate +void SortSprite(sSprite** list, int num) +{ + int i; + sSprite* s; + sSprite* s2; + for (i = 0; i < num-1; i++) + { + s = list[i]; + s2 = list[i+1]; + if (s->x > s2->x) + { + list[i] = s2; + list[i+1] = s; + if (i > 0) i -= 2; + } + } +} diff --git a/MCUME_pico/picovga_t4/vga_layer.h b/MCUME_pico/picovga_t4/vga_layer.h new file mode 100755 index 0000000..d0c59ee --- /dev/null +++ b/MCUME_pico/picovga_t4/vga_layer.h @@ -0,0 +1,195 @@ + +// **************************************************************************** +// +// VGA layers +// +// **************************************************************************** + +#ifndef _VGA_LAYER_H +#define _VGA_LAYER_H + +// base layer commands +#define VGADARK(num,col) (((u32)(vga_offset_dark+BASE_OFFSET)<<27) | ((u32)(num)<<8) | (u32)(col)) // assemble control word of "dark" command +#define VGACMD(jmp,num) (((u32)(jmp)<<27) | (u32)(num)) // assemble control word + +// --- overlapped layer init word (delay: use number of offset pixels * Vmode.cpp, num: number of pixels) + +// init word of key color layer LAYERPROG_KEY +#define VGAKEY(delay,num,col) (((u32)((delay)+1)<<19) | ((u32)(col)<<11) | (u32)((num)-1)) + +// init word of mono layer LAYERPROG_MONO +#define 
VGAMONO(delay,num,col) (((u32)((delay)+0)<<20) | ((u32)(col)<<12) | ((u32)((num)-1)<<1) | B0) + +// init word of color layer LAYERPROG_MONO +#define VGACOLOR(delay,num) (((u32)((delay)+2)<<20) | ((u32)0xff<<12) | ((u32)((num)-1)<<1) | 0) + +// init word of black color layer LAYERPROG_BLACK +#define VGABLACK(delay,num) (((u32)((delay)+3)<<16) | (u32)((num)-1)) + +// init word of white color layer LAYERPROG_WHITE +#define VGAWHITE(delay,num) (((u32)((delay)+3)<<16) | (u32)((num)-1)) + +// init word of RLE layer LAYERPROG_RLE +#define VGARLE(delay) ((delay)+1) + +// swap bytes of command +#define BYTESWAP(n) ((((n)&0xff)<<24)|(((n)&0xff00)<<8)|(((n)&0xff0000)>>8)|(((n)&0xff000000)>>24)) + +// align to multiple of 4 +#define ALIGN4(x) ((x) & ~3) + +// layer program descriptor +typedef struct { + const u16* ins; // pointer to program instructions (NULL=layers is OFF) + const struct pio_program* prg; // pointer to program descriptor + u8 length; // program length (number of instructions) + u8 wrap_target; // offset of wrap target + u8 wrap; // offset of wrap end + u8 idle; // offset of idle + u8 entry; // offset of entry + u8 maxidle; // max. 
offset of idle to detect end of job + u8 extranum; // number of extra offsets + u8 extra[2*16]; // extra offsets, pairs: offset, CPP-correction +} sLayerProg; + +// layer program descriptors +extern const sLayerProg LayerProg[LAYERPROG_NUM]; + +// current layer program of overlapped layers +extern u8 LayerProgInx; // index of current layer program (LAYERPROG_*, LAYERPROG_BASE = overlapped layers are OFF) +extern sLayerProg CurLayerProg; // copy of current layer program + +// layer mode descriptor +typedef struct { + u8 prog; // layer program (LAYERPROG_*) + u8 mincpp; // minimal clock cycles per pixel + u8 maxcpp; // maximal clock cycles per pixel +} sLayerMode; + +// layer mode descriptors +extern const sLayerMode LayerMode[LAYERMODE_NUM]; + +// current layer mode of layers +extern u8 LayerModeInx[LAYERS]; // index of current layer mode (LAYERMODE_*) +extern sLayerMode CurLayerMode[LAYERS]; // copy of current layer mode + +// layer screen descriptor (on change update SLAYER_* in define.h) +typedef struct { + const u8* img; // pointer to image in current layer format, or sprite list + const void* par; // additional parameter (RLE index table, integer transformation matrix) + u32 init; // init word sent on start of scanline (start X coordinate) + u32 keycol; // key color + u16 trans; // trans count + s16 x; // start X coordinate + s16 y; // start Y coordinate + u16 w; // width in pixels + u16 h; // height + u16 wb; // image width in bytes (pitch of lines) + u8 mode; // layer mode + s8 horiz; // horizon of perspective projection/4 (only with LAYERMODE_PERSP* modes, 0=no perspective, <0 ceiling) + u8 xbits; // number of bits of width of source image (only with LAYERMODE_PERSP* modes) + u8 ybits; // number of bits of height of source image (only with LAYERMODE_PERSP* modes) + u16 spritenum; // number of sprites + Bool on; // layer is ON + u8 cpp; // current clock pulses per pixel (used to calculate X coordinate) +} sLayer; + +// sprite (on change update SSPRITE_* in 
define.h) +typedef struct { + u8* img; // SSPRITE_IMG pointer to image data + u8* x0; // SSPRITE_X0 pointer to array of start of lines, or fast sprite start of lines/4 + u8* w0; // SSPRITE_W0 pointer to array of length of lines, or fast sprite length of lines/4 + u32 keycol; // SSPRITE_KEYCOL key color + s16 x; // SSPRITE_X sprite X-coordinate on the screen + s16 y; // SSPRITE_Y sprite Y-coordinate on the screen + u16 w; // SSPRITE_W sprite width (slow sprite: max. width 255) + u16 h; // SSPRITE_H sprite height + u16 wb; // SSPRITE_WB sprite pitch (number of bytes between lines) + u16 res; // ...reserved, structure align +} sSprite; + +// current layer screens +extern sLayer LayerScreen[LAYERS]; // layer screens + +extern u8 LayerMask; // mask of active layers + +// index of first pin of layer (base layer should stay VGA_GPIO_FIRST) +extern u8 LayerFirstPin[LAYERS_MAX]; + +// number of pins of overlapped layer (base layer should stay VGA_GPIO_OUTNUM) +extern u8 LayerNumPin[LAYERS_MAX]; + +// set overlapped layer 1..3 ON +void LayerOn(u8 inx); + +// set overlapped layer 1..3 OFF +void LayerOff(u8 inx); + +// set coordinate X of overlapped layer +void LayerSetX(u8 inx, s16 x); + +// set coordinate Y of overlapped layer +void LayerSetY(u8 inx, s16 y); + +// set width of image of overlapped layer +// Uses auto pitch wb (full line). Set custom wb after calling this function. +void LayerSetW(u8 inx, u16 w); + +// set height of image of overlapped layer +void LayerSetH(u8 inx, u16 h); + +// setup overlapped layer 1..3 (not for sprites and not for perspective mode) +// inx ... layer index 1..3 +// img ... pointer to image data +// vmode ... pointer to initialized video configuration +// w ... image width in pixels (must be multiple of 4) +// h ... image height +// col ... key color (needed for LAYERMODE_KEY and LAYERMODE_MONO layer mode) +// par ... 
additional data (RLE index table, integer transformation matrix) +// Use these functions after layer setup: LayerSetX, LayerSetY, LayerOn +void LayerSetup(u8 inx, const u8* img, const sVmode* vmode, u16 w, u16 h, u8 col = 0, const void* par = NULL); + +// setup overlapped layer 1..3 for LAYERMODE_PERSP* modes +// inx ... layer index 1..3 +// img ... pointer to source image data (image width and height must be power of 2) +// vmode ... pointer to initialized video configuration +// w ... destination image width in pixels (must be multiple of 4) +// h ... destination image height +// xbits ... number of bits of width of source image +// ybits ... number of bits of height of source image +// horiz ... horizon of perspective projection/4 (0=no perspective, <0 ceiling) +// mat ... integer transformation matrix +// col ... key color (needed for LAYERMODE_PERSPKEY layer mode) +// Use these functions after layer setup: LayerSetX, LayerSetY, LayerOn +void LayerPerspSetup(u8 inx, const u8* img, const sVmode* vmode, u16 w, u16 h, u8 xbits, u8 ybits, + s8 horiz, const int* mat, u8 col = 0); + +// setup overlapped layer 1..3 for LAYERMODE_SPRITE* and LAYERMODE_FASTSPRITE* modes +// inx ... layer index 1..3 +// sprite ... pointer to list of sprites (array of pointers to sprites; sorted by X on LAYERMODE_FASTSPRITE* modes) +// spritenum ... number of sprites in the list (to turn sprite off, you can set its coordinate Y out of the screen) +// vmode ... pointer to initialized video configuration +// x ... start coordinate X of area with sprites +// y ... start coordinate Y of area with sprites +// w ... width of area with sprites (must be multiple of 4) +// h ... height of area with sprites +// col ... key color (needed for LAYERMODE_SPRITEKEY and LAYERMODE_FASTSPRITEKEY layer mode) +// Use function LayerOn after layer setup. 
+void LayerSpriteSetup(u8 inx, sSprite** sprite, u16 spritenum, const sVmode* vmode, + s16 x, s16 y, u16 w, u16 h, u8 col = 0); + +// prepare array of start and length of lines (detects transparent pixels) +// img ... image +// x0 ... array of start of lines +// w0 ... array of length of lines +// w ... sprite width (slow sprite: max. width 255) +// h ... sprite height +// wb ... sprite pitch (bytes between lines) +// col ... key color +// fast ... fast sprite, divide start and length of line by 4 +void SpritePrepLines(const u8* img, u8* x0, u8* w0, u16 w, u16 h, u16 wb, u8 col, Bool fast); + +// sort fast sprite list by X coordinate +void SortSprite(sSprite** list, int num); + +#endif // _VGA_LAYER_H diff --git a/MCUME_pico/picovga_t4/vga_pal.h b/MCUME_pico/picovga_t4/vga_pal.h new file mode 100755 index 0000000..ff71e40 --- /dev/null +++ b/MCUME_pico/picovga_t4/vga_pal.h @@ -0,0 +1,109 @@ + +// **************************************************************************** +// +// VGA colors and palettes +// +// **************************************************************************** + +#ifndef _VGA_PAL_H +#define _VGA_PAL_H + +#define MULTICOL(a,b,c,d) ((a)|((b)<<8)|((c)<<16)|((d)<<24)) // multiply color pattern (used in mode GF_COLOR) + +// CGA colors +#define CGACOL_0 0 // 0x000000 black +#define CGACOL_1 2 // 0x0000C3 dark blue +#define CGACOL_2 20 // 0x00C300 dark green +#define CGACOL_3 22 // 0x00C3C3 dark cyan +#define CGACOL_4 160 // 0xC30000 dark red +#define CGACOL_5 162 // 0xC300C3 dark magenta +#define CGACOL_6 168 // 0xC35400 brown +#define CGACOL_7 182 // 0xC3C3C3 light gray +#define CGACOL_8 73 // 0x545454 dark gray +#define CGACOL_9 75 // 0x5454FF light blue +#define CGACOL_10 93 // 0x54FF54 light green +#define CGACOL_11 95 // 0x54FFFF light cyan +#define CGACOL_12 233 // 0xFF5454 light red +#define CGACOL_13 235 // 0xFF54FF light magenta +#define CGACOL_14 253 // 0xFFFF54 yellow +#define CGACOL_15 255 // 0xFFFFFF white + +// ZX Spectrum color 
+#define ZXCOL_0 0 // 0x000000 black +#define ZXCOL_1 2 // 0x0000C3 dark blue +#define ZXCOL_2 160 // 0xC30000 dark red +#define ZXCOL_3 162 // 0xC300C3 dark magenta +#define ZXCOL_4 20 // 0x00C300 dark green +#define ZXCOL_5 22 // 0x00C3C3 dark cyan +#define ZXCOL_6 180 // 0xC3C300 dark yellow +#define ZXCOL_7 182 // 0xC3C3C3 light gray +#define ZXCOL_8 73 // 0x545454 dark gray +#define ZXCOL_9 3 // 0x0000FF light blue +#define ZXCOL_10 224 // 0xFF0000 light red +#define ZXCOL_11 227 // 0xFF00FF light magenta +#define ZXCOL_12 28 // 0x00FF00 light green +#define ZXCOL_13 31 // 0x00FFFF light cyan +#define ZXCOL_14 252 // 0xFFFF00 yellow +#define ZXCOL_15 255 // 0xFFFFFF white + +// Colors +// GP0 ... B0 ... VGA B0 blue +// GP1 ... B1 ... VGA B1 +// GP2 ... B2 ... VGA G0 green +// GP3 ... B3 ... VGA G1 +// GP4 ... B4 ... VGA G2 +// GP5 ... B5 ... VGA R0 red +// GP6 ... B6 ... VGA R1 +// GP7 ... B7 ... VGA R2 + +#define COL_BLACK 0 + +#define COL_DARKBLUE B0 +#define COL_SEMIBLUE B1 +#define COL_BLUE (B0+B1) +#define COL_MOREBLUE (COL_BLUE+B3+B6) +#define COL_LIGHTBLUE (COL_BLUE+B4+B7) + +#define COL_DARKGREEN B3 +#define COL_SEMIGREEN B4 +#define COL_GREEN (B2+B3+B4) +#define COL_MOREGREEN (COL_GREEN+B0+B6) +#define COL_LIGHTGREEN (COL_GREEN+B1+B7) + +#define COL_DARKRED B6 +#define COL_SEMIRED B7 +#define COL_RED (B5+B6+B7) +#define COL_MORERED (COL_RED+B0+B3) +#define COL_LIGHTRED (COL_RED+B1+B4) + +#define COL_DARKCYAN (B0+B3) +#define COL_SEMICYAN (B1+B4) +#define COL_CYAN (B0+B1+B2+B3+B4) + +#define COL_DARKMAGENTA (B0+B6) +#define COL_SEMIMAGENTA (B1+B7) +#define COL_MAGENTA (B0+B1+B5+B6+B7) + +#define COL_DARKYELLOW (B3+B6) +#define COL_SEMIYELLOW (B4+B7) +#define COL_YELLOW (B2+B3+B4+B5+B6+B7) + +#define COL_GRAY0 0 +#define COL_GRAY1 (B2+B5) +#define COL_GRAY2 (B0+B3+B6) +#define COL_GRAY3 (B0+B2+B3+B5+B6) +#define COL_GRAY4 (B1+B4+B7) +#define COL_GRAY5 (B1+B2+B4+B5+B7) +#define COL_GRAY6 (B0+B1+B3+B4+B6+B7) +#define COL_GRAY7 (B0+B1+B2+B3+B4+B5+B6+B7) + 
+#define COL_WHITE COL_GRAY7 + +// compose color from RGB +#define COLRGB(r,g,b) ((u8)(((r)&0xe0)|(((g)&0xe0)>>3)|((b)>>6))) + +// default 16-color palettes (CGA colors) +// - do not set "const", to stay in faster RAM +extern u8 DefPal16[16]; + +#endif // _VGA_PAL_H diff --git a/MCUME_pico/picovga_t4/vga_render.S b/MCUME_pico/picovga_t4/vga_render.S new file mode 100755 index 0000000..643603d --- /dev/null +++ b/MCUME_pico/picovga_t4/vga_render.S @@ -0,0 +1,313 @@ + +// **************************************************************************** +// +// VGA render +// +// **************************************************************************** + +#include "define.h" // common definitions of C and ASM + + .syntax unified + .section .time_critical.Render, "ax" + .cpu cortex-m0plus + .thumb // use 16-bit instructions + +.extern pScreen // sScreen* pScreen; // pointer to current video screen +.extern LineBuf0 // u8 LineBuf0[BLACK_MAX]; // line buffer with black color + +// extern "C" u32* Render(u32* cbuf, u8* dbuf, int line, int pixnum); + +// render scanline +// cbuf ... control buffer +// dbuf ... data buffer (pixel data) +// line ... current scanline 0.. +// pixnum ... total pixels (must be multiple of 4) +// Returns new pointer to control buffer + +.thumb_func +.global Render +Render: + + // push registers + push {r4-r7,lr} + + // prepare local variables +// SP+0: input argument of render functions +// SP+4: R0 control buffer +// SP+8: R1 data buffer (pixel data) +// SP+12: R2 current scanline 0.. 
+// SP+16: R3 total pixels +// SP+20: R4 +// SP+24: R5 +// SP+28: R6 +// SP+32: R7 +// SP+36: LR + + sub sp,#20 + str r0,[sp,#4] // control buffer + str r1,[sp,#8] // data buffer + str r3,[sp,#16] // total pixels + +// ---- prepare pointer to current screen +// sScreen* s = pScreen; +// if (s != NULL) { + + // prepare pointer to current screen + ldr r4,Render_pScreenAddr // pointer to pointer to current video Screen (variable pScreen) + ldr r4,[r4,#0] // pointer to current video Screen + cmp r4,#0 // is pointer valid? + beq Render_Clear // pointer is not valid, clear rest of line (display is OFF) + +// ---- find video strip with current scanline +// int stripnum = s->num; +// sStrip* t = &s->strip[0]; +// for (; stripnum > 0; stripnum--) { + + // loop through video strips + ldrh r5,[r4,#SSCREEN_NUM] // u16 number of video strips + tst r5,r5 // check number of video strips + beq Render_Clear // no video strips, return + adds r4,#SSCREEN_STRIP // pointer to first video strip + +// R2 ... current scanline +// R4 ... pointer to video strip +// R5 ... 
counter of video strips + +Render_StripLoop: + + // check if current scanline has been found + // if (line < t->height) { + ldrh r3,[r4,#SSTRIP_HEIGHT] // u16 height of this video strip + cmp r2,r3 // check if current scanline fits into this video strip + blo Render_StripOK // scanline < strip height, this strip is OK + + // subtract video strip height from scanline number (to be relative to start of strip) + // line -= t->height; + subs r2,r3 // subtract strip height from scanline number + + // next video strip + // t++; + // for (; stripnum > 0; stripnum--) + adds r4,#SSTRIP_SIZE // shift pointer to next video strip + subs r5,#1 // counter of video strips + bne Render_StripLoop // next video strip + b Render_Clear // video strip not found + +// ---- process all video segments + +Render_StripOK: + + // prepare first video segment + // sSegm* g = &t->seg[0]; + // int segnum = t->num; + // for (; segnum > 0; segnum--) { + str r2,[sp,#12] // save current scanline (now relative to start of this strip) + ldrh r5,[r4,#SSTRIP_NUM] // u16 number of video segments + tst r5,r5 // check number of video segments + beq Render_Clear // no video segments, clear rest of line and return + adds r4,#SSTRIP_SEG // pointer to first video segment + +// R4 ... pointer to video segment +// R5 ... 
counter of video segments + +Render_SegmLoop: + + // get number of remaining pixels + ldr r2,[sp,#16] // get remaining pixels + tst r2,r2 // check number of pixels + beq Render_Clear // end of scanline, stop rendering + + // get segment width -> R3 + // int w = g->width; + // if (w > pixnum) w = pixnum; + // if (w > 0) { + ldrh r3,[r4,#SSEGM_WIDTH] // get segment width + cmp r3,r2 // check width + blo 2f // width is OK + mov r3,r2 // limit width by total width +2: tst r3,r3 // check width + beq Render_SegmNext // this segment is invisible, skip it + + // update remaining pixels + // pixnum -= w; + subs r2,r3 // decrease remaining width + str r2,[sp,#16] // store new remaining pixels + + // get Y coordinate -> R2 + // int y = g->offy + line; + ldrh r2,[r4,#SSEGM_OFFY] // get offset at Y direction + sxth r2,r2 // expand to signed + ldr r1,[sp,#12] // get current scanline + add r2,r1 // add Y offset and current scanline + + // double lines + // if (g->dbly) y /= 2; + ldrb r1,[r4,#SSEGM_DBLY] // get dbly flag + tst r1,r1 // is dbly flag set? + beq 2f // dbly flag not set + asrs r2,#1 // Y coordinate / 2 + + // wrap Y coordinate + // int wy = g->wrapy; + // while (y < 0) y += wy; + // while (y >= wy) y -= wy; +2: ldrh r1,[r4,#SSEGM_WRAPY] // get wrapy +3: subs r2,r1 // subtract wrapy + bpl 3b // repeat +4: adds r2,r1 // add wrapy + bmi 4b // repeat + + // get X coordinate -> R1 + // int x = g->offx; +6: ldrh r1,[r4,#SSEGM_OFFX] // get offset at X direction + sxth r1,r1 // expand to signed + + // wrap X coordinate + // int wx = g->wrapx; + // while (x < 0) x += wx; + // while (x >= wx) x -= wx; + ldrh r0,[r4,#SSEGM_WRAPX] // get wrapx +3: subs r1,r0 // subtract wrapx + bpl 3b // repeat +4: adds r1,r0 // add wrapx + bmi 4b // repeat + +// ---- process 1st format group: GF_COLOR + + // get format -> R0 +6: ldrb r0,[r4,#SSEGM_FORM] // get current format + + // serve format GF_COLOR + tst r0,r0 // format GF_COLOR ? + bne 7f // no + + // u32 par = ((y & 1) == 0) ? 
g->par : g->par2 + lsrs r2,#1 // check bit 0 of Y coordinate (bit 0 is shifted into carry) + ldr r1,[r4,#SSEGM_PAR] // get par for even line + bcc 2f // even line + ldr r1,[r4,#SSEGM_PAR2] // get par2 for odd line + + // *cbuf++ = w/4; // number of pixels/4 +2: lsrs r2,r3,#2 // width/4 + ldr r6,[sp,#4] // get pointer to control buffer + stmia r6!,{r2} // store width/4 + + // *cbuf++ = (u32)dbuf; // pointer to data buffer + ldr r0,[sp,#8] // get pointer to data buffer + stmia r6!,{r0} // store pointer to data + str r6,[sp,#4] // save new pointer to control buffer + + // dbuf = RenderColor(dbuf, par, w/4); + bl RenderColor + str r0,[sp,#8] // store new pointer to data buffer + b Render_SegmNext + +// ---- process 2nd format group: using control buffer cbuf + + // prepare input argument video segment -> [SP+0] +7: str r4,[sp,#0] // prepare 5th argument (passed on stack) - current video segment + + // prepare function address -> R7 + adr r7,Render_FncAddr // get address of jump table + lsls r6,r0,#2 // format * 4 + ldr r7,[r7,r6] // load function address -> R7 + + // check 2nd format group + cmp r0,#GF_GRP2MAX // check 2nd format group + bhi 2f // > 2nd group + + // cbuf = RenderGraph8(cbuf, x, y, w, g); // or another 2nd group function selected from the jump table + ldr r0,[sp,#4] // get pointer to control buffer + blx r7 // call render function + str r0,[sp,#4] // save new pointer to control buffer + b Render_SegmNext + +// ---- process 3rd format group: using data buffer dbuf + + // *cbuf++ = w/4; // number of pixels/4 +2: lsrs r0,r3,#2 // width/4 + ldr r6,[sp,#4] // get pointer to control buffer + stmia r6!,{r0} // store width/4 + + // *cbuf++ = (u32)dbuf; // pointer to data buffer + ldr r0,[sp,#8] // get pointer to data buffer + stmia r6!,{r0} // store pointer to data + str r6,[sp,#4] // save new pointer to control buffer + + // dbuf = render function selected from the jump table, called with (dbuf, x, y, w, g) + blx r7 // call render function + str r0,[sp,#8] // store new pointer to data buffer + +Render_SegmNext: + + // next video segment + adds r4,#SSEGM_SIZE // shift pointer to next video segment + subs r5,#1 // 
counter of video segments + bne Render_SegmLoop // next video segment + +// ---- clear rest of line, write pointer to control buffer + +Render_Clear: + + // return current control buffer + ldr r0,[sp,#4] // control buffer + + // check if some pixels left + ldr r1,[sp,#16] // number of remaining pixels + lsrs r1,#2 // number of pixels/4 (= number of 4-pixels) + beq 9f // no pixels left + + // write size and address to control buffer + ldr r2,Render_LineBuf0Addr // data buffer with black color + stmia r0!,{r1,r2} // write number of 4-pixels and pointer to data buffer to control buffer + + // pop registers and return (return control buffer in r0) +9: add sp,#20 + pop {r4-r7,pc} + + .align 2 + +// pointer to pointer with current video screen +Render_pScreenAddr: + .word pScreen + +// pointer to buffer with black color +Render_LineBuf0Addr: + .word LineBuf0 + +// pointers to render functions (jump table indexed by segment format, one 4-byte entry per format) +Render_FncAddr: + // 1st format group + .word RenderColor // GF_COLOR simple color (par=color pattern 4-pixels even line, par2=color pattern 4-pixels odd line) + + // 2nd format group + .word RenderGraph8 // GF_GRAPH8 native 8-bit graphics (X1Y1R2G2B2) - fast, transfers "as is" to PIO + .word RenderTile // GF_TILE tiles + .word RenderTile2 // GF_TILE2 alternate tiles + .word RenderProgress // GF_PROGRESS horizontal progress indicator + .word RenderGrad1 // render gradient with 1 line GF_GRAD1 + .word RenderGrad2 // render gradient with 2 lines GF_GRAD2 + + // 3rd format group + .word RenderGraph4 // GF_GRAPH4 4-bit graphics + .word RenderGraph2 // GF_GRAPH2 2-bit graphics + .word RenderGraph1 // GF_GRAPH1 1-bit graphics + .word RenderMText // GF_MTEXT 8-pixel mono text + .word RenderAText // GF_ATEXT 8-pixel attribute text, character + 2x4 bit attributes + .word RenderFText // GF_FTEXT 8-pixel foreground color text, character + foreground color + .word RenderCText // GF_CTEXT 8-pixel color text, character + background color + foreground color + .word RenderGText // GF_GTEXT 8-pixel 
gradient text (par = pointer to 1-bit font, par2 = pointer to color array) + .word RenderDText // GF_DTEXT 8-pixel double gradient text (par = pointer to 1-bit font, par2 = pointer to color array) + .word RenderLevel // GF_LEVEL level graph + .word RenderLevelGrad // GF_LEVELGRAD level gradient graph + .word RenderOscil // GF_OSCIL oscilloscope pixel graph + .word RenderOscLine // GF_OSCLINE oscilloscope line graph + .word RenderPlane2 // GF_PLANE2 4 colors on 2 graphic planes + .word RenderAttrib8 // GF_ATTRIB8 2x4 bit color attribute per 8x8 pixel sample + .word RenderGraph8Mat // GF_GRAPH8MAT 8-bit graphics with 2D matrix transformation + .word RenderGraph8Persp // GF_GRAPH8PERSP 8-bit graphics with perspective projection + .word RenderTilePersp // GF_TILEPERSP tiles with perspective + .word RenderTilePersp15 // GF_TILEPERSP15 tiles with perspective, 1.5 pixels + .word RenderTilePersp2 // GF_TILEPERSP2 tiles with perspective, double pixels + .word RenderTilePersp3 // GF_TILEPERSP3 tiles with perspective, triple pixels + .word RenderTilePersp4 // GF_TILEPERSP4 tiles with perspective, quadruple pixels diff --git a/MCUME_pico/picovga_t4/vga_screen.cpp b/MCUME_pico/picovga_t4/vga_screen.cpp new file mode 100755 index 0000000..0cd92e8 --- /dev/null +++ b/MCUME_pico/picovga_t4/vga_screen.cpp @@ -0,0 +1,707 @@ + +// **************************************************************************** +// +// VGA screen layout +// +// **************************************************************************** + +#include "include.h" + +// current video screen +sScreen Screen = { .num = 0 }; // default video screen +sScreen* pScreen = &Screen; // pointer to current video screen + +// clear screen (set 0 strips, does not modify sprites) +void ScreenClear(sScreen* s) +{ + __dmb(); + s->num = 0; + __dmb(); +} + +// add empty strip to the screen (returns pointer to the strip) +sStrip* ScreenAddStrip(sScreen* s, int height) +{ + int n = s->num; + sStrip* t = &s->strip[n]; + t->height 
= height; + t->num = 0; + __dmb(); + s->num = n + 1; // strip count is raised only after the strip fields are written (barriers presumably keep this order visible to the renderer - confirm) + __dmb(); + return t; +} + +// add empty segment to video strip (returns pointer to the segment and initialises it to defaults) +sSegm* ScreenAddSegm(sStrip* strip, int width) +{ + int n = strip->num; + sSegm* g = &strip->seg[n]; + g->width = width; + g->wb = width; + g->offx = 0; + g->offy = 0; + g->wrapx = width; + g->wrapy = strip->height; + g->data = NULL; + g->form = GF_COLOR; + g->dbly = false; + g->par = 0; + g->par2 = 0; + __dmb(); + strip->num = n + 1; // segment count is raised only after the segment fields are written + __dmb(); + return g; +} + +// set video segment to simple color format GF_COLOR +// col1 = color pattern 4-pixels even line (use macro MULTICOL) +// col2 = color pattern 4-pixels odd line (use macro MULTICOL) +void ScreenSegmColor(sSegm* segm, u32 col1, u32 col2) +{ + segm->par = col1; + segm->par2 = col2; + __dmb(); + segm->form = GF_COLOR; + __dmb(); +} + +// set video segment to gradient with 1 line +// data = pointer to data buffer with gradient +// wb = pitch - length of buffer +// To scroll gradient, set virtual dimension wrapx, then shift offx +void ScreenSegmGrad1(sSegm* segm, const void* data, int wb) +{ + segm->form = GF_COLOR; // fall back to simple color format while the parameters are being changed + __dmb(); + segm->data = data; + segm->wb = wb; + __dmb(); + segm->form = GF_GRAD1; // new format is set last, after its parameters + __dmb(); +} + +// set video segment to gradient with 2 lines +// data = pointer to data buffer with gradient +// wb = pitch - length of buffer +// To scroll gradient, set virtual dimension wrapx, then shift offx +void ScreenSegmGrad2(sSegm* segm, const void* data, int wb) +{ + segm->form = GF_COLOR; + __dmb(); + segm->data = data; + segm->wb = wb; + __dmb(); + segm->form = GF_GRAD2; + __dmb(); +} + +// set video segment to native 8-bit graphics (R3G3B2) +// data = pointer to data buffer +// wb = pitch - number of bytes between lines +// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy. 
+void ScreenSegmGraph8(sSegm* segm, const void* data, int wb) +{ + segm->form = GF_COLOR; + __dmb(); + segm->data = data; + segm->wb = wb; + __dmb(); + segm->form = GF_GRAPH8; + __dmb(); +} + +// generate 16-color palette translation table for functions ScreenSegmGraph4 +// trans = pointer to destination palette translation table (u16 trans[256]) +// pal = pointer to source palette of 16 colors (u8 pal[16]) +void GenPal16Trans(u16* trans, const u8* pal) +{ + int i, j; + u16 k; + for (i = 0; i < 256; i++) + { + j = (i >> 4) & 0x0f; + k = pal[j]; + + j = i & 0x0f; + k |= (u16)pal[j] << 8; + + trans[i] = k; + } +} + +// set video segment to 4-bit palette graphics +// data = pointer to data buffer +// trans = pointer to 16-color palette translation table (generated with GenPal16Trans function) +// wb = pitch - number of bytes between lines +// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy. +void ScreenSegmGraph4(sSegm* segm, const void* data, const void* trans, int wb) +{ + segm->form = GF_COLOR; + __dmb(); + segm->data = data; + segm->par = (u32)trans; + segm->wb = wb; + __dmb(); + segm->form = GF_GRAPH4; + __dmb(); +} + +// generate palette 4 translation table for functions ScreenSegmGraph2 +// trans = pointer to destination palette translation table (u32 trans[256]) +// pal = pointer to source palette of 4 colors (u8 pal[4]) +void GenPal4Trans(u32* trans, const u8* pal) +{ + int i, j; + u32 k; + for (i = 0; i < 256; i++) + { + j = (i >> 6) & 0x03; + k = pal[j]; + + j = (i >> 4) & 0x03; + k |= (u32)pal[j] << 8; + + j = (i >> 2) & 0x03; + k |= (u32)pal[j] << 16; + + j = i & 0x03; + k |= (u32)pal[j] << 24; + + trans[i] = k; + } +} + +// set video segment to 2-bit palette graphics +// data = pointer to data buffer +// trans = pointer to 4-color palette translation table (generated with GenPal4Trans function) +// wb = pitch - number of bytes between lines +// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and 
offy. +void ScreenSegmGraph2(sSegm* segm, const void* data, const void* trans, int wb) +{ + segm->form = GF_COLOR; + __dmb(); + segm->data = data; + segm->par = (u32)trans; + segm->wb = wb; + __dmb(); + segm->form = GF_GRAPH2; + __dmb(); +} + +// set video segment to 1-bit palette graphics +// data = pointer to data buffer +// bg = background color +// fg = foreground color +// wb = pitch - number of bytes between lines +// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy. +void ScreenSegmGraph1(sSegm* segm, const void* data, u8 bg, u8 fg, int wb) +{ + segm->form = GF_COLOR; + __dmb(); + segm->data = data; + segm->par = bg | ((u32)fg << 8); + segm->wb = wb; + __dmb(); + segm->form = GF_GRAPH1; + __dmb(); +} + +// set video segment to 8-pixel mono text +// data = pointer to text buffer +// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) +// fontheight = font height +// bg = background color +// fg = foreground color +// wb = pitch - number of bytes between text lines +void ScreenSegmMText(sSegm* segm, const void* data, const void* font, u16 fontheight, u8 bg, u8 fg, int wb) +{ + segm->form = GF_COLOR; + __dmb(); + segm->data = data; + segm->par = (u32)font; + segm->par2 = bg | ((u32)fg << 8); + segm->par3 = fontheight; + segm->wb = wb; + __dmb(); + segm->form = GF_MTEXT; + __dmb(); +} + +// set video segment to 8-pixel attribute text +// data = pointer to text buffer (character + 2x4 bit attributes) +// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) +// fontheight = font height +// pal = pointer to palette of 16 colors +// wb = pitch - number of bytes between text lines +void ScreenSegmAText(sSegm* segm, const void* data, const void* font, u16 fontheight, const void* pal, int wb) +{ + segm->form = GF_COLOR; + __dmb(); + segm->data = data; + segm->par = (u32)font; + segm->par2 = (u32)pal; + segm->par3 = fontheight; + segm->wb = wb; + __dmb(); + 
segm->form = GF_ATEXT; + __dmb(); +} + +// set video segment to 8-pixel foreground color text +// data = pointer to text buffer (character + foreground color) +// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) +// fontheight = font height +// bg = background color +// wb = pitch - number of bytes between text lines +void ScreenSegmFText(sSegm* segm, const void* data, const void* font, u16 fontheight, u8 bg, int wb) +{ + segm->form = GF_COLOR; + __dmb(); + segm->data = data; + segm->par = (u32)font; + segm->par2 = bg; + segm->par3 = fontheight; + segm->wb = wb; + __dmb(); + segm->form = GF_FTEXT; + __dmb(); +} + +// set video segment to 8-pixel color text +// data = pointer to text buffer (character + background color + foreground color) +// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) +// fontheight = font height +// wb = pitch - number of bytes between text lines +void ScreenSegmCText(sSegm* segm, const void* data, const void* font, u16 fontheight, int wb) +{ + segm->form = GF_COLOR; + __dmb(); + segm->data = data; + segm->par = (u32)font; + segm->par3 = fontheight; + segm->wb = wb; + __dmb(); + segm->form = GF_CTEXT; + __dmb(); +} + +// set video segment to 8-pixel gradient color text +// data = pointer to text buffer (character + foreground color) +// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) +// fontheight = font height +// bg = background color +// grad = pointer to array of gradient colors +// wb = pitch - number of bytes between text lines +void ScreenSegmGText(sSegm* segm, const void* data, const void* font, u8 fontheight, u8 bg, const void* grad, int wb) +{ + segm->form = GF_COLOR; + __dmb(); + segm->data = data; + segm->par = (u32)font; + segm->par3 = bg | (fontheight << 8); + segm->par2 = (u32)grad; + segm->wb = wb; + __dmb(); + segm->form = GF_GTEXT; + __dmb(); +} + +// set video segment to 8-pixel double 
gradient color text +// data = pointer to text buffer (character + foreground color) +// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) +// fontheight = font height +// bg = background color +// grad = pointer to array of gradient colors +// wb = pitch - number of bytes between text lines +void ScreenSegmDText(sSegm* segm, const void* data, const void* font, u8 fontheight, u8 bg, const void* grad, int wb) +{ + segm->form = GF_COLOR; + __dmb(); + segm->data = data; + segm->par = (u32)font; + segm->par3 = bg | (fontheight << 8); + segm->par2 = (u32)grad; + segm->wb = wb; + __dmb(); + segm->form = GF_DTEXT; + __dmb(); +} + +// set video segment to tiles +// data = pointer to tile map buffer (with tile indices) +// tiles = pointer to 1 column of tiles, 1 pixel = 8 bits +// w = tile width (must be multiple of 4) +// h = tile height +// wb = pitch - number of bytes between tile map rows +void ScreenSegmTile(sSegm* segm, const void* data, const void* tiles, int w, int h, int wb) +{ + segm->form = GF_COLOR; + __dmb(); + segm->data = data; + segm->par = (u32)tiles; + segm->par2 = (u32)h; + segm->par3 = (u16)w; + segm->wb = wb; + segm->wrapx = (segm->width+w-1)/w*w; + segm->wrapy = (segm->wrapy+h-1)/h*h; + __dmb(); + segm->form = GF_TILE; + __dmb(); +} + +// set video segment to alternate tiles +// data = pointer to tile map buffer (with tile indices) +// tiles = pointer to 1 row of tiles, 1 pixel = 8 bits +// w = tile width (must be multiple of 4) +// h = tile height +// tilewb = tile width bytes (usually tile width * number of tiles) +// wb = pitch - number of bytes between tile map rows +void ScreenSegmTile2(sSegm* segm, const void* data, const void* tiles, int w, int h, int tilewb, int wb) +{ + segm->form = GF_COLOR; + __dmb(); + segm->data = data; + segm->par = (u32)tiles; + segm->par2 = (u32)h + ((u32)(u16)tilewb << 16); + segm->par3 = (u16)w; + segm->wb = wb; + segm->wrapx = (segm->width+w-1)/w*w; + segm->wrapy = 
(segm->wrapy+h-1)/h*h; + __dmb(); + segm->form = GF_TILE2; + __dmb(); +} + +// set video segment to level graph GF_LEVEL +// data = pointer to buffer with line samples 0..255 +// bg = background color +// fg = foreground color +// zero = Y zero level +void ScreenSegmLevel(sSegm* segm, const void* data, u8 bg, u8 fg, u8 zero) +{ + segm->form = GF_COLOR; + __dmb(); + segm->data = data; + segm->par = bg | ((u32)fg << 8); // background color in bits 0..7, foreground color in bits 8..15 + segm->par2 = zero; + __dmb(); + segm->form = GF_LEVEL; + __dmb(); +} + +// set video segment to level gradient graph GF_LEVELGRAD +// data = pointer to buffer with values 0..255 of 4-pixels in rows +// sample1 = scanline sample < data +// sample2 = scanline sample >= data +void ScreenSegmLevelGrad(sSegm* segm, const void* data, const void* sample1, const void* sample2) +{ + segm->form = GF_COLOR; + __dmb(); + segm->data = data; + segm->par = (u32)sample1; + segm->par2 = (u32)sample2; + __dmb(); + segm->form = GF_LEVELGRAD; + __dmb(); +} + +// set video segment to oscilloscope 1-pixel graph GF_OSCIL +// data = pointer to buffer with line samples 0..255 +// bg = background color +// fg = foreground color +// pixh = height of pixels - 1 +void ScreenSegmOscil(sSegm* segm, const void* data, u8 bg, u8 fg, int pixh) +{ + segm->form = GF_COLOR; + __dmb(); + segm->data = data; + segm->par = bg | ((u32)fg << 8); // background color in bits 0..7, foreground color in bits 8..15 + segm->par2 = pixh; + __dmb(); + segm->form = GF_OSCIL; + __dmb(); +} + +// set video segment to oscilloscope line graph GF_OSCLINE +// data = pointer to buffer with line samples 0..255 +// bg = background color +// fg = foreground color +void ScreenSegmOscLine(sSegm* segm, const void* data, u8 bg, u8 fg) +{ + segm->form = GF_COLOR; + __dmb(); + segm->data = data; + segm->par = bg | ((u32)fg << 8); // background color in bits 0..7, foreground color in bits 8..15 + __dmb(); + segm->form = GF_OSCLINE; + __dmb(); +} + +// generate palette 4-planes translation table for function ScreenSegmPlane2 +// trans = pointer to destination palette translation table (u32 trans[256]) +// pal = pointer to source palette of 4 colors 
(u8 pal[4])
+void GenPal4Plane(u32* trans, const u8* pal)
+{
+	int i, j;
+	u32 k;
+	for (i = 0; i < 256; i++) // one table entry for every possible value of i (0..255)
+	{
+		j = 0; // 1st color: palette index built from bit B7 (-> B1) and bit B3 (-> B0) of i
+		if ((i & B7) != 0) j |= B1;
+		if ((i & B3) != 0) j |= B0;
+		k = pal[j]; // goes to bits 0..7 of the entry
+
+		j = 0; // 2nd color: bits B6 and B2 of i
+		if ((i & B6) != 0) j |= B1;
+		if ((i & B2) != 0) j |= B0;
+		k |= (u32)pal[j] << 8; // goes to bits 8..15 of the entry
+
+		j = 0; // 3rd color: bits B5 and B1 of i
+		if ((i & B5) != 0) j |= B1;
+		if ((i & B1) != 0) j |= B0;
+		k |= (u32)pal[j] << 16; // goes to bits 16..23 of the entry
+
+		j = 0; // 4th color: bits B4 and B0 of i
+		if ((i & B4) != 0) j |= B1;
+		if ((i & B0) != 0) j |= B0;
+		k |= (u32)pal[j] << 24; // goes to bits 24..31 of the entry
+
+		trans[i] = k;
+	}
+}
+
+// set video segment to 4-color on 2-planes graphics
+// data = pointer to data buffer
+// plane = offset of 2nd graphics plane (in bytes), size of one graphics plane (stored in par)
+// trans = pointer to 4-color palette translation table (generated with GenPal4Plane function; stored in par2)
+// wb = pitch - number of bytes between lines
+// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy.
+void ScreenSegmPlane2(sSegm* segm, const void* data, int plane, const void* trans, int wb)
+{
+	segm->form = GF_COLOR; // NOTE(review): format is parked on GF_COLOR first, presumably so the renderer never sees the new format with stale parameters - confirm
+	__dmb(); // memory barrier between the format switch and the parameter writes
+	segm->data = data;
+	segm->par = plane;
+	segm->par2 = (u32)trans; // pointer kept as a 32-bit value
+	segm->wb = wb;
+	__dmb(); // memory barrier: parameters are written before the final format is set
+	segm->form = GF_PLANE2;
+	__dmb();
+}
+
+// set video segment to 2x4 bit color attribute per 8x8 pixel sample graphics
+// data = pointer to data buffer with mono pixels
+// attr = pointer to color attributes
+// pal = pointer to 16-color palette table
+// wb = pitch - number of bytes between lines
+// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy.
+void ScreenSegmAttrib8(sSegm* segm, const void* data, const void* attr, const u8* pal, int wb)
+{
+	segm->form = GF_COLOR; // NOTE(review): format is parked on GF_COLOR first, presumably so the renderer never sees the new format with stale parameters - confirm
+	__dmb(); // memory barrier between the format switch and the parameter writes
+	segm->data = data;
+	segm->par = (u32)attr; // attribute pointer kept as a 32-bit value
+	segm->par2 = (u32)pal; // palette pointer kept as a 32-bit value
+	segm->wb = wb;
+	__dmb(); // memory barrier: parameters are written before the final format is set
+	segm->form = GF_ATTRIB8;
+	__dmb();
+}
+
+// set video segment to horizontal progress indicator GF_PROGRESS
+// data = pointer to buffer with values 0..255 of 4-pixels in rows
+// sample1 = scanline sample < data (pointer stored in par)
+// sample2 = scanline sample >= data (pointer stored in par2)
+void ScreenSegmProgress(sSegm* segm, const void* data, const void* sample1, const void* sample2)
+{
+	segm->form = GF_COLOR; // temporary format while the parameters below are rewritten
+	__dmb();
+	segm->data = data;
+	segm->par = (u32)sample1;
+	segm->par2 = (u32)sample2;
+	__dmb();
+	segm->form = GF_PROGRESS; // final format is set only after all parameters are in place
+	__dmb();
+}
+
+// set video segment to 8-bit graphics with 2D matrix transformation
+// data = pointer to image data (width and height of image must be power of 2)
+// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
+// xbits = number of bits of image width (image width must be power of 2 and must be = pitch width bytes)
+// ybits = number of bits of image height (image height must be power of 2)
+// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
+void ScreenSegmGraph8Mat(sSegm* segm, const void* data, const int* mat, u16 xbits, u16 ybits)
+{
+	segm->form = GF_COLOR; // temporary format while the parameters below are rewritten
+	__dmb();
+	segm->data = data;
+	segm->wb = (1<<xbits); // pitch = image width in bytes = 2^xbits
+	segm->offx = 0;
+	segm->offy = 0;
+	segm->wrapx = segm->width; // wrapy is not written by this function
+	segm->par = (u32)mat; // matrix pointer kept as a 32-bit value
+	segm->par2 = xbits | ((u32)ybits << 16); // xbits in bits 0..15, ybits in bits 16..31
+	__dmb();
+	segm->form = GF_GRAPH8MAT; // final format is set only after all parameters are in place
+	__dmb();
+}
+
+// set video segment to 8-bit graphics with perspective projection
+// data = pointer to image data (width and height of image must be power of 2)
+// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
+// xbits = number of bits of image width (image width must be power of 2 and must be =
pitch width bytes) +// ybits = number of bits of image height (image height must be power of 2) +// horiz = horizon offset +// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height +void ScreenSegmGraph8Persp(sSegm* segm, const void* data, const int* mat, u16 xbits, u16 ybits, u16 horiz) +{ + segm->form = GF_COLOR; + __dmb(); + segm->data = data; + segm->wb = (1<offx = 0; + segm->offy = 0; + segm->wrapx = segm->width; + segm->par = (u32)mat; + segm->par2 = xbits | ((u32)ybits << 16); + segm->par3 = horiz; + __dmb(); + segm->form = GF_GRAPH8PERSP; + __dmb(); +} + +// set video segment to tiles with perspective +// map = pointer to tile map with tile indices (width and height must be power of 2) +// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2) +// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) +// mapwbits = number of bits of tile map width +// maphbits = number of bits of tile map height +// tilebits = number of bits of tile width and height +// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling) +// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height +void ScreenSegmTilePersp(sSegm* segm, const u8* map, const u8* tiles, const int* mat, + u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon) +{ + segm->form = GF_COLOR; + __dmb(); + segm->data = map; + segm->wb = mapwbits | ((u16)maphbits<<8); + segm->offx = 0; + segm->offy = 0; + segm->wrapx = segm->width; + segm->par = (u32)tiles; + segm->par2 = (u32)mat; + segm->par3 = tilebits | ((u16)horizon<<8); + __dmb(); + segm->form = GF_TILEPERSP; + __dmb(); +} + +// set video segment to tiles with perspective, 1.5 pixels +// map = pointer to tile map with tile indices (width and height must be power of 2) +// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits 
(width and height must be power of 2)
+// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
+// mapwbits = number of bits of tile map width
+// maphbits = number of bits of tile map height
+// tilebits = number of bits of tile width and height
+// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling)
+// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
+void ScreenSegmTilePersp15(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
+	u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon)
+{
+	segm->form = GF_COLOR; // NOTE(review): format is parked on GF_COLOR first, presumably so the renderer never sees the new format with stale parameters - confirm
+	__dmb(); // memory barrier between the format switch and the parameter writes
+	segm->data = map;
+	segm->wb = mapwbits | ((u16)maphbits<<8); // wb is reused here: low byte = mapwbits, high byte = maphbits
+	segm->offx = 0;
+	segm->offy = 0;
+	segm->wrapx = segm->width; // wrapy is not written by this function
+	segm->par = (u32)tiles; // tile image pointer kept as a 32-bit value
+	segm->par2 = (u32)mat; // matrix pointer kept as a 32-bit value
+	segm->par3 = tilebits | ((u16)horizon<<8); // low byte = tilebits, high byte = horizon (signed 8-bit pattern)
+	__dmb(); // memory barrier: parameters are written before the final format is set
+	segm->form = GF_TILEPERSP15;
+	__dmb();
+}
+
+// set video segment to tiles with perspective, double pixels
+// map = pointer to tile map with tile indices (width and height must be power of 2)
+// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2)
+// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
+// mapwbits = number of bits of tile map width
+// maphbits = number of bits of tile map height
+// tilebits = number of bits of tile width and height
+// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling)
+// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
+void ScreenSegmTilePersp2(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
+	u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon)
+{
+	segm->form = GF_COLOR; // temporary format while the parameters below are rewritten
+	__dmb();
+	segm->data = map;
+	segm->wb = mapwbits | ((u16)maphbits<<8); // wb is reused here: low byte = mapwbits, high byte = maphbits
+	segm->offx = 0;
+	segm->offy = 0;
+	segm->wrapx =
segm->width;
+	segm->par = (u32)tiles; // tile image pointer kept as a 32-bit value
+	segm->par2 = (u32)mat; // matrix pointer kept as a 32-bit value
+	segm->par3 = tilebits | ((u16)horizon<<8); // low byte = tilebits, high byte = horizon (signed 8-bit pattern)
+	__dmb(); // memory barrier: parameters are written before the final format is set
+	segm->form = GF_TILEPERSP2;
+	__dmb();
+}
+
+// set video segment to tiles with perspective, triple pixels
+// map = pointer to tile map with tile indices (width and height must be power of 2)
+// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2)
+// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
+// mapwbits = number of bits of tile map width
+// maphbits = number of bits of tile map height
+// tilebits = number of bits of tile width and height
+// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling)
+// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
+void ScreenSegmTilePersp3(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
+	u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon)
+{
+	segm->form = GF_COLOR; // NOTE(review): format is parked on GF_COLOR first, presumably so the renderer never sees the new format with stale parameters - confirm
+	__dmb(); // memory barrier between the format switch and the parameter writes
+	segm->data = map;
+	segm->wb = mapwbits | ((u16)maphbits<<8); // wb is reused here: low byte = mapwbits, high byte = maphbits
+	segm->offx = 0;
+	segm->offy = 0;
+	segm->wrapx = segm->width; // wrapy is not written by this function
+	segm->par = (u32)tiles; // tile image pointer kept as a 32-bit value
+	segm->par2 = (u32)mat; // matrix pointer kept as a 32-bit value
+	segm->par3 = tilebits | ((u16)horizon<<8); // low byte = tilebits, high byte = horizon (signed 8-bit pattern)
+	__dmb(); // memory barrier: parameters are written before the final format is set
+	segm->form = GF_TILEPERSP3;
+	__dmb();
+}
+
+// set video segment to tiles with perspective, quadruple pixels
+// map = pointer to tile map with tile indices (width and height must be power of 2)
+// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2)
+// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
+// mapwbits = number of bits of tile map width
+// maphbits = number of bits of tile map height
+// tilebits = number of bits of tile width and height
+// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling)
+// Use default settings
of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
+void ScreenSegmTilePersp4(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
+	u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon)
+{
+	segm->form = GF_COLOR; // NOTE(review): format is parked on GF_COLOR first, presumably so the renderer never sees the new format with stale parameters - confirm
+	__dmb(); // memory barrier between the format switch and the parameter writes
+	segm->data = map;
+	segm->wb = mapwbits | ((u16)maphbits<<8); // wb is reused here: low byte = mapwbits, high byte = maphbits
+	segm->offx = 0;
+	segm->offy = 0;
+	segm->wrapx = segm->width; // wrapy is not written by this function
+	segm->par = (u32)tiles; // tile image pointer kept as a 32-bit value
+	segm->par2 = (u32)mat; // matrix pointer kept as a 32-bit value
+	segm->par3 = tilebits | ((u16)horizon<<8); // low byte = tilebits, high byte = horizon (signed 8-bit pattern)
+	__dmb(); // memory barrier: parameters are written before the final format is set
+	segm->form = GF_TILEPERSP4;
+	__dmb();
+}
diff --git a/MCUME_pico/picovga_t4/vga_screen.h b/MCUME_pico/picovga_t4/vga_screen.h
new file mode 100755
index 0000000..f53ddcc
--- /dev/null
+++ b/MCUME_pico/picovga_t4/vga_screen.h
@@ -0,0 +1,307 @@
+
+// ****************************************************************************
+//
+// VGA screen layout
+//
+// ****************************************************************************
+
+#ifndef _VGA_SCREEN_H
+#define _VGA_SCREEN_H
+
+// video segment (on change update SSEGM_* in define.h)
+typedef struct {
+	u16 width; // SSEGM_WIDTH width of this video segment in pixels (must be multiple of 4, 0=inactive segment)
+	u16 wb; // SSEGM_WB pitch - number of bytes between lines (tile perspective formats store map width/height bits here instead)
+	s16 offx; // SSEGM_OFFX display offset at X direction (must be multiple of 4)
+	s16 offy; // SSEGM_OFFY display offset at Y direction
+	u16 wrapx; // SSEGM_WRAPX wrap width in X direction (number of pixels, must be multiple of 4 and > 0)
+	// text modes: wrapx must be multiple of 8
+	u16 wrapy; // SSEGM_WRAPY wrap width in Y direction (number of lines, cannot be 0)
+	const void* data; // SSEGM_DATA pointer to video buffer with image data
+	u8 form; // SSEGM_FORM graphics format GF_*
+	bool dbly; // SSEGM_DBLY double Y (2 scanlines per 1 image line)
+	u16 par3; // SSEGM_PAR3 parameter 3 (meaning depends on graphics format)
+	u32 par; // SSEGM_PAR parameter 1 (meaning depends on graphics format)
+	u32 par2; // SSEGM_PAR2 parameter 2 (meaning depends on graphics format)
+} sSegm;
+
+// video strip (on change update SSTRIP_* in define.h)
+typedef struct {
+	u16
height; // SSTRIP_HEIGHT height of this strip in number of scanlines
+	u16 num; // SSTRIP_NUM number of video segments
+	sSegm seg[SEGMAX]; // SSTRIP_SEG list of video segments
+} sStrip;
+
+// video screen (on change update SSCREEN_* in define.h)
+typedef struct {
+	u16 num; // SSCREEN_NUM number of video strips
+	u16 backup; // SSCREEN_BACKUP backup number of video strips during display OFF
+	sStrip strip[STRIPMAX]; // SSCREEN_STRIP list of video strips
+} sScreen;
+
+// current video screen
+extern sScreen Screen; // default video screen
+extern sScreen* pScreen; // pointer to current video screen
+
+// clear screen (set 0 strips, does not modify sprites)
+void ScreenClear(sScreen* s);
+
+// add empty strip to the screen (returns pointer to the strip)
+sStrip* ScreenAddStrip(sScreen* s, int height);
+
+// add empty segment to video strip (returns pointer to the segment and initialises it to defaults)
+sSegm* ScreenAddSegm(sStrip* strip, int width);
+
+// set video segment to simple color format GF_COLOR
+// col1 = color pattern 4-pixels even line (use macro MULTICOL)
+// col2 = color pattern 4-pixels odd line (use macro MULTICOL)
+void ScreenSegmColor(sSegm* segm, u32 col1, u32 col2);
+
+// set video segment to gradient with 1 line
+// data = pointer to data buffer with gradient
+// wb = pitch - length of buffer
+// To scroll gradient, set virtual dimension wrapx, then shift offx
+void ScreenSegmGrad1(sSegm* segm, const void* data, int wb);
+
+// set video segment to gradient with 2 lines
+// data = pointer to data buffer with gradient
+// wb = pitch - length of buffer
+// To scroll gradient, set virtual dimension wrapx, then shift offx
+void ScreenSegmGrad2(sSegm* segm, const void* data, int wb);
+
+// set video segment to native 8-bit graphics (R3G3B2)
+// data = pointer to data buffer
+// wb = pitch - number of bytes between lines
+// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy.
+void ScreenSegmGraph8(sSegm* segm, const void* data, int wb); + +// generate 16-color palette translation table +// trans = pointer to destination palette translation table (u16 trans[256]) +// pal = pointer to source palette of 16 colors (u8 pal[16]) +void GenPal16Trans(u16* trans, const u8* pal); + +// set video segment to 4-bit palette graphics +// data = pointer to data buffer +// trans = pointer to 16-color palette translation table (generated with GenPal16Trans function) +// wb = pitch - number of bytes between lines +// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy. +void ScreenSegmGraph4(sSegm* segm, const void* data, const void* trans, int wb); + +// generate palette 4 translation table for function ScreenSegmGraph2 +// trans = pointer to destination palette translation table (u32 trans[256]) +// pal = pointer to source palette of 4 colors (u8 pal[4]) +void GenPal4Trans(u32* trans, const u8* pal); + +// set video segment to 2-bit palette graphics +// data = pointer to data buffer +// trans = pointer to 4-color palette translation table (generated with GenPal4Trans function) +// wb = pitch - number of bytes between lines +// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy. +void ScreenSegmGraph2(sSegm* segm, const void* data, const void* trans, int wb); + +// set video segment to 1-bit palette graphics +// data = pointer to data buffer +// bg = background color +// fg = foreground color +// wb = pitch - number of bytes between lines +// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy. 
+void ScreenSegmGraph1(sSegm* segm, const void* data, u8 bg, u8 fg, int wb); + +// set video segment to 8-pixel mono text +// data = pointer to text buffer +// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) +// fontheight = font height +// bg = background color +// fg = foreground color +// wb = pitch - number of bytes between text lines +void ScreenSegmMText(sSegm* segm, const void* data, const void* font, u16 fontheight, u8 bg, u8 fg, int wb); + +// set video segment to 8-pixel attribute text +// data = pointer to text buffer (character + 2x4 bit attributes) +// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) +// fontheight = font height +// pal = pointer to palette of 16 colors +// wb = pitch - number of bytes between text lines +void ScreenSegmAText(sSegm* segm, const void* data, const void* font, u16 fontheight, const void* pal, int wb); + +// set video segment to 8-pixel foreground color text +// data = pointer to text buffer (character + foreground color) +// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) +// fontheight = font height +// bg = background color +// wb = pitch - number of bytes between text lines +void ScreenSegmFText(sSegm* segm, const void* data, const void* font, u16 fontheight, u8 bg, int wb); + +// set video segment to 8-pixel color text +// data = pointer to text buffer (character + background color + foreground color) +// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) +// fontheight = font height +// wb = pitch - number of bytes between text lines +void ScreenSegmCText(sSegm* segm, const void* data, const void* font, u16 fontheight, int wb); + +// set video segment to 8-pixel gradient color text +// data = pointer to text buffer (character + foreground color) +// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) +// 
fontheight = font height +// bg = background color +// grad = pointer to array of gradient colors +// wb = pitch - number of bytes between text lines +void ScreenSegmGText(sSegm* segm, const void* data, const void* font, u8 fontheight, u8 bg, const void* grad, int wb); + +// set video segment to 8-pixel double gradient color text +// data = pointer to text buffer (character + foreground color) +// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels) +// fontheight = font height +// bg = background color +// grad = pointer to array of gradient colors +// wb = pitch - number of bytes between text lines +void ScreenSegmDText(sSegm* segm, const void* data, const void* font, u8 fontheight, u8 bg, const void* grad, int wb); + +// set video segment to tiles +// data = pointer to tile map buffer (with tile indices) +// tiles = pointer to 1 column of tiles, 1 pixel = 8 bits +// w = tile width (must be multiple of 4) +// h = tile height +// wb = pitch - number of bytes between tile map rows +void ScreenSegmTile(sSegm* segm, const void* data, const void* tiles, int w, int h, int wb); + +// set video segment to alternate tiles +// data = pointer to tile map buffer (with tile indices) +// tiles = pointer to 1 row of tiles, 1 pixel = 8 bits +// w = tile width (must be multiple of 4) +// h = tile height +// tilewb = tile width bytes (usually tile width * number of tiles) +// wb = pitch - number of bytes between tile map rows +void ScreenSegmTile2(sSegm* segm, const void* data, const void* tiles, int w, int h, int tilewb, int wb); + +// set video segment to level graph GF_LEVEL +// data = pointer to buffer with line samples 0..255 +// zero = Y zero level +// bg = background color +// fg = foreground color +void ScreenSegmLevel(sSegm* segm, const void* data, u8 zero, u8 bg, u8 fg); + +// set video segment to leve gradient graph GF_LEVELGRAD +// data = pointer to buffer with values 0..255 of 4-pixels in rows +// sample1 = scanline sample < 
data +// sample2 = scanline sample >= data +void ScreenSegmLevelGrad(sSegm* segm, const void* data, const void* sample1, const void* sample2); + +// set video segment to oscilloscope 1-pixel graph GF_OSCIL +// data = pointer to buffer with line samples 0..255 +// bg = background color +// fg = foreground color +// pixh = height of pixels - 1 +void ScreenSegmOscil(sSegm* segm, const void* data, u8 bg, u8 fg, int pixh); + +// set video segment to oscilloscope line graph GF_OSCLINE +// data = pointer to buffer with line samples 0..255 +// bg = background color +// fg = foreground color +void ScreenSegmOscLine(sSegm* segm, const void* data, u8 bg, u8 fg); + +// generate palette 4-color translation table for function ScreenSegmPlane2 +// trans = pointer to destination palette translation table (u32 trans[256]) +// pal = pointer to source palette of 4 colors (u8 pal[4]) +void GenPal4Plane(u32* trans, const u8* pal); + +// set video segment to 4-color on 2-planes graphics +// data = pointer to data buffer +// plane = offset of 2nd graphics plane (in bytes), size of one graphics plane +// trans = pointer to 4-color palette translation table (generated with GenPal4Plane function) +// wb = pitch - number of bytes between lines +// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy. +void ScreenSegmPlane2(sSegm* segm, const void* data, int plane, const void* trans, int wb); + +// set video segment to 2x4 bit color attribute per 8x8 pixel sample graphics +// data = pointer to data buffer with mono pixels +// attr = pointer to color attributes +// pal = pointer to 16-color palette table +// wb = pitch - number of bytes between lines +// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy. 
+void ScreenSegmAttrib8(sSegm* segm, const void* data, const void* attr, const u8* pal, int wb); + +// set video segment to horizontal progress indicator GF_PROGRESS +// data = pointer to buffer with values 0..255 of 4-pixels in rows +// sample1 = scanline sample < data +// sample2 = scanline sample >= data +void ScreenSegmProgress(sSegm* segm, const void* data, const void* sample1, const void* sample2); + +// set video segment to 8-bit graphics with 2D matrix transformation +// data = pointer to image data (width and height of image must be power of 2) +// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) +// xbits = number of bits of image width (image width must be power of 2 and must be = pitch width bytes) +// ybits = number of bits of image height (image height must be power of 2) +// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height +void ScreenSegmGraph8Mat(sSegm* segm, const void* data, const int* mat, u16 xbits, u16 ybits); + +// set video segment to 8-bit graphics with perspective projection +// data = pointer to image data (width and height of image must be power of 2) +// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) +// xbits = number of bits of image width (image width must be power of 2 and must be = pitch width bytes) +// ybits = number of bits of image height (image height must be power of 2) +// horiz = horizon offset +// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height +void ScreenSegmGraph8Persp(sSegm* segm, const void* data, const int* mat, u16 xbits, u16 ybits, u16 horiz); + +// set video segment to tiles with perspective +// map = pointer to tile map with tile indices (width and height must be power of 2) +// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2) +// mat = pointer to 
array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) +// mapwbits = number of bits of tile map width +// maphbits = number of bits of tile map height +// tilebits = number of bits of tile width and height +// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling) +// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height +void ScreenSegmTilePersp(sSegm* segm, const u8* map, const u8* tiles, const int* mat, + u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon); + +// set video segment to tiles with perspective, 1.5 pixels +// map = pointer to tile map with tile indices (width and height must be power of 2) +// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2) +// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) +// mapwbits = number of bits of tile map width +// maphbits = number of bits of tile map height +// tilebits = number of bits of tile width and height +// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling) +// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height +void ScreenSegmTilePersp15(sSegm* segm, const u8* map, const u8* tiles, const int* mat, + u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon); + +// set video segment to tiles with perspective, double pixels +// map = pointer to tile map with tile indices (width and height must be power of 2) +// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2) +// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) +// mapwbits = number of bits of tile map width +// maphbits = number of bits of tile map height +// tilebits = number of bits of tile width and height +// horizon = horizon 
offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling) +// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height +void ScreenSegmTilePersp2(sSegm* segm, const u8* map, const u8* tiles, const int* mat, + u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon); + +// set video segment to tiles with perspective, triple pixels +// map = pointer to tile map with tile indices (width and height must be power of 2) +// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2) +// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) +// mapwbits = number of bits of tile map width +// maphbits = number of bits of tile map height +// tilebits = number of bits of tile width and height +// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling) +// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height +void ScreenSegmTilePersp3(sSegm* segm, const u8* map, const u8* tiles, const int* mat, + u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon); + +// set video segment to tiles with perspective, quadruple pixels +// map = pointer to tile map with tile indices (width and height must be power of 2) +// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2) +// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function) +// mapwbits = number of bits of tile map width +// maphbits = number of bits of tile map height +// tilebits = number of bits of tile width and height +// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling) +// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height +void ScreenSegmTilePersp4(sSegm* segm, const u8* map, const u8* 
tiles, const int* mat,
+	u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon);
+
+#endif // _VGA_SCREEN_H
diff --git a/MCUME_pico/picovga_t4/vga_t_dma.h b/MCUME_pico/picovga_t4/vga_t_dma.h
new file mode 100644
index 0000000..ad3305e
--- /dev/null
+++ b/MCUME_pico/picovga_t4/vga_t_dma.h
@@ -0,0 +1,58 @@
+/*
+ Wrapping class to extend VGA_T4 to TFT_T_DMA: adds no-op touch/DMA calls and *NoDma aliases that forward to the VGA_T4 methods
+*/
+
+#ifndef _VGA_T_DMAH_
+#define _VGA_T_DMAH_
+
+#ifdef __cplusplus
+#include "VGA_t4.h"
+#endif
+
+
+#define RGBVAL16(r,g,b) VGA_RGB(r,g,b)
+#define RGBVAL8(r,g,b) VGA_RGB(r,g,b)
+
+
+
+#ifdef HIRES
+#define TFT_WIDTH 640
+#define TFT_REALWIDTH 640
+#else
+#define TFT_WIDTH 320
+#define TFT_REALWIDTH 320
+#endif
+
+#define TFT_HEIGHT 240
+#define TFT_REALHEIGHT 240
+
+
+
+#ifdef __cplusplus
+
+class TFT_T_DMA: public VGA_T4
+{
+  public:
+    // Fake touch screen functions (never report a touch; output pointers are left unwritten)
+    bool isTouching(void) { return false; }
+    void readRaw(uint16_t * oX, uint16_t * oY, uint16_t * oZ) { }
+    void readCal(uint16_t * oX, uint16_t * oY, uint16_t * oZ) { };
+    void callibrateTouch(uint16_t xMin,uint16_t yMin,uint16_t xMax,uint16_t yMax) { }
+
+    // fake DMA functions (empty bodies, nothing is started, stopped or flipped)
+    void startDMA(void) { };
+    void stopDMA(void) { };
+    void flipscreen(bool flip) { };
+
+    // fake no DMA functions (each forwards its arguments unchanged to the corresponding inherited call)
+    void writeScreenNoDma(const vga_pixel *pcolors) { writeScreen(pcolors); }
+    void fillScreenNoDma(vga_pixel color) { clear(color); }
+    void drawTextNoDma(int16_t x, int16_t y, const char * text, vga_pixel fgcolor, vga_pixel bgcolor, bool doublesize) { drawText(x,y,text,fgcolor,bgcolor,doublesize); }
+    void drawRectNoDma(int16_t x, int16_t y, int16_t w, int16_t h, vga_pixel color) { drawRect(x, y, w, h, color); }
+    void drawSpriteNoDma(int16_t x, int16_t y, const int16_t *bitmap) { drawSprite(x, y, bitmap); }
+    void drawSpriteNoDma(int16_t x, int16_t y, const int16_t *bitmap, uint16_t croparx, uint16_t cropary, uint16_t croparw, uint16_t croparh) { drawSprite(x, y, bitmap, croparx, cropary, croparw, croparh); }
+};
+
+
+#endif
+#endif
diff --git
a/MCUME_pico/picovga_t4/vga_vmode.cpp b/MCUME_pico/picovga_t4/vga_vmode.cpp
new file mode 100755
index 0000000..d30351e
--- /dev/null
+++ b/MCUME_pico/picovga_t4/vga_vmode.cpp
@@ -0,0 +1,1020 @@
+
+// ****************************************************************************
+//
+// VGA videomodes
+//
+// ****************************************************************************
+
+#include "include.h"
+
+sVmode Vmode; // videomode setup
+sVgaCfg Cfg; // required configuration
+sCanvas Canvas; // canvas of draw box
+
+// default 16-color palettes (EGA colors); each entry comment lists the 8-bit pixel value, then the RGB color (NOTE(review): values look like R3G3B2 encoding - confirm against the CGACOL_* definitions)
+// - do not set "const", to stay in faster RAM
+u8 DefPal16[16] = {
+	CGACOL_0, // 0 // 0x000000 black
+	CGACOL_1, // 2 // 0x0000AA dark blue
+	CGACOL_2, // 20 // 0x00B600 dark green
+	CGACOL_3, // 22 // 0x00B6AA dark cyan
+	CGACOL_4, // 160 // 0xB60000 dark red
+	CGACOL_5, // 162 // 0xB600AA dark magenta
+	CGACOL_6, // 168 // 0xB64900 brown
+	CGACOL_7, // 182 // 0xB6B6AA light gray
+
+	CGACOL_8, // 73 // 0x494955 dark gray
+	CGACOL_9, // 75 // 0x4949FF light blue
+	CGACOL_10, // 93 // 0x49FF55 light green
+	CGACOL_11, // 95 // 0x49FFFF light cyan
+	CGACOL_12, // 233 // 0xFF4955 light red
+	CGACOL_13, // 235 // 0xFF49FF light magenta
+	CGACOL_14, // 253 // 0xFFFF55 yellow
+	CGACOL_15, // 255 // 0xFFFFFF white
+};
+
+// 16-color palette translation table (256 entries of u16; NOTE(review): presumably filled by GenPal16Trans from a 16-color palette - confirm at the call site)
+u16 Pal16Trans[256];
+
+/*
+http://martin.hinner.info/vga/pal.html
+
+VGA system (525 lines total):
+time 0:
+- line 1, 2: (2) vertical sync
+- line 3..35: (33) dark
+- line 36..515: (480) image lines 0..479
+- line 516..525: (10) dark
+
+PAL system (625 lines total):
+time 0:
+- line 1, 2: (2) vertical sync + vertical sync
+- line 3: (1) vertical sync + half sync
+- line 4, 5: (2) half sync + half sync
+- line 6..23: (18) dark
+- line 24..46: (23) dark image
+time 46:
+- line 47..286: (240) image lines odd 1, 3, 5 ...
479 +- line 287..310: (24) dark image +- line 311..312: (2) half sync + half sync +- line 313: (1) half sync + vertical sync +vsync time 313 (vsync time 312.5): +- line 314..315: (2) vertical sync + vertical sync +- line 316..317: (2) half sync + half sync +- line 318..335: (18) dark +- line 336..358: (23) dark image +time 358 (45.5 from last vsync) +- line 359..598: (240) image lines even 0, 2, ... 478 +- line 599..622: (24) dark image +- line 623..625: (3) half sync + half sync +time 625: + +NTSC system (525 lines total): +time 0, even field: +- line 1..3: (3) vertical sync + vertical sync (6 serration pulses: 27.3 us low, 4.5 us high) +- line 4..6: (3) half sync + half sync (6 equalizing pulses: 2.3 us low, 29.5 us high) +- line 7..16: (10) dark (blanked video: 4.7 us low, 58.9 us high) +- line 17,18: (2) dark image +time 18: +- line 19..258: (240) image lines even 0, 2, ... 478 +- line 259: (1) dark image +- line 260..262: (3) half sync + half sync (7 equalizing pulses) +- line 263: (1) half sync + vertical sync (6 serration pulses) +time 263 (vsync time 262.5): +- line 264,265: (2) vertical sync + vertical sync +- line 266: (1) vertical sync + half sync (5 equalizing pulses) +- line 267..268: (2) half sync + half sync +- line 269..279: (11) dark +- line 280..281: (2) dark image +time 281 (18.5 from last vsync) +- line 282..521: (240) image lines odd 1, 3, 5 ... 
479 +- line 522: (1) dark image +- line 523..525: (3) half sync + half sync +time 525: + +*/ + +// === TV videomodes + +// TV PAL interlaced 5:4 720x576 (4:3 768x576, 16:9 1024x576) +const sVideo VideoPAL = { + // horizontal (horizontal frequency 15625 Hz, effective sync pulses 16000 Hz) + .htot= 64.00000f, // total scanline in [us] + .hfront= 1.65000f, // H front porch (after image, before HSYNC) in [us] + .hsync= 4.70000f, // H sync pulse in [us] + .hback= 5.70000f, // H back porch (after HSYNC, before image) in [us] + .hfull= 47.36000f, // H full visible in [us] (formally should be 51.95 us) + + // vertical (vertical frequency 50 Hz) + .vtot=625, // total scanlines (both subframes) + .vmax=576, // maximal height + + // subframe 1 + .vsync1=5, // V sync (half-)pulses on subframe 1 + .vpost1=5, // V sync post half-pulses on subframe 1 + .vback1=18+23, // V back porch (after VSYNC, before image) on subframe 1 + .vact1=240, // active visible scanlines, subframe 1 (formally should be 288, 576 total) + .vfront1=24, // V front porch (after image, before VSYNC) on subframe 1 + .vpre1=5, // V sync pre half-pulses on subframe 1 + + // subframe 2 (ignored if not interlaced) + .vsync2=5, // V sync half-pulses on subframe 2 + .vpost2=4, // V sync post half-pulses on subframe 2 + .vback2=18+23, // V back porch (after VSYNC, before image) on subframe 2 + .vact2=240, // active visible scanlines, subframe 2 (formally should be 288, 576 total) + .vfront2=24, // V front porch (after image, before VSYNC) on subframe 2 + .vpre2=6, // V sync pre half-pulses on subframe 2 + + // name + .name = "PAL ", // video timing name (VIDEO_NAME_LEN characters + terminating 0) + + // flags + .inter=True, // interlaced (use subframes) + .psync=False, // positive synchronization + .odd=True, // first sub-frame is odd lines 1, 3, 5,... 
(PAL) +}; + +// TV PAL progressive 5:4 360x288 (4:3 384x288, 16:9 512x288) +const sVideo VideoPALp = { + // horizontal (horizontal frequency 15625 Hz) + .htot= 64.00000f, // total scanline in [us] + .hfront= 1.65000f, // H front porch (after image, before HSYNC) in [us] + .hsync= 4.70000f, // H sync pulse in [us] + .hback= 5.70000f, // H back porch (after HSYNC, before image) in [us] + .hfull= 47.36000f, // H full visible in [us] (formally should be 51.95 us) + + // vertical (vertical frequency 50 Hz) + .vtot=312, // total scanlines (both subframes) + .vmax=288, // maximal height + + // subframe 1 + .vsync1=2, // V sync (half-)pulses on subframe 1 + .vpost1=0, // V sync post half-pulses on subframe 1 + .vback1=18+23+2, // V back porch (after VSYNC, before image) on subframe 1 + .vact1=240, // active visible scanlines, subframe 1 (formally should be 288, 576 total) + .vfront1=24+3, // V front porch (after image, before VSYNC) on subframe 1 + .vpre1=0, // V sync pre half-pulses on subframe 1 + + // subframe 2 (ignored if not interlaced) + .vsync2=0, // V sync half-pulses on subframe 2 + .vpost2=0, // V sync post half-pulses on subframe 2 + .vback2=0, // V back porch (after VSYNC, before image) on subframe 2 + .vact2=0, // active visible scanlines, subframe 2 (formally should be 288, 576 total) + .vfront2=0, // V front porch (after image, before VSYNC) on subframe 2 + .vpre2=0, // V sync pre half-pulses on subframe 2 + + // name + .name = "PALp ", // video timing name (VIDEO_NAME_LEN characters + terminating 0) + + // flags + .inter=False, // interlaced (use subframes) + .psync=False, // positive synchronization + .odd=True, // first sub-frame is odd lines 1, 3, 5,... 
(PAL) +}; + +// TV NTSC interlaced 4:3 640x480 (5:4 600x480, 16:9 848x480) +// serration pulses (half vsync): 27.3 us low, 4.5 us high +// equalizing pulses (half hsync): 2.3 us low, 29.5 us high +// blanked video (hsync pulses): 4.7 us low, 58.9 us high +const sVideo VideoNTSC = { + // horizontal (horizontal frequency 15734 Hz, effective sync pulses 16274 Hz) + .htot= 63.55582f, // total scanline in [us] + .hfront= 1.50000f, // H front porch (after image, before HSYNC) in [us] + .hsync= 4.70000f, // H sync pulse in [us] + .hback= 4.50000f, // H back porch (after HSYNC, before image) in [us] + .hfull= 47.03130f, // H full visible in [us] + + // vertical + .vtot=525, // total scanlines (both subframes) + .vmax=480, // maximal height + + // subframe 1 + .vsync1=6, // V sync (half-)pulses on subframe 1 + .vpost1=6, // V sync post half-pulses on subframe 1 + .vback1=10+2, // V back porch (after VSYNC, before image) on subframe 1 + .vact1=240, // active visible scanlines, subframe 1 + .vfront1=1, // V front porch (after image, before VSYNC) on subframe 1 + .vpre1=7, // V sync pre half-pulses on subframe 1 + + // subframe 2 (ignored if not interlaced) + .vsync2=6, // V sync half-pulses on subframe 2 + .vpost2=5, // V sync post half-pulses on subframe 2 + .vback2=11+2, // V back porch (after VSYNC, before image) on subframe 2 + .vact2=240, // active visible scanlines, subframe 2 + .vfront2=1, // V front porch (after image, before VSYNC) on subframe 2 + .vpre2=6, // V sync pre half-pulses on subframe 2 + + // name + .name = "NTSC ", // video timing name (VIDEO_NAME_LEN characters + terminating 0) + + // flags + .inter=True, // interlaced (use subframes) + .psync=False, // positive synchronization + .odd=False, // first sub-frame is odd lines 1, 3, 5,... 
(PAL) +}; + +// TV NTSC progressive 4:3 320x240 (5:4 300x240, 16:9 424x240) +const sVideo VideoNTSCp = { + // horizontal (horizontal frequency 15734 Hz) + .htot= 63.55582f, // total scanline in [us] + .hfront= 1.50000f, // H front porch (after image, before HSYNC) in [us] + .hsync= 4.70000f, // H sync pulse in [us] + .hback= 4.50000f, // H back porch (after HSYNC, before image) in [us] + .hfull= 47.03130f, // H full visible in [us] + + // vertical + .vtot=262, // total scanlines (both subframes) + .vmax=240, // maximal height + + // subframe 1 + .vsync1=3, // V sync (half-)pulses on subframe 1 + .vpost1=0, // V sync post half-pulses on subframe 1 + .vback1=10+2+3, // V back porch (after VSYNC, before image) on subframe 1 + .vact1=240, // active visible scanlines, subframe 1 + .vfront1=1+3, // V front porch (after image, before VSYNC) on subframe 1 + .vpre1=0, // V sync pre half-pulses on subframe 1 + + // subframe 2 (ignored if not interlaced) + .vsync2=0, // V sync half-pulses on subframe 2 + .vpost2=0, // V sync post half-pulses on subframe 2 + .vback2=0, // V back porch (after VSYNC, before image) on subframe 2 + .vact2=0, // active visible scanlines, subframe 2 + .vfront2=0, // V front porch (after image, before VSYNC) on subframe 2 + .vpre2=6, // V sync pre half-pulses on subframe 2 + + // name + .name = "NTSCp", // video timing name (VIDEO_NAME_LEN characters + terminating 0) + + // flags + .inter=False, // interlaced (use subframes) + .psync=False, // positive synchronization + .odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL) +}; + +// === Monitor videomodes + +// EGA 8:5 640x400 (5:4 500x400, 4:3 528x400, 16:9 704x400), vert. 70 Hz, hor. 
31.4685 kHz, pixel clock 25.175 MHz +const sVideo VideoEGA = { + // horizontal + .htot= 31.77781f, // total scanline in [us] + .hfront= 0.63556f, // H front porch (after image, before HSYNC) in [us] + .hsync= 3.81334f, // H sync pulse in [us] + .hback= 1.90667f, // H back porch (after HSYNC, before image) in [us] + .hfull= 25.42224f, // H full visible in [us] + + // vertical + .vtot=449, // total scanlines (both subframes) + .vmax=400, // maximal height + + // subframe 1 + .vsync1=2, // V sync (half-)pulses on subframe 1 + .vpost1=0, // V sync post half-pulses on subframe 1 + .vback1=35, // V back porch (after VSYNC, before image) on subframe 1 + .vact1=400, // active visible scanlines, subframe 1 + .vfront1=12, // V front porch (after image, before VSYNC) on subframe 1 + .vpre1=0, // V sync pre half-pulses on subframe 1 + + // subframe 2 (ignored if not interlaced) + .vsync2=0, // V sync half-pulses on subframe 2 + .vpost2=0, // V sync post half-pulses on subframe 2 + .vback2=0, // V back porch (after VSYNC, before image) on subframe 2 + .vact2=0, // active visible scanlines, subframe 2 + .vfront2=0, // V front porch (after image, before VSYNC) on subframe 2 + .vpre2=0, // V sync pre half-pulses on subframe 2 + + // name + .name = "EGA ", // video timing name (VIDEO_NAME_LEN characters + terminating 0) + + // flags + .inter=False, // interlaced (use subframes) + .psync=False, // positive synchronization + .odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL) +}; + +// VGA 4:3 640x480 (16:9 848x480), vert. 60 Hz, hor. 
31.4685 kHz, pixel clock 25.175 MHz +const sVideo VideoVGA = { + // horizontal + .htot= 31.77781f, // total scanline in [us] (800 pixels) + .hfront= 0.63556f, // H front porch (after image, before HSYNC) in [us] (16 pixels) + .hsync= 3.81334f, // H sync pulse in [us] (96 pixels) + .hback= 1.90667f, // H back porch (after HSYNC, before image) in [us] (48 pixels) + .hfull= 25.42224f, // H full visible in [us] (640 pixels) + + // vertical + .vtot=525, // total scanlines (both subframes) + .vmax=480, // maximal height + + // subframe 1 + .vsync1=2, // V sync (half-)pulses on subframe 1 + .vpost1=0, // V sync post half-pulses on subframe 1 + .vback1=33, // V back porch (after VSYNC, before image) on subframe 1 + .vact1=480, // active visible scanlines, subframe 1 + .vfront1=10, // V front porch (after image, before VSYNC) on subframe 1 + .vpre1=0, // V sync pre half-pulses on subframe 1 + + // subframe 2 (ignored if not interlaced) + .vsync2=0, // V sync half-pulses on subframe 2 + .vpost2=0, // V sync post half-pulses on subframe 2 + .vback2=0, // V back porch (after VSYNC, before image) on subframe 2 + .vact2=0, // active visible scanlines, subframe 2 + .vfront2=0, // V front porch (after image, before VSYNC) on subframe 2 + .vpre2=0, // V sync pre half-pulses on subframe 2 + + // name + .name = "VGA ", // video timing name (VIDEO_NAME_LEN characters + terminating 0) + + // flags + .inter=False, // interlaced (use subframes) + .psync=False, // positive synchronization + .odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL) +}; + +// SVGA 4:3 800x600 (16:9 1064x600), vert. 60 Hz, hor. 
37.879 kHz, pixel clock 40 MHz +const sVideo VideoSVGA = { + // horizontal + .htot= 26.40000f, // total scanline in [us] (1056 pixels) + .hfront= 1.00000f, // H front porch (after image, before HSYNC) in [us] (40 pixels) + .hsync= 3.20000f, // H sync pulse in [us] (128 pixels) + .hback= 2.20000f, // H back porch (after HSYNC, before image) in [us] (88 pixels) + .hfull= 20.00000f, // H full visible in [us] (800 pixels) + + // vertical + .vtot=628, // total scanlines (both subframes) + .vmax=600, // maximal height + + // subframe 1 + .vsync1=4, // V sync (half-)pulses on subframe 1 + .vpost1=0, // V sync post half-pulses on subframe 1 + .vback1=23, // V back porch (after VSYNC, before image) on subframe 1 + .vact1=600, // active visible scanlines, subframe 1 + .vfront1=1, // V front porch (after image, before VSYNC) on subframe 1 + .vpre1=0, // V sync pre half-pulses on subframe 1 + + // subframe 2 (ignored if not interlaced) + .vsync2=0, // V sync half-pulses on subframe 2 + .vpost2=0, // V sync post half-pulses on subframe 2 + .vback2=0, // V back porch (after VSYNC, before image) on subframe 2 + .vact2=0, // active visible scanlines, subframe 2 + .vfront2=0, // V front porch (after image, before VSYNC) on subframe 2 + .vpre2=0, // V sync pre half-pulses on subframe 2 + + // name + .name = "SVGA ", // video timing name (VIDEO_NAME_LEN characters + terminating 0) + + // flags + .inter=False, // interlaced (use subframes) + .psync=True, // positive synchronization + .odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL) +}; + +// XGA 4:3 1024x768 (16:9 1360x768), vert. 60 Hz, hor. 
48.36310 kHz, pixel clock 65 MHz +const sVideo VideoXGA = { + // horizontal + .htot= 20.67692f, // total scanline in [us] (1344 pixels) + .hfront= 0.36923f, // H front porch (after image, before HSYNC) in [us] (24 pixels) + .hsync= 2.09231f, // H sync pulse in [us] (136 pixels) + .hback= 2.46154f, // H back porch (after HSYNC, before image) in [us] (160 pixels) + .hfull= 15.75385f, // H full visible in [us] (1024 pixels) + + // vertical + .vtot=806, // total scanlines (both subframes) + .vmax=768, // maximal height + + // subframe 1 + .vsync1=6, // V sync (half-)pulses on subframe 1 + .vpost1=0, // V sync post half-pulses on subframe 1 + .vback1=29, // V back porch (after VSYNC, before image) on subframe 1 + .vact1=768, // active visible scanlines, subframe 1 + .vfront1=3, // V front porch (after image, before VSYNC) on subframe 1 + .vpre1=0, // V sync pre half-pulses on subframe 1 + + // subframe 2 (ignored if not interlaced) + .vsync2=0, // V sync half-pulses on subframe 2 + .vpost2=0, // V sync post half-pulses on subframe 2 + .vback2=0, // V back porch (after VSYNC, before image) on subframe 2 + .vact2=0, // active visible scanlines, subframe 2 + .vfront2=0, // V front porch (after image, before VSYNC) on subframe 2 + .vpre2=0, // V sync pre half-pulses on subframe 2 + + // name + .name = "XGA ", // video timing name (VIDEO_NAME_LEN characters + terminating 0) + + // flags + .inter=False, // interlaced (use subframes) + .psync=False, // positive synchronization + .odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL) +}; + +// VESA 4:3 1152x864, vert. 60 Hz, hor. 
53.697 kHz, pixel clock 81.62 MHz +const sVideo VideoVESA = { + // horizontal + .htot= 18.62289f, // total scanline in [us] (1520 pixels) + .hfront= 0.78412f, // H front porch (after image, before HSYNC) in [us] (64 pixels) + .hsync= 1.47023f, // H sync pulse in [us] (120 pixels) + .hback= 2.25435f, // H back porch (after HSYNC, before image) in [us] (184 pixels) + .hfull= 14.11419f, // H full visible in [us] (1152 pixels) + + // vertical + .vtot=895, // total scanlines (both subframes) + .vmax=864, // maximal height + + // subframe 1 + .vsync1=3, // V sync (half-)pulses on subframe 1 + .vpost1=0, // V sync post half-pulses on subframe 1 + .vback1=27, // V back porch (after VSYNC, before image) on subframe 1 + .vact1=864, // active visible scanlines, subframe 1 + .vfront1=1, // V front porch (after image, before VSYNC) on subframe 1 + .vpre1=0, // V sync pre half-pulses on subframe 1 + + // subframe 2 (ignored if not interlaced) + .vsync2=0, // V sync half-pulses on subframe 2 + .vpost2=0, // V sync post half-pulses on subframe 2 + .vback2=0, // V back porch (after VSYNC, before image) on subframe 2 + .vact2=0, // active visible scanlines, subframe 2 + .vfront2=0, // V front porch (after image, before VSYNC) on subframe 2 + .vpre2=0, // V sync pre half-pulses on subframe 2 + + // name + .name = "VESA ", // video timing name (VIDEO_NAME_LEN characters + terminating 0) + + // flags + .inter=False, // interlaced (use subframes) + .psync=True, // positive synchronization + .odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL) +}; + +// HD 4:3 1280x960, vert. 53 Hz, hor. 
51.858 kHz, pixel clock 102.1 MHz +#define HD_SLOW 1.15f +const sVideo VideoHD = { + // horizontal + .htot= 16.76787f*HD_SLOW, // total scanline in [us] (1712 pixels) + .hfront= 0.78355f*HD_SLOW, // H front porch (after image, before HSYNC) in [us] (80 pixels) + .hsync= 1.33203f*HD_SLOW, // H sync pulse in [us] (136 pixels) + .hback= 2.11557f*HD_SLOW, // H back porch (after HSYNC, before image) in [us] (216 pixels) + .hfull= 12.53673f*HD_SLOW, // H full visible in [us] (1280 pixels) + + // vertical + .vtot=994-10, // total scanlines (both subframes) + .vmax=960, // maximal height + + // subframe 1 + .vsync1=3, // V sync (half-)pulses on subframe 1 + .vpost1=0, // V sync post half-pulses on subframe 1 + .vback1=30-10, // V back porch (after VSYNC, before image) on subframe 1 + .vact1=960, // active visible scanlines, subframe 1 + .vfront1=1, // V front porch (after image, before VSYNC) on subframe 1 + .vpre1=0, // V sync pre half-pulses on subframe 1 + + // subframe 2 (ignored if not interlaced) + .vsync2=0, // V sync half-pulses on subframe 2 + .vpost2=0, // V sync post half-pulses on subframe 2 + .vback2=0, // V back porch (after VSYNC, before image) on subframe 2 + .vact2=0, // active visible scanlines, subframe 2 + .vfront2=0, // V front porch (after image, before VSYNC) on subframe 2 + .vpre2=0, // V sync pre half-pulses on subframe 2 + + // name + .name = "HD ", // video timing name (VIDEO_NAME_LEN characters + terminating 0) + + // flags + .inter=False, // interlaced (use subframes) + .psync=False, // positive synchronization + .odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL) +}; + + +// Search PLL setup +// reqkhz ... required output frequency in kHz +// input ... PLL input frequency in kHz (default 12000, or use clock_get_hz(clk_ref)/1000) +// vcomin ... minimal VCO frequency in kHz (default 400000) +// vcomax ... maximal VCO frequency in kHz (default 1600000) +// lowvco ... 
prefer low VCO (lower power but more jiter) +// outputs: +// outkhz ... output achieved frequency in kHz (0=not found) +// outvco ... output VCO frequency in kHz +// outfbdiv ... output fbdiv (16..320) +// outpd1 ... output postdiv1 (1..7) +// outpd2 ... output postdiv2 (1..7) +// Returns true if precise frequency has been found, or near frequency used otherwise. +bool vcocalc(u32 reqkhz, u32 input, u32 vcomin, u32 vcomax, bool lowvco, + u32* outkhz, u32* outvco, u16* outfbdiv, u8* outpd1, u8* outpd2) +{ + u32 khz, vco, margin; + u16 fbdiv; + u8 pd1, pd2; + u32 margin_best = 100000; + *outkhz = 0; + + // fbdiv loop + fbdiv = lowvco ? 16 : 320; + for (;;) + { + // get current vco + vco = fbdiv * input; + + // check vco range + if ((vco >= vcomin) && (vco <= vcomax)) + { + // pd1 loop + for (pd1 = 7; pd1 >= 1; pd1--) + { + // pd2 loop + for (pd2 = pd1; pd2 >= 1; pd2--) + { + // current output frequency + khz = vco / (pd1 * pd2); + + // check best frequency + margin = abs((int)(khz - reqkhz)); + if (margin < margin_best) + { + margin_best = margin; + *outkhz = khz; + *outvco = vco; + *outfbdiv = fbdiv; + *outpd1 = pd1; + *outpd2 = pd2; + } + } + } + } + + // shift fbdiv + if (lowvco) + { + fbdiv++; + if (fbdiv > 320) break; + } + else + { + fbdiv--; + if (fbdiv < 16) break; + } + } + + // check precise frequency + return (*outkhz == reqkhz) && (*outvco == *outkhz * *outpd1 * *outpd2); +} + +// find sysclock setup (use set_sys_clock_pll to set sysclock) +// reqkhz ... required frequency in kHz +// outputs: +// outkhz ... output achieved frequency in kHz (0=not found) +// outvco ... output VCO frequency in kHz +// outfbdiv ... output fbdiv (16..320) +// outpd1 ... output postdiv1 (1..7) +// outpd2 ... output postdiv2 (1..7) +// Returns true if precise frequency has been found, or near frequency used otherwise. 
+bool FindSysClock(u32 reqkhz, u32* outkhz, u32* outvco, u16* outfbdiv, u8* outpd1, u8* outpd2) +{ + // get reference frequency in kHz (should be 12 MHz) + u32 input = clock_get_hz(clk_ref)/1000; + + // find PLL setup + return vcocalc(reqkhz, input, 400000, 1600000, false, outkhz, outvco, outfbdiv, outpd1, outpd2); +} + + +// initialize default VGA configuration +void VgaCfgDef(sVgaCfg* cfg) +{ + cfg->width = 640; // width in pixels + cfg->height = 480; // height in lines + cfg->wfull = 0; // width of full screen, corresponding to 'hfull' time (0=use 'width' parameter) + cfg->video = &VideoVGA; // used video timings + cfg->freq = 250000; //120000; // required minimal system frequency in kHz (real frequency can be higher) + cfg->fmax = 270000; // maximal system frequency in kHz (limit resolution if needed) + cfg->mode[0] = LAYERMODE_BASE; // modes of overlapped layers 0..3 LAYERMODE_* (LAYERMODE_BASE = layer is off) + cfg->mode[1] = LAYERMODE_BASE; // - mode of layer 0 is ignored (always use LAYERMODE_BASE) + cfg->mode[2] = LAYERMODE_BASE; // - all overlapped layers must use same layer program + cfg->mode[3] = LAYERMODE_BASE; + cfg->dbly = False; // double in Y direction + cfg->lockfreq = False; // lock required frequency, do not change it +} + +// debug print videomode setup +void VgaPrintCfg(const sVmode* vmode) +{ + printf("width=%u height=%u wfull=%u wmax=%u\n", vmode->width, vmode->height, vmode->wfull, vmode->wmax); + printf("freq=%u vco=%u fbdiv=%u pd1=%u pd2=%u\n", vmode->freq, vmode->vco, vmode->fbdiv, vmode->pd1, vmode->pd2); + printf("div=%u cpp=%u prog=%u mode=%u %u %u %u\n", vmode->div, vmode->cpp, vmode->prog, vmode->mode[0], vmode->mode[1], vmode->mode[2], vmode->mode[3]); + printf("htot=%u hfront=%u hsync=%u hback=%u\n", vmode->htot, vmode->hfront, vmode->hsync, vmode->hback); + printf("vtot=%u vmax=%u\n", vmode->vtot, vmode->vmax); + printf("vsync1=%u vpost1=%u vback1=%u vact1=%u vfront1=%u vpre1=%u vfirst1=%u\n", vmode->vsync1, vmode->vpost1, + 
vmode->vback1, vmode->vact1, vmode->vfront1, vmode->vpre1, vmode->vfirst1); + printf("vsync2=%u vpost2=%u vback2=%u vact2=%u vfront2=%u vpre2=%u vfirst2=%u\n", vmode->vsync2, vmode->vpost2, + vmode->vback2, vmode->vact2, vmode->vfront2, vmode->vpre2, vmode->vfirst2); + printf("lockfreq=%u dbly=%u inter=%u psync=%u odd=%u\n", vmode->lockfreq, vmode->dbly, vmode->inter, vmode->psync, vmode->odd); +} + +// calculate videomode setup +// cfg ... required configuration +// vmode ... destination videomode setup for driver +void VgaCfg(const sVgaCfg* cfg, sVmode* vmode) +{ + int i; + + // prepare layer program, copy layer modes + u8 prog = LAYERMODE_BASE; + vmode->mode[0] = prog; + for (i = 1; i < LAYERS; i++) + { + if (cfg->mode[i] != LAYERMODE_BASE) prog = LayerMode[cfg->mode[i]].prog; + vmode->mode[i] = cfg->mode[i]; + } + vmode->prog = prog; + + // prepare minimal and maximal clocks per pixel + int mincpp = LayerMode[LAYERMODE_BASE].mincpp; + int maxcpp = LayerMode[LAYERMODE_BASE].maxcpp; + int cpp; + for (i = 1; i < LAYERS; i++) + { + cpp = LayerMode[cfg->mode[i]].mincpp; + if (cpp > mincpp) mincpp = cpp; + cpp = LayerMode[cfg->mode[i]].maxcpp; + if (cpp < maxcpp) maxcpp = cpp; + } + + // prepare full width + int w = cfg->width; // required width + int wfull = cfg->wfull; // full width + if (wfull == 0) wfull = w; // use required width as 100% width + + // prepare maximal active time and maximal pixels + const sVideo* v = cfg->video; + float hmax = v->htot - v->hfront - v->hsync - v->hback; + float hfull = v->hfull; + int wmax = (int)(wfull*hmax/hfull + 0.001f); + + // calculate cpp from required frequency (rounded down), limit minimal cpp + u32 freq = cfg->freq; + cpp = (int)(freq*hfull/1000/wfull + 0.1f); + if (cpp < mincpp) cpp = mincpp; + + // recalculate frequency if not locked + if (!cfg->lockfreq) + { + int freq2 = (int)(cpp*wfull*1000/hfull + 0.5f) + 200; + if (freq2 < freq) + { + cpp++; + freq2 = (int)(cpp*wfull*1000/hfull + 0.5f) + 200; + } + if (freq2 >= 
freq) freq = freq2; + if (freq > cfg->fmax) freq = cfg->fmax; + } + + // find sysclock setup (use set_sys_clock_pll to set sysclock) + u32 vco; + u16 fbdiv; + u8 pd1, pd2; + FindSysClock(freq, &freq, &vco, &fbdiv, &pd1, &pd2); + + vmode->freq = freq; + vmode->vco = vco; + vmode->fbdiv = fbdiv; + vmode->pd1 = pd1; + vmode->pd2 = pd2; + + // calculate divisor + cpp = (int)(freq*hfull/1000/wfull + 0.2f); + int div = 1; + while (cpp > maxcpp) + { + div++; + cpp = (int)(freq*hfull/1000/wfull/div + 0.2f); + } + + vmode->div = div; + vmode->cpp = cpp; + + // calculate new full resolution and max resolution + wfull = (int)(freq*hfull/1000/cpp/div + 0.4f); + wmax = (int)(freq*hmax/1000/cpp/div + 0.4f); + + // limit resolution + if (w > wmax) w = wmax; + w = ALIGN4(w); + vmode->width = w; // active width + vmode->wfull = wfull; // width of full screen (image should be full visible) + vmode->wmax = wmax; // maximal width (can be > wfull) + + // horizontal timings + int hwidth = w*cpp; // active width in state machine clocks + int htot = (int)(freq*v->htot/1000/div + 0.5f); // total state machine clocks per line + int hsync = (int)(freq*v->hsync/1000/div + 0.5f); // H sync pulse in state machine clocks (min. 4) + + if (hsync < 4) + { + htot -= 4 - hsync; + hsync = 4; + } + + int hfront = (int)(freq*v->hfront/1000/div + 0.5f); // H front porch in state machine clocks (min. 2) + int hback = (int)(freq*v->hback/1000/div + 0.5f); // H back porch in state machine clocks (min. 13) + int d = htot - hfront - hsync - hback - hwidth; // difference + hfront += d/2; + hback += (d < 0) ? 
(d-1)/2 : (d+1)/2; + + if (hfront < 4) + { + hback -= 4 - hfront; + hfront = 4; + } + + if (hback < 13) + { + hfront -= 13 - hback; + hback = 13; + + if (hfront < 2) hfront = 2; + } + + htot = hfront + hsync + hback + hwidth; // total state machine clocks per line + + // interliced htot must be even (to enable split to half-sync) + if (v->inter && ((htot & 1) != 0)) + { + htot--; + hfront++; + } + + vmode->htot = (u16)htot; // total state machine clocks per line + vmode->hfront = (u16)hfront; // H front porch in state machine clocks (min. 2) + vmode->hsync = (u16)hsync; // H sync pulse in state machine clocks (min. 4) + vmode->hback = (u16)hback; // H back porch in state machine clocks (min. 13) + + // vertical timings + int h = cfg->height; // required height + if (cfg->dbly) h *= 2; // use double lines + vmode->vmax = v->vmax; // maximal height + if (h > v->vmax) h = v->vmax; // limit height + if (cfg->dbly) h &= ~1; // must be even number if double lines + + int vact1 = h; // active lines in progress mode + int vact2 = 0; + if (v->inter) // interlaced + { + if (v->odd) // first frame is odd lines + { + vact1 = h/2; + vact2 = (h+1)/2; // if even lines, even frame will have more lines + } + else + { + vact1 = (h+1)/2; // if even lines, even frame will have more lines + vact2 = h/2; + } + } + + if (cfg->dbly) h /= 2; // return double lines to single lines + vmode->height = h; + + // vertical timings + vmode->vtot = v->vtot; // total scanlines + + vmode->vact1 = vact1; // active scanlines of 1st subframe + int dh = vact1 - v->vact1; // difference + vmode->vsync1 = v->vsync1; // V sync (half-)pulses on subframe 1 + vmode->vpost1 = v->vpost1; // V sync post (half-)pulses on subframe 1 + vmode->vback1 = v->vback1 - dh/2; // V back porch (after VSYNC, before image) on subframe 1 + vmode->vfront1 = v->vfront1 - ((dh < 0) ? 
(dh-1)/2 : (dh+1)/2); // V front porch (after image, before VSYNC) on subframe 1 + vmode->vpre1 = v->vpre1; // V sync pre (half-)pulses on subframe 1 + + vmode->vact2 = vact2; // active scanlines of 2nd subframe + dh = vact2 - v->vact2; // difference + vmode->vsync2 = v->vsync2; // V sync half-pulses on subframe 2 + vmode->vpost2 = v->vpost2; // V sync post half-pulses on subframe 2 + vmode->vback2 = v->vback2 - dh/2; // V back porch (after VSYNC, before image) on subframe 2 + vmode->vfront2 = v->vfront2 - ((dh < 0) ? (dh-1)/2 : (dh+1)/2); // V front porch (after image, before VSYNC) on subframe 2 + vmode->vpre2 = v->vpre2; // V sync pre half-pulses on subframe 2 + + // frequency + vmode->hfreq = vmode->freq * 1000.0f / vmode->div / vmode->htot; + vmode->vfreq = vmode->hfreq / vmode->vtot; + + // name + vmode->name = v->name; // video timing name + + // flags + vmode->lockfreq = cfg->lockfreq; // lock current frequency, do not change it + vmode->dbly = cfg->dbly; // double scanlines + vmode->inter = v->inter; // interlaced (use sub-frames) + vmode->psync = v->psync; // positive synchronization + vmode->odd = v->odd; // first sub-frame is odd lines 1, 3, 5,... 
(PAL) + + // first active scanline + if (v->inter) + { + // interlaced + vmode->vfirst1 = (vmode->vsync1 + vmode->vpost1)/2 + vmode->vback1 + 1; + vmode->vfirst2 = vmode->vfirst1 + vmode->vact1 + vmode->vfront1 + + (vmode->vpre1 + vmode->vsync2 + vmode->vpost2)/2 + vmode->vback2; + } + else + { + // progressive + vmode->vfirst1 = vmode->vsync1 + vmode->vback1 + 1; + vmode->vfirst2 = 0; + } +} + +// timings +const sVideo* VideoResTab[DEV_MAX*RES_MAX] = +{ + // DEV_PAL + &VideoPALp, // RES_ZX = 0, // 256x192 + &VideoPALp, // RES_CGA, // 320x200 + &VideoPALp, // RES_QVGA, // 320x240 + &VideoPAL, // RES_EGA, // 528x400 + &VideoPAL, // RES_VGA, // 640x480 + &VideoPAL, // RES_SVGA, // 800x600 (not for TV device) + &VideoPAL, // RES_XGA, // 1024x768 (not for TV device) + &VideoPAL, // RES_HD, // 1280x960 (not for TV device) + + // DEV_NTSC + &VideoNTSCp, // RES_ZX = 0, // 256x192 + &VideoNTSCp, // RES_CGA, // 320x200 + &VideoNTSCp, // RES_QVGA, // 320x240 + &VideoNTSC, // RES_EGA, // 528x400 + &VideoNTSC, // RES_VGA, // 640x480 + &VideoNTSC, // RES_SVGA, // 800x600 (not for TV device) + &VideoNTSC, // RES_XGA, // 1024x768 (not for TV device) + &VideoNTSC, // RES_HD, // 1280x960 (not for TV device) + + // DEV_VGA + &VideoEGA, // RES_ZX = 0, // 256x192 + &VideoVGA, // RES_CGA, // 320x200 + &VideoVGA, // RES_QVGA, // 320x240 + &VideoEGA, // RES_EGA, // 528x400 + &VideoVGA, // RES_VGA, // 640x480 + &VideoSVGA, // RES_SVGA, // 800x600 (not for TV device) + &VideoXGA, // RES_XGA, // 1024x768 (not for TV device) + &VideoHD, // RES_HD, // 1280x960 (not for TV device) +}; + +// required resolution width x height +const u16 VideoResReq[RES_MAX*2] = +{ + 256, 192, // RES_ZX = 0, // 256x192 + 320, 200, // RES_CGA, // 320x200 + 320, 240, // RES_QVGA, // 320x240 + 512, 400, // RES_EGA, // 512x400 + 640, 480, // RES_VGA, // 640x480 + 800, 600, // RES_SVGA, // 800x600 (not for TV device) + 1024, 768, // RES_XGA, // 1024x768 (not for TV device) + 1280, 960, // RES_HD, // 1280x960 (not for 
TV device) +}; + +// initialize videomode +// dev ... device DEV_* +// res ... resolution RES_* +// form ... format FORM_* +// buf ... pointer to frame buffer (must be aligned to 4 bytes, use ALIGNED attribute) +// buf2 ... pointer to additional buffer: +// FORM_TILE: pointer to column of tiles 32x32 in 8-bit graphics +// FORM_TEXT: pointer to font 8x16 or 8x8 (size 4 KB or 2 KB, ALIGNED attribute, should be in RAM) +// - copy font to 4KB or 2 KB RAM buffer with ALIGNED attribute +// - text uses color attributes PC_* +// FORM_RLE: pointer to image rows (ALIGNED attribute, should be in RAM) +// JMH +const sVmode* Video(u8 dev, u8 res, u8 form, u8* buf, const void* buf2 /* = FontBoldB8x16 */) +{ + // stop VGA core + // JMH + //multicore_reset_core1(); + + // run VGA core + // JMH + //multicore_launch_core1(VgaCore); + + // prepare timings structure + if (dev >= DEV_MAX) dev = DEV_VGA; + if (res >= RES_MAX) res = RES_MAX-1; + if (form >= FORM_MAX) form = FORM_MAX-1; + const sVideo* v = VideoResTab[dev*RES_MAX + res]; + + // required resolution + u16 w = VideoResReq[res*2]; + u16 h = VideoResReq[res*2+1]; + if (h > v->vmax) h = v->vmax; + + if ((form == FORM_TEXT8) || (form == FORM_MTEXT8)) + { + w = w/8*8; + h = h/8*8; + } + + if ((form == FORM_TEXT16) || (form == FORM_MTEXT16)) + { + w = w/8*8; + h = h/16*16; + } + + // setup videomode + VgaCfgDef(&Cfg); // get default configuration + Cfg.video = v; // video timings + Cfg.width = w; // screen width + Cfg.height = h; // screen height + if (form == FORM_RLE) Cfg.mode[1] = LAYERMODE_RLE; + Cfg.dbly = h <= v->vmax/2; // double scanlines + VgaCfg(&Cfg, &Vmode); // calculate videomode setup + + // initialize base layer 0 + ScreenClear(pScreen); + sStrip* t = ScreenAddStrip(pScreen, h); + sSegm* g = ScreenAddSegm(t, w); + switch (form) + { + case FORM_8BIT: // 8-bit pixel graphics (up to EGA resolution) + ScreenSegmGraph8(g, buf, w); + Canvas.img = buf; + Canvas.w = w; + Canvas.h = h; + Canvas.wb = w; + Canvas.format = 
CANVAS_8; + break; + + case FORM_4BIT: // 4-bit pixel graphics (up to SVGA graphics) + GenPal16Trans(Pal16Trans, DefPal16); // generate palette translation table + ScreenSegmGraph4(g, buf, Pal16Trans, w/2); + Canvas.img = buf; + Canvas.w = w; + Canvas.h = h; + Canvas.wb = w/2; + Canvas.format = CANVAS_4; + break; + + case FORM_MONO: // 1-bit pixel graphics + ScreenSegmGraph1(g, buf, COL_BLACK, COL_WHITE, w/8); + Canvas.img = buf; + Canvas.w = w; + Canvas.h = h; + Canvas.wb = w/8; + Canvas.format = CANVAS_1; + break; + + case FORM_TILE8: // 8x8 tiles + ScreenSegmTile(g, buf, buf2, 8, 8, (w+7)/8); + break; + + case FORM_TILE12: // 12x12 tiles + ScreenSegmTile(g, buf, buf2, 12, 12, (w+11)/12); + break; + + case FORM_TILE16: // 16x16 tiles + ScreenSegmTile(g, buf, buf2, 16, 16, (w+15)/16); + break; + + case FORM_TILE24: // 24x24 tiles + ScreenSegmTile(g, buf, buf2, 24, 24, (w+23)/24); + break; + + case FORM_TILE32: // 32x32 tiles + ScreenSegmTile(g, buf, buf2, 32, 32, (w+31)/32); + break; + + case FORM_TILE48: // 48x48 tiles + ScreenSegmTile(g, buf, buf2, 48, 48, (w+47)/48); + break; + + case FORM_TILE64: // 64x64 tiles + ScreenSegmTile(g, buf, buf2, 64, 64, (w+63)/64); + break; + + case FORM_MTEXT8: // mono text with font 8x8 + ScreenSegmMText(g, buf, buf2, 8, COL_BLACK, COL_WHITE, w/8); + break; + + case FORM_MTEXT16: // mono text with font 8x16 + ScreenSegmMText(g, buf, buf2, 16, COL_BLACK, COL_WHITE, w/8); + break; + + case FORM_TEXT8: // attribute text with font 8x8 + ScreenSegmAText(g, buf, buf2, 8, DefPal16, w/8*2); + break; + + case FORM_TEXT16: // attribute text with font 8x16 + ScreenSegmAText(g, buf, buf2, 16, DefPal16, w/8*2); + break; + + case FORM_RLE: // images with RLE compression (on overlapped layer 1) + ScreenSegmColor(g, 0, 0); + LayerSetup(1, buf, &Vmode, w, h, 0, buf2); + LayerOn(1); + break; + } + + // initialize system clock + set_sys_clock_pll(Vmode.vco*1000, Vmode.pd1, Vmode.pd2); + + + + // initialize videomode + // JMH + 
//VgaInitReq(&Vmode); + + return &Vmode; +} + diff --git a/MCUME_pico/picovga_t4/vga_vmode.h b/MCUME_pico/picovga_t4/vga_vmode.h new file mode 100755 index 0000000..a39e6ca --- /dev/null +++ b/MCUME_pico/picovga_t4/vga_vmode.h @@ -0,0 +1,238 @@ + +// **************************************************************************** +// +// VGA videomodes +// +// **************************************************************************** + +#ifndef _VGA_VMODE_H +#define _VGA_VMODE_H + +#define VIDEO_NAME_LEN 5 // length of video timing name + +// video timings +typedef struct { + // horizontal + float htot; // total scanline in [us] + float hfront; // H front porch (after image, before HSYNC) in [us] + float hsync; // H sync pulse in [us] + float hback; // H back porch (after HSYNC, before image) in [us] + float hfull; // H full visible in [us] (corresponding to 'wfull' pixels) + + // vertical + u16 vtot; // total scanlines (both subframes) + u16 vmax; // maximal height + + // subframe 1 + u16 vsync1; // V sync (half-)pulses on subframe 1 + u16 vpost1; // V sync post half-pulses on subframe 1 + u16 vback1; // V back porch (after VSYNC, before image) on subframe 1 + u16 vact1; // active visible scanlines, subframe 1 + u16 vfront1; // V front porch (after image, before VSYNC) on subframe 1 + u16 vpre1; // V sync pre half-pulses on subframe 1 + + // subframe 2 (ignored if not interlaced) + u16 vsync2; // V sync half-pulses on subframe 2 + u16 vpost2; // V sync post half-pulses on subframe 2 + u16 vback2; // V back porch (after VSYNC, before image) on subframe 2 + u16 vact2; // active visible scanlines, subframe 2 + u16 vfront2; // V front porch (after image, before VSYNC) on subframe 2 + u16 vpre2; // V sync pre half-pulses on subframe 2 + + // name + const char* name; // video timing name (VIDEO_NAME_LEN characters + terminating 0) + + // flags + bool inter; // interlaced (use subframes) + bool psync; // positive synchronization + bool odd; // first sub-frame is odd lines 
1, 3, 5,... (PAL) +} sVideo; + +// === TV videomodes + +// TV PAL interlaced 5:4 720x576 (4:3 768x576, 16:9 1024x576) +extern const sVideo VideoPAL; + +// TV PAL progressive 5:4 360x288 (4:3 384x288, 16:9 512x288) +extern const sVideo VideoPALp; + +// TV NTSC interlaced 4:3 640x480 (5:4 600x480, 16:9 848x480) +extern const sVideo VideoNTSC; + +// TV NTSC progressive 4:3 320x240 (5:4 300x240, 16:9 424x240) +extern const sVideo VideoNTSCp; + +// === Monitor videomodes + +// EGA 8:5 640x400 (5:4 500x400, 4:3 528x400, 16:9 704x400), vert. 70 Hz, hor. 31.4685 kHz, pixel clock 25.175 MHz +extern const sVideo VideoEGA; + +// VGA 4:3 640x480 (16:9 848x480), vert. 60 Hz, hor. 31.4685 kHz, pixel clock 25.175 MHz +extern const sVideo VideoVGA; + +// SVGA 4:3 800x600 (16:9 1064x600), vert. 60 Hz, hor. 37.897 kHz, pixel clock 40 MHz +extern const sVideo VideoSVGA; + +// XGA 4:3 1024x768 (16:9 1360x768), vert. 60 Hz, hor. 48.36310 kHz, pixel clock 65 MHz +extern const sVideo VideoXGA; + +// VESA 4:3 1152x864, vert. 60 Hz, hor. 53.697 kHz, pixel clock 81.62 MHz +extern const sVideo VideoVESA; + +// HD 4:3 1280x960, vert. 53 Hz, hor. 
51.858 kHz, pixel clock 102.1 MHz +extern const sVideo VideoHD; + +// required configuration to initialize VGA output +typedef struct { + u16 width; // width in pixels + u16 height; // height in lines + u16 wfull; // width of full screen, corresponding to 'hfull' time (0=use 'width' parameter) + const sVideo* video; // used video timings + u32 freq; // required minimal system frequency in kHz (real frequency can be higher) + u32 fmax; // maximal system frequency in kHz (limit resolution if needed) + u8 mode[LAYERS_MAX]; // modes of overlapped layers 0..3 LAYERMODE_* (LAYERMODE_BASE = layer is off) + // - mode of layer 0 is ignored (always use LAYERMODE_BASE) + // - all overlapped layers must use same layer program + bool dbly; // double in Y direction + bool lockfreq; // lock required frequency, do not change it +} sVgaCfg; + +// videomode table - used to setup video driver +typedef struct { + // screen resolution + u16 width; // width in pixels + u16 height; // height in lines + u16 wfull; // width of full screen (corresponding to 'hfull' time) + u16 wmax; // maximal width (corresponding to 'hmax' time) + + // setup PLL system clock + u32 freq; // system clock frequency in kHz + u32 vco; // VCO frequency in kHz + u16 fbdiv; // fbdiv PLL divider + u8 pd1; // postdiv1 + u8 pd2; // postdiv2 + + // setup PIO state machine + u16 div; // divide base state machine clock + u16 cpp; // state machine clocks per pixel + u8 prog; // layer program LAYERPROG_* + u8 mode[LAYERS_MAX]; // mode of layer 0..3 LAYERMODE_* (LAYERMODE_BASE = layer is off or base layer) + + // horizontal timings + u16 htot; // total state machine clocks per line + u16 hfront; // H front porch in state machine clocks (min. 2) + u16 hsync; // H sync pulse in state machine clocks (min. 4) + u16 hback; // H back porch in state machine clocks (min. 
13) + float hfreq; // horizontal frequency in [Hz] + + // vertical timings + u16 vtot; // total scanlines (both sub-frames) + u16 vmax; // maximal height + float vfreq; // vertical frequency in [Hz] + + // subframe 1 + u16 vsync1; // V sync (half-)pulses on subframe 1 + u16 vpost1; // V sync post (half-)pulses on subframe 1 + u16 vback1; // V back porch (after VSYNC, before image) on subframe 1 + u16 vact1; // active visible scanlines, subframe 1 + u16 vfront1; // V front porch (after image, before VSYNC) on subframe 1 + u16 vpre1; // V sync pre (half-)pulses on subframe 1 + u16 vfirst1; // first active scanline, subframe 1 + + // subframe 2 (ignored if not interlaced) + u16 vsync2; // V sync half-pulses on subframe 2 + u16 vpost2; // V sync post half-pulses on subframe 2 + u16 vback2; // V back porch (after VSYNC, before image) on subframe 2 + u16 vact2; // active visible scanlines, subframe 2 + u16 vfront2; // V front porch (after image, before VSYNC) on subframe 2 + u16 vpre2; // V sync pre half-pulses on subframe 2 + u16 vfirst2; // first active scanline, subframe 2 + + // name + const char* name; // video timing name (VIDEO_NAME_LEN characters + terminating 0) + + // flags + bool lockfreq; // lock current frequency, do not change it + bool dbly; // double scanlines + bool inter; // interlaced (use sub-frames) + bool psync; // positive synchronization + bool odd; // first sub-frame is odd lines 1, 3, 5,... 
(PAL) +} sVmode; + +// output device +enum { + DEV_PAL = 0, // PAL TV + DEV_NTSC, // NTSC TV + DEV_VGA, // VGA monitor + + DEV_MAX +}; + +// preset videomode resolution +enum { + RES_ZX = 0, // 256x192 + RES_CGA, // 320x200 + RES_QVGA, // 320x240 + RES_EGA, // 512x400 + RES_VGA, // 640x480 + RES_SVGA, // 800x600 (not for TV device) + RES_XGA, // 1024x768 (not for TV device) + RES_HD, // 1280x960 (not for TV device) + + RES_MAX +}; + +// graphics formats +enum { + FORM_8BIT = 0, // 8-bit pixel graphics (up to EGA resolution) + FORM_4BIT, // 4-bit pixel graphics (up to SVGA graphics) + FORM_MONO, // 1-bit pixel graphics + FORM_TILE8, // 8x8 tiles + FORM_TILE12, // 12x12 tiles + FORM_TILE16, // 16x16 tiles + FORM_TILE24, // 24x24 tiles + FORM_TILE32, // 32x32 tiles + FORM_TILE48, // 48x48 tiles + FORM_TILE64, // 64x64 tiles + FORM_MTEXT8, // mono text with font 8x8 + FORM_MTEXT16, // mono text with font 8x16 + FORM_TEXT8, // attribute text with font 8x8 + FORM_TEXT16, // attribute text with font 8x16 + FORM_RLE, // images with RLE compression (on overlapped layer 1) + + FORM_MAX +}; + +extern sVmode Vmode; // videomode setup +extern sVgaCfg Cfg; // required configuration +extern sCanvas Canvas; // canvas of draw box + +// 16-color palette translation table +extern u16 Pal16Trans[256]; + +// initialize default VGA configuration +void VgaCfgDef(sVgaCfg* cfg); + +// debug print videomode setup +void VgaPrintCfg(const sVmode* vmode); + +// calculate videomode setup +// cfg ... required configuration +// vmode ... destination videomode setup for driver +void VgaCfg(const sVgaCfg* cfg, sVmode* vmode); + +// initialize videomode +// dev ... device DEV_* +// res ... resolution RES_* +// form ... format FORM_* +// buf ... 
pointer to frame buffer (must be aligned to 4-bites, use ALIGNED attribute) +// buf2 ...pointer to additional buffer: +// FORM_TILE*: pointer to column of tiles 32x32 in 8-bit graphics +// FORM_TEXT: pointer to font 8x16 or 8x8 (size 4 KB or 2 KB, ALIGNED attribute, should be in RAM) +// - copy font to 4KB or 2 KB RAM buffer with ALIGNED attribute +// - text uses color attributes PC_* +// FORM_RLE: pointer to image rows (ALIGNED attribute, should be in RAM) +// JMH +const sVmode* Video(u8 dev, u8 res, u8 form, u8* buf, const void* buf2 = NULL); + +#endif // _VGA_VMODE_H diff --git a/MCUME_pico/testtft/emuapi.cpp b/MCUME_pico/testkeymax/emuapi.cpp similarity index 88% rename from MCUME_pico/testtft/emuapi.cpp rename to MCUME_pico/testkeymax/emuapi.cpp index b990cf5..1f87100 100644 --- a/MCUME_pico/testtft/emuapi.cpp +++ b/MCUME_pico/testkeymax/emuapi.cpp @@ -491,7 +491,7 @@ int emu_ReadKeys(void) #ifdef PICOMPUTER keymatrix_hitrow = -1; unsigned char row; - unsigned short cols[6]={1,2,3,4,5,14}; + unsigned short cols[6]={KCOLOUT1,KCOLOUT2,KCOLOUT3,KCOLOUT4,KCOLOUT5,KCOLOUT6}; unsigned char keymatrixtmp[6]; for (int i=0;i<6;i++){ @@ -502,15 +502,15 @@ int emu_ReadKeys(void) //__asm volatile ("nop\n"); // 4-8ns #endif row=0; - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(8) ? 0 : 0x02); - row |= (gpio_get(6) ? 0 : 0x04); - row |= (gpio_get(15) ? 0 : 0x08); - row |= (gpio_get(7) ? 0 : 0x10); - row |= (gpio_get(22) ? 0 : 0x20); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN4) ? 0 : 0x02); + row |= (gpio_get(KROWIN1) ? 0 : 0x04); + row |= (gpio_get(KROWIN3) ? 0 : 0x08); + row |= (gpio_get(KROWIN5) ? 0 : 0x10); + row |= (gpio_get(KROWIN6) ? 
0 : 0x20); //gpio_set_dir(cols[i], GPIO_OUT); gpio_put(cols[i], 1); gpio_set_dir(cols[i], GPIO_IN); @@ -527,15 +527,15 @@ int emu_ReadKeys(void) //__asm volatile ("nop\n"); // 4-8ns #endif row=0; - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(8) ? 0 : 0x02); - row |= (gpio_get(6) ? 0 : 0x04); - row |= (gpio_get(15) ? 0 : 0x08); - row |= (gpio_get(7) ? 0 : 0x10); - row |= (gpio_get(22) ? 0 : 0x20); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN4) ? 0 : 0x02); + row |= (gpio_get(KROWIN1) ? 0 : 0x04); + row |= (gpio_get(KROWIN3) ? 0 : 0x08); + row |= (gpio_get(KROWIN5) ? 0 : 0x10); + row |= (gpio_get(KROWIN6) ? 0 : 0x20); //gpio_set_dir(cols[i], GPIO_OUT); gpio_put(cols[i], 1); gpio_set_dir(cols[i], GPIO_IN); @@ -551,15 +551,15 @@ int emu_ReadKeys(void) //__asm volatile ("nop\n"); // 4-8ns #endif row=0; - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(9) ? 0 : 0x01); - row |= (gpio_get(8) ? 0 : 0x02); - row |= (gpio_get(6) ? 0 : 0x04); - row |= (gpio_get(15) ? 0 : 0x08); - row |= (gpio_get(7) ? 0 : 0x10); - row |= (gpio_get(22) ? 0 : 0x20); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN2) ? 0 : 0x01); + row |= (gpio_get(KROWIN4) ? 0 : 0x02); + row |= (gpio_get(KROWIN1) ? 0 : 0x04); + row |= (gpio_get(KROWIN3) ? 0 : 0x08); + row |= (gpio_get(KROWIN5) ? 0 : 0x10); + row |= (gpio_get(KROWIN6) ? 
0 : 0x20); //gpio_set_dir(cols[i], GPIO_OUT); gpio_put(cols[i], 1); gpio_set_dir(cols[i], GPIO_IN); @@ -850,57 +850,57 @@ void emu_InitJoysticks(void) { gpio_put(KLED, 1); // Output (rows) - gpio_init(1); - gpio_init(2); - gpio_init(3); - gpio_init(4); - gpio_init(5); - gpio_init(14); - gpio_set_dir(1, GPIO_OUT); - gpio_set_dir(2, GPIO_OUT); - gpio_set_dir(3, GPIO_OUT); - gpio_set_dir(4, GPIO_OUT); - gpio_set_dir(5, GPIO_OUT); - gpio_set_dir(14, GPIO_OUT); - gpio_put(1, 1); - gpio_put(2, 1); - gpio_put(3, 1); - gpio_put(4, 1); - gpio_put(5, 1); - gpio_put(14, 1); + gpio_init(KCOLOUT1); + gpio_init(KCOLOUT2); + gpio_init(KCOLOUT3); + gpio_init(KCOLOUT4); + gpio_init(KCOLOUT5); + gpio_init(KCOLOUT6); + gpio_set_dir(KCOLOUT1, GPIO_OUT); + gpio_set_dir(KCOLOUT2, GPIO_OUT); + gpio_set_dir(KCOLOUT3, GPIO_OUT); + gpio_set_dir(KCOLOUT4, GPIO_OUT); + gpio_set_dir(KCOLOUT5, GPIO_OUT); + gpio_set_dir(KCOLOUT6, GPIO_OUT); + gpio_put(KCOLOUT1, 1); + gpio_put(KCOLOUT2, 1); + gpio_put(KCOLOUT3, 1); + gpio_put(KCOLOUT4, 1); + gpio_put(KCOLOUT5, 1); + gpio_put(KCOLOUT6, 1); // but set as input floating when not used! 
- gpio_set_dir(1, GPIO_IN); - gpio_set_dir(2, GPIO_IN); - gpio_set_dir(3, GPIO_IN); - gpio_set_dir(4, GPIO_IN); - gpio_set_dir(5, GPIO_IN); - gpio_set_dir(14, GPIO_IN); - gpio_disable_pulls(1); - gpio_disable_pulls(2); - gpio_disable_pulls(3); - gpio_disable_pulls(4); - gpio_disable_pulls(5); - gpio_disable_pulls(14); + gpio_set_dir(KCOLOUT1, GPIO_IN); + gpio_set_dir(KCOLOUT2, GPIO_IN); + gpio_set_dir(KCOLOUT3, GPIO_IN); + gpio_set_dir(KCOLOUT4, GPIO_IN); + gpio_set_dir(KCOLOUT5, GPIO_IN); + gpio_set_dir(KCOLOUT6, GPIO_IN); + gpio_disable_pulls(KCOLOUT1); + gpio_disable_pulls(KCOLOUT2); + gpio_disable_pulls(KCOLOUT3); + gpio_disable_pulls(KCOLOUT4); + gpio_disable_pulls(KCOLOUT5); + gpio_disable_pulls(KCOLOUT6); // Input pins (cols) - gpio_init(6); - gpio_init(9); - gpio_init(15); - gpio_init(8); - gpio_init(7); - gpio_init(22); - gpio_set_dir(6,GPIO_IN); - gpio_set_dir(9,GPIO_IN); - gpio_set_dir(15,GPIO_IN); - gpio_set_dir(8,GPIO_IN); - gpio_set_dir(7,GPIO_IN); - gpio_set_dir(22,GPIO_IN); - gpio_pull_up(6); - gpio_pull_up(9); - gpio_pull_up(15); - gpio_pull_up(8); - gpio_pull_up(7); - gpio_pull_up(22); + gpio_init(KROWIN1); + gpio_init(KROWIN2); + gpio_init(KROWIN3); + gpio_init(KROWIN4); + gpio_init(KROWIN5); + gpio_init(KROWIN6); + gpio_set_dir(KROWIN1,GPIO_IN); + gpio_set_dir(KROWIN2,GPIO_IN); + gpio_set_dir(KROWIN3,GPIO_IN); + gpio_set_dir(KROWIN4,GPIO_IN); + gpio_set_dir(KROWIN5,GPIO_IN); + gpio_set_dir(KROWIN6,GPIO_IN); + gpio_pull_up(KROWIN1); + gpio_pull_up(KROWIN2); + gpio_pull_up(KROWIN3); + gpio_pull_up(KROWIN4); + gpio_pull_up(KROWIN5); + gpio_pull_up(KROWIN6); #endif } @@ -932,7 +932,9 @@ void emu_init(void) if (emu_ReadKeys() & MASK_JOY2_UP) { #ifdef PICOMPUTERMAX +#ifndef USE_VGA tft.flipscreen(true); +#endif #else tft.flipscreen(true); #endif @@ -940,7 +942,9 @@ void emu_init(void) else { #ifdef PICOMPUTERMAX +#ifndef USE_VGA tft.flipscreen(false); +#endif #else tft.flipscreen(false); #endif diff --git a/MCUME_pico/testtft/emuapi.h 
b/MCUME_pico/testkeymax/emuapi.h similarity index 100% rename from MCUME_pico/testtft/emuapi.h rename to MCUME_pico/testkeymax/emuapi.h diff --git a/MCUME_pico/testtft/testtft.cpp b/MCUME_pico/testkeymax/testkeymax.cpp similarity index 95% rename from MCUME_pico/testtft/testtft.cpp rename to MCUME_pico/testkeymax/testkeymax.cpp index fd2b8b9..2356001 100644 --- a/MCUME_pico/testtft/testtft.cpp +++ b/MCUME_pico/testkeymax/testkeymax.cpp @@ -2,19 +2,20 @@ #include "pico/stdlib.h" #include +extern "C" { + #include "iopins.h" + #include "emuapi.h" +} + #ifdef USE_VGA #include "vga_t_dma.h" #else #include "tft_t_dma.h" #endif -extern "C" { - #include "iopins.h" - #include "emuapi.h" -} - TFT_T_DMA tft; + #define BLUE RGBVAL16(0, 0, 170) #define LIGHT_BLUE RGBVAL16(0, 136, 255) @@ -48,12 +49,16 @@ int main(void) { // set_sys_clock_khz(133000, true); // set_sys_clock_khz(200000, true); // set_sys_clock_khz(225000, true); - set_sys_clock_khz(250000, true); - + set_sys_clock_khz(250000, true); stdio_init_all(); printf("start\n"); - tft.begin(); +#ifdef USE_VGA +// tft.begin(VGA_MODE_400x240); + tft.begin(VGA_MODE_320x240); +#else + tft.begin(); +#endif emu_init(); emu_start(); //tft.startDMA(); @@ -61,6 +66,8 @@ int main(void) { add_repeating_timer_ms(20, repeating_timer_callback, NULL, &timer); tft.fillScreenNoDma(LIGHT_BLUE); + + tft.get_frame_buffer_size(&fb_width, &fb_height); tft.drawRectNoDma((fb_width-320)/2,(fb_height-200)/2, 320,200, BLUE); //tft.drawTextNoDma((fb_width-320)/2,(fb_height-200)/2+1*8," **** COMMODORE 64 BASIC V2 **** ",LIGHT_BLUE,BLUE,false); @@ -77,7 +84,7 @@ int main(void) { buf[0] = digits[r1]; buf[1] = digits[r2]; buf[2] = digits[r3]; - tft.drawTextNoDma(0,0,buf,BLUE,LIGHT_BLUE,false); + tft.drawTextNoDma(8,8,buf,BLUE,LIGHT_BLUE,false); while (true) { uint16_t bClick = emu_GetPad(); diff --git a/MCUME_pico/testvga/testvga.cpp b/MCUME_pico/testvga/testvga.cpp index 36e635d..4a07042 100644 --- a/MCUME_pico/testvga/testvga.cpp +++ 
b/MCUME_pico/testvga/testvga.cpp @@ -26,11 +26,13 @@ int main(void) { // set_sys_clock_khz(133000, true); // set_sys_clock_khz(200000, true); // set_sys_clock_khz(225000, true); - set_sys_clock_khz(250000, true); + set_sys_clock_khz(252000, true); + stdio_init_all(); printf("start\n"); + // vga.begin(VGA_MODE_400x240); vga.begin(VGA_MODE_320x240); //tft.fillScreenNoDma( RGBVAL16(0xff,0x00,0x00) ); @@ -41,17 +43,18 @@ int main(void) { vga.drawText((fb_width-320)/2,(fb_height-200)/2+3*8," 64K RAM SYSTEM 38911 BASIC BYTES FREE ",LIGHT_BLUE,BLUE,false); vga.drawText((fb_width-320)/2,(fb_height-200)/2+5*8,"READY.",LIGHT_BLUE,BLUE,false); + uint newclock = clock_get_hz(clk_sys)/1000000; + char buf[4] = {32,32,32,0}; - uint sys_clk = clock_get_hz(clk_sys)/1000000; - uint r1 = sys_clk/100; - uint r = sys_clk - r1*100; + uint r1 = newclock/100; + uint r = newclock - r1*100; uint r2 = r/10; - r = sys_clk - r1*100 - r2*10; + r = newclock - r1*100 - r2*10; uint r3 = r; buf[0] = digits[r1]; buf[1] = digits[r2]; buf[2] = digits[r3]; - vga.drawText(4*8,0,buf,BLUE,LIGHT_BLUE,false); + vga.drawText(4*8,8,buf,BLUE,LIGHT_BLUE,false); while (true) { diff --git a/MCUME_pico/vga_t4/scanvideo.c b/MCUME_pico/vga_t4/scanvideo.c index 96af1c5..0439898 100644 --- a/MCUME_pico/vga_t4/scanvideo.c +++ b/MCUME_pico/vga_t4/scanvideo.c @@ -437,8 +437,8 @@ static bool scanvideo_setup_with_timing(const scanvideo_mode_t *mode, const scan __mem_fence_release(); uint pin_mask = 3u << PICO_SCANVIDEO_SYNC_PIN_BASE; - bi_decl_if_func_used(bi_2pins_with_names(PICO_SCANVIDEO_SYNC_PIN_BASE, "HSync", - PICO_SCANVIDEO_SYNC_PIN_BASE + 1, "VSync")); + //bi_decl_if_func_used(bi_2pins_with_names(PICO_SCANVIDEO_SYNC_PIN_BASE, "HSync", + // PICO_SCANVIDEO_SYNC_PIN_BASE + 1, "VSync")); static_assert(PICO_SCANVIDEO_PIXEL_RSHIFT + PICO_SCANVIDEO_PIXEL_RCOUNT <= PICO_SCANVIDEO_COLOR_PIN_COUNT, "red bits do not fit in color pins"); static_assert(PICO_SCANVIDEO_PIXEL_GSHIFT + PICO_SCANVIDEO_PIXEL_GCOUNT <= 
PICO_SCANVIDEO_COLOR_PIN_COUNT, "green bits do not fit in color pins"); static_assert(PICO_SCANVIDEO_PIXEL_BSHIFT + PICO_SCANVIDEO_PIXEL_BCOUNT <= PICO_SCANVIDEO_COLOR_PIN_COUNT, "blue bits do not fit in color pins"); @@ -448,9 +448,9 @@ static bool scanvideo_setup_with_timing(const scanvideo_mode_t *mode, const scan pin_mask |= RMASK << (PICO_SCANVIDEO_COLOR_PIN_BASE + PICO_SCANVIDEO_PIXEL_RSHIFT); pin_mask |= GMASK << (PICO_SCANVIDEO_COLOR_PIN_BASE + PICO_SCANVIDEO_PIXEL_GSHIFT); pin_mask |= BMASK << (PICO_SCANVIDEO_COLOR_PIN_BASE + PICO_SCANVIDEO_PIXEL_BSHIFT); - bi_decl_if_func_used(bi_pin_mask_with_name(RMASK << (PICO_SCANVIDEO_COLOR_PIN_BASE + PICO_SCANVIDEO_PIXEL_RSHIFT), RMASK == 1 ? "Red" : ("Red 0-" __XSTRING(PICO_SCANVIDEO_PIXEL_GCOUNT)))); - bi_decl_if_func_used(bi_pin_mask_with_name(GMASK << (PICO_SCANVIDEO_COLOR_PIN_BASE + PICO_SCANVIDEO_PIXEL_GSHIFT), GMASK == 1 ? "Green" : ("Green 0-" __XSTRING(PICO_SCANVIDEO_PIXEL_GCOUNT)))); - bi_decl_if_func_used(bi_pin_mask_with_name(BMASK << (PICO_SCANVIDEO_COLOR_PIN_BASE + PICO_SCANVIDEO_PIXEL_BSHIFT), BMASK == 1 ? "Blue" : ("Blue 0-" __XSTRING(PICO_SCANVIDEO_PIXEL_BCOUNT)))); + //bi_decl_if_func_used(bi_pin_mask_with_name(RMASK << (PICO_SCANVIDEO_COLOR_PIN_BASE + PICO_SCANVIDEO_PIXEL_RSHIFT), RMASK == 1 ? "Red" : ("Red 0-" __XSTRING(PICO_SCANVIDEO_PIXEL_GCOUNT)))); + //bi_decl_if_func_used(bi_pin_mask_with_name(GMASK << (PICO_SCANVIDEO_COLOR_PIN_BASE + PICO_SCANVIDEO_PIXEL_GSHIFT), GMASK == 1 ? "Green" : ("Green 0-" __XSTRING(PICO_SCANVIDEO_PIXEL_GCOUNT)))); + //bi_decl_if_func_used(bi_pin_mask_with_name(BMASK << (PICO_SCANVIDEO_COLOR_PIN_BASE + PICO_SCANVIDEO_PIXEL_BSHIFT), BMASK == 1 ? 
"Blue" : ("Blue 0-" __XSTRING(PICO_SCANVIDEO_PIXEL_BCOUNT)))); for(uint8_t i = 0; pin_mask; i++, pin_mask>>=1u) { if (pin_mask & 1) gpio_set_function(i, GPIO_FUNC_PIO0); diff --git a/MCUME_pico/vga_t4/vga_t_dma.h b/MCUME_pico/vga_t4/vga_t_dma.h index efb79c0..e9d96e6 100644 --- a/MCUME_pico/vga_t4/vga_t_dma.h +++ b/MCUME_pico/vga_t4/vga_t_dma.h @@ -42,7 +42,8 @@ class TFT_T_DMA: public VGA_T4 // fake DMA functions void startDMA(void) { }; void stopDMA(void) { }; - + void flipscreen(bool flip) { }; + // fake no DMA functions void writeScreenNoDma(const vga_pixel *pcolors) { writeScreen(pcolors); } void fillScreenNoDma(vga_pixel color) { clear(color); }