picoVGA lib stripped and reduced clock to 240 for heavy emulators

This commit is contained in:
jean-marcharvengt 2021-11-19 10:40:52 +01:00
parent df037ae74c
commit 447507e8c8
84 changed files with 332 additions and 13959 deletions

View file

@ -67,6 +67,8 @@
VSYNC and HSYNC */
#define VGA_COLORBASE 2
#define VGA_SYNCBASE 14
#define VGA_VSYNC 15
#endif

View file

@ -36,14 +36,17 @@ static int skip=0;
#include "hardware/vreg.h"
int main(void) {
vreg_set_voltage(VREG_VOLTAGE_1_05);
// vreg_set_voltage(VREG_VOLTAGE_1_05);
// set_sys_clock_khz(125000, true);
// set_sys_clock_khz(150000, true);
// set_sys_clock_khz(133000, true);
// set_sys_clock_khz(200000, true);
// set_sys_clock_khz(210000, true);
set_sys_clock_khz(230000, true);
// set_sys_clock_khz(225000, true);
set_sys_clock_khz(250000, true);
// set_sys_clock_khz(250000, true);
stdio_init_all();
#ifdef USE_VGA
// tft.begin(VGA_MODE_400x240);
tft.begin(VGA_MODE_320x240);
@ -98,8 +101,10 @@ void emu_DrawVsync(void)
{
skip += 1;
skip &= VID_FRAME_SKIP;
volatile bool vb=vbl;
while (vbl==vb) {};
#ifdef USE_VGA
tft.waitSync();
//tft.waitSync();
#else
//volatile bool vb=vbl;
//while (vbl==vb) {};

View file

@ -36,14 +36,17 @@ static int skip=0;
#include "hardware/vreg.h"
int main(void) {
vreg_set_voltage(VREG_VOLTAGE_1_05);
// vreg_set_voltage(VREG_VOLTAGE_1_05);
// set_sys_clock_khz(125000, true);
// set_sys_clock_khz(150000, true);
// set_sys_clock_khz(133000, true);
// set_sys_clock_khz(200000, true);
// set_sys_clock_khz(210000, true);
set_sys_clock_khz(230000, true);
// set_sys_clock_khz(225000, true);
set_sys_clock_khz(250000, true);
// set_sys_clock_khz(250000, true);
stdio_init_all();
#ifdef USE_VGA
// tft.begin(VGA_MODE_400x240);
tft.begin(VGA_MODE_320x240);
@ -97,10 +100,10 @@ void emu_DrawVsync(void)
skip += 1;
skip &= VID_FRAME_SKIP;
#ifdef USE_VGA
tft.waitSync();
// tft.waitSync();
#else
// volatile bool vb=vbl;
// while (vbl==vb) {};
// volatile bool vb=vbl;
// while (vbl==vb) {};
#endif
}

View file

@ -36,14 +36,17 @@ static int skip=0;
int main(void) {
vreg_set_voltage(VREG_VOLTAGE_1_05);
// vreg_set_voltage(VREG_VOLTAGE_1_05);
// set_sys_clock_khz(125000, true);
// set_sys_clock_khz(150000, true);
// set_sys_clock_khz(133000, true);
// set_sys_clock_khz(200000, true);
// set_sys_clock_khz(210000, true);
set_sys_clock_khz(230000, true);
// set_sys_clock_khz(225000, true);
set_sys_clock_khz(250000, true);
// set_sys_clock_khz(250000, true);
stdio_init_all();
#ifdef USE_VGA
// tft.begin(VGA_MODE_400x240);
tft.begin(VGA_MODE_320x240);
@ -63,7 +66,7 @@ int main(void) {
tft.fillScreenNoDma( RGBVAL16(0x00,0x00,0x00) );
tft.startDMA();
struct repeating_timer timer;
add_repeating_timer_ms(15, repeating_timer_callback, NULL, &timer);
add_repeating_timer_ms(5, repeating_timer_callback, NULL, &timer);
}
tft.waitSync();
}
@ -96,11 +99,13 @@ void emu_DrawVsync(void)
{
skip += 1;
skip &= VID_FRAME_SKIP;
#ifdef USE_VGA
tft.waitSync();
#else
volatile bool vb=vbl;
while (vbl==vb) {};
#ifdef USE_VGA
// tft.waitSync();
#else
// volatile bool vb=vbl;
// while (vbl==vb) {};
#endif
}

View file

@ -35,13 +35,14 @@ static int skip=0;
#include "hardware/vreg.h"
int main(void) {
vreg_set_voltage(VREG_VOLTAGE_1_05);
// vreg_set_voltage(VREG_VOLTAGE_1_05);
// set_sys_clock_khz(125000, true);
// set_sys_clock_khz(150000, true);
// set_sys_clock_khz(133000, true);
// set_sys_clock_khz(200000, true);
set_sys_clock_khz(200000, true);
// set_sys_clock_khz(225000, true);
set_sys_clock_khz(250000, true);
// set_sys_clock_khz(240000, true);
// set_sys_clock_khz(250000, true);
stdio_init_all();
#ifdef USE_VGA
tft.begin(VGA_MODE_320x240);

View file

@ -35,13 +35,14 @@ static int skip=0;
#include "hardware/vreg.h"
int main(void) {
vreg_set_voltage(VREG_VOLTAGE_1_05);
// vreg_set_voltage(VREG_VOLTAGE_1_05);
// set_sys_clock_khz(125000, true);
// set_sys_clock_khz(150000, true);
// set_sys_clock_khz(133000, true);
// set_sys_clock_khz(200000, true);
set_sys_clock_khz(200000, true);
// set_sys_clock_khz(225000, true);
set_sys_clock_khz(250000, true);
// set_sys_clock_khz(240000, true);
// set_sys_clock_khz(250000, true);
stdio_init_all();
#ifdef USE_VGA
tft.begin(VGA_MODE_320x240);
@ -94,11 +95,13 @@ void emu_DrawVsync(void)
{
skip += 1;
skip &= VID_FRAME_SKIP;
#ifdef USE_VGA
tft.waitSync();
#else
volatile bool vb=vbl;
while (vbl==vb) {};
#ifdef USE_VGA
// tft.waitSync();
#else
// volatile bool vb=vbl;
// while (vbl==vb) {};
#endif
}

View file

@ -35,13 +35,14 @@ static int skip=0;
#include "hardware/vreg.h"
int main(void) {
vreg_set_voltage(VREG_VOLTAGE_1_05);
// vreg_set_voltage(VREG_VOLTAGE_1_05);
// set_sys_clock_khz(125000, true);
// set_sys_clock_khz(150000, true);
// set_sys_clock_khz(133000, true);
// set_sys_clock_khz(200000, true);
set_sys_clock_khz(200000, true);
// set_sys_clock_khz(225000, true);
set_sys_clock_khz(150000, true);
// set_sys_clock_khz(240000, true);
// set_sys_clock_khz(250000, true);
stdio_init_all();
#ifdef USE_VGA
// tft.begin(VGA_MODE_400x240);

View file

@ -569,11 +569,6 @@ int ExecZ80(register Z80 *R,register int RunCycles)
asm volatile("nop");
asm volatile("nop");
asm volatile("nop");
asm volatile("nop");
asm volatile("nop");
asm volatile("nop");
asm volatile("nop");
#ifndef USE_VGA
asm volatile("nop");
asm volatile("nop");

View file

@ -37,14 +37,17 @@ static int skip=0;
#include "hardware/vreg.h"
int main(void) {
vreg_set_voltage(VREG_VOLTAGE_1_05);
// vreg_set_voltage(VREG_VOLTAGE_1_05);
// set_sys_clock_khz(125000, true);
// set_sys_clock_khz(150000, true);
// set_sys_clock_khz(133000, true);
// set_sys_clock_khz(200000, true);
// set_sys_clock_khz(210000, true);
set_sys_clock_khz(230000, true);
// set_sys_clock_khz(225000, true);
set_sys_clock_khz(250000, true);
// set_sys_clock_khz(250000, true);
stdio_init_all();
#ifdef USE_VGA
tft.begin(VGA_MODE_320x240);
#else
@ -63,7 +66,7 @@ int main(void) {
tft.fillScreenNoDma( RGBVAL16(0x00,0x00,0x00) );
tft.startDMA();
struct repeating_timer timer;
add_repeating_timer_ms(20, repeating_timer_callback, NULL, &timer);
add_repeating_timer_ms(5, repeating_timer_callback, NULL, &timer);
}
tft.waitSync();
}
@ -94,11 +97,13 @@ void emu_DrawVsync(void)
{
skip += 1;
skip &= VID_FRAME_SKIP;
volatile bool vb=vbl;
while (vbl==vb) {};
#ifdef USE_VGA
tft.waitSync();
// tft.waitSync();
#else
// volatile bool vb=vbl;
// while (vbl==vb) {};
// volatile bool vb=vbl;
// while (vbl==vb) {};
#endif
}

View file

@ -35,13 +35,14 @@ static int skip=0;
#include "hardware/vreg.h"
int main(void) {
vreg_set_voltage(VREG_VOLTAGE_1_05);
// vreg_set_voltage(VREG_VOLTAGE_1_05);
// set_sys_clock_khz(125000, true);
// set_sys_clock_khz(150000, true);
// set_sys_clock_khz(133000, true);
// set_sys_clock_khz(200000, true);
set_sys_clock_khz(200000, true);
// set_sys_clock_khz(225000, true);
set_sys_clock_khz(150000, true);
// set_sys_clock_khz(240000, true);
// set_sys_clock_khz(250000, true);
stdio_init_all();
#ifdef USE_VGA
// tft.begin(VGA_MODE_400x240);

View file

@ -78,7 +78,7 @@ static void core1_func()
//VgaTerm(); // terminate
}
else
VgaInit(v);
VgaInit(v,(u8*)framebuffer,320,240,320);
__dmb();
VgaVmodeReq = NULL;
}
@ -133,7 +133,7 @@ vga_error_t VGA_T4::begin(vga_mode_t mode)
sem_init(&core1_initted, 0, 1);
multicore_launch_core1(core1_func);
vmode = Video(DEV_VGA, RES_QVGA, FORM_8BIT, framebuffer);
vmode = Video(DEV_VGA, RES_QVGA);
VgaInitReql(vmode);
// wait for initialization of audio to be complete

View file

@ -1,142 +0,0 @@
// ****************************************************************************
//
// Canvas
//
// ****************************************************************************
#ifndef _CANVAS_H
#define _CANVAS_H
#define DRAW_HWINTER 1 // 1=use hardware interpolator to draw images
// canvas format
// Note: do not use enum, symbols could not be used by the preprocessor
#define CANVAS_8 0 // 8-bit pixels
#define CANVAS_4 1 // 4-bit pixels
#define CANVAS_2 2 // 2-bit pixels
#define CANVAS_1 3 // 1-bit pixels
#define CANVAS_PLANE2 4 // 4 colors on 2 planes
#define CANVAS_ATTRIB8 5 // 2x4 bit color attributes per 8x8 pixel sample
// draw functions: bit 0..3 = draw color
// bit 4 = draw color is background color
// canvas descriptor - describes one drawing surface: where its data is, its size and its pixel format
typedef struct {
u8* img; // pointer to image data
u8* img2; // pointer to second data block (2nd plane for CANVAS_PLANE2, attributes for CANVAS_ATTRIB8)
int w; // width (presumably in pixels - TODO confirm)
int h; // height (presumably in lines - TODO confirm)
int wb; // pitch (number of bytes between starts of consecutive lines)
u8 format; // canvas format, one of the CANVAS_* constants
} sCanvas;
// Draw rectangle
void DrawRect(sCanvas* canvas, int x, int y, int w, int h, u8 col);
// Draw frame
void DrawFrame(sCanvas* canvas, int x, int y, int w, int h, u8 col);
// clear canvas (fill with black color)
void DrawClear(sCanvas* canvas);
// Draw point
void DrawPoint(sCanvas* canvas, int x, int y, u8 col);
// Draw line
void DrawLine(sCanvas* canvas, int x1, int y1, int x2, int y2, u8 col);
// Draw filled circle
// x0, y0 ... coordinate of center
// r ... radius
// col ... color
// col with CANVAS_ATTRIB8 format: bit 0..3 = draw color, bit 4 = draw color is background color
// mask ... mask of used octants (0xff = 255 = draw whole circle)
// . B2|B1 .
// B3 . | . B0
// ------o------
// B4 . | . B7
// . B5|B6 .
void DrawFillCircle(sCanvas* canvas, int x0, int y0, int r, u8 col, u8 mask=0xff);
// Draw circle
// x0, y0 ... coordinate of center
// r ... radius
// col ... color
// col with CANVAS_ATTRIB8 format: bit 0..3 = draw color, bit 4 = draw color is background color
// mask ... mask of used octants (0xff = 255 = draw whole circle)
// . B2|B1 .
// B3 . | . B0
// ------o------
// B4 . | . B7
// . B5|B6 .
void DrawCircle(sCanvas* canvas, int x0, int y0, int r, u8 col, u8 mask=0xff);
// Draw text (transparent background)
// font = pointer to 1-bit font
void DrawText(sCanvas* canvas, const char* text, int x, int y, u8 col,
const void* font, int fontheight=8, int scalex=1, int scaley=1);
// Draw text with background
// font = pointer to 1-bit font
void DrawTextBg(sCanvas* canvas, const char* text, int x, int y, u8 col, u8 bgcol,
const void* font, int fontheight=8, int scalex=1, int scaley=1);
// Draw image
void DrawImg(sCanvas* canvas, sCanvas* src, int xd, int yd, int xs, int ys, int w, int h);
// Draw image with transparency (source and destination must have same format, col = transparency key color)
// CANVAS_ATTRIB8 format replaced by DrawImg function
void DrawBlit(sCanvas* canvas, sCanvas* src, int xd, int yd, int xs, int ys, int w, int h, u8 col);
// DrawImgMat mode (anonymous enum, values are assigned sequentially starting at 0)
enum {
DRAWIMG_WRAP, // wrap image
DRAWIMG_NOBORDER, // no border (transparent border)
DRAWIMG_CLAMP, // clamp image (use last pixel as border)
DRAWING_COLOR, // color border
// NOTE(review): this name uses the prefix DRAWING_ instead of DRAWIMG_ like its siblings;
// looks like a typo, but renaming it would break existing users - confirm before changing
DRAWIMG_TRANSP, // transparent image with key color
DRAWIMG_PERSP, // perspective floor
};
// draw 8-bit image with 2D transformation matrix
// canvas ... destination canvas
// src ... source canvas with image
// x ... destination coordinate X
// y ... destination coordinate Y
// w ... destination width
// h ... destination height
// m ... transformation matrix (should be prepared using PrepDrawImg or PrepDrawPersp function)
// mode ... draw mode DRAWIMG_*
// color ... key or border color
// Note to wrap and perspective mode: Width and height of source image must be power of 2!
void DrawImgMat(sCanvas* canvas, const sCanvas* src, int x, int y, int w, int h,
const class cMat2Df* m, u8 mode, u8 color);
// draw tile map using perspective projection
// canvas ... destination canvas
// src ... source canvas with column of 8-bit square tiles (width = tile size, must be power of 2)
// map ... byte map of tile indices
// mapwbits ... number of bits of map width (number of tiles; width must be power of 2)
// maphbits ... number of bits of map height (number of tiles; height must be power of 2)
// tilebits ... number of bits of tile size (e.g. 5 = tile 32x32 pixel)
// x ... destination coordinate X
// y ... destination coordinate Y
// w ... destination width
// h ... destination height
// mat ... transformation matrix (should be prepared using PrepDrawPersp function)
// horizon ... horizon offset (0=do not use perspective projection)
void DrawTileMap(sCanvas* canvas, const sCanvas* src, const u8* map, int mapwbits, int maphbits,
int tilebits, int x, int y, int w, int h, const cMat2Df* mat, u8 horizon);
// draw image line interpolated
// canvas = destination canvas (8-bit pixel format)
// src = source canvas (source image in 8-bit pixel format)
// xd,yd = destination coordinates
// xs,ys = source coordinates
// wd = destination width
// ws = source width
// Overflow in X direction is not checked!
void DrawImgLine(sCanvas* canvas, sCanvas* src, int xd, int yd, int xs, int ys, int wd, int ws);
#endif // _CANVAS_H

View file

@ -1,198 +0,0 @@
// ****************************************************************************
//
// VGA common definitions of C and ASM
//
// ****************************************************************************
#include "vga_config.h" // VGA configuration
#define LAYERS_MAX 4 // max. number of layers (should be 4)
#define BLACK_MAX MAXX // size of buffer with black color (used to clear rest of unused line)
// VGA PIO program
#define BASE_OFFSET 17 // offset of base layer program
#define LAYER_OFFSET 0 // offset of overlapped layer program
// layer program
#define LAYERPROG_BASE 0 // program of base layer (overlapped layers are OFF)
#define LAYERPROG_KEY 1 // layer with key color
#define LAYERPROG_BLACK 2 // layer with black key color
#define LAYERPROG_WHITE 3 // layer with white key color
#define LAYERPROG_MONO 4 // layer with mono pattern or simple color
#define LAYERPROG_RLE 5 // layer with RLE compression
#define LAYERPROG_NUM 6 // number of layer programs
// layer mode (CPP = clock cycles per pixel)
// Control buffer: 16 bytes
// Data buffer: 4 bytes
// fast sprites can be up Control buffer: width*2 bytes
// sprites Data buffer: width bytes
#define LAYERMODE_BASE 0 // base layer
#define LAYERMODE_KEY 1 // layer with key color
#define LAYERMODE_BLACK 2 // layer with black key color
#define LAYERMODE_WHITE 3 // layer with white key color
#define LAYERMODE_MONO 4 // layer with mono pattern
#define LAYERMODE_COLOR 5 // layer with simple color
#define LAYERMODE_RLE 6 // layer with RLE compression
#define LAYERMODE_SPRITEKEY 7 // layer with sprites with key color
#define LAYERMODE_SPRITEBLACK 8 // layer with sprites with black key color
#define LAYERMODE_SPRITEWHITE 9 // layer with sprites with white key color
#define LAYERMODE_FASTSPRITEKEY 10 // layer with fast sprites with key color
#define LAYERMODE_FASTSPRITEBLACK 11 // layer with fast sprites with black key color
#define LAYERMODE_FASTSPRITEWHITE 12 // layer with fast sprites with white key color
#define LAYERMODE_PERSPKEY 13 // layer with key color and image with transformation matrix
#define LAYERMODE_PERSPBLACK 14 // layer with black key color and image with transformation matrix
#define LAYERMODE_PERSPWHITE 15 // layer with white key color and image with transformation matrix
#define LAYERMODE_PERSP2KEY 16 // layer with key color and double-pixel image with transformation matrix
#define LAYERMODE_PERSP2BLACK 17 // layer with black key color and double-pixel image with transformation matrix
#define LAYERMODE_PERSP2WHITE 18 // layer with white key color and double-pixel image with transformation matrix
#define LAYERMODE_NUM 19 // number of overlapped layer modes
// Structure of sprite sSprite (on change update structure sSprite in vga_layer.h)
#define SSPRITE_IMG 0 // u8* img; // pointer to image data
#define SSPRITE_X0 4 // u8* x0; // pointer to pixel offset of start of lines/4 (used with fast sprites)
#define SSPRITE_W0 8 // u8* w0; // pointer to pixel length of length of lines/4 (used with fast sprites)
#define SSPRITE_KEYCOL 12 // u32 keycol; // key color
#define SSPRITE_X 16 // s16 x; // sprite X-coordinate on the screen
#define SSPRITE_Y 18 // s16 y; // sprite Y-coordinate on the screen
#define SSPRITE_W 20 // u16 w; // sprite width
#define SSPRITE_H 22 // u16 h; // sprite height
#define SSPRITE_WB 24 // u16 wb; // sprite pitch (number of bytes between lines)
// u16 res; // ...reserved, structure align
#define SSPRITE_SIZE 28 // size of sSprite structure
// Byte offsets of the fields of the layer screen structure sLayer
// (when changing, also update structure sLayer in vga_layer.h so both stay in sync)
#define SLAYER_IMG 0 // const u8* img; // pointer to image in current layer format, or sprite list
#define SLAYER_PAR 4 // const void* par; // additional parameter (RLE index table, transformation matrix)
#define SLAYER_INIT 8 // u32 init; // init word sent on start of scanline
#define SLAYER_KEYCOL 12 // u32 keycol; // key color
#define SLAYER_TRANS 16 // u16 trans; // trans count
#define SLAYER_X 18 // s16 x; // start X coordinate
#define SLAYER_Y 20 // s16 y; // start Y coordinate
#define SLAYER_W 22 // u16 w; // width in pixels
#define SLAYER_H 24 // u16 h; // height
#define SLAYER_WB 26 // u16 wb; // image width in bytes (pitch of lines)
#define SLAYER_MODE 28 // u8 mode; // layer mode
#define SLAYER_HORIZ 29 // s8 horiz; // horizon of perspective projection/4 (only with LAYERMODE_PERSP* modes, 0=no perspective, <0 ceiling)
#define SLAYER_XBITS 30 // u8 xbits; // number of bits of width of source image (only with LAYERMODE_PERSP* modes)
#define SLAYER_YBITS 31 // u8 ybits; // number of bits of height of source image (only with LAYERMODE_PERSP* modes)
#define SLAYER_SPRITENUM 32 // u16 spritenum; // number of sprites
#define SLAYER_ON 34 // Bool on; // layer is ON
#define SLAYER_CPP 35 // u8 cpp; // current clock pulses per pixel (used to calculate X coordinate)
#define SLAYER_SIZE 36 // size of sLayer structure in bytes
// Byte offsets of the fields of the video segment structure sSegm
// (when changing, also update structure sSegm in vga_screen.h so both stay in sync)
#define SSEGM_WIDTH 0 // u16 width; // width of this video segment in pixels (must be multiple of 4, 0=inactive segment)
#define SSEGM_WB 2 // u16 wb; // pitch - number of bytes between lines
#define SSEGM_OFFX 4 // s16 offx; // display offset at X direction (must be multiple of 4)
#define SSEGM_OFFY 6 // s16 offy; // display offset at Y direction
#define SSEGM_WRAPX 8 // u16 wrapx; // wrap width in X direction (number of pixels, must be multiple of 4 and > 0)
// text modes: wrapx must be multiple of 8
#define SSEGM_WRAPY 10 // u16 wrapy; // wrap width in Y direction (number of lines, cannot be 0)
#define SSEGM_DATA 12 // const void* data; // pointer to video buffer with image data
#define SSEGM_FORM 16 // u8 form; // graphics format GF_*
#define SSEGM_DBLY 17 // bool dbly; // double Y (2 scanlines per 1 image line)
#define SSEGM_PAR3 18 // u16 par3; // parameter 3
#define SSEGM_PAR 20 // u32 par; // parameter 1: color, pointer to palettes, tile source, font
#define SSEGM_PAR2 24 // u32 par2; // parameter 2
#define SSEGM_SIZE 28 // size of sSegm structure in bytes
// Structure of video strip sStrip (on change update structure sStrip in vga_screen.h)
#define SSTRIP_HEIGHT 0 // u16 height; // height of this strip in number of scanlines
#define SSTRIP_NUM 2 // u16 num; // number of video segments
#define SSTRIP_SEG 4 // sSegm seg[SEGMAX];
#define SSTRIP_SIZE (4+SSEGM_SIZE*SEGMAX) // size of sStrip structure (= 4 + 28*8 = 228 bytes)
// Structure of video screen sScreen (on change update structure sScreen in vga_screen.h)
#define SSCREEN_NUM 0 // u16 num; // number of video strips
#define SSCREEN_BACKUP 2 // u16 num_backup; // backup number of video strips during display OFF
#define SSCREEN_STRIP 4 // sStrip strip[STRIPMAX]; // list of video strips
#define SSCREEN_SIZE (4+SSTRIP_SIZE*STRIPMAX) // size of sScreen structure (= 4 + 228*8 = 1828 bytes)
// --- graphics formats
// There are 3 groups of formats - separated due internal reasons, do not mix them.
// 1st group of formats - rendered specially
#define GF_COLOR 0 // simple color (par=color pattern 4-pixels even line, par2=color pattern 4-pixels odd line)
// Data buffer: width bytes (320 pixels: 320 bytes)
// Control buffer: 8 bytes
// 2nd group of formats - rendering into control buffer cbuf
#define GF_GRAPH8 1 // native 8-bit graphics (X1Y1R2G2B2) - fast, transfers "as is" to PIO
// (num = number of pixels/4 = number of bytes/4)
// Control buffer: 8 bytes (320 pixels: 8 bytes)
#define GF_TILE 2 // tiles (par = tile table with one column of tiles,
// par2 = tile height, par3 = tile width as multiple of 4)
// Control buffer: width/tile width*8 bytes (320 pixels of 32x32: 80 bytes)
#define GF_TILE2 3 // alternate tiles (par = tile table with one row of tiles,
// par2 = LOW tile height, HIGH tile width bytes,
// par3 = tile width as multiple of 4)
// Control buffer: width/tile width*8 bytes (320 pixels of 32x32: 80 bytes)
#define GF_PROGRESS 4 // horizontal progress indicator (data = values 0..255 of 4-pixels in rows,
// par = scanline gradient < data, par2 = scanline gradient >= data)
// Control buffer: 16 bytes
#define GF_GRAD1 5 // gradient with 1 line
// Control buffer: 8 bytes (320 pixels: 8 bytes)
#define GF_GRAD2 6 // gradient with 2 lines
// Control buffer: 8 bytes (320 pixels: 8 bytes)
#define GF_GRP2MIN GF_GRAPH8 // 2nd group minimal format
#define GF_GRP2MAX GF_GRAD2 // 2nd group maximal format
// 3rd group of formats - rendering into data buffer dbuf
// Control buffer: 8 bytes
// Data buffer: width bytes
#define GF_GRAPH4 7 // 4-bit graphics (num = number of pixels/4 = number of bytes/2;
// par = pointer to 16-color palette translation table)
#define GF_GRAPH2 8 // 2-bit graphics (num = number of pixels/4 = number of bytes,
// par = pointer to 4-color palette translation table)
#define GF_GRAPH1 9 // 1-bit graphics (num = number of pixels/8 = number of bytes,
// par = 2 colors of palettes)
#define GF_MTEXT 10 // 8-pixel mono text (num = number of characters, font is 8-bit width,
// par = pointer to 1-bit font, par2 = 2 colors of palettes)
#define GF_ATEXT 11 // 8-pixel attribute text, character + 2x4 bit attributes
// (num = number of characters, font is 8-bit width,
// par = pointer to 1-bit font, par2 = pointer to 16 colors of palettes)
#define GF_FTEXT 12 // 8-pixel foreground color text, character + foreground color
// (num = number of characters, font is 8-bit width,
// par = pointer to 1-bit font, par2 = background color)
#define GF_CTEXT 13 // 8-pixel color text, character + background color + foreground color
// (num = number of characters, font is 8-bit width,
// par = pointer to 1-bit font)
#define GF_GTEXT 14 // 8-pixel gradient text (par = pointer to 1-bit font, par2 = pointer to color array)
#define GF_DTEXT 15 // 8-pixel double gradient text (par = pointer to 1-bit font, par2 = pointer to color array)
#define GF_LEVEL 16 // level graph (data=samples 0..255, par = 2 colors of palettes, par2 = Y zero level 0..255)
#define GF_LEVELGRAD 17 // level gradient graph (data = samples 0..255, par = scanline gradient < data, par2 = scanline gradient >= data)
#define GF_OSCIL 18 // oscilloscope pixel graph (data=samples 0..255, par = 2 colors of palettes, par2 = height of pixels - 1)
#define GF_OSCLINE 19 // oscilloscope line graph (data=samples 0..255, par = 2 colors of palettes)
#define GF_PLANE2 20 // 4 colors on 2 graphic planes (data=graphic, par=offset of 2nd graphic plane,
// par2 = pointer to 4-color palette translation table)
#define GF_ATTRIB8 21 // 2x4 bit color attribute per 8x8 pixel sample (data=mono graphic, par=offset of color attributes,
// par2 = pointer to 16-color palette table)
#define GF_GRAPH8MAT 22 // 8-bit graphics with 2D matrix transformation, using hardware interpolator inter1 (inter1 state is not saved during interrupt)
// (data=image, par=pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL)),
// par2 LOW=number of bits of image width, par2 HIGH=number of bits of image height)
#define GF_GRAPH8PERSP 23 // 8-bit graphics with perspective, using hardware interpolator inter1 (inter1 state is not saved during interrupt)
// (data=image, par=pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL)),
// par2 LOW=number of bits of image width, par2 HIGH=number of bits of image height,
// par3=horizon offset)
#define GF_TILEPERSP 24 // tiles with perspective, using hardware interpolators inter0 and inter1 (their state is not saved during interrupt)
// (data=tile map, par=one column of tiles, par2=pointer to integer matrix,
// wb LOW=number of bits of map width, wb HIGH=number of bits of map height,
// par3 LOW=number of bits of tile size, par3 HIGH=horizon offset/4 or 0=no perspective or <0=ceiling,
// wrapy=segment height)
#define GF_TILEPERSP15 25 // tiles with perspective, 1.5 pixels (parameters as GF_TILEPERSP)
#define GF_TILEPERSP2 26 // tiles with perspective, double pixels (parameters as GF_TILEPERSP)
#define GF_TILEPERSP3 27 // tiles with perspective, triple pixels (parameters as GF_TILEPERSP)
#define GF_TILEPERSP4 28 // tiles with perspective, quadruple pixels (parameters as GF_TILEPERSP)
#define GF_GRP3MIN GF_GRAPH4 // 3rd group minimal format
#define GF_GRP3MAX GF_TILEPERSP4 // 3rd group maximal format
#define FRACT 12 // number of bits of fractional part of fractint number (use max. 13, min. 8)
#define FRACTMUL (1<<FRACT)

View file

@ -3,6 +3,10 @@
//
// Global common definitions
//
// file derived from the PicoVGA project
// https://github.com/Panda381/PicoVGA
// by Miroslav Nemecek
//
// ****************************************************************************
// ----------------------------------------------------------------------------
@ -47,8 +51,10 @@ typedef unsigned char Bool;
// align array to 4-bytes
#define ALIGNED __attribute__((aligned(4)))
#define ALIGN4(x) ((x) & ~3)
#define LED_PIN 25
// swap bytes of command: reverse the order of the 4 bytes of a 32-bit value
// (byte 0 <-> byte 3, byte 1 <-> byte 2).
// The masks are unsigned so that moving the low byte up to bits 24..31 is done in
// unsigned arithmetic; with a plain int argument the former signed shift into the
// sign bit was undefined behaviour.
// Note: n is evaluated four times - do not pass an expression with side effects.
#define BYTESWAP(n) ((((n)&0xffu)<<24)|(((n)&0xff00u)<<8)|(((n)&0xff0000u)>>8)|(((n)&0xff000000u)>>24))
// ----------------------------------------------------------------------------
// Constants
@ -96,8 +102,6 @@ typedef unsigned char Bool;
#define PI 3.14159265358979324
#define PI2 (3.14159265358979324*2)
//extern const ALIGNED u8 FontBoldB8x16[4096];
// Pack three 8-bit color components into one 8-bit pixel laid out as RRRGGGBB:
// top 3 bits of r go to bits 5..7, top 3 bits of g to bits 2..4, top 2 bits of b to bits 0..1.
// Every argument is parenthesized so that expressions such as VGA_RGB(a|b, g, b)
// are shifted as a whole instead of being split by operator precedence.
#define VGA_RGB(r,g,b) ( ((((r)>>5)&0x07)<<5) | ((((g)>>5)&0x07)<<2) | ((((b)>>6)&0x3)<<0) )
@ -125,12 +129,8 @@ typedef unsigned char Bool;
// PicoVGA includes
#include "define.h" // common definitions of C and ASM
#include "canvas.h" // canvas
#include "vga_config.h" // VGA configuration
#include "vga_vmode.h" // VGA videomodes
#include "vga_layer.h" // VGA layers
#include "vga_screen.h" // VGA screen layout
#include "vga_pal.h" // VGA palette
#include "vga.h" // VGA output
#include "picovga.pio.h" // PIO

View file

@ -1,6 +1,11 @@
; ============================================================================
; VGA output - base layer (15 instructions)
;
; file derived from the PicoVGA project
; https://github.com/Panda381/PicoVGA
; by Miroslav Nemecek
;
; ============================================================================
; Control word of "dark" command (left shift):
; - bit 0..7 (8 bits) output color (set to 0 if not used)
@ -60,218 +65,4 @@ public extra2:
; wrap jump to instruction out pc,5
.wrap
; ============================================================================
; VGA output - layer with key color (13 instructions)
; ============================================================================
; Each pixel taken from the output shift register is compared with the key color:
; a pixel equal to the key color is skipped (pins are left unchanged), any other
; pixel is written to the pins.
; Control word (left shift):
; - bit 0..10 (11 bits) number of pixels - 1 (number of pixels must be multiple of 4)
; - bit 11..18 (8 bits) key color
; - bit 19..31 (13 bits) start delay D = clock cycles - 7 between irq and first pixel
; Clocks per pixel: minimum 6, maximum 37.
.program keylayer
.origin 0 ; must load at offset 0 (LAYER_OFF)
; idle wait
.wrap_target
public idle:
pull block ; [1] idle wait
public entry:
wait 0 irq 4 ; [1] wait for IRQ sync goes 0
out x,13 ; [1] get length of delay - 7
layer_wait:
jmp x--,layer_wait ; [1] delay loop
out y,8 ; [1] get key color (kept in Y for the whole line)
out x,11 ; [1] get number of pixels-1
layer_loop:
mov isr,x ; [1] save pixel counter into ISR (X is needed for the pixel value)
out x,8 ; [1] get output pixel
jmp x!=y,layer_2 ; [1] jump if pixel is not transparent
jmp layer_3 ; [1] pixel equals key color: skip output, jump to end of loop
layer_2:
mov pins,x ; [1] output pixel to pins
layer_3:
public extra1:
mov x,isr [0] ; [1+CPP-6] return pixel counter (set extra wait CPP-6)
jmp x--,layer_loop ; [1] loop next pixel
; wrap jump to idle
.wrap
; ============================================================================
; VGA output - layer with black key color (11 instructions)
; ============================================================================
; A pixel value of 0 is treated as transparent (pins are left unchanged); any
; other pixel value is written to the pins.
; Control word (left shift):
; - bit 0..15 (16 bits) number of pixels - 1 (number of pixels must be multiple of 4)
; - bit 16..31 (16 bits) start delay D = clock cycles - 5 between irq and first pixel
; Cannot display black pixel (it is used as transparency)
; Clocks per pixel: minimum 4, maximum 34.
.program blacklayer
.origin 0 ; must load at offset 0 (LAYER_OFF)
; idle wait
.wrap_target
public idle:
pull block ; [1] idle wait
public entry:
wait 0 irq 4 ; [1] wait for IRQ sync goes 0
out x,16 ; [1] get length of delay - 5
layer_wait:
jmp x--,layer_wait ; [1] delay loop
out x,16 ; [1] get number of pixels-1
layer_loop:
out y,8 ; [1] get output pixel
jmp !y,layer_2 ; [1] jump if pixel is transparent (color = 0)
mov pins,y ; [1] output pixel to pins
public extra1:
jmp x--,layer_loop [0] ; [1+CPP-4] loop next pixel (set extra wait CPP-4)
jmp idle ; [1] pixel counter expired on the opaque path: go idle (explicit jump, layer_2 follows)
layer_2:
public extra2:
jmp x--,layer_loop [0] ; [1+CPP-3] loop next pixel (set extra wait CPP-3); transparent path is 1 instruction shorter
; wrap jump to idle
.wrap
; ============================================================================
; VGA output - layer with white key color (10 instructions)
; ============================================================================
; Control word (left shift):
; - bit 0..15 (16 bits) number of pixels - 1 (number of pixels must be multiple of 4)
; - bit 16..31 (16 bits) start delay D = clock cycles - 5 between irq and first pixel
; Cannot display white pixel (it is used as transparency). Source pixels must be incremented + 1.
; A source byte of 0 is treated as transparent; any other source byte is written to the
; pins after the decrement done by "jmp y--", i.e. as (source byte - 1).
; Clocks per pixel: minimum 4, maximum 35.
.program whitelayer
.origin 0 ; must load at offset 0 (LAYER_OFF)
; idle wait
.wrap_target
public idle:
pull block ; [1] idle wait
public entry:
wait 0 irq 4 ; [1] wait for IRQ sync goes 0
out x,16 ; [1] get length of delay - 5 (matches the control word description above)
layer_wait:
jmp x--,layer_wait ; [1] delay loop
out x,16 ; [1] get number of pixels-1
layer_loop:
out y,8 ; [1] get output pixel
jmp y--,layer_2 ; [1] jump if pixel is not transparent (color != 0); Y is decremented here
jmp layer_3 ; [1] transparent pixel: skip output, jump to end of loop
layer_2:
mov pins,y ; [1] output pixel to pins
public extra1:
layer_3:
jmp x--,layer_loop [0] ; [1+CPP-4] loop next pixel (set extra wait CPP-4)
; wrap jump to idle
.wrap
; ============================================================================
; VGA output - layer with mono or color pattern (16 instructions)
; ============================================================================
; Two modes selected by the flag bit of the control word:
; - mono: data is consumed 1 bit per pixel; bit=0 writes the key color to the pins,
;   bit=1 leaves the pins unchanged
; - color: data is consumed 8 bits per pixel and written to the pins as is
; Control word (left shift):
; - bit 0 (1 bit) flag 0=use color opaque mode, 1=use mono transparent mode
; - bit 1..11 (11 bits) number of pixels - 1 (number of pixels must be multiple of 32 in mono, or 4 in color)
; - bit 12..19 (8 bits) key color
; - bit 20..31 (12 bits) start delay D = clock cycles - 8 between irq and first mono pixel, or 6 for color pixel
; Mono, clocks per pixel: minimum 4, maximum 35.
; Color, clocks per pixel: minimum 2, maximum 33.
.program monolayer
.origin 0 ; must load at offset 0 (LAYER_OFF)
.wrap_target
public idle:
pull block ; [1] idle wait
public entry:
wait 0 irq 4 ; [1] wait for IRQ sync goes 0
out x,12 ; [1] get length of delay - 8 (or 6 in color)
layer_wait:
jmp x--,layer_wait ; [1] delay loop
out isr,8 ; [1] get key color
out y,11 ; [1] get number of pixels-1
out x,1 ; [1] get mode flag
jmp !x,layer_color ; [1] 0=use color mode
layer_loop:
out x,1 ; [1] get one bit
jmp !x,layer_out ; [1] bit=0, output pixel
jmp layer_skip ; [1] jump to end of loop
layer_out:
mov pins,isr ; [1] output pixel
layer_skip:
public extra1:
jmp y--,layer_loop [0] ; [1+CPP-4] loop next pixel (set extra wait CPP-4)
jmp idle ; [1] mono pixel counter expired: go idle (explicit jump, color loop follows)
layer_color:
out pins,8 ; [1] output one 8-bit color pixel directly to pins
public extra2:
jmp y--,layer_color [0] ; [1+CPP-2] loop next pixel (set extra wait CPP-2)
; wrap jump to idle
.wrap
; ============================================================================
; VGA output - layer with RLE compression (17 instructions)
; ============================================================================
; Input is left shifted with byte-swap (lower byte comes first)
; Requires 3 clock cycles per pixel.
; Clocks per pixel: minimum 3, maximum 32.
.program rlelayer
; RLE layer output program. The data stream is a sequence of commands; each command
; is dispatched by loading its 8-bit program address into PC ('out pc,8'), so the
; 'public' labels below (idle, skip, skip1, run, raw1, raw) are the command codes.
; The first token of every command is loaded into X by raw_next before the jump.
; Commands deliberately fall through into the following code to emit their last pixel.
.origin 0 ; must load at offset 0 (LAYER_OFF)
; [1 instruction] idle wait (tokens: {8} ignored, {8} 'idle' command)
public idle:
out pc,8 ; [1] idle wait (jumps to the address given by the next 8 bits of input)
; [4 instructions] start
public entry:
wait 0 irq 4 ; [1] wait for IRQ sync goes 0
out x,32 [2] ; [3] get length of delay - 7
entry_wait:
jmp x--,entry_wait ; [1] delay
jmp raw_next ; [1] start fetching commands
; [1 instruction] skip N+2 (2..257) pixels (tokens: {8} N = number of pixels - 2, {8} 'skip' command)
; - loops N+1 times, then falls through to skip1 for the last pixel
public skip:
public extra1:
jmp x--,skip [0] ; [1+CPP-1] wait (set extra wait CPP-1)
; [1 instruction] skip 1 pixel (tokens: {8} ignored, {8} 'skip1' command)
public skip1:
public extra2:
jmp raw_next [0] ; [1+CPP-3] jump (set extra wait CPP-3)
; [4 instructions] repeat N+3 (3..258) pixels (tokens: {8} pixel to repeat, {8} 'run' command, {8} N = number of pixels - 3)
; - 1 pixel here, N+1 pixels in run_loop, then falls through to raw1 for the last pixel
public run:
public extra3:
mov pins,x [0] ; [1+CPP-2] output pixel (set extra wait CPP-2)
out y,8 ; [1] get counter N
run_loop:
public extra4:
mov pins,x [0] ; [1+CPP-2] output pixel (set extra wait CPP-2)
jmp y--,run_loop ; [1] next pixel
; [1 instruction] output 1 RAW pixel (tokens: {8} pixel, {8} 'raw1' command)
; - falls through to raw_next to fetch the next command
public raw1:
public extra5:
mov pins,x [0] ; [1+CPP-3] output pixel (set extra wait CPP-3)
.wrap_target
raw_next:
out x,8 ; [1] get first token (counter N, or pixel value for 'run' and 'raw1')
out pc,8 ; [1] jump to the command code
; [5 instructions] output N+2 (2..257) RAW pixels (tokens: {8} N = number of pixels - 2, {8} 'raw' command, {(N+2)*8} pixels)
; - N+1 pixels in raw_loop, then one more pixel before wrapping to raw_next
public raw: ; 14:
raw_loop:
public extra6:
out pins,8 [0] ; [1+CPP-2] output pixel (set extra wait CPP-2)
jmp x--,raw_loop ; [1] loop next pixel
public extra7:
out pins,8 [0] ; [1+CPP-3] output pixel (set extra wait CPP-3)
; wrap jump to raw_next
.wrap

View file

@ -1,362 +0,0 @@
// ****************************************************************************
//
// VGA render GF_ATEXT
//
// ****************************************************************************
// u32 par SSEGM_PAR pointer to the font
// u32 par2 SSEGM_PAR2 pointer to 16 colors of palettes
// u16 par3 font height
#include "../define.h" // common definitions of C and ASM
#include "hardware/regs/sio.h" // registers of hardware divider
#include "hardware/regs/addressmap.h" // SIO base address
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// render font pixel mask
.extern RenderTextMask // u32 RenderTextMask[512];
// extern "C" u8* RenderAText(u8* dbuf, int x, int y, int w, sSegm* segm)
// render 8-pixel attribute text GF_ATEXT
// R0 ... destination data buffer
// R1 ... start X coordinate (in pixels, must be multiple of 4)
// R2 ... start Y coordinate (in graphics lines)
// R3 ... width to display (must be multiple of 4 and > 0)
// [stack] ... segm video segment sSegm
// Output new pointer to destination data buffer.
// 320 pixels takes 11.9 us on 151 MHz.
.thumb_func
.global RenderAText
RenderAText:
// Renders one scanline of attribute text: every text cell is 2 bytes (character,
// attribute). The attribute's high nibble indexes the background color and the low
// nibble the foreground color in the palette table. Each font byte is expanded to
// 8 output pixels through RenderTextMask (2 words = 2 x 4 pixel masks per font byte).
// Output is written 4 pixels (one 32-bit word) at a time; the source wraps to the
// start of the text row when X reaches the wrap width. Returns the advanced
// destination pointer in R0.
// push registers
push {r1-r7,lr}
mov r4,r8
push {r4}
// Stack content:
// SP+0: R8
// SP+4: R1 start X coordinate
// SP+8: R2 start Y coordinate (later: base pointer to text data row)
// SP+12: R3 width to display
// SP+16: R4
// SP+20: R5
// SP+24: R6
// SP+28: R7
// SP+32: LR
// SP+36: video segment (later: wrap width in X direction)
// get pointer to video segment -> R4
ldr r4,[sp,#36] // load video segment -> R4
// start divide Y/font height
ldr r6,RenderAText_pSioBase // get address of SIO base -> R6
str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate
ldrh r2,[r4,#SSEGM_PAR3] // font height -> R2
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height
// - now we must wait at least 8 clock cycles to get result of division
//   (the 10 cycles of useful work below cover the wait)
// [6] get wrap width -> [SP+36]
ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width
movs r7,#3 // [1] mask to align to 32-bit
bics r5,r7 // [1] align wrap
str r5,[sp,#36] // [2] save wrap width
// [1] align X coordinate to 32-bit
bics r1,r7 // [1]
// [3] align remaining width
bics r3,r7 // [1]
str r3,[sp,#12] // [2] save new width
// load result of division Y/font_height -> R5 Y relative at row, R2 Y row
// Note: QUOTIENT must be read last
ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row
ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R2, index of row
// pointer to font line -> R3
lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long)
ldr r3,[r4,#SSEGM_PAR] // get pointer to font
add r3,r5 // line offset + font base -> pointer to current font line R3
// base pointer to text data (without X) -> [SP+8], R2
ldrh r5,[r4,#SSEGM_WB] // get pitch of rows
muls r2,r5 // row index * WB -> offset of row in text buffer
ldr r5,[r4,#SSEGM_DATA] // pointer to data
add r2,r5 // base address of text buffer
str r2,[sp,#8] // save pointer to text buffer
// prepare pointer to text data with X -> R2 (1 position is 1 character + 1 attributes)
lsrs r6,r1,#3 // convert X to character index (1 character is 8 pixels width)
add r2,r6 // add index
add r2,r6 // add index*2, pointer to source text buffer -> R2
// prepare pointer to palettes -> R8
ldr r5,[r4,#SSEGM_PAR2] // get pointer to palette table -> R5
mov r8,r5 // save pointer to palette table
// prepare pointer to conversion table -> LR
ldr r5,RenderAText_Addr // get pointer to conversion table -> R5
mov lr,r5 // conversion table -> LR
// ---- render 2nd half of first character
// R0 ... pointer to destination data buffer
// R1 ... start X coordinate
// R2 ... pointer to source text buffer
// R3 ... pointer to font line
// R4 ... background color (expanded to 32-bit)
// R5 ... (temporary)
// R6 ... foreground color (expanded to 32-bit)
// R7 ... (temporary)
// R8 ... pointer to palette table
// LR ... pointer to conversion table
// [SP+8] ... base pointer to text data (without X)
// [SP+12] ... remaining width
// [SP+36] ... wrap width
// check bit 2 of X coordinate - check if image starts with 2nd half of first character
lsls r6,r1,#29 // check bit 2 of X coordinate (shifted into bit 31 = N flag)
bpl 2f // bit 2 not set, starting even 4-pixels
// [6] load background color -> R4
ldrb r6,[r2,#1] // [2] load color attributes -> R6
mov r5,r8 // [1] get palette table -> R5
lsrs r4,r6,#4 // [1] prepare index of background color
ldrb r4,[r5,r4] // [2] load background color
// [4] load foreground color -> R6
lsls r6,#28 // [1] isolate lower 4 bits
lsrs r6,#28 // [1] mask lower 4 bits
ldrb r6,[r5,r6] // [2] load foreground color
// [4] expand background color to 32-bit -> R4
lsls r5,r4,#8 // [1] shift background color << 8
orrs r5,r4 // [1] color expanded to 16 bits
lsls r4,r5,#16 // [1] shift 16-bit color << 16
orrs r4,r5 // [1] color expanded to 32 bits
// [4] expand foreground color to 32-bit -> R6
lsls r5,r6,#8 // [1] shift foreground color << 8
orrs r5,r6 // [1] color expanded to 16 bits
lsls r6,r5,#16 // [1] shift 16-bit color << 16
orrs r6,r5 // [1] color expanded to 32 bits
// [1] XOR foreground and background color -> R6
// (pixel = (mask & (fg^bg)) ^ bg selects fg where mask is set, bg elsewhere)
eors r6,r4 // [1] XOR foreground color with background color
// [5] load font sample -> R5
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
ldrb r5,[r3,r5] // [2] load font sample -> R5
adds r2,#2 // [1] shift pointer to source text buffer
// [2] prepare conversion table -> R5
lsls r5,#3 // [1] multiply font sample * 8
add r5,lr // [1] add pointer to conversion table
// [6] convert second 4 pixels (lower 4 bits)
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r7} // [2] store second 4 pixels
// shift X coordinate
adds r1,#4 // shift X coordinate
// check end of segment
ldr r7,[sp,#36] // load wrap width
cmp r1,r7 // end of segment?
blo 1f
movs r1,#0 // reset X coordinate
ldr r2,[sp,#8] // get base pointer to text data -> R2
// shift remaining width
1: ldr r7,[sp,#12] // get remaining width
subs r7,#4 // shift width
str r7,[sp,#12] // save new width
// prepare wrap width - start X -> R7
2: ldr r7,[sp,#36] // load wrap width
subs r7,r1 // pixels remaining to end of segment
// ---- start outer loop, render one part of segment
// Outer loop variables (* prepared before outer loop):
// R0 ... *pointer to destination data buffer
// R1 ... number of characters to generate in one part of segment
// R2 ... *pointer to source text buffer
// R3 ... *pointer to font line
// R4 ... background color (expanded to 32-bit)
// R5 ... (temporary)
// R6 ... foreground color (expanded to 32-bit)
// R7 ... *wrap width of this segment, later: temporary
// R8 ... *pointer to palette table
// LR ... *pointer to conversion table
// [SP+8] ... *base pointer to text data (without X)
// [SP+12] ... *remaining width
// [SP+36] ... *wrap width
RenderAText_OutLoop:
// limit wrap width by total width -> R7
ldr r6,[sp,#12] // get remaining width
cmp r7,r6 // compare with wrap width
bls 2f // width is OK
mov r7,r6 // limit wrap width
// check if remain whole characters
2: cmp r7,#8 // check number of remaining pixels
bhs 5f // enough characters remain
// check if 1st part of last character remains
cmp r7,#4 // check 1st part of last character
blo 3f // all done
// ---- render 1st part of last character
// Entered either from the check above (R7 = 4) or from the end of the inner loop
// (R7 = wrap width, reloaded there before the branch).
RenderAText_Last:
// [6] load background color -> R4
ldrb r6,[r2,#1] // [2] load color attributes -> R6
mov r5,r8 // [1] get palette table -> R5
lsrs r4,r6,#4 // [1] prepare index of background color
ldrb r4,[r5,r4] // [2] load background color
// [4] load foreground color -> R6
lsls r6,#28 // [1] isolate lower 4 bits
lsrs r6,#28 // [1] mask lower 4 bits
ldrb r6,[r5,r6] // [2] load foreground color
// [4] expand background color to 32-bit -> R4
lsls r5,r4,#8 // [1] shift background color << 8
orrs r5,r4 // [1] color expanded to 16 bits
lsls r4,r5,#16 // [1] shift 16-bit color << 16
orrs r4,r5 // [1] color expanded to 32 bits
// [4] expand foreground color to 32-bit -> R6
lsls r5,r6,#8 // [1] shift foreground color << 8
orrs r5,r6 // [1] color expanded to 16 bits
lsls r6,r5,#16 // [1] shift 16-bit color << 16
orrs r6,r5 // [1] color expanded to 32 bits
// [1] XOR foreground and background color -> R6
eors r6,r4 // [1] XOR foreground color with background color
// [5] load font sample -> R5
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
ldrb r5,[r3,r5] // [2] load font sample -> R5
adds r2,#2 // [1] shift pointer to source text buffer
// [2] prepare conversion table -> R5
lsls r5,#3 // [1] multiply font sample * 8
add r5,lr // [1] add pointer to conversion table
// [6] convert first 4 pixels (higher 4 bits)
ldr r1,[r5,#0] // [2] load mask for higher 4 bits (R1 is free here, used as temporary)
ands r1,r6 // [1] mask foreground color
eors r1,r4 // [1] combine with background color
stmia r0!,{r1} // [2] store first 4 pixels
// check if continue with next segment
// NOTE(review): if R7 == 4 here only because 4 pixels were left before the wrap
// point (wrap width - X == 4) while more width remains, the function returns
// without rendering the rest - confirm callers never produce this case.
ldr r2,[sp,#8] // get base pointer to text data -> R2
cmp r7,#4
bhi RenderAText_OutLoop
// pop registers and return
3: pop {r4}
mov r8,r4
pop {r1-r7,pc}
// ---- prepare to render whole characters
// prepare number of whole characters to render -> R1
5: lsrs r1,r7,#2 // shift to get number of characters*2
lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5
subs r6,r5 // get remaining width
str r6,[sp,#12] // save new remaining width
subs r1,#1 // number of characters*2 - 1
// ---- [41*N-1] start inner loop, render characters in one part of segment
// Inner loop variables (* prepared before inner loop):
// R0 ... *pointer to destination data buffer
// R1 ... *number of characters to generate*2 - 1 (loop counter)
// R2 ... *pointer to source text buffer
// R3 ... *pointer to font line
// R4 ... background color (expanded to 32-bit)
// R5 ... font sample
// R6 ... foreground color (expanded to 32-bit)
// R7 ... (temporary)
// R8 ... *pointer to palette table
// LR ... *pointer to conversion table
RenderAText_InLoop:
// [6] load background color -> R4
ldrb r6,[r2,#1] // [2] load color attributes -> R6
mov r5,r8 // [1] get palette table -> R5
lsrs r4,r6,#4 // [1] prepare index of background color
ldrb r4,[r5,r4] // [2] load background color
// [4] load foreground color -> R6
lsls r6,#28 // [1] isolate lower 4 bits
lsrs r6,#28 // [1] mask lower 4 bits
ldrb r6,[r5,r6] // [2] load foreground color
// [4] expand background color to 32-bit -> R4
lsls r5,r4,#8 // [1] shift background color << 8
orrs r5,r4 // [1] color expanded to 16 bits
lsls r4,r5,#16 // [1] shift 16-bit color << 16
orrs r4,r5 // [1] color expanded to 32 bits
// [4] expand foreground color to 32-bit -> R6
lsls r5,r6,#8 // [1] shift foreground color << 8
orrs r5,r6 // [1] color expanded to 16 bits
lsls r6,r5,#16 // [1] shift 16-bit color << 16
orrs r6,r5 // [1] color expanded to 32 bits
// [1] XOR foreground and background color -> R6
eors r6,r4 // [1] XOR foreground color with background color
// [5] load font sample -> R5
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
ldrb r5,[r3,r5] // [2] load font sample -> R5
adds r2,#2 // [1] shift pointer to source text buffer
// [2] prepare conversion table -> R5
lsls r5,#3 // [1] multiply font sample * 8
add r5,lr // [1] add pointer to conversion table
// [6] convert first 4 pixels (higher 4 bits)
ldr r7,[r5,#0] // [2] load mask for higher 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r7} // [2] store first 4 pixels
// [6] convert second 4 pixels (lower 4 bits)
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r7} // [2] store second 4 pixels
// [2,3] loop counter
subs r1,#2 // [1] shift loop counter
bhi RenderAText_InLoop // [1,2] > 0, render next whole character
// ---- end inner loop, continue with last character, or start new part
// continue to outer loop
// (ldr does not change flags, so beq below still tests the result of subs:
//  counter == 0 means one half character is left, counter < 0 means none)
ldr r7,[sp,#36] // load wrap width
beq RenderAText_Last // render 1st half of last character
ldr r2,[sp,#8] // get base pointer to text data -> R2
b RenderAText_OutLoop // go back to outer loop
.align 2
RenderAText_Addr:
.word RenderTextMask
RenderAText_pSioBase:
.word SIO_BASE // address of SIO base

View file

@ -1,346 +0,0 @@
// ****************************************************************************
//
// VGA render GF_ATTRIB8
//
// ****************************************************************************
#include "../define.h" // common definitions of C and ASM
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// render font pixel mask
.extern RenderTextMask // u32 RenderTextMask[512];
// extern "C" u8* RenderAttrib8(u8* dbuf, int x, int y, int w, sSegm* segm)
// render 8-pixel attribute text GF_ATTRIB8
// R0 ... destination data buffer
// R1 ... start X coordinate (in pixels, must be multiple of 4)
// R2 ... start Y coordinate (in graphics lines)
// R3 ... width to display (must be multiple of 4 and > 0)
// [stack] ... segm video segment sSegm
// Output new pointer to destination data buffer.
// 320 pixels takes 11 us on 151 MHz.
.thumb_func
.global RenderAttrib8
RenderAttrib8:
// Renders one scanline of 1-bit pixel data colored by attributes: each source byte
// holds 8 pixels, and one attribute byte (taken from the buffer at SSEGM_PAR, one
// attribute row per 8 graphics lines) gives the background color index in its high
// nibble and the foreground color index in its low nibble. Each pixel byte is
// expanded to 8 output pixels through RenderTextMask. Output is written in 32-bit
// words (4 pixels); the source wraps to the start of the row when X reaches the
// wrap width. Returns the advanced destination pointer in R0. R1 is not preserved.
// push registers
push {r2-r7,lr}
mov r4,r8
push {r4}
// Input variables and stack content:
// R1 ... start X coordinate
// SP+0: R8
// SP+4: R2 start Y coordinate (later: base pointer to pixel data row)
// SP+8: R3 width to display
// SP+12: R4
// SP+16: R5
// SP+20: R6
// SP+24: R7
// SP+28: LR
// SP+32: video segment (later: wrap width in X direction)
// get pointer to video segment -> R4
ldr r4,[sp,#32] // load video segment -> R4
// get wrap width -> [SP+32]
ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width
movs r7,#3 // mask to align to 32-bit
bics r5,r7 // align wrap
str r5,[sp,#32] // save wrap width
// align X coordinate to 32-bit -> R1
bics r1,r7
// align remaining width -> [SP+8]
bics r3,r7 // width
str r3,[sp,#8] // save new width
// base pointer to attributes (without X) -> R3
lsrs r3,r2,#3 // delete low 3 bits of Y coordinate -> row index
ldrh r5,[r4,#SSEGM_WB] // get pitch of rows
muls r3,r5 // (Y/8) * WB -> offset of row in attribute buffer
ldr r7,[r4,#SSEGM_PAR] // pointer to attributes
add r3,r7 // base address of attributes -> R3
// base pointer to pixel data (without X) -> [SP+4], R2
muls r2,r5 // Y * WB -> offset of row in pixel data buffer
ldr r5,[r4,#SSEGM_DATA] // pointer to data
add r2,r5 // base address of pixel data row
str r2,[sp,#4] // save pointer to pixel data row
// offset of attributes -> R3
// (kept as a difference so that [R2+R3] addresses the attribute of the pixel byte at [R2])
subs r3,r2 // offset of attributes, relative to source pixel buffer
// prepare pointer to pixel data with X -> R2 (1 position is 1 byte = 8 pixels)
lsrs r6,r1,#3 // convert X to byte index (1 byte is 8 pixels width)
add r2,r6 // add index, pointer to source pixel buffer -> R2
// prepare pointer to palettes -> R8
ldr r5,[r4,#SSEGM_PAR2] // get pointer to palette table -> R5
mov r8,r5 // save pointer to palette table
// prepare pointer to conversion table -> LR
ldr r5,RenderAttrib8_Addr // get pointer to conversion table -> R5
mov lr,r5 // conversion table -> LR
// ---- render 2nd half of first character
// R0 ... pointer to destination data buffer
// R1 ... start X coordinate
// R2 ... pointer to source text buffer
// R3 ... offset of attributes (relative to source text buffer)
// R4 ... background color (expanded to 32-bit)
// R5 ... (temporary)
// R6 ... foreground color (expanded to 32-bit)
// R7 ... (temporary)
// R8 ... pointer to palette table
// LR ... pointer to conversion table
// [SP+4] ... base pointer to pixel data (without X)
// [SP+8] ... remaining width
// [SP+32] ... wrap width
// check bit 2 of X coordinate - check if image starts with 2nd half of first character
lsls r6,r1,#29 // check bit 2 of X coordinate (shifted into bit 31 = N flag)
bpl 2f // bit 2 not set, starting even 4-pixels
// [6] load background color -> R4
ldrb r6,[r2,r3] // [2] load color attributes -> R6
mov r5,r8 // [1] get palette table -> R5
lsrs r4,r6,#4 // [1] prepare index of background color
ldrb r4,[r5,r4] // [2] load background color -> R4
// [4] load foreground color -> R6
lsls r6,#28 // [1] isolate lower 4 bits
lsrs r6,#28 // [1] mask lower 4 bits
ldrb r6,[r5,r6] // [2] load foreground color -> R6
// [4] expand background color to 32-bit -> R4
lsls r5,r4,#8 // [1] shift background color << 8
orrs r5,r4 // [1] color expanded to 16 bits
lsls r4,r5,#16 // [1] shift 16-bit color << 16
orrs r4,r5 // [1] color expanded to 32 bits
// [4] expand foreground color to 32-bit -> R6
lsls r5,r6,#8 // [1] shift foreground color << 8
orrs r5,r6 // [1] color expanded to 16 bits
lsls r6,r5,#16 // [1] shift 16-bit color << 16
orrs r6,r5 // [1] color expanded to 32 bits
// [1] XOR foreground and background color -> R6
// (pixel = (mask & (fg^bg)) ^ bg selects fg where mask is set, bg elsewhere)
eors r6,r4 // [1] XOR foreground color with background color
// [3] load pixel sample -> R5
ldrb r5,[r2,#0] // [2] load pixels from source buffer -> R5
adds r2,#1 // [1] shift pointer to source buffer
// [2] prepare conversion table -> R5
lsls r5,#3 // [1] multiply sample * 8
add r5,lr // [1] add pointer to conversion table
// [6] convert second 4 pixels (lower 4 bits)
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r7} // [2] store second 4 pixels
// shift X coordinate
adds r1,#4 // shift X coordinate
// check end of segment
ldr r7,[sp,#32] // load wrap width
cmp r1,r7 // end of segment?
blo 1f
movs r1,#0 // reset X coordinate
ldr r2,[sp,#4] // get base pointer to pixel data -> R2
// shift remaining width
1: ldr r7,[sp,#8] // get remaining width
subs r7,#4 // shift width
str r7,[sp,#8] // save new width
// prepare wrap width - start X -> R7
2: ldr r7,[sp,#32] // load wrap width
subs r7,r1 // pixels remaining to end of segment
// ---- start outer loop, render one part of segment
// Outer loop variables (* prepared before outer loop):
// R0 ... *pointer to destination data buffer
// R1 ... number of characters to generate in one part of segment
// R2 ... *pointer to source text buffer
// R3 ... *offset of attributes (relative to source text buffer)
// R4 ... background color (expanded to 32-bit)
// R5 ... (temporary)
// R6 ... foreground color (expanded to 32-bit)
// R7 ... *wrap width of this segment, later: temporary
// R8 ... *pointer to palette table
// LR ... *pointer to conversion table
// [SP+4] ... *base pointer to pixel data (without X)
// [SP+8] ... *remaining width
// [SP+32] ... *wrap width
RenderAttrib8_OutLoop:
// limit wrap width by total width -> R7
ldr r6,[sp,#8] // get remaining width
cmp r7,r6 // compare with wrap width
bls 2f // width is OK
mov r7,r6 // limit wrap width
// check if remain whole characters
2: cmp r7,#8 // check number of remaining pixels
bhs 5f // enough characters remain
// check if 1st part of last character remains
cmp r7,#4 // check 1st part of last character
blo 3f // all done
// ---- render 1st part of last character
// Entered either from the check above (R7 = 4) or from the end of the inner loop
// (R7 = wrap width, reloaded there before the branch).
RenderAttrib8_Last:
// [6] load background color -> R4
ldrb r6,[r2,r3] // [2] load color attributes -> R6
mov r5,r8 // [1] get palette table -> R5
lsrs r4,r6,#4 // [1] prepare index of background color
ldrb r4,[r5,r4] // [2] load background color -> R4
// [4] load foreground color -> R6
lsls r6,#28 // [1] isolate lower 4 bits
lsrs r6,#28 // [1] mask lower 4 bits
ldrb r6,[r5,r6] // [2] load foreground color -> R6
// [4] expand background color to 32-bit -> R4
lsls r5,r4,#8 // [1] shift background color << 8
orrs r5,r4 // [1] color expanded to 16 bits
lsls r4,r5,#16 // [1] shift 16-bit color << 16
orrs r4,r5 // [1] color expanded to 32 bits
// [4] expand foreground color to 32-bit -> R6
lsls r5,r6,#8 // [1] shift foreground color << 8
orrs r5,r6 // [1] color expanded to 16 bits
lsls r6,r5,#16 // [1] shift 16-bit color << 16
orrs r6,r5 // [1] color expanded to 32 bits
// [1] XOR foreground and background color -> R6
eors r6,r4 // [1] XOR foreground color with background color
// [3] load pixel sample -> R5
ldrb r5,[r2,#0] // [2] load pixels from source buffer -> R5
adds r2,#1 // [1] shift pointer to source buffer
// [2] prepare conversion table -> R5
lsls r5,#3 // [1] multiply sample * 8
add r5,lr // [1] add pointer to conversion table
// [6] convert first 4 pixels (higher 4 bits)
ldr r1,[r5,#0] // [2] load mask for higher 4 bits (R1 is free here, used as temporary)
ands r1,r6 // [1] mask foreground color
eors r1,r4 // [1] combine with background color
stmia r0!,{r1} // [2] store first 4 pixels
// check if continue with next segment
// NOTE(review): if R7 == 4 here only because 4 pixels were left before the wrap
// point (wrap width - X == 4) while more width remains, the function returns
// without rendering the rest - confirm callers never produce this case.
ldr r2,[sp,#4] // get base pointer to pixel data -> R2
cmp r7,#4
bhi RenderAttrib8_OutLoop
// pop registers and return
3: pop {r4}
mov r8,r4
pop {r2-r7,pc}
// ---- prepare to render whole characters
// prepare number of whole characters to render -> R1
5: lsrs r1,r7,#2 // shift to get number of characters*2
lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5
subs r6,r5 // get remaining width
str r6,[sp,#8] // save new remaining width
subs r1,#1 // number of characters*2 - 1
// ---- [38*N-1] start inner loop, render characters in one part of segment
// Inner loop variables (* prepared before inner loop):
// R0 ... *pointer to destination data buffer
// R1 ... *number of characters to generate*2 - 1 (loop counter)
// R2 ... *pointer to source text buffer
// R3 ... *offset of attributes (relative to source text buffer)
// R4 ... background color (expanded to 32-bit)
// R5 ... (temporary)
// R6 ... foreground color (expanded to 32-bit)
// R7 ... (temporary)
// R8 ... *pointer to palette table
// LR ... *pointer to conversion table
// [SP+4] ... *base pointer to pixel data (without X)
// [SP+8] ... *remaining width
// [SP+32] ... *wrap width
RenderAttrib8_InLoop:
// [6] load background color -> R4
ldrb r6,[r2,r3] // [2] load color attributes -> R6
mov r5,r8 // [1] get palette table -> R5
lsrs r4,r6,#4 // [1] prepare index of background color
ldrb r4,[r5,r4] // [2] load background color -> R4
// [4] load foreground color -> R6
lsls r6,#28 // [1] isolate lower 4 bits
lsrs r6,#28 // [1] mask lower 4 bits
ldrb r6,[r5,r6] // [2] load foreground color -> R6
// [4] expand background color to 32-bit -> R4
lsls r5,r4,#8 // [1] shift background color << 8
orrs r5,r4 // [1] color expanded to 16 bits
lsls r4,r5,#16 // [1] shift 16-bit color << 16
orrs r4,r5 // [1] color expanded to 32 bits
// [4] expand foreground color to 32-bit -> R6
lsls r5,r6,#8 // [1] shift foreground color << 8
orrs r5,r6 // [1] color expanded to 16 bits
lsls r6,r5,#16 // [1] shift 16-bit color << 16
orrs r6,r5 // [1] color expanded to 32 bits
// [1] XOR foreground and background color -> R6
eors r6,r4 // [1] XOR foreground color with background color
// [3] load pixel sample -> R7
ldrb r7,[r2,#0] // [2] load pixels from source buffer -> R7
adds r2,#1 // [1] shift pointer to source buffer
// [2] prepare conversion table -> R7
lsls r7,#3 // [1] multiply sample * 8
add r7,lr // [1] add pointer to conversion table
// [4] convert first 4 pixels (higher 4 bits)
ldr r5,[r7,#0] // [2] load mask for higher 4 bits
ands r5,r6 // [1] mask foreground color
eors r5,r4 // [1] combine with background color
// [4] convert second 4 pixels (lower 4 bits)
ldr r7,[r7,#4] // [2] load mask for lower 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
// [3] write pixels
stmia r0!,{r5,r7} // [3] store 8 pixels
// [2,3] loop counter
subs r1,#2 // [1] shift loop counter
bhi RenderAttrib8_InLoop // [1,2] > 0, render next whole character
// ---- end inner loop, continue with last character, or start new part
// continue to outer loop
// (ldr does not change flags, so beq below still tests the result of subs:
//  counter == 0 means one half character is left, counter < 0 means none)
ldr r7,[sp,#32] // load wrap width
beq RenderAttrib8_Last // render 1st half of last character
ldr r2,[sp,#4] // get base pointer to pixel data -> R2
b RenderAttrib8_OutLoop // go back to outer loop
.align 2
RenderAttrib8_Addr:
.word RenderTextMask

View file

@ -1,89 +0,0 @@
// ****************************************************************************
//
// VGA render GF_COLOR
//
// ****************************************************************************
#include "../define.h" // common definitions of C and ASM
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// extern "C" u8* RenderColor(u8* dbuf, u32 color, int w);
// render color GF_COLOR
// R0 ... pointer to destination data buffer
// R1 ... color pattern 4-pixels
// R2 ... width of this segment as multiple of 4 pixels (=width in pixels/4)
// Output new pointer to destination data buffer.
// 320 pixels takes 1.1 us on 151 MHz.
// - using only small transfer (24 pixels per loop) takes 1.22 us
// - using only single transfer (4 pixels per loop) takes 2.91 us
// - memset takes 1.42 us
.thumb_func
.global RenderColor
RenderColor:
// RenderColor and MemSet4 are two names for the same entry point: no instruction
// separates the two labels, so both resolve to the code below.
// fill memory buffer with u32 words
// buf ... data buffer, must be 32-bit aligned
// data ... data word to store
// num ... number of 32-bit words (= number of bytes/4)
// Returns new destination address.
// The count is tested with signed comparisons (bge), so num <= 0 stores nothing.
// extern "C" u32* MemSet4(u32* buf, u32 data, int num);
.thumb_func
.global MemSet4
MemSet4:
// push registers
push {r4,r5,r6,r7,lr}
// duplicate color pattern
// (6 registers R1,R3..R7 hold the same word so that one stmia stores 6 words)
mov r3,r1
mov r4,r1
mov r5,r1
mov r6,r1
mov r7,r1
// go to big transfer
b 3f
// ---- [38 per loop] big transfer 120 pixels, speed 0.317 clk per pixel
// [38] store 30 words (=120 pixels)
2: stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels
stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels
stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels
stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels
stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels
3: subs r2,#30 // [1] decrement number of words
bge 2b // [1,2] loop next 30 words
adds r2,#30 // [1] restore (undo the last subtraction, R2 = words still to store, < 30)
// go to small transfer
b 6f
// ---- [10 per loop] small transfer 24 pixels, speed 0.417 clk per pixel
// [8] store 6 words (=24 pixels)
4: stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels
6: subs r2,#6 // [1] decrement number of words
bge 4b // [1,2] loop next 6 words
adds r2,#6 // [1] restore (undo the last subtraction, R2 = words still to store, < 6)
// go to single transfer
b 8f
// ---- [5 per loop] single transfer 4 pixels, speed 1.25 clk per pixel
// [4,5] store 1 word (=4 pixels)
7: stmia r0!,{r1} // [2] 1 word, 4 pixels
8: subs r2,#1 // [1] loop counter
bge 7b // [1,2] next word
// pop registers
// (R0 already points past the last stored word and is the return value)
pop {r4,r5,r6,r7,pc}

View file

@ -1,335 +0,0 @@
// ****************************************************************************
//
// VGA render GF_CTEXT
//
// ****************************************************************************
// u32 par SSEGM_PAR pointer to the font
// u16 par3 font height
#include "../define.h" // common definitions of C and ASM
#include "hardware/regs/sio.h" // registers of hardware divider
#include "hardware/regs/addressmap.h" // SIO base address
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// render font pixel mask
.extern RenderTextMask // u32 RenderTextMask[512];
// extern "C" u8* RenderCText(u8* dbuf, int x, int y, int w, sSegm* segm)
// render 8-pixel color text GF_CTEXT
// R0 ... destination data buffer
// R1 ... start X coordinate (in pixels, must be multiple of 4)
// R2 ... start Y coordinate (in graphics lines)
// R3 ... width to display (must be multiple of 4 and > 0)
// [stack] ... segm video segment sSegm
// Output new pointer to destination data buffer.
// 320 pixels takes 10.4 us on 151 MHz.
.thumb_func
.global RenderCText
RenderCText:
// push registers
push {r1-r7,lr}
// Stack content:
// SP+0: R1 start X coordinate
// SP+4: R2 start Y coordinate (later: base pointer to text data row)
// SP+8: R3 width to display
// SP+12: R4
// SP+16: R5
// SP+20: R6
// SP+24: R7
// SP+28: LR
// SP+32: video segment (later: wrap width in X direction)
// get pointer to video segment -> R4
ldr r4,[sp,#32] // load video segment -> R4
// start divide Y/font height
ldr r6,RenderCText_pSioBase // get address of SIO base -> R6
str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate
ldrh r2,[r4,#SSEGM_PAR3] // font height -> R2
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height
// - now we must wait at least 8 clock cycles to get result of division
// [6] get wrap width -> [SP+32]
ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width
movs r7,#3 // [1] mask to align to 32-bit
bics r5,r7 // [1] align wrap
str r5,[sp,#32] // [2] save wrap width
// [1] align X coordinate to 32-bit
bics r1,r7 // [1]
// [3] align remaining width
bics r3,r7 // [1]
str r3,[sp,#8] // [2] save new width
// load result of division Y/font_height -> R6 Y relative at row, R7 Y row
// Note: QUOTIENT must be read last
ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row
ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R2, index of row
// pointer to font line -> R3
lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long)
ldr r3,[r4,#SSEGM_PAR] // get pointer to font
add r3,r5 // line offset + font base -> pointer to current font line R3
// base pointer to text data (without X) -> [SP+4], R2
ldrh r5,[r4,#SSEGM_WB] // get pitch of rows
muls r2,r5 // Y * WB -> offset of row in text buffer
ldr r5,[r4,#SSEGM_DATA] // pointer to data
add r2,r5 // base address of text buffer
str r2,[sp,#4] // save pointer to text buffer
// prepare pointer to text data with X -> R2 (1 position is 1 character + 1 background + 1 foreground)
lsrs r6,r1,#3 // convert X to character index (1 character is 8 pixels width)
add r2,r6 // add index
add r2,r6 // add index*2
add r2,r6 // add index*3, pointer to source text buffer -> R2
// prepare pointer to conversion table -> LR
ldr r5,RenderCText_Addr // get pointer to conversion table -> R5
mov lr,r5 // conversion table -> LR
// ---- render 2nd half of first character
// R0 ... pointer to destination data buffer
// R1 ... start X coordinate
// R2 ... pointer to source text buffer
// R3 ... pointer to font line
// R4 ... background color (expanded to 32-bit)
// R5 ... (temporary)
// R6 ... foreground color (expanded to 32-bit)
// R7 ... (temporary)
// LR ... pointer to conversion table
// [SP+4] ... base pointer to text data (without X)
// [SP+8] ... remaining width
// [SP+32] ... wrap width
// check bit 2 of X coordinate - check if image starts with 2nd half of first character
lsls r6,r1,#29 // check bit 2 of X coordinate
bpl 2f // bit 2 not set, starting even 4-pixels
// [4] load font sample -> R5
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
ldrb r5,[r3,r5] // [2] load font sample -> R5
// [2] load background color -> R4
ldrb r4,[r2,#1] // [2] load background color from source text buffer
// [4] expand background color to 32-bit -> R4
lsls r7,r4,#8 // [1] shift background color << 8
orrs r7,r4 // [1] color expanded to 16 bits
lsls r4,r7,#16 // [1] shift 16-bit color << 16
orrs r4,r7 // [1] color expanded to 32 bits
// [3] load foreground color -> R6
ldrb r6,[r2,#2] // [2] load foreground color from source text buffer -> R6
adds r2,#3 // [1] shift pointer to source text buffer
// [4] expand foreground color to 32-bit -> R6
lsls r7,r6,#8 // [1] shift foreground color << 8
orrs r7,r6 // [1] color expanded to 16 bits
lsls r6,r7,#16 // [1] shift 16-bit color << 16
orrs r6,r7 // [1] color expanded to 32 bits
// [1] XOR foreground and background color -> R6
eors r6,r4 // [1] XOR foreground color with background color
// [2] prepare conversion table -> R5
lsls r5,#3 // [1] multiply font sample * 8
add r5,lr // [1] add pointer to conversion table
// [6] convert second 4 pixels (lower 4 bits)
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r7} // [2] store second 4 pixels
// shift X coordinate
adds r1,#4 // shift X coordinate
// check end of segment
ldr r7,[sp,#32] // load wrap width
cmp r1,r7 // end of segment?
blo 1f
movs r1,#0 // reset X coordinate
ldr r2,[sp,#4] // get base pointer to text data -> R2
// shift remaining width
1: ldr r7,[sp,#8] // get remaining width
subs r7,#4 // shift width
str r7,[sp,#8] // save new width
// prepare wrap width - start X -> R7
2: ldr r7,[sp,#32] // load wrap width
subs r7,r1 // pixels remaining to end of segment
// ---- start outer loop, render one part of segment
// Outer loop variables (* prepared before outer loop):
// R0 ... *pointer to destination data buffer
// R1 ... number of characters to generate in one part of segment
// R2 ... *pointer to source text buffer
// R3 ... *pointer to font line
// R4 ... background color (expanded to 32-bit)
// R5 ... (temporary)
// R6 ... foreground color (expanded to 32-bit)
// R7 ... *wrap width of this segment, later: temporary
// LR ... *pointer to conversion table
// [SP+4] ... *base pointer to text data (without X)
// [SP+8] ... *remaining width
// [SP+32] ... *wrap width
RenderCText_OutLoop:
// limit wrap width by total width -> R7
ldr r6,[sp,#8] // get remaining width
cmp r7,r6 // compare with wrap width
bls 2f // width is OK
mov r7,r6 // limit wrap width
// check if remain whole characters
2: cmp r7,#8 // check number of remaining pixels
bhs 5f // enough characters remain
// check if 1st part of last character remains
cmp r7,#4 // check 1st part of last character
blo 3f // all done
// ---- render 1st part of last character
RenderCText_Last:
// [4] load font sample -> R5
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
ldrb r5,[r3,r5] // [2] load font sample -> R5
// [2] load background color -> R4
ldrb r4,[r2,#1] // [2] load background color from source text buffer
// [4] expand background color to 32-bit -> R4
lsls r1,r4,#8 // [1] shift background color << 8
orrs r1,r4 // [1] color expanded to 16 bits
lsls r4,r1,#16 // [1] shift 16-bit color << 16
orrs r4,r1 // [1] color expanded to 32 bits
// [3] load foreground color -> R6
ldrb r6,[r2,#2] // [2] load foreground color from source text buffer -> R6
adds r2,#3 // [1] shift pointer to source text buffer
// [4] expand foreground color to 32-bit
lsls r1,r6,#8 // [1] shift foreground color << 8
orrs r1,r6 // [1] color expanded to 16 bits
lsls r6,r1,#16 // [1] shift 16-bit color << 16
orrs r6,r1 // [1] color expanded to 32 bits
// [1] XOR foreground and background color -> R6
eors r6,r4 // [1] XOR foreground color with background color
// [2] prepare conversion table -> R5
lsls r5,#3 // [1] multiply font sample * 8
add r5,lr // [1] add pointer to conversion table
// [6] convert first 4 pixels (higher 4 bits)
ldr r1,[r5,#0] // [2] load mask for higher 4 bits
ands r1,r6 // [1] mask foreground color
eors r1,r4 // [1] combine with background color
stmia r0!,{r1} // [2] store first 4 pixels
// check if continue with next segment
ldr r2,[sp,#4] // get base pointer to text data -> R2
cmp r7,#4
bhi RenderCText_OutLoop
// pop registers and return
3: pop {r1-r7,pc}
// ---- prepare to render whole characters
// prepare number of whole characters to render -> R1
5: lsrs r1,r7,#2 // shift to get number of characters*2
lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5
subs r6,r5 // get remaining width
str r6,[sp,#8] // save new remaining width
subs r1,#1 // number of characters*2 - 1
// ---- [35*N-1] start inner loop, render characters in one part of segment
// Inner loop variables (* prepared before inner loop):
// R0 ... *pointer to destination data buffer
// R1 ... *number of characters to generate*2 - 1 (loop counter)
// R2 ... *pointer to source text buffer
// R3 ... *pointer to font line
// R4 ... background color (expanded to 32-bit)
// R5 ... font sample
// R6 ... foreground color (expanded to 32-bit)
// R7 ... (temporary)
// LR ... *pointer to conversion table
RenderCText_InLoop:
// [4] load font sample -> R5
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
ldrb r5,[r3,r5] // [2] load font sample -> R5
// [2] load background color -> R4
ldrb r4,[r2,#1] // [2] load background color from source text buffer
// [4] expand background color to 32-bit -> R4
lsls r7,r4,#8 // [1] shift background color << 8
orrs r7,r4 // [1] color expanded to 16 bits
lsls r4,r7,#16 // [1] shift 16-bit color << 16
orrs r4,r7 // [1] color expanded to 32 bits
// [3] load foreground color -> R6
ldrb r6,[r2,#2] // [2] load foreground color from source text buffer -> R6
adds r2,#3 // [1] shift pointer to source text buffer
// [4] expand foreground color to 32-bit
lsls r7,r6,#8 // [1] shift foreground color << 8
orrs r7,r6 // [1] color expanded to 16 bits
lsls r6,r7,#16 // [1] shift 16-bit color << 16
orrs r6,r7 // [1] color expanded to 32 bits
// [1] XOR foreground and background color -> R6
eors r6,r4 // [1] XOR foreground color with background color
// [2] prepare conversion table -> R5
lsls r5,#3 // [1] multiply font sample * 8
add r5,lr // [1] add pointer to conversion table
// [6] convert first 4 pixels (higher 4 bits)
ldr r7,[r5,#0] // [2] load mask for higher 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r7} // [2] store first 4 pixels
// [6] convert second 4 pixels (lower 4 bits)
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r7} // [2] store second 4 pixels
// [2,3] loop counter
subs r1,#2 // [1] shift loop counter
bhi RenderCText_InLoop // [1,2] > 0, render next whole character
// ---- end inner loop, continue with last character, or start new part
// continue to outer loop
ldr r7,[sp,#32] // load wrap width
beq RenderCText_Last // render 1st half of last character
ldr r2,[sp,#4] // get base pointer to text data -> R2
b RenderCText_OutLoop // go back to outer loop
.align 2
RenderCText_Addr:
.word RenderTextMask
RenderCText_pSioBase:
.word SIO_BASE // addres of SIO base

View file

@ -1,436 +0,0 @@
// ****************************************************************************
//
// VGA render GF_DTEXT
//
// ****************************************************************************
// u32 par SSEGM_PAR pointer to the font
// u32 par2 SSEGM_PAR2 pointer to font gradient
// u16 par3 LOW background color, HIGH font height
#include "../define.h" // common definitions of C and ASM
#include "hardware/regs/sio.h" // registers of hardware divider
#include "hardware/regs/addressmap.h" // SIO base address
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// render font pixel mask
.extern RenderTextMask // u32 RenderTextMask[512];
// extern "C" u8* RenderDText(u8* dbuf, int x, int y, int w, sSegm* segm)
// render 8-pixel double gradient color text GF_DTEXT
// Every font bit is written to the destination twice, so one character
// (8 font bits) occupies 16 destination pixels. The foreground color is taken
// from the gradient array (one 32-bit word per 8 destination pixels), the
// background color is the low byte of par3 expanded to 32 bits.
// R0 ... destination data buffer
// R1 ... start X coordinate (in pixels, must be multiple of 4)
// R2 ... start Y coordinate (in graphics lines)
// R3 ... width to display (must be multiple of 4 and > 0)
// [stack] ... segm video segment sSegm
// Output new pointer to destination data buffer.
// R0 is never saved or restored, so the advanced destination pointer is the
// return value. R4-R8 are preserved; R1-R3 are not (their stack slots are
// reused as local variables and popped back on return).
// NOTE(review): the original timing remark here read "320 pixels takes us on
// 151 MHz." - the microsecond figure is missing from the source.
.thumb_func
.global RenderDText
RenderDText:
// Stack content:
// SP+0: R8
// SP+4: R1 start X coordinate (later: base pointer to gradient array)
// SP+8: R2 start Y coordinate (later: base pointer to text data row)
// SP+12: R3 width to display
// SP+16: R4
// SP+20: R5
// SP+24: R6
// SP+28: R7
// SP+32: LR
// SP+36: video segment (later: wrap width in X direction)
// push registers
push {r1-r7,lr}
mov r4,r8
push {r4}
// get pointer to video segment -> R4
ldr r4,[sp,#36] // load video segment -> R4
// start divide Y/font height
ldr r6,RenderDText_pSioBase // get address of SIO base -> R6
str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate
ldrb r2,[r4,#SSEGM_PAR3+1] // font height -> R2
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height
// - now we must wait at least 8 clock cycles to get result of division
// [6] get wrap width -> [SP+36]
ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width
movs r7,#3 // [1] mask to align to 32-bit
bics r5,r7 // [1] align wrap
str r5,[sp,#36] // [2] save wrap width
// [1] align X coordinate to 32-bit
bics r1,r7 // [1]
// [3] align remaining width
bics r3,r7 // [1]
str r3,[sp,#12] // [2] save new width
// load result of division Y/font_height -> R5 Y relative at row, R2 Y row
// Note: QUOTIENT must be read last
ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row
ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R2, index of row
// pointer to font line -> R3
lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long)
ldr r3,[r4,#SSEGM_PAR] // get pointer to font
add r3,r5 // line offset + font base -> pointer to current font line R3
mov r8,r3 // keep pointer to font line in R8 (R3 is reused for the gradient)
// base pointer to text data (without X) -> [SP+8], R2
ldrh r5,[r4,#SSEGM_WB] // get pitch of rows
muls r2,r5 // Y * WB -> offset of row in text buffer
ldr r5,[r4,#SSEGM_DATA] // pointer to data
add r2,r5 // base address of text buffer
str r2,[sp,#8] // save pointer to text buffer
// base pointer to gradient array -> [SP+4], R3
ldr r3,[r4,#SSEGM_PAR2] // pointer to gradient array
str r3,[sp,#4] // save pointer to gradient array
// prepare pointers to gradient array and text data with X -> R3, R2
lsrs r6,r1,#3 // convert X to gradient index
lsls r6,#2 // round to 4-bytes
add r3,r6 // pointer to source gradient array
lsrs r6,r1,#4 // convert X to character index (1 character is 16 pixels width)
add r2,r6 // pointer to source text buffer -> R2
// prepare background color, expand to 32 bits -> R4
ldrb r4,[r4,#SSEGM_PAR3] // load background color
lsls r5,r4,#8 // shift background color << 8
orrs r5,r4 // color expanded to 16 bits
lsls r4,r5,#16 // shift 16-bit color << 16
orrs r4,r5 // color expanded to 32 bits
// prepare pointer to conversion table -> LR
ldr r5,RenderDText_Addr // get pointer to conversion table -> R5
mov lr,r5 // conversion table -> LR
// ---- render remaining part of first character (when X starts inside a character)
// R0 ... pointer to destination data buffer
// R1 ... start X coordinate
// R2 ... pointer to source text buffer
// R3 ... pointer to gradient array
// R4 ... background color (expanded to 32-bit)
// R5..R7 ... (temporary)
// R8 ... pointer to font line
// LR ... pointer to conversion table
// [SP+4] ... base pointer to gradient array
// [SP+8] ... base pointer to text data (without X)
// [SP+12] ... remaining width
// [SP+36] ... wrap width
// check if X is aligned
lsls r6,r1,#(32-4) // keep only low 4 bits of X (offset inside 16-pixel character)
beq 2f // offset is 0: X is aligned to a character, no partial character to render
// shift X coordinate
lsrs r5,r6,#(32-4) // [1] X pixel offset in first character -> R5
movs r6,16 // character width
subs r6,r5 // pixels remain
adds r1,r6 // shift X coordinate (align to next character)
ldr r7,[sp,#12]
subs r7,r6 // shift width
str r7,[sp,#12]
push {r1} // save aligned X; R1 is used as pixel scratch until the pop below
// [6] load font sample -> R7
ldrb r7,[r2,#0] // [2] load character from source text buffer -> R7
adds r2,#1 // [1] shift pointer to source text buffer
add r7,r8 // [1] pointer to font line
ldrb r7,[r7] // [2] load font sample -> R7
// [3] load foreground color, XOR with background -> R6
ldmia r3!,{r6} // [2] load foreground color from gradient buffer
eors r6,r4 // [1] XOR foreground color with background color
// [2] prepare conversion table -> R7
lsls r7,#3 // [1] multiply font sample * 8
add r7,lr // [1] add pointer to conversion table
// [4] convert first 4 pixels (higher 4 bits) -> R1
ldr r1,[r7,#0] // [2] load mask for higher 4 bits
ands r1,r6 // [1] mask foreground color
eors r1,r4 // [1] combine with background color
cmp r5,#4 // check start position of X
bhi 3f // > 4
// offset is 4: store upper two bytes of the first mask word, each byte twice
lsrs r1,#16 // [1]
strb r1,[r0,#0] // [2]
strb r1,[r0,#1] // [2]
lsrs r1,#8 // [1]
strb r1,[r0,#2] // [2]
strb r1,[r0,#3] // [2]
adds r0,#4 // [1]
// [3] load foreground color, XOR with background -> R6
3: ldmia r3!,{r6} // [2] load foreground color from gradient buffer
eors r6,r4 // [1] XOR foreground color with background color
// [4] convert second 4 pixels (lower 4 bits)
ldr r1,[r7,#4] // [2] load mask for lower 4 bits
ands r1,r6 // [1] mask foreground color
eors r1,r4 // [1] combine with background color
// store 8 pixels (or only the last 4 when offset > 8)
cmp r5,#8 // check start position of X
bhi 4f // > 8
strb r1,[r0,#0] // [2]
strb r1,[r0,#1] // [2]
lsrs r1,#8 // [1]
strb r1,[r0,#2] // [2]
strb r1,[r0,#3] // [2]
lsls r1,#8 // undo the shift so that the common tail below can shift by 16
adds r0,#4
4: lsrs r1,#16 // [1]
strb r1,[r0,#0] // [2]
strb r1,[r0,#1] // [2]
lsrs r1,#8 // [1]
strb r1,[r0,#2] // [2]
strb r1,[r0,#3] // [2]
adds r0,#4 // [1]
pop {r1} // restore aligned X coordinate
// check end of segment
ldr r7,[sp,#36] // load wrap width
cmp r1,r7 // end of segment?
blo 2f
movs r1,#0 // reset X coordinate
ldr r2,[sp,#8] // get base pointer to text data -> R2
ldr r3,[sp,#4] // get base pointer to gradient array -> R3
// prepare wrap width - start X -> R7
2: ldr r7,[sp,#36] // load wrap width
subs r7,r1 // pixels remaining to end of segment
// ---- start outer loop, render one part of segment
// Outer loop variables (* prepared before outer loop):
// R0 ... *pointer to destination data buffer
// R1 ... number of characters to generate in one part of segment
// R2 ... *pointer to source text buffer
// R3 ... *pointer to gradient array
// R4 ... *background color (expanded to 32-bit)
// R5 ... (temporary)
// R6 ... (temporary)
// R7 ... *wrap width of this segment, later: temporary
// R8 ... *pointer to font line
// LR ... *pointer to conversion table
// [SP+4] ... base pointer to gradient array
// [SP+8] ... base pointer to text data (without X)
// [SP+12] ... remaining width
// [SP+36] ... wrap width
RenderDText_OutLoop:
// limit wrap width by total width -> R7
ldr r6,[sp,#12] // get remaining width
cmp r7,r6 // compare with wrap width
bls 2f // width is OK
mov r7,r6 // limit wrap width
// check if remain whole characters
2: cmp r7,#16 // check number of remaining pixels
bhs 5f // enough characters remain
// check if 1st part of last character remains
cmp r7,#4 // check 1st part of last character
blo 3f // all done
mov r1,r7 // width to render
// ---- render 1st part of last character
// R1 ... number of pixels of the last character to render (4, 8 or 12)
RenderDText_Last:
push {r7} // save R7 (tested after the pop at label 4); it is used as scratch below
// [6] load font sample -> R7
ldrb r7,[r2,#0] // [2] load character from source text buffer -> R7
adds r2,#1 // [1] shift pointer to source text buffer
add r7,r8 // [1] pointer to font line
ldrb r7,[r7] // [2] load font sample -> R7
// [3] load foreground color, XOR with background -> R6
ldmia r3!,{r6} // [2] load foreground color from gradient buffer
eors r6,r4 // [1] XOR foreground color with background color
// [2] prepare conversion table -> R7
lsls r7,#3 // [1] multiply font sample * 8
add r7,lr // [1] add pointer to conversion table
// [4] convert first 4 pixels (higher 4 bits) -> R5
ldr r5,[r7,#0] // [2] load mask for higher 4 bits
ands r5,r6 // [1] mask foreground color
eors r5,r4 // [1] combine with background color
// store first 4 pixels (lower two mask bytes, each byte twice)
strb r5,[r0,#0] // [2]
strb r5,[r0,#1] // [2]
lsrs r5,#8 // [1]
strb r5,[r0,#2] // [2]
strb r5,[r0,#3] // [2]
adds r0,#4 // [1]
cmp r1,#4
bls 4f // only 4 pixels requested
// store next 4 pixels (upper two mask bytes, each byte twice)
lsrs r5,#8 // [1]
strb r5,[r0,#0] // [2]
strb r5,[r0,#1] // [2]
lsrs r5,#8 // [1]
strb r5,[r0,#2] // [2]
strb r5,[r0,#3] // [2]
adds r0,#4 // [1]
cmp r1,#8
bls 4f // only 8 pixels requested
// [3] load foreground color, XOR with background -> R6
ldmia r3!,{r6} // [2] load foreground color from gradient buffer
eors r6,r4 // [1] XOR foreground color with background color
// [4] convert second 4 pixels (lower 4 bits)
ldr r7,[r7,#4] // [2] load mask for lower 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
// store third group of 4 pixels (lower two bytes of the second mask word)
strb r7,[r0,#0] // [2]
strb r7,[r0,#1] // [2]
lsrs r7,#8 // [1]
strb r7,[r0,#2] // [2]
strb r7,[r0,#3] // [2]
adds r0,#4 // [1]
// check if continue with next segment
4: pop {r7}
ldr r2,[sp,#8] // get base pointer to text data -> R2
ldr r3,[sp,#4] // get base pointer to gradient array -> R3
cmp r7,#16
bhs RenderDText_OutLoop
// pop registers and return
3: pop {r4}
mov r8,r4 // restore R8
pop {r1-r7,pc}
// ---- prepare to render whole characters
// prepare number of 4-pixel groups to render -> R1
5: lsrs r1,r7,#2 // pixels/4 -> number of characters*4 (1 character is 16 pixels)
lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5
subs r6,r5 // get remaining width
str r6,[sp,#12] // save new remaining width
subs r1,#3 // number of characters*4 - 3
// ---- [65*N-1] start inner loop, render characters in one part of segment
// Inner loop variables (* prepared before inner loop):
// R0 ... *pointer to destination data buffer
// R1 ... *number of characters to generate*4 - 3 (loop counter)
// R2 ... *pointer to source text buffer
// R3 ... *pointer to gradient array
// R4 ... *background color (expanded to 32-bit)
// R5 ... (temporary)
// R6 ... foreground color
// R7 ... font sample
// R8 ... *pointer to font line
// LR ... *pointer to conversion table
// [SP+4] ... base pointer to gradient array
// [SP+8] ... base pointer to text data (without X)
// [SP+12] ... remaining width
// [SP+36] ... wrap width
RenderDText_InLoop:
// [6] load font sample -> R7
ldrb r7,[r2,#0] // [2] load character from source text buffer -> R7
adds r2,#1 // [1] shift pointer to source text buffer
add r7,r8 // [1] pointer to font line
ldrb r7,[r7] // [2] load font sample -> R7
// [3] load foreground color, XOR with background -> R6
ldmia r3!,{r6} // [2] load foreground color from gradient buffer
eors r6,r4 // [1] XOR foreground color with background color
// [2] prepare conversion table -> R7
lsls r7,#3 // [1] multiply font sample * 8
add r7,lr // [1] add pointer to conversion table
// [4] convert first 4 pixels (higher 4 bits) -> R5
ldr r5,[r7,#0] // [2] load mask for higher 4 bits
ands r5,r6 // [1] mask foreground color
eors r5,r4 // [1] combine with background color
// [20] store 8 pixels
strb r5,[r0,#0] // [2]
strb r5,[r0,#1] // [2]
lsrs r5,#8 // [1]
strb r5,[r0,#2] // [2]
strb r5,[r0,#3] // [2]
lsrs r5,#8 // [1]
strb r5,[r0,#4] // [2]
strb r5,[r0,#5] // [2]
lsrs r5,#8 // [1]
strb r5,[r0,#6] // [2]
strb r5,[r0,#7] // [2]
adds r0,#8 // [1]
// [3] load foreground color, XOR with background -> R6
ldmia r3!,{r6} // [2] load foreground color from gradient buffer
eors r6,r4 // [1] XOR foreground color with background color
// [4] convert second 4 pixels (lower 4 bits)
ldr r7,[r7,#4] // [2] load mask for lower 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
// [20] store 8 pixels
strb r7,[r0,#0] // [2]
strb r7,[r0,#1] // [2]
lsrs r7,#8 // [1]
strb r7,[r0,#2] // [2]
strb r7,[r0,#3] // [2]
lsrs r7,#8 // [1]
strb r7,[r0,#4] // [2]
strb r7,[r0,#5] // [2]
lsrs r7,#8 // [1]
strb r7,[r0,#6] // [2]
strb r7,[r0,#7] // [2]
adds r0,#8 // [1]
// [2,3] loop counter
subs r1,#4 // [1] shift loop counter
bhi RenderDText_InLoop // [1,2] > 0, render next whole character
// ---- end inner loop, continue with last character, or start new part
// continue to outer loop
ldr r7,[sp,#36] // load wrap width
adds r1,#3 // return size of last tile (number of left-over 4-pixel groups, 0..3)
lsls r1,#2 // convert back to pixels
bne RenderDText_Last // left-over pixels: render 1st part of last character
ldr r2,[sp,#8] // get base pointer to text data -> R2
ldr r3,[sp,#4] // get base pointer to gradient array -> R3
b RenderDText_OutLoop // go back to outer loop
// literal pool, read with PC-relative loads at the start of the function
.align 2
RenderDText_Addr:
.word RenderTextMask
RenderDText_pSioBase:
.word SIO_BASE // address of SIO base

View file

@ -1,160 +0,0 @@
// ****************************************************************************
//
// VGA render LAYERMODE_FASTSPRITE*
//
// ****************************************************************************
#include "../define.h" // common definitions of C and ASM
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// extern "C" u32* RenderFastSprite(u32* cbuf, int y, sLayer* scr, u8* buf)
// render layers with fast sprites LAYERMODE_FASTSPRITE*
// Builds the control buffer for one scanline. Each entry written to cbuf is a
// pair of 32-bit words: (number of 32-bit words, address of the pixel data).
// Gaps between sprites and the rest of the line point to 'buf'; sprite spans
// point directly into the sprite image.
// R0 ... cbuf pointer to control buffer
// R1 ... y coordinate of scanline
// R2 ... scr pointer to layer screen structure sLayer
// R3 ... buf pointer to destination data buffer with transparent color
// Output new pointer to control buffer.
// R0 is never saved or restored, so the advanced control buffer pointer is
// the return value.
// NOTE(review): sprites are walked once in list order and X0 only grows, so a
// sprite starting left of the current X0 is clipped - presumably the list is
// expected to be sorted by X; verify against the caller.
.thumb_func
.global RenderFastSprite
RenderFastSprite:
// push registers
push {r1-r7,lr}
// Stack content and input variables:
// R0 cbuf pointer to control buffer
// SP+0: R1 Y coordinate of scanline
// SP+4: R2 scr pointer to layer screen structure sLayer, later: num number of sprites
// SP+8: R3 buf pointer to data buffer with transparent color
// SP+12: R4
// SP+16: R5
// SP+20: R6
// SP+24: R7
// SP+28: LR
// Variables:
// R0 ... pointer to destination control buffer
// R1 ... X0 absolute coordinate counted from start
// R2 ... W layer screen width
// R3 ... s pointer to current sprite, later: absolute X coordinate of start of sprite
// R4 ... Y2 coordinate relative to sprite base, later: s->img[Y2*WB] address of sprite line
// R5 ... relative X2 coordinate of sprite segment
// R6 ... W2 width of sprite segment
// R7 ... (temporary)
// LR ... spr pointer to list of sprites
// [SP+0] ... (R1) Y coordinate of scanline
// [SP+4] ... (R2) num number of sprites (loop counter)
// [SP+8] ... (R3) buf pointer to data buffer with transparent color
// load pointer to list of sprites -> LR
ldr r7,[r2,#SLAYER_IMG]
mov lr,r7
// load number of sprites -> [SP+4]
ldrh r7,[r2,#SLAYER_SPRITENUM]
str r7,[sp,#4]
// load screen width -> R2
ldrh r2,[r2,#SLAYER_W]
// reset absolute coordinate X0 -> R1
movs r1,#0 // R1 <- 0
// count number of sprites, end if num = 0
2: ldr r7,[sp,#4] // get number of sprites
subs r7,#1 // decrement number of sprites
blo 8f // borrow (counter was 0): no other sprites
str r7,[sp,#4] // save new number of sprites
// get pointer to next sprite -> R3
mov r7,lr // pointer to list of sprites -> R7
ldmia r7!,{r3} // pointer to sprite -> R3
mov lr,r7 // save new pointer to list of sprites -> LR
// prepare Y2 coordinate relative to sprite base -> R4
ldrh r7,[r3,#SSPRITE_Y] // get Y coordinate of the sprite -> R7
sxth r7,r7 // signed extend Y2
ldr r4,[sp,#0] // Y coordinate of scanline -> R4
subs r4,r7 // relative coordinate Y2 = Y - s->y
// check if Y2 coordinate is valid
bmi 2b // Y2 < 0, go next sprite
ldrh r7,[r3,#SSPRITE_H] // get sprite height
cmp r4,r7 // check sprite height
bge 2b // Y2 >= s->h, go next sprite
// get relative start X2 coordinate of this line segment -> R5
ldr r7,[r3,#SSPRITE_X0] // get table of X0 of lines
ldrb r5,[r7,r4] // get X2 coordinate -> R5
lsls r5,#2 // convert X2 coordinate to byte offset
// get width W2 of this line segment -> R6
ldr r7,[r3,#SSPRITE_W0] // get table of W0 of lines
ldrb r6,[r7,r4] // get W2 width -> R6
lsls r6,#2 // convert W2 width to bytes
// get address of sprite line s->img[Y2*s->wb] -> R4
ldrh r7,[r3,#SSPRITE_WB] // get sprite pitch s->wb
muls r4,r4,r7 // sprite offset Y2*s->wb
ldr r7,[r3,#SSPRITE_IMG] // get sprite image
add r4,r7 // line address -> R4
// get absolute X coordinate of start of line -> R3
ldrh r3,[r3,#SSPRITE_X] // get sprite X coordinate -> R3
sxth r3,r3 // signed extend X
adds r3,r3,r5 // s->X + X2, X coordinate of start of line -> R3
// check if sprite coordinate X lies below current X0 coordinate
subs r7,r1,r3 // difference X0 - X -> R7
ble 3f // X0 <= X, sprite does not lie below current X0
// sprite correction: clip the part of the sprite left of X0
adds r5,r7 // X2 += X0 - X
subs r6,r7 // W2 -= X0 - X
mov r3,r1 // X = X0
// check line length W2
3: subs r7,r2,r3 // W - X -> R7
cmp r6,r7 // compare W2 with W - X
ble 4f // W2 <= W - X, length is OK
mov r6,r7 // limit segment width W2 -> R6
// align to word
4: movs r7,#3 // mask to word
bics r3,r7 // align X
bics r5,r7 // align X2
bics r6,r7 // align W2
// NOTE(review): ble also reads the V flag; BICS presumably updates only N and Z,
// so V would still be the one left by the cmp at label 3 - confirm in the ARMv6-M manual
ble 2b // no W2 left (W2 <= 0)
// decode space before sprite
subs r7,r3,r1 // X - X0 -> R7
ble 5f // no space left before sprite
lsrs r7,#2 // number of words (X - X0)/4
stmia r0!,{r7} // write number of words
ldr r7,[sp,#8] // pointer to data buffer -> R7
stmia r0!,{r7} // write address
mov r1,r3 // shift X0
// write sprite line
5: adds r7,r4,r5 // address of pixel &s->img[y2*s->wb+x2] -> R7
lsrs r4,r6,#2 // W2/4 line length -> R4
stmia r0!,{r4,r7} // write sprite length and address
adds r1,r6 // add X0 += W2
b 2b // next sprite
// clear rest of scanline
8: subs r2,r1 // subtract W - X0
bls 9f // no pixels left
lsrs r2,#2 // (W - X0)/4
ldr r3,[sp,#8] // pointer to data buffer -> R3
stmia r0!,{r2,r3} // write number of words and address
// pop registers and return
9: pop {r1-r7,pc}

View file

@ -1,313 +0,0 @@
// ****************************************************************************
//
// VGA render GF_FTEXT
//
// ****************************************************************************
// u32 par SSEGM_PAR pointer to the font
// u32 par2 SSEGM_PAR2 background color
// u16 par3 font height
#include "../define.h" // common definitions of C and ASM
#include "hardware/regs/sio.h" // registers of hardware divider
#include "hardware/regs/addressmap.h" // SIO base address
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// render font pixel mask
.extern RenderTextMask // u32 RenderTextMask[512];
// extern "C" u8* RenderFText(u8* dbuf, int x, int y, int w, sSegm* segm)
// render 8-pixel foreground color text GF_FTEXT
// R0 ... destination data buffer
// R1 ... start X coordinate (in pixels, must be multiple of 4)
// R2 ... start Y coordinate (in graphics lines)
// R3 ... width to display (must be multiple of 4 and > 0)
// [stack] ... segm video segment sSegm
// Output new pointer to destination data buffer.
// 320 pixels takes 8.7 us on 151 MHz.
.thumb_func
.global RenderFText
RenderFText:
// push registers
push {r1-r7,lr}
// Stack content:
// SP+0: R1 start X coordinate
// SP+4: R2 start Y coordinate (later: base pointer to text data row)
// SP+8: R3 width to display
// SP+12: R4
// SP+16: R5
// SP+20: R6
// SP+24: R7
// SP+28: LR
// SP+32: video segment (later: wrap width in X direction)
// get pointer to video segment -> R4
ldr r4,[sp,#32] // load video segment -> R4
// start divide Y/font height
ldr r6,RenderFText_pSioBase // get address of SIO base -> R6
str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate
ldrh r2,[r4,#SSEGM_PAR3] // font height -> R2
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height
// - now we must wait at least 8 clock cycles to get result of division
// [6] get wrap width -> [SP+32]
ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width
movs r7,#3 // [1] mask to align to 32-bit
bics r5,r7 // [1] align wrap
str r5,[sp,#32] // [2] save wrap width
// [1] align X coordinate to 32-bit
bics r1,r7 // [1]
// [3] align remaining width
bics r3,r7 // [1]
str r3,[sp,#8] // [2] save new width
// load result of division Y/font_height -> R5 Y relative at row, R2 Y row
// Note: QUOTIENT must be read last
ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row
ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R2, index of row
// pointer to font line -> R3
lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long)
ldr r3,[r4,#SSEGM_PAR] // get pointer to font
add r3,r5 // line offset + font base -> pointer to current font line R3
// base pointer to text data (without X) -> [SP+4], R2
ldrh r5,[r4,#SSEGM_WB] // get pitch of rows
muls r2,r5 // Y * WB -> offset of row in text buffer
ldr r5,[r4,#SSEGM_DATA] // pointer to data
add r2,r5 // base address of text buffer
str r2,[sp,#4] // save pointer to text buffer
// prepare pointer to text data with X -> R2
lsrs r6,r1,#3 // convert X to character index (1 character is 8 pixels width)
lsls r6,#1 // convert to character offset (1 position is: 1 character + 1 color)
add r2,r6 // pointer to source text buffer -> R2
// prepare background color, expand to 32 bits -> R4
ldrb r4,[r4,#SSEGM_PAR2] // load background color
lsls r5,r4,#8 // shift background color << 8
orrs r5,r4 // color expanded to 16 bits
lsls r4,r5,#16 // shift 16-bit color << 16
orrs r4,r5 // color expanded to 32 bits
// prepare pointer to conversion table -> LR
ldr r5,RenderFText_Addr // get pointer to conversion table -> R5
mov lr,r5 // conversion table -> LR
// ---- render 2nd half of first character
// R0 ... pointer to destination data buffer
// R1 ... start X coordinate
// R2 ... pointer to source text buffer
// R3 ... pointer to font line
// R4 ... background color (expanded to 32-bit)
// R5..R7 ... (temporary)
// LR ... pointer to conversion table
// [SP+4] ... base pointer to text data (without X)
// [SP+8] ... remaining width
// [SP+32] ... wrap width
// check bit 2 of X coordinate - check if image starts with 2nd half of first character
lsls r6,r1,#29 // check bit 2 of X coordinate
bpl 2f // bit 2 not set, starting even 4-pixels
// [4] load font sample -> R5
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
ldrb r5,[r3,r5] // [2] load font sample -> R5
// [3] load foreground color -> R6
ldrb r6,[r2,#1] // [2] load foreground color from source text buffer -> R6
adds r2,#2 // [1] shift pointer to source text buffer
// [4] expand foreground color to 32-bit -> R6
lsls r7,r6,#8 // [1] shift foreground color << 8
orrs r7,r6 // [1] color expanded to 16 bits
lsls r6,r7,#16 // [1] shift 16-bit color << 16
orrs r6,r7 // [1] color expanded to 32 bits
// [1] XOR foreground and background color -> R6
eors r6,r4 // [1] XOR foreground color with background color
// [2] prepare conversion table -> R5
lsls r5,#3 // [1] multiply font sample * 8
add r5,lr // [1] add pointer to conversion table
// [6] convert second 4 pixels (lower 4 bits)
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r7} // [2] store second 4 pixels
// shift X coordinate
adds r1,#4 // shift X coordinate
// check end of segment
ldr r7,[sp,#32] // load wrap width
cmp r1,r7 // end of segment?
blo 1f
movs r1,#0 // reset X coordinate
ldr r2,[sp,#4] // get base pointer to text data -> R2
// shift remaining width
1: ldr r7,[sp,#8] // get remaining width
subs r7,#4 // shift width
str r7,[sp,#8] // save new width
// prepare wrap width - start X -> R7
2: ldr r7,[sp,#32] // load wrap width
subs r7,r1 // pixels remaining to end of segment
// ---- start outer loop, render one part of segment
// Outer loop variables (* prepared before outer loop):
// R0 ... *pointer to destination data buffer
// R1 ... number of characters to generate in one part of segment
// R2 ... *pointer to source text buffer
// R3 ... *pointer to font line
// R4 ... *background color (expanded to 32-bit)
// R5 ... (temporary)
// R6 ... (temporary)
// R7 ... *wrap width of this segment, later: temporary
// LR ... *pointer to conversion table
// [SP+4] ... *base pointer to text data (without X)
// [SP+8] ... *remaining width
// [SP+32] ... *wrap width
RenderFText_OutLoop:
// limit wrap width by total width -> R7
ldr r6,[sp,#8] // get remaining width
cmp r7,r6 // compare with wrap width
bls 2f // width is OK
mov r7,r6 // limit wrap width
// check if remain whole characters
2: cmp r7,#8 // check number of remaining pixels
bhs 5f // enough characters remain
// check if 1st part of last character remains
cmp r7,#4 // check 1st part of last character
blo 3f // all done
// ---- render 1st part of last character
RenderFText_Last:
// [4] load font sample -> R5
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
ldrb r5,[r3,r5] // [2] load font sample -> R5
// [3] load foreground color -> R6
ldrb r6,[r2,#1] // [2] load foreground color from source text buffer -> R6
adds r2,#2 // [1] shift pointer to source text buffer
// [4] expand foreground color to 32-bit
lsls r1,r6,#8 // [1] shift foreground color << 8
orrs r1,r6 // [1] color expanded to 16 bits
lsls r6,r1,#16 // [1] shift 16-bit color << 16
orrs r6,r1 // [1] color expanded to 32 bits
// [1] XOR foreground and background color -> R6
eors r6,r4 // [1] XOR foreground color with background color
// [2] prepare conversion table -> R5
lsls r5,#3 // [1] multiply font sample * 8
add r5,lr // [1] add pointer to conversion table
// [6] convert first 4 pixels (higher 4 bits)
ldr r1,[r5,#0] // [2] load mask for higher 4 bits
ands r1,r6 // [1] mask foreground color
eors r1,r4 // [1] combine with background color
stmia r0!,{r1} // [2] store first 4 pixels
// check if continue with next segment
ldr r2,[sp,#4] // get base pointer to text data -> R2
cmp r7,#4
bhi RenderFText_OutLoop
// pop registers and return
3: pop {r1-r7,pc}
// ---- prepare to render whole characters
// prepare number of whole characters to render -> R1
5: lsrs r1,r7,#2 // shift to get number of characters*2
lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5
subs r6,r5 // get remaining width
str r6,[sp,#8] // save new remaining width
subs r1,#1 // number of characters*2 - 1
// ---- [29*N-1] start inner loop, render characters in one part of segment
// Inner loop variables (* prepared before inner loop):
// R0 ... *pointer to destination data buffer
// R1 ... *number of characters to generate*2 - 1 (loop counter)
// R2 ... *pointer to source text buffer
// R3 ... *pointer to font line
// R4 ... *background color (expanded to 32-bit)
// R5 ... font sample
// R6 ... foreground color (expanded to 32-bit)
// R7 ... (temporary)
// LR ... *pointer to conversion table
RenderFText_InLoop:
// [4] load font sample -> R5
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
ldrb r5,[r3,r5] // [2] load font sample -> R5
// [3] load foreground color -> R6
ldrb r6,[r2,#1] // [2] load foreground color from source text buffer -> R6
adds r2,#2 // [1] shift pointer to source text buffer
// [4] expand foreground color to 32-bit
lsls r7,r6,#8 // [1] shift foreground color << 8
orrs r7,r6 // [1] color expanded to 16 bits
lsls r6,r7,#16 // [1] shift 16-bit color << 16
orrs r6,r7 // [1] color expanded to 32 bits
// [1] XOR foreground and background color -> R6
eors r6,r4 // [1] XOR foreground color with background color
// [2] prepare conversion table -> R5
lsls r5,#3 // [1] multiply font sample * 8
add r5,lr // [1] add pointer to conversion table
// [6] convert first 4 pixels (higher 4 bits)
ldr r7,[r5,#0] // [2] load mask for higher 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r7} // [2] store first 4 pixels
// [6] convert second 4 pixels (lower 4 bits)
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r7} // [2] store second 4 pixels
// [2,3] loop counter
subs r1,#2 // [1] shift loop counter
bhi RenderFText_InLoop // [1,2] > 0, render next whole character
// ---- end inner loop, continue with last character, or start new part
// continue to outer loop
ldr r7,[sp,#32] // load wrap width
beq RenderFText_Last // render 1st half of last character
ldr r2,[sp,#4] // get base pointer to text data -> R2
b RenderFText_OutLoop // go back to outer loop
.align 2
RenderFText_Addr:
.word RenderTextMask
RenderFText_pSioBase:
.word SIO_BASE // addres of SIO base

View file

@ -1,258 +0,0 @@
// ****************************************************************************
//
// VGA render GF_GRAPH1
//
// ****************************************************************************
#include "../define.h" // common definitions of C and ASM
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// render font pixel mask
.extern RenderTextMask // u32 RenderTextMask[512];
// extern "C" u8* RenderGraph1(u8* dbuf, int x, int y, int w, sSegm* segm);
// Render one line of 1-bit palette graphics GF_GRAPH1.
// Every source byte describes 8 pixels. Each half of the byte is expanded to
// one 32-bit word (4 pixels of 8 bits) using the RenderTextMask table
// (2 words per byte value: +0 = higher 4 bits, +4 = lower 4 bits):
//   output = (mask & (foreground ^ background)) ^ background
// When the wrap width is reached, reading continues from the start of the row.
// dbuf ... destination data buffer
// x ... start X coordinate (must be multiple of 4; low 2 bits are cleared below)
// y ... start Y coordinate
// w ... width of this segment (must be multiple of 4; low 2 bits are cleared below)
// segm ... video segment
// Output new dbuf pointer (R0 is advanced by every store).
// 320 pixels takes 6 us on 151 MHz.
.thumb_func
.global RenderGraph1
RenderGraph1:
// push registers (R3 is pushed as well, its stack slot [SP+0] is used as "remaining width")
push {r3-r7,lr}
// Input registers and stack content:
// R0 ... destination data buffer
// R1 ... start X coordinate
// R2 ... start Y coordinate
// SP+0: R3 width to display
// SP+4: R4
// SP+8: R5
// SP+12: R6
// SP+16: R7
// SP+20: LR
// SP+24: video segment (later: wrap width in X direction)
// get pointer to video segment -> R4
ldr r4,[sp,#24] // load video segment -> R4
// get wrap width -> [SP+24] (overwrites the stack slot of the 5th argument)
ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width
movs r7,#3 // mask to align to 32-bit
bics r5,r7 // align wrap
str r5,[sp,#24] // save wrap width
// align X coordinate to 32-bit -> R1
bics r1,r7
// align remaining width -> [SP+0]
bics r3,r7
str r3,[sp,#0] // save new width
// base pointer to image data (without X) -> LR
ldrh r5,[r4,#SSEGM_WB] // get pitch of lines
muls r2,r5 // Y * WB -> offset of row in image buffer
ldr r5,[r4,#SSEGM_DATA] // pointer to data
add r2,r5 // base address of row in image buffer
mov lr,r2 // save base pointer to image row
// prepare pointer to image data with X -> R2
lsrs r2,r1,#3 // convert X to byte index (1 byte is 8 pixels wide)
add r2,lr // pointer to source image buffer -> R2
// prepare foreground color, expand to 32-bit -> R6
ldrb r6,[r4,#SSEGM_PAR+1] // load foreground color
lsls r7,r6,#8 // [1] shift foreground color << 8
orrs r7,r6 // [1] color expanded to 16 bits
lsls r6,r7,#16 // [1] shift 16-bit color << 16
orrs r6,r7 // [1] color expanded to 32 bits
// prepare background color, expand to 32 bits -> R4
ldrb r4,[r4,#SSEGM_PAR] // load background color (R4 no longer points to the segment)
lsls r5,r4,#8 // shift background color << 8
orrs r5,r4 // color expanded to 16 bits
lsls r4,r5,#16 // shift 16-bit color << 16
orrs r4,r5 // color expanded to 32 bits
// [1] XOR foreground and background color -> R6
eors r6,r4 // [1] R6 = foreground ^ background, so (mask & R6) ^ R4 selects fg or bg per pixel
// prepare pointer to conversion table -> R3
ldr r3,RenderGraph1_Addr // get pointer to conversion table -> R3
// ---- render 2nd half of first character
// R0 ... pointer to destination data buffer
// R1 ... start X coordinate
// R2 ... current pointer to image buffer
// R3 ... pointer to conversion table
// R4 ... background color (expanded to 32-bit)
// R5 ... (temporary)
// R6 ... foreground color XOR background color (expanded to 32-bit)
// R7 ... (temporary)
// LR ... base pointer to image data (without X)
// [SP+0] ... remaining width
// [SP+24] ... wrap width
// check bit 2 of X coordinate - check if image starts with 2nd half of first character
lsls r5,r1,#29 // move bit 2 of X coordinate to the sign bit (N flag)
bpl 2f // bit 2 not set, starting even 4-pixels
// [3] load image sample -> R5
ldrb r5,[r2,#0] // [2] load image sample -> R5
adds r2,#1 // [1] shift pointer to image buffer
// [2] prepare conversion table -> R5
lsls r5,#3 // [1] multiply image sample * 8 (2 words per table entry)
add r5,r3 // [1] add pointer to conversion table
// [6] convert second 4 pixels (lower 4 bits)
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r7} // [2] store second 4 pixels
// shift X coordinate
adds r1,#4 // shift X coordinate
// check end of segment
ldr r7,[sp,#24] // load wrap width
cmp r1,r7 // X=end of segment?
blo 1f
movs r1,#0 // reset X coordinate
mov r2,lr // get base pointer to image data -> R2
// shift remaining width
1: ldr r7,[sp,#0] // get remaining width
subs r7,#4 // shift width
str r7,[sp,#0] // save new width
// prepare wrap width - start X -> R7
2: ldr r7,[sp,#24] // load wrap width
subs r7,r1 // pixels remaining to end of segment
// ---- start outer loop, render one part of segment
// Outer loop variables (* prepared before outer loop):
// R0 ... *pointer to destination data buffer
// R1 ... number of characters to generate in one part of segment
// R2 ... *current pointer to image buffer
// R3 ... *pointer to conversion table
// R4 ... *background color (expanded to 32-bit)
// R5 ... (temporary)
// R6 ... *foreground color XOR background color (expanded to 32-bit)
// R7 ... *wrap width of this segment, later: temporary
// LR ... *base pointer to image data (without X)
// [SP+0] ... *remaining width
// [SP+24] ... *wrap width
RenderGraph1_OutLoop:
// limit wrap width by total width -> R7
ldr r5,[sp,#0] // get remaining width
cmp r7,r5 // compare with wrap width
bls 2f // width is OK
mov r7,r5 // limit wrap width
// check if remain whole characters
2: cmp r7,#8 // check number of remaining pixels
bhs 5f // enough characters remain
// check if 1st part of last character remains
cmp r7,#4 // check 1st part of last character
blo 3f // all done
// ---- render 1st part of last character
// Entered either by falling through from above (R7 = width of this part, 4..7)
// or from the end of the inner loop (R7 = wrap width reloaded from [SP+24]).
RenderGraph1_Last:
// [3] load image sample -> R5
ldrb r5,[r2,#0] // [2] load image sample -> R5
adds r2,#1 // [1] shift pointer to image buffer
// [2] prepare conversion table -> R5
lsls r5,#3 // [1] multiply image sample * 8
add r5,r3 // [1] add pointer to conversion table
// [6] convert first 4 pixels (higher 4 bits)
ldr r1,[r5,#0] // [2] load mask for higher 4 bits
ands r1,r6 // [1] mask foreground color
eors r1,r4 // [1] combine with background color
stmia r0!,{r1} // [2] store first 4 pixels
// check if continue with next segment
mov r2,lr // get base pointer to image data -> R2
// NOTE(review): on the fall-through path [SP+0] is not decreased and R7 is 4
// (all widths are aligned to 4), so the function returns here even if more
// width remains - confirm this is intended for wrap widths not multiple of 8.
cmp r7,#4
bhi RenderGraph1_OutLoop
// pop registers and return (R3 slot is popped as well, R0 = new dbuf pointer)
3: pop {r3-r7,pc}
// ---- prepare to render whole characters
// prepare number of whole characters to render -> R1
5: lsrs r1,r7,#2 // shift to get number of characters*2 (= number of 4-pixel groups)
lsls r7,r1,#2 // shift back to get number of pixels, rounded down -> R7
subs r5,r7 // get remaining width (includes a possible trailing half character)
str r5,[sp,#0] // save new remaining width
subs r1,#1 // number of characters*2 - 1
// ---- [20*N-1] start inner loop, render characters in one part of segment
// Inner loop variables (* prepared before inner loop):
// R0 ... *pointer to destination data buffer
// R1 ... *number of characters to generate*2 - 1 (loop counter)
// R2 ... *current pointer to image buffer
// R3 ... *pointer to conversion table
// R4 ... *background color (expanded to 32-bit)
// R5 ... image sample, later: pointer into conversion table
// R6 ... *foreground color XOR background color (expanded to 32-bit)
// R7 ... (temporary)
// LR ... *base pointer to image data (without X)
RenderGraph1_InLoop:
// [3] load image sample -> R5
ldrb r5,[r2,#0] // [2] load image sample -> R5
adds r2,#1 // [1] shift pointer to image buffer
// [2] prepare conversion table -> R5
lsls r5,#3 // [1] multiply image sample * 8
add r5,r3 // [1] add pointer to conversion table
// [6] convert first 4 pixels (higher 4 bits)
ldr r7,[r5,#0] // [2] load mask for higher 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r7} // [2] store first 4 pixels
// [6] convert second 4 pixels (lower 4 bits)
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r7} // [2] store second 4 pixels
// [2,3] loop counter
// The counter ends at 0 if one 4-pixel group (half character) is left over,
// or goes below 0 if the part consisted of whole characters only.
subs r1,#2 // [1] shift loop counter
bhi RenderGraph1_InLoop // [1,2] > 0, render next whole character
// ---- end inner loop, continue with last character, or start new part
// continue to outer loop
ldr r7,[sp,#24] // load wrap width (LDR does not change flags, BEQ below still tests SUBS)
beq RenderGraph1_Last // counter = 0: render 1st half of last character
mov r2,lr // get base pointer to image data -> R2
b RenderGraph1_OutLoop // go back to outer loop
.align 2
// literal: address of the conversion table, loaded PC-relative above
RenderGraph1_Addr:
.word RenderTextMask

View file

@ -1,173 +0,0 @@
// ****************************************************************************
//
// VGA render GF_GRAPH2
//
// ****************************************************************************
#include "../define.h" // common definitions of C and ASM
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// extern "C" u8* RenderGraph2(u8* dbuf, int x, int y, int w, sSegm* segm);
// Render one line of 2-bit palette graphics GF_GRAPH2.
// Every source byte describes 4 pixels; the byte is used as an index into a
// table of 32-bit entries (pointer taken from the segment parameter), and the
// entry is written to the destination as 4 output pixels.
// When the wrap width is reached, reading continues from the start of the row.
// R0 ... destination data buffer
// R1 ... start X coordinate (must be multiple of 4; low 2 bits are cleared below)
// R2 ... start Y coordinate
// R3 ... width of this segment (must be multiple of 4; low 2 bits are cleared below)
// segm ... video segment
// Output new dbuf pointer (R0 is advanced by every store).
// 320 pixels takes 5 us on 151 MHz.
.thumb_func
.global RenderGraph2
RenderGraph2:
// push registers (R3 is pushed as well, its stack slot [SP+0] is used as "remaining width")
push {r3-r7,lr}
// Input registers and stack content:
// R0 ... destination data buffer
// R1 ... start X coordinate
// R2 ... start Y coordinate
// SP+0: R3 ... width to display (remaining width)
// SP+4: R4
// SP+8: R5
// SP+12: R6
// SP+16: R7
// SP+20: LR
// SP+24: video segment
// get pointer to video segment -> R4
ldr r4,[sp,#24] // load video segment -> R4
// get wrap width -> R7
ldrh r7,[r4,#SSEGM_WRAPX] // get wrap width
movs r6,#3 // mask to align to 32-bit
bics r7,r6 // align wrap
// align X coordinate to 32-bit -> R1
bics r1,r6
// align remaining width -> [SP+0]
bics r3,r6
str r3,[sp,#0] // save new width
// base pointer to image data (without X) -> LR, R2
ldrh r5,[r4,#SSEGM_WB] // get pitch of rows
muls r2,r5 // Y * WB -> offset of row in image buffer
ldr r5,[r4,#SSEGM_DATA] // pointer to data
add r2,r5 // base address of row in image buffer
mov lr,r2 // save base pointer to image row
// prepare pointer to image data with X -> R2
lsrs r6,r1,#2 // convert X to byte index (1 byte is 4 pixels wide)
add r2,r6 // add index, pointer to source image buffer -> R2
// prepare pointer to palette translation table -> R3
ldr r3,[r4,#SSEGM_PAR] // get pointer to palette translation table -> R3
// prepare wrap width - start X -> R6
subs r6,r7,r1 // pixels remaining to end of segment
// ---- start outer loop, render one part of segment
// Outer loop variables (* prepared before outer loop):
// R0 ... *pointer to destination data buffer
// R1 ... number of 4-pixels to generate in one part of segment
// R2 ... *pointer to source image buffer
// R3 ... *pointer to palette translation table
// R4 ... (temporary)
// R5 ... (temporary)
// R6 ... *part width (pixels up to the wrap point)
// R7 ... *wrap width
// LR ... *base pointer to image data (without X)
// [SP+0] ... width to display
RenderGraph2_OutLoop:
// limit part width by total width -> R6
ldr r4,[sp,#0] // get remaining width
cmp r6,r4 // compare with part width
bls 2f // width is OK
mov r6,r4 // limit part width
// check number of pixels
2: cmp r6,#4 // check number of remaining pixels
bhs 5f // at least one 4-pixel group remains
// pop registers and return (R3 slot is popped as well, R0 = new dbuf pointer)
pop {r3-r7,pc}
// ---- prepare to render whole characters
// prepare number of 4-pixels to render -> R1
5: lsrs r1,r6,#2 // shift to get number of 4-pixels
lsls r6,r1,#2 // shift back to get number of pixels, rounded down -> R6
subs r4,r6 // get remaining width
str r4,[sp,#0] // save new remaining width
// ---- generate odd pixel
// [2,3] check odd pixel
lsrs r1,#1 // [1] R1 = number of 8-pixel pairs, carry = odd 4-pixel group present
bcc RenderGraph2_InLoop // [1,2] odd pixel not set (R1 is >= 1 here)
// [3] load image sample -> R4
ldrb r4,[r2,#0] // [2] load image sample
adds r2,#1 // [1] increase pointer to image data
// [5] write 4 pixels
lsls r4,#2 // [1] index*4 (table entries are 32-bit)
ldr r5,[r3,r4] // [2] load colors
stmia r0!,{r5} // [2] write pixels
// [2,3] check end of data
tst r1,r1 // [1] check counter
beq RenderGraph2_EndLoop // [1,2] no 8-pixel pairs to render
// ---- [17*N-1] start inner loop, render pixels in one part of segment
// Inner loop variables (* prepared before inner loop):
// R0 ... *pointer to destination data buffer
// R1 ... *number of 8-pixel pairs to generate (loop counter, 2 source bytes per step)
// R2 ... *pointer to source image buffer
// R3 ... *pointer to palette translation table
// R4 ... image sample
// R5 ... output pixels
// R6 ... output pixels (part width is not needed any more)
// R7 ... *wrap width
// LR ... *base pointer to image data (without X)
RenderGraph2_InLoop:
// [2] load image sample -> R4
ldrb r4,[r2,#0] // [2] load image sample
// [3] prepare 4 pixels
lsls r4,#2 // [1] index*4
ldr r5,[r3,r4] // [2] load colors
// [3] load image sample -> R4
ldrb r4,[r2,#1] // [2] load image sample
adds r2,#2 // [1] increase pointer to image data
// [6] prepare and write next 4 pixels
lsls r4,#2 // [1] index*4
ldr r6,[r3,r4] // [2] load colors
stmia r0!,{r5,r6} // [3] write 8 pixels
// [2,3] loop counter
subs r1,#1 // [1] loop counter
bne RenderGraph2_InLoop // [1,2] next step
// ---- end inner loop, start new part
RenderGraph2_EndLoop:
// continue to outer loop, next part starts at X = 0
mov r6,r7 // load wrap width -> R6
mov r2,lr // get base pointer to image data -> R2
b RenderGraph2_OutLoop // go back to outer loop

View file

@ -1,214 +0,0 @@
// ****************************************************************************
//
// VGA render GF_GRAPH4
//
// ****************************************************************************
#include "../define.h" // common definitions of C and ASM
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// extern "C" u8* RenderGraph4(u8* dbuf, int x, int y, int w, sSegm* segm);
// Render one line of 4-bit palette graphics GF_GRAPH4.
// Every source byte describes 2 pixels; the byte is used as an index into a
// table of 16-bit entries (pointer taken from the segment parameter), and two
// entries are composed into one 32-bit word of 4 output pixels.
// When the wrap width is reached, reading continues from the start of the row.
// R0 ... destination data buffer
// R1 ... start X coordinate (must be multiple of 4; low 2 bits are cleared below)
// R2 ... start Y coordinate
// R3 ... width of this segment (must be multiple of 4; low 2 bits are cleared below)
// segm ... video segment
// Output new dbuf pointer (R0 is advanced by every store).
// 320 pixels takes 8.8 us on 151 MHz.
.thumb_func
.global RenderGraph4
RenderGraph4:
// push registers (R3 is pushed as well, its stack slot [SP+0] is used as "remaining width")
push {r3-r7,lr}
// Input registers and stack content:
// R0 ... destination data buffer
// R1 ... start X coordinate
// R2 ... start Y coordinate
// SP+0: R3 ... width to display (remaining width)
// SP+4: R4
// SP+8: R5
// SP+12: R6
// SP+16: R7
// SP+20: LR
// SP+24: video segment (later: wrap width in X direction)
// get pointer to video segment -> R4
ldr r4,[sp,#24] // load video segment -> R4
// get wrap width -> [SP+24] (overwrites the stack slot of the 5th argument;
// kept on the stack because R7 is used as temporary in the inner loop)
ldrh r7,[r4,#SSEGM_WRAPX] // get wrap width
movs r6,#3 // mask to align to 32-bit
bics r7,r6 // align wrap
str r7,[sp,#24] // save wrap width
// align X coordinate to 32-bit -> R1
bics r1,r6
// align remaining width -> [SP+0]
bics r3,r6
str r3,[sp,#0] // save new width
// base pointer to image data (without X) -> LR, R2
ldrh r5,[r4,#SSEGM_WB] // get pitch of rows
muls r2,r5 // Y * WB -> offset of row in image buffer
ldr r5,[r4,#SSEGM_DATA] // pointer to data
add r2,r5 // base address of row in image buffer
mov lr,r2 // save base pointer to image row
// prepare pointer to image data with X -> R2
lsrs r6,r1,#1 // convert X to byte index (1 byte is 2 pixels wide)
add r2,r6 // add index, pointer to source image buffer -> R2
// prepare pointer to palette translation table -> R3
ldr r3,[r4,#SSEGM_PAR] // get pointer to palette translation table -> R3
// prepare wrap width - start X -> R6
ldr r6,[sp,#24] // load wrap width
subs r6,r1 // pixels remaining to end of segment
// ---- start outer loop, render one part of segment
// Outer loop variables (* prepared before outer loop):
// R0 ... *pointer to destination data buffer
// R1 ... number of 4-pixels to generate in one part of segment
// R2 ... *pointer to source image buffer
// R3 ... *pointer to palette translation table
// R4 ... (temporary)
// R5 ... (temporary)
// R6 ... *part width (pixels up to the wrap point)
// R7 ... (temporary)
// LR ... *base pointer to image data (without X)
// [SP+0] ... width to display
// [SP+24] ... wrap width
RenderGraph4_OutLoop:
// limit part width by total width -> R6
ldr r4,[sp,#0] // get remaining width
cmp r6,r4 // compare with part width
bls 2f // width is OK
mov r6,r4 // limit part width
// check number of pixels
2: cmp r6,#4 // check number of remaining pixels
bhs 5f // at least one 4-pixel group remains
// pop registers and return (R3 slot is popped as well, R0 = new dbuf pointer)
pop {r3-r7,pc}
// ---- prepare to render whole characters
// prepare number of 4-pixels to render -> R1
5: lsrs r1,r6,#2 // shift to get number of 4-pixels
lsls r6,r1,#2 // shift back to get number of pixels, rounded down -> R6
subs r4,r6 // get remaining width
str r4,[sp,#0] // save new remaining width
// ---- generate odd pixel
// [2,3] check odd pixel
lsrs r1,#1 // [1] R1 = number of 8-pixel pairs, carry = odd 4-pixel group present
bcc RenderGraph4_InLoop // [1,2] odd pixel not set (R1 is >= 1 here)
// [2] load image sample -> R4
ldrb r4,[r2,#0] // [2] load image sample
// [3] prepare 1st and 2nd pixel -> R5
lsls r4,#1 // [1] index*2 (table entries are 16-bit)
ldrh r5,[r3,r4] // [2] load 2 pixels
// [3] load image sample -> R4
ldrb r4,[r2,#1] // [2] load image sample
adds r2,#2 // [1] increase pointer to image data
// [3] prepare 3rd and 4th pixel -> R6
lsls r4,#1 // [1] index*2
ldrh r6,[r3,r4] // [2] load 2 pixels
// [2] compose pixels -> R5
lsls r6,#16 // [1] shift 3rd and 4th pixels
orrs r5,r6 // [1] compose pixels
// [2] write pixels
stmia r0!,{r5} // [2] write 4 pixels
// [2,3] check end of data
tst r1,r1 // [1] check counter
beq RenderGraph4_EndLoop // [1,2] no 8-pixel pairs to render
// ---- [31*N-1] start inner loop, render pixels in one part of segment
// Inner loop variables (* prepared before inner loop):
// R0 ... *pointer to destination data buffer
// R1 ... *number of 8-pixel pairs to generate (loop counter, 4 source bytes per step)
// R2 ... *pointer to source image buffer
// R3 ... *pointer to palette translation table
// R4 ... image sample
// R5 ... output pixels
// R6 ... output pixels
// R7 ... output pixels
// LR ... *base pointer to image data (without X)
// [SP+24] ... wrap width
RenderGraph4_InLoop:
// [2] load image sample -> R4
ldrb r4,[r2,#0] // [2] load image sample
// [3] prepare 1st and 2nd pixel -> R5
lsls r4,#1 // [1] index*2
ldrh r5,[r3,r4] // [2] load 2 pixels
// [2] load image sample -> R4
ldrb r4,[r2,#1] // [2] load image sample
// [3] prepare 3rd and 4th pixel -> R6
lsls r4,#1 // [1] index*2
ldrh r6,[r3,r4] // [2] load 2 pixels
// [2] compose pixels -> R5
lsls r6,#16 // [1] shift 3rd and 4th pixels
orrs r5,r6 // [1] compose pixels
// [2] load image sample -> R4
ldrb r4,[r2,#2] // [2] load image sample
// [3] prepare 5th and 6th pixel -> R6
lsls r4,#1 // [1] index*2
ldrh r6,[r3,r4] // [2] load 2 pixels
// [3] load image sample -> R4
ldrb r4,[r2,#3] // [2] load image sample
adds r2,#4 // [1] increase pointer to image data
// [3] prepare 7th and 8th pixel -> R7
lsls r4,#1 // [1] index*2
ldrh r7,[r3,r4] // [2] load 2 pixels
// [2] compose pixels -> R6
lsls r7,#16 // [1] shift 7th and 8th pixels
orrs r6,r7 // [1] compose pixels
// [3] write pixels
stmia r0!,{r5,r6} // [3] write 8 pixels
// [2,3] loop counter
subs r1,#1 // [1] loop counter
bne RenderGraph4_InLoop // [1,2] next step
// ---- end inner loop, start new part
RenderGraph4_EndLoop:
// continue to outer loop, next part starts at X = 0
ldr r6,[sp,#24] // load wrap width -> R6
mov r2,lr // get base pointer to image data -> R2
b RenderGraph4_OutLoop // go back to outer loop

View file

@ -1,134 +0,0 @@
// ****************************************************************************
//
// VGA render GF_GRAPH8
//
// ****************************************************************************
#include "../define.h" // common definitions of C and ASM
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// Three entry points share one body: RenderGrad1 and RenderGrad2 only adjust
// the Y coordinate in R2 and then fall through into RenderGraph8.
// No pixels are copied here; the routine writes entries (width/4, source
// pointer) into the control buffer, one entry per part up to the wrap point.
//
// extern "C" u32* RenderGrad1(u32* cbuf, int x, int y, int w, sSegm* segm);
// render gradient with 1 line GF_GRAD1
// R0 ... pointer to control buffer
// R1 ... start X coordinate (in pixels, must be multiple of 4)
// R2 ... start Y coordinate (in graphics lines), will be ignored and substituted with 0
// R3 ... width to display (must be multiple of 4 and > 0)
// [stack] ... segm video segment sSegm
// Output new pointer to control buffer.
// 320 pixels takes 0.45 us on 151 MHz.
.thumb_func
.global RenderGrad1
RenderGrad1:
movs r2,#0 // always use line 0, then fall through (masking below keeps 0)
// extern "C" u32* RenderGrad2(u32* cbuf, int x, int y, int w, sSegm* segm);
// render gradient with 2 lines GF_GRAD2
// R0 ... pointer to control buffer
// R1 ... start X coordinate (in pixels, must be multiple of 4)
// R2 ... start Y coordinate (in graphics lines), will be masked to values 0 and 1
// R3 ... width to display (must be multiple of 4 and > 0)
// [stack] ... segm video segment sSegm
// Output new pointer to control buffer.
// 320 pixels takes 0.45 us on 151 MHz.
.thumb_func
.global RenderGrad2
RenderGrad2:
lsls r2,#31 // keep only bit 0 of Y ...
lsrs r2,#31 // ... Y = 0 or 1, then fall through
// extern "C" u32* RenderGraph8(u32* cbuf, int x, int y, int w, sSegm* segm);
// render native 8-bit graphics GF_GRAPH8
// R0 ... pointer to control buffer
// R1 ... start X coordinate (in pixels, must be multiple of 4)
// R2 ... start Y coordinate (in graphics lines)
// R3 ... width to display (must be multiple of 4 and > 0)
// [stack] ... segm video segment sSegm
// Output new pointer to control buffer.
// 320 pixels takes 0.45 us on 151 MHz.
.thumb_func
.global RenderGraph8
RenderGraph8:
// push registers
push {r4-r7,lr}
// Stack content:
// SP+0: R4
// SP+4: R5
// SP+8: R6
// SP+12: R7
// SP+16: LR
// SP+20: video segment
// Variables:
// R0 ... pointer to control buffer
// R1 ... X coordinate, later: width of one segment
// R2 ... Y coordinate, later: current pointer to data buffer
// R3 ... remaining width
// R4 ... base pointer to data buffer
// R5 ... (temporary)
// R6 ... (temporary)
// R7 ... wrap width
// get pointer to video segment -> R4
ldr r4,[sp,#20] // load video segment -> R4
// get wrap width -> R7
ldrh r7,[r4,#SSEGM_WRAPX] // get wrap width
movs r6,#3 // mask to align to 32-bit
bics r7,r6 // align wrap
// align X coordinate to 32-bit -> R1
bics r1,r6
// align remaining width -> R3
bics r3,r6
// base pointer to data buffer (without X) -> R4
ldrh r5,[r4,#SSEGM_WB] // get pitch of rows
muls r2,r5 // Y * WB -> offset in data buffer
ldr r5,[r4,#SSEGM_DATA] // pointer to data
adds r4,r2,r5 // base address of row in data buffer -> R4 (segment pointer no longer needed)
// prepare current pointer to image data with X -> R2
adds r2,r4,r1 // pointer to source data buffer -> R2 (1 byte per pixel)
// prepare wrap width - start X -> R1
subs r1,r7,r1 // pixels remaining to end of segment
// check remaining width
// NOTE(review): R3 is only reduced by R1; if the aligned wrap width in R7
// is 0 this loop does not terminate - confirm callers guarantee wrap >= 4.
2: tst r3,r3 // check remaining width
beq 6f // end of data
// limit wrap width by total width -> R1
cmp r1,r3 // compare with wrap width
bls 4f // width is OK
mov r1,r3 // limit wrap width
// decrease remaining width
4: subs r3,r1 // subtract from remaining width
// save control block
lsrs r1,#2 // width / 4
stm r0!,{r1,r2} // save width/4 and source pointer to control buffer
// continue to next loop, next part starts at X = 0
mov r1,r7 // load wrap width -> R1
mov r2,r4 // get base pointer to data buffer -> R2
b 2b // go next loop
// pop registers and return (R0 = new pointer to control buffer)
6: pop {r4-r7,pc}

View file

@ -1,310 +0,0 @@
// ****************************************************************************
//
// VGA render GF_GRAPH8MAT
//
// ****************************************************************************
// data ... image data
// par ... pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL))
// par2 ... LOW=number of bits of image width, HIGH=number of bits of image height
// image width must be max. 4096 (= 1<<FRACT); image width and height must be powers of 2
// wrapy ... segment height
#include "../define.h" // common definitions of C and ASM
#include "hardware/regs/sio.h" // registers of hardware divider
#include "hardware/regs/addressmap.h" // SIO base address
#define ACCUM0_OFFSET 0
#define ACCUM1_OFFSET 4
#define BASE0_OFFSET 8
#define BASE1_OFFSET 12
#define BASE2_OFFSET 16
#define POP_LANE0_OFFSET 20
#define POP_LANE1_OFFSET 24
#define POP_FULL_OFFSET 28
#define PEEK_LANE0_OFFSET 32
#define PEEK_LANE1_OFFSET 36
#define PEEK_FULL_OFFSET 40
#define CTRL_LANE0_OFFSET 44
#define CTRL_LANE1_OFFSET 48
#define ACCUM0_ADD_OFFSET 52
#define ACCUM1_ADD_OFFSET 56
#define BASE_1AND0_OFFSET 60
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// extern "C" u32* RenderGraph8Mat(u32* cbuf, int x, int y, int w, sSegm* segm);
// render 8-bit graphics GF_GRAPH8MAT, with 2D matrix transformation,
// using hardware interpolator inter1 (inter1 state is not saved during interrupt)
// R0 ... pointer to destination data buffer
// R1 ... start X coordinate (not used)
// R2 ... start Y coordinate (in graphics lines)
// R3 ... width to display (must be multiple of 4)
// [stack] ... segm video segment sSegm
// Output new pointer to data buffer.
.thumb_func
.global RenderGraph8Mat
RenderGraph8Mat:
// Input registers and stack:
// R0 ... pointer to destination data buffer
// R1 ... X coordinate (not used)
// R2 ... Y coordinate
// R3 ... remaining width
// SP+0: R4
// SP+4: R5
// SP+8: R6
// SP+12: R7
// SP+16: LR
// SP+20: video segment
// push registers
push {r4-r7,lr}
// ---- prepare registers
// prepare start coordinate X0 = -w/2 -> LR
lsrs r1,r3,#1 // width/2
negs r1,r1 // negate
mov lr,r1 // store start coordinate X0 -> LR
// prepare number of 4-pixels (loop counter) -> R7
lsrs r7,r3,#2 // width/4 -> R7
// get pointer to video segment -> R4
ldr r4,[sp,#20] // load video segment -> R4
// prepare current coordinate Y0 = -h/2 + y -> R12
ldrh r1,[r4,#SSEGM_WRAPY] // get segment height -> R1
lsrs r1,#1 // height/2
negs r1,r1 // negate
adds r1,r2 // add current Y coordinate
mov r12,r1 // store current coordinate Y0 -> R12
// get number of bits of image width "xbits" -> R1
ldrh r1,[r4,#SSEGM_PAR2] // number of bits of image width -> R1
// get number of bits of image height "ybits" -> R2
ldrh r2,[r4,#SSEGM_PAR2+2] // number of bits of image height -> R2
// prepare address of interpolator base -> R3
ldr r3,RenderGraph8Mat_Interp // get address of interpolator base -> R3
// R0 ... pointer to data buffer
// R1 ... number of bits of image width xbits
// R2 ... number of bits of image height ybits
// R3 ... interpolator base
// R4 ... video segment
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// ---- setup interpolator
// set image base to base2
ldr r6,[r4,#SSEGM_DATA] // load image base
str r6,[r3,#BASE2_OFFSET] // set image base
// set control word of lane 1 - add raw lane base back to accumulator, shift "FRACT-xbits", mask xbits...xbits+ybits-1
ldr r6,RenderGraph8Mat_Ctrl // load control word
subs r6,r1 // FRACT - xbits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required)
lsls r5,r1,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift xbits to mask LSB position -> R5
orrs r6,r5 // add xbits to control word
subs r1,#1 // xbits - 1 -> R1
adds r5,r1,r2 // xbits-1+ybits -> R5
lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to MSB mask position
orrs r6,r5 // add to control word
str r6,[r3,#CTRL_LANE1_OFFSET] // set control word of lane 1
// R0 ... pointer to data buffer
// R1 ... image width xbits-1
// R3 ... interpolator base
// R4 ... video segment
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// set control word of lane 0 - add raw lane base back to accumulator, shift "FRACT", mask 0..xbits-1
ldr r6,RenderGraph8Mat_Ctrl // load control word
lsls r1,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift xbits-1 to mask MSB position
orrs r6,r1 // add to control word
str r6,[r3,#CTRL_LANE0_OFFSET] // set control word of lane 0
// R0 ... pointer to data buffer
// R3 ... interpolator base
// R4 ... video segment
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// ---- set matrix
// get pointer to matrix -> R4
ldr r4,[r4,#SSEGM_PAR] // get pointer to matrix -> R4
// r4+0 ... m11
// r4+4 ... m12
// r4+8 ... m13
// r4+12 ... m21
// r4+16 ... m22
// r4+20 ... m23
// set m11 -> R5 base0
ldr r5,[r4,#0] // load m11
str r5,[r3,#BASE0_OFFSET] // set base0
// set m21 -> R6 base1
ldr r6,[r4,#12] // load m21
str r6,[r3,#BASE1_OFFSET] // set base1
// R0 ... pointer to data buffer
// R3 ... interpolator base
// R4 ... pointer to matrix
// R5 ... m11
// R6 ... m21
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// set x0*m11 + y0*m12 + m13 -> accum0
mov r2,lr // start coordinate X0 -> X2
muls r5,r2 // x0*m11 -> R5
muls r2,r6 // x0*m21 -> R2
ldr r1,[r4,#4] // load m12 -> R1
mov r6,r12 // load coordinate Y0 -> R6
muls r1,r6 // y0*m12 -> R1
adds r5,r1 // x0*m11 + y0*m12 -> R5
ldr r1,[r4,#8] // load m13 -> R1
adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5
str r5,[r3,#ACCUM0_OFFSET] // set accum0
// R0 ... pointer to data buffer
// R2 ... x0*m21
// R3 ... interpolator base
// R4 ... pointer to matrix
// R6 ... current coordinate Y0
// R7 ... width/4
// set x0*m21 + y0*m22 + m23 -> accum1
ldr r1,[r4,#16] // load m22 -> R1
muls r1,r6 // y0*m22 -> R1
adds r2,r1 // x0*m21 + y0*m22 -> R2
ldr r1,[r4,#20] // load m23 -> R1
adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2
str r2,[r3,#ACCUM1_OFFSET] // set accum1
// ---- process odd 4-pixel
// R0 ... pointer to destination data buffer
// R3 ... interpolator base
// R4 ... (temporary - get pointer to pixel)
// R5 ... (temporary - load pixel)
// R6 ... (temporary - pixel accumulator)
// R7 ... width/4 (loop counter)
lsrs r7,#1 // width/4/2
bcc 2f // no odd 4-pixel
// [3] load 1st pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r6,[r4,#0] // [2] load pixel
// [5] load 2nd pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r5,[r4,#0] // [2] load pixel
lsls r5,#8 // [1] shift 1 byte left
orrs r6,r5 // [1] add pixel to accumulator
// [5] load 3rd pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r5,[r4,#0] // [2] load pixel
lsls r5,#16 // [1] shift 2 bytes left
orrs r6,r5 // [1] add pixel to accumulator
// [5] load 4th pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r5,[r4,#0] // [2] load pixel
lsls r5,#24 // [1] shift 3 bytes left
orrs r6,r5 // [1] add pixel to accumulator
// [2] store 4 pixels
stmia r0!,{r6} // [2] store 4 pixels
// check number of remaining pixels
2: tst r7,r7 // check number of pixels
beq 8f // end
// ---- [42 per 8 pixels] inner loop
// R0 ... pointer to destination data buffer
// R1 ... (temporary - pixel accumulator 1)
// R2 ... (temporary - pixel accumulator 2)
// R3 ... interpolator base
// R4 ... (temporary - get pointer to pixel, load pixel)
// R7 ... width/8 (loop counter)
// [3] load 1st pixel
6: ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r1,[r4,#0] // [2] load pixel
// [5] load 2nd pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r4,[r4,#0] // [2] load pixel
lsls r4,#8 // [1] shift 1 byte left
orrs r1,r4 // [1] add pixel to accumulator
// [5] load 3rd pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r4,[r4,#0] // [2] load pixel
lsls r4,#16 // [1] shift 2 bytes left
orrs r1,r4 // [1] add pixel to accumulator
// [5] load 4th pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r4,[r4,#0] // [2] load pixel
lsls r4,#24 // [1] shift 3 bytes left
orrs r1,r4 // [1] add pixel to accumulator
// [3] load 1st pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r2,[r4,#0] // [2] load pixel
// [5] load 2nd pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r4,[r4,#0] // [2] load pixel
lsls r4,#8 // [1] shift 1 byte left
orrs r2,r4 // [1] add pixel to accumulator
// [5] load 3rd pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r4,[r4,#0] // [2] load pixel
lsls r4,#16 // [1] shift 2 bytes left
orrs r2,r4 // [1] add pixel to accumulator
// [5] load 4th pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r4,[r4,#0] // [2] load pixel
lsls r4,#24 // [1] shift 3 bytes left
orrs r2,r4 // [1] add pixel to accumulator
// [3] store 8 pixels
stmia r0!,{r1,r2} // [3] store 8 pixels
// [2,3] loop counter
subs r7,#1 // [1] 8-pixel counter
bne 6b // [1,2] next 8-pixels
// pop registers
8: pop {r4-r7,pc}
.align 2
// pointer to Interp1 base
RenderGraph8Mat_Interp:
.word SIO_BASE+SIO_INTERP1_ACCUM0_OFFSET // addres of interpolator base
RenderGraph8Mat_Ctrl: // lane control word
.word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT<<SIO_INTERP0_CTRL_LANE0_SHIFT_LSB)

View file

@ -1,340 +0,0 @@
// ****************************************************************************
//
// VGA render GF_GRAPH8PERSP
//
// ****************************************************************************
// data ... image data
// par ... pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL))
// par2 ... LOW=number of bits of image width, HIGH=number of bits of image height
// image width must be max. 4096 (= 1<<FRACT); image width and height must be powers of 2
// par3 ... horizon offset
// wrapy ... segment height
#include "../define.h" // common definitions of C and ASM
#include "hardware/regs/sio.h" // registers of hardware divider
#include "hardware/regs/addressmap.h" // SIO base address
#define ACCUM0_OFFSET 0
#define ACCUM1_OFFSET 4
#define BASE0_OFFSET 8
#define BASE1_OFFSET 12
#define BASE2_OFFSET 16
#define POP_LANE0_OFFSET 20
#define POP_LANE1_OFFSET 24
#define POP_FULL_OFFSET 28
#define PEEK_LANE0_OFFSET 32
#define PEEK_LANE1_OFFSET 36
#define PEEK_FULL_OFFSET 40
#define CTRL_LANE0_OFFSET 44
#define CTRL_LANE1_OFFSET 48
#define ACCUM0_ADD_OFFSET 52
#define ACCUM1_ADD_OFFSET 56
#define BASE_1AND0_OFFSET 60
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// extern "C" u32* RenderGraph8Persp(u32* cbuf, int x, int y, int w, sSegm* segm);
// render 8-bit graphics GF_GRAPH8PERSP, with 2D matrix transformation,
// using hardware interpolator interp1 (interp1 state is not saved during interrupt)
// R0 ... pointer to destination data buffer
// R1 ... start X coordinate (not used)
// R2 ... start Y coordinate (in graphics lines)
// R3 ... width to display (must be multiple of 4)
// [stack] ... segm video segment sSegm
// Output new pointer to data buffer.
// 320 pixels takes ?? us on 151 MHz.
.thumb_func
.global RenderGraph8Persp
RenderGraph8Persp:
// Renders one line of an 8-bit image through a 2x3 matrix whose m11, m12, m21
// and m22 terms are first scaled by a per-line distance coefficient "dist"
// obtained from the SIO hardware divider. Every value read from POP_FULL of
// the interpolator is used directly as the byte address of one source pixel;
// 4 pixels are packed into each 32-bit word stored to the destination buffer.
// Input registers and stack:
// R0 ... pointer to destination data buffer
// R1 ... X coordinate (not used, overwritten below)
// R2 ... Y coordinate
// R3 ... remaining width
// Output: R0 ... destination pointer advanced past the stored words
// Stack content after the push below:
// SP+0: R4
// SP+4: R5
// SP+8: R6
// SP+12: R7
// SP+16: LR
// SP+20: video segment
// push registers
push {r4-r7,lr}
// ---- prepare registers
// get pointer to video segment -> R4
ldr r4,[sp,#20] // load video segment -> R4
// prepare current coordinate Y0 = y - h -> R12
ldrh r1,[r4,#SSEGM_WRAPY] // get segment height -> R1
subs r2,r1 // y - h = current Y coordinate
mov r12,r2 // store current coordinate Y0 -> R12
// start calculating distance coefficient dist = FRACTMUL*h/(y + horiz + 1)
// (unsigned division; the quotient is fetched later, in the "set matrix" part,
// so the divider works while the interpolator is being configured)
// NOTE(review): there is no check for a zero divisor here.
lsls r6,r1,#FRACT // segment height * FRACTMUL -> R6
ldr r5,RenderGraph8Persp_pSioBase // get address of SIO base -> R5
str r6,[r5,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h
ldrh r6,[r4,#SSEGM_PAR3] // horizon offset -> R6
adds r2,r1 // undo the subtraction above: R2 = y (original Y coordinate)
adds r6,r2 // horizon + y -> R6
adds r6,#1 // horizon + y + 1 -> R6
str r6,[r5,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz + 1
// prepare start coordinate X0 = -w/2 -> LR
lsrs r5,r3,#1 // width/2
negs r5,r5 // negate
mov lr,r5 // store start coordinate X0 -> LR
// prepare number of 4-pixels (loop counter) -> R7
lsrs r7,r3,#2 // width/4 -> R7
// get number of bits of image width "xbits" -> R1
ldrh r1,[r4,#SSEGM_PAR2] // number of bits of image width -> R1
// get number of bits of image height "ybits" -> R2
ldrh r2,[r4,#SSEGM_PAR2+2] // number of bits of image height -> R2
// prepare address of interpolator base -> R3
ldr r3,RenderGraph8Persp_Interp // get address of interpolator base -> R3
// R0 ... pointer to data buffer
// R1 ... number of bits of image width xbits
// R2 ... number of bits of image height ybits
// R3 ... interpolator base
// R4 ... video segment
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// ---- setup interpolator
// set image base to base2
ldr r6,[r4,#SSEGM_DATA] // load image base
str r6,[r3,#BASE2_OFFSET] // set image base
// set control word of lane 1 - add raw lane base back to accumulator, shift "FRACT-xbits", mask xbits...xbits+ybits-1
ldr r6,RenderGraph8Persp_Ctrl // load control word
subs r6,r1 // FRACT - xbits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required)
lsls r5,r1,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift xbits to mask LSB position -> R5
orrs r6,r5 // add xbits to control word
subs r1,#1 // xbits - 1 -> R1
adds r5,r1,r2 // xbits-1+ybits -> R5
lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to MSB mask position
orrs r6,r5 // add to control word
str r6,[r3,#CTRL_LANE1_OFFSET] // set control word of lane 1
// R0 ... pointer to data buffer
// R1 ... image width xbits-1
// R3 ... interpolator base
// R4 ... video segment
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// set control word of lane 0 - add raw lane base back to accumulator, shift "FRACT", mask 0..xbits-1
ldr r6,RenderGraph8Persp_Ctrl // load control word
lsls r1,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift xbits-1 to mask MSB position
orrs r6,r1 // add to control word
str r6,[r3,#CTRL_LANE0_OFFSET] // set control word of lane 0
// R0 ... pointer to data buffer
// R3 ... interpolator base
// R4 ... video segment
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// ---- set matrix
// get pointer to matrix -> R4
ldr r4,[r4,#SSEGM_PAR] // get pointer to matrix -> R4
// get distance coefficient dist -> R1
// (result of the division started in the "prepare registers" part)
ldr r1,RenderGraph8Persp_pSioBase // get address of SIO base -> R1
ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient -> R1, distance coefficient
// r4+0 ... m11
// r4+4 ... m12
// r4+8 ... m13
// r4+12 ... m21
// r4+16 ... m22
// r4+20 ... m23
// set (m11*dist)>>FRACT -> R5 base0
ldr r5,[r4,#0] // load m11
muls r5,r1 // m11*dist
asrs r5,#FRACT // (m11*dist)>>FRACT
str r5,[r3,#BASE0_OFFSET] // set base0
// set (m21*dist)>>FRACT -> R6 base1
ldr r6,[r4,#12] // load m21
muls r6,r1 // m21*dist
asrs r6,#FRACT // (m21*dist)>>FRACT
str r6,[r3,#BASE1_OFFSET] // set base1
// R0 ... pointer to data buffer
// R1 ... distance coefficient
// R3 ... interpolator base
// R4 ... pointer to matrix
// R5 ... scaled m11 = (m11*dist)>>FRACT
// R6 ... scaled m21 = (m21*dist)>>FRACT
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// set x0*m11 + y0*m12 + m13 -> accum0
// (m11 and m12 are the dist-scaled values here; m13 is added unscaled)
mov r2,lr // start coordinate X0 -> R2
muls r5,r2 // x0*m11 -> R5
muls r2,r6 // x0*m21 -> R2
mov lr,r1 // save distance coefficient -> LR (X0 is no longer needed)
ldr r6,[r4,#4] // load m12 -> R6
muls r1,r6 // m12*dist
asrs r1,#FRACT // (m12*dist)>>FRACT
mov r6,r12 // load coordinate Y0 -> R6
muls r1,r6 // y0*m12 -> R1
adds r5,r1 // x0*m11 + y0*m12 -> R5
ldr r1,[r4,#8] // load m13 -> R1
adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5
str r5,[r3,#ACCUM0_OFFSET] // set accum0
// R0 ... pointer to data buffer
// R2 ... x0*m21
// R3 ... interpolator base
// R4 ... pointer to matrix
// R6 ... current coordinate Y0
// R7 ... width/4
// LR ... distance coefficient
// set x0*m21 + y0*m22 + m23 -> accum1
// (m21 and m22 are the dist-scaled values here; m23 is added unscaled)
ldr r1,[r4,#16] // load m22 -> R1
mov r5,lr // distance coefficient -> R5
muls r1,r5 // m22*dist
asrs r1,#FRACT // (m22*dist)>>FRACT
muls r1,r6 // y0*m22 -> R1
adds r2,r1 // x0*m21 + y0*m22 -> R2
ldr r1,[r4,#20] // load m23 -> R1
adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2
str r2,[r3,#ACCUM1_OFFSET] // set accum1
// ---- process odd 4-pixel
// R0 ... pointer to destination data buffer
// R3 ... interpolator base
// R4 ... (temporary - get pointer to pixel)
// R5 ... (temporary - load pixel)
// R6 ... (temporary - pixel accumulator)
// R7 ... width/4 (loop counter)
// The shift halves the counter; the bit shifted out lands in carry and tells
// whether a single 4-pixel group must be rendered before the 8-pixel loop.
lsrs r7,#1 // width/4/2
bcc 2f // no odd 4-pixel
// [3] load 1st pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r6,[r4,#0] // [2] load pixel
// [5] load 2nd pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r5,[r4,#0] // [2] load pixel
lsls r5,#8 // [1] shift 1 byte left
orrs r6,r5 // [1] add pixel to accumulator
// [5] load 3rd pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r5,[r4,#0] // [2] load pixel
lsls r5,#16 // [1] shift 2 bytes left
orrs r6,r5 // [1] add pixel to accumulator
// [5] load 4th pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r5,[r4,#0] // [2] load pixel
lsls r5,#24 // [1] shift 3 bytes left
orrs r6,r5 // [1] add pixel to accumulator
// [2] store 4 pixels
stmia r0!,{r6} // [2] store 4 pixels
// check number of remaining pixels
2: tst r7,r7 // check number of pixels
beq 8f // end
// ---- [42 per 8 pixels] inner loop
// R0 ... pointer to destination data buffer
// R1 ... (temporary - pixel accumulator 1)
// R2 ... (temporary - pixel accumulator 2)
// R3 ... interpolator base
// R4 ... (temporary - get pointer to pixel, load pixel)
// R7 ... width/8 (loop counter)
// [3] load 1st pixel
6: ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r1,[r4,#0] // [2] load pixel
// [5] load 2nd pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r4,[r4,#0] // [2] load pixel
lsls r4,#8 // [1] shift 1 byte left
orrs r1,r4 // [1] add pixel to accumulator
// [5] load 3rd pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r4,[r4,#0] // [2] load pixel
lsls r4,#16 // [1] shift 2 bytes left
orrs r1,r4 // [1] add pixel to accumulator
// [5] load 4th pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r4,[r4,#0] // [2] load pixel
lsls r4,#24 // [1] shift 3 bytes left
orrs r1,r4 // [1] add pixel to accumulator
// [3] load 5th pixel (1st pixel of second word)
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r2,[r4,#0] // [2] load pixel
// [5] load 6th pixel (2nd pixel of second word)
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r4,[r4,#0] // [2] load pixel
lsls r4,#8 // [1] shift 1 byte left
orrs r2,r4 // [1] add pixel to accumulator
// [5] load 7th pixel (3rd pixel of second word)
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r4,[r4,#0] // [2] load pixel
lsls r4,#16 // [1] shift 2 bytes left
orrs r2,r4 // [1] add pixel to accumulator
// [5] load 8th pixel (4th pixel of second word)
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r4,[r4,#0] // [2] load pixel
lsls r4,#24 // [1] shift 3 bytes left
orrs r2,r4 // [1] add pixel to accumulator
// [3] store 8 pixels
stmia r0!,{r1,r2} // [3] store 8 pixels
// [2,3] loop counter
subs r7,#1 // [1] 8-pixel counter
bne 6b // [1,2] next 8-pixels
// pop registers and return (R0 = new destination pointer)
8: pop {r4-r7,pc}
.align 2
// pointer to SIO base
RenderGraph8Persp_pSioBase:
.word SIO_BASE // address of SIO base
// pointer to Interp1 base
RenderGraph8Persp_Interp:
.word SIO_BASE+SIO_INTERP1_ACCUM0_OFFSET // address of interpolator base
RenderGraph8Persp_Ctrl: // lane control word
.word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT<<SIO_INTERP0_CTRL_LANE0_SHIFT_LSB)

View file

@ -1,317 +0,0 @@
// ****************************************************************************
//
// VGA render GF_GTEXT
//
// ****************************************************************************
// u32 par SSEGM_PAR pointer to the font
// u32 par2 SSEGM_PAR2 pointer to font gradient
// u16 par3 LOW background color, HIGH font height
#include "../define.h" // common definitions of C and ASM
#include "hardware/regs/sio.h" // registers of hardware divider
#include "hardware/regs/addressmap.h" // SIO base address
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// render font pixel mask
.extern RenderTextMask // u32 RenderTextMask[512];
// extern "C" u8* RenderGText(u8* dbuf, int x, int y, int w, sSegm* segm)
// render 8-pixel gradient color text GF_GTEXT
// R0 ... destination data buffer
// R1 ... start X coordinate (in pixels, must be multiple of 4)
// R2 ... start Y coordinate (in graphics lines)
// R3 ... width to display (must be multiple of 4 and > 0)
// [stack] ... segm video segment sSegm
// Output new pointer to destination data buffer.
// 320 pixels takes 8.3 us on 151 MHz.
.thumb_func
.global RenderGText
RenderGText:
// Renders one line of 8-pixel-wide mono text, taking the foreground color of
// every pixel from a gradient array and the background from par3.
// Input:
// R0 ... destination data buffer
// R1 ... start X coordinate (aligned down to a multiple of 4 below)
// R2 ... start Y coordinate (in graphics lines)
// R3 ... width to display (aligned down to a multiple of 4 below)
// [stack] ... pointer to video segment sSegm
// Output: R0 ... destination pointer advanced past the stored pixels
// par3 layout used by this routine: LOW byte = background color,
// HIGH byte = font height.
// push registers
push {r1-r7,lr}
mov r4,r8
push {r4}
// Stack content:
// SP+0: R8
// SP+4: R1 start X coordinate (later: base pointer to gradient array)
// SP+8: R2 start Y coordinate (later: base pointer to text data row)
// SP+12: R3 width to display
// SP+16: R4
// SP+20: R5
// SP+24: R6
// SP+28: R7
// SP+32: LR
// SP+36: video segment (later: wrap width in X direction)
// get pointer to video segment -> R4
ldr r4,[sp,#36] // load video segment -> R4
// start divide Y/font height
ldr r6,RenderGText_pSioBase // get address of SIO base -> R6
str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate
// Font height is only the HIGH byte of par3; loading the whole halfword would
// mix the background color (LOW byte) into the divisor.
ldrb r2,[r4,#SSEGM_PAR3+1] // font height -> R2
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height
// - now we must wait at least 8 clock cycles to get result of division
// [6] get wrap width -> [SP+36]
ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width
movs r7,#3 // [1] mask to align to 32-bit
bics r5,r7 // [1] align wrap
str r5,[sp,#36] // [2] save wrap width
// [1] align X coordinate to 32-bit
bics r1,r7 // [1]
// [3] align remaining width
bics r3,r7 // [1]
str r3,[sp,#12] // [2] save new width
// load result of division Y/font_height -> R5 Y relative at row, R2 Y row
// Note: QUOTIENT must be read last
ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row
ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient -> R2, index of row
// pointer to font line -> R8
lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long)
ldr r3,[r4,#SSEGM_PAR] // get pointer to font
add r3,r5 // line offset + font base -> pointer to current font line R3
mov r8,r3
// base pointer to text data (without X) -> [SP+8], R2
ldrh r5,[r4,#SSEGM_WB] // get pitch of rows
muls r2,r5 // Y * WB -> offset of row in text buffer
ldr r5,[r4,#SSEGM_DATA] // pointer to data
add r2,r5 // base address of text buffer
str r2,[sp,#8] // save pointer to text buffer
// base pointer to gradient array -> [SP+4], R3
ldr r3,[r4,#SSEGM_PAR2] // pointer to gradient array
str r3,[sp,#4] // save pointer to gradient array
// prepare pointer to text data with X -> R2
add r3,r1 // pointer to source gradient array
lsrs r6,r1,#3 // convert X to character index (1 character is 8 pixels width)
add r2,r6 // pointer to source text buffer -> R2
// prepare background color, expand to 32 bits -> R4
ldrb r4,[r4,#SSEGM_PAR3] // load background color (LOW byte of par3)
lsls r5,r4,#8 // shift background color << 8
orrs r5,r4 // color expanded to 16 bits
lsls r4,r5,#16 // shift 16-bit color << 16
orrs r4,r5 // color expanded to 32 bits
// prepare pointer to conversion table -> LR
ldr r5,RenderGText_Addr // get pointer to conversion table -> R5
mov lr,r5 // conversion table -> LR
// ---- render 2nd half of first character
// R0 ... pointer to destination data buffer
// R1 ... start X coordinate
// R2 ... pointer to source text buffer
// R3 ... pointer to gradient array
// R4 ... background color (expanded to 32-bit)
// R5..R7 ... (temporary)
// R8 ... pointer to font line
// LR ... pointer to conversion table
// [SP+4] ... base pointer to gradient array
// [SP+8] ... base pointer to text data (without X)
// [SP+12] ... remaining width
// [SP+36] ... wrap width
// check bit 2 of X coordinate - check if image starts with 2nd half of first character
lsls r6,r1,#29 // move bit 2 of X coordinate to the sign bit
bpl 2f // bit 2 not set, starting even 4-pixels
// [6] load font sample -> R5
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
adds r2,#1 // [1] shift pointer to source text buffer
add r5,r8 // [1] pointer to font line
ldrb r5,[r5] // [2] load font sample -> R5
// [3] load foreground color, XOR with background -> R6
ldmia r3!,{r6} // [2] load foreground color from gradient buffer
eors r6,r4 // [1] XOR foreground color with background color
// [2] prepare conversion table -> R5
lsls r5,#3 // [1] multiply font sample * 8 (two 32-bit masks per sample)
add r5,lr // [1] add pointer to conversion table
// [6] convert second 4 pixels (lower 4 bits)
// (fg^bg)&mask ^ bg selects fg where the mask is set and bg elsewhere
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r7} // [2] store second 4 pixels
// shift X coordinate
adds r1,#4 // shift X coordinate
// check end of segment
ldr r7,[sp,#36] // load wrap width
cmp r1,r7 // end of segment?
blo 1f
movs r1,#0 // reset X coordinate
ldr r2,[sp,#8] // get base pointer to text data -> R2
ldr r3,[sp,#4] // get base pointer to gradient array -> R3
// shift remaining width
1: ldr r7,[sp,#12] // get remaining width
subs r7,#4 // shift width
str r7,[sp,#12] // save new width
// prepare wrap width - start X -> R7
2: ldr r7,[sp,#36] // load wrap width
subs r7,r1 // pixels remaining to end of segment
// ---- start outer loop, render one part of segment
// Outer loop variables (* prepared before outer loop):
// R0 ... *pointer to destination data buffer
// R1 ... number of characters to generate in one part of segment
// R2 ... *pointer to source text buffer
// R3 ... *pointer to gradient array
// R4 ... *background color (expanded to 32-bit)
// R5 ... (temporary)
// R6 ... (temporary)
// R7 ... *wrap width of this segment, later: temporary
// R8 ... *pointer to font line
// LR ... *pointer to conversion table
// [SP+4] ... base pointer to gradient array
// [SP+8] ... base pointer to text data (without X)
// [SP+12] ... remaining width
// [SP+36] ... wrap width
RenderGText_OutLoop:
// limit wrap width by total width -> R7
ldr r6,[sp,#12] // get remaining width
cmp r7,r6 // compare with wrap width
bls 2f // width is OK
mov r7,r6 // limit wrap width
// check if remain whole characters
2: cmp r7,#8 // check number of remaining pixels
bhs 5f // enough characters remain
// check if 1st part of last character remains
cmp r7,#4 // check 1st part of last character
blo 3f // all done
// ---- render 1st part of last character
// Reached either from the check above (R7 = 4, the routine returns afterwards)
// or from the inner loop (R7 = wrap width, the outer loop continues).
// NOTE(review): the first case ends the line even if more width remains;
// presumably wrap width is a multiple of 8 so it only occurs at the end - confirm.
RenderGText_Last:
// [6] load font sample -> R5
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
adds r2,#1 // [1] shift pointer to source text buffer
add r5,r8 // [1] pointer to font line
ldrb r5,[r5] // [2] load font sample -> R5
// [3] load foreground color, XOR with background -> R6
ldmia r3!,{r6} // [2] load foreground color from gradient buffer
eors r6,r4 // [1] XOR foreground color with background color
// [2] prepare conversion table -> R5
lsls r5,#3 // [1] multiply font sample * 8
add r5,lr // [1] add pointer to conversion table
// [6] convert first 4 pixels (higher 4 bits)
ldr r1,[r5,#0] // [2] load mask for higher 4 bits
ands r1,r6 // [1] mask foreground color
eors r1,r4 // [1] combine with background color
stmia r0!,{r1} // [2] store first 4 pixels
// check if continue with next segment
ldr r2,[sp,#8] // get base pointer to text data -> R2
ldr r3,[sp,#4] // get base pointer to gradient array -> R3
cmp r7,#4
bhi RenderGText_OutLoop
// pop registers and return
// (R1..R3 are restored from the stack slots, which were reused as scratch
// storage above; R0 carries the new destination pointer)
3: pop {r4}
mov r8,r4
pop {r1-r7,pc}
// ---- prepare to render whole characters
// prepare number of whole characters to render -> R1
5: lsrs r1,r7,#2 // shift to get number of characters*2
lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5
subs r6,r5 // get remaining width (R6 still holds it from the outer loop head)
str r6,[sp,#12] // save new remaining width
subs r1,#1 // number of characters*2 - 1
// ---- [28*N-1] start inner loop, render characters in one part of segment
// Inner loop variables (* prepared before inner loop):
// R0 ... *pointer to destination data buffer
// R1 ... *number of characters to generate*2 - 1 (loop counter)
// R2 ... *pointer to source text buffer
// R3 ... *pointer to gradient array
// R4 ... *background color (expanded to 32-bit)
// R5 ... (temporary)
// R6 ... foreground color
// R7 ... font sample
// R8 ... *pointer to font line
// LR ... *pointer to conversion table
// [SP+4] ... base pointer to gradient array
// [SP+8] ... base pointer to text data (without X)
// [SP+12] ... remaining width
// [SP+36] ... wrap width
RenderGText_InLoop:
// [6] load font sample -> R7
ldrb r7,[r2,#0] // [2] load character from source text buffer -> R7
adds r2,#1 // [1] shift pointer to source text buffer
add r7,r8 // [1] pointer to font line
ldrb r7,[r7] // [2] load font sample -> R7
// [3] load foreground color, XOR with background -> R6
ldmia r3!,{r6} // [2] load foreground color from gradient buffer
eors r6,r4 // [1] XOR foreground color with background color
// [2] prepare conversion table -> R7
lsls r7,#3 // [1] multiply font sample * 8
add r7,lr // [1] add pointer to conversion table
// [4] convert first 4 pixels (higher 4 bits) -> R5
ldr r5,[r7,#0] // [2] load mask for higher 4 bits
ands r5,r6 // [1] mask foreground color
eors r5,r4 // [1] combine with background color
// [3] load foreground color, XOR with background -> R6
ldmia r3!,{r6} // [2] load foreground color from gradient buffer
eors r6,r4 // [1] XOR foreground color with background color
// [7] convert and store second 4 pixels (lower 4 bits)
ldr r7,[r7,#4] // [2] load mask for lower 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r5,r7} // [3] store 8 pixels
// [2,3] loop counter
subs r1,#2 // [1] shift loop counter
bhi RenderGText_InLoop // [1,2] > 0, render next whole character
// ---- end inner loop, continue with last character, or start new part
// continue to outer loop
// (the load below does not change the flags set by the subs above:
// zero = one half character is left, borrow = nothing is left)
ldr r7,[sp,#36] // load wrap width
beq RenderGText_Last // render 1st half of last character
ldr r2,[sp,#8] // get base pointer to text data -> R2
ldr r3,[sp,#4] // get base pointer to gradient array -> R3
b RenderGText_OutLoop // go back to outer loop
.align 2
RenderGText_Addr:
.word RenderTextMask
RenderGText_pSioBase:
.word SIO_BASE // address of SIO base

View file

@ -1,431 +0,0 @@
// ****************************************************************************
//
// VGA render GF_LEVEL
//
// ****************************************************************************
#include "../define.h" // common definitions of C and ASM
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// render font pixel mask
.extern RenderTextMask // u32 RenderTextMask[512];
// extern "C" u8* RenderLevel(u8* dbuf, int x, int y, int w, sSegm* segm);
// render level graph GF_LEVEL
// dbuf ... destination data buffer
// x ... start X coordinate (must be multiple of 4)
// y ... start Y coordinate
// w ... width of this segment (must be multiple of 4)
// segm ... video segment
// Output new dbuf pointer.
// 320 pixels takes 14 us on 151 MHz.
.thumb_func
.global RenderLevel
RenderLevel:
// push registers
push {r1-r7,lr}
// Input registers and stack content:
// R0 ... pointer to destination data buffer
// SP+0: R1 start X coordinate (later: zero level)
// SP+4: R2 start Y coordinate (later: base pointer to sample data)
// SP+8: R3 width to display
// SP+12: R4
// SP+16: R5
// SP+20: R6
// SP+24: R7
// SP+28: LR
// SP+32: video segment (later: wrap width in X direction)
// get pointer to video segment -> R4
ldr r4,[sp,#32] // load video segment -> R4
// get wrap width -> [SP+32]
ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width
movs r7,#3 // mask to align to 32-bit
bics r5,r7 // align wrap
str r5,[sp,#32] // save wrap width
// align X coordinate to 32-bit -> R1
bics r1,r7
// align remaining width -> [SP+8]
bics r3,r7
str r3,[sp,#8] // save new width
// current Y in direction from bottom to up -> R5
ldrh r5,[r4,#SSEGM_WRAPY] // get wrap height
subs r5,#1 // wrapy - 1
subs r5,r2 // subtract Y, get Y relative to bottom -> R5
// get zero level -> [SP+0]
ldrb r3,[r4,#SSEGM_PAR2] // get zero level
str r3,[sp,#0] // save zero level
// base pointer to sample data (without X) -> [SP+4], R2
ldr r2,[r4,#SSEGM_DATA] // pointer to sample data
str r2,[sp,#4] // save pointer to sample buffer
// prepare pointer to sample data with X -> R2
add r2,r1 // pointer to source sample buffer -> R2
// prepare foreground color, expand to 32-bit -> R6
ldrb r6,[r4,#SSEGM_PAR+1] // load foreground color
lsls r3,r6,#8 // [1] shift foreground color << 8
orrs r3,r6 // [1] color expanded to 16 bits
lsls r6,r3,#16 // [1] shift 16-bit color << 16
orrs r6,r3 // [1] color expanded to 32 bits
// prepare background color, expand to 32 bits -> R4
ldrb r4,[r4,#SSEGM_PAR] // load background color
lsls r3,r4,#8 // shift background color << 8
orrs r3,r4 // color expanded to 16 bits
lsls r4,r3,#16 // shift 16-bit color << 16
orrs r4,r3 // color expanded to 32 bits
// [1] XOR foreground and background color -> R6
eors r6,r4 // [1] XOR foreground color with background color
// prepare pointer to conversion table -> LR
ldr r3,RenderLevel_Addr // get pointer to conversion table -> R5
mov lr,r3 // conversion table -> LR
// prepare wrap width - start X -> R7
ldr r7,[sp,#32] // load wrap width
subs r7,r1 // pixels remaining to end of segment
// last 4-pixels
cmp r7,#4
bhi RenderLevel_OutLoop
ldr r7,[sp,#32] // load wrap width
b RenderLevel_Last // render last 4-pixels of first segment
// ---- start outer loop, render one part of segment
// Outer loop variables (* prepared before outer loop):
// R0 ... *pointer to destination data buffer
// R1 ... number of 4-pixels to generate in one part of segment
// R2 ... *pointer to source sample buffer
// R3 ... remaining width, later: (temporary)
// R4 ... *background color (expanded to 32-bit)
// R5 ... *current line Y (in direction from bottom to up)
// R6 ... *foreground color (expanded to 32-bit)
// R7 ... *wrap width of this segment, later: (temporary)
// LR ... *pointer to conversion table
// [SP+0] ... *zero level
// [SP+4] ... *base pointer to sample data (without X)
// [SP+8] ... *remaining width
// [SP+32] ... *wrap width
RenderLevel_OutLoop:
// limit wrap width by total width -> R7
ldr r3,[sp,#8] // get remaining width
cmp r7,r3 // compare with wrap width
bls 2f // width is OK
mov r7,r3 // limit wrap width
// check number of pixels
2: cmp r7,#8 // check number of remaining pixels
bhs 5f // enough pixels remain to render 8-pixels
// check last 4-pixels
cmp r7,#4 // check last 4-pixels
blo 3f // all done
// ---- render last 4 pixels
RenderLevel_Last:
// check half of graph
ldr r3,[sp,#0] // get zero level
cmp r5,r3 // check current line
blo RenderLevel_Last2 // bottom half of graph
// ---- top half
// [1] clear sample accumulator
movs r1,#0 // [1] clear sample accumulator
// [4] get sample 0
ldrb r3,[r2,#0] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
// [4] get sample 1
ldrb r3,[r2,#1] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
// [4] get sample 2
ldrb r3,[r2,#2] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
// [4] get sample 3
ldrb r3,[r2,#3] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
adds r2,#4 // [1] shift pointer to source buffer
// [2] prepare conversion table -> R1
lsls r1,#3 // [1] multiply sample * 8
add r1,lr // [1] add pointer to conversion table
// [7] convert 4 pixels (lower 4 bits)
ldr r1,[r1,#4] // [2] load mask for lower 4 bits
ands r1,r6 // [1] mask foreground color
eors r1,r4 // [1] combine with background color
stmia r0!,{r1} // [3] store 4 pixels
b 7f
// ---- bottom half
RenderLevel_Last2:
// [1] clear sample accumulator
movs r1,#0 // [1] clear sample accumulator
// [4] get sample 0
ldrb r3,[r2,#0] // [2] get data sample -> R3
cmp r5,r3 // [1] compare sample with current line
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
// [4] get sample 1
ldrb r3,[r2,#1] // [2] get data sample -> R3
cmp r5,r3 // [1] compare sample with current line
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
// [4] get sample 2
ldrb r3,[r2,#2] // [2] get data sample -> R3
cmp r5,r3 // [1] compare sample with current line
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
// [4] get sample 3
ldrb r3,[r2,#3] // [2] get data sample -> R3
cmp r5,r3 // [1] compare sample with current line
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
adds r2,#4 // [1] shift pointer to source buffer
// [2] prepare conversion table -> R1
lsls r1,#3 // [1] multiply sample * 8
add r1,lr // [1] add pointer to conversion table
// [7] convert 4 pixels (lower 4 bits)
ldr r1,[r1,#4] // [2] load mask for lower 4 bits
ands r1,r6 // [1] mask foreground color
eors r1,r4 // [1] combine with background color
stmia r0!,{r1} // [3] store 4 pixels
// check if continue with next segment
7: ldr r2,[sp,#4] // get base pointer to sample data -> R2
cmp r7,#4
bhi RenderLevel_OutLoop
// pop registers and return
3: pop {r1-r7,pc}
// ---- prepare to render 8-pixels
// prepare number of whole 4-pixels to render -> R1
5: lsrs r1,r7,#2 // shift width to get number of 4-pixels
lsls r7,r1,#2 // shift back to get number of pixels, rounded down -> R7
subs r3,r7 // get remaining width
str r3,[sp,#8] // save new remaining width
subs r1,#1 // number of 4-pixels - 1
// check half of graph
ldr r3,[sp,#0] // get zero level
cmp r5,r3 // check current line
blo RenderLevel_InLoopBot // bottom half of graph
// ---- [50*N-1] start inner loop, render in one part of segment - top half of graph
// Inner loop variables (* prepared before inner loop):
// R0 ... *pointer to destination data buffer
// R1 ... *number of 4-pixels to generate*2 - 1 (loop counter)
// R2 ... *pointer to source sample buffer
// R3 ... sample
// R4 ... *background color (expanded to 32-bit)
// R5 ... *current line Y (in direction from bottom to up)
// R6 ... *foreground color (expanded to 32-bit)
// R7 ... sample accumulator, conversion table
// LR ... *pointer to conversion table
// [SP+0] ... *zero level
// [SP+4] ... *base pointer to sample data (without X)
// [SP+8] ... *remaining width
// [SP+32] ... *wrap width
RenderLevel_InLoopTop: // render 8 pixels in one loop step, top half of graph
// [1] clear sample accumulator
movs r7,#0 // [1] clear sample accumulator
// [4] get sample 0
ldrb r3,[r2,#0] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [4] get sample 1
ldrb r3,[r2,#1] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [4] get sample 2
ldrb r3,[r2,#2] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [4] get sample 3
ldrb r3,[r2,#3] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [4] get sample 4
ldrb r3,[r2,#4] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [4] get sample 5
ldrb r3,[r2,#5] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [4] get sample 6
ldrb r3,[r2,#6] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [5] get sample 7
ldrb r3,[r2,#7] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
adds r2,#8 // [1] shift pointer to source buffer
// [2] prepare conversion table -> R7
lsls r7,#3 // [1] multiply sample * 8
add r7,lr // [1] add pointer to conversion table
// [4] convert first 4 pixels (higher 4 bits)
ldr r3,[r7,#0] // [2] load mask for higher 4 bits
ands r3,r6 // [1] mask foreground color
eors r3,r4 // [1] combine with background color
// [7] convert second 4 pixels (lower 4 bits)
ldr r7,[r7,#4] // [2] load mask for lower 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r3,r7} // [3] store second 4 pixels
// [2,3] loop counter
subs r1,#2 // [1] shift loop counter
bhi RenderLevel_InLoopTop // [1,2] > 0, render next whole 8-pixels
// ---- end inner loop, continue with last 4-pixels, or start new part
// continue to outer loop
ldr r7,[sp,#32] // load wrap width
8: beq RenderLevel_Last // render last 4-pixels
ldr r2,[sp,#4] // get base pointer to sample data -> R2
b RenderLevel_OutLoop // go back to outer loop
// ---- [50*N-1] start inner loop, render in one part of segment - bottom half of graph
// Inner loop variables (* prepared before inner loop):
// R0 ... *pointer to destination data buffer
// R1 ... *number of 4-pixels to generate*2 - 1 (loop counter)
// R2 ... *pointer to source sample buffer
// R3 ... sample
// R4 ... *background color (expanded to 32-bit)
// R5 ... *current line Y (in direction from bottom to up)
// R6 ... *foreground color (expanded to 32-bit)
// R7 ... sample accumulator, conversion table
// LR ... *pointer to conversion table
// [SP+0] ... *zero level
// [SP+4] ... *base pointer to sample data (without X)
// [SP+8] ... *remaining width
// [SP+32] ... *wrap width
RenderLevel_InLoopBot: // render 8 pixels in one loop step, bottom half of graph
// [1] clear sample accumulator
movs r7,#0 // [1] clear sample accumulator
// [4] get sample 0
ldrb r3,[r2,#0] // [2] get data sample -> R3
cmp r5,r3 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [4] get sample 1
ldrb r3,[r2,#1] // [2] get data sample -> R3
cmp r5,r3 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [4] get sample 2
ldrb r3,[r2,#2] // [2] get data sample -> R3
cmp r5,r3 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [4] get sample 3
ldrb r3,[r2,#3] // [2] get data sample -> R3
cmp r5,r3 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [4] get sample 4
ldrb r3,[r2,#4] // [2] get data sample -> R3
cmp r5,r3 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [4] get sample 5
ldrb r3,[r2,#5] // [2] get data sample -> R3
cmp r5,r3 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [4] get sample 6
ldrb r3,[r2,#6] // [2] get data sample -> R3
cmp r5,r3 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [5] get sample 7
ldrb r3,[r2,#7] // [2] get data sample -> R3
cmp r5,r3 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
adds r2,#8 // [1] shift pointer to source buffer
// [2] prepare conversion table -> R7
lsls r7,#3 // [1] multiply sample * 8
add r7,lr // [1] add pointer to conversion table
// [4] convert first 4 pixels (higher 4 bits)
ldr r3,[r7,#0] // [2] load mask for higher 4 bits
ands r3,r6 // [1] mask foreground color
eors r3,r4 // [1] combine with background color
// [7] convert second 4 pixels (lower 4 bits)
ldr r7,[r7,#4] // [2] load mask for lower 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r3,r7} // [3] store second 4 pixels
// [2,3] loop counter
subs r1,#2 // [1] shift loop counter
bhi RenderLevel_InLoopBot // [1,2] > 0, render next whole 8-pixels
// ---- end inner loop, continue with last 4-pixels, or start new part
// continue to outer loop
ldr r7,[sp,#32] // load wrap width
beq 8b // render last 4-pixels
ldr r2,[sp,#4] // get base pointer to sample data -> R2
b RenderLevel_OutLoop // go back to outer loop
.align 2
RenderLevel_Addr:
.word RenderTextMask

View file

@ -1,287 +0,0 @@
// ****************************************************************************
//
// VGA render GF_LEVELGRAD
//
// ****************************************************************************
#include "../define.h" // common definitions of C and ASM
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// render font pixel mask
.extern RenderTextMask // u32 RenderTextMask[512];
// extern "C" u8* RenderLevelGrad(u8* dbuf, int x, int y, int w, sSegm* segm);
// render level gradient graph GF_LEVELGRAD
// dbuf ... destination data buffer
// x ... start X coordinate (must be multiple of 4)
// y ... start Y coordinate
// w ... width of this segment (must be multiple of 4)
// segm ... video segment
// Output new dbuf pointer.
// 320 pixels takes 14 us on 151 MHz.
//
// How it works:
// - The source buffer holds one byte (sample) per pixel column.
// - The current line is counted from the bottom: line = wrapy - 1 - y.
// - For every pixel "cmp sample,line" is executed and its carry flag
// (set when sample >= line, unsigned) is shifted into a bit accumulator.
// - The accumulated bits index RenderTextMask (8 bytes = two 32-bit words
// per entry: word 0 = mask for the higher 4 bits, word 1 = mask for the
// lower 4 bits). Output word = (mask & (fg ^ bg)) ^ bg.
// Presumably each mask byte is 0xFF for a set bit, so set bits give the
// foreground color - TODO confirm against the RenderTextMask definition.
// - Foreground/background colors are single bytes read from the gradient
// tables at SSEGM_PAR / SSEGM_PAR2, indexed by the line from bottom.
// - R0 is advanced by the stores and is not restored by the final pop, so
// it is returned as the new destination pointer.
.thumb_func
.global RenderLevelGrad
RenderLevelGrad:
// push registers
// (8 words are pushed, so the first stack argument "segm" ends up at [SP+32])
push {r1-r7,lr}
// Input registers and stack content:
// R0 ... pointer to destination data buffer
// SP+0: R1 start X coordinate
// SP+4: R2 start Y coordinate (later: base pointer to sample data)
// SP+8: R3 width to display
// SP+12: R4
// SP+16: R5
// SP+20: R6
// SP+24: R7
// SP+28: LR
// SP+32: video segment (later: wrap width in X direction)
// get pointer to video segment -> R4
ldr r4,[sp,#32] // load video segment -> R4
// get wrap width -> [SP+32]
ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width
movs r7,#3 // mask to align to 32-bit
bics r5,r7 // align wrap
str r5,[sp,#32] // save wrap width (overwrites the segm argument slot)
// align X coordinate to 32-bit -> R1
bics r1,r7
// align remaining width -> [SP+8]
bics r3,r7
str r3,[sp,#8] // save new width
// current Y in direction from bottom to up -> R5
ldrh r5,[r4,#SSEGM_WRAPY] // get wrap height
subs r5,#1 // wrapy - 1
subs r5,r2 // subtract Y, get Y relative to bottom -> R5
// base pointer to sample data (without X) -> [SP+4], R2
ldr r2,[r4,#SSEGM_DATA] // pointer to sample data
str r2,[sp,#4] // save pointer to sample buffer
// prepare pointer to sample data with X -> R2
add r2,r1 // pointer to source sample buffer -> R2
// prepare foreground color, expand to 32-bit -> R6
ldr r6,[r4,#SSEGM_PAR] // pointer to gradient 1
ldrb r6,[r6,r5] // load foreground color (gradient 1 indexed by line from bottom)
lsls r3,r6,#8 // [1] shift foreground color << 8
orrs r3,r6 // [1] color expanded to 16 bits
lsls r6,r3,#16 // [1] shift 16-bit color << 16
orrs r6,r3 // [1] color expanded to 32 bits
// prepare background color, expand to 32 bits -> R4
ldr r4,[r4,#SSEGM_PAR2] // pointer to gradient 2 (R4 no longer points to the segment)
ldrb r4,[r4,r5] // load background color (gradient 2 indexed by line from bottom)
lsls r3,r4,#8 // shift background color << 8
orrs r3,r4 // color expanded to 16 bits
lsls r4,r3,#16 // shift 16-bit color << 16
orrs r4,r3 // color expanded to 32 bits
// [1] XOR foreground and background color -> R6
// (from here R6 holds fg^bg, so "(mask & R6) ^ R4" selects fg or bg per byte)
eors r6,r4 // [1] XOR foreground color with background color
// prepare pointer to conversion table -> LR
ldr r3,RenderLevelGrad_Addr // get pointer to conversion table -> R3
mov lr,r3 // conversion table -> LR
// prepare wrap width - start X -> R7
ldr r7,[sp,#32] // load wrap width
subs r7,r1 // pixels remaining to end of segment
// last 4-pixels
// If at most 4 pixels remain before the wrap point, render them with the
// 4-pixel code and let the outer loop continue with the full wrap width.
// NOTE(review): this path does not reduce the remaining width at [SP+8]
// before rendering those 4 pixels - confirm the total output cannot exceed w.
cmp r7,#4
bhi RenderLevelGrad_OutLoop
ldr r7,[sp,#32] // load wrap width
b RenderLevelGrad_Last // render last 4-pixels of first segment
// ---- start outer loop, render one part of segment
// Outer loop variables (* prepared before outer loop):
// R0 ... *pointer to destination data buffer
// R1 ... number of 4-pixels to generate in one part of segment
// R2 ... *pointer to source sample buffer
// R3 ... remaining width, later: (temporary)
// R4 ... *background color (expanded to 32-bit)
// R5 ... *current line Y (in direction from bottom to up)
// R6 ... *foreground color XOR background color (expanded to 32-bit)
// R7 ... *wrap width of this segment, later: (temporary)
// LR ... *pointer to conversion table
// [SP+0] ... (saved start X, not used by the loops)
// [SP+4] ... *base pointer to sample data (without X)
// [SP+8] ... *remaining width
// [SP+32] ... *wrap width
RenderLevelGrad_OutLoop:
// limit wrap width by total width -> R7
ldr r3,[sp,#8] // get remaining width
cmp r7,r3 // compare with wrap width
bls 2f // width is OK
mov r7,r3 // limit wrap width
// check number of pixels
2: cmp r7,#8 // check number of remaining pixels
bhs 5f // enough pixels remain to render 8-pixels
// check last 4-pixels
cmp r7,#4 // check last 4-pixels
blo 3f // all done
// ---- render last 4 pixels
// R7 must be preserved here: it decides after the store whether to continue.
RenderLevelGrad_Last:
// [1] clear sample accumulator
movs r1,#0 // [1] clear sample accumulator
// [4] get sample 0
ldrb r3,[r2,#0] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line (carry set if sample >= line)
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
// [4] get sample 1
ldrb r3,[r2,#1] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
// [4] get sample 2
ldrb r3,[r2,#2] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
// [4] get sample 3
ldrb r3,[r2,#3] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
adds r2,#4 // [1] shift pointer to source buffer
// [2] prepare conversion table -> R1
lsls r1,#3 // [1] multiply sample * 8 (one table entry is 8 bytes)
add r1,lr // [1] add pointer to conversion table
// [7] convert 4 pixels (lower 4 bits)
// (only 4 bits were accumulated, so only the second word of the entry is used)
ldr r1,[r1,#4] // [2] load mask for lower 4 bits
ands r1,r6 // [1] mask foreground color
eors r1,r4 // [1] combine with background color
stmia r0!,{r1} // [3] store 4 pixels
// check if continue with next segment
// (R7 is the full wrap width when arriving from the inner loop or from the
// first-segment path, otherwise the limited width of 4..7)
// local label 7 is not referenced within this function
7: ldr r2,[sp,#4] // get base pointer to sample data -> R2
cmp r7,#4
bhi RenderLevelGrad_OutLoop
// pop registers and return
// (R0 is not popped - it carries the new destination pointer to the caller)
3: pop {r1-r7,pc}
// ---- prepare to render 8-pixels
// prepare number of whole 4-pixels to render -> R1
5: lsrs r1,r7,#2 // shift width to get number of 4-pixels
lsls r7,r1,#2 // shift back to get number of pixels, rounded down -> R7
subs r3,r7 // get remaining width
str r3,[sp,#8] // save new remaining width
subs r1,#1 // number of 4-pixels - 1
// ---- [50*N-1] start inner loop, render in one part of segment
// Inner loop variables (* prepared before inner loop):
// R0 ... *pointer to destination data buffer
// R1 ... *number of 4-pixel groups - 1 (loop counter, reduced by 2 per pass)
// R2 ... *pointer to source sample buffer
// R3 ... sample
// R4 ... *background color (expanded to 32-bit)
// R5 ... *current line Y (in direction from bottom to up)
// R6 ... *foreground color XOR background color (expanded to 32-bit)
// R7 ... sample accumulator, conversion table
// LR ... *pointer to conversion table
// [SP+0] ...
// [SP+4] ... *base pointer to sample data (without X)
// [SP+8] ... *remaining width
// [SP+32] ... *wrap width
RenderLevelGrad_InLoopTop: // render 8 pixels in one loop step
// [1] clear sample accumulator
movs r7,#0 // [1] clear sample accumulator
// [4] get sample 0
ldrb r3,[r2,#0] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line (carry set if sample >= line)
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [4] get sample 1
ldrb r3,[r2,#1] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [4] get sample 2
ldrb r3,[r2,#2] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [4] get sample 3
ldrb r3,[r2,#3] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [4] get sample 4
ldrb r3,[r2,#4] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [4] get sample 5
ldrb r3,[r2,#5] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [4] get sample 6
ldrb r3,[r2,#6] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [5] get sample 7
ldrb r3,[r2,#7] // [2] get data sample -> R3
cmp r3,r5 // [1] compare sample with current line
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
adds r2,#8 // [1] shift pointer to source buffer
// [2] prepare conversion table -> R7
// (sample 0 is now in bit 7 of the accumulator, sample 7 in bit 0)
lsls r7,#3 // [1] multiply sample * 8 (one table entry is 8 bytes)
add r7,lr // [1] add pointer to conversion table
// [4] convert first 4 pixels (higher 4 bits)
ldr r3,[r7,#0] // [2] load mask for higher 4 bits
ands r3,r6 // [1] mask foreground color
eors r3,r4 // [1] combine with background color
// [7] convert second 4 pixels (lower 4 bits)
ldr r7,[r7,#4] // [2] load mask for lower 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r3,r7} // [3] store all 8 pixels (first 4 in R3, second 4 in R7)
// [2,3] loop counter
// Flags of this subtraction are used twice: "hi" continues the loop,
// "eq" (result exactly 0) means one extra 4-pixel group is still pending.
subs r1,#2 // [1] shift loop counter
bhi RenderLevelGrad_InLoopTop // [1,2] > 0, render next whole 8-pixels
// ---- end inner loop, continue with last 4-pixels, or start new part
// continue to outer loop
ldr r7,[sp,#32] // load wrap width (LDR leaves the flags of SUBS intact)
// local label 8 is not referenced within this function
8: beq RenderLevelGrad_Last // render last 4-pixels
ldr r2,[sp,#4] // get base pointer to sample data -> R2
b RenderLevelGrad_OutLoop // go back to outer loop
.align 2
// literal: address of the conversion table, loaded PC-relative above
RenderLevelGrad_Addr:
.word RenderTextMask

View file

@ -1,288 +0,0 @@
// ****************************************************************************
//
// VGA render GF_MTEXT
//
// ****************************************************************************
// u32 par SSEGM_PAR pointer to the font
// u32 par2 SSEGM_PAR2 LOW background color, HIGH foreground color
// u16 par3 font height
#include "../define.h" // common definitions of C and ASM
#include "hardware/regs/sio.h" // registers of hardware divider
#include "hardware/regs/addressmap.h" // SIO base address
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// render font pixel mask
.extern RenderTextMask // u32 RenderTextMask[512];
// extern "C" u8* RenderMText(u8* dbuf, int x, int y, int w, sSegm* segm)
// render 8-pixel mono text GF_MTEXT
// R0 ... destination data buffer
// R1 ... start X coordinate (in pixels, must be multiple of 4)
// R2 ... start Y coordinate (in graphics lines)
// R3 ... width to display (must be multiple of 4 and > 0)
// [stack] ... segm video segment sSegm
// Output new pointer to destination data buffer.
// 320 pixels takes 6.9 us on 151 MHz.
//
// How it works:
// - Y is divided by the font height (SSEGM_PAR3) with the SIO hardware
// divider: quotient = text row, remainder = line inside the character.
// - One font line is 256 bytes (one byte per character code); the font
// byte of a character is an 8-pixel bit pattern.
// - The font byte indexes RenderTextMask (8 bytes = two 32-bit words per
// entry: word 0 = mask for the higher 4 bits, word 1 = mask for the lower
// 4 bits). Output word = (mask & (fg ^ bg)) ^ bg.
// - Colors come from SSEGM_PAR2: byte 0 = background, byte 1 = foreground.
// - R0 is advanced by the stores and is not restored by the final pop, so
// it is returned as the new destination pointer.
.thumb_func
.global RenderMText
RenderMText:
// push registers
// (8 words are pushed, so the first stack argument "segm" ends up at [SP+32])
push {r1-r7,lr}
// Stack content:
// SP+0: R1 start X coordinate
// SP+4: R2 start Y coordinate (later: base pointer to text data row)
// SP+8: R3 width to display
// SP+12: R4
// SP+16: R5
// SP+20: R6
// SP+24: R7
// SP+28: LR
// SP+32: video segment (later: wrap width in X direction)
// get pointer to video segment -> R4
ldr r4,[sp,#32] // load video segment -> R4
// start divide Y/font height
ldr r6,RenderMText_pSioBase // get address of SIO base -> R6
str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate
ldrh r2,[r4,#SSEGM_PAR3] // font height -> R2
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height
// - now we must wait at least 8 clock cycles to get result of division
// (the instructions below do useful work during that wait - do not reorder
// them ahead of the divisor store or remove them without re-checking timing)
// [6] get wrap width -> [SP+32]
ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width
movs r7,#3 // [1] mask to align to 32-bit
bics r5,r7 // [1] align wrap
str r5,[sp,#32] // [2] save wrap width (overwrites the segm argument slot)
// [1] align X coordinate to 32-bit
bics r1,r7 // [1]
// [3] align remaining width
bics r3,r7 // [1]
str r3,[sp,#8] // [2] save new width
// load result of division Y/font_height -> R5 Y relative at row, R2 Y row
// Note: QUOTIENT must be read last
ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row
ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R2, index of row
// pointer to font line -> R3
lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long)
ldr r3,[r4,#SSEGM_PAR] // get pointer to font
add r3,r5 // line offset + font base -> pointer to current font line R3
// base pointer to text data (without X) -> [SP+4], R2
ldrh r5,[r4,#SSEGM_WB] // get pitch of rows
muls r2,r5 // Y * WB -> offset of row in text buffer
ldr r5,[r4,#SSEGM_DATA] // pointer to data
add r2,r5 // base address of text buffer
str r2,[sp,#4] // save pointer to text buffer
// prepare pointer to text data with X -> R2
lsrs r6,r1,#3 // convert X to character index (1 character is 8 pixels width)
add r2,r6 // pointer to source text buffer -> R2
// prepare foreground color, expand to 32-bit -> R6
ldrb r6,[r4,#SSEGM_PAR2+1] // load foreground color
lsls r7,r6,#8 // [1] shift foreground color << 8
orrs r7,r6 // [1] color expanded to 16 bits
lsls r6,r7,#16 // [1] shift 16-bit color << 16
orrs r6,r7 // [1] color expanded to 32 bits
// prepare background color, expand to 32 bits -> R4
ldrb r4,[r4,#SSEGM_PAR2] // load background color (R4 no longer points to the segment)
lsls r5,r4,#8 // shift background color << 8
orrs r5,r4 // color expanded to 16 bits
lsls r4,r5,#16 // shift 16-bit color << 16
orrs r4,r5 // color expanded to 32 bits
// [1] XOR foreground and background color -> R6
// (from here R6 holds fg^bg, so "(mask & R6) ^ R4" selects fg or bg per byte)
eors r6,r4 // [1] XOR foreground color with background color
// prepare pointer to conversion table -> LR
ldr r5,RenderMText_Addr // get pointer to conversion table -> R5
mov lr,r5 // conversion table -> LR
// ---- render 2nd half of first character
// R0 ... pointer to destination data buffer
// R1 ... start X coordinate
// R2 ... pointer to source text buffer
// R3 ... pointer to font line
// R4 ... background color (expanded to 32-bit)
// R5 ... (temporary)
// R6 ... foreground color XOR background color (expanded to 32-bit)
// R7 ... (temporary)
// LR ... pointer to conversion table
// [SP+4] ... base pointer to text data (without X)
// [SP+8] ... remaining width
// [SP+32] ... wrap width
// check bit 2 of X coordinate - check if image starts with 2nd half of first character
// (the shift by 29 moves bit 2 into bit 31, i.e. into the N flag)
lsls r5,r1,#29 // check bit 2 of X coordinate
bpl 2f // bit 2 not set, starting even 4-pixels
// [5] load font sample -> R5
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
ldrb r5,[r3,r5] // [2] load font sample -> R5
adds r2,#1 // [1] shift pointer to source text buffer
// [2] prepare conversion table -> R5
lsls r5,#3 // [1] multiply font sample * 8 (one table entry is 8 bytes)
add r5,lr // [1] add pointer to conversion table
// [6] convert second 4 pixels (lower 4 bits)
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r7} // [2] store second 4 pixels
// shift X coordinate
adds r1,#4 // shift X coordinate
// check end of segment
ldr r7,[sp,#32] // load wrap width
cmp r1,r7 // end of segment?
blo 1f
movs r1,#0 // reset X coordinate
ldr r2,[sp,#4] // get base pointer to text data -> R2
// shift remaining width
// (account for the 4 pixels just rendered)
1: ldr r7,[sp,#8] // get remaining width
subs r7,#4 // shift width
str r7,[sp,#8] // save new width
// prepare wrap width - start X -> R7
2: ldr r7,[sp,#32] // load wrap width
subs r7,r1 // pixels remaining to end of segment
// ---- start outer loop, render one part of segment
// Outer loop variables (* prepared before outer loop):
// R0 ... *pointer to destination data buffer
// R1 ... number of characters to generate in one part of segment
// R2 ... *pointer to source text buffer
// R3 ... *pointer to font line
// R4 ... *background color (expanded to 32-bit)
// R5 ... (temporary)
// R6 ... *foreground color XOR background color (expanded to 32-bit)
// R7 ... *wrap width of this segment, later: temporary
// LR ... *pointer to conversion table
// [SP+4] ... *base pointer to text data (without X)
// [SP+8] ... *remaining width
// [SP+32] ... *wrap width
RenderMText_OutLoop:
// limit wrap width by total width -> R7
ldr r5,[sp,#8] // get remaining width
cmp r7,r5 // compare with wrap width
bls 2f // width is OK
mov r7,r5 // limit wrap width
// check if remain whole characters
2: cmp r7,#8 // check number of remaining pixels
bhs 5f // enough characters remain
// check if 1st part of last character remains
cmp r7,#4 // check 1st part of last character
blo 3f // all done
// ---- render 1st part of last character
// R7 must be preserved here: it decides after the store whether to continue.
RenderMText_Last:
// [5] load font sample -> R5
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
ldrb r5,[r3,r5] // [2] load font sample -> R5
adds r2,#1 // [1] shift pointer to source text buffer
// [2] prepare conversion table -> R5
lsls r5,#3 // [1] multiply font sample * 8 (one table entry is 8 bytes)
add r5,lr // [1] add pointer to conversion table
// [6] convert first 4 pixels (higher 4 bits)
ldr r1,[r5,#0] // [2] load mask for higher 4 bits
ands r1,r6 // [1] mask foreground color
eors r1,r4 // [1] combine with background color
stmia r0!,{r1} // [2] store first 4 pixels
// check if continue with next segment
// (R7 is the full wrap width when arriving from the inner loop, otherwise
// the limited width of 4..7)
ldr r2,[sp,#4] // get base pointer to text data -> R2
cmp r7,#4
bhi RenderMText_OutLoop
// pop registers and return
// (R0 is not popped - it carries the new destination pointer to the caller)
3: pop {r1-r7,pc}
// ---- prepare to render whole characters
// prepare number of whole characters to render -> R1
5: lsrs r1,r7,#2 // shift to get number of characters*2
lsls r7,r1,#2 // shift back to get number of pixels, rounded down -> R7
subs r5,r7 // get remaining width
str r5,[sp,#8] // save new remaining width
subs r1,#1 // number of characters*2 - 1
// ---- [22*N-1] start inner loop, render characters in one part of segment
// Inner loop variables (* prepared before inner loop):
// R0 ... *pointer to destination data buffer
// R1 ... *number of characters to generate*2 - 1 (loop counter)
// R2 ... *pointer to source text buffer
// R3 ... *pointer to font line
// R4 ... *background color (expanded to 32-bit)
// R5 ... font sample
// R6 ... *foreground color XOR background color (expanded to 32-bit)
// R7 ... (temporary)
// LR ... *pointer to conversion table
RenderMText_InLoop:
// [5] load font sample -> R5
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
ldrb r5,[r3,r5] // [2] load font sample -> R5
adds r2,#1 // [1] shift pointer to source text buffer
// [2] prepare conversion table -> R5
lsls r5,#3 // [1] multiply font sample * 8 (one table entry is 8 bytes)
add r5,lr // [1] add pointer to conversion table
// [6] convert first 4 pixels (higher 4 bits)
ldr r7,[r5,#0] // [2] load mask for higher 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r7} // [2] store first 4 pixels
// [6] convert second 4 pixels (lower 4 bits)
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r7} // [2] store second 4 pixels
// [2,3] loop counter
// Flags of this subtraction are used twice: "hi" continues the loop,
// "eq" (result exactly 0) means one extra half character is still pending.
subs r1,#2 // [1] shift loop counter
bhi RenderMText_InLoop // [1,2] > 0, render next whole character
// ---- end inner loop, continue with last character, or start new part
// continue to outer loop
ldr r7,[sp,#32] // load wrap width (LDR leaves the flags of SUBS intact)
beq RenderMText_Last // render 1st half of last character
ldr r2,[sp,#4] // get base pointer to text data -> R2
b RenderMText_OutLoop // go back to outer loop
.align 2
// literals loaded PC-relative above
RenderMText_Addr:
.word RenderTextMask
RenderMText_pSioBase:
.word SIO_BASE // address of SIO base

View file

@ -1,297 +0,0 @@
// ****************************************************************************
//
// VGA render GF_OSCIL
//
// ****************************************************************************
#include "../define.h" // common definitions of C and ASM
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// render font pixel mask
.extern RenderTextMask // u32 RenderTextMask[512];
// extern "C" u8* RenderOscil(u8* dbuf, int x, int y, int w, sSegm* segm);
// render oscilloscope graph GF_OSCIL
// dbuf ... destination data buffer
// x ... start X coordinate (must be multiple of 4)
// y ... start Y coordinate
// w ... width of this segment (must be multiple of 4)
// segm ... video segment
// Output new dbuf pointer.
// 320 pixels takes 16.6 us on 151 MHz.
.thumb_func
.global RenderOscil
RenderOscil:
// push registers
push {r1-r7,lr}
// Input registers and stack content:
// R0 ... pointer to destination data buffer
// SP+0: R1 start X coordinate
// SP+4: R2 start Y coordinate (later: base pointer to sample data)
// SP+8: R3 width to display
// SP+12: R4
// SP+16: R5
// SP+20: R6
// SP+24: R7
// SP+28: LR
// SP+32: video segment (later: wrap width in X direction)
// get pointer to video segment -> R4
ldr r4,[sp,#32] // load video segment -> R4
// get wrap width -> [SP+32]
ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width
movs r7,#3 // mask to align to 32-bit
bics r5,r7 // align wrap
str r5,[sp,#32] // save wrap width
// align X coordinate to 32-bit -> R1
bics r1,r7
// align remaining width -> [SP+8]
bics r3,r7
str r3,[sp,#8] // save new width
// current Y in direction from bottom to up -> R5
ldrh r5,[r4,#SSEGM_WRAPY] // get wrap height
subs r5,#1 // wrapy - 1
subs r5,r2 // subtract Y, get Y relative to bottom -> R5
// get pixel height -> LR
ldrb r3,[r4,#SSEGM_PAR2] // get pixel height
mov lr,r3 // pixel height -> LR
// base pointer to sample data (without X) -> [SP+4], R2
ldr r2,[r4,#SSEGM_DATA] // pointer to sample data
str r2,[sp,#4] // save pointer to sample buffer
// prepare pointer to sample data with X -> R2
add r2,r1 // pointer to source sample buffer -> R2
// prepare foreground color, expand to 32-bit -> R6
ldrb r6,[r4,#SSEGM_PAR+1] // load foreground color
lsls r3,r6,#8 // [1] shift foreground color << 8
orrs r3,r6 // [1] color expanded to 16 bits
lsls r6,r3,#16 // [1] shift 16-bit color << 16
orrs r6,r3 // [1] color expanded to 32 bits
// prepare background color, expand to 32 bits -> R4
ldrb r4,[r4,#SSEGM_PAR] // load background color
lsls r3,r4,#8 // shift background color << 8
orrs r3,r4 // color expanded to 16 bits
lsls r4,r3,#16 // shift 16-bit color << 16
orrs r4,r3 // color expanded to 32 bits
// [1] XOR foreground and background color -> R6
eors r6,r4 // [1] XOR foreground color with background color
// prepare wrap width - start X -> R7
ldr r7,[sp,#32] // load wrap width
subs r7,r1 // pixels remaining to end of segment
// last 4-pixels
cmp r7,#4
bhi RenderOscil_OutLoop
ldr r7,[sp,#32] // load wrap width
b RenderOscil_Last // render last 4-pixels of first segment
// ---- start outer loop, render one part of segment
// Outer loop variables (* prepared before outer loop):
// R0 ... *pointer to destination data buffer
// R1 ... number of 4-pixels to generate in one part of segment
// R2 ... *pointer to source sample buffer
// R3 ... remaining width, later: (temporary)
// R4 ... *background color (expanded to 32-bit)
// R5 ... *current line Y (in direction from bottom to up)
// R6 ... *foreground color (expanded to 32-bit)
// R7 ... *wrap width of this segment, later: (temporary)
// LR ... *pixel height
// [SP+4] ... *base pointer to sample data (without X)
// [SP+8] ... *remaining width
// [SP+32] ... *wrap width
RenderOscil_OutLoop:
// limit wrap width by total width -> R7
ldr r3,[sp,#8] // get remaining width
cmp r7,r3 // compare with wrap width
bls 2f // width is OK
mov r7,r3 // limit wrap width
// check number of pixels
2: cmp r7,#8 // check number of remaining pixels
bhs 5f // enough pixels remain to render 8-pixels
// check last 4-pixels
cmp r7,#4 // check last 4-pixels
blo 3f // all done
// ---- render last 4 pixels
RenderOscil_Last:
// [1] clear sample accumulator
movs r1,#0 // [1] clear sample accumulator
// [5] get sample 0
ldrb r3,[r2,#0] // [2] get data sample -> R3
subs r3,r5 // [1] distance from current line
cmp lr,r3 // [1] compare with pixel height
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R7
// [5] get sample 1
ldrb r3,[r2,#1] // [2] get data sample -> R3
subs r3,r5 // [1] distance from current line
cmp lr,r3 // [1] compare with pixel height
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R7
// [5] get sample 2
ldrb r3,[r2,#2] // [2] get data sample -> R3
subs r3,r5 // [1] distance from current line
cmp lr,r3 // [1] compare with pixel height
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R7
// [5] get sample 3
ldrb r3,[r2,#3] // [2] get data sample -> R3
subs r3,r5 // [1] distance from current line
cmp lr,r3 // [1] compare with pixel height
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R7
adds r2,#4 // [1] shift pointer to source buffer
// [4] prepare conversion table -> R1
lsls r1,#3 // [1] multiply sample * 8
ldr r3,RenderOscil_Addr // [2] get pointer to conversion table -> R3
add r1,r3 // [1] add pointer to conversion table
// [7] convert 4 pixels (lower 4 bits)
ldr r1,[r1,#4] // [2] load mask for lower 4 bits
ands r1,r6 // [1] mask foreground color
eors r1,r4 // [1] combine with background color
stmia r0!,{r1} // [3] store 4 pixels
// check if continue with next segment
ldr r2,[sp,#4] // get base pointer to sample data -> R2
cmp r7,#4
bhi RenderOscil_OutLoop
// pop registers and return
3: pop {r1-r7,pc}
// ---- prepare to render 8-pixels
// prepare number of whole 4-pixels to render -> R1
5: lsrs r1,r7,#2 // shift width to get number of 4-pixels
lsls r7,r1,#2 // shift back to get number of pixels, rounded down -> R7
subs r3,r7 // get remaining width
str r3,[sp,#8] // save new remaining width
subs r1,#1 // number of 4-pixels - 1
// ---- [50*N-1] start inner loop, render in one part of segment
// Inner loop variables (* prepared before inner loop):
// R0 ... *pointer to destination data buffer
// R1 ... *number of 4-pixels to generate*2 - 1 (loop counter)
// R2 ... *pointer to source sample buffer
// R3 ... sample
// R4 ... *background color (expanded to 32-bit)
// R5 ... *current line Y (in direction from bottom to up)
// R6 ... *foreground color (expanded to 32-bit)
// R7 ... sample accumulator, conversion table
// LR ... *pixel height
// [SP+4] ... *base pointer to sample data (without X)
// [SP+8] ... *remaining width
// [SP+32] ... *wrap width
RenderOscil_InLoop: // render 8 pixels in one loop step, top half of graph
// [1] clear sample accumulator
movs r7,#0 // [1] clear sample accumulator
// [5] get sample 0
ldrb r3,[r2,#0] // [2] get data sample -> R3
subs r3,r5 // [1] distance from current line
cmp lr,r3 // [1] compare with pixel height
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [5] get sample 1
ldrb r3,[r2,#1] // [2] get data sample -> R3
subs r3,r5 // [1] distance from current line
cmp lr,r3 // [1] compare with pixel height
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [5] get sample 2
ldrb r3,[r2,#2] // [2] get data sample -> R3
subs r3,r5 // [1] distance from current line
cmp lr,r3 // [1] compare with pixel height
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [5] get sample 3
ldrb r3,[r2,#3] // [2] get data sample -> R3
subs r3,r5 // [1] distance from current line
cmp lr,r3 // [1] compare with pixel height
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [5] get sample 4
ldrb r3,[r2,#4] // [2] get data sample -> R3
subs r3,r5 // [1] distance from current line
cmp lr,r3 // [1] compare with pixel height
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [5] get sample 5
ldrb r3,[r2,#5] // [2] get data sample -> R3
subs r3,r5 // [1] distance from current line
cmp lr,r3 // [1] compare with pixel height
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [5] get sample 6
ldrb r3,[r2,#6] // [2] get data sample -> R3
subs r3,r5 // [1] distance from current line
cmp lr,r3 // [1] compare with pixel height
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
// [6] get sample 7
ldrb r3,[r2,#7] // [2] get data sample -> R3
subs r3,r5 // [1] distance from current line
cmp lr,r3 // [1] compare with pixel height
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
adds r2,#8 // [1] shift pointer to source buffer
// [4] prepare conversion table -> R7
lsls r7,#3 // [1] multiply sample * 8
ldr r3,RenderOscil_Addr // [2] get pointer to conversion table -> R3
add r7,r3 // [1] add pointer to conversion table
// [4] convert first 4 pixels (higher 4 bits)
ldr r3,[r7,#0] // [2] load mask for higher 4 bits
ands r3,r6 // [1] mask foreground color
eors r3,r4 // [1] combine with background color
// [7] convert second 4 pixels (lower 4 bits)
ldr r7,[r7,#4] // [2] load mask for lower 4 bits
ands r7,r6 // [1] mask foreground color
eors r7,r4 // [1] combine with background color
stmia r0!,{r3,r7} // [3] store second 4 pixels
// [2,3] loop counter
subs r1,#2 // [1] shift loop counter
bhi RenderOscil_InLoop // [1,2] > 0, render next whole 8-pixels
// ---- end inner loop, continue with last 4-pixels, or start new part
// continue to outer loop
ldr r7,[sp,#32] // load wrap width
8: beq RenderOscil_Last // render last 4-pixels
ldr r2,[sp,#4] // get base pointer to sample data -> R2
b RenderOscil_OutLoop // go back to outer loop
.align 2
RenderOscil_Addr:
.word RenderTextMask

View file

@ -1,190 +0,0 @@
// ****************************************************************************
//
// VGA render GF_OSCLINE
//
// ****************************************************************************
#include "../define.h" // common definitions of C and ASM
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// render font pixel mask
.extern RenderTextMask // u32 RenderTextMask[512];
// extern "C" u8* RenderOscLine(u8* dbuf, int x, int y, int w, sSegm* segm);
// Render one scanline of the oscilloscope line graph GF_OSCLINE.
// Every sample byte produces 2 output pixels: X, width and wrap width are
// halved on entry and each loop step stores one 16-bit color pair.
// A pixel pair gets the foreground color when the sample equals the current
// line, or when the current line lies strictly between the previous sample
// and the current sample; otherwise it gets the background color.
// dbuf ... destination data buffer
// x ... start X coordinate (must be multiple of 4)
// y ... start Y coordinate
// w ... width of this segment (must be multiple of 4)
// segm ... video segment (5th argument, read from the stack)
// Output new dbuf pointer (R0 is advanced by 2 bytes per rendered sample).
// 320 pixels takes 21.5 us on 151 MHz.
.thumb_func
.global RenderOscLine
RenderOscLine:
// push registers (R2 and R3 are pushed as well; their stack slots are reused as local variables)
push {r2-r7,lr}
// Input registers and stack content:
// R0 ... pointer to destination data buffer
// R1 ... start X coordinate
// SP+0: R2 start Y coordinate (later: base pointer to sample data)
// SP+4: R3 width to display
// SP+8: R4
// SP+12: R5
// SP+16: R6
// SP+20: R7
// SP+24: LR
// SP+28: video segment (later: wrap width in X direction)
// get pointer to video segment -> R4
ldr r4,[sp,#28] // load video segment -> R4
// get wrap width/2 -> [SP+28] (overwrites the stacked segm argument, R4 keeps the pointer)
ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width
lsrs r5,#1 // wrap width / 2
str r5,[sp,#28] // save wrap width
// X coordinate/2 -> R1 (index of the first sample)
lsrs r1,#1
// remaining width/2 -> [SP+4] (number of samples to render)
lsrs r3,#1
str r3,[sp,#4] // save new width
// current Y in direction from bottom to up -> LR
ldrh r5,[r4,#SSEGM_WRAPY] // get wrap height
subs r5,#1 // wrapy - 1
subs r5,r2 // subtract Y, get Y relative to bottom -> R5
mov lr,r5 // current line = wrapy - 1 - y -> LR
// base pointer to sample data (without X) -> [SP+0], R2
ldr r2,[r4,#SSEGM_DATA] // pointer to sample data
str r2,[sp,#0] // save pointer to sample buffer
// prepare pointer to sample data with X -> R2
add r2,r1 // pointer to source sample buffer -> R2
// prepare foreground color, expand to 16 bits (2 pixels) -> R6
ldrb r6,[r4,#SSEGM_PAR+1] // load foreground color
lsls r7,r6,#8 // foreground color << 8
orrs r6,r7 // color expanded to 16 bits
// prepare background color, expand to 16 bits (2 pixels) -> R4 (segment pointer is no longer needed)
ldrb r4,[r4,#SSEGM_PAR] // load background color
lsls r7,r4,#8 // background color << 8
orrs r4,r7 // color expanded to 16 bits
// prepare wrap width - start X -> R1
ldr r7,[sp,#28] // load wrap width
subs r1,r7,r1 // samples remaining to end of wrap width
// ---- start outer loop, render one part of segment
// Outer loop variables (* prepared before outer loop):
// R0 ... *pointer to destination data buffer
// R1 ... *wrap width of this segment, later: number of samples to generate in one part of segment
// R2 ... *pointer to source sample buffer
// R3 ... remaining width, later: (temporary)
// R4 ... *background color
// R5 ... (temporary)
// R6 ... *foreground color
// R7 ... (temporary)
// LR ... *current line Y (in direction from bottom to up)
// [SP+0] ... *base pointer to sample data (without X)
// [SP+4] ... *remaining width
// [SP+28] ... *wrap width
RenderOscLine_OutLoop:
// limit wrap width by total width -> R1
ldr r3,[sp,#4] // get remaining width
cmp r1,r3 // compare with wrap width
bls 2f // width is OK
mov r1,r3 // limit wrap width
// check number of samples
2: cmp r1,#0 // check number of remaining samples
beq RenderOscLine_Stop // nothing left to render, stop
subs r3,r1 // get remaining width
str r3,[sp,#4] // save new remaining width
// ---- start inner loop, render in one part of segment
// Inner loop variables (* prepared before inner loop):
// R0 ... *pointer to destination data buffer
// R1 ... *number of samples to generate (loop counter)
// R2 ... *pointer to source sample buffer
// R3 ... sample
// R4 ... *background color
// R5 ... previous sample
// R6 ... *foreground color
// R7 ... current color
// LR ... *current line Y (in direction from bottom to up)
// [SP+0] ... *base pointer to sample data (without X)
// [SP+4] ... *remaining width
// [SP+28] ... *wrap width
// The previous sample is preset to the first sample of this part, so the
// first pixel pair of each part is lit only when its sample equals the line.
ldrb r5,[r2,#0] // [2] prepare previous sample -> R5
RenderOscLine_InLoop: // process one sample (2 output pixels) per loop step
// [3] get sample
ldrb r3,[r2,#0] // [2] get data sample -> R3
adds r2,#1 // [1] increment pointer
// [1] preset to background color
mov r7,r4 // [1] preset to background color
// [3..8] (sample > previous sample) AND (sample > line) AND (line > previous sample) - display pixel
cmp r3,lr // [1] compare sample with line
beq 4f // [1,2] (sample == line), true, always display pixel
blo 2f // [1,2] (sample < line), false
cmp r3,r5 // [1] compare sample with previous sample
bls 2f // [1,2] (sample <= previous), false
cmp lr,r5 // [1] compare line with previous sample
bhi 4f // [1,2] (line > previous), true
// [3..7] (sample < previous sample) AND (sample < line) AND (line < previous sample) - display pixel
2: cmp r3,r5 // [1] compare sample with previous sample
bhs 6f // [1,2] (sample >= previous), false
cmp r3,lr // [1] compare sample with line
bhs 6f // [1,2] (sample >= line), false
cmp lr,r5 // [1] compare line with previous sample
bhs 6f // [1,2] (line >= previous), false
// [1] use foreground color
4: mov r7,r6 // [1] use foreground color
// [3] write 2 pixels
6: strh r7,[r0,#0] // [2] write 2 pixels (16-bit expanded color)
adds r0,#2 // [1] increment pointer
// [1] save previous sample
mov r5,r3 // [1] current sample becomes previous sample
// [2,3] loop counter
subs r1,#1 // [1] shift loop counter
bne RenderOscLine_InLoop // [1,2] render next sample
// ---- end inner loop, start new part
// continue to outer loop, restart reading from the beginning of the sample buffer
ldr r1,[sp,#28] // load wrap width
ldr r2,[sp,#0] // get base pointer to sample data -> R2
b RenderOscLine_OutLoop // go back to outer loop
RenderOscLine_Stop:
// pop registers and return (R0 = new destination pointer)
pop {r2-r7,pc}
.align 2
// pointer to font pixel mask table
// NOTE(review): this literal is not referenced anywhere in RenderOscLine - appears to be unused
RenderOscLine_Addr:
.word RenderTextMask

View file

@ -1,360 +0,0 @@
// ****************************************************************************
//
// VGA render LAYERMODE_PERSP*
//
// ****************************************************************************
// img ... (const u8*) SLAYER_IMG image data
// par ... (const void*) SLAYER_PAR pointer to 6 matrix integer parameters m11,m12..m23
// horiz ... (s8) SLAYER_HORIZ horizon offset/4 (0=no perspective, <0 ceiling)
// xbits ... (u8) SLAYER_XBITS number of bits of image width
// ybits ... (u8) SLAYER_YBITS number of bits of image height
// w ... (u16) SLAYER_W destination width
// h ... (u16) SLAYER_H destination height
#include "../define.h" // common definitions of C and ASM
#include "hardware/regs/sio.h" // registers of hardware divider
#include "hardware/regs/addressmap.h" // SIO base address
#define ACCUM0_OFFSET 0
#define ACCUM1_OFFSET 4
#define BASE0_OFFSET 8
#define BASE1_OFFSET 12
#define BASE2_OFFSET 16
#define POP_LANE0_OFFSET 20
#define POP_LANE1_OFFSET 24
#define POP_FULL_OFFSET 28
#define PEEK_LANE0_OFFSET 32
#define PEEK_LANE1_OFFSET 36
#define PEEK_FULL_OFFSET 40
#define CTRL_LANE0_OFFSET 44
#define CTRL_LANE1_OFFSET 48
#define ACCUM0_ADD_OFFSET 52
#define ACCUM1_ADD_OFFSET 56
#define BASE_1AND0_OFFSET 60
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// extern "C" void RenderPersp(u8* dbuf, int y, sLayer* scr)
// render one scanline of a layer with transformation matrix LAYERMODE_PERSP*
// The source image is sampled through the interpolator addressed by
// RenderPersp_Interp (INTERP1): accum0/accum1 hold the start texture
// coordinates, base0/base1 the per-pixel increments and base2 the image
// address. Every read of POP_FULL yields the address of the next source pixel.
// The matrix terms are scaled by a distance coefficient "dist" taken from the
// SIO hardware divider (1<<FRACT when perspective is off).
// R0 ... dbuf pointer to data buffer
// R1 ... y coordinate of scanline (relative in destination image)
// R2 ... scr pointer to layer screen structure sLayer
.thumb_func
.global RenderPersp
RenderPersp:
// push registers
push {r4-r7,lr}
// Stack content and input variables:
// R0 dbuf pointer to data buffer
// R1 Y coordinate of scanline
// R2 scr pointer to layer screen structure sLayer
// R3
// SP+0: R4
// SP+4: R5
// SP+8: R6
// SP+12: R7
// SP+16: LR
// R0 ... pointer to destination data buffer
// R1 ... Y coordinate
// R2 ... sLayer
// load horizon offset -> R4, check if use perspective
ldr r6,RenderPersp_pSioBase // get address of SIO base -> R6
ldrh r5,[r2,#SLAYER_H] // get destination height -> R5
ldrb r4,[r2,#SLAYER_HORIZ] // get horizon offset -> R4
sxtb r4,r4 // signed extension
lsls r4,#2 // horizon * 4, horizon = 0 ? (sets Z and N flags used by the 2 branches below)
bne 2f // use perspective
// not using perspective, start Y coordinate y0 = y - h/2 -> R12
lsrs r5,#1 // destination height/2 -> R5
subs r1,r5 // y - h/2 -> R1
mov r12,r1 // current coordinate Y0 = y - h/2 -> R12
// prepare divide result to get 1<<FRACT (quotient is read back later in "set matrix")
movs r5,#1 // R5 <- 1
str r5,[r6,#SIO_DIV_UDIVISOR_OFFSET] // divisor = 1
lsls r5,#FRACT // constant 1<<FRACT -> R5
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL
b 4f
// using perspective, check ceiling mode (N flag is still valid from "lsls r4,#2", bne does not change flags)
2: bpl 3f // horizon is not negative
subs r1,r5,r1 // negate, y = h - y
subs r1,#1 // y = h - 1 - y
negs r4,r4 // absolute value of horizon
// prepare current coordinate Y0 = y - h -> R12
3: subs r7,r1,r5 // y - h = current Y coordinate -> R7
mov r12,r7 // store current coordinate Y0 -> R12
// start calculating distance coefficient dist = FRACTMUL*h/(y + horiz), horiz = SLAYER_HORIZ*4
// (operands are written to the divider here, the quotient is read back later in "set matrix")
lsls r5,#FRACT // destination height * FRACTMUL -> R5
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h
adds r1,r4 // horizon + y -> R1
str r1,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz
// R0 ... pointer to destination data buffer
// R2 ... sLayer
// R12 ... current coordinate Y0
// prepare start coordinate X0 = -w/2 -> LR
4: ldrh r3,[r2,#SLAYER_W] // get destination width -> R3
lsrs r5,r3,#1 // width/2
negs r5,r5 // negate
mov lr,r5 // store start coordinate X0 -> LR
// prepare number of 4-pixels (loop counter) -> R7
lsrs r7,r3,#2 // width/4 -> R7
// get number of bits of image width "xbits" -> R1
ldrb r1,[r2,#SLAYER_XBITS] // number of bits of image width -> R1
// get number of bits of image height "ybits" -> R4
ldrb r4,[r2,#SLAYER_YBITS] // number of bits of image height -> R4
// prepare address of interpolator base -> R3
ldr r3,RenderPersp_Interp // get address of interpolator base -> R3
// R0 ... pointer to destination data buffer
// R1 ... number of bits of image width xbits
// R2 ... sLayer
// R3 ... interpolator base
// R4 ... number of bits of image height ybits
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// ---- setup interpolator
// set image base to base2
ldr r6,[r2,#SLAYER_IMG] // load image base
str r6,[r3,#BASE2_OFFSET] // set image base
// set control word of lane 1 - add raw lane base back to accumulator, shift "FRACT-xbits", mask xbits...xbits+ybits-1
ldr r6,RenderPersp_Ctrl // load control word
subs r6,r1 // FRACT - xbits (subtracted directly from the control word; relies on the shift field starting at bit 0)
lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift xbits to mask LSB position -> R5
orrs r6,r5 // add xbits to control word
subs r1,#1 // xbits - 1 -> R1
adds r5,r1,r4 // xbits-1+ybits -> R5
lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to MSB mask position
orrs r6,r5 // add to control word
str r6,[r3,#CTRL_LANE1_OFFSET] // set control word of lane 1
// R0 ... pointer to destination data buffer
// R1 ... image width xbits-1
// R2 ... sLayer
// R3 ... interpolator base
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// set control word of lane 0 - add raw lane base back to accumulator, shift "FRACT", mask 0..xbits-1
ldr r6,RenderPersp_Ctrl // load control word
lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift xbits-1 to mask MSB position
orrs r6,r1 // add to control word
str r6,[r3,#CTRL_LANE0_OFFSET] // set control word of lane 0
// R0 ... pointer to destination data buffer
// R2 ... sLayer
// R3 ... interpolator base
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// ---- set matrix
// get pointer to matrix -> R4
ldr r4,[r2,#SLAYER_PAR] // get pointer to matrix -> R4
// get distance coefficient dist -> R1 (result of the division prepared at the start of the function)
ldr r1,RenderPersp_pSioBase // get address of SIO base -> R1
ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient
// r4+0 ... m11
// r4+4 ... m12
// r4+8 ... m13
// r4+12 ... m21
// r4+16 ... m22
// r4+20 ... m23
// set m11 -> R5 base0
ldr r5,[r4,#0] // load m11
muls r5,r1 // m11*dist
asrs r5,#FRACT // (m11*dist)>>FRACT
str r5,[r3,#BASE0_OFFSET] // set base0
// set m21 -> R6 base1
ldr r6,[r4,#12] // load m21
muls r6,r1 // m21*dist
asrs r6,#FRACT // (m21*dist)>>FRACT
str r6,[r3,#BASE1_OFFSET] // set base1
// R0 ... pointer to destination data buffer
// R1 ... distance coefficient
// R3 ... interpolator base
// R4 ... pointer to matrix
// R5 ... m11 scaled by distance, (m11*dist)>>FRACT
// R6 ... m21 scaled by distance, (m21*dist)>>FRACT
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// set x0*m11 + y0*m12 + m13 -> accum0 (m11, m12 are the distance-scaled values)
mov r2,lr // start coordinate X0 -> R2
muls r5,r2 // x0*m11 -> R5
muls r2,r6 // x0*m21 -> R2
mov lr,r1 // save distance coefficient -> LR
ldr r6,[r4,#4] // load m12 -> R6
muls r1,r6 // m12*dist -> R1
asrs r1,#FRACT // (m12*dist)>>FRACT -> R1
mov r6,r12 // load coordinate Y0 -> R6
muls r1,r6 // y0*m12 -> R1
adds r5,r1 // x0*m11 + y0*m12 -> R5
ldr r1,[r4,#8] // load m13 -> R1
adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5
str r5,[r3,#ACCUM0_OFFSET] // set accum0
// R0 ... pointer to destination data buffer
// R2 ... x0*m21
// R3 ... interpolator base
// R4 ... pointer to matrix
// R6 ... current coordinate Y0
// R7 ... width/4
// LR ... distance coefficient
// set x0*m21 + y0*m22 + m23 -> accum1 (m21, m22 are the distance-scaled values)
ldr r1,[r4,#16] // load m22 -> R1
mov r5,lr // distance coefficient -> R5
muls r1,r5 // m22*dist
asrs r1,#FRACT // (m22*dist)>>FRACT -> R1
muls r1,r6 // y0*m22 -> R1
adds r2,r1 // x0*m21 + y0*m22 -> R2
ldr r1,[r4,#20] // load m23 -> R1
adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2
str r2,[r3,#ACCUM1_OFFSET] // set accum1
// ---- process odd 4-pixel
// R0 ... pointer to destination data buffer
// R3 ... interpolator base
// R4 ... (temporary - get pointer to pixel)
// R5 ... (temporary - load pixel)
// R6 ... (temporary - pixel accumulator)
// R7 ... width/4 (loop counter)
lsrs r7,#1 // width/4/2 -> R7, carry = odd 4-pixel group present
bcc 2f // no odd 4-pixel
// [3] load 1st pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r6,[r4,#0] // [2] load pixel
// [5] load 2nd pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r5,[r4,#0] // [2] load pixel
lsls r5,#8 // [1] shift 1 byte left
orrs r6,r5 // [1] add pixel to accumulator
// [5] load 3rd pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r5,[r4,#0] // [2] load pixel
lsls r5,#16 // [1] shift 2 bytes left
orrs r6,r5 // [1] add pixel to accumulator
// [5] load 4th pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r5,[r4,#0] // [2] load pixel
lsls r5,#24 // [1] shift 3 bytes left
orrs r6,r5 // [1] add pixel to accumulator
// [2] store 4 pixels
stmia r0!,{r6} // [2] store 4 pixels
// check number of remaining pixels
2: tst r7,r7 // check number of remaining 8-pixel groups
beq 8f // end
// ---- [42 per 8 pixels] inner loop
// R0 ... pointer to destination data buffer
// R1 ... (temporary - pixel accumulator 1)
// R2 ... (temporary - pixel accumulator 2)
// R3 ... interpolator base
// R4 ... (temporary - get pointer to pixel, load pixel)
// R7 ... width/8 (loop counter)
// [3] load 1st pixel
6: ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r1,[r4,#0] // [2] load pixel
// [5] load 2nd pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r4,[r4,#0] // [2] load pixel
lsls r4,#8 // [1] shift 1 byte left
orrs r1,r4 // [1] add pixel to accumulator
// [5] load 3rd pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r4,[r4,#0] // [2] load pixel
lsls r4,#16 // [1] shift 2 bytes left
orrs r1,r4 // [1] add pixel to accumulator
// [5] load 4th pixel
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r4,[r4,#0] // [2] load pixel
lsls r4,#24 // [1] shift 3 bytes left
orrs r1,r4 // [1] add pixel to accumulator
// [3] load 5th pixel (1st pixel of accumulator 2)
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r2,[r4,#0] // [2] load pixel
// [5] load 6th pixel (2nd pixel of accumulator 2)
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r4,[r4,#0] // [2] load pixel
lsls r4,#8 // [1] shift 1 byte left
orrs r2,r4 // [1] add pixel to accumulator
// [5] load 7th pixel (3rd pixel of accumulator 2)
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r4,[r4,#0] // [2] load pixel
lsls r4,#16 // [1] shift 2 bytes left
orrs r2,r4 // [1] add pixel to accumulator
// [5] load 8th pixel (4th pixel of accumulator 2)
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r4,[r4,#0] // [2] load pixel
lsls r4,#24 // [1] shift 3 bytes left
orrs r2,r4 // [1] add pixel to accumulator
// [3] store 8 pixels
stmia r0!,{r1,r2} // [3] store 8 pixels
// [2,3] loop counter
subs r7,#1 // [1] 8-pixel counter
bne 6b // [1,2] next 8-pixels
// pop registers and return
8: pop {r4-r7,pc}
.align 2
// pointer to SIO base
RenderPersp_pSioBase:
.word SIO_BASE // address of SIO base
// pointer to Interp1 base
RenderPersp_Interp:
.word SIO_BASE+SIO_INTERP1_ACCUM0_OFFSET // address of interpolator base
RenderPersp_Ctrl: // lane control word (common part: add raw + shift FRACT; mask bits are OR-ed in at run time)
.word SIO_INTERP1_CTRL_LANE0_ADD_RAW_BITS | (FRACT<<SIO_INTERP1_CTRL_LANE0_SHIFT_LSB)

View file

@ -1,337 +0,0 @@
// ****************************************************************************
//
// VGA render LAYERMODE_PERSP2*
//
// ****************************************************************************
// img ... (const u8*) SLAYER_IMG image data
// par ... (const void*) SLAYER_PAR pointer to 6 matrix integer parameters m11,m12..m23
// horiz ... (s8) SLAYER_HORIZ horizon offset/4 (0=no perspective, <0 ceiling)
// xbits ... (u8) SLAYER_XBITS number of bits of image width
// ybits ... (u8) SLAYER_YBITS number of bits of image height
// w ... (u16) SLAYER_W destination width
// h ... (u16) SLAYER_H destination height
#include "../define.h" // common definitions of C and ASM
#include "hardware/regs/sio.h" // registers of hardware divider
#include "hardware/regs/addressmap.h" // SIO base address
#define ACCUM0_OFFSET 0
#define ACCUM1_OFFSET 4
#define BASE0_OFFSET 8
#define BASE1_OFFSET 12
#define BASE2_OFFSET 16
#define POP_LANE0_OFFSET 20
#define POP_LANE1_OFFSET 24
#define POP_FULL_OFFSET 28
#define PEEK_LANE0_OFFSET 32
#define PEEK_LANE1_OFFSET 36
#define PEEK_FULL_OFFSET 40
#define CTRL_LANE0_OFFSET 44
#define CTRL_LANE1_OFFSET 48
#define ACCUM0_ADD_OFFSET 52
#define ACCUM1_ADD_OFFSET 56
#define BASE_1AND0_OFFSET 60
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// extern "C" void RenderPersp2(u8* dbuf, int y, sLayer* scr)
// render one scanline of a layer with transformation matrix LAYERMODE_PERSP2*, double pixel
// Same setup as the single-pixel variant, but every sampled source pixel is
// written to 2 adjacent destination bytes. The interpolator increments
// (base0/base1) are therefore 2*delta, while the start coordinates
// (accum0/accum1) are computed with the single delta.
// R0 ... dbuf pointer to data buffer
// R1 ... y coordinate of scanline (relative in destination image)
// R2 ... scr pointer to layer screen structure sLayer
.thumb_func
.global RenderPersp2
RenderPersp2:
// push registers
push {r4-r7,lr}
// Stack content and input variables:
// R0 dbuf pointer to data buffer
// R1 Y coordinate of scanline
// R2 scr pointer to layer screen structure sLayer
// R3
// SP+0: R4
// SP+4: R5
// SP+8: R6
// SP+12: R7
// SP+16: LR
// R0 ... pointer to destination data buffer
// R1 ... Y coordinate
// R2 ... sLayer
// load horizon offset -> R4, check if use perspective
ldr r6,RenderPersp_pSioBase // get address of SIO base -> R6
ldrh r5,[r2,#SLAYER_H] // get destination height -> R5
ldrb r4,[r2,#SLAYER_HORIZ] // get horizon offset -> R4
sxtb r4,r4 // signed extension
lsls r4,#2 // horizon * 4, horizon = 0 ? (sets Z and N flags used by the 2 branches below)
bne 2f // use perspective
// not using perspective, start Y coordinate y0 = y - h/2 -> R12
lsrs r5,#1 // destination height/2 -> R5
subs r1,r5 // y - h/2 -> R1
mov r12,r1 // current coordinate Y0 = y - h/2 -> R12
// prepare divide result to get 1<<FRACT (quotient is read back later in "set matrix")
movs r5,#1 // R5 <- 1
str r5,[r6,#SIO_DIV_UDIVISOR_OFFSET] // divisor = 1
lsls r5,#FRACT // constant 1<<FRACT -> R5
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL
b 4f
// using perspective, check ceiling mode (N flag is still valid from "lsls r4,#2", bne does not change flags)
2: bpl 3f // horizon is not negative
subs r1,r5,r1 // negate, y = h - y
subs r1,#1 // y = h - 1 - y
negs r4,r4 // absolute value of horizon
// prepare current coordinate Y0 = y - h -> R12
3: subs r7,r1,r5 // y - h = current Y coordinate -> R7
mov r12,r7 // store current coordinate Y0 -> R12
// start calculating distance coefficient dist = FRACTMUL*h/(y + horiz), horiz = SLAYER_HORIZ*4
// (operands are written to the divider here, the quotient is read back later in "set matrix")
lsls r5,#FRACT // destination height * FRACTMUL -> R5
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h
adds r1,r4 // horizon + y -> R1
str r1,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz
// R0 ... pointer to destination data buffer
// R2 ... sLayer
// R12 ... current coordinate Y0
// prepare start coordinate X0 = -w/2 -> LR
4: ldrh r3,[r2,#SLAYER_W] // get destination width -> R3
lsrs r5,r3,#1 // width/2
negs r5,r5 // negate
mov lr,r5 // store start coordinate X0 -> LR
// prepare number of 4-pixels (loop counter) -> R7
lsrs r7,r3,#2 // width/4 -> R7
// get number of bits of image width "xbits" -> R1
ldrb r1,[r2,#SLAYER_XBITS] // number of bits of image width -> R1
// get number of bits of image height "ybits" -> R4
ldrb r4,[r2,#SLAYER_YBITS] // number of bits of image height -> R4
// prepare address of interpolator base -> R3
ldr r3,RenderPersp_Interp // get address of interpolator base -> R3
// R0 ... pointer to destination data buffer
// R1 ... number of bits of image width xbits
// R2 ... sLayer
// R3 ... interpolator base
// R4 ... number of bits of image height ybits
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// ---- setup interpolator
// set image base to base2
ldr r6,[r2,#SLAYER_IMG] // load image base
str r6,[r3,#BASE2_OFFSET] // set image base
// set control word of lane 1 - add raw lane base back to accumulator, shift "FRACT-xbits", mask xbits...xbits+ybits-1
ldr r6,RenderPersp_Ctrl // load control word
subs r6,r1 // FRACT - xbits (subtracted directly from the control word; relies on the shift field starting at bit 0)
lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift xbits to mask LSB position -> R5
orrs r6,r5 // add xbits to control word
subs r1,#1 // xbits - 1 -> R1
adds r5,r1,r4 // xbits-1+ybits -> R5
lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to MSB mask position
orrs r6,r5 // add to control word
str r6,[r3,#CTRL_LANE1_OFFSET] // set control word of lane 1
// R0 ... pointer to destination data buffer
// R1 ... image width xbits-1
// R2 ... sLayer
// R3 ... interpolator base
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// set control word of lane 0 - add raw lane base back to accumulator, shift "FRACT", mask 0..xbits-1
ldr r6,RenderPersp_Ctrl // load control word
lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift xbits-1 to mask MSB position
orrs r6,r1 // add to control word
str r6,[r3,#CTRL_LANE0_OFFSET] // set control word of lane 0
// R0 ... pointer to destination data buffer
// R2 ... sLayer
// R3 ... interpolator base
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// ---- set matrix
// get pointer to matrix -> R4
ldr r4,[r2,#SLAYER_PAR] // get pointer to matrix -> R4
// get distance coefficient dist -> R1 (result of the division prepared at the start of the function)
ldr r1,RenderPersp_pSioBase // get address of SIO base -> R1
ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient
// r4+0 ... m11
// r4+4 ... m12
// r4+8 ... m13
// r4+12 ... m21
// r4+16 ... m22
// r4+20 ... m23
// set m11 -> R5 base0 (base0 gets the doubled step because one source pixel covers 2 destination pixels)
ldr r5,[r4,#0] // load m11
muls r5,r1 // m11*dist
asrs r5,#FRACT-1 // (m11*dist)>>(FRACT-1) ... 2*delta
str r5,[r3,#BASE0_OFFSET] // set base0
asrs r5,#1 // (m11*dist)>>FRACT, single delta kept in R5 for the start coordinate
// set m21 -> R6 base1 (base1 gets the doubled step as well)
ldr r6,[r4,#12] // load m21
muls r6,r1 // m21*dist
asrs r6,#FRACT-1 // (m21*dist)>>(FRACT-1) ... 2*delta
str r6,[r3,#BASE1_OFFSET] // set base1
asrs r6,#1 // (m21*dist)>>FRACT, single delta kept in R6 for the start coordinate
// R0 ... pointer to destination data buffer
// R1 ... distance coefficient
// R3 ... interpolator base
// R4 ... pointer to matrix
// R5 ... m11 scaled by distance, (m11*dist)>>FRACT
// R6 ... m21 scaled by distance, (m21*dist)>>FRACT
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// set x0*m11 + y0*m12 + m13 -> accum0 (m11, m12 are the distance-scaled values)
mov r2,lr // start coordinate X0 -> R2
muls r5,r2 // x0*m11 -> R5
muls r2,r6 // x0*m21 -> R2
mov lr,r1 // save distance coefficient -> LR
ldr r6,[r4,#4] // load m12 -> R6
muls r1,r6 // m12*dist -> R1
asrs r1,#FRACT // (m12*dist)>>FRACT -> R1
mov r6,r12 // load coordinate Y0 -> R6
muls r1,r6 // y0*m12 -> R1
adds r5,r1 // x0*m11 + y0*m12 -> R5
ldr r1,[r4,#8] // load m13 -> R1
adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5
str r5,[r3,#ACCUM0_OFFSET] // set accum0
// R0 ... pointer to destination data buffer
// R2 ... x0*m21
// R3 ... interpolator base
// R4 ... pointer to matrix
// R6 ... current coordinate Y0
// R7 ... width/4
// LR ... distance coefficient
// set x0*m21 + y0*m22 + m23 -> accum1 (m21, m22 are the distance-scaled values)
ldr r1,[r4,#16] // load m22 -> R1
mov r5,lr // distance coefficient -> R5
muls r1,r5 // m22*dist
asrs r1,#FRACT // (m22*dist)>>FRACT -> R1
muls r1,r6 // y0*m22 -> R1
adds r2,r1 // x0*m21 + y0*m22 -> R2
ldr r1,[r4,#20] // load m23 -> R1
adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2
str r2,[r3,#ACCUM1_OFFSET] // set accum1
// ---- process odd 4-pixel (2 source pixels, each doubled)
// R0 ... pointer to destination data buffer
// R1 ... (temporary - pixel accumulator 1)
// R3 ... interpolator base
// R4 ... (temporary - get pointer to pixel, load pixel)
// R7 ... width/4 (loop counter)
lsrs r7,#1 // width/4/2 -> R7, carry = odd 4-pixel group present
bcc 2f // no odd 4-pixel
// [5] load 1st pixel, duplicate it into bytes 0 and 1
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r1,[r4,#0] // [2] load pixel
lsls r4,r1,#8 // [1] shift 1 byte left
orrs r1,r4 // [1] add pixel to accumulator
// [7] load 2nd pixel, duplicate it into bytes 2 and 3
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r4,[r4,#0] // [2] load pixel
lsls r4,#16 // [1] shift 2 bytes left
orrs r1,r4 // [1] add pixel to accumulator
lsls r4,#8 // [1] shift 1 more byte left (pixel now in byte 3)
orrs r1,r4 // [1] add pixel to accumulator
// [2] store 4 pixels
stmia r0!,{r1} // [2] store 4 pixels
// check number of remaining pixels
2: tst r7,r7 // check number of remaining 8-pixel groups
beq 8f // end
// ---- [30 per 8 pixels] inner loop (4 source pixels, each doubled)
// R0 ... pointer to destination data buffer
// R1 ... (temporary - pixel accumulator 1)
// R2 ... (temporary - pixel accumulator 2)
// R3 ... interpolator base
// R4 ... (temporary - get pointer to pixel, load pixel)
// R7 ... width/8 (loop counter)
// [5] load 1st pixel, duplicate it into bytes 0 and 1 of accumulator 1
6: ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r1,[r4,#0] // [2] load pixel
lsls r4,r1,#8 // [1] shift 1 byte left
orrs r1,r4 // [1] add pixel to accumulator
// [7] load 2nd pixel, duplicate it into bytes 2 and 3 of accumulator 1
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r4,[r4,#0] // [2] load pixel
lsls r4,#16 // [1] shift 2 bytes left
orrs r1,r4 // [1] add pixel to accumulator
lsls r4,#8 // [1] shift 1 more byte left (pixel now in byte 3)
orrs r1,r4 // [1] add pixel to accumulator
// [5] load 3rd pixel, duplicate it into bytes 0 and 1 of accumulator 2
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r2,[r4,#0] // [2] load pixel
lsls r4,r2,#8 // [1] shift 1 byte left
orrs r2,r4 // [1] add pixel to accumulator
// [7] load 4th pixel, duplicate it into bytes 2 and 3 of accumulator 2
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
ldrb r4,[r4,#0] // [2] load pixel
lsls r4,#16 // [1] shift 2 bytes left
orrs r2,r4 // [1] add pixel to accumulator
lsls r4,#8 // [1] shift 1 more byte left (pixel now in byte 3)
orrs r2,r4 // [1] add pixel to accumulator
// [3] store 8 pixels
stmia r0!,{r1,r2} // [3] store 8 pixels
// [2,3] loop counter
subs r7,#1 // [1] 8-pixel counter
bne 6b // [1,2] next 8-pixels
// pop registers and return
8: pop {r4-r7,pc}
.align 2
// pointer to SIO base
RenderPersp_pSioBase:
.word SIO_BASE // address of SIO base
// pointer to Interp1 base
RenderPersp_Interp:
.word SIO_BASE+SIO_INTERP1_ACCUM0_OFFSET // address of interpolator base
RenderPersp_Ctrl: // lane control word (common part: add raw + shift FRACT; mask bits are OR-ed in at run time)
.word SIO_INTERP1_CTRL_LANE0_ADD_RAW_BITS | (FRACT<<SIO_INTERP1_CTRL_LANE0_SHIFT_LSB)

View file

@ -1,251 +0,0 @@
// ****************************************************************************
//
// VGA render GF_PLANE2
//
// ****************************************************************************
#include "../define.h" // common definitions of C and ASM
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// extern "C" u8* RenderPlane2(u8* dbuf, int x, int y, int w, sSegm* segm);
// Render 2-plane palette graphics GF_PLANE2.
//
// The image row is stored as two bit planes; the second plane lies SSEGM_PAR
// bytes behind the first one. One byte of each plane covers 8 pixels. The
// matching 4-bit halves of both plane bytes are merged into an 8-bit index
// (plane-2 nibble in the upper 4 bits) and the 32-bit entry of the palette
// translation table SSEGM_PAR2 at that index is written as one 4-pixel group.
//
//  R0 ... destination data buffer
//  R1 ... start X coordinate (must be multiple of 4)
//  R2 ... start Y coordinate
//  R3 ... width of this segment (must be multiple of 4)
//  [stack] segm ... video segment
// Output: R0 = new dbuf pointer.
// The source wraps at SSEGM_WRAPX (rounded down to a multiple of 4): when X
// reaches the wrap width, reading restarts at X = 0 of the same row.
// 320 pixels takes 7.3 us on 151 MHz (figure from the original author).
.thumb_func
.global RenderPlane2
RenderPlane2:
	// save callee registers; R3 is pushed too, its slot is reused as a variable
	push	{r3-r7,lr}
	// Stack content:
	//  SP+0:  R3 width to display (used as "remaining width")
	//  SP+4:  R4
	//  SP+8:  R5
	//  SP+12: R6
	//  SP+16: R7
	//  SP+20: LR
	//  SP+24: video segment (slot reused for the aligned wrap width)

	// pointer to video segment -> R4
	ldr	r4,[sp,#24]		// load video segment -> R4

	// aligned wrap width -> [SP+24]
	ldrh	r7,[r4,#SSEGM_WRAPX]	// get wrap width
	movs	r6,#3			// mask of the 2 lowest bits
	bics	r7,r6			// round wrap width down to multiple of 4
	str	r7,[sp,#24]		// save wrap width

	// round X down to multiple of 4 -> R1
	bics	r1,r6

	// round width down to multiple of 4 -> [SP+0]
	bics	r3,r6
	str	r3,[sp,#0]		// save remaining width

	// address of the image row (X = 0) -> LR, R2
	ldrh	r5,[r4,#SSEGM_WB]	// get pitch of rows
	muls	r2,r5			// Y * WB -> offset of row in image buffer
	ldr	r5,[r4,#SSEGM_DATA]	// pointer to data
	add	r2,r5			// address of row
	mov	lr,r2			// keep row address for wrapping

	// address of the source byte containing X -> R2
	lsrs	r6,r1,#3		// X/8 = byte offset (8 pixels per byte)
	add	r2,r6			// pointer to source image data -> R2

	// distance between the planes -> R3
	ldr	r3,[r4,#SSEGM_PAR]	// size of one plane -> R3

	// palette translation table -> R7
	ldr	r7,[r4,#SSEGM_PAR2]	// pointer to palette translation table -> R7

// ---- render 2nd half of the first source byte (only if X is 4 mod 8)
//  R0 ... pointer to destination data buffer
//  R1 ... X coordinate
//  R2 ... pointer to source image data
//  R3 ... size of one plane
//  R4..R6 ... (temporary)
//  R7 ... pointer to palette translation table
//  LR ... address of the image row (X = 0)
//  [SP+0] ... remaining width
//  [SP+24] ... wrap width

	lsls	r5,r1,#29		// move bit 2 of X into the sign bit
	bpl	2f			// bit 2 clear, X starts on a whole byte

	// [5] load samples -> R5, R6
	ldrb	r5,[r2,#0]		// [2] load sample from first plane
	ldrb	r6,[r2,r3]		// [2] load sample from second plane
	adds	r2,#1			// [1] increase pointer

	// [5] compose low nibbles -> R5 = index*4
	lsls	r6,#28			// [1] isolate low 4 bits of second plane
	lsrs	r6,#22			// [1] shift to bit position 6
	lsls	r5,#28			// [1] isolate low 4 bits of first plane
	lsrs	r5,#26			// [1] shift to bit position 2
	orrs	r5,r6			// [1] compose index*4

	// [4] write pixels
	ldr	r5,[r7,r5]		// [2] load 4 pixels from translation table
	stmia	r0!,{r5}		// [2] write pixels

	// advance X and wrap if the end of the source was reached
	adds	r1,#4			// shift X coordinate
	ldr	r6,[sp,#24]		// load wrap width
	cmp	r1,r6			// X reached wrap width?
	blo	1f
	movs	r1,#0			// reset X coordinate
	mov	r2,lr			// restart at the beginning of the row

	// these 4 pixels are done
1:	ldr	r6,[sp,#0]		// get remaining width
	subs	r6,#4			// shift width
	str	r6,[sp,#0]		// save new width

	// width of the first part (up to the wrap point) -> R6
2:	ldr	r6,[sp,#24]		// load wrap width
	subs	r6,r1			// pixels remaining to wrap point

// ---- outer loop, one pass per part (run of pixels up to the wrap point)
// On every entry:
//  R0 ... pointer to destination data buffer
//  R2 ... pointer to source image data
//  R3 ... size of one plane
//  R6 ... width available up to the wrap point (wrap width after a wrap)
//  R7 ... pointer to palette translation table
//  LR ... address of the image row (X = 0)
//  [SP+0] ... remaining width (already reduced by everything rendered)
//  [SP+24] ... wrap width
// The loop ends here, and only here, when less than 4 pixels are left.

RenderPlane2_OutLoop:

	// limit part width by remaining width -> R6
	ldr	r4,[sp,#0]		// get remaining width -> R4
	cmp	r6,r4			// compare part width with remaining width
	bls	2f			// part width is OK
	mov	r6,r4			// limit part width

	// select how to render this part
2:	cmp	r6,#8			// at least one whole source byte?
	bhs	5f			// yes, use the inner loop
	cmp	r6,#4			// one 4-pixel group?
	blo	3f			// no, all done

	// Exactly one 4-pixel group in this part. Book it and reload the wrap
	// width so that the state matches the entry from the inner loop.
	// (Previously the routine returned after this group even if more width
	// remained, e.g. wrap width 12 with start X 8.)
	subs	r4,r6			// remaining width - 4
	str	r4,[sp,#0]		// save new remaining width
	ldr	r6,[sp,#24]		// load wrap width -> R6

// ---- render 1st half of a source byte, then start the next part
// Entry: [SP+0] already accounts for these 4 pixels, R6 = wrap width.

RenderPlane2_Last:

	// [5] load samples -> R5, R4
	ldrb	r5,[r2,#0]		// [2] load sample from first plane
	ldrb	r4,[r2,r3]		// [2] load sample from second plane
	adds	r2,#1			// [1] increase pointer

	// [5] compose high nibbles -> R4 = index*4
	lsrs	r4,#4			// [1] isolate high 4 bits of second plane
	lsls	r4,#8			// [1] shift above the first-plane byte
	orrs	r4,r5			// [1] compose with first-plane sample
	lsrs	r4,#4			// [1] drop low nibble of first plane
	lsls	r4,#2			// [1] index*4

	// [4] write pixels
	ldr	r4,[r7,r4]		// [2] load 4 pixels from translation table
	stmia	r0!,{r4}		// [2] write pixels

	// next part starts at X = 0; the outer loop returns if nothing is left
	mov	r2,lr			// restart at the beginning of the row
	b	RenderPlane2_OutLoop

	// pop registers and return
3:	pop	{r3-r7,pc}

// ---- prepare to render whole source bytes

	// number of 4-pixel groups in this part -> R1
5:	lsrs	r1,r6,#2		// part width / 4
	lsls	r6,r1,#2		// back to pixels (multiple of 4) -> R6
	subs	r4,r6			// remaining width - part width
	str	r4,[sp,#0]		// save new remaining width
	subs	r1,#1			// number of 4-pixel groups - 1

// ---- [25*N-1] inner loop, 8 pixels (one source byte per plane) per pass
//  R0 ... pointer to destination data buffer
//  R1 ... number of 4-pixel groups - 1 (loop counter, step 2)
//  R2 ... pointer to source image data
//  R3 ... size of one plane
//  R4 ... first 4 output pixels
//  R5 ... sample from first plane, then second 4 output pixels
//  R6 ... sample from second plane
//  R7 ... pointer to palette translation table

RenderPlane2_InLoop:

	// [5] load samples -> R5, R6
	ldrb	r5,[r2,#0]		// [2] load sample from first plane
	ldrb	r6,[r2,r3]		// [2] load sample from second plane
	adds	r2,#1			// [1] increase pointer

	// [5] compose high nibbles -> R4 = index*4
	lsrs	r4,r6,#4		// [1] isolate high 4 bits of second plane
	lsls	r4,#8			// [1] shift above the first-plane byte
	orrs	r4,r5			// [1] compose with first-plane sample
	lsrs	r4,#4			// [1] drop low nibble of first plane
	lsls	r4,#2			// [1] index*4

	// [2] first 4 pixels
	ldr	r4,[r7,r4]		// [2] load 4 pixels from translation table

	// [5] compose low nibbles -> R5 = index*4
	lsls	r6,#28			// [1] isolate low 4 bits of second plane
	lsrs	r6,#22			// [1] shift to bit position 6
	lsls	r5,#28			// [1] isolate low 4 bits of first plane
	lsrs	r5,#26			// [1] shift to bit position 2
	orrs	r5,r6			// [1] compose index*4

	// [5] write pixels
	ldr	r5,[r7,r5]		// [2] load 4 pixels from translation table
	stmia	r0!,{r4,r5}		// [3] write 8 pixels

	// [2,3] loop counter
	subs	r1,#2			// [1] two 4-pixel groups done
	bhi	RenderPlane2_InLoop	// [1,2] more than one group left

// ---- end inner loop
// Flags still come from "subs r1,#2" (ldr does not change them):
//  Z set   -> exactly one 4-pixel group is left in this part
//  Z clear -> part finished on a byte boundary

RenderPlane2_EndLoop:
	ldr	r6,[sp,#24]		// load wrap width -> R6
	beq	RenderPlane2_Last	// render the odd 4-pixel group
	mov	r2,lr			// restart at the beginning of the row
	b	RenderPlane2_OutLoop	// go back to outer loop

View file

@ -1,123 +0,0 @@
// ****************************************************************************
//
// VGA render GF_PROGRESS
//
// ****************************************************************************
#include "../define.h" // common definitions of C and ASM
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// extern "C" u32* RenderProgress(u32* cbuf, int x, int y, int w, sSegm* segm);
// Render horizontal progress indicator GF_PROGRESS.
//
// No pixels are produced here. For every run of the line the routine appends
// a control block of two words, {width in 4-pixel units, source address}, to
// the control buffer. The row's data byte (SSEGM_DATA[y]) is the split
// position: positions below it take their source from the buffer SSEGM_PAR,
// positions from it upwards from the buffer SSEGM_PAR2.
//
//  R0 ... pointer to control buffer
//  R1 ... start X coordinate (in pixels, must be multiple of 4)
//  R2 ... start Y coordinate (in graphics lines)
//  R3 ... width to display (must be multiple of 4 and > 0)
//  [stack] segm ... video segment sSegm
// Output: R0 = new pointer to control buffer.
// 320 pixels takes 0.5 us on 151 MHz (figure from the original author).
.thumb_func
.global RenderProgress
RenderProgress:
	push	{r4-r7,lr}		// save callee registers
	// Stack content:
	//  SP+0:  R4
	//  SP+4:  R5
	//  SP+8:  R6
	//  SP+12: R7
	//  SP+16: LR
	//  SP+20: video segment
	// Register use (all widths and positions are in 4-pixel units):
	//  R0 ... write pointer into control buffer
	//  R1 ... current position X/4
	//  R2 ... split position (data byte of this row)
	//  R3 ... width still to be described
	//  R4 ... source buffer for positions below the split (SSEGM_PAR)
	//  R5 ... source buffer for positions from the split (SSEGM_PAR2)
	//  R6 ... scratch
	//  R7 ... width of the current part (up to the wrap point)
	//  LR ... wrap width

	ldr	r4,[sp,#20]		// R4 <- segm

	lsrs	r1,#2			// R1 <- X/4

	ldr	r5,[r4,#SSEGM_DATA]	// table of split positions, one byte per row
	ldrb	r2,[r5,r2]		// R2 <- split position of row Y

	lsrs	r3,#2			// R3 <- width/4

	ldrh	r7,[r4,#SSEGM_WRAPX]	// wrap width in pixels
	lsrs	r7,#2			// wrap width/4
	mov	lr,r7			// LR <- wrap width/4

	ldr	r5,[r4,#SSEGM_PAR2]	// R5 <- source buffer from the split upwards
	ldr	r4,[r4,#SSEGM_PAR]	// R4 <- source buffer below the split (segm pointer no longer needed)

// ---- one pass per part (run of positions up to the wrap point)
RenderProgress_Part:
	tst	r3,r3			// anything left to describe?
	beq	RenderProgress_Done	// no, return

	// part width = min(wrap width - X, remaining width) -> R7
	mov	r7,lr
	subs	r7,r1			// distance to the wrap point
	cmp	r7,r3
	bls	RenderProgress_Fits	// part fits into the remaining width
	mov	r7,r3			// clip to the remaining width
RenderProgress_Fits:
	subs	r3,r7			// part is booked

	// run below the split exists only if X < split
	cmp	r1,r2
	bhs	RenderProgress_Second	// X >= split, skip first run

	// length of the first run = min(split - X, part width) -> R6
	subs	r6,r2,r1
	cmp	r6,r7
	bls	RenderProgress_First	// run fits into the part
	mov	r6,r7			// clip to the part width
RenderProgress_First:
	subs	r7,r6			// R7 <- what is left of the part for the second run

	// control block of the first run
	// NOTE(review): the address is formed as buffer + X/4 (R1 was divided by 4
	// on entry) - confirm that this is the unit the source buffers use.
	stmia	r0!,{r6}		// width
	adds	r6,r4,r1		// source address at current position
	stmia	r0!,{r6}		// address
	mov	r1,r2			// continue at the split position

	// run from the split upwards, if the part is not used up
RenderProgress_Second:
	tst	r7,r7
	beq	RenderProgress_Wrap	// nothing left in this part

	// control block of the second run
	stmia	r0!,{r7}		// width
	adds	r6,r5,r1		// source address at current position
	stmia	r0!,{r6}		// address

	// the next part starts at the left edge
RenderProgress_Wrap:
	movs	r1,#0
	b	RenderProgress_Part

RenderProgress_Done:
	pop	{r4-r7,pc}		// restore registers and return

View file

@ -1,164 +0,0 @@
// ****************************************************************************
//
// VGA render LAYERMODE_SPRITE*
//
// ****************************************************************************
#include "../define.h" // common definitions of C and ASM
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// extern "C" void RenderSprite(u8* dbuf, int y, sLayer* scr)
// Render one scanline of a sprite layer LAYERMODE_SPRITE*.
//
// Walks the sprite list of the layer (SLAYER_IMG, SLAYER_SPRITENUM entries).
// For every sprite that covers scanline y, the non-empty span of the sprite
// line (start s->x0[y2], length s->w0[y2]) is clipped to 0..W-1 and copied to
// dbuf with BlitKey, using the sprite's key color.
//
//  R0 ... dbuf pointer to data buffer
//  R1 ... y coordinate of scanline
//  R2 ... scr pointer to layer screen structure sLayer
.thumb_func
.global RenderSprite
RenderSprite:
	push	{r4-r7,lr}		// save callee registers (LR is clobbered by bl below)
	// Register use:
	//  R0 ... dbuf, inside the loop body: destination address dbuf + X
	//  R1 ... scanline Y, inside the loop body: Y2, then address of source pixels
	//  R2 ... number of sprites left, inside the loop body: span length W2
	//  R3 ... pointer to current sprite, later: key color
	//  R4 ... scratch, later: absolute X of the span start
	//  R5 ... span start X2 relative to the sprite, later: W - X
	//  R6 ... W layer screen width
	//  R7 ... pointer into the list of sprite pointers

	ldr	r7,[r2,#SLAYER_IMG]	// list of sprites -> R7
	ldrh	r6,[r2,#SLAYER_W]	// screen width -> R6
	ldrh	r2,[r2,#SLAYER_SPRITENUM] // number of sprites -> R2

	// ---- loop over sprites
2:	subs	r2,#1			// one sprite less
	blo	9f			// counter was 0, all sprites done

	// R0..R2 are reused inside the loop body; they are restored at label 8
	push	{r0-r2}

	ldmia	r7!,{r3}		// pointer to next sprite -> R3

	// Y2 = Y - s->y, line inside the sprite -> R1
	ldrh	r4,[r3,#SSPRITE_Y]	// sprite Y coordinate
	sxth	r4,r4			// it is a signed 16-bit value
	subs	r1,r1,r4		// Y2 = Y - s->y

	// skip sprite if the scanline does not hit it
	bmi	8f			// Y2 < 0
	ldrh	r4,[r3,#SSPRITE_H]	// sprite height
	cmp	r1,r4
	bge	8f			// Y2 >= s->h

	// span of this sprite line: X2 = s->x0[Y2], W2 = s->w0[Y2]
	// (byte tables, values are used unscaled)
	ldr	r4,[r3,#SSPRITE_X0]	// table of span starts
	ldrb	r5,[r4,r1]		// X2 -> R5
	ldr	r4,[r3,#SSPRITE_W0]	// table of span lengths
	ldrb	r2,[r4,r1]		// W2 -> R2

	// address of sprite line s->img + Y2*s->wb -> R1
	ldrh	r4,[r3,#SSPRITE_WB]	// sprite pitch
	muls	r1,r1,r4		// Y2 * s->wb
	ldr	r4,[r3,#SSPRITE_IMG]	// sprite image
	add	r1,r4			// line address -> R1

	// sprite X (signed 16-bit) -> R4
	ldrh	r4,[r3,#SSPRITE_X]
	sxth	r4,r4

	// key color -> R3 (sprite pointer is not needed any more)
	ldrb	r3,[r3,#SSPRITE_KEYCOL]

	// absolute X of the span start; clip at the left edge
	adds	r4,r4,r5		// X = s->x + X2
	bpl	3f			// X >= 0, nothing to clip
	subs	r5,r4			// X2 -= X  (skip the hidden pixels)
	adds	r2,r4			// W2 += X  (X is negative: shorten the span)
	movs	r4,#0			// X = 0

	// source address of the first visible pixel -> R1
3:	adds	r1,r5			// s->img[Y2*WB + X2]

	// clip at the right edge: W2 = min(W2, W - X)
	subs	r5,r6,r4		// W - X -> R5
	cmp	r2,r5
	ble	4f			// W2 <= W - X, length is OK
	mov	r2,r5			// limit W2

	// Skip if nothing is visible (W2 <= 0). CMP is used rather than TST
	// because the signed condition LE also reads V, which TST leaves
	// unchanged from the preceding compare.
4:	cmp	r2,#0
	ble	8f

	adds	r0,r4			// destination address dbuf + X

	// BlitKey is defined elsewhere; it is entered with
	//  R0 ... destination, R1 ... source, R2 ... width, R3 ... key color
	// R6 and R7 are used again after the call, so the routine relies on
	// BlitKey preserving them. NOTE(review): confirm against BlitKey.
	bl	BlitKey

	// restore dbuf, Y and the sprite counter, go to next sprite
8:	pop	{r0-r2}
	b	2b

	// restore registers and return
9:	pop	{r4-r7,pc}

View file

@ -1,431 +0,0 @@
// ****************************************************************************
//
// VGA render GF_TILE
//
// ****************************************************************************
// u16 par3; // SSEGM_PAR3 tile width (must be multiple of 4)
// u32 par; // SSEGM_PAR tile table with one column of tiles
// u32 par2; // SSEGM_PAR2 tile height
#include "../define.h" // common definitions of C and ASM
#include "hardware/regs/sio.h" // registers of hardware divider
#include "hardware/regs/addressmap.h" // SIO base address
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// extern "C" u32* RenderTile(u32* cbuf, int x, int y, int w, sSegm* segm);
// Render tiles GF_TILE.
//
// No pixels are produced here. For every tile (or visible part of a tile)
// on the scanline the routine appends a control block of two words,
// {width in 4-pixel units, address of the tile line}, to the control buffer.
// Tile indices are bytes read from the row of SSEGM_DATA selected by
// y / tile_height; the tile line address is
//   SSEGM_PAR + (y % tile_height) * tile_width + index * tile_width * tile_height.
// Both divisions use the SIO hardware divider.
//
//  cbuf (R0) ... destination control buffer
//  x (R1) ... start X coordinate (must be multiple of 4)
//  y (R2) ... start Y coordinate
//  w (R3) ... width of this segment (must be multiple of 4)
//  segm [stack] ... video segment
//      SSEGM_PAR3 = tile width, SSEGM_PAR = tile table, SSEGM_PAR2 = tile height
// Output: R0 = new cbuf pointer.
// The tile map wraps at SSEGM_WRAPX (rounded down to a multiple of 4): when X
// reaches the wrap width, reading restarts at X = 0 of the same tile row.
// 320 pixels takes on 151 MHz: tiles 8x8 3.5 us, tile 16x16 2 us,
// tiles 32x32 1.3 us, tiles 64x64 0.9 us (figures from the original author).
.thumb_func
.global RenderTile
RenderTile:
	// save callee registers; R1..R3 are pushed too, their slots are reused
	push	{r1-r7,lr}
	// Stack content:
	//  SP+0:  R1 X coordinate     (slot reused for the wrap width)
	//  SP+4:  R2 Y coordinate     (slot reused for the remaining width)
	//  SP+8:  R3 width to display (slot reused for the tile width)
	//  SP+12: R4
	//  SP+16: R5
	//  SP+20: R6
	//  SP+24: R7
	//  SP+28: LR
	//  SP+32: video segment

	// pointer to video segment -> R4
	ldr	r4,[sp,#32]		// load video segment -> R4

	// start divide Y / tile_height
	ldr	r5,RenderTile_pSioBase	// address of SIO base -> R5
	str	r2,[r5,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = Y coordinate
	ldr	r2,[r4,#SSEGM_PAR2]	// tile height -> R2
	str	r2,[r5,#SIO_DIV_UDIVISOR_OFFSET] // divisor = tile height

	// - at least 8 clock cycles must pass before the result is read; the
	//   instructions up to the REMAINDER read below fill that time

	// [6] aligned wrap width -> [SP+0]
	ldrh	r7,[r4,#SSEGM_WRAPX]	// [2] get wrap width
	movs	r6,#3			// [1] mask of the 2 lowest bits
	bics	r7,r6			// [1] round wrap width down to multiple of 4
	str	r7,[sp,#0]		// [2] save wrap width

	// [1] round X down to multiple of 4 -> R1
	bics	r1,r6			// [1] align X

	// [3] round width down to multiple of 4 -> [SP+4]
	bics	r3,r6			// [1] align width
	str	r3,[sp,#4]		// [2] save remaining width

	// [4] tile width -> [SP+8], R3
	ldrh	r3,[r4,#SSEGM_PAR3]	// [2] get tile width -> R3
	str	r3,[sp,#8]		// [2] save tile width

	// result of Y / tile_height -> R6 line inside the tile, R7 tile row
	// Note: QUOTIENT must be read last
	ldr	r6,[r5,#SIO_DIV_REMAINDER_OFFSET] // remainder -> R6, Y relative to tile row
	ldr	r7,[r5,#SIO_DIV_QUOTIENT_OFFSET] // quotient -> R7, index of tile row

	// start divide X / tile_width
	str	r1,[r5,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = X coordinate
	str	r3,[r5,#SIO_DIV_UDIVISOR_OFFSET] // divisor = tile width

	// - again at least 8 clock cycles must pass before the result is read

	// [1] tile size -> R2
	muls	r2,r3			// [1] tile height * tile width -> R2

	// [7] address of the tile row in the map (X = 0) -> LR, R7
	ldrh	r3,[r4,#SSEGM_WB]	// [2] get pitch of rows -> R3
	muls	r7,r3			// [1] tile row * pitch -> offset of row
	ldr	r3,[r4,#SSEGM_DATA]	// [2] pointer to data -> R3
	adds	r7,r3			// [1] address of tile row
	mov	lr,r7			// [1] keep row address for wrapping

	// [6] address of the current line inside tile 0 -> R4
	ldr	r3,[sp,#8]		// [2] tile width
	muls	r6,r3			// [1] tile width * line inside tile -> line offset
	ldr	r4,[r4,#SSEGM_PAR]	// [2] pointer to tiles
	adds	r4,r6			// [1] tile base address -> R4

	// result of X / tile_width -> R6 X inside the tile, R5 tile column
	// Note: QUOTIENT must be read last
	ldr	r6,[r5,#SIO_DIV_REMAINDER_OFFSET] // remainder -> R6, X relative in tile
	ldr	r5,[r5,#SIO_DIV_QUOTIENT_OFFSET] // quotient -> R5, tile column

	// pointer to the current tile index in the map -> R7
	adds	r7,r5			// row address + tile column

// ---- render rest of the first tile (only if X is not tile-aligned)
//  R0 ... pointer to destination control buffer
//  R1 ... X coordinate
//  R2 ... tile size
//  R3 ... tile width
//  R4 ... tile base address
//  R6 ... X relative in tile
//  R7 ... pointer to current tile index
//  LR ... address of the tile row (X = 0)
//  [SP+0] ... wrap width
//  [SP+4] ... remaining width
//  [SP+8] ... tile width

	tst	r6,r6			// X inside a tile?
	beq	2f			// no, X is tile-aligned

	// advance X to the next tile
	subs	r5,r3,r6		// pixels left in current tile -> R5
	adds	r1,r5			// shift X coordinate

	// these pixels are done
	ldr	r3,[sp,#4]		// get remaining width
	subs	r3,r5			// shift width
	str	r3,[sp,#4]		// save remaining width

	// control block: width
	lsrs	r5,#2			// number of 4-pixel groups
	stmia	r0!,{r5}		// save width

	// tile index -> R3
	ldrb	r3,[r7,#0]		// [2] load tile index
	adds	r7,#1			// [1] advance in the map

	// control block: address
	muls	r3,r2			// tile index * tile size = tile offset
	add	r3,r4			// [1] add tile base address
	add	r3,r6			// [1] skip the hidden left part of the tile
	stmia	r0!,{r3}		// [2] save pointer

	// wrap if the end of the map row was reached
	ldr	r3,[sp,#0]		// get wrap width
	cmp	r1,r3			// X reached wrap width?
	blo	2f			// not yet
	movs	r1,#0			// reset X coordinate
	mov	r7,lr			// restart at the beginning of the tile row

	// width of the first part (up to the wrap point) -> R5
2:	ldr	r3,[sp,#0]		// get wrap width
	subs	r5,r3,r1		// pixels remaining to wrap point
	ldr	r3,[sp,#4]		// remaining width -> R3

// ---- outer loop, one pass per part (run of pixels up to the wrap point)
// On every entry:
//  R0 ... pointer to destination control buffer
//  R2 ... tile size
//  R3 ... remaining width (already reduced by everything rendered)
//  R4 ... tile base address
//  R5 ... width available up to the wrap point (wrap width after a wrap)
//  R7 ... pointer to current tile index
//  LR ... address of the tile row (X = 0)
//  [SP+0] ... wrap width
//  [SP+8] ... tile width
// ([SP+4] is not maintained any more from here on, R3 takes its place.)
// The loop ends here, and only here, when less than 4 pixels are left.

RenderTile_OutLoop:

	// limit part width by remaining width -> R5
	cmp	r5,r3			// compare part width with remaining width
	bls	2f			// part width is OK
	mov	r5,r3			// limit part width

	// select how to render this part
2:	ldr	r1,[sp,#8]		// get tile width -> R1
	cmp	r5,r1			// at least one whole tile?
	bhs	5f			// yes, use the inner loop
	cmp	r5,#4			// start of one tile?
	blo	3f			// no, all done
	mov	r1,r5			// width to render -> R1

	// The part is shorter than a tile. Book it and reload the wrap width
	// so that the state matches the entry from the inner loop.
	// (Previously the routine returned after this tile even if more width
	// remained, e.g. wrap width 40, tile width 16, start X 32.)
	subs	r3,r5			// remaining width - part width
	ldr	r5,[sp,#0]		// load wrap width -> R5

// ---- render start of a tile, then start the next part
// Entry: R1 = width to render, R3 already accounts for it, R5 = wrap width.

RenderTile_Last:

	// control block: width
	lsrs	r6,r1,#2		// number of 4-pixel groups
	stmia	r0!,{r6}		// save width

	// tile index -> R6
	ldrb	r6,[r7,#0]		// [2] load tile index
	adds	r7,#1			// [1] advance in the map

	// control block: address
	muls	r6,r2			// tile index * tile size
	add	r6,r4			// [1] add tile base address
	stmia	r0!,{r6}		// [2] save pointer

	// next part starts at X = 0; the outer loop returns if nothing is left
	mov	r7,lr			// restart at the beginning of the tile row
	b	RenderTile_OutLoop

	// pop registers and return
3:	pop	{r1-r7,pc}

// ---- prepare to render whole tiles

	// loop counter -> R1, tile width/4 -> R5
5:	lsrs	r1,r5,#2		// part width / 4 -> R1
	lsls	r5,r1,#2		// back to pixels (multiple of 4) -> R5
	subs	r3,r5			// remaining width - part width -> R3
	ldr	r5,[sp,#8]		// get tile width -> R5
	lsrs	r5,#2			// tile width/4 -> R5
	subs	r1,r5			// number of 4-pixel groups - width/4
	adds	r1,#1			// number of 4-pixel groups - (width/4 - 1)

// ---- [11*N-1] inner loop, one whole tile per pass
//  R0 ... pointer to destination control buffer
//  R1 ... loop counter, biased so that the loop stops before a partial tile
//  R2 ... tile size
//  R3 ... remaining width
//  R4 ... tile base address
//  R5 ... tile width/4
//  R6 ... (temporary)
//  R7 ... pointer to current tile index

RenderTile_InLoop:

	// [3] tile index -> R6
	ldrb	r6,[r7,#0]		// [2] load tile index
	adds	r7,#1			// [1] advance in the map

	// [2] tile address
	muls	r6,r2			// [1] tile index * tile size
	add	r6,r4			// [1] add tile base address

	// [3] control block
	stmia	r0!,{r5,r6}		// [3] save width and pointer

	// [2,3] loop
	subs	r1,r5			// [1] one tile (width/4 groups) done
	bhi	RenderTile_InLoop	// [1,2] > 0, render next whole tile

// ---- end inner loop: undo the bias to get the width of a partial last tile

	adds	r1,r5			// add tile width/4 back
	subs	r1,#1			// remove the bias -> 4-pixel groups left
	ldr	r5,[sp,#0]		// load wrap width -> R5
	lsls	r1,#2			// convert to pixels, sets Z if nothing is left
	bne	RenderTile_Last		// render start of the last tile
	mov	r7,lr			// restart at the beginning of the tile row
	b	RenderTile_OutLoop	// go back to outer loop

	.align 2
	// literal: address of SIO base (hardware divider registers)
RenderTile_pSioBase:
	.word	SIO_BASE		// address of SIO base

View file

@ -1,376 +0,0 @@
// ****************************************************************************
//
// VGA render GF_TILE2
//
// ****************************************************************************
// u16 par3; // SSEGM_PAR3 tile width (must be multiple of 4)
// u32 par; // SSEGM_PAR tile table with one column of tiles
// u32 par2; // SSEGM_PAR2 LOW tile height, HIGH tile width bytes
#include "../define.h" // common definitions of C and ASM
#include "hardware/regs/sio.h" // registers of hardware divider
#include "hardware/regs/addressmap.h" // SIO base address
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// extern "C" u32* RenderTile2(u32* cbuf, int x, int y, int w, sSegm* segm);
// render tiles GF_TILE2
// cbuf ... destination control buffer
// x ... start X coordinate (must be multiple of 4)
// y ... start Y coordinate
// w ... width of this segment (must be multiple of 4)
// segm ... video segment
// Output new cbuf pointer.
// 320 pixels takes on 151 MHz: tiles 8x8 3.5 us, tile 16x16 2 us, tiles 32x32 1.3 us, tiles 64x64 0.9 us.
.thumb_func
.global RenderTile2
RenderTile2:
// push registers
push {r2-r7,lr}
// Input registers and stack content:
// R0 ... destination control buffer
// R1 ... X coordinate
// SP+0: R2 ... Y coordinate
// SP+4: R3 ... width to display
// SP+8: R4
// SP+12: R5
// SP+16: R6
// SP+20: R7
// SP+24: LR
// SP+28: video segment
// get pointer to video segment -> R4
ldr r4,[sp,#28] // load video segment -> R4
// R0 ... pointer to destination control buffer
// R1 ... X coordinate
// R2 ... Y coordinate
// R3 ... remaining width
// R4 ... sSegm*
// start divide Y/tile_height
ldr r5,RenderTile_pSioBase // get address of SIO base -> R5
str r2,[r5,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate
ldrh r2,[r4,#SSEGM_PAR2] // tile height -> R2
str r2,[r5,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, tile height
// - now we must wait at least 8 clock cycles to get result of division
// R0 ... pointer to destination control buffer
// R1 ... X coordinate
// R3 ... remaining width
// R4 ... sSegm*
// R5 ... SIO_BASE
// [6] get wrap width -> [SP+0]
ldrh r7,[r4,#SSEGM_WRAPX] // [2] get wrap width
movs r6,#3 // [1] mask to align to 32-bit
bics r7,r6 // [1] align wrap
str r7,[sp,#0] // [2] save wrap width
// R0 ... pointer to destination control buffer
// R1 ... X coordinate
// R3 ... remaining width
// R4 ... sSegm*
// R5 ... SIO_BASE
// R6 ... align mask #3
// [SP+0] ... wrap width
// [1] align X coordinate to 32-bit -> R1
bics r1,r6 // [1] align X
// R0 ... pointer to destination control buffer
// R1 ... X coordinate
// R3 ... remaining width
// R4 ... sSegm*
// R5 ... SIO_BASE
// R6 ... align mask #3
// [SP+0] ... wrap width
// [3] align remaining width -> [SP+4]
bics r3,r6 // [1] align width
str r3,[sp,#4] // [2] store aligned width to [SP+4]
// R0 ... pointer to destination control buffer
// R1 ... X coordinate
// R4 ... sSegm*
// R5 ... SIO_BASE
// [SP+0] ... wrap width
// [SP+4] ... remaining width
// [2] prepare tile width -> R3
ldrh r3,[r4,#SSEGM_PAR3] // [2] get tile width -> R3
// R0 ... pointer to destination control buffer
// R1 ... X coordinate
// R3 ... tile width
// R4 ... sSegm*
// R5 ... SIO_BASE
// [SP+0] ... wrap width
// [SP+4] ... remaining width
// load result of division Y/tile_height -> R6 Y relative at row, R7 Y row
// Note: QUOTIENT must be read last
ldr r6,[r5,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R6, Y coordinate relative to current row
ldr r7,[r5,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R7, index of row
// R0 ... pointer to destination control buffer
// R1 ... X coordinate
// R3 ... tile width
// R4 ... sSegm*
// R5 ... SIO_BASE
// R6 ... Y relative at row
// R7 ... Y row index
// [SP+0] ... wrap width
// [SP+4] ... remaining width
// start divide X/tile_width
str r1,[r5,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, X coordinate
str r3,[r5,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, tile width
// - now we must wait at least 8 clock cycles to get result of division
// R0 ... pointer to destination control buffer
// R1 ... X coordinate
// R3 ... tile width
// R4 ... sSegm*
// R5 ... SIO_BASE
// R6 ... Y relative at row
// R7 ... Y row index
// [SP+0] ... wrap width
// [SP+4] ... remaining width
// [7] base pointer to source data buffer (without X) -> LR, R7
ldrh r2,[r4,#SSEGM_WB] // [2] get pitch of rows -> R2
muls r7,r2 // [1] pitch * row (Y * WB) -> offset of row in data buffer
ldr r2,[r4,#SSEGM_DATA] // [2] pointer to data -> R2
adds r7,r2 // [1] base address of data buffer
mov lr,r7 // [1] save base address
// R0 ... pointer to destination control buffer
// R1 ... X coordinate
// R3 ... tile width
// R4 ... sSegm*
// R5 ... SIO_BASE
// R6 ... Y relative at row
// R7 ... base address of data buffer (without X)
// LR ... base address of data buffer (without X)
// [SP+0] ... wrap width
// [SP+4] ... remaining width
// [6] tile base address -> R4
ldrh r2,[r4,#SSEGM_PAR2+2] // [2] tile width bytes -> R2
muls r6,r2 // [1] tile width bytes * Y relative to row -> tile line offset R6
ldr r4,[r4,#SSEGM_PAR] // [2] pointer to tiles
adds r4,r6 // [1] tile base address -> R4
// R0 ... pointer to destination control buffer
// R1 ... X coordinate
// R3 ... tile width
// R4 ... tile base address
// R5 ... SIO_BASE
// R7 ... base address of data buffer (without X)
// LR ... base address of data buffer (without X)
// [SP+0] ... wrap width
// [SP+4] ... remaining width
// load result of division X/tile_width -> R6 X pixel relative, R5 tile position
// Note: QUOTIENT must be read last
ldr r6,[r5,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R6, X pixel relative in tile
ldr r5,[r5,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R5, tile position
// R0 ... pointer to destination control buffer
// R1 ... X coordinate
// R3 ... tile width
// R4 ... tile base address
// R5 ... tile position
// R6 ... X pixel relative in tile
// R7 ... base address of data buffer (without X)
// LR ... base address of data buffer (without X)
// [SP+0] ... wrap width
// [SP+4] ... remaining width
// prepare current pointer to source data buffer with X -> R7
adds r7,r5 // tile source address -> R7
// R0 ... pointer to destination control buffer
// R1 ... X coordinate
// R3 ... tile width
// R4 ... tile base address
// R6 ... X pixel relative in tile
// R7 ... pointer to source data buffer (with X)
// LR ... base address of data buffer (without X)
// [SP+0] ... wrap width
// [SP+4] ... remaining width
// ---- render rest of first tile
// check if X is tile-aligned
tst r6,r6 // check tile align
beq 2f // X is tile aligned
// shift X coordinate
subs r5,r3,r6 // pixels remain in current tile -> R5
adds r1,r5 // shift X coordinate (align to next tile)
// shift remaining width
ldr r2,[sp,#4] // get remaining width
subs r2,r5 // shift width
str r2,[sp,#4] // store remaining width
// write number of 4-pixels
lsrs r5,#2 // number of 4-pixels
stmia r0!,{r5} // save width
// load tile index -> R2
ldrb r2,[r7,#0] // [2] load tile index
adds r7,#1 // [1] increase tile address
// write tile addres
muls r2,r3 // tile index * tile width = tile offset
add r2,r4 // [1] add tile base address
add r2,r6 // [1] shift to tile start
stmia r0!,{r2} // [3] save pointer
// check end of segment
ldr r2,[sp,#0] // get wrap width
cmp r1,r2 // check end of segment
blo 2f // not end of segment
movs r1,#0 // reset X coordinate
mov r7,lr // get base pointer to tile data
// prepare wrap width - start X -> R5
2: ldr r2,[sp,#0] // get wrap width
subs r5,r2,r1 // pixels remaining to end of segment
ldr r2,[sp,#4] // total remaining width -> R3
// ---- start outer loop, render one part of segment
// R0 ... pointer to destination control buffer
// R2 ... total remaining width
// R3 ... tile width
// R4 ... tile base address
// R5 ... wrap width of this segment
// R7 ... pointer to source data buffer
// LR ... base address of data buffer (without X)
// [SP+0] ... wrap width
RenderTile_OutLoop:
// limit wrap width by total width -> R5
cmp r5,r2 // compare wrap width with total width
bls 2f // width is OK
mov r5,r2 // limit wrap width
// check if remain whole tile
2: cmp r5,r3 // check number of remaining pixels
bhs 5f // remain whole tiles
// check if start of last tile remains
cmp r5,#4 // check start of last tile
blo 3f // all done
mov r1,r5 // width to render
// ---- render start of last tile
// R0 ... pointer to destination control buffer
// R1 ... width to render in this segment
// R2 ... total remaining width
// R3 ... tile width
// R4 ... tile base address
// R5 ... wrap width of this segment
// R7 ... pointer to source data buffer (with X)
// LR ... base address of data buffer (without X)
// [SP+0] ... wrap width
RenderTile_Last:
// save width
lsrs r6,r1,#2 // number of 4-pixels
stmia r0!,{r6} // save width
// load tile index -> R6
ldrb r6,[r7,#0] // [2] load tile index
adds r7,#1 // [1] increase tile index
// save tile addres
muls r6,r3 // multiply tile index * tile width
add r6,r4 // [1] add tile base address
stmia r0!,{r6} // [3] save pointer
// check if continue with next segment
mov r7,lr // get base pointer to tile data
cmp r5,r3 // whole tile remains?
bhs RenderTile_OutLoop // render next segment
// pop registers and return
3: pop {r2-r7,pc}
// ---- prepare to render whole tiles
// R0 ... pointer to destination control buffer
// R2 ... total remaining width
// R3 ... tile width
// R4 ... tile base address
// R5 ... width of this segment
// R7 ... pointer to source data buffer (with X)
// LR ... base address of data buffer (without X)
// [SP+0] ... wrap width
// prepare number of 4-pixels to render -> R1
5: lsrs r1,r5,#2 // shift to get number of tiles in multiply of 4-pixels -> R1
lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5
subs r2,r5 // update remaining width -> R2
lsrs r5,r3,#2 // tile width/4 -> R5
subs r1,r5 // number of 4-pixels - width/4
adds r1,#1 // number of 4-pixels - (width/4-1)
// ---- [11*N-1] start inner loop, render in one part of segment
// R0 ... pointer to destination control buffer
// R1 ... number of 4-pixels to generate - 1 (loop counter)
// R2 ... total remaining width
// R3 ... tile width
// R4 ... tile base address
// R5 ... tile width/4
// R7 ... pointer to source data buffer (with X)
// LR ... base address of data buffer (without X)
// [SP+0] ... wrap width
RenderTile_InLoop:
// [3] load tile index -> R6
ldrb r6,[r7,#0] // [2] load tile index
adds r7,#1 // [1] increase tile index
// [2] get tile addres
muls r6,r3 // [1] multiply tile index * tile width
add r6,r4 // [1] add tile base address
// [3] save control block
stmia r0!,{r5,r6} // [3] save width and pointer
// [2,3] loop
subs r1,r5 // [1] shift loop counter, subtract tile width/4
bhi RenderTile_InLoop // [1,2] > 0, render next whole tile
// ---- end inner loop, continue with last tile, or start new part
// continue to outer loop
adds r1,r5 // return size of last tile
subs r1,#1 // add "tile size/4 - 1"
ldr r5,[sp,#0] // load wrap width -> R5
lsls r1,#2 // convert back to pixels
bne RenderTile_Last // render start of last tile
mov r7,lr // get base pointer to tile data -> R7
b RenderTile_OutLoop // go back to outer loop
.align 2
// pointer to SIO base
RenderTile_pSioBase:
.word SIO_BASE // addres of SIO base

View file

@ -1,450 +0,0 @@
// ****************************************************************************
//
// VGA render GF_TILEPERSP
//
// ****************************************************************************
// data ... tile map
// par ... column of tile images
// par2 ... pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL))
// par3 ... LOW8=number of bits of tile width and height, HIGH8=horizon offset
// wb ... LOW8=number of bits of tile map width, HIGH8=number of bits of tile map height
// wrapy ... segment height
#include "../define.h" // common definitions of C and ASM
#include "hardware/regs/sio.h" // registers of hardware divider
#include "hardware/regs/addressmap.h" // SIO base address
// Byte offsets of the hardware interpolator registers, relative to the address of
// interpolator 0 ACCUM0 (SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET, see RenderTilePersp_Interp).
// Both interpolators can therefore be addressed from one base register.
// ---- interpolator 0 (names ending with 0)
#define ACCUM0_OFFSET0 0
#define ACCUM1_OFFSET0 4
#define BASE0_OFFSET0 8
#define BASE1_OFFSET0 12
#define BASE2_OFFSET0 16
#define POP_LANE0_OFFSET0 20
#define POP_LANE1_OFFSET0 24
#define POP_FULL_OFFSET0 28
#define PEEK_LANE0_OFFSET0 32
#define PEEK_LANE1_OFFSET0 36
#define PEEK_FULL_OFFSET0 40
#define CTRL_LANE0_OFFSET0 44
#define CTRL_LANE1_OFFSET0 48
#define ACCUM0_ADD_OFFSET0 52
#define ACCUM1_ADD_OFFSET0 56
#define BASE_1AND0_OFFSET0 60
// ---- interpolator 1 (names ending with 1), same register layout 64 bytes higher
#define ACCUM0_OFFSET1 64
#define ACCUM1_OFFSET1 68
#define BASE0_OFFSET1 72
#define BASE1_OFFSET1 76
#define BASE2_OFFSET1 80
#define POP_LANE0_OFFSET1 84
#define POP_LANE1_OFFSET1 88
#define POP_FULL_OFFSET1 92
#define PEEK_LANE0_OFFSET1 96
#define PEEK_LANE1_OFFSET1 100
#define PEEK_FULL_OFFSET1 104
#define CTRL_LANE0_OFFSET1 108
#define CTRL_LANE1_OFFSET1 112
#define ACCUM0_ADD_OFFSET1 116
#define ACCUM1_ADD_OFFSET1 120
#define BASE_1AND0_OFFSET1 124
// assembler setup: unified ARM/Thumb syntax, code placed in section .time_critical.Render
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// extern "C" u32* RenderTilePersp(u32* cbuf, int x, int y, int w, sSegm* segm);
// render tiles with perspective GF_TILEPERSP
// Renders one line of a tile map transformed by a 2x3 matrix (with optional perspective).
// For every output pixel, interpolator 0 gives the address of the tile index in the tile map
// and interpolator 1 gives the address of the pixel inside the tile image; both interpolators
// get the same accumulators and step and are advanced together by reading POP_FULL.
// using hardware divider and hardware interpolator inter0 and inter1 (their state is not saved during interrupt)
// R0 ... pointer to destination data buffer
// R1 ... start X coordinate (not used)
// R2 ... start Y coordinate (in graphics lines)
// R3 ... width to display (must be multiple of 4)
// [stack] ... segm video segment sSegm
// Output new pointer to data buffer (R0 advanced behind last stored pixel).
// 320 pixels takes ?? us on 151 MHz.
.thumb_func
.global RenderTilePersp
RenderTilePersp:
// Input registers and stack (stack layout is valid after the push below):
// R0 ... pointer to destination data buffer
// R1 ... X coordinate (not used)
// R2 ... Y coordinate
// SP+0: R3 ... remaining width
// SP+4: R4
// SP+8: R5
// SP+12: R6
// SP+16: R7
// SP+20: LR
// SP+24: video segment
// push registers
push {r3-r7,lr}
// ---- prepare registers
// get pointer to video segment -> R4
ldr r4,[sp,#24] // load video segment -> R4
// R0 ... pointer to data buffer
// R2 ... Y coordinate
// R3 ... remaining width
// R4 ... video segment
// load horizon offset -> R1, check if use perspective
ldr r6,RenderTilePersp_pSioBase // get address of SIO base -> R6
ldrh r5,[r4,#SSEGM_WRAPY] // get segment height -> R5
ldrb r1,[r4,#SSEGM_PAR3+1] // get horizon offset -> R1
sxtb r1,r1 // signed extension
lsls r1,#2 // horizon * 4, horizon = 0 ? (Z and N flags are used by 'bne' here and by 'bpl' at label 2)
bne 2f // use perspective
// not using perspective, start Y coordinate y0 = y - h/2 -> R12
lsrs r5,#1 // segment height/2 -> R5
subs r2,r5 // y - h/2 -> R2
mov r12,r2 // current coordinate Y0 = y - h/2 -> R12
// prepare divide result to get 1<<FRACT (distance coefficient 1.0, read later from the divider)
movs r5,#1 // R5 <- 1
str r5,[r6,#SIO_DIV_UDIVISOR_OFFSET] // divisor = 1
lsls r5,#FRACT // constant 1<<FRACT -> R5
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL
b 4f
// using perspective, check ceiling mode (negative horizon)
2: bpl 3f // horizon is not negative (N flag still comes from 'lsls r1,#2' above)
subs r2,r5,r2 // negate, y = h - y
subs r2,#1 // y = h - 1 - y
negs r1,r1 // absolute value of horizon
// prepare current coordinate Y0 = y - h -> R12
3: subs r7,r2,r5 // y - h = current Y coordinate -> R7
mov r12,r7 // store current coordinate Y0 -> R12
// start calculating distance coefficient dist = FRACTMUL*h/(y + horiz)
lsls r5,#FRACT // segment height * FRACTMUL -> R5
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h
adds r2,r1 // horizon + y -> R2
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz
// - division runs in background, the quotient is read after setup of the interpolators
// R0 ... pointer to data buffer
// R3 ... remaining width
// R4 ... video segment
// R12 ... current coordinate Y0
// prepare start coordinate X0 = -w/2 -> LR
4: lsrs r5,r3,#1 // width/2
negs r5,r5 // negate
mov lr,r5 // store start coordinate X0 -> LR
// prepare number of 4-pixels (loop counter) -> R7
lsrs r7,r3,#2 // width/4 -> R7
// prepare address of interpolator 0 base -> R3
ldr r3,RenderTilePersp_Interp // get address of interpolator 0 base -> R3
// R0 ... pointer to data buffer
// R3 ... interpolator base
// R4 ... video segment
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// ---- setup interpolator 0 to get tile index
// set tile map base to base2
ldr r6,[r4,#SSEGM_DATA] // load tile map base
str r6,[r3,#BASE2_OFFSET0] // set tile map base
// set control word of lane 0: shift=FRACT+tilebits, mask=0..mapwbits-1
ldr r6,RenderTilePersp_Ctrl // load control word
ldrb r1,[r4,#SSEGM_PAR3] // get tile width and height (number of bits) -> R1
str r1,[sp,#0] // save tile size -> [SP+0] (slot of pushed R3, the width is no longer needed)
adds r6,r1 // FRACT + tilebits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required)
ldrb r2,[r4,#SSEGM_WB] // number of bits of tile map width mapwbits -> R2
subs r5,r2,#1 // mapwbits - 1
lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position
orrs r6,r5 // add to control word
str r6,[r3,#CTRL_LANE0_OFFSET0] // set control word of lane 0
// set control word of lane 1: shift=FRACT+tilebits-mapwbits,
// mask=mapwbits..mapwbits+maphbits-1
// (control word of lane 0 in R6 is reused and modified)
subs r6,r2 // FRACT + tilebits - mapwbits
lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift mapwbits to mask LSB position
orrs r6,r2 // add mapwbits to control word
ldrb r2,[r4,#SSEGM_WB+1] // number of bits of tile map height maphbits -> R2
lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift maphbits to mask MSB position
adds r6,r2 // add to control word (mask MSB field already contains mapwbits-1 -> mapwbits+maphbits-1)
str r6,[r3,#CTRL_LANE1_OFFSET0] // set control word of lane 1
// ---- setup interpolator 1 to get pixel index
// set tile image to base2
ldr r6,[r4,#SSEGM_PAR] // load tile image base
str r6,[r3,#BASE2_OFFSET1] // set tile image base
// set control word of lane 0: shift=FRACT, mask=0..tilebits-1
ldr r6,RenderTilePersp_Ctrl // load control word
subs r5,r1,#1 // tilebits - 1
lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position
orrs r6,r5 // add to control word
str r6,[r3,#CTRL_LANE0_OFFSET1] // set control word of lane 0
// set control word of lane 1: shift=FRACT-tilebits, mask=tilebits..tilebits*2-1
// (control word of lane 0 in R6 is reused and modified)
subs r6,r1 // FRACT - tilebits
lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift to mask LSB position
orrs r6,r5 // add tilebits to control word
lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift tilebits to mask MSB position
adds r6,r1 // add to control word (mask MSB field already contains tilebits-1 -> tilebits*2-1)
str r6,[r3,#CTRL_LANE1_OFFSET1] // set control word of lane 1
// R0 ... pointer to data buffer
// R3 ... interpolator base
// R4 ... video segment
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// [SP+0] ... number of bits of tile width and height
// ---- set matrix
// get pointer to matrix -> R4
ldr r4,[r4,#SSEGM_PAR2] // get pointer to matrix -> R4
// get distance coefficient dist -> R1 (result of division started before setup of interpolators)
ldr r1,RenderTilePersp_pSioBase // get address of SIO base -> R1
ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient
// r4+0 ... m11
// r4+4 ... m12
// r4+8 ... m13
// r4+12 ... m21
// r4+16 ... m22
// r4+20 ... m23
// set m11 -> R5 base0 (step of accum0 per pixel, the same for both interpolators)
ldr r5,[r4,#0] // load m11
muls r5,r1 // m11*dist
asrs r5,#FRACT // (m11*dist)>>FRACT
str r5,[r3,#BASE0_OFFSET0] // set base0 of interpolator 0
str r5,[r3,#BASE0_OFFSET1] // set base0 of interpolator 1
// set m21 -> R6 base1 (step of accum1 per pixel, the same for both interpolators)
ldr r6,[r4,#12] // load m21
muls r6,r1 // m21*dist
asrs r6,#FRACT // (m21*dist)>>FRACT
str r6,[r3,#BASE1_OFFSET0] // set base1 of interpolator 0
str r6,[r3,#BASE1_OFFSET1] // set base1 of interpolator 1
// R0 ... pointer to data buffer
// R1 ... distance coefficient
// R3 ... interpolator base
// R4 ... pointer to matrix
// R5 ... m11 (scaled by distance coefficient)
// R6 ... m21 (scaled by distance coefficient)
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// [SP+0] ... number of bits of tile width and height
// set x0*m11 + y0*m12 + m13 -> accum0 (m11 and m12 scaled by distance coefficient)
mov r2,lr // start coordinate X0 -> R2
muls r5,r2 // x0*m11 -> R5
muls r2,r6 // x0*m21 -> R2
mov lr,r1 // save distance coefficient -> LR
ldr r6,[r4,#4] // load m12 -> R6
muls r1,r6 // m12*dist -> R1
asrs r1,#FRACT // (m12*dist)>>FRACT -> R1
mov r6,r12 // load coordinate Y0 -> R6
muls r1,r6 // y0*m12 -> R1
adds r5,r1 // x0*m11 + y0*m12 -> R5
ldr r1,[r4,#8] // load m13 -> R1
adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5
str r5,[r3,#ACCUM0_OFFSET0] // set accum0 of interpolator 0
str r5,[r3,#ACCUM0_OFFSET1] // set accum0 of interpolator 1
// R0 ... pointer to data buffer
// R2 ... x0*m21
// R3 ... interpolator base
// R4 ... pointer to matrix
// R6 ... current coordinate Y0
// R7 ... width/4
// LR ... distance coefficient
// [SP+0] ... number of bits of tile width and height
// set x0*m21 + y0*m22 + m23 -> accum1 (m21 and m22 scaled by distance coefficient)
ldr r1,[r4,#16] // load m22 -> R1
mov r5,lr // distance coefficient -> R5
muls r1,r5 // m22*dist
asrs r1,#FRACT // (m22*dist)>>FRACT -> R1
muls r1,r6 // y0*m22 -> R1
adds r2,r1 // x0*m21 + y0*m22 -> R2
ldr r1,[r4,#20] // load m23 -> R1
adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2
str r2,[r3,#ACCUM1_OFFSET0] // set accum1 of interpolator 0
str r2,[r3,#ACCUM1_OFFSET1] // set accum1 of interpolator 1
// ---- process odd 4-pixel
// prepare tile bits * 2 (shift count to convert tile index to offset of tile image)
ldr r6,[sp,#0] // get tile bits
lsls r6,#1 // tile bits * 2
// R0 ... pointer to destination data buffer
// R1 ... (temporary - pixel accumulator 1)
// R2 ... (temporary - pixel accumulator 2)
// R3 ... interpolator base
// R4 ... (temporary - get pointer to tile map, load tile index)
// R5 ... (temporary - get pointer to pixel, load pixel)
// R6 ... tilebits*2
// R7 ... width/4 (loop counter)
// [SP+0] ... number of bits of tile width and height
// check odd 4-pixels (main loop renders 8 pixels per pass)
lsrs r7,#1 // width/4/2 -> R7, carry = odd 4-pixel group remains
bcc 2f // no odd 4-pixel
// [7] load 1st pixel
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r1,[r5,r4] // [2] load pixel
// [9] load 2nd pixel
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r4,[r5,r4] // [2] load pixel
lsls r4,#8 // [1] shift 1 byte left
orrs r1,r4 // [1] add pixel to accumulator
// [9] load 3rd pixel
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r4,[r5,r4] // [2] load pixel
lsls r4,#16 // [1] shift 2 bytes left
orrs r1,r4 // [1] add pixel to accumulator
// [9] load 4th pixel
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r4,[r5,r4] // [2] load pixel
lsls r4,#24 // [1] shift 3 bytes left
orrs r1,r4 // [1] add pixel to accumulator
// [2] store 4 pixels
stmia r0!,{r1} // [2] store 4 pixels
// check number of remaining pixels
2: tst r7,r7 // check number of 8-pixel groups
beq 8f // end
// ---- [74 per 8 pixels] inner loop
// R0 ... pointer to destination data buffer
// R1 ... (temporary - pixel accumulator 1)
// R2 ... (temporary - pixel accumulator 2)
// R3 ... interpolator base
// R4 ... (temporary - get pointer to tile map, load tile index)
// R5 ... (temporary - get pointer to pixel, load pixel)
// R6 ... tilebits*2
// R7 ... width/8 (loop counter)
// [7] load 1st pixel
6: ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r1,[r5,r4] // [2] load pixel
// [9] load 2nd pixel
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r4,[r5,r4] // [2] load pixel
lsls r4,#8 // [1] shift 1 byte left
orrs r1,r4 // [1] add pixel to accumulator
// [9] load 3rd pixel
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r4,[r5,r4] // [2] load pixel
lsls r4,#16 // [1] shift 2 bytes left
orrs r1,r4 // [1] add pixel to accumulator
// [9] load 4th pixel
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r4,[r5,r4] // [2] load pixel
lsls r4,#24 // [1] shift 3 bytes left
orrs r1,r4 // [1] add pixel to accumulator
// [7] load 5th pixel (1st pixel of accumulator 2)
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r2,[r5,r4] // [2] load pixel
// [9] load 6th pixel (2nd pixel of accumulator 2)
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r4,[r5,r4] // [2] load pixel
lsls r4,#8 // [1] shift 1 byte left
orrs r2,r4 // [1] add pixel to accumulator
// [9] load 7th pixel (3rd pixel of accumulator 2)
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r4,[r5,r4] // [2] load pixel
lsls r4,#16 // [1] shift 2 bytes left
orrs r2,r4 // [1] add pixel to accumulator
// [9] load 8th pixel (4th pixel of accumulator 2)
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r4,[r5,r4] // [2] load pixel
lsls r4,#24 // [1] shift 3 bytes left
orrs r2,r4 // [1] add pixel to accumulator
// [3] store 8 pixels
stmia r0!,{r1,r2} // [3] store 8 pixels
// [2,3] loop counter
subs r7,#1 // [1] 8-pixel counter
bne 6b // [1,2] next 8-pixels
// pop registers and return (R0 = new pointer to data buffer)
8: pop {r3-r7,pc}
.align 2
// pointer to SIO base
RenderTilePersp_pSioBase:
.word SIO_BASE // address of SIO base
// pointer to Interp0 base
RenderTilePersp_Interp:
.word SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET // address of interpolator 0 base
RenderTilePersp_Ctrl: // lane control word template: ADD_RAW bit set, shift = FRACT
.word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT<<SIO_INTERP0_CTRL_LANE0_SHIFT_LSB)

View file

@ -1,433 +0,0 @@
// ****************************************************************************
//
// VGA render GF_TILEPERSP15
//
// ****************************************************************************
// data ... tile map
// par ... column of tile images
// par2 ... pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL))
// par3 ... LOW8=number of bits of tile width and height, HIGH8=horizon offset
// wb ... LOW8=number of bits of tile map width, HIGH8=number of bits of tile map height
// wrapy ... segment height
#include "../define.h" // common definitions of C and ASM
#include "hardware/regs/sio.h" // registers of hardware divider
#include "hardware/regs/addressmap.h" // SIO base address
// Byte offsets of the hardware interpolator registers, relative to the address of
// interpolator 0 ACCUM0 (the interpolator base address the render function keeps in R3).
// Both interpolators can therefore be addressed from one base register.
// ---- interpolator 0 (names ending with 0)
#define ACCUM0_OFFSET0 0
#define ACCUM1_OFFSET0 4
#define BASE0_OFFSET0 8
#define BASE1_OFFSET0 12
#define BASE2_OFFSET0 16
#define POP_LANE0_OFFSET0 20
#define POP_LANE1_OFFSET0 24
#define POP_FULL_OFFSET0 28
#define PEEK_LANE0_OFFSET0 32
#define PEEK_LANE1_OFFSET0 36
#define PEEK_FULL_OFFSET0 40
#define CTRL_LANE0_OFFSET0 44
#define CTRL_LANE1_OFFSET0 48
#define ACCUM0_ADD_OFFSET0 52
#define ACCUM1_ADD_OFFSET0 56
#define BASE_1AND0_OFFSET0 60
// ---- interpolator 1 (names ending with 1), same register layout 64 bytes higher
#define ACCUM0_OFFSET1 64
#define ACCUM1_OFFSET1 68
#define BASE0_OFFSET1 72
#define BASE1_OFFSET1 76
#define BASE2_OFFSET1 80
#define POP_LANE0_OFFSET1 84
#define POP_LANE1_OFFSET1 88
#define POP_FULL_OFFSET1 92
#define PEEK_LANE0_OFFSET1 96
#define PEEK_LANE1_OFFSET1 100
#define PEEK_FULL_OFFSET1 104
#define CTRL_LANE0_OFFSET1 108
#define CTRL_LANE1_OFFSET1 112
#define ACCUM0_ADD_OFFSET1 116
#define ACCUM1_ADD_OFFSET1 120
#define BASE_1AND0_OFFSET1 124
// assembler setup: unified ARM/Thumb syntax, code placed in section .time_critical.Render
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// extern "C" u32* RenderTilePersp15(u32* cbuf, int x, int y, int w, sSegm* segm);
// render tiles with perspective GF_TILEPERSP15, 1.5 pixel
// using hardware interpolator inter0 and inter1 (their state is not saved during interrup)
// R0 ... pointer to destination data buffer
// R1 ... start X coordinate (not used)
// R2 ... start Y coordinate (in graphics lines)
// R3 ... width to display (must be multiple of 4)
// [stack] ... segm video segment sSegm
// Output new pointer to data buffer.
// 320 pixels takes ?? us on 151 MHz.
.thumb_func
.global RenderTilePersp15
RenderTilePersp15:
// Input registers and stack:
// R0 ... pointer to destination data buffer
// R1 ... X coordinate (not used)
// R2 ... Y coordinate
// SP+0: R3 ... remaining width
// SP+4: R4
// SP+8: R5
// SP+12: R6
// SP+16: R7
// SP+20: LR
// SP+24: video segment
// push registers
push {r3-r7,lr}
// ---- prepare registers
// get pointer to video segment -> R4
ldr r4,[sp,#24] // load video segment -> R4
// R0 ... pointer to data buffer
// R2 ... Y coordinate
// R3 ... remaining width
// R4 ... video segment
// load horizon offset -> R1, check if use perspective
ldr r6,RenderTilePersp_pSioBase // get address of SIO base -> R6
ldrh r5,[r4,#SSEGM_WRAPY] // get segment height -> R5
ldrb r1,[r4,#SSEGM_PAR3+1] // get horizon offset -> R1
sxtb r1,r1 // signed extension
lsls r1,#2 // horizon * 4, horizon = 0 ?
bne 2f // use perspective
// not using perspective, start Y coordinate y0 = y - h/2 -> R12
lsrs r5,#1 // segment height/2 -> R5
subs r2,r5 // y - h/2 -> R2
mov r12,r2 // current coordinate Y0 = y - h/2 -> R12
// prepare divide result to get 1<<FRACT
movs r5,#1 // R5 <- 1
str r5,[r6,#SIO_DIV_UDIVISOR_OFFSET] // divisor = 1
lsls r5,#FRACT // constant 1<<FRACT -> R5
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL
b 4f
// using perspective, check ceilling mode
2: bpl 3f // horizon is not negative
subs r2,r5,r2 // negate, y = h - y
subs r2,#1 // y = h - 1 - y
negs r1,r1 // absolute value of horizon
// prepare current coordinate Y0 = y - h -> R12
3: subs r7,r2,r5 // y - h = current Y coordinate -> R7
mov r12,r7 // store current coordinate Y0 -> R12
// start calculating distance coefficient dist = FRACTMUL*h/(y + horiz)
lsls r5,#FRACT // segment height * FRACTMUL -> R5
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h
adds r2,r1 // horizon + y -> R2
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz
// R0 ... pointer to data buffer
// R3 ... remaining width
// R4 ... video segment
// R12 ... current coordinate Y0
// prepare start coordinate X0 = -w/2 -> LR
4: lsrs r5,r3,#1 // width/2
negs r5,r5 // negate
mov lr,r5 // store start coordinate X0 -> LR
// prepare number of 4-pixels (loop counter) -> R7
lsrs r7,r3,#2 // width/4 -> R7
// prepare address of interpolator 0 base -> R3
ldr r3,RenderTilePersp_Interp // get address of interpolator 0 base -> R3
// R0 ... pointer to data buffer
// R3 ... interpolator base
// R4 ... video segment
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// ---- setup interpolator 0 to get tile index
// set tile map base to base2
ldr r6,[r4,#SSEGM_DATA] // load tile map base
str r6,[r3,#BASE2_OFFSET0] // set tile map base
// set control word of lane 0: shift=FRACT+tilebits, mask=0..mapwbits-1
ldr r6,RenderTilePersp_Ctrl // load control word
ldrb r1,[r4,#SSEGM_PAR3] // get tile width and height -> R1
str r1,[sp,#0] // save tile size -> [SP+0]
adds r6,r1 // FRACT + tilebits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required)
ldrb r2,[r4,#SSEGM_WB] // number of bits of tile map width mapwbits -> R2
subs r5,r2,#1 // mapwbits - 1
lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position
orrs r6,r5 // add to control word
str r6,[r3,#CTRL_LANE0_OFFSET0] // set control word of lane 0
// set control word of lane 1: shift=FRACT+tilebits-mapwbits,
// mask=mapwbits..mapwbits+maphbits-1
subs r6,r2 // FRACT + tilebits - mapwbits
lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift mapwbits to mask LSB position
orrs r6,r2 // add mapwbits to control word
ldrb r2,[r4,#SSEGM_WB+1] // number of bits of tile map height maphbits -> R2
lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift maphbits to mask MSB position
adds r6,r2 // add to control word
str r6,[r3,#CTRL_LANE1_OFFSET0] // set control word of lane 1
// ---- setup interpolator 1 to get pixel index
// set tile image to base2
ldr r6,[r4,#SSEGM_PAR] // load tile image base
str r6,[r3,#BASE2_OFFSET1] // set tile image base
// set control word of lane 0: shift=FRACT, mask=0..tilebits-1
ldr r6,RenderTilePersp_Ctrl // load control word
subs r5,r1,#1 // tilebits - 1
lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position
orrs r6,r5 // add to control word
str r6,[r3,#CTRL_LANE0_OFFSET1] // set control word of lane 0
// set control word of lane 1: shift=FRACT-tilebits, mask=tilebits..tilebits*2-1
subs r6,r1 // FRACT - tilebits
lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift to mask LSB position
orrs r6,r5 // add tilebits to control word
lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift tilebits to mask MSB position
adds r6,r1 // add to control word
str r6,[r3,#CTRL_LANE1_OFFSET1] // set control word of lane 1
// R0 ... pointer to data buffer
// R3 ... interpolator base
// R4 ... video segment
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// [SP+0] ... number of bits of tile width and height
// ---- set matrix
// get pointer to matrix -> R4
ldr r4,[r4,#SSEGM_PAR2] // get pointer to matrix -> R4
// get distance coefficient dist -> R1
ldr r1,RenderTilePersp_pSioBase // get address of SIO base -> R1
ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient
// r4+0 ... m11
// r4+4 ... m12
// r4+8 ... m13
// r4+12 ... m21
// r4+16 ... m22
// r4+20 ... m23
// set m11 -> R5 base0
ldr r5,[r4,#0] // load m11
muls r5,r1 // m11*dist
asrs r5,#FRACT // (m11*dist)>>FRACT
asrs r2,r5,#1 // delta/2
adds r2,r5 // delta*1.5
str r2,[r3,#BASE0_OFFSET0] // set base0
str r2,[r3,#BASE0_OFFSET1] // set base0
// set m21 -> R6 base1
ldr r6,[r4,#12] // load m21
muls r6,r1 // m21*dist
asrs r6,#FRACT // (m21*dist)>>FRACT
asrs r2,r6,#1 // delta/2
adds r2,r6 // delta*1.5
str r2,[r3,#BASE1_OFFSET0] // set base1
str r2,[r3,#BASE1_OFFSET1] // set base1
// R0 ... pointer to data buffer
// R1 ... distance coefficient
// R3 ... interpolator base
// R4 ... pointer to matrix
// R5 ... m11
// R6 ... m21
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// [SP+0] ... number of bits of tile width and height
// set x0*m11 + y0*m12 + m13 -> accum0
mov r2,lr // start coordinate X0 -> X2
muls r5,r2 // x0*m11 -> R5
muls r2,r6 // x0*m21 -> R2
mov lr,r1 // save distance coefficient -> LR
ldr r6,[r4,#4] // load m12 -> R6
muls r1,r6 // m12*dist -> R1
asrs r1,#FRACT // (m12*dist)>>FRACT -> R1
mov r6,r12 // load coordinate Y0 -> R6
muls r1,r6 // y0*m12 -> R1
adds r5,r1 // x0*m11 + y0*m12 -> R5
ldr r1,[r4,#8] // load m13 -> R1
adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5
str r5,[r3,#ACCUM0_OFFSET0] // set accum0
str r5,[r3,#ACCUM0_OFFSET1] // set accum0
// R0 ... pointer to data buffer
// R2 ... x0*m21
// R3 ... interpolator base
// R4 ... pointer to matrix
// R6 ... current coordinate Y0
// R7 ... width/4
// LR ... distance coefficient
// [SP+0] ... number of bits of tile width and height
// set x0*m21 + y0*m22 + m23 -> accum1
ldr r1,[r4,#16] // load m22 -> R1
mov r5,lr // distance coefficient -> R5
muls r1,r5 // m22*dist
asrs r1,#FRACT // (m22*dist)>>FRACT -> R1
muls r1,r6 // y0*m22 -> R1
adds r2,r1 // x0*m21 + y0*m22 -> R2
ldr r1,[r4,#20] // load m23 -> R1
adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2
str r2,[r3,#ACCUM1_OFFSET0] // set accum1
str r2,[r3,#ACCUM1_OFFSET1] // set accum1
// ---- process odd 4-pixel
// prepare tile bits * 2
ldr r6,[sp,#0] // get tile bits
lsls r6,#1 // tile bits * 2
// R0 ... pointer to destination data buffer
// R1 ... (temporary - pixel accumulator 1)
// R2 ... (temporary - pixel accumulator 2)
// R3 ... interpolator base
// R4 ... (temporary - get pointer to tile map, load tile index)
// R5 ... (temporary - get pointer to pixel, load pixel)
// R6 ... tilebits*2
// R7 ... width/4 (loop counter)
// [SP+0] ... number of bits of tile width and height
// check odd 4-pixels
lsrs r7,#1 // width/4/2
bcc 2f // no odd 4-pixel
// [7] load 1st pixel
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r1,[r5,r4] // [2] load pixel
// [9] load 2nd pixel
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r4,[r5,r4] // [2] load pixel
lsls r4,#8 // [1] shift 1 byte left
orrs r1,r4 // [1] add pixel to accumulator
// [11] load 3rd pixel
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r4,[r5,r4] // [2] load pixel
lsls r4,#16 // [1] shift 2 bytes left
orrs r1,r4 // [1] add pixel to accumulator
lsls r4,#8 // [1] shift 1 byte left
orrs r1,r4 // [1] add pixel to accumulator
// [2] store 4 pixels
stmia r0!,{r1} // [2] store 4 pixels
// check number of remaining pixels
2: tst r7,r7 // check number of pixels
beq 8f // end
// ---- [60 per 8 pixels] inner loop
// R0 ... pointer to destination data buffer
// R1 ... (temporary - pixel accumulator 1)
// R2 ... (temporary - pixel accumulator 2)
// R3 ... interpolator base
// R4 ... (temporary - get pointer to tile map, load tile index)
// R5 ... (temporary - get pointer to pixel, load pixel)
// R6 ... tilebits*2
// R7 ... width/8 (loop counter)
// [7] load 1st pixel
6: ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r1,[r5,r4] // [2] load pixel
// [9] load 2nd pixel
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r4,[r5,r4] // [2] load pixel
lsls r4,#8 // [1] shift 1 byte left
orrs r1,r4 // [1] add pixel to accumulator
// [11] load 3rd pixel
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r4,[r5,r4] // [2] load pixel
lsls r4,#16 // [1] shift 2 bytes left
orrs r1,r4 // [1] add pixel to accumulator
lsls r4,#8 // [1] shift 1 byte left
orrs r1,r4 // [1] add pixel to accumulator
// [7] load 1st pixel
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r2,[r5,r4] // [2] load pixel
// [9] load 2nd pixel
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r4,[r5,r4] // [2] load pixel
lsls r4,#8 // [1] shift 1 byte left
orrs r2,r4 // [1] add pixel to accumulator
// [11] load 3rd pixel
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r4,[r5,r4] // [2] load pixel
lsls r4,#16 // [1] shift 2 bytes left
orrs r2,r4 // [1] add pixel to accumulator
lsls r4,#8 // [1] shift 1 byte left
orrs r2,r4 // [1] add pixel to accumulator
// [3] store 8 pixels
stmia r0!,{r1,r2} // [3] store 8 pixels
// [2,3] loop counter
subs r7,#1 // [1] 8-pixel counter
bne 6b // [1,2] next 8-pixels
// pop registers
8: pop {r3-r7,pc}
.align 2
// pointer to SIO base
RenderTilePersp_pSioBase:
.word SIO_BASE // addres of SIO base
// pointer to Interp0 base
RenderTilePersp_Interp:
.word SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET // addres of interpolator 0 base
RenderTilePersp_Ctrl: // lane control word
.word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT<<SIO_INTERP0_CTRL_LANE0_SHIFT_LSB)

View file

@ -1,410 +0,0 @@
// ****************************************************************************
//
// VGA render GF_TILEPERSP2
//
// ****************************************************************************
// data ... tile map
// par ... column of tile images
// par2 ... pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL))
// par3 ... LOW8=number of bits of tile width and height, HIGH8=horizon offset
// wb ... LOW8=number of bits of tile map width, HIGH8=number of bits of tile map height
// wrapy ... segment height
#include "../define.h" // common definitions of C and ASM
#include "hardware/regs/sio.h" // registers of hardware divider
#include "hardware/regs/addressmap.h" // SIO base address
// Byte offsets of the interpolator registers, relative to the address stored
// in RenderTilePersp_Interp (SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET).
// Names ending in _OFFSET0 (0..60) are used below for interpolator 0.
#define ACCUM0_OFFSET0 0
#define ACCUM1_OFFSET0 4
#define BASE0_OFFSET0 8
#define BASE1_OFFSET0 12
#define BASE2_OFFSET0 16
#define POP_LANE0_OFFSET0 20
#define POP_LANE1_OFFSET0 24
#define POP_FULL_OFFSET0 28
#define PEEK_LANE0_OFFSET0 32
#define PEEK_LANE1_OFFSET0 36
#define PEEK_FULL_OFFSET0 40
#define CTRL_LANE0_OFFSET0 44
#define CTRL_LANE1_OFFSET0 48
#define ACCUM0_ADD_OFFSET0 52
#define ACCUM1_ADD_OFFSET0 56
#define BASE_1AND0_OFFSET0 60
// Names ending in _OFFSET1 (64..124) are the same registers shifted by 64 bytes;
// the code below uses them for interpolator 1, addressed from the same base register.
// NOTE(review): relies on interpolator 1 registers directly following interpolator 0
// in the SIO register map - confirm against hardware/regs/sio.h.
#define ACCUM0_OFFSET1 64
#define ACCUM1_OFFSET1 68
#define BASE0_OFFSET1 72
#define BASE1_OFFSET1 76
#define BASE2_OFFSET1 80
#define POP_LANE0_OFFSET1 84
#define POP_LANE1_OFFSET1 88
#define POP_FULL_OFFSET1 92
#define PEEK_LANE0_OFFSET1 96
#define PEEK_LANE1_OFFSET1 100
#define PEEK_FULL_OFFSET1 104
#define CTRL_LANE0_OFFSET1 108
#define CTRL_LANE1_OFFSET1 112
#define ACCUM0_ADD_OFFSET1 116
#define ACCUM1_ADD_OFFSET1 120
#define BASE_1AND0_OFFSET1 124
// assembler setup: unified ARM/Thumb syntax, code placed in the
// .time_critical.Render section (allocatable + executable), Cortex-M0+ target
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// extern "C" u32* RenderTilePersp2(u32* cbuf, int x, int y, int w, sSegm* segm);
// Render one line of tiles with perspective GF_TILEPERSP2, double pixels
// (every sampled pixel is written to two consecutive output bytes).
// Uses hardware interpolators 0 and 1 and the SIO hardware divider
// (interpolator state is not saved during interrupt).
// R0 ... pointer to destination data buffer
// R1 ... start X coordinate (not used)
// R2 ... start Y coordinate (in graphics lines)
// R3 ... width to display (must be multiple of 4)
// [stack] ... segm video segment sSegm
// Output new pointer to data buffer (R0 advanced by the words stored).
// 320 pixels takes ?? us on 151 MHz.
//
// Outline of the code below:
//  1. Start a division on the SIO hardware divider to get the distance
//     coefficient dist = FRACTMUL*h/(y + horiz); when the horizon offset is 0
//     the operands are chosen so that dist = 1<<FRACT (no perspective).
//  2. Configure interpolator 0 (tile map lookup address) and interpolator 1
//     (pixel address inside a tile image).
//  3. Load both interpolators with the same bases (2*delta per pop) and the
//     same accumulators (start coordinates transformed by the matrix).
//  4. Fetch pixels: tile index from interpolator 0, pixel from interpolator 1,
//     two samples packed per 32-bit word, each sample doubled.
// NOTE(review): the quotient is read without polling a divider-ready flag;
// presumably the interpolator setup executed in between covers the divider
// latency - confirm against the RP2040 datasheet.
.thumb_func
.global RenderTilePersp2
RenderTilePersp2:
// Input registers and stack:
// R0 ... pointer to destination data buffer
// R1 ... X coordinate (not used)
// R2 ... Y coordinate
// SP+0: R3 ... remaining width (this stack slot is later reused to hold tile bits)
// SP+4: R4
// SP+8: R5
// SP+12: R6
// SP+16: R7
// SP+20: LR
// SP+24: video segment
// push registers
push {r3-r7,lr}
// ---- prepare registers
// get pointer to video segment -> R4
ldr r4,[sp,#24] // load video segment -> R4
// R0 ... pointer to data buffer
// R2 ... Y coordinate
// R3 ... remaining width
// R4 ... video segment
// load horizon offset -> R1, check if use perspective
ldr r6,RenderTilePersp_pSioBase // get address of SIO base -> R6
ldrh r5,[r4,#SSEGM_WRAPY] // get segment height -> R5
ldrb r1,[r4,#SSEGM_PAR3+1] // get horizon offset -> R1
sxtb r1,r1 // signed extension
lsls r1,#2 // horizon * 4, horizon = 0 ? (Z and N flags are used by bne here and bpl at label 2)
bne 2f // use perspective
// not using perspective, start Y coordinate y0 = y - h/2 -> R12
lsrs r5,#1 // segment height/2 -> R5
subs r2,r5 // y - h/2 -> R2
mov r12,r2 // current coordinate Y0 = y - h/2 -> R12
// prepare divide result to get 1<<FRACT
movs r5,#1 // R5 <- 1
str r5,[r6,#SIO_DIV_UDIVISOR_OFFSET] // divisor = 1
lsls r5,#FRACT // constant 1<<FRACT -> R5
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL
b 4f
// using perspective, check ceiling mode (negative horizon offset)
2: bpl 3f // horizon is not negative
subs r2,r5,r2 // negate, y = h - y
subs r2,#1 // y = h - 1 - y
negs r1,r1 // absolute value of horizon
// prepare current coordinate Y0 = y - h -> R12
3: subs r7,r2,r5 // y - h = current Y coordinate -> R7
mov r12,r7 // store current coordinate Y0 -> R12
// start calculating distance coefficient dist = FRACTMUL*h/(y + horiz)
// NOTE(review): the unsigned divider registers are used, so y + horiz is
// presumably expected to be positive - verify against caller
lsls r5,#FRACT // segment height * FRACTMUL -> R5
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h
adds r2,r1 // horizon + y -> R2
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz
// R0 ... pointer to data buffer
// R3 ... remaining width
// R4 ... video segment
// R12 ... current coordinate Y0
// prepare start coordinate X0 = -w/2 -> LR
4: lsrs r5,r3,#1 // width/2
negs r5,r5 // negate
mov lr,r5 // store start coordinate X0 -> LR
// prepare number of 4-pixels (loop counter) -> R7
lsrs r7,r3,#2 // width/4 -> R7
// prepare address of interpolator 0 base -> R3
ldr r3,RenderTilePersp_Interp // get address of interpolator 0 base -> R3
// R0 ... pointer to data buffer
// R3 ... interpolator base
// R4 ... video segment
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// ---- setup interpolator 0 to get tile index
// set tile map base to base2
ldr r6,[r4,#SSEGM_DATA] // load tile map base
str r6,[r3,#BASE2_OFFSET0] // set tile map base
// set control word of lane 0: shift=FRACT+tilebits, mask=0..mapwbits-1
ldr r6,RenderTilePersp_Ctrl // load control word
ldrb r1,[r4,#SSEGM_PAR3] // get number of bits of tile width and height (tilebits) -> R1
str r1,[sp,#0] // save tilebits -> [SP+0] (overwrites the saved R3 slot)
adds r6,r1 // FRACT + tilebits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required)
ldrb r2,[r4,#SSEGM_WB] // number of bits of tile map width mapwbits -> R2
subs r5,r2,#1 // mapwbits - 1
lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position
orrs r6,r5 // add to control word
str r6,[r3,#CTRL_LANE0_OFFSET0] // set control word of lane 0
// set control word of lane 1: shift=FRACT+tilebits-mapwbits,
// mask=mapwbits..mapwbits+maphbits-1
// (derived from the lane 0 word still in R6: its mask MSB field already
// holds mapwbits-1, so adding maphbits yields mapwbits+maphbits-1)
subs r6,r2 // FRACT + tilebits - mapwbits
lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift mapwbits to mask LSB position
orrs r6,r2 // add mapwbits to control word
ldrb r2,[r4,#SSEGM_WB+1] // number of bits of tile map height maphbits -> R2
lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift maphbits to mask MSB position
adds r6,r2 // add to control word
str r6,[r3,#CTRL_LANE1_OFFSET0] // set control word of lane 1
// ---- setup interpolator 1 to get pixel index
// set tile image to base2
ldr r6,[r4,#SSEGM_PAR] // load tile image base
str r6,[r3,#BASE2_OFFSET1] // set tile image base
// set control word of lane 0: shift=FRACT, mask=0..tilebits-1
ldr r6,RenderTilePersp_Ctrl // load control word
subs r5,r1,#1 // tilebits - 1
lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position
orrs r6,r5 // add to control word
str r6,[r3,#CTRL_LANE0_OFFSET1] // set control word of lane 0
// set control word of lane 1: shift=FRACT-tilebits, mask=tilebits..tilebits*2-1
// (again derived from the lane 0 word: mask MSB tilebits-1 plus tilebits)
subs r6,r1 // FRACT - tilebits
lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift to mask LSB position
orrs r6,r5 // add tilebits to control word
lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift tilebits to mask MSB position
adds r6,r1 // add to control word
str r6,[r3,#CTRL_LANE1_OFFSET1] // set control word of lane 1
// R0 ... pointer to data buffer
// R3 ... interpolator base
// R4 ... video segment
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// [SP+0] ... number of bits of tile width and height
// ---- set matrix
// get pointer to matrix -> R4
ldr r4,[r4,#SSEGM_PAR2] // get pointer to matrix -> R4
// get distance coefficient dist -> R1 (result of the division started above)
ldr r1,RenderTilePersp_pSioBase // get address of SIO base -> R1
ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient
// r4+0 ... m11
// r4+4 ... m12
// r4+8 ... m13
// r4+12 ... m21
// r4+16 ... m22
// r4+20 ... m23
// set m11 -> R5 base0 (both interpolators get 2*delta, one pop per doubled pixel)
ldr r5,[r4,#0] // load m11
muls r5,r1 // m11*dist
asrs r5,#FRACT-1 // (m11*dist)>>(FRACT-1) ... 2*delta
str r5,[r3,#BASE0_OFFSET0] // set base0
str r5,[r3,#BASE0_OFFSET1] // set base0
asrs r5,#1 // (m11*dist)>>FRACT
// set m21 -> R6 base1
ldr r6,[r4,#12] // load m21
muls r6,r1 // m21*dist
asrs r6,#FRACT-1 // (m21*dist)>>(FRACT-1) ... 2*delta
str r6,[r3,#BASE1_OFFSET0] // set base1
str r6,[r3,#BASE1_OFFSET1] // set base1
asrs r6,#1 // (m21*dist)>>FRACT
// R0 ... pointer to data buffer
// R1 ... distance coefficient
// R3 ... interpolator base
// R4 ... pointer to matrix
// R5 ... (m11*dist)>>FRACT, called m11 in the comments below
// R6 ... (m21*dist)>>FRACT, called m21 in the comments below
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// [SP+0] ... number of bits of tile width and height
// set x0*m11 + y0*m12 + m13 -> accum0
mov r2,lr // start coordinate X0 -> R2
muls r5,r2 // x0*m11 -> R5
muls r2,r6 // x0*m21 -> R2
mov lr,r1 // save distance coefficient -> LR
ldr r6,[r4,#4] // load m12 -> R6
muls r1,r6 // m12*dist -> R1
asrs r1,#FRACT // (m12*dist)>>FRACT -> R1
mov r6,r12 // load coordinate Y0 -> R6
muls r1,r6 // y0*m12 -> R1
adds r5,r1 // x0*m11 + y0*m12 -> R5
ldr r1,[r4,#8] // load m13 -> R1 (added unscaled, not multiplied by dist)
adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5
str r5,[r3,#ACCUM0_OFFSET0] // set accum0
str r5,[r3,#ACCUM0_OFFSET1] // set accum0
// R0 ... pointer to data buffer
// R2 ... x0*m21
// R3 ... interpolator base
// R4 ... pointer to matrix
// R6 ... current coordinate Y0
// R7 ... width/4
// LR ... distance coefficient
// [SP+0] ... number of bits of tile width and height
// set x0*m21 + y0*m22 + m23 -> accum1
ldr r1,[r4,#16] // load m22 -> R1
mov r5,lr // distance coefficient -> R5
muls r1,r5 // m22*dist
asrs r1,#FRACT // (m22*dist)>>FRACT -> R1
muls r1,r6 // y0*m22 -> R1
adds r2,r1 // x0*m21 + y0*m22 -> R2
ldr r1,[r4,#20] // load m23 -> R1 (added unscaled, not multiplied by dist)
adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2
str r2,[r3,#ACCUM1_OFFSET0] // set accum1
str r2,[r3,#ACCUM1_OFFSET1] // set accum1
// ---- process odd 4-pixel
// prepare tile bits * 2
ldr r6,[sp,#0] // get tile bits
lsls r6,#1 // tile bits * 2
// R0 ... pointer to destination data buffer
// R1 ... (temporary - pixel accumulator 1)
// R2 ... (temporary - pixel accumulator 2)
// R3 ... interpolator base
// R4 ... (temporary - get pointer to tile map, load tile index)
// R5 ... (temporary - get pointer to pixel, load pixel)
// R6 ... tilebits*2
// R7 ... width/4 (loop counter)
// [SP+0] ... number of bits of tile width and height
// check odd 4-pixels (carry = lowest bit of width/4)
lsrs r7,#1 // width/4/2
bcc 2f // no odd 4-pixel
// [9] load 1st pixel, doubled into bytes 0 and 1
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r1,[r5,r4] // [2] load pixel
lsls r4,r1,#8 // [1] shift 1 byte left
orrs r1,r4 // [1] duplicate pixel into byte 1
// [11] load 2nd pixel, doubled into bytes 2 and 3
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r4,[r5,r4] // [2] load pixel
lsls r4,#16 // [1] shift 2 bytes left
orrs r1,r4 // [1] add pixel to accumulator
lsls r4,#8 // [1] shift 1 byte left
orrs r1,r4 // [1] add pixel to accumulator
// [2] store 4 pixels
stmia r0!,{r1} // [2] store 4 pixels
// check number of remaining pixels
2: tst r7,r7 // check number of pixels
beq 8f // end
// ---- [46 per 8 pixels] inner loop
// R0 ... pointer to destination data buffer
// R1 ... (temporary - pixel accumulator 1)
// R2 ... (temporary - pixel accumulator 2)
// R3 ... interpolator base
// R4 ... (temporary - get pointer to tile map, load tile index)
// R5 ... (temporary - get pointer to pixel, load pixel)
// R6 ... tilebits*2
// R7 ... width/8 (loop counter)
// [9] load 1st pixel, doubled into bytes 0 and 1 of R1
6: ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r1,[r5,r4] // [2] load pixel
lsls r4,r1,#8 // [1] shift 1 byte left
orrs r1,r4 // [1] duplicate pixel into byte 1
// [11] load 2nd pixel, doubled into bytes 2 and 3 of R1
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r4,[r5,r4] // [2] load pixel
lsls r4,#16 // [1] shift 2 bytes left
orrs r1,r4 // [1] add pixel to accumulator
lsls r4,#8 // [1] shift 1 byte left
orrs r1,r4 // [1] add pixel to accumulator
// [9] load 3rd pixel, doubled into bytes 0 and 1 of R2
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r2,[r5,r4] // [2] load pixel
lsls r4,r2,#8 // [1] shift 1 byte left
orrs r2,r4 // [1] duplicate pixel into byte 1
// [11] load 4th pixel, doubled into bytes 2 and 3 of R2
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r4,[r5,r4] // [2] load pixel
lsls r4,#16 // [1] shift 2 bytes left
orrs r2,r4 // [1] add pixel to accumulator
lsls r4,#8 // [1] shift 1 byte left
orrs r2,r4 // [1] add pixel to accumulator
// [3] store 8 pixels
stmia r0!,{r1,r2} // [3] store 8 pixels
// [2,3] loop counter
subs r7,#1 // [1] 8-pixel counter
bne 6b // [1,2] next 8-pixels
// pop registers and return (R3 receives the tilebits stored at [SP+0])
8: pop {r3-r7,pc}
.align 2
// literal pool, loaded with PC-relative ldr above
// pointer to SIO base
RenderTilePersp_pSioBase:
.word SIO_BASE // address of SIO base
// pointer to Interp0 base
RenderTilePersp_Interp:
.word SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET // address of interpolator 0 base
RenderTilePersp_Ctrl: // lane control word: ADD_RAW bit set, shift field preset to FRACT
.word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT<<SIO_INTERP0_CTRL_LANE0_SHIFT_LSB)

View file

@ -1,394 +0,0 @@
// ****************************************************************************
//
// VGA render GF_TILEPERSP3
//
// ****************************************************************************
// data ... tile map
// par ... column of tile images
// par2 ... pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL))
// par3 ... LOW8=number of bits of tile width and height, HIGH8=horizon offset
// wb ... LOW8=number of bits of tile map width, HIGH8=number of bits of tile map height
// wrapy ... segment height
#include "../define.h" // common definitions of C and ASM
#include "hardware/regs/sio.h" // registers of hardware divider
#include "hardware/regs/addressmap.h" // SIO base address
// Byte offsets of the interpolator registers, relative to the address stored
// in RenderTilePersp_Interp (SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET).
// Names ending in _OFFSET0 (0..60) are used below for interpolator 0.
#define ACCUM0_OFFSET0 0
#define ACCUM1_OFFSET0 4
#define BASE0_OFFSET0 8
#define BASE1_OFFSET0 12
#define BASE2_OFFSET0 16
#define POP_LANE0_OFFSET0 20
#define POP_LANE1_OFFSET0 24
#define POP_FULL_OFFSET0 28
#define PEEK_LANE0_OFFSET0 32
#define PEEK_LANE1_OFFSET0 36
#define PEEK_FULL_OFFSET0 40
#define CTRL_LANE0_OFFSET0 44
#define CTRL_LANE1_OFFSET0 48
#define ACCUM0_ADD_OFFSET0 52
#define ACCUM1_ADD_OFFSET0 56
#define BASE_1AND0_OFFSET0 60
// Names ending in _OFFSET1 (64..124) are the same registers shifted by 64 bytes;
// the code below uses them for interpolator 1, addressed from the same base register.
// NOTE(review): relies on interpolator 1 registers directly following interpolator 0
// in the SIO register map - confirm against hardware/regs/sio.h.
#define ACCUM0_OFFSET1 64
#define ACCUM1_OFFSET1 68
#define BASE0_OFFSET1 72
#define BASE1_OFFSET1 76
#define BASE2_OFFSET1 80
#define POP_LANE0_OFFSET1 84
#define POP_LANE1_OFFSET1 88
#define POP_FULL_OFFSET1 92
#define PEEK_LANE0_OFFSET1 96
#define PEEK_LANE1_OFFSET1 100
#define PEEK_FULL_OFFSET1 104
#define CTRL_LANE0_OFFSET1 108
#define CTRL_LANE1_OFFSET1 112
#define ACCUM0_ADD_OFFSET1 116
#define ACCUM1_ADD_OFFSET1 120
#define BASE_1AND0_OFFSET1 124
// assembler setup: unified ARM/Thumb syntax, code placed in the
// .time_critical.Render section (allocatable + executable), Cortex-M0+ target
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// extern "C" u32* RenderTilePersp3(u32* cbuf, int x, int y, int w, sSegm* segm);
// Render one line of tiles with perspective GF_TILEPERSP3, triple pixels.
// Uses hardware interpolators 0 and 1 and the SIO hardware divider
// (interpolator state is not saved during interrupt).
// R0 ... pointer to destination data buffer
// R1 ... start X coordinate (not used)
// R2 ... start Y coordinate (in graphics lines)
// R3 ... width to display (must be multiple of 4)
// [stack] ... segm video segment sSegm
// Output new pointer to data buffer (R0 advanced by the words stored).
// 320 pixels takes ?? us on 151 MHz.
//
// Outline of the code below:
//  1. Start a division on the SIO hardware divider to get the distance
//     coefficient dist = FRACTMUL*h/(y + horiz); when the horizon offset is 0
//     the operands are chosen so that dist = 1<<FRACT (no perspective).
//  2. Configure interpolator 0 (tile map lookup address) and interpolator 1
//     (pixel address inside a tile image).
//  3. Load both interpolators with the same bases (3*delta per pop) and the
//     same accumulators (start coordinates transformed by the matrix).
//  4. Fetch pixels: tile index from interpolator 0, pixel from interpolator 1.
//     The main loop stores 8 output bytes from 3 samples, packed as 2 + 2 + 4
//     copies; the odd 4-pixel step stores one sample 4 times.
// NOTE(review): each pop advances by 3*delta while 3 pops produce 8 output
// bytes (2+2+4 copies) - confirm this is the intended approximation of
// triple pixels.
// NOTE(review): the quotient is read without polling a divider-ready flag;
// presumably the interpolator setup executed in between covers the divider
// latency - confirm against the RP2040 datasheet.
.thumb_func
.global RenderTilePersp3
RenderTilePersp3:
// Input registers and stack:
// R0 ... pointer to destination data buffer
// R1 ... X coordinate (not used)
// R2 ... Y coordinate
// SP+0: R3 ... remaining width (this stack slot is later reused to hold tile bits)
// SP+4: R4
// SP+8: R5
// SP+12: R6
// SP+16: R7
// SP+20: LR
// SP+24: video segment
// push registers
push {r3-r7,lr}
// ---- prepare registers
// get pointer to video segment -> R4
ldr r4,[sp,#24] // load video segment -> R4
// R0 ... pointer to data buffer
// R2 ... Y coordinate
// R3 ... remaining width
// R4 ... video segment
// load horizon offset -> R1, check if use perspective
ldr r6,RenderTilePersp_pSioBase // get address of SIO base -> R6
ldrh r5,[r4,#SSEGM_WRAPY] // get segment height -> R5
ldrb r1,[r4,#SSEGM_PAR3+1] // get horizon offset -> R1
sxtb r1,r1 // signed extension
lsls r1,#2 // horizon * 4, horizon = 0 ? (Z and N flags are used by bne here and bpl at label 2)
bne 2f // use perspective
// not using perspective, start Y coordinate y0 = y - h/2 -> R12
lsrs r5,#1 // segment height/2 -> R5
subs r2,r5 // y - h/2 -> R2
mov r12,r2 // current coordinate Y0 = y - h/2 -> R12
// prepare divide result to get 1<<FRACT
movs r5,#1 // R5 <- 1
str r5,[r6,#SIO_DIV_UDIVISOR_OFFSET] // divisor = 1
lsls r5,#FRACT // constant 1<<FRACT -> R5
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL
b 4f
// using perspective, check ceiling mode (negative horizon offset)
2: bpl 3f // horizon is not negative
subs r2,r5,r2 // negate, y = h - y
subs r2,#1 // y = h - 1 - y
negs r1,r1 // absolute value of horizon
// prepare current coordinate Y0 = y - h -> R12
3: subs r7,r2,r5 // y - h = current Y coordinate -> R7
mov r12,r7 // store current coordinate Y0 -> R12
// start calculating distance coefficient dist = FRACTMUL*h/(y + horiz)
// NOTE(review): the unsigned divider registers are used, so y + horiz is
// presumably expected to be positive - verify against caller
lsls r5,#FRACT // segment height * FRACTMUL -> R5
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h
adds r2,r1 // horizon + y -> R2
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz
// R0 ... pointer to data buffer
// R3 ... remaining width
// R4 ... video segment
// R12 ... current coordinate Y0
// prepare start coordinate X0 = -w/2 -> LR
4: lsrs r5,r3,#1 // width/2
negs r5,r5 // negate
mov lr,r5 // store start coordinate X0 -> LR
// prepare number of 4-pixels (loop counter) -> R7
lsrs r7,r3,#2 // width/4 -> R7
// prepare address of interpolator 0 base -> R3
ldr r3,RenderTilePersp_Interp // get address of interpolator 0 base -> R3
// R0 ... pointer to data buffer
// R3 ... interpolator base
// R4 ... video segment
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// ---- setup interpolator 0 to get tile index
// set tile map base to base2
ldr r6,[r4,#SSEGM_DATA] // load tile map base
str r6,[r3,#BASE2_OFFSET0] // set tile map base
// set control word of lane 0: shift=FRACT+tilebits, mask=0..mapwbits-1
ldr r6,RenderTilePersp_Ctrl // load control word
ldrb r1,[r4,#SSEGM_PAR3] // get number of bits of tile width and height (tilebits) -> R1
str r1,[sp,#0] // save tilebits -> [SP+0] (overwrites the saved R3 slot)
adds r6,r1 // FRACT + tilebits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required)
ldrb r2,[r4,#SSEGM_WB] // number of bits of tile map width mapwbits -> R2
subs r5,r2,#1 // mapwbits - 1
lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position
orrs r6,r5 // add to control word
str r6,[r3,#CTRL_LANE0_OFFSET0] // set control word of lane 0
// set control word of lane 1: shift=FRACT+tilebits-mapwbits,
// mask=mapwbits..mapwbits+maphbits-1
// (derived from the lane 0 word still in R6: its mask MSB field already
// holds mapwbits-1, so adding maphbits yields mapwbits+maphbits-1)
subs r6,r2 // FRACT + tilebits - mapwbits
lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift mapwbits to mask LSB position
orrs r6,r2 // add mapwbits to control word
ldrb r2,[r4,#SSEGM_WB+1] // number of bits of tile map height maphbits -> R2
lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift maphbits to mask MSB position
adds r6,r2 // add to control word
str r6,[r3,#CTRL_LANE1_OFFSET0] // set control word of lane 1
// ---- setup interpolator 1 to get pixel index
// set tile image to base2
ldr r6,[r4,#SSEGM_PAR] // load tile image base
str r6,[r3,#BASE2_OFFSET1] // set tile image base
// set control word of lane 0: shift=FRACT, mask=0..tilebits-1
ldr r6,RenderTilePersp_Ctrl // load control word
subs r5,r1,#1 // tilebits - 1
lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position
orrs r6,r5 // add to control word
str r6,[r3,#CTRL_LANE0_OFFSET1] // set control word of lane 0
// set control word of lane 1: shift=FRACT-tilebits, mask=tilebits..tilebits*2-1
// (again derived from the lane 0 word: mask MSB tilebits-1 plus tilebits)
subs r6,r1 // FRACT - tilebits
lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift to mask LSB position
orrs r6,r5 // add tilebits to control word
lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift tilebits to mask MSB position
adds r6,r1 // add to control word
str r6,[r3,#CTRL_LANE1_OFFSET1] // set control word of lane 1
// R0 ... pointer to data buffer
// R3 ... interpolator base
// R4 ... video segment
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// [SP+0] ... number of bits of tile width and height
// ---- set matrix
// get pointer to matrix -> R4
ldr r4,[r4,#SSEGM_PAR2] // get pointer to matrix -> R4
// get distance coefficient dist -> R1 (result of the division started above)
ldr r1,RenderTilePersp_pSioBase // get address of SIO base -> R1
ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient
// r4+0 ... m11
// r4+4 ... m12
// r4+8 ... m13
// r4+12 ... m21
// r4+16 ... m22
// r4+20 ... m23
// set m11 -> R5 base0 (both interpolators get 3*delta per pop)
ldr r5,[r4,#0] // load m11
muls r5,r1 // m11*dist
asrs r5,#FRACT // (m11*dist)>>FRACT ... delta
lsls r2,r5,#1 // delta*2
adds r2,r5 // delta*3
str r2,[r3,#BASE0_OFFSET0] // set base0
str r2,[r3,#BASE0_OFFSET1] // set base0
// set m21 -> R6 base1
ldr r6,[r4,#12] // load m21
muls r6,r1 // m21*dist
asrs r6,#FRACT // (m21*dist)>>FRACT ... delta
lsls r2,r6,#1 // delta*2
adds r2,r6 // delta*3
str r2,[r3,#BASE1_OFFSET0] // set base1
str r2,[r3,#BASE1_OFFSET1] // set base1
// R0 ... pointer to data buffer
// R1 ... distance coefficient
// R3 ... interpolator base
// R4 ... pointer to matrix
// R5 ... (m11*dist)>>FRACT, called m11 in the comments below
// R6 ... (m21*dist)>>FRACT, called m21 in the comments below
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// [SP+0] ... number of bits of tile width and height
// set x0*m11 + y0*m12 + m13 -> accum0
mov r2,lr // start coordinate X0 -> R2
muls r5,r2 // x0*m11 -> R5
muls r2,r6 // x0*m21 -> R2
mov lr,r1 // save distance coefficient -> LR
ldr r6,[r4,#4] // load m12 -> R6
muls r1,r6 // m12*dist -> R1
asrs r1,#FRACT // (m12*dist)>>FRACT -> R1
mov r6,r12 // load coordinate Y0 -> R6
muls r1,r6 // y0*m12 -> R1
adds r5,r1 // x0*m11 + y0*m12 -> R5
ldr r1,[r4,#8] // load m13 -> R1 (added unscaled, not multiplied by dist)
adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5
str r5,[r3,#ACCUM0_OFFSET0] // set accum0
str r5,[r3,#ACCUM0_OFFSET1] // set accum0
// R0 ... pointer to data buffer
// R2 ... x0*m21
// R3 ... interpolator base
// R4 ... pointer to matrix
// R6 ... current coordinate Y0
// R7 ... width/4
// LR ... distance coefficient
// [SP+0] ... number of bits of tile width and height
// set x0*m21 + y0*m22 + m23 -> accum1
ldr r1,[r4,#16] // load m22 -> R1
mov r5,lr // distance coefficient -> R5
muls r1,r5 // m22*dist
asrs r1,#FRACT // (m22*dist)>>FRACT -> R1
muls r1,r6 // y0*m22 -> R1
adds r2,r1 // x0*m21 + y0*m22 -> R2
ldr r1,[r4,#20] // load m23 -> R1 (added unscaled, not multiplied by dist)
adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2
str r2,[r3,#ACCUM1_OFFSET0] // set accum1
str r2,[r3,#ACCUM1_OFFSET1] // set accum1
// ---- process odd 4-pixel
// prepare tile bits * 2
ldr r6,[sp,#0] // get tile bits
lsls r6,#1 // tile bits * 2
// R0 ... pointer to destination data buffer
// R1 ... (temporary - pixel accumulator 1)
// R2 ... (temporary - pixel accumulator 2)
// R3 ... interpolator base
// R4 ... (temporary - get pointer to tile map, load tile index)
// R5 ... (temporary - get pointer to pixel, load pixel)
// R6 ... tilebits*2
// R7 ... width/4 (loop counter)
// [SP+0] ... number of bits of tile width and height
// check odd 4-pixels (carry = lowest bit of width/4)
lsrs r7,#1 // width/4/2
bcc 2f // no odd 4-pixel
// load one pixel and replicate it into all 4 bytes
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r1,[r5,r4] // [2] load pixel
lsls r4,r1,#8 // [1] shift 1 byte left
orrs r1,r4 // [1] duplicate pixel into byte 1
lsls r4,r1,#16 // [1] shift 2 bytes left
orrs r1,r4 // [1] duplicate bytes 0-1 into bytes 2-3
// [2] store 4 pixels
stmia r0!,{r1} // [2] store 4 pixels
// check number of remaining pixels
2: tst r7,r7 // check number of pixels
beq 8f // end
// ---- [37 per 8 pixels] inner loop
// R0 ... pointer to destination data buffer
// R1 ... (temporary - pixel accumulator 1)
// R2 ... (temporary - pixel accumulator 2)
// R3 ... interpolator base
// R4 ... (temporary - get pointer to tile map, load tile index)
// R5 ... (temporary - get pointer to pixel, load pixel)
// R6 ... tilebits*2
// R7 ... width/8 (loop counter)
// [9] load 1st pixel, doubled into bytes 0 and 1 of R1
6: ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r1,[r5,r4] // [2] load pixel
lsls r4,r1,#8 // [1] shift 1 byte left
orrs r1,r4 // [1] duplicate pixel into byte 1
// [11] load 2nd pixel, doubled into bytes 2 and 3 of R1
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r4,[r5,r4] // [2] load pixel
lsls r4,#16 // [1] shift 2 bytes left
orrs r1,r4 // [1] add pixel to accumulator
lsls r4,#8 // [1] shift 1 byte left
orrs r1,r4 // [1] add pixel to accumulator
// [11] load 3rd pixel, replicated into all 4 bytes of R2
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r2,[r5,r4] // [2] load pixel
lsls r4,r2,#8 // [1] shift 1 byte left
orrs r2,r4 // [1] duplicate pixel into byte 1
lsls r4,r2,#16 // [1] shift 2 bytes left
orrs r2,r4 // [1] duplicate bytes 0-1 into bytes 2-3
// [3] store 8 pixels
stmia r0!,{r1,r2} // [3] store 8 pixels
// [2,3] loop counter
subs r7,#1 // [1] 8-pixel counter
bne 6b // [1,2] next 8-pixels
// pop registers and return (R3 receives the tilebits stored at [SP+0])
8: pop {r3-r7,pc}
.align 2
// literal pool, loaded with PC-relative ldr above
// pointer to SIO base
RenderTilePersp_pSioBase:
.word SIO_BASE // address of SIO base
// pointer to Interp0 base
RenderTilePersp_Interp:
.word SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET // address of interpolator 0 base
RenderTilePersp_Ctrl: // lane control word: ADD_RAW bit set, shift field preset to FRACT
.word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT<<SIO_INTERP0_CTRL_LANE0_SHIFT_LSB)

View file

@ -1,383 +0,0 @@
// ****************************************************************************
//
// VGA render GF_TILEPERSP4
//
// ****************************************************************************
// data ... tile map
// par ... column of tile images
// par2 ... pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL))
// par3 ... LOW8=number of bits of tile width and height, HIGH8=horizon offset
// wb ... LOW8=number of bits of tile map width, HIGH8=number of bits of tile map height
// wrapy ... segment height
#include "../define.h" // common definitions of C and ASM
#include "hardware/regs/sio.h" // registers of hardware divider
#include "hardware/regs/addressmap.h" // SIO base address
// Byte offsets of the interpolator registers, relative to the interpolator 0
// base address (SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET, see RenderTilePersp_Interp).
// Names ending in "0" address interpolator 0, names ending in "1" address
// interpolator 1: the same register list repeated 64 bytes higher, so both
// units are reachable from one base register with an immediate offset.
// --- interpolator 0
#define ACCUM0_OFFSET0 0
#define ACCUM1_OFFSET0 4
#define BASE0_OFFSET0 8
#define BASE1_OFFSET0 12
#define BASE2_OFFSET0 16
#define POP_LANE0_OFFSET0 20
#define POP_LANE1_OFFSET0 24
#define POP_FULL_OFFSET0 28
#define PEEK_LANE0_OFFSET0 32
#define PEEK_LANE1_OFFSET0 36
#define PEEK_FULL_OFFSET0 40
#define CTRL_LANE0_OFFSET0 44
#define CTRL_LANE1_OFFSET0 48
#define ACCUM0_ADD_OFFSET0 52
#define ACCUM1_ADD_OFFSET0 56
#define BASE_1AND0_OFFSET0 60
// --- interpolator 1 (same layout, +64)
#define ACCUM0_OFFSET1 64
#define ACCUM1_OFFSET1 68
#define BASE0_OFFSET1 72
#define BASE1_OFFSET1 76
#define BASE2_OFFSET1 80
#define POP_LANE0_OFFSET1 84
#define POP_LANE1_OFFSET1 88
#define POP_FULL_OFFSET1 92
#define PEEK_LANE0_OFFSET1 96
#define PEEK_LANE1_OFFSET1 100
#define PEEK_FULL_OFFSET1 104
#define CTRL_LANE0_OFFSET1 108
#define CTRL_LANE1_OFFSET1 112
#define ACCUM0_ADD_OFFSET1 116
#define ACCUM1_ADD_OFFSET1 120
#define BASE_1AND0_OFFSET1 124
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// extern "C" u32* RenderTilePersp4(u32* cbuf, int x, int y, int w, sSegm* segm);
// render tiles with perspective GF_TILEPERSP4, quadruple pixels
// using hardware interpolators interp0 and interp1 (their state is not saved during interrupt)
// Summary: for one scanline, the source position in the tile map is stepped
// through a 2x3 matrix scaled by a per-line distance coefficient. Every fetched
// source pixel (1 byte) is replicated into all 4 bytes of one output word, so
// each store of a word produces 4 identical destination pixels.
// R0 ... pointer to destination data buffer
// R1 ... start X coordinate (not used)
// R2 ... start Y coordinate (in graphics lines)
// R3 ... width to display (must be multiple of 4)
// [stack] ... segm video segment sSegm
// Output new pointer to data buffer.
// 320 pixels takes ?? us on 151 MHz.
.thumb_func
.global RenderTilePersp4
RenderTilePersp4:
// Input registers and stack:
// R0 ... pointer to destination data buffer
// R1 ... X coordinate (not used)
// R2 ... Y coordinate
// SP+0: R3 ... remaining width
// SP+4: R4
// SP+8: R5
// SP+12: R6
// SP+16: R7
// SP+20: LR
// SP+24: video segment
// push registers
push {r3-r7,lr}
// ---- prepare registers
// get pointer to video segment -> R4
ldr r4,[sp,#24] // load video segment -> R4
// R0 ... pointer to data buffer
// R2 ... Y coordinate
// R3 ... remaining width
// R4 ... video segment
// load horizon offset -> R1, check if use perspective
ldr r6,RenderTilePersp_pSioBase // get address of SIO base -> R6
ldrh r5,[r4,#SSEGM_WRAPY] // get segment height -> R5
ldrb r1,[r4,#SSEGM_PAR3+1] // get horizon offset -> R1
sxtb r1,r1 // signed extension
// the flags set by this shift are tested twice: Z by 'bne' just below,
// N by 'bpl' at label 2 (no flag-setting instruction lies in between)
lsls r1,#2 // horizon * 4, horizon = 0 ?
bne 2f // use perspective
// not using perspective, start Y coordinate y0 = y - h/2 -> R12
lsrs r5,#1 // segment height/2 -> R5
subs r2,r5 // y - h/2 -> R2
mov r12,r2 // current coordinate Y0 = y - h/2 -> R12
// prepare divide result to get 1<<FRACT
// (dividing FRACTMUL by 1 lets the common code below read the distance
// coefficient from the divider quotient in both modes)
movs r5,#1 // R5 <- 1
str r5,[r6,#SIO_DIV_UDIVISOR_OFFSET] // divisor = 1
lsls r5,#FRACT // constant 1<<FRACT -> R5
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL
b 4f
// using perspective, check ceiling mode (negative horizon)
2: bpl 3f // horizon is not negative
subs r2,r5,r2 // negate, y = h - y
subs r2,#1 // y = h - 1 - y
negs r1,r1 // absolute value of horizon
// prepare current coordinate Y0 = y - h -> R12
3: subs r7,r2,r5 // y - h = current Y coordinate -> R7
mov r12,r7 // store current coordinate Y0 -> R12
// start calculating distance coefficient dist = FRACTMUL*h/(y + horiz)
// (the quotient is read only later, after the interpolator setup)
lsls r5,#FRACT // segment height * FRACTMUL -> R5
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h
adds r2,r1 // horizon + y -> R2
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz
// R0 ... pointer to data buffer
// R3 ... remaining width
// R4 ... video segment
// R12 ... current coordinate Y0
// prepare start coordinate X0 = -w/2 -> LR
4: lsrs r5,r3,#1 // width/2
negs r5,r5 // negate
mov lr,r5 // store start coordinate X0 -> LR
// prepare number of 4-pixels (loop counter) -> R7
lsrs r7,r3,#2 // width/4 -> R7
// prepare address of interpolator 0 base -> R3
ldr r3,RenderTilePersp_Interp // get address of interpolator 0 base -> R3
// R0 ... pointer to data buffer
// R3 ... interpolator base
// R4 ... video segment
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// ---- setup interpolator 0 to get tile index
// set tile map base to base2
ldr r6,[r4,#SSEGM_DATA] // load tile map base
str r6,[r3,#BASE2_OFFSET0] // set tile map base
// set control word of lane 0: shift=FRACT+tilebits, mask=0..mapwbits-1
ldr r6,RenderTilePersp_Ctrl // load control word
ldrb r1,[r4,#SSEGM_PAR3] // get tile width and height -> R1
// the stack slot of the pushed R3 is reused as a local variable here
// (the width it held has already been consumed into R7 and LR)
str r1,[sp,#0] // save tile size -> [SP+0]
adds r6,r1 // FRACT + tilebits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required)
ldrb r2,[r4,#SSEGM_WB] // number of bits of tile map width mapwbits -> R2
subs r5,r2,#1 // mapwbits - 1
lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position
orrs r6,r5 // add to control word
str r6,[r3,#CTRL_LANE0_OFFSET0] // set control word of lane 0
// set control word of lane 1: shift=FRACT+tilebits-mapwbits,
// mask=mapwbits..mapwbits+maphbits-1
// (R6 still holds the lane 0 word, so its mask MSB field mapwbits-1 is
// kept and maphbits is added on top of it below)
subs r6,r2 // FRACT + tilebits - mapwbits
lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift mapwbits to mask LSB position
orrs r6,r2 // add mapwbits to control word
ldrb r2,[r4,#SSEGM_WB+1] // number of bits of tile map height maphbits -> R2
lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift maphbits to mask MSB position
adds r6,r2 // add to control word
str r6,[r3,#CTRL_LANE1_OFFSET0] // set control word of lane 1
// ---- setup interpolator 1 to get pixel index
// set tile image to base2
ldr r6,[r4,#SSEGM_PAR] // load tile image base
str r6,[r3,#BASE2_OFFSET1] // set tile image base
// set control word of lane 0: shift=FRACT, mask=0..tilebits-1
ldr r6,RenderTilePersp_Ctrl // load control word
subs r5,r1,#1 // tilebits - 1
lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position
orrs r6,r5 // add to control word
str r6,[r3,#CTRL_LANE0_OFFSET1] // set control word of lane 0
// set control word of lane 1: shift=FRACT-tilebits, mask=tilebits..tilebits*2-1
// (same trick as above: the lane 0 mask MSB tilebits-1 stays in R6)
subs r6,r1 // FRACT - tilebits
lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift to mask LSB position
orrs r6,r5 // add tilebits to control word
lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift tilebits to mask MSB position
adds r6,r1 // add to control word
str r6,[r3,#CTRL_LANE1_OFFSET1] // set control word of lane 1
// R0 ... pointer to data buffer
// R3 ... interpolator base
// R4 ... video segment
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// [SP+0] ... number of bits of tile width and height
// ---- set matrix
// get pointer to matrix -> R4
ldr r4,[r4,#SSEGM_PAR2] // get pointer to matrix -> R4
// get distance coefficient dist -> R1
// (result of the division started before the interpolator setup)
ldr r1,RenderTilePersp_pSioBase // get address of SIO base -> R1
ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient
// r4+0 ... m11
// r4+4 ... m12
// r4+8 ... m13
// r4+12 ... m21
// r4+16 ... m22
// r4+20 ... m23
// set m11 -> R5 base0
// (base = 4*delta because one fetched source pixel covers 4 output pixels;
// both interpolators get the same base so they step in lockstep)
ldr r5,[r4,#0] // load m11
muls r5,r1 // m11*dist
asrs r5,#FRACT-2 // (m11*dist)>>(FRACT-2) ... 4*delta
str r5,[r3,#BASE0_OFFSET0] // set base0
str r5,[r3,#BASE0_OFFSET1] // set base0
asrs r5,#2 // (m11*dist)>>FRACT
// set m21 -> R6 base1
ldr r6,[r4,#12] // load m21
muls r6,r1 // m21*dist
asrs r6,#FRACT-2 // (m21*dist)>>(FRACT-2) ... 4*delta
str r6,[r3,#BASE1_OFFSET0] // set base1
str r6,[r3,#BASE1_OFFSET1] // set base1
asrs r6,#2 // (m21*dist)>>FRACT
// R0 ... pointer to data buffer
// R1 ... distance coefficient
// R3 ... interpolator base
// R4 ... pointer to matrix
// R5 ... m11
// R6 ... m21
// R7 ... width/4
// LR ... start coordinate X0
// R12 ... current coordinate Y0
// [SP+0] ... number of bits of tile width and height
// set x0*m11 + y0*m12 + m13 -> accum0
mov r2,lr // start coordinate X0 -> R2
muls r5,r2 // x0*m11 -> R5
muls r2,r6 // x0*m21 -> R2
mov lr,r1 // save distance coefficient -> LR
ldr r6,[r4,#4] // load m12 -> R6
muls r1,r6 // m12*dist -> R1
asrs r1,#FRACT // (m12*dist)>>FRACT -> R1
mov r6,r12 // load coordinate Y0 -> R6
muls r1,r6 // y0*m12 -> R1
adds r5,r1 // x0*m11 + y0*m12 -> R5
ldr r1,[r4,#8] // load m13 -> R1
adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5
str r5,[r3,#ACCUM0_OFFSET0] // set accum0
str r5,[r3,#ACCUM0_OFFSET1] // set accum0
// R0 ... pointer to data buffer
// R2 ... x0*m21
// R3 ... interpolator base
// R4 ... pointer to matrix
// R6 ... current coordinate Y0
// R7 ... width/4
// LR ... distance coefficient
// [SP+0] ... number of bits of tile width and height
// set x0*m21 + y0*m22 + m23 -> accum1
ldr r1,[r4,#16] // load m22 -> R1
mov r5,lr // distance coefficient -> R5
muls r1,r5 // m22*dist
asrs r1,#FRACT // (m22*dist)>>FRACT -> R1
muls r1,r6 // y0*m22 -> R1
adds r2,r1 // x0*m21 + y0*m22 -> R2
ldr r1,[r4,#20] // load m23 -> R1
adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2
str r2,[r3,#ACCUM1_OFFSET0] // set accum1
str r2,[r3,#ACCUM1_OFFSET1] // set accum1
// ---- process odd 4-pixel
// prepare tile bits * 2
// (a tile image occupies 1<<(tilebits*2) bytes, so shifting the tile
// index left by this amount gives the byte offset of the tile)
ldr r6,[sp,#0] // get tile bits
lsls r6,#1 // tile bits * 2
// R0 ... pointer to destination data buffer
// R1 ... (temporary - pixel accumulator 1)
// R2 ... (temporary - pixel accumulator 2)
// R3 ... interpolator base
// R4 ... (temporary - get pointer to tile map, load tile index)
// R5 ... (temporary - get pointer to pixel, load pixel)
// R6 ... tilebits*2
// R7 ... width/4 (loop counter)
// [SP+0] ... number of bits of tile width and height
// check odd 4-pixels
// (the bit shifted out lands in carry: set = one single 4-pixel group
// must be rendered before the 8-pixel loop)
lsrs r7,#1 // width/4/2
bcc 2f // no odd 4-pixel
// load pixel
// NOTE(review): reading POP_FULL is expected to also advance the
// accumulators by base0/base1 - confirm against the RP2040 datasheet
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r1,[r5,r4] // [2] load pixel
// replicate the pixel byte into all 4 bytes of R1
lsls r4,r1,#8 // [1] shift 1 byte left
orrs r1,r4 // [1] add pixel to accumulator
lsls r4,r1,#16 // [1] shift 2 bytes left
orrs r1,r4 // [1] add pixel to accumulator
// [2] store 4 pixels
stmia r0!,{r1} // [2] store 4 pixels
// check number of remaining pixels
2: tst r7,r7 // check number of pixels
beq 8f // end
// ---- [28 per 8 pixels] inner loop
// R0 ... pointer to destination data buffer
// R1 ... (temporary - pixel accumulator 1)
// R2 ... (temporary - pixel accumulator 2)
// R3 ... interpolator base
// R4 ... (temporary - get pointer to tile map, load tile index)
// R5 ... (temporary - get pointer to pixel, load pixel)
// R6 ... tilebits*2
// R7 ... width/8 (loop counter)
// [11] load pixel
6: ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r1,[r5,r4] // [2] load pixel
lsls r4,r1,#8 // [1] shift 1 byte left
orrs r1,r4 // [1] add pixel to accumulator
lsls r4,r1,#16 // [1] shift 2 bytes left
orrs r1,r4 // [1] add pixel to accumulator
// [11] load pixel
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
ldrb r4,[r4,#0] // [2] load tile index
lsls r4,r6 // [1] tile index * tile size
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
ldrb r2,[r5,r4] // [2] load pixel
lsls r4,r2,#8 // [1] shift 1 byte left
orrs r2,r4 // [1] add pixel to accumulator
lsls r4,r2,#16 // [1] shift 2 bytes left
orrs r2,r4 // [1] add pixel to accumulator
// [3] store 8 pixels
stmia r0!,{r1,r2} // [3] store 8 pixels
// [2,3] loop counter
subs r7,#1 // [1] 8-pixel counter
bne 6b // [1,2] next 8-pixels
// pop registers
// (R0 = advanced destination pointer is the return value; R3 receives
// the tile bits stored at [SP+0], which is harmless for a scratch register)
8: pop {r3-r7,pc}
.align 2
// literal pool, loaded PC-relative by the 'ldr rX,label' instructions above
// pointer to SIO base
RenderTilePersp_pSioBase:
.word SIO_BASE // address of SIO base
// pointer to Interp0 base
RenderTilePersp_Interp:
.word SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET // address of interpolator 0 base
RenderTilePersp_Ctrl: // lane control word
.word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT<<SIO_INTERP0_CTRL_LANE0_SHIFT_LSB)

File diff suppressed because it is too large Load diff

View file

@ -1,8 +1,11 @@
// ****************************************************************************
//
// VGA output
//
// file derived from the PicoVGA project
// https://github.com/Panda381/PicoVGA
// by Miroslav Nemecek
//
// ****************************************************************************
#ifndef _VGA_H
@ -10,124 +13,34 @@
// scanline type
#define LINE_VSYNC 0 // long vertical sync
#define LINE_VVSYNC 1 // short vertical + vertical sync
#define LINE_VHSYNC 2 // short vertical + horizontal sync
#define LINE_HHSYNC 3 // short horizontal + horizontal sync
#define LINE_HVSYNC 4 // short horizontal + vertical sync
#define LINE_DARK 5 // dark line
#define LINE_IMG 6 // progressive image 0, 1, 2,...
#define LINE_IMGEVEN1 7 // interlaced image even 0, 2, 4,..., 1st subframe
#define LINE_IMGEVEN2 8 // interlaced image even 0, 2, 4,..., 2nd subframe
#define LINE_IMGODD1 9 // interlaced image odd 1, 3, 5,..., 1st subframe
#define LINE_IMGODD2 10 // interlaced image odd 1, 3, 5,..., 2nd subframe
#define LINE_DARK 1 // dark line
#define LINE_IMG 2 // progressive image 0, 1, 2,...
extern u8 ScanlineType[MAXLINE];
extern int DispDev; // current display device
extern sVmode CurVmode; // copy of current videomode table
//extern int LayerMode; // current layer mode (LAYERMODE_*)
extern volatile int ScanLine; // current scan line 1...
extern volatile u32 Frame; // frame counter
extern volatile int BufInx; // current buffer set (0..1)
extern volatile Bool VSync; // current scan line is vsync or dark
// line buffers
extern ALIGNED u8 LineBuf1[DBUF_MAX]; // scanline 1 image data
extern ALIGNED u8 LineBuf2[DBUF_MAX]; // scanline 2 image data
extern int LineBufSize[LAYERS_MAX]; // size of data buffers
extern u32 LineBufHsBp[4]; // HSYNC ... back porch-1 ... IRQ command ... image command
extern u32 LineBufFp; // front porch+1
extern u32 LineBufDark[2]; // HSYNC ... dark line
extern u32 LineBufSync[10]; // vertical synchronization
// interlaced (5x half scanlines):
// 2x half synchronization (HSYNC pulse/2 ... line dark/2)
// 2x vertical synchronization (invert line dark/2 ... invert HSYNC pulse)
// 1x half synchronization (HSYNC pulse/2 ... line dark/2)
// progressive: 1x scanline with vertical synchronization (invert line dark ... invert HSYNC pulse)
extern ALIGNED u8 LineBuf0[BLACK_MAX]; // line buffer with black color (used to clear rest of scanline)
extern u32 LineBufHsBp[4]; // HSYNC ... back porch-1 ... IRQ command ... image command
extern u32 LineBufFp; // front porch+1
extern u32 LineBufDark[2]; // HSYNC ... dark line
extern u32 LineBufSync[10]; // vertical synchronization
// control buffers
extern u32 CtrlBuf1[CBUF_MAX]; // control pairs: u32 count, read address (must be terminated with [0,0])
extern u32 CtrlBuf2[CBUF_MAX]; // control pairs: u32 count, read address (must be terminated with [0,0])
extern int CtrlBufSize[LAYERS_MAX]; // size of control buffers
// render font pixel mask
extern u32 RenderTextMask[512];
// fill memory buffer with u32 words
// buf ... data buffer, must be 32-bit aligned
// data ... data word to store
// num ... number of 32-bit words (= number of bytes/4)
// Returns new destination address.
extern "C" u32* MemSet4(u32* buf, u32 data, int num);
// blit scanline using key color
// dst ... destination buffer
// src ... source buffer
// w ... width
// key ... key color
extern "C" void BlitKey(u8* dst, u8* src, int w, u8 key);
// render layers with sprites LAYERMODE_SPRITE*
// dbuf ... pointer to data buffer
// y ... coordinate of scanline
// scr ... pointer to layer screen structure sLayer
extern "C" void RenderSprite(u8* dbuf, int y, sLayer* scr);
// render layers with fast sprites LAYERMODE_FASTSPRITE*
// cbuf ... pointer to control buffer
// y ... coordinate of scanline
// scr ... pointer to layer screen structure sLayer
// buf ... pointer to destination data buffer with transparent color
// Output new pointer to control buffer.
extern "C" u32* RenderFastSprite(u32* cbuf, int y, sLayer* scr, u8* buf);
// render layers with transformation matrix LAYERMODE_PERSP*
// R0 ... dbuf pointer to data buffer
// R1 ... y coordinate of scanline (relative in destination image)
// R2 ... scr pointer to layer screen structure sLayer
extern "C" void RenderPersp(u8* dbuf, int y, sLayer* scr);
// render layers double pixel with transformation matrix LAYERMODE_PERSP2*
// R0 ... dbuf pointer to data buffer
// R1 ... y coordinate of scanline (relative in destination image)
// R2 ... scr pointer to layer screen structure sLayer
extern "C" void RenderPersp2(u8* dbuf, int y, sLayer* scr);
// render scanline
// cbuf ... control buffer
// dbuf ... data buffer (pixel data)
// line ... current line 0..
// pixnum ... total pixels (must be multiple of 4)
// Returns new pointer to control buffer
extern "C" u32* Render(u32* cbuf, u8* dbuf, int line, int pixnum);
// initialize scanline type table
void ScanlineTypeInit(const sVmode* v);
// print table of scanline types
void ScanlineTypePrint(const u8* scan, int lines);
// initialize videomode (NOTE: declared void, so no success/failure status is returned to the caller)
// - All layer modes must use same layer program (LAYERMODE_BASE = overlapped layers are OFF)
void VgaInit(const sVmode* vmode); //, u8 layer1mode=LAYERMODE_BASE, u8 layer2mode=LAYERMODE_BASE, u8 layer3mode=LAYERMODE_BASE);
// VGA core
void VgaCore();
// request to initialize VGA videomode, NULL=only stop driver (wait to initialization completes)
void VgaInitReq(const sVmode* vmode);
// execute core 1 remote function
void Core1Exec(void (*fnc)());
// check if core 1 is busy (executing remote function)
Bool Core1Busy();
// wait if core 1 is busy (executing remote function)
void Core1Wait();
void VgaInit(const sVmode* vmode, u8* buf, int width, int height, int stride); //, u8 layer1mode=LAYERMODE_BASE, u8 layer2mode=LAYERMODE_BASE, u8 layer3mode=LAYERMODE_BASE);
// wait for VSync scanline
void WaitVSync();

View file

@ -1,90 +0,0 @@
// ****************************************************************************
//
// VGA sprites
//
// ****************************************************************************
// Takes 100 bytes
#include "define.h" // common definitions of C and ASM
.syntax unified
.section .time_critical.BlitKey, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
// [6,7] blit macro (4 instructions, 8 bytes)
// Copies the byte at offset \n from source (R1) to destination (R0) unless it
// equals the key color in R3. R4 is used as scratch.
// The expansion must stay exactly 8 bytes long: BlitKey computes a jump
// target as "label 3 - 8*count" to enter the unrolled sequence part-way.
// Label '2' is a numeric local label, so every expansion branches to its own end.
.macro blitkey n
ldrb r4,[r1,#\n] // [2] load 1 pixel
cmp r4,r3 // [1] is it transparent color?
beq 2f // [1,2] pixel is transparent
strb r4,[r0,#\n] // [2] write 1 pixel
2:
.endm
// blit scanline using key color
// Copies 'w' bytes from src to dst; source bytes equal to 'key' are skipped,
// leaving the corresponding destination bytes untouched.
// The scanline is processed from its end towards its start: first the
// trailing (w modulo 8) pixels via a computed jump into the unrolled block,
// then whole groups of 8 pixels until the start of dst is reached.
// With w = 0 the jump goes directly to label '3:' and the function returns.
// dst ... destination buffer
// src ... source buffer
// w ... width
// key ... key color
//extern "C" void BlitKey(u8* dst, u8* src, int w, u8 key);
.thumb_func
.global BlitKey
BlitKey:
// push registers
push {r4,lr}
// Registers:
// R0 ... destination buffer
// R1 ... source buffer
// R2 ... width counter
// R3 ... key color
// R4 ... (temporary)
// save start of destination buffer
// (LR is free to use as a variable, its value was pushed above)
mov lr,r0 // start buffer
// get number of pixels aligned to 8 bytes
lsrs r4,r2,#3 // number of pixels / 8
lsls r4,#3 // number of pixels aligned to 8 bytes down -> R4
eors r2,r4 // number of pixels last 3 bits (modulo 8)
// shift pointers to last 8-byte group
add r0,r4 // shift destination pointer to the end
add r1,r4 // shift source pointer to the end
// jump to blit rest of pixels in last 8-byte group
// (entering 'rem' macros before label 3 executes "blitkey rem-1" ... "blitkey 0")
adr r4,3f // get address of label '3:' (must be word aligned)
lsls r2,#3 // *8, convert number of pixels to offset of blit macro (1 macro is 8 bytes long)
subs r4,r2 // subtract offset of first valid blit macro
adds r4,#1 // set bit 0 - flag to use thumb instructions
bx r4 // jump into loop
// ---- [53..61 per loop] blend pixels, speed 6.625..7.625 clock cycles per pixel
// aligning label '1:' also aligns label '3:': between them lie 2 + 8*4 = 34
// 16-bit instructions = 68 bytes, a multiple of 4
.align 2 // address of label '3:' must be word aligned (32 bits)
// [2] shift pointers 8 bytes down
1: subs r0,#8 // [1] shift destination pointer by 8 bytes down
subs r1,#8 // [1] shift source pointer by 8 bytes down
// [48..56] blit 8 pixels (32 instructions)
blitkey 7 // [6,7] blit pixel 7
blitkey 6 // [6,7] blit pixel 6
blitkey 5 // [6,7] blit pixel 5
blitkey 4 // [6,7] blit pixel 4
blitkey 3 // [6,7] blit pixel 3
blitkey 2 // [6,7] blit pixel 2
blitkey 1 // [6,7] blit pixel 1
blitkey 0 // [6,7] blit pixel 0
// this address must be word aligned
// [2,3] next 8 pixels
3: cmp r0,lr // [1] start address reached?
bhi 1b // [1,2] not start address yet
// pop registers and return from function
// (label '9:' is not referenced anywhere in this function)
9: pop {r4,pc}

View file

@ -5,59 +5,23 @@
//
// VGA configuration
//
// file derived from the PicoVGA project
// https://github.com/Panda381/PicoVGA
// by Miroslav Nemecek
//
// ****************************************************************************
// === Configuration
#define LAYERS 1 //4 // total layers 1..4 (1 base layer + 3 overlapped layers)
#define SEGMAX 8 // max. number of video segment per video strip (size of 1 sSegm = 28 bytes)
#define STRIPMAX 8 // max. number of video strips (size of 1 sStrip = sSegm size*SEGMAX+4 = 228 bytes)
// size of sScreen = sStrip size*STRIPMAX+4 = 1828 bytes
#define MAXX 320 //640 // max. resolution in X direction (must be multiple of 4)
#define MAXY 240 //480 // max. resolution in Y direction
#define MAXLINE 700 // max. number of scanlines (including sync and dark lines)
#define MAXLINE 525 //700 // max. number of scanlines (including sync and dark lines)
// === Scanline render buffers (800 pixels: default size of buffers = 2*4*(800+8+800+24)+800 = 13856 bytes
// Requirements by format, base layer 0, 1 wrap X segment:
// GF_GRAPH8 ... control buffer 16 bytes
// GF_TILE8 ... control buffer "width"+8 bytes
// GF_TILE16 ... control buffer "width/2"+8 bytes
// GF_TILE32 ... control buffer "width/4"+8 bytes
// GF_TILE64 ... control buffer "width/8"+8 bytes
// GF_PROGRESS ... control buffer 24 bytes
// other formats: data buffer "width" bytes, control buffer 16 bytes
#define DBUF0_MAX (MAXX+8) // max. size of data buffer of layer 0
#define CBUF0_MAX ((MAXX+24)/4) // max. size of control buffer of layer 0
// GF_GRAPH8 ... control buffer 4*4=16 bytes
#define CBUF_MAX 8 //((MAXX+24)/4) // max. size of control buffer of layer 0
// Requirements by format, overlapped layer 1..3:
// LAYERMODE_SPRITE* ... data buffer "width"+4 bytes, control buffer 24 bytes
// LAYERMODE_FASTSPRITE* ... data buffer "width"+4 bytes, control buffer up to "width*2"+16 bytes
// other formats ... data buffer 4 bytes, control buffer 24 bytes
#define DBUF1_MAX (MAXX+8) // max. size of data buffer of layer 1
#define CBUF1_MAX ((MAXX+24)/4) // max. size of control buffer of layer 1
#define DBUF2_MAX (MAXX+8) // max. size of data buffer of layer 2
#define CBUF2_MAX ((MAXX+24)/4) // max. size of control buffer of layer 2
#define DBUF3_MAX (MAXX+8) // max. size of data buffer of layer 3
#define CBUF3_MAX ((MAXX+24)/4) // max. size of control buffer of layer 3
#if LAYERS==1
#define DBUF_MAX DBUF0_MAX // max. size of data buffer
#define CBUF_MAX CBUF0_MAX // max. size of control buffer
#elif LAYERS==2
#define DBUF_MAX (DBUF0_MAX+DBUF1_MAX) // max. size of data buffer
#define CBUF_MAX (CBUF0_MAX+CBUF1_MAX) // max. size of control buffer
#elif LAYERS==3
#define DBUF_MAX (DBUF0_MAX+DBUF1_MAX+DBUF2_MAX) // max. size of data buffer
#define CBUF_MAX (CBUF0_MAX+CBUF1_MAX+CBUF2_MAX) // max. size of control buffer
#elif LAYERS==4
#define DBUF_MAX (DBUF0_MAX+DBUF1_MAX+DBUF2_MAX+DBUF3_MAX) // max. size of data buffer
#define CBUF_MAX (CBUF0_MAX+CBUF1_MAX+CBUF2_MAX+CBUF3_MAX) // max. size of control buffer
#else
#error Unsupported number of layers!
#endif
// === VGA port pins
// GP0 ... VGA B0 blue
@ -75,39 +39,21 @@
#define VGA_GPIO_LAST (VGA_GPIO_FIRST+VGA_GPIO_NUM-1) // last VGA GPIO
#define VGA_GPIO_SYNC VGA_SYNCBASE // VGA SYNC GPIO
// === VGA PIO program
#define BASE_OFFSET 17 // offset of base layer program
// VGA PIO and state machines
#define VGA_PIO pio0 // VGA PIO
#define VGA_SM0 0 // VGA state machine of base layer 0
#define VGA_SM1 1 // VGA state machine of overlapped layer 1
#define VGA_SM2 2 // VGA state machine of overlapped layer 2
#define VGA_SM3 3 // VGA state machine of overlapped layer 3
#define VGA_SM(layer) (VGA_SM0+(layer)) // VGA state machine of the layer
#if LAYERS==1
// LAYERS==1
#define VGA_SMALL B0 // mask of all state machines
#elif LAYERS==2
#define VGA_SMALL (B0+B1) // mask of all state machines
#elif LAYERS==3
#define VGA_SMALL (B0+B1+B2) // mask of all state machines
#elif LAYERS==4
#define VGA_SMALL (B0+B1+B2+B3) // mask of all state machines
#else
#error Unsupported number of layers!
#endif
// VGA DMA
#define VGA_DMA 2 // VGA DMA base channel
#define VGA_DMA_CB0 (VGA_DMA+0) // VGA DMA channel - control block of base layer
#define VGA_DMA_PIO0 (VGA_DMA+1) // VGA DMA channel - copy data of base layer to PIO (raises IRQ0 on quiet)
#define VGA_DMA_CB1 (VGA_DMA+2) // VGA DMA channel - control block of overlapped layer 1
#define VGA_DMA_PIO1 (VGA_DMA+3) // VGA DMA channel - copy data of overlapped layer 1 to PIO
#define VGA_DMA_CB2 (VGA_DMA+4) // VGA DMA channel - control block of overlapped layer 1
#define VGA_DMA_PIO2 (VGA_DMA+5) // VGA DMA channel - copy data of overlapped layer 2 to PIO
#define VGA_DMA_CB3 (VGA_DMA+6) // VGA DMA channel - control block of overlapped layer 1
#define VGA_DMA_PIO3 (VGA_DMA+7) // VGA DMA channel - copy data of overlapped layer 3 to PIO
#define VGA_DMA_CB(layer) (VGA_DMA_CB0+(layer)*2) // VGA DMA control channel of the layer
#define VGA_DMA_PIO(layer) (VGA_DMA_PIO0+(layer)*2) // VGA DMA data channel of the layer
#define VGA_DMA_NUM (LAYERS*2) // number of used DMA channels
#define VGA_DMA_FIRST VGA_DMA // first used DMA

View file

@ -1,505 +0,0 @@
// ****************************************************************************
//
// VGA layers
//
// ****************************************************************************
#include "include.h"
// layer program descriptors
// One entry per layer program, in LAYERPROG_* order (see the comment heading
// each entry). Every entry points at the instructions and descriptor of one
// PIO program and records its wrap/idle/entry offsets plus a list of "extra"
// offsets stored as (offset, CPP-correction) pairs; .extranum is the number
// of pairs that follow in .extra.
const sLayerProg LayerProg[LAYERPROG_NUM] = {
// LAYERPROG_BASE base layer
{
.ins=vga_program_instructions, // pointer to program instructions
.prg=&vga_program, // pointer to program descriptor
.length=vga_program.length, // program length (number of instructions)
.wrap_target=vga_wrap_target, // offset of wrap target
.wrap=vga_wrap, // offset of wrap end
// NOTE(review): the base program reuses its entry offset as idle offset,
// while all other programs have a dedicated *_offset_idle - confirm intended
.idle=vga_offset_entry, // offset of idle
.entry=vga_offset_entry, // offset of entry
.maxidle=2, // max. offset of idle to detect end of job
.extranum=2, // number of extra offsets
.extra={ // extra offsets, pairs: offset, CPP-correction
vga_offset_extra1, 2,
vga_offset_extra2, 2,
},
},
// LAYERPROG_KEY layer with key color
{
.ins=keylayer_program_instructions, // pointer to program instructions
.prg=&keylayer_program, // pointer to program descriptor
.length=keylayer_program.length, // program length (number of instructions)
.wrap_target=keylayer_wrap_target, // offset of wrap target
.wrap=keylayer_wrap, // offset of wrap end
.idle=keylayer_offset_idle, // offset of idle
.entry=keylayer_offset_entry, // offset of entry
.maxidle=2, // max. offset of idle to detect end of job
.extranum=1, // number of extra offsets
.extra={ // extra offsets, pairs: offset, CPP-correction
keylayer_offset_extra1, 6,
},
},
// LAYERPROG_BLACK layer with black key color
{
.ins=blacklayer_program_instructions, // pointer to program instructions
.prg=&blacklayer_program, // pointer to program descriptor
.length=blacklayer_program.length, // program length (number of instructions)
.wrap_target=blacklayer_wrap_target, // offset of wrap target
.wrap=blacklayer_wrap, // offset of wrap end
.idle=blacklayer_offset_idle, // offset of idle
.entry=blacklayer_offset_entry, // offset of entry
.maxidle=2, // max. offset of idle to detect end of job
.extranum=2, // number of extra offsets
.extra={ // extra offsets, pairs: offset, CPP-correction
blacklayer_offset_extra1, 4,
blacklayer_offset_extra2, 3,
},
},
// LAYERPROG_WHITE layer with white key color
{
.ins=whitelayer_program_instructions, // pointer to program instructions
.prg=&whitelayer_program, // pointer to program descriptor
.length=whitelayer_program.length, // program length (number of instructions)
.wrap_target=whitelayer_wrap_target, // offset of wrap target
.wrap=whitelayer_wrap, // offset of wrap end
.idle=whitelayer_offset_idle, // offset of idle
.entry=whitelayer_offset_entry, // offset of entry
.maxidle=2, // max. offset of idle to detect end of job
.extranum=1, // number of extra offsets
.extra={ // extra offsets, pairs: offset, CPP-correction
whitelayer_offset_extra1, 4,
},
},
// LAYERPROG_MONO layer with mono pattern or simple color
{
.ins=monolayer_program_instructions, // pointer to program instructions
.prg=&monolayer_program, // pointer to program descriptor
.length=monolayer_program.length, // program length (number of instructions)
.wrap_target=monolayer_wrap_target, // offset of wrap target
.wrap=monolayer_wrap, // offset of wrap end
.idle=monolayer_offset_idle, // offset of idle
.entry=monolayer_offset_entry, // offset of entry
.maxidle=2, // max. offset of idle to detect end of job
.extranum=2, // number of extra offsets
.extra={ // extra offsets, pairs: offset, CPP-correction
monolayer_offset_extra1, 4,
monolayer_offset_extra2, 2,
},
},
// LAYERPROG_RLE layer with RLE compression
{
.ins=rlelayer_program_instructions, // pointer to program instructions
.prg=&rlelayer_program, // pointer to program descriptor
.length=rlelayer_program.length, // program length (number of instructions)
.wrap_target=rlelayer_wrap_target, // offset of wrap target
.wrap=rlelayer_wrap, // offset of wrap end
.idle=rlelayer_offset_idle, // offset of idle
.entry=rlelayer_offset_entry, // offset of entry
.maxidle=2, // max. offset of idle to detect end of job
.extranum=7, // number of extra offsets
.extra={ // extra offsets, pairs: offset, CPP-correction
rlelayer_offset_extra1, 1,
rlelayer_offset_extra2, 3,
rlelayer_offset_extra3, 2,
rlelayer_offset_extra4, 2,
rlelayer_offset_extra5, 3,
rlelayer_offset_extra6, 2,
rlelayer_offset_extra7, 3,
},
},
};
// current layer program of overlapped layers
u8 LayerProgInx; // index of current layer program (LAYERPROG_*)
sLayerProg CurLayerProg; // copy of current layer program
// Layer mode descriptors, one entry per LAYERMODE_* index (same order as the trailing comments).
// prog ... layer program used by the mode (LAYERPROG_*)
// mincpp ... minimal clock cycles per pixel
// maxcpp ... maximal clock cycles per pixel
const sLayerMode LayerMode[LAYERMODE_NUM] = {
    { .prog=LAYERPROG_BASE,  .mincpp=2, .maxcpp=17 }, // LAYERMODE_BASE base layer
    { .prog=LAYERPROG_KEY,   .mincpp=6, .maxcpp=37 }, // LAYERMODE_KEY layers with key color
    { .prog=LAYERPROG_BLACK, .mincpp=4, .maxcpp=34 }, // LAYERMODE_BLACK layers with black key color
    { .prog=LAYERPROG_WHITE, .mincpp=4, .maxcpp=35 }, // LAYERMODE_WHITE layers with white key color
    { .prog=LAYERPROG_MONO,  .mincpp=4, .maxcpp=35 }, // LAYERMODE_MONO layers with mono pattern
    { .prog=LAYERPROG_MONO,  .mincpp=2, .maxcpp=33 }, // LAYERMODE_COLOR layers with simple color (shares the mono program)
    { .prog=LAYERPROG_RLE,   .mincpp=3, .maxcpp=32 }, // LAYERMODE_RLE layers with RLE compression
    { .prog=LAYERPROG_KEY,   .mincpp=6, .maxcpp=37 }, // LAYERMODE_SPRITEKEY layers with sprites with key color
    { .prog=LAYERPROG_BLACK, .mincpp=4, .maxcpp=34 }, // LAYERMODE_SPRITEBLACK layers with sprites with black key color
    { .prog=LAYERPROG_WHITE, .mincpp=4, .maxcpp=35 }, // LAYERMODE_SPRITEWHITE layers with sprites with white key color
    { .prog=LAYERPROG_KEY,   .mincpp=6, .maxcpp=37 }, // LAYERMODE_FASTSPRITEKEY layers with fast sprites with key color
    { .prog=LAYERPROG_BLACK, .mincpp=4, .maxcpp=34 }, // LAYERMODE_FASTSPRITEBLACK layers with fast sprites with black key color
    { .prog=LAYERPROG_WHITE, .mincpp=4, .maxcpp=35 }, // LAYERMODE_FASTSPRITEWHITE layers with fast sprites with white key color
    { .prog=LAYERPROG_KEY,   .mincpp=6, .maxcpp=37 }, // LAYERMODE_PERSPKEY layer with key color and image with transformation matrix
    { .prog=LAYERPROG_BLACK, .mincpp=4, .maxcpp=34 }, // LAYERMODE_PERSPBLACK layer with black key color and image with transformation matrix
    { .prog=LAYERPROG_WHITE, .mincpp=4, .maxcpp=35 }, // LAYERMODE_PERSPWHITE layer with white key color and image with transformation matrix
    { .prog=LAYERPROG_KEY,   .mincpp=6, .maxcpp=37 }, // LAYERMODE_PERSP2KEY layer with key color and double pixel image with transformation matrix
    { .prog=LAYERPROG_BLACK, .mincpp=4, .maxcpp=34 }, // LAYERMODE_PERSP2BLACK layer with black key color and double pixel image with transformation matrix
    { .prog=LAYERPROG_WHITE, .mincpp=4, .maxcpp=35 }, // LAYERMODE_PERSP2WHITE layer with white key color and double pixel image with transformation matrix
};
// current layer mode of layers
u8 LayerModeInx[LAYERS]; // index of current layer mode (LAYERMODE_*)
sLayerMode CurLayerMode[LAYERS]; // copy of current layer mode
// current layer screens
sLayer LayerScreen[LAYERS]; // layer screens
u8 LayerMask; // mask of active layers
// index of first pin of layer (base layer should stay VGA_GPIO_FIRST)
// By default every layer starts at the same first pin as the base layer.
// NOTE(review): the initializer lists exactly 4 entries - assumes LAYERS_MAX is 4, confirm.
u8 LayerFirstPin[LAYERS_MAX] = { VGA_GPIO_FIRST, VGA_GPIO_FIRST, VGA_GPIO_FIRST, VGA_GPIO_FIRST};
// number of pins of overlapped layer (base layer should stay VGA_GPIO_OUTNUM)
// By default every layer uses the same number of pins as the base layer.
u8 LayerNumPin[LAYERS_MAX] = { VGA_GPIO_OUTNUM, VGA_GPIO_OUTNUM, VGA_GPIO_OUTNUM, VGA_GPIO_OUTNUM};
// Switch overlapped layer ON.
// inx ... layer index 1..3
void LayerOn(u8 inx)
{
    sLayer* layer = &LayerScreen[inx];
    __dmb(); // barrier before the flag is written
    layer->on = True;
    __dmb(); // barrier after the flag is written
}
// Switch overlapped layer OFF.
// inx ... layer index 1..3
void LayerOff(u8 inx)
{
    sLayer* layer = &LayerScreen[inx];
    __dmb(); // barrier before the flag is written
    layer->on = False;
    __dmb(); // barrier after the flag is written
}
// Set the X coordinate of an overlapped layer and rebuild its scanline init word.
// inx ... layer index
// x ... start X coordinate in pixels (stored as given; only the derived delay is clamped to 0)
void LayerSetX(u8 inx, s16 x)
{
    sLayer* layer = &LayerScreen[inx];
    u32 width = layer->w; // image width in pixels
    u32 word = 0; // init word; stays 0 for modes not listed below

    // initial delay = X offset * clocks per pixel, never negative
    s32 delay = layer->cpp*x;
    if (delay < 0) delay = 0;

    // pick the init word format that belongs to the layer mode
    switch (layer->mode)
    {
    // modes with an explicit key color
    case LAYERMODE_KEY:
    case LAYERMODE_SPRITEKEY:
    case LAYERMODE_FASTSPRITEKEY:
    case LAYERMODE_PERSPKEY:
    case LAYERMODE_PERSP2KEY:
        word = VGAKEY(delay, width, (layer->keycol & 0xff));
        break;

    // modes with black key color
    case LAYERMODE_BLACK:
    case LAYERMODE_SPRITEBLACK:
    case LAYERMODE_FASTSPRITEBLACK:
    case LAYERMODE_PERSPBLACK:
    case LAYERMODE_PERSP2BLACK:
        word = VGABLACK(delay, width);
        break;

    // modes with white key color
    case LAYERMODE_WHITE:
    case LAYERMODE_SPRITEWHITE:
    case LAYERMODE_FASTSPRITEWHITE:
    case LAYERMODE_PERSPWHITE:
    case LAYERMODE_PERSP2WHITE:
        word = VGAWHITE(delay, width);
        break;

    // mono pattern
    case LAYERMODE_MONO:
        word = VGAMONO(delay, width, (layer->keycol & 0xff));
        break;

    // simple color
    case LAYERMODE_COLOR:
        word = VGACOLOR(delay, width);
        break;

    // RLE compression
    case LAYERMODE_RLE:
        word = VGARLE(delay);
        break;
    }

    layer->init = word; // init word sent on start of scanline
    layer->x = x; // remember start X coordinate
}
// Set the Y coordinate of an overlapped layer.
// inx ... layer index
// y ... start Y coordinate
void LayerSetY(u8 inx, s16 y)
{
    LayerScreen[inx].y = y;
}
// Set the image width of an overlapped layer.
// Derives the transfer count and an automatic pitch wb (full line) from the width;
// set a custom wb after calling this function. Also refreshes the init word.
// inx ... layer index
// w ... image width in pixels
void LayerSetW(u8 inx, u16 w)
{
    sLayer* layer = &LayerScreen[inx];
    layer->w = w; // image width

    if (layer->mode == LAYERMODE_MONO)
    {
        // mono pattern: 8 pixels per byte, transfers are 4 bytes (rounded up)
        layer->trans = ((w/8) + 3)/4;
        layer->wb = w/8;
    }
    else
    {
        // one byte per pixel, 4 pixels per transfer
        layer->trans = w/4;
        layer->wb = w;
    }

    // the init word contains the width, so rebuild it
    LayerSetX(inx, layer->x);
}
// Set the image height of an overlapped layer.
// inx ... layer index
// h ... image height
void LayerSetH(u8 inx, u16 h)
{
    LayerScreen[inx].h = h;
}
// Set up overlapped layer 1..3 (not for sprites and not for perspective mode).
// The layer is switched OFF first and is left OFF, positioned at X=0, Y=0.
//  inx ... layer index 1..3
//  img ... pointer to image data
//  vmode ... pointer to initialized video configuration
//  w ... image width in pixels (must be multiple of 4)
//  h ... image height
//  col ... key color (needed for LAYERMODE_KEY and LAYERMODE_MONO layer mode)
//  par ... additional data (RLE index table, integer transformation matrix)
// Use these functions after layer setup: LayerSetX, LayerSetY, LayerOn
void LayerSetup(u8 inx, const u8* img, const sVmode* vmode, u16 w, u16 h, u8 col /* = 0 */, const void* par /* = NULL */)
{
    // the layer must be off while its descriptor is rewritten
    LayerOff(inx);

    sLayer* layer = &LayerScreen[inx];

    // video mode dependent parameters
    layer->mode = vmode->mode[inx]; // layer mode
    layer->cpp = vmode->cpp; // clocks per pixel

    // image
    layer->img = img;
    layer->par = par;
    layer->h = h;
    layer->spritenum = 0; // no sprites

    // key color replicated into all 4 bytes of the word
    layer->keycol = col * 0x01010101u;

    // position
    layer->x = 0;
    layer->y = 0;

    // width last: it updates init, trans and wb from the fields set above
    LayerSetW(inx, w);
}
// Set up overlapped layer 1..3 for LAYERMODE_PERSP* modes.
//  inx ... layer index 1..3
//  img ... pointer to source image data (image width and height must be power of 2)
//  vmode ... pointer to initialized video configuration
//  w ... destination image width in pixels (must be multiple of 4)
//  h ... destination image height
//  xbits ... number of bits of width of source image
//  ybits ... number of bits of height of source image
//  horiz ... horizon of perspective projection/4 (0=no perspective, <0 ceiling)
//  mat ... integer transformation matrix
//  col ... key color (needed for LAYERMODE_PERSPKEY layer mode)
// Use these functions after layer setup: LayerSetX, LayerSetY, LayerOn
void LayerPerspSetup(u8 inx, const u8* img, const sVmode* vmode, u16 w, u16 h, u8 xbits, u8 ybits,
    s8 horiz, const int* mat, u8 col /* = 0 */)
{
    // common setup; the matrix travels in the "par" slot
    LayerSetup(inx, img, vmode, w, h, col, mat);

    // perspective specific parameters
    sLayer* layer = &LayerScreen[inx];
    layer->horiz = horiz;
    layer->xbits = xbits;
    layer->ybits = ybits;
}
// Set up overlapped layer 1..3 for LAYERMODE_SPRITE* and LAYERMODE_FASTSPRITE* modes.
//  inx ... layer index 1..3
//  sprite ... pointer to list of sprites (array of pointers to sprites; sorted by X on LAYERMODE_FASTSPRITE* modes)
//  spritenum ... number of sprites in the list (to turn sprite off, you can set its coordinate Y out of the screen)
//  vmode ... pointer to initialized video configuration
//  x ... start coordinate X of area with sprites
//  y ... start coordinate Y of area with sprites
//  w ... width of area with sprites (must be multiple of 4)
//  h ... height of area with sprites
//  col ... key color (needed for LAYERMODE_SPRITEKEY and LAYERMODE_FASTSPRITEKEY layer mode)
// Use function LayerOn after layer setup.
void LayerSpriteSetup(u8 inx, sSprite** sprite, u16 spritenum, const sVmode* vmode,
    s16 x, s16 y, u16 w, u16 h, u8 col /* = 0 */)
{
    // common setup; the sprite list travels in the "img" slot
    LayerSetup(inx, (const u8*)sprite, vmode, w, h, col);

    // LayerSetup resets the sprite count, so set it afterwards
    LayerScreen[inx].spritenum = spritenum;

    // position of the sprite area
    LayerSetX(inx, x);
    LayerSetY(inx, y);
}
// prepare array of start and length of lines (detects transparent pixels)
// img ... image
// x0 ... array of start of lines
// w0 ... array of length of lines
// w ... sprite width (slow sprite: max. width 255)
// h ... sprite height
// wb ... sprite pitch (bytes between lines)
// col ... key color
// fast ... fast sprite, divide start and length of line by 4
void SpritePrepLines(const u8* img, u8* x0, u8* w0, u16 w, u16 h, u16 wb, u8 col, Bool fast)
{
int x1, x2, w2, y;
const u8* d;
// loop through lines
for (y = 0; y < h; y++)
{
// find start of line
d = &img[y*wb];
for (x1 = 0; x1 < w; x1++)
{
if (*d != col) break;
d++;
}
// find end of line
d = &img[y*wb + w - 1];
for (x2 = w; x2 > x1; x2--)
{
if (*d != col) break;
d--;
}
// prepare start and length
w2 = x2 - x1;
if (fast)
{
w2 += ((x2 + 3) & ~3) - x2;
x1 /= 4;
w2 = (w2 + 3)/4;
}
if (x1 > 255) x1 = 255;
if (w2 > 255) w2 = 255;
// store start and length
*x0++ = x1;
*w0++ = w2;
}
}
// Sort fast sprite list by X coordinate (ascending, in place).
// Neighbours are compared; after a swap the scan steps one position back,
// otherwise it moves forward.
//  list ... array of pointers to sprites
//  num ... number of sprites in the list
void SortSprite(sSprite** list, int num)
{
    int pos = 0;
    while (pos < num-1)
    {
        sSprite* left = list[pos];
        sSprite* right = list[pos+1];

        if (left->x > right->x)
        {
            // wrong order: exchange the pair
            list[pos] = right;
            list[pos+1] = left;

            // re-check the previous pair, if there is one
            if (pos > 0)
            {
                pos--;
                continue;
            }
        }
        pos++;
    }
}

View file

@ -1,195 +0,0 @@
// ****************************************************************************
//
// VGA layers
//
// ****************************************************************************
#ifndef _VGA_LAYER_H
#define _VGA_LAYER_H
// base layer commands
// VGADARK: (vga_offset_dark+BASE_OFFSET) is placed from bit 27 up, num from bit 8 up, col in the lowest bits
#define VGADARK(num,col) (((u32)(vga_offset_dark+BASE_OFFSET)<<27) | ((u32)(num)<<8) | (u32)(col)) // assemble control word of "dark" command
// VGACMD: jmp is placed from bit 27 up, num in the lowest bits (arguments are not masked)
#define VGACMD(jmp,num) (((u32)(jmp)<<27) | (u32)(num)) // assemble control word
// --- overlapped layer init word (delay: use number of offset pixels * Vmode.cpp, num: number of pixels)
// The macros do not mask their arguments; the caller must keep each value within its bit field.
// init word of key color layer LAYERPROG_KEY
// (delay+1 from bit 19 up, key color from bit 11 up, num-1 in the lowest bits)
#define VGAKEY(delay,num,col) (((u32)((delay)+1)<<19) | ((u32)(col)<<11) | (u32)((num)-1))
// init word of mono layer LAYERPROG_MONO
// (delay from bit 20 up, color from bit 12 up, num-1 from bit 1 up, B0 OR-ed in)
#define VGAMONO(delay,num,col) (((u32)((delay)+0)<<20) | ((u32)(col)<<12) | ((u32)((num)-1)<<1) | B0)
// init word of color layer LAYERPROG_MONO
// (same layout as VGAMONO with color fixed to 0xff, delay+2, and without B0)
#define VGACOLOR(delay,num) (((u32)((delay)+2)<<20) | ((u32)0xff<<12) | ((u32)((num)-1)<<1) | 0)
// init word of black color layer LAYERPROG_BLACK
// (delay+3 from bit 16 up, num-1 in the lowest bits)
#define VGABLACK(delay,num) (((u32)((delay)+3)<<16) | (u32)((num)-1))
// init word of white color layer LAYERPROG_WHITE
// (identical encoding to VGABLACK)
#define VGAWHITE(delay,num) (((u32)((delay)+3)<<16) | (u32)((num)-1))
// init word of RLE layer LAYERPROG_RLE
// (just delay+1, no width)
#define VGARLE(delay) ((delay)+1)
// swap bytes of command (reverses the order of the 4 bytes of a 32-bit value)
// The argument is converted to u32 first, so that moving the low byte to bits 24..31
// is done in unsigned arithmetic (shifting a set bit into the sign bit of a signed int
// is undefined in C). The result has type u32. The argument is evaluated 4 times.
#define BYTESWAP(n) ((((u32)(n)&0xff)<<24)|(((u32)(n)&0xff00)<<8)|(((u32)(n)&0xff0000)>>8)|(((u32)(n)&0xff000000)>>24))
// align to multiple of 4 (rounds DOWN by clearing the two lowest bits)
#define ALIGN4(x) ((x) & ~3)
// layer program descriptor
// Describes one PIO program of a layer: its instructions and the instruction
// offsets (wrap, idle, entry, ...) within that program.
typedef struct {
const u16* ins; // pointer to program instructions (NULL=layers is OFF)
const struct pio_program* prg; // pointer to program descriptor
u8 length; // program length (number of instructions)
u8 wrap_target; // offset of wrap target
u8 wrap; // offset of wrap end
u8 idle; // offset of idle
u8 entry; // offset of entry
u8 maxidle; // max. offset of idle to detect end of job
u8 extranum; // number of extra offsets
u8 extra[2*16]; // extra offsets, pairs: offset, CPP-correction (room for 16 pairs)
} sLayerProg;
// layer program descriptors
extern const sLayerProg LayerProg[LAYERPROG_NUM];
// current layer program of overlapped layers
extern u8 LayerProgInx; // index of current layer program (LAYERPROG_*, LAYERPROG_BASE = overlapped layers are OFF)
extern sLayerProg CurLayerProg; // copy of current layer program
// layer mode descriptor
// Field order matters if a table is initialized positionally: prog, mincpp, maxcpp.
typedef struct {
u8 prog; // layer program (LAYERPROG_*)
u8 mincpp; // minimal clock cycles per pixel
u8 maxcpp; // maximal clock cycles per pixel
} sLayerMode;
// layer mode descriptors
extern const sLayerMode LayerMode[LAYERMODE_NUM];
// current layer mode of layers
extern u8 LayerModeInx[LAYERS]; // index of current layer mode (LAYERMODE_*)
extern sLayerMode CurLayerMode[LAYERS]; // copy of current layer mode
// layer screen descriptor (on change update SLAYER_* in define.h)
// The field layout is mirrored by SLAYER_* offsets, so do not reorder or resize fields
// without updating them.
typedef struct {
const u8* img; // pointer to image in current layer format, or sprite list
const void* par; // additional parameter (RLE index table, integer transformation matrix)
u32 init; // init word sent on start of scanline (start X coordinate)
u32 keycol; // key color
u16 trans; // trans count
s16 x; // start X coordinate
s16 y; // start Y coordinate
u16 w; // width in pixels
u16 h; // height
u16 wb; // image width in bytes (pitch of lines)
u8 mode; // layer mode
s8 horiz; // horizon of perspective projection/4 (only with LAYERMODE_PERSP* modes, 0=no perspective, <0 ceiling)
u8 xbits; // number of bits of width of source image (only with LAYERMODE_PERSP* modes)
u8 ybits; // number of bits of height of source image (only with LAYERMODE_PERSP* modes)
u16 spritenum; // number of sprites
Bool on; // layer is ON
u8 cpp; // current clock pulses per pixel (used to calculate X coordinate)
} sLayer;
// sprite (on change update SSPRITE_* in define.h)
// The field layout is mirrored by SSPRITE_* offsets, so do not reorder or resize fields
// without updating them.
typedef struct {
u8* img; // SSPRITE_IMG pointer to image data
u8* x0; // SSPRITE_X0 pointer to array of start of lines, or fast sprite start of lines/4
u8* w0; // SSPRITE_W0 pointer to array of length of lines, or fast sprite length of lines/4
u32 keycol; // SSPRITE_KEYCOL key color
s16 x; // SSPRITE_X sprite X-coordinate on the screen
s16 y; // SSPRITE_Y sprite Y-coordinate on the screen
u16 w; // SSPRITE_W sprite width (slow sprite: max. width 255)
u16 h; // SSPRITE_H sprite height
u16 wb; // SSPRITE_WB sprite pitch (number of bytes between lines)
u16 res; // ...reserved, structure align
} sSprite;
// current layer screens
extern sLayer LayerScreen[LAYERS]; // layer screens
extern u8 LayerMask; // mask of active layers
// index of first pin of layer (base layer should stay VGA_GPIO_FIRST)
extern u8 LayerFirstPin[LAYERS_MAX];
// number of pins of overlapped layer (base layer should stay VGA_GPIO_OUTNUM)
extern u8 LayerNumPin[LAYERS_MAX];
// set overlapped layer 1..3 ON
void LayerOn(u8 inx);
// set overlapped layer 1..3 OFF
void LayerOff(u8 inx);
// set coordinate X of overlapped layer
void LayerSetX(u8 inx, s16 x);
// set coordinate Y of overlapped layer
void LayerSetY(u8 inx, s16 y);
// set width of image of overlapped layer
// Uses auto pitch wb (full line). Set custom wb after calling this function.
void LayerSetW(u8 inx, u16 w);
// set height of image of overlapped layer
void LayerSetH(u8 inx, u16 h);
// setup overlapped layer 1..3 (not for sprites and not for perspective mode)
// inx ... layer index 1..3
// img ... pointer to image data
// vmode ... pointer to initialized video configuration
// w ... image width in pixels (must be multiple of 4)
// h ... image height
// col ... key color (needed for LAYERMODE_KEY and LAYERMODE_MONO layer mode)
// par ... additional data (RLE index table, integer transformation matrix)
// Use these functions after layer setup: LayerSetX, LayerSetY, LayerOn
void LayerSetup(u8 inx, const u8* img, const sVmode* vmode, u16 w, u16 h, u8 col = 0, const void* par = NULL);
// setup overlapped layer 1..3 for LAYERMODE_PERSP* modes
// inx ... layer index 1..3
// img ... pointer to source image data (image width and height must be power of 2)
// vmode ... pointer to initialized video configuration
// w ... destination image width in pixels (must be multiple of 4)
// h ... destination image height
// xbits ... number of bits of width of source image
// ybits ... number of bits of height of source image
// horiz ... horizon of perspective projection/4 (0=no perspective, <0 ceiling)
// mat ... integer transformation matrix
// col ... key color (needed for LAYERMODE_PERSPKEY layer mode)
// Use these functions after layer setup: LayerSetX, LayerSetY, LayerOn
void LayerPerspSetup(u8 inx, const u8* img, const sVmode* vmode, u16 w, u16 h, u8 xbits, u8 ybits,
s8 horiz, const int* mat, u8 col = 0);
// setup overlapped layer 1..3 for LAYERMODE_SPRITE* and LAYERMODE_FASTSPRITE* modes
// inx ... layer index 1..3
// sprite ... pointer to list of sprites (array of pointers to sprites; sorted by X on LAYERMODE_FASTSPRITE* modes)
// spritenum ... number of sprites in the list (to turn sprite off, you can set its coordinate Y out of the screen)
// vmode ... pointer to initialized video configuration
// x ... start coordinate X of area with sprites
// y ... start coordinate Y of area with sprites
// w ... width of area with sprites (must be multiple of 4)
// h ... height of area with sprites
// col ... key color (needed for LAYERMODE_SPRITEKEY and LAYERMODE_FASTSPRITEKEY layer mode)
// Use functions LayerOn after layer setup.
void LayerSpriteSetup(u8 inx, sSprite** sprite, u16 spritenum, const sVmode* vmode,
s16 x, s16 y, u16 w, u16 h, u8 col = 0);
// prepare array of start and length of lines (detects transparent pixels)
// img ... image
// x0 ... array of start of lines
// w0 ... array of length of lines
// w ... sprite width (slow sprite: max. width 255)
// h ... sprite height
// wb ... sprite pitch (bytes between lines)
// col ... key color
// fast ... fast sprite, divide start and length of line by 4
void SpritePrepLines(const u8* img, u8* x0, u8* w0, u16 w, u16 h, u16 wb, u8 col, Bool fast);
// sort fast sprite list by X coordinate
void SortSprite(sSprite** list, int num);
#endif // _VGA_LAYER_H

View file

@ -1,109 +0,0 @@
// ****************************************************************************
//
// VGA colors and palettes
//
// ****************************************************************************
#ifndef _VGA_PAL_H
#define _VGA_PAL_H
// multiply color pattern (used in mode GF_COLOR): packs 4 pixel colors into one 32-bit word,
// a in bits 0..7, b in bits 8..15, c in bits 16..23, d in bits 24..31.
// Each color is converted to u32 before shifting, so that colors >= 0x80 in position d
// do not shift into the sign bit of a signed int. The result has type u32.
#define MULTICOL(a,b,c,d) ((u32)(a)|((u32)(b)<<8)|((u32)(c)<<16)|((u32)(d)<<24))
// CGA colors
// Value = 8-bit color code used by this library; the trailing comment gives
// the corresponding 24-bit RGB color and its name.
#define CGACOL_0 0 // 0x000000 black
#define CGACOL_1 2 // 0x0000C3 dark blue
#define CGACOL_2 20 // 0x00C300 dark green
#define CGACOL_3 22 // 0x00C3C3 dark cyan
#define CGACOL_4 160 // 0xC30000 dark red
#define CGACOL_5 162 // 0xC300C3 dark magenta
#define CGACOL_6 168 // 0xC35400 brown
#define CGACOL_7 182 // 0xC3C3C3 light gray
#define CGACOL_8 73 // 0x545454 dark gray
#define CGACOL_9 75 // 0x5454FF light blue
#define CGACOL_10 93 // 0x54FF54 light green
#define CGACOL_11 95 // 0x54FFFF light cyan
#define CGACOL_12 233 // 0xFF5454 light red
#define CGACOL_13 235 // 0xFF54FF light magenta
#define CGACOL_14 253 // 0xFFFF54 yellow
#define CGACOL_15 255 // 0xFFFFFF white
// ZX Spectrum colors
// Value = 8-bit color code used by this library; the trailing comment gives
// the corresponding 24-bit RGB color and its name.
#define ZXCOL_0 0 // 0x000000 black
#define ZXCOL_1 2 // 0x0000C3 dark blue
#define ZXCOL_2 160 // 0xC30000 dark red
#define ZXCOL_3 162 // 0xC300C3 dark magenta
#define ZXCOL_4 20 // 0x00C300 dark green
#define ZXCOL_5 22 // 0x00C3C3 dark cyan
#define ZXCOL_6 180 // 0xC3C300 dark yellow
#define ZXCOL_7 182 // 0xC3C3C3 light gray
#define ZXCOL_8 73 // 0x545454 dark gray
#define ZXCOL_9 3 // 0x0000FF light blue
#define ZXCOL_10 224 // 0xFF0000 light red
#define ZXCOL_11 227 // 0xFF00FF light magenta
#define ZXCOL_12 28 // 0x00FF00 light green
#define ZXCOL_13 31 // 0x00FFFF light cyan
#define ZXCOL_14 252 // 0xFFFF00 yellow
#define ZXCOL_15 255 // 0xFFFFFF white
// Colors
// Mapping of color bits to output pins and VGA signals:
// GP0 ... B0 ... VGA B0 blue
// GP1 ... B1 ... VGA B1
// GP2 ... B2 ... VGA G0 green
// GP3 ... B3 ... VGA G1
// GP4 ... B4 ... VGA G2
// GP5 ... B5 ... VGA R0 red
// GP6 ... B6 ... VGA R1
// GP7 ... B7 ... VGA R2
// Named colors are sums of the bit constants B0..B7 (defined elsewhere; presumably
// single-bit masks, so "+" acts as OR as long as no bit is repeated).
#define COL_BLACK 0
#define COL_DARKBLUE B0
#define COL_SEMIBLUE B1
#define COL_BLUE (B0+B1)
#define COL_MOREBLUE (COL_BLUE+B3+B6)
#define COL_LIGHTBLUE (COL_BLUE+B4+B7)
#define COL_DARKGREEN B3
#define COL_SEMIGREEN B4
#define COL_GREEN (B2+B3+B4)
#define COL_MOREGREEN (COL_GREEN+B0+B6)
#define COL_LIGHTGREEN (COL_GREEN+B1+B7)
#define COL_DARKRED B6
#define COL_SEMIRED B7
#define COL_RED (B5+B6+B7)
#define COL_MORERED (COL_RED+B0+B3)
#define COL_LIGHTRED (COL_RED+B1+B4)
#define COL_DARKCYAN (B0+B3)
#define COL_SEMICYAN (B1+B4)
#define COL_CYAN (B0+B1+B2+B3+B4)
#define COL_DARKMAGENTA (B0+B6)
#define COL_SEMIMAGENTA (B1+B7)
#define COL_MAGENTA (B0+B1+B5+B6+B7)
#define COL_DARKYELLOW (B3+B6)
#define COL_SEMIYELLOW (B4+B7)
#define COL_YELLOW (B2+B3+B4+B5+B6+B7)
// gray scale, 8 levels from black (GRAY0) to white (GRAY7)
#define COL_GRAY0 0
#define COL_GRAY1 (B2+B5)
#define COL_GRAY2 (B0+B3+B6)
#define COL_GRAY3 (B0+B2+B3+B5+B6)
#define COL_GRAY4 (B1+B4+B7)
#define COL_GRAY5 (B1+B2+B4+B5+B7)
#define COL_GRAY6 (B0+B1+B3+B4+B6+B7)
#define COL_GRAY7 (B0+B1+B2+B3+B4+B5+B6+B7)
#define COL_WHITE COL_GRAY7
// compose color from RGB
//  r, g, b ... color components, intended range 0..255
// Result (u8): top 3 bits of r in bits 7..5, top 3 bits of g in bits 4..2,
// top 2 bits of b in bits 1..0.
// All three components are masked before they are combined, so an out-of-range
// or negative blue value cannot spill into the green/red bits.
#define COLRGB(r,g,b) ((u8)(((r)&0xe0)|(((g)&0xe0)>>3)|(((b)&0xc0)>>6)))
// default 16-color palettes (CGA colors)
// - do not set "const", to stay in faster RAM
extern u8 DefPal16[16];
#endif // _VGA_PAL_H

View file

@ -1,313 +0,0 @@
// ****************************************************************************
//
// VGA render
//
// ****************************************************************************
#include "define.h" // common definitions of C and ASM
.syntax unified
.section .time_critical.Render, "ax"
.cpu cortex-m0plus
.thumb // use 16-bit instructions
.extern pScreen // sScreen* pScreen; // pointer to current video screen
.extern LineBuf0 // u8 LineBuf0[BLACK_MAX]; // line buffer with black color
// extern "C" u32* Render(u32* cbuf, u8* dbuf, int line, int pixnum);
// render scanline
// Walks the strips of the current screen to find the one containing the scanline,
// then renders its segments left to right; whatever part of the line is left over
// is filled from the black line buffer LineBuf0.
// cbuf ... control buffer
// dbuf ... data buffer (pixel data)
// line ... current scanline 0..
// pixnum ... total pixels (must be multiple of 4)
// Returns new pointer to control buffer
.thumb_func
.global Render
Render:
// push registers
push {r4-r7,lr}
// prepare local variables
// SP+0: input argument of render functions
// SP+4: R0 control buffer
// SP+8: R1 data buffer (pixel data)
// SP+12: R2 current scanline 0.. (stored later, relative to start of strip)
// SP+16: R3 total pixels (updated to remaining pixels while rendering)
// SP+20: R4
// SP+24: R5
// SP+28: R6
// SP+32: R7
// SP+36: LR
sub sp,#20
str r0,[sp,#4] // control buffer
str r1,[sp,#8] // data buffer
str r3,[sp,#16] // total pixels
// ---- prepare pointer to current screen
// sScreen* s = pScreen;
// if (s != NULL) {
// prepare pointer to current screen
ldr r4,Render_pScreenAddr // pointer to pointer to current video Screen (variable pScreen)
ldr r4,[r4,#0] // pointer to current video Screen
cmp r4,#0 // is pointer valid?
beq Render_Clear // pointer is not valid, clear rest of line (display is OFF)
// ---- find video strip with current scanline
// int stripnum = s->num;
// sStrip* t = &s->strip[0];
// for (; stripnum > 0; stripnum--) {
// loop through video strips
ldrh r5,[r4,#SSCREEN_NUM] // u16 number of video strips
tst r5,r5 // check number of video strips
beq Render_Clear // no video strips, return
adds r4,#SSCREEN_STRIP // pointer to first video strip
// R2 ... current scanline
// R4 ... pointer to video strip
// R5 ... counter of video strips
Render_StripLoop:
// check if current scanline has been found
// if (line < t->height) {
ldrh r3,[r4,#SSTRIP_HEIGHT] // u16 height of this video strip
cmp r2,r3 // check if current scanline fits into this video strip
blo Render_StripOK // scanline < strip height, this strip is OK
// subtract video strip height from scanline number (to be relative to start of strip)
// line -= t->height;
subs r2,r3 // subtract strip height from scanline number
// next video strip
// t++;
// for (; stripnum > 0; stripnum--)
adds r4,#SSTRIP_SIZE // shift pointer to next video strip
subs r5,#1 // counter of video strips
bne Render_StripLoop // next video strip
b Render_Clear // video strip not found
// ---- process all video segments
Render_StripOK:
// prepare first video segment
// sSegm* g = &t->seg[0];
// int segnum = t->num;
// for (; segnum > 0; segnum--) {
str r2,[sp,#12] // save current scanline (relative to start of strip)
ldrh r5,[r4,#SSTRIP_NUM] // u16 number of video segments
tst r5,r5 // check number of video segments
beq Render_Clear // no video segments, clear whole line
adds r4,#SSTRIP_SEG // pointer to first video segment
// R4 ... pointer to video segment
// R5 ... counter of video segments
Render_SegmLoop:
// get number of remaining pixels
ldr r2,[sp,#16] // get remaining pixels
tst r2,r2 // check number of pixels
beq Render_Clear // end of scanline, stop rendering
// get segment width -> R3
// int w = g->width;
// if (w > pixnum) w = pixnum;
// if (w > 0) {
ldrh r3,[r4,#SSEGM_WIDTH] // get segment width
cmp r3,r2 // check width
blo 2f // width is OK (segment width < remaining pixels)
mov r3,r2 // limit width by remaining pixels
2: tst r3,r3 // check width
beq Render_SegmNext // this segment is invisible, skip it
// update remaining pixels
// pixnum -= w;
subs r2,r3 // decrease remaining width
str r2,[sp,#16] // store new remaining pixels
// get Y coordinate -> R2
// int y = g->offy + line;
ldrh r2,[r4,#SSEGM_OFFY] // get offset at Y direction
sxth r2,r2 // expand to signed
ldr r1,[sp,#12] // get current scanline
add r2,r1 // add Y offset and current scanline
// double lines
// if (g->dbly) y /= 2;
ldrb r1,[r4,#SSEGM_DBLY] // get dbly flag
tst r1,r1 // is dbly flag set?
beq 2f // dbly flag not set
asrs r2,#1 // Y coordinate / 2
// wrap Y coordinate
// int wy = g->wrapy;
// while (y < 0) y += wy;
// while (y >= wy) y -= wy;
// Implemented as: subtract wrapy until the result is negative,
// then add wrapy until the result is non-negative.
// NOTE(review): with wrapy == 0 and y >= 0 the first loop never ends - callers must keep wrapy > 0.
2: ldrh r1,[r4,#SSEGM_WRAPY] // get wrapy
3: subs r2,r1 // subtract wrapy
bpl 3b // repeat while result is not negative
4: adds r2,r1 // add wrapy
bmi 4b // repeat while result is negative
// get X coordinate -> R1
// int x = g->offx;
6: ldrh r1,[r4,#SSEGM_OFFX] // get offset at X direction
sxth r1,r1 // expand to signed
// wrap X coordinate
// int wx = g->wrapx;
// while (x < 0) x += wx;
// while (x >= wx) x -= wx;
// (same subtract/add scheme as for Y)
// NOTE(review): with wrapx == 0 and x >= 0 the first loop never ends - callers must keep wrapx > 0.
ldrh r0,[r4,#SSEGM_WRAPX] // get wrapx
3: subs r1,r0 // subtract wrapx
bpl 3b // repeat while result is not negative
4: adds r1,r0 // add wrapx
bmi 4b // repeat while result is negative
// ---- process 1st format group: GF_COLOR
// get format -> R0
6: ldrb r0,[r4,#SSEGM_FORM] // get current format
// serve format GF_COLOR
tst r0,r0 // format GF_COLOR ? (format value 0)
bne 7f // no
// u32 par = ((y & 1) == 0) ? g->par : g->par2
lsrs r2,#1 // check bit 0 of Y coordinate (shifted out into carry)
ldr r1,[r4,#SSEGM_PAR] // get par for even line
bcc 2f // even line
ldr r1,[r4,#SSEGM_PAR2] // get par2 for odd line
// *cbuf++ = w/4; // number of pixels/4
2: lsrs r2,r3,#2 // width/4
ldr r6,[sp,#4] // get pointer to control buffer
stmia r6!,{r2} // store width/4
// *cbuf++ = (u32)dbuf; // pointer to data buffer
ldr r0,[sp,#8] // get pointer to data buffer
stmia r6!,{r0} // store pointer to data
str r6,[sp,#4] // save new pointer to control buffer
// dbuf = RenderColor(dbuf, par, w/4);
bl RenderColor
str r0,[sp,#8] // store new pointer to data buffer
b Render_SegmNext
// ---- process 2nd format group: using control buffer cbuf
// prepare input argument video segment -> [SP+0]
7: str r4,[sp,#0] // prepare argument passed on stack - current video segment
// prepare function address -> R7
adr r7,Render_FncAddr // get address of jump table
lsls r6,r0,#2 // format * 4
ldr r7,[r7,r6] // load function address -> R7
// check 2nd format group
cmp r0,#GF_GRP2MAX // check 2nd format group
bhi 2f // > 2nd group
// cbuf = RenderGraph8(cbuf, x, y, w, g);
// (R1 = x, R2 = y, R3 = w are still set from above; the function writes control entries itself)
ldr r0,[sp,#4] // get pointer to control buffer
blx r7 // call render function
str r0,[sp,#4] // save new pointer to control buffer
b Render_SegmNext
// ---- process 3rd format group: using data buffer dbuf
// *cbuf++ = w/4; // number of pixels/4
2: lsrs r0,r3,#2 // width/4
ldr r6,[sp,#4] // get pointer to control buffer
stmia r6!,{r0} // store width/4
// *cbuf++ = (u32)dbuf; // pointer to data buffer
ldr r0,[sp,#8] // get pointer to data buffer
stmia r6!,{r0} // store pointer to data
str r6,[sp,#4] // save new pointer to control buffer
// dbuf = <render function of the format>(dbuf, x, y, w, g);
// (R0 = dbuf, R1 = x, R2 = y, R3 = w, segment pointer at [SP+0])
blx r7 // call render function
str r0,[sp,#8] // store new pointer to data buffer
Render_SegmNext:
// next video segment
adds r4,#SSEGM_SIZE // shift pointer to next video segment
subs r5,#1 // counter of video segments
bne Render_SegmLoop // next video segment
// ---- clear rest of line, write pointer to control buffer
Render_Clear:
// return current control buffer
ldr r0,[sp,#4] // control buffer
// check if some pixels left
ldr r1,[sp,#16] // number of remaining pixels
lsrs r1,#2 // number of pixels/4 (= number of 4-pixels)
beq 9f // no pixels left
// write size and address to control buffer
ldr r2,Render_LineBuf0Addr // data buffer with black color
stmia r0!,{r1,r2} // write number of 4-pixels and pointer to data buffer to control buffer
// pop registers and return (return control buffer in r0)
9: add sp,#20
pop {r4-r7,pc}
// ---- literal data of Render (word aligned)
.align 2
// pointer to pointer with current video screen
Render_pScreenAddr:
.word pScreen
// pointer to buffer with black color
Render_LineBuf0Addr:
.word LineBuf0
// pointers to render functions
// One word per format, in format-number order; Render indexes this table with format*4,
// so the order of entries must match the numeric values of the GF_* formats.
Render_FncAddr:
// 1st format group
.word RenderColor // GF_COLOR simple color (par=color pattern 4-pixels even line, par2=color pattern 4-pixels odd line)
// 2nd format group
.word RenderGraph8 // GF_GRAPH8 native 8-bit graphics (X1Y1R2G2B2) - fast, transfers "as is" to PIO
.word RenderTile // GF_TILE tiles
.word RenderTile2 // GF_TILE alternate tiles
.word RenderProgress // GF_PROGRESS horizontal progress indicator
.word RenderGrad1 // render gradient with 1 line GF_GRAD1
.word RenderGrad2 // render gradient with 2 lines GF_GRAD2
// 3rd format group
.word RenderGraph4 // GF_GRAPH4 4-bit graphics
.word RenderGraph2 // GF_GRAPH2 2-bit graphics
.word RenderGraph1 // GF_GRAPH1 1-bit graphics
.word RenderMText // GF_MTEXT 8-pixel mono text
.word RenderAText // GF_ATEXT 8-pixel attribute text, character + 2x4 bit attributes
.word RenderFText // GF_FTEXT 8-pixel foreground color text, character + foreground color
.word RenderCText // GF_CTEXT 8-pixel color text, character + background color + foreground color
.word RenderGText // GF_GTEXT 8-pixel gradient text (par = pointer to 1-bit font, par2 = pointer to color array)
.word RenderDText // GF_DTEXT 8-pixel double gradient text (par = pointer to 1-bit font, par2 = pointer to color array)
.word RenderLevel // GF_LEVEL level graph
.word RenderLevelGrad // GF_LEVELGRAD level gradient graph
.word RenderOscil // GF_OSCIL oscilloscope pixel graph
.word RenderOscLine // GF_OSCLINE oscilloscope line graph
.word RenderPlane2 // GF_PLANE2 4 colors on 2 graphic planes
.word RenderAttrib8 // GF_ATTRIB8 2x4 bit color attribute per 8x8 pixel sample
.word RenderGraph8Mat // GF_GRAPH8MAT 8-bit graphics with 2D matrix transformation
.word RenderGraph8Persp // GF_GRAPH8PERSP 8-bit graphics with perspective projection
.word RenderTilePersp // GF_TILEPERSP tiles with perspective
.word RenderTilePersp15 // GF_TILEPERSP15 tiles with perspective, 1.5 pixels
.word RenderTilePersp2 // GF_TILEPERSP2 tiles with perspective, double pixels
.word RenderTilePersp3 // GF_TILEPERSP3 tiles with perspective, triple pixels
.word RenderTilePersp4 // GF_TILEPERSP4 tiles with perspective, quadruple pixels

View file

@ -1,707 +0,0 @@
// ****************************************************************************
//
// VGA screen layout
//
// ****************************************************************************
#include "include.h"
// current video screen
sScreen Screen = { .num = 0 }; // default video screen (starts with 0 strips)
sScreen* pScreen = &Screen; // pointer to current video screen (initially the default screen)
// clear screen (set 0 strips, does not modify sprites)
//  s ... screen to clear
// Only the strip counter is reset; strip and segment contents are left as they are.
void ScreenClear(sScreen* s)
{
__dmb(); // barrier before the counter changes
s->num = 0;
__dmb(); // barrier after the counter changes
}
// Append an empty strip to a screen.
//  s = screen to extend
//  height = height of the new strip
// Returns a pointer to the new strip (it has 0 segments).
// The strip is initialised first and only then published by incrementing
// the strip count. No check is made against the capacity of s->strip[],
// so the caller must not add more strips than the array can hold.
sStrip* ScreenAddStrip(sScreen* s, int height)
{
	int idx = s->num;
	sStrip* strip = &s->strip[idx];

	// prepare the new entry while it is still outside the counted range
	strip->num = 0;
	strip->height = height;

	// publish it
	__dmb();
	s->num = idx + 1;
	__dmb();

	return strip;
}
// Append an empty segment to a video strip and initialise it to defaults.
//  strip = strip to extend
//  width = segment width in pixels
// Returns a pointer to the new segment.
// Defaults: format GF_COLOR with par = par2 = 0, no data buffer, no offset,
// pitch and X wrap equal to the width, Y wrap equal to the strip height,
// double-Y off. par3 is not initialised here.
// No check is made against the capacity of strip->seg[].
sSegm* ScreenAddSegm(sStrip* strip, int width)
{
	int idx = strip->num;
	sSegm* seg = &strip->seg[idx];

	// geometry
	seg->width = width;
	seg->wb = width;
	seg->wrapx = width;
	seg->wrapy = strip->height;
	seg->offx = 0;
	seg->offy = 0;

	// content
	seg->form = GF_COLOR;
	seg->data = NULL;
	seg->par = 0;
	seg->par2 = 0;
	seg->dbly = false;

	// publish the segment only after it is fully set up
	__dmb();
	strip->num = idx + 1;
	__dmb();

	return seg;
}
// Set a video segment to the simple color format GF_COLOR.
//  segm = segment to set up
//  col1 = color pattern 4-pixels even line (use macro MULTICOL)
//  col2 = color pattern 4-pixels odd line (use macro MULTICOL)
void ScreenSegmColor(sSegm* segm, u32 col1, u32 col2)
{
	// store both patterns, then select the format
	segm->par2 = col2;
	segm->par = col1;

	__dmb();
	segm->form = GF_COLOR;
	__dmb();
}
// Set a video segment to a gradient with 1 line (GF_GRAD1).
//  segm = segment to set up
//  data = pointer to data buffer with gradient
//  wb = pitch - length of buffer
// To scroll the gradient, set virtual dimension wrapx, then shift offx.
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmGrad1(sSegm* segm, const void* data, int wb)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->wb = wb;
	segm->data = data;
	__dmb();

	segm->form = GF_GRAD1;
	__dmb();
}
// Set a video segment to a gradient with 2 lines (GF_GRAD2).
//  segm = segment to set up
//  data = pointer to data buffer with gradient
//  wb = pitch - length of buffer
// To scroll the gradient, set virtual dimension wrapx, then shift offx.
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmGrad2(sSegm* segm, const void* data, int wb)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->wb = wb;
	segm->data = data;
	__dmb();

	segm->form = GF_GRAD2;
	__dmb();
}
// Set a video segment to native 8-bit graphics (R3G3B2), format GF_GRAPH8.
//  segm = segment to set up
//  data = pointer to data buffer
//  wb = pitch - number of bytes between lines
// To scroll the image, set virtual dimension wrapx and wrapy, then shift
// offx and offy.
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmGraph8(sSegm* segm, const void* data, int wb)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->wb = wb;
	segm->data = data;
	__dmb();

	segm->form = GF_GRAPH8;
	__dmb();
}
// Generate the 16-color palette translation table used by ScreenSegmGraph4.
//  trans = pointer to destination palette translation table (u16 trans[256])
//  pal = pointer to source palette of 16 colors (u8 pal[16])
// Each entry i packs two palette colors: the color selected by the high
// nibble of i goes to the low byte, the color selected by the low nibble
// goes to the high byte.
void GenPal16Trans(u16* trans, const u8* pal)
{
	int i;
	for (i = 0; i < 256; i++)
	{
		int hi = (i >> 4) & 0x0f;
		int lo = i & 0x0f;
		trans[i] = (u16)(pal[hi] | ((u16)pal[lo] << 8));
	}
}
// Set a video segment to 4-bit palette graphics (GF_GRAPH4).
//  segm = segment to set up
//  data = pointer to data buffer
//  trans = pointer to 16-color palette translation table (generated with
//          GenPal16Trans function); its address is stored in par
//  wb = pitch - number of bytes between lines
// To scroll the image, set virtual dimension wrapx and wrapy, then shift
// offx and offy.
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmGraph4(sSegm* segm, const void* data, const void* trans, int wb)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->par = (u32)trans;
	segm->wb = wb;
	segm->data = data;
	__dmb();

	segm->form = GF_GRAPH4;
	__dmb();
}
// Generate the 4-color palette translation table used by ScreenSegmGraph2.
//  trans = pointer to destination palette translation table (u32 trans[256])
//  pal = pointer to source palette of 4 colors (u8 pal[4])
// Each entry i packs four palette colors, one per 2-bit field of i: the
// topmost field (bits 7..6) lands in the lowest byte, the lowest field
// (bits 1..0) in the highest byte.
void GenPal4Trans(u32* trans, const u8* pal)
{
	int i, px;
	for (i = 0; i < 256; i++)
	{
		u32 packed = 0;
		for (px = 0; px < 4; px++)
		{
			int idx = (i >> (6 - 2*px)) & 0x03;
			packed |= (u32)pal[idx] << (8*px);
		}
		trans[i] = packed;
	}
}
// Set a video segment to 2-bit palette graphics (GF_GRAPH2).
//  segm = segment to set up
//  data = pointer to data buffer
//  trans = pointer to 4-color palette translation table (generated with
//          GenPal4Trans function); its address is stored in par
//  wb = pitch - number of bytes between lines
// To scroll the image, set virtual dimension wrapx and wrapy, then shift
// offx and offy.
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmGraph2(sSegm* segm, const void* data, const void* trans, int wb)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->par = (u32)trans;
	segm->wb = wb;
	segm->data = data;
	__dmb();

	segm->form = GF_GRAPH2;
	__dmb();
}
// Set a video segment to 1-bit palette graphics (GF_GRAPH1).
//  segm = segment to set up
//  data = pointer to data buffer
//  bg = background color (stored in bits 0..7 of par)
//  fg = foreground color (stored in bits 8..15 of par)
//  wb = pitch - number of bytes between lines
// To scroll the image, set virtual dimension wrapx and wrapy, then shift
// offx and offy.
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmGraph1(sSegm* segm, const void* data, u8 bg, u8 fg, int wb)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->par = bg | ((u32)fg << 8);
	segm->wb = wb;
	segm->data = data;
	__dmb();

	segm->form = GF_GRAPH1;
	__dmb();
}
// Set a video segment to 8-pixel mono text (GF_MTEXT).
//  segm = segment to set up
//  data = pointer to text buffer
//  font = pointer to 1-bit font of 256 characters of width 8 (total width
//         of image 2048 pixels); its address is stored in par
//  fontheight = font height (stored in par3)
//  bg = background color (bits 0..7 of par2)
//  fg = foreground color (bits 8..15 of par2)
//  wb = pitch - number of bytes between text lines
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmMText(sSegm* segm, const void* data, const void* font, u16 fontheight, u8 bg, u8 fg, int wb)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->par3 = fontheight;
	segm->par2 = bg | ((u32)fg << 8);
	segm->par = (u32)font;
	segm->wb = wb;
	segm->data = data;
	__dmb();

	segm->form = GF_MTEXT;
	__dmb();
}
// Set a video segment to 8-pixel attribute text (GF_ATEXT).
//  segm = segment to set up
//  data = pointer to text buffer (character + 2x4 bit attributes)
//  font = pointer to 1-bit font of 256 characters of width 8 (total width
//         of image 2048 pixels); its address is stored in par
//  fontheight = font height (stored in par3)
//  pal = pointer to palette of 16 colors; its address is stored in par2
//  wb = pitch - number of bytes between text lines
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmAText(sSegm* segm, const void* data, const void* font, u16 fontheight, const void* pal, int wb)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->par3 = fontheight;
	segm->par2 = (u32)pal;
	segm->par = (u32)font;
	segm->wb = wb;
	segm->data = data;
	__dmb();

	segm->form = GF_ATEXT;
	__dmb();
}
// Set a video segment to 8-pixel foreground color text (GF_FTEXT).
//  segm = segment to set up
//  data = pointer to text buffer (character + foreground color)
//  font = pointer to 1-bit font of 256 characters of width 8 (total width
//         of image 2048 pixels); its address is stored in par
//  fontheight = font height (stored in par3)
//  bg = background color (stored in par2)
//  wb = pitch - number of bytes between text lines
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmFText(sSegm* segm, const void* data, const void* font, u16 fontheight, u8 bg, int wb)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->par3 = fontheight;
	segm->par2 = bg;
	segm->par = (u32)font;
	segm->wb = wb;
	segm->data = data;
	__dmb();

	segm->form = GF_FTEXT;
	__dmb();
}
// Set a video segment to 8-pixel color text (GF_CTEXT).
//  segm = segment to set up
//  data = pointer to text buffer (character + background color + foreground color)
//  font = pointer to 1-bit font of 256 characters of width 8 (total width
//         of image 2048 pixels); its address is stored in par
//  fontheight = font height (stored in par3)
//  wb = pitch - number of bytes between text lines
// par2 is not written by this function.
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmCText(sSegm* segm, const void* data, const void* font, u16 fontheight, int wb)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->par3 = fontheight;
	segm->par = (u32)font;
	segm->wb = wb;
	segm->data = data;
	__dmb();

	segm->form = GF_CTEXT;
	__dmb();
}
// Set a video segment to 8-pixel gradient color text (GF_GTEXT).
//  segm = segment to set up
//  data = pointer to text buffer (character + foreground color)
//  font = pointer to 1-bit font of 256 characters of width 8 (total width
//         of image 2048 pixels); its address is stored in par
//  fontheight = font height (bits 8..15 of par3)
//  bg = background color (bits 0..7 of par3)
//  grad = pointer to array of gradient colors; its address is stored in par2
//  wb = pitch - number of bytes between text lines
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmGText(sSegm* segm, const void* data, const void* font, u8 fontheight, u8 bg, const void* grad, int wb)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->par2 = (u32)grad;
	segm->par3 = bg | (fontheight << 8);
	segm->par = (u32)font;
	segm->wb = wb;
	segm->data = data;
	__dmb();

	segm->form = GF_GTEXT;
	__dmb();
}
// Set a video segment to 8-pixel double gradient color text (GF_DTEXT).
//  segm = segment to set up
//  data = pointer to text buffer (character + foreground color)
//  font = pointer to 1-bit font of 256 characters of width 8 (total width
//         of image 2048 pixels); its address is stored in par
//  fontheight = font height (bits 8..15 of par3)
//  bg = background color (bits 0..7 of par3)
//  grad = pointer to array of gradient colors; its address is stored in par2
//  wb = pitch - number of bytes between text lines
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmDText(sSegm* segm, const void* data, const void* font, u8 fontheight, u8 bg, const void* grad, int wb)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->par2 = (u32)grad;
	segm->par3 = bg | (fontheight << 8);
	segm->par = (u32)font;
	segm->wb = wb;
	segm->data = data;
	__dmb();

	segm->form = GF_DTEXT;
	__dmb();
}
// Set a video segment to tiles (GF_TILE).
//  segm = segment to set up
//  data = pointer to tile map buffer (with tile indices)
//  tiles = pointer to 1 column of tiles, 1 pixel = 8 bits (stored in par)
//  w = tile width (must be multiple of 4, must not be 0; stored in par3)
//  h = tile height (must not be 0; stored in par2)
//  wb = pitch - number of bytes between tile map rows
// wrapx becomes the segment width rounded up to a whole number of tiles;
// wrapy becomes the current wrapy rounded up to a whole number of tiles.
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmTile(sSegm* segm, const void* data, const void* tiles, int w, int h, int wb)
{
	int cols, rows;

	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->data = data;
	segm->wb = wb;
	segm->par = (u32)tiles;
	segm->par2 = (u32)h;
	segm->par3 = (u16)w;

	// round the wrap dimensions up to whole tiles
	cols = (segm->width + w - 1) / w;
	rows = (segm->wrapy + h - 1) / h;
	segm->wrapx = cols * w;
	segm->wrapy = rows * h;
	__dmb();

	segm->form = GF_TILE;
	__dmb();
}
// Set a video segment to alternate tiles (GF_TILE2).
//  segm = segment to set up
//  data = pointer to tile map buffer (with tile indices)
//  tiles = pointer to 1 row of tiles, 1 pixel = 8 bits (stored in par)
//  w = tile width (must be multiple of 4, must not be 0; stored in par3)
//  h = tile height (must not be 0; low half of par2)
//  tilewb = tile width bytes (usually tile width * number of tiles);
//           its low 16 bits go to the high half of par2
//  wb = pitch - number of bytes between tile map rows
// wrapx becomes the segment width rounded up to a whole number of tiles;
// wrapy becomes the current wrapy rounded up to a whole number of tiles.
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmTile2(sSegm* segm, const void* data, const void* tiles, int w, int h, int tilewb, int wb)
{
	int cols, rows;

	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->data = data;
	segm->wb = wb;
	segm->par = (u32)tiles;
	segm->par2 = (u32)h + ((u32)(u16)tilewb << 16);
	segm->par3 = (u16)w;

	// round the wrap dimensions up to whole tiles
	cols = (segm->width + w - 1) / w;
	rows = (segm->wrapy + h - 1) / h;
	segm->wrapx = cols * w;
	segm->wrapy = rows * h;
	__dmb();

	segm->form = GF_TILE2;
	__dmb();
}
// Set a video segment to level graph GF_LEVEL.
//  segm = segment to set up
//  data = pointer to buffer with line samples 0..255
//  bg = background color (bits 0..7 of par)
//  fg = foreground color (bits 8..15 of par)
//  zero = Y zero level (stored in par2)
// Note the argument order: bg, fg, zero.
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmLevel(sSegm* segm, const void* data, u8 bg, u8 fg, u8 zero)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->par2 = zero;
	segm->par = bg | ((u32)fg << 8);
	segm->data = data;
	__dmb();

	segm->form = GF_LEVEL;
	__dmb();
}
// Set a video segment to level gradient graph GF_LEVELGRAD.
//  segm = segment to set up
//  data = pointer to buffer with values 0..255 of 4-pixels in rows
//  sample1 = scanline sample < data (address stored in par)
//  sample2 = scanline sample >= data (address stored in par2)
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmLevelGrad(sSegm* segm, const void* data, const void* sample1, const void* sample2)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->par2 = (u32)sample2;
	segm->par = (u32)sample1;
	segm->data = data;
	__dmb();

	segm->form = GF_LEVELGRAD;
	__dmb();
}
// Set a video segment to oscilloscope 1-pixel graph GF_OSCIL.
//  segm = segment to set up
//  data = pointer to buffer with line samples 0..255
//  bg = background color (bits 0..7 of par)
//  fg = foreground color (bits 8..15 of par)
//  pixh = height of pixels - 1 (stored in par2)
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmOscil(sSegm* segm, const void* data, u8 bg, u8 fg, int pixh)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->par2 = pixh;
	segm->par = bg | ((u32)fg << 8);
	segm->data = data;
	__dmb();

	segm->form = GF_OSCIL;
	__dmb();
}
// Set a video segment to oscilloscope line graph GF_OSCLINE.
//  segm = segment to set up
//  data = pointer to buffer with line samples 0..255
//  bg = background color (bits 0..7 of par)
//  fg = foreground color (bits 8..15 of par)
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmOscLine(sSegm* segm, const void* data, u8 bg, u8 fg)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->par = bg | ((u32)fg << 8);
	segm->data = data;
	__dmb();

	segm->form = GF_OSCLINE;
	__dmb();
}
// Generate the 2-plane palette translation table used by ScreenSegmPlane2.
//  trans = pointer to destination palette translation table (u32 trans[256])
//  pal = pointer to source palette of 4 colors (u8 pal[4])
// Each entry i packs four palette colors into one u32 (lowest byte first).
// The palette index of each color is built from a pair of flags of i:
// (B7,B3), (B6,B2), (B5,B1), (B4,B0) - the first flag of a pair selects
// B1 of the index, the second selects B0.
void GenPal4Plane(u32* trans, const u8* pal)
{
	int i;
	for (i = 0; i < 256; i++)
	{
		int c0 = (((i & B7) != 0) ? B1 : 0) | (((i & B3) != 0) ? B0 : 0);
		int c1 = (((i & B6) != 0) ? B1 : 0) | (((i & B2) != 0) ? B0 : 0);
		int c2 = (((i & B5) != 0) ? B1 : 0) | (((i & B1) != 0) ? B0 : 0);
		int c3 = (((i & B4) != 0) ? B1 : 0) | (((i & B0) != 0) ? B0 : 0);

		u32 packed = pal[c0];
		packed |= (u32)pal[c1] << 8;
		packed |= (u32)pal[c2] << 16;
		packed |= (u32)pal[c3] << 24;
		trans[i] = packed;
	}
}
// Set a video segment to 4-color on 2-planes graphics (GF_PLANE2).
//  segm = segment to set up
//  data = pointer to data buffer
//  plane = offset of 2nd graphics plane (in bytes), size of one graphics
//          plane (stored in par)
//  trans = pointer to 4-color palette translation table (generated with
//          GenPal4Plane function); its address is stored in par2
//  wb = pitch - number of bytes between lines
// To scroll the image, set virtual dimension wrapx and wrapy, then shift
// offx and offy.
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmPlane2(sSegm* segm, const void* data, int plane, const void* trans, int wb)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->par2 = (u32)trans;
	segm->par = plane;
	segm->wb = wb;
	segm->data = data;
	__dmb();

	segm->form = GF_PLANE2;
	__dmb();
}
// Set a video segment to 2x4 bit color attribute per 8x8 pixel sample
// graphics (GF_ATTRIB8).
//  segm = segment to set up
//  data = pointer to data buffer with mono pixels
//  attr = pointer to color attributes (address stored in par)
//  pal = pointer to 16-color palette table (address stored in par2)
//  wb = pitch - number of bytes between lines
// To scroll the image, set virtual dimension wrapx and wrapy, then shift
// offx and offy.
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmAttrib8(sSegm* segm, const void* data, const void* attr, const u8* pal, int wb)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->par2 = (u32)pal;
	segm->par = (u32)attr;
	segm->wb = wb;
	segm->data = data;
	__dmb();

	segm->form = GF_ATTRIB8;
	__dmb();
}
// Set a video segment to horizontal progress indicator GF_PROGRESS.
//  segm = segment to set up
//  data = pointer to buffer with values 0..255 of 4-pixels in rows
//  sample1 = scanline sample < data (address stored in par)
//  sample2 = scanline sample >= data (address stored in par2)
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmProgress(sSegm* segm, const void* data, const void* sample1, const void* sample2)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->par2 = (u32)sample2;
	segm->par = (u32)sample1;
	segm->data = data;
	__dmb();

	segm->form = GF_PROGRESS;
	__dmb();
}
// Set a video segment to 8-bit graphics with 2D matrix transformation
// (GF_GRAPH8MAT).
//  segm = segment to set up
//  data = pointer to image data (width and height of image must be power of 2)
//  mat = pointer to array of 6 matrix integer parameters m11, m12...m23
//        (exported with ExportInt function); its address is stored in par
//  xbits = number of bits of image width (image width must be power of 2
//          and must be = pitch width bytes); low half of par2
//  ybits = number of bits of image height (image height must be power
//          of 2); high half of par2
// The function resets offx = 0, offy = 0 and wrapx = segment width and sets
// the pitch to 1<<xbits. wrapy is left as it is.
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmGraph8Mat(sSegm* segm, const void* data, const int* mat, u16 xbits, u16 ybits)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	// source image
	segm->data = data;
	segm->wb = (1<<xbits);

	// no scrolling, wrap at the segment width
	segm->wrapx = segm->width;
	segm->offy = 0;
	segm->offx = 0;

	// transformation parameters
	segm->par2 = xbits | ((u32)ybits << 16);
	segm->par = (u32)mat;
	__dmb();

	segm->form = GF_GRAPH8MAT;
	__dmb();
}
// Set a video segment to 8-bit graphics with perspective projection
// (GF_GRAPH8PERSP).
//  segm = segment to set up
//  data = pointer to image data (width and height of image must be power of 2)
//  mat = pointer to array of 6 matrix integer parameters m11, m12...m23
//        (exported with ExportInt function); its address is stored in par
//  xbits = number of bits of image width (image width must be power of 2
//          and must be = pitch width bytes); low half of par2
//  ybits = number of bits of image height (image height must be power
//          of 2); high half of par2
//  horiz = horizon offset (stored in par3)
// The function resets offx = 0, offy = 0 and wrapx = segment width and sets
// the pitch to 1<<xbits. wrapy is left as it is.
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
void ScreenSegmGraph8Persp(sSegm* segm, const void* data, const int* mat, u16 xbits, u16 ybits, u16 horiz)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	// source image
	segm->data = data;
	segm->wb = (1<<xbits);

	// no scrolling, wrap at the segment width
	segm->wrapx = segm->width;
	segm->offy = 0;
	segm->offx = 0;

	// projection parameters
	segm->par3 = horiz;
	segm->par2 = xbits | ((u32)ybits << 16);
	segm->par = (u32)mat;
	__dmb();

	segm->form = GF_GRAPH8PERSP;
	__dmb();
}
// Common setup for all "tiles with perspective" formats. The five public
// ScreenSegmTilePersp* functions differ only in the final format code.
//  segm = segment to set up
//  form = final graphics format (one of GF_TILEPERSP*)
//  map = pointer to tile map with tile indices (stored in data)
//  tiles = pointer to tile images (address stored in par)
//  mat = pointer to matrix parameters (address stored in par2)
//  mapwbits = bits of tile map width (low byte of wb)
//  maphbits = bits of tile map height (high byte of wb)
//  tilebits = bits of tile width and height (low byte of par3)
//  horizon = horizon offset/4 (its 8-bit pattern goes to the high byte of par3)
// Resets offx = 0, offy = 0 and wrapx = segment width; wrapy is left as it is.
// Sequence: GF_COLOR -> parameters -> final format, each step fenced by __dmb().
static void ScreenSegmTilePerspSetup(sSegm* segm, u8 form, const u8* map, const u8* tiles, const int* mat,
	u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon)
{
	// fall back to plain color while the parameters change
	segm->form = GF_COLOR;
	__dmb();

	segm->data = map;
	segm->wb = mapwbits | ((u16)maphbits<<8);
	segm->offx = 0;
	segm->offy = 0;
	segm->wrapx = segm->width;
	segm->par = (u32)tiles;
	segm->par2 = (u32)mat;
	segm->par3 = tilebits | ((u16)horizon<<8);
	__dmb();

	segm->form = form;
	__dmb();
}

// set video segment to tiles with perspective
//  map = pointer to tile map with tile indices (width and height must be power of 2)
//  tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2)
//  mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
//  mapwbits = number of bits of tile map width
//  maphbits = number of bits of tile map height
//  tilebits = number of bits of tile width and height
//  horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling)
// Resets offx = 0, offy = 0, wrapx = segment width.
void ScreenSegmTilePersp(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
	u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon)
{
	ScreenSegmTilePerspSetup(segm, GF_TILEPERSP, map, tiles, mat, mapwbits, maphbits, tilebits, horizon);
}

// set video segment to tiles with perspective, 1.5 pixels
//  (parameters as for ScreenSegmTilePersp)
void ScreenSegmTilePersp15(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
	u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon)
{
	ScreenSegmTilePerspSetup(segm, GF_TILEPERSP15, map, tiles, mat, mapwbits, maphbits, tilebits, horizon);
}

// set video segment to tiles with perspective, double pixels
//  (parameters as for ScreenSegmTilePersp)
void ScreenSegmTilePersp2(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
	u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon)
{
	ScreenSegmTilePerspSetup(segm, GF_TILEPERSP2, map, tiles, mat, mapwbits, maphbits, tilebits, horizon);
}

// set video segment to tiles with perspective, triple pixels
//  (parameters as for ScreenSegmTilePersp)
void ScreenSegmTilePersp3(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
	u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon)
{
	ScreenSegmTilePerspSetup(segm, GF_TILEPERSP3, map, tiles, mat, mapwbits, maphbits, tilebits, horizon);
}

// set video segment to tiles with perspective, quadruple pixels
//  (parameters as for ScreenSegmTilePersp)
void ScreenSegmTilePersp4(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
	u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon)
{
	ScreenSegmTilePerspSetup(segm, GF_TILEPERSP4, map, tiles, mat, mapwbits, maphbits, tilebits, horizon);
}

View file

@ -1,307 +0,0 @@
// ****************************************************************************
//
// VGA screen layout
//
// ****************************************************************************
#ifndef _VGA_SCREEN_H
#define _VGA_SCREEN_H
// video segment (on change update SSEGM_* in define.h)
// Descriptor of one segment of a video strip. The meaning of par, par2 and
// par3 depends on the graphics format in 'form'; the ScreenSegm* setup
// functions fill them in.
// NOTE(review): the SSEGM_* names suggest the field layout is mirrored as
// constants in define.h - keep field order and sizes in sync when editing.
typedef struct {
u16 width; // SSEGM_WIDTH width of this video segment in pixels (must be multiple of 4, 0=inactive segment)
u16 wb; // SSEGM_WB pitch - number of bytes between lines
s16 offx; // SSEGM_OFFX display offset at X direction (must be multiple of 4)
s16 offy; // SSEGM_OFFY display offset at Y direction
u16 wrapx; // SSEGM_WRAPX wrap width in X direction (number of pixels, must be multiple of 4 and > 0)
// text modes: wrapx must be multiple of 8
u16 wrapy; // SSEGM_WRAPY wrap width in Y direction (number of lines, cannot be 0)
const void* data; // SSEGM_DATA pointer to video buffer with image data
u8 form; // SSEGM_FORM graphics format GF_*
bool dbly; // SSEGM_DBLY double Y (2 scanlines per 1 image line)
u16 par3; // SSEGM_PAR3 parameter 3
u32 par; // SSEGM_PAR parameter 1
u32 par2; // SSEGM_PAR2 parameter 2
} sSegm;
// video strip (on change update SSTRIP_* in define.h)
// A strip is a group of scanlines of the given height, holding up to
// SEGMAX video segments; 'num' says how many entries of seg[] are in use.
typedef struct {
u16 height; // SSTRIP_HEIGHT height of this strip in number of scanlines
u16 num; // SSTRIP_NUM number of video segments
sSegm seg[SEGMAX]; // SSTRIP_SEG list of video segments
} sStrip;
// video screen (on change update SSCREEN_* in define.h)
// A screen is a list of up to STRIPMAX video strips; 'num' says how many
// entries of strip[] are in use.
typedef struct {
u16 num; // SSCREEN_NUM number of video strips
u16 backup; // SSCREEN_BACKUP backup number of video strips during display OFF
sStrip strip[STRIPMAX]; // SSCREEN_STRIP list of video strips
} sScreen;
// current video screen
extern sScreen Screen; // default video screen
extern sScreen* pScreen; // pointer to current video screen
// clear screen (set 0 strips, does not modify sprites)
void ScreenClear(sScreen* s);
// add empty strip to the screen (returns pointer to the strip)
sStrip* ScreenAddStrip(sScreen* s, int height);
// add empty segment to video strip (returns pointer to the segment and initialises it to defaults)
sSegm* ScreenAddSegm(sStrip* strip, int width);
// set video segment to simple color format GF_COLOR
// col1 = color pattern 4-pixels even line (use macro MULTICOL)
// col2 = color pattern 4-pixels odd line (use macro MULTICOL)
void ScreenSegmColor(sSegm* segm, u32 col1, u32 col2);
// set video segment to gradient with 1 line
// data = pointer to data buffer with gradient
// wb = pitch - length of buffer
// To scroll gradient, set virtual dimension wrapx, then shift offx
void ScreenSegmGrad1(sSegm* segm, const void* data, int wb);
// set video segment to gradient with 2 lines
// data = pointer to data buffer with gradient
// wb = pitch - length of buffer
// To scroll gradient, set virtual dimension wrapx, then shift offx
void ScreenSegmGrad2(sSegm* segm, const void* data, int wb);
// set video segment to native 8-bit graphics (R3G3B2)
// data = pointer to data buffer
// wb = pitch - number of bytes between lines
// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy.
void ScreenSegmGraph8(sSegm* segm, const void* data, int wb);
// generate 16-color palette translation table
// trans = pointer to destination palette translation table (u16 trans[256])
// pal = pointer to source palette of 16 colors (u8 pal[16])
void GenPal16Trans(u16* trans, const u8* pal);
// set video segment to 4-bit palette graphics
// data = pointer to data buffer
// trans = pointer to 16-color palette translation table (generated with GenPal16Trans function)
// wb = pitch - number of bytes between lines
// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy.
void ScreenSegmGraph4(sSegm* segm, const void* data, const void* trans, int wb);
// generate palette 4 translation table for function ScreenSegmGraph2
// trans = pointer to destination palette translation table (u32 trans[256])
// pal = pointer to source palette of 4 colors (u8 pal[4])
void GenPal4Trans(u32* trans, const u8* pal);
// set video segment to 2-bit palette graphics
// data = pointer to data buffer
// trans = pointer to 4-color palette translation table (generated with GenPal4Trans function)
// wb = pitch - number of bytes between lines
// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy.
void ScreenSegmGraph2(sSegm* segm, const void* data, const void* trans, int wb);
// set video segment to 1-bit palette graphics
// data = pointer to data buffer
// bg = background color
// fg = foreground color
// wb = pitch - number of bytes between lines
// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy.
void ScreenSegmGraph1(sSegm* segm, const void* data, u8 bg, u8 fg, int wb);
// set video segment to 8-pixel mono text
// data = pointer to text buffer
// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels)
// fontheight = font height
// bg = background color
// fg = foreground color
// wb = pitch - number of bytes between text lines
void ScreenSegmMText(sSegm* segm, const void* data, const void* font, u16 fontheight, u8 bg, u8 fg, int wb);
// set video segment to 8-pixel attribute text
// data = pointer to text buffer (character + 2x4 bit attributes)
// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels)
// fontheight = font height
// pal = pointer to palette of 16 colors
// wb = pitch - number of bytes between text lines
void ScreenSegmAText(sSegm* segm, const void* data, const void* font, u16 fontheight, const void* pal, int wb);
// set video segment to 8-pixel foreground color text
// data = pointer to text buffer (character + foreground color)
// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels)
// fontheight = font height
// bg = background color
// wb = pitch - number of bytes between text lines
void ScreenSegmFText(sSegm* segm, const void* data, const void* font, u16 fontheight, u8 bg, int wb);
// set video segment to 8-pixel color text
// data = pointer to text buffer (character + background color + foreground color)
// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels)
// fontheight = font height
// wb = pitch - number of bytes between text lines
void ScreenSegmCText(sSegm* segm, const void* data, const void* font, u16 fontheight, int wb);
// set video segment to 8-pixel gradient color text
// data = pointer to text buffer (character + foreground color)
// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels)
// fontheight = font height
// bg = background color
// grad = pointer to array of gradient colors
// wb = pitch - number of bytes between text lines
void ScreenSegmGText(sSegm* segm, const void* data, const void* font, u8 fontheight, u8 bg, const void* grad, int wb);
// set video segment to 8-pixel double gradient color text
// data = pointer to text buffer (character + foreground color)
// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels)
// fontheight = font height
// bg = background color
// grad = pointer to array of gradient colors
// wb = pitch - number of bytes between text lines
void ScreenSegmDText(sSegm* segm, const void* data, const void* font, u8 fontheight, u8 bg, const void* grad, int wb);
// set video segment to tiles
// data = pointer to tile map buffer (with tile indices)
// tiles = pointer to 1 column of tiles, 1 pixel = 8 bits
// w = tile width (must be multiple of 4)
// h = tile height
// wb = pitch - number of bytes between tile map rows
void ScreenSegmTile(sSegm* segm, const void* data, const void* tiles, int w, int h, int wb);
// set video segment to alternate tiles
// data = pointer to tile map buffer (with tile indices)
// tiles = pointer to 1 row of tiles, 1 pixel = 8 bits
// w = tile width (must be multiple of 4)
// h = tile height
// tilewb = tile width bytes (usually tile width * number of tiles)
// wb = pitch - number of bytes between tile map rows
void ScreenSegmTile2(sSegm* segm, const void* data, const void* tiles, int w, int h, int tilewb, int wb);
// set video segment to level graph GF_LEVEL
// data = pointer to buffer with line samples 0..255
// bg = background color
// fg = foreground color
// zero = Y zero level
// The parameter names follow the order used by the function definition
// (bg, fg, zero). All three are u8, so a swapped order compiles silently.
void ScreenSegmLevel(sSegm* segm, const void* data, u8 bg, u8 fg, u8 zero);
// set video segment to level gradient graph GF_LEVELGRAD
// data = pointer to buffer with values 0..255 of 4-pixels in rows
// sample1 = scanline sample < data
// sample2 = scanline sample >= data
void ScreenSegmLevelGrad(sSegm* segm, const void* data, const void* sample1, const void* sample2);
// set video segment to oscilloscope 1-pixel graph GF_OSCIL
// data = pointer to buffer with line samples 0..255
// bg = background color
// fg = foreground color
// pixh = height of pixels - 1
void ScreenSegmOscil(sSegm* segm, const void* data, u8 bg, u8 fg, int pixh);
// set video segment to oscilloscope line graph GF_OSCLINE
// data = pointer to buffer with line samples 0..255
// bg = background color
// fg = foreground color
void ScreenSegmOscLine(sSegm* segm, const void* data, u8 bg, u8 fg);
// generate palette 4-color translation table for function ScreenSegmPlane2
// trans = pointer to destination palette translation table (u32 trans[256])
// pal = pointer to source palette of 4 colors (u8 pal[4])
void GenPal4Plane(u32* trans, const u8* pal);
// set video segment to 4-color on 2-planes graphics
// data = pointer to data buffer
// plane = offset of 2nd graphics plane (in bytes), size of one graphics plane
// trans = pointer to 4-color palette translation table (generated with GenPal4Plane function)
// wb = pitch - number of bytes between lines
// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy.
void ScreenSegmPlane2(sSegm* segm, const void* data, int plane, const void* trans, int wb);
// set video segment to 2x4 bit color attribute per 8x8 pixel sample graphics
// data = pointer to data buffer with mono pixels
// attr = pointer to color attributes
// pal = pointer to 16-color palette table
// wb = pitch - number of bytes between lines
// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy.
void ScreenSegmAttrib8(sSegm* segm, const void* data, const void* attr, const u8* pal, int wb);
// set video segment to horizontal progress indicator GF_PROGRESS
// data = pointer to buffer with values 0..255 of 4-pixels in rows
// sample1 = scanline sample < data
// sample2 = scanline sample >= data
void ScreenSegmProgress(sSegm* segm, const void* data, const void* sample1, const void* sample2);
// Set video segment to 8-bit graphics with 2D matrix transformation.
// segm = pointer to the video segment to set up
// data = pointer to image data (width and height of image must be power of 2)
// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
// xbits = number of bits of image width (image width must be power of 2 and must be = pitch width bytes)
//         (presumably log2 of the width, e.g. 8 for 256 pixels - TODO confirm)
// ybits = number of bits of image height (image height must be power of 2)
// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
void ScreenSegmGraph8Mat(sSegm* segm, const void* data, const int* mat, u16 xbits, u16 ybits);
// Set video segment to 8-bit graphics with perspective projection.
// segm = pointer to the video segment to set up
// data = pointer to image data (width and height of image must be power of 2)
// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
// xbits = number of bits of image width (image width must be power of 2 and must be = pitch width bytes)
// ybits = number of bits of image height (image height must be power of 2)
// horiz = horizon offset
// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
void ScreenSegmGraph8Persp(sSegm* segm, const void* data, const int* mat, u16 xbits, u16 ybits, u16 horiz);
// Set video segment to tiles with perspective (single pixels; see the 15/2/3/4
// variants for wider pixels).
// segm = pointer to the video segment to set up
// map = pointer to tile map with tile indices (width and height must be power of 2)
// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2)
// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
// mapwbits = number of bits of tile map width
// maphbits = number of bits of tile map height
// tilebits = number of bits of tile width and height
// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling)
// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
void ScreenSegmTilePersp(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon);
// Set video segment to tiles with perspective, 1.5 pixels.
// Same parameters as ScreenSegmTilePersp.
// segm = pointer to the video segment to set up
// map = pointer to tile map with tile indices (width and height must be power of 2)
// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2)
// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
// mapwbits = number of bits of tile map width
// maphbits = number of bits of tile map height
// tilebits = number of bits of tile width and height
// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling)
// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
void ScreenSegmTilePersp15(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon);
// Set video segment to tiles with perspective, double pixels.
// Same parameters as ScreenSegmTilePersp.
// segm = pointer to the video segment to set up
// map = pointer to tile map with tile indices (width and height must be power of 2)
// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2)
// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
// mapwbits = number of bits of tile map width
// maphbits = number of bits of tile map height
// tilebits = number of bits of tile width and height
// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling)
// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
void ScreenSegmTilePersp2(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon);
// Set video segment to tiles with perspective, triple pixels.
// Same parameters as ScreenSegmTilePersp.
// segm = pointer to the video segment to set up
// map = pointer to tile map with tile indices (width and height must be power of 2)
// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2)
// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
// mapwbits = number of bits of tile map width
// maphbits = number of bits of tile map height
// tilebits = number of bits of tile width and height
// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling)
// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
void ScreenSegmTilePersp3(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon);
// Set video segment to tiles with perspective, quadruple pixels.
// Same parameters as ScreenSegmTilePersp.
// segm = pointer to the video segment to set up
// map = pointer to tile map with tile indices (width and height must be power of 2)
// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2)
// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
// mapwbits = number of bits of tile map width
// maphbits = number of bits of tile map height
// tilebits = number of bits of tile width and height
// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling)
// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
void ScreenSegmTilePersp4(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon);
#endif // _VGA_SCREEN_H

View file

@ -1,40 +1,17 @@
// ****************************************************************************
//
// VGA videomodes
//
// file derived from the PicoVGA project
// https://github.com/Panda381/PicoVGA
// by Miroslav Nemecek
//
// ****************************************************************************
#include "include.h"
sVmode Vmode; // videomode setup
sVgaCfg Cfg; // required configuration
sCanvas Canvas; // canvas of draw box
// Default 16-color palette (EGA colors), one entry per color index 0..15.
// - do not set "const", to stay in faster RAM
// Each entry comment lists a decimal number, then the 24-bit RGB color and its name.
// NOTE(review): the decimal number is presumably the 8-bit value of the CGACOL_n
// constant - confirm against its definition.
u8 DefPal16[16] = {
CGACOL_0, // 0 // 0x000000 black
CGACOL_1, // 2 // 0x0000AA dark blue
CGACOL_2, // 20 // 0x00B600 dark green
CGACOL_3, // 22 // 0x00B6AA dark cyan
CGACOL_4, // 160 // 0xB60000 dark red
CGACOL_5, // 162 // 0xB600AA dark magenta
CGACOL_6, // 168 // 0xB64900 brown
CGACOL_7, // 182 // 0xB6B6AA light gray
CGACOL_8, // 73 // 0x494955 dark gray
CGACOL_9, // 75 // 0x4949FF light blue
CGACOL_10, // 93 // 0x49FF55 light green
CGACOL_11, // 95 // 0x49FFFF light cyan
CGACOL_12, // 233 // 0xFF4955 light red
CGACOL_13, // 235 // 0xFF49FF light magenta
CGACOL_14, // 253 // 0xFFFF55 yellow
CGACOL_15, // 255 // 0xFFFFFF white
};
// 16-color palette translation table
u16 Pal16Trans[256];
/*
http://martin.hinner.info/vga/pal.html
@ -45,211 +22,9 @@ time 0:
- line 3..35: (33) dark
- line 36..515: (480) image lines 0..479
- line 516..525: (10) dark
PAL system (625 lines total):
time 0:
- line 1, 2: (2) vertical sync + vertical sync
- line 3: (1) vertical sync + half sync
- line 4, 5: (2) half sync + half sync
- line 6..23: (18) dark
- line 24..46: (23) dark image
time 46:
- line 47..286: (240) image lines odd 1, 3, 5 ... 479
- line 287..310: (24) dark image
- line 311..312: (2) half sync + half sync
- line 313: (1) half sync + vertical sync
vsync time 313 (vsync time 312.5):
- line 314..315: (2) vertical sync + vertical sync
- line 316..317: (2) half sync + half sync
- line 318..335: (18) dark
- line 336..358: (23) dark image
time 358 (45.5 from last vsync)
- line 359..598: (240) image lines even 0, 2, ... 478
- line 599..622: (24) dark image
- line 623..625: (3) half sync + half sync
time 625:
NTSC system (525 lines total):
time 0, even field:
- line 1..3: (3) vertical sync + vertical sync (6 serration pulses: 27.3 us low, 4.5 us high)
- line 4..6: (3) half sync + half sync (6 equalizing pulses: 2.3 us low, 29.5 us high)
- line 7..16: (10) dark (blanked video: 4.7 us low, 58.9 us high)
- line 17,18: (2) dark image
time 18:
- line 19..258: (240) image lines even 0, 2, ... 478
- line 259: (1) dark image
- line 260..262: (3) half sync + half sync (7 equalizing pulses)
- line 263: (1) half sync + vertical sync (6 serration pulses)
time 263 (vsync time 262.5):
- line 264,265: (2) vertical sync + vertical sync
- line 266: (1) vertical sync + half sync (5 equalizing pulses)
- line 267..268: (2) half sync + half sync
- line 269..279: (11) dark
- line 280..281: (2) dark image
time 281 (18.5 from last vsync)
- line 282..521: (240) image lines odd 1, 3, 5 ... 479
- line 522: (1) dark image
- line 523..525: (3) half sync + half sync
time 525:
*/
// === TV videomodes
// TV PAL interlaced 5:4 720x576 (4:3 768x576, 16:9 1024x576)
// Vertical budget: the sync half-pulses (5+5+5 on subframe 1, 5+4+6 on subframe 2)
// add up to 30 half-lines = 15 lines; back porch + active + front porch of both
// subframes add 2 x (41+240+24) = 610 lines; 15 + 610 = 625 = vtot.
// Horizontal: hfront+hsync+hback+hfull = 59.41 us, which is less than htot (64 us),
// because hfull is given shorter than the formal 51.95 us.
const sVideo VideoPAL = {
// horizontal (horizontal frequency 15625 Hz, effective sync pulses 16000 Hz)
.htot= 64.00000f, // total scanline in [us]
.hfront= 1.65000f, // H front porch (after image, before HSYNC) in [us]
.hsync= 4.70000f, // H sync pulse in [us]
.hback= 5.70000f, // H back porch (after HSYNC, before image) in [us]
.hfull= 47.36000f, // H full visible in [us] (formally should be 51.95 us)
// vertical (vertical frequency 50 Hz)
.vtot=625, // total scanlines (both subframes)
.vmax=576, // maximal height
// subframe 1
.vsync1=5, // V sync (half-)pulses on subframe 1
.vpost1=5, // V sync post half-pulses on subframe 1
.vback1=18+23, // V back porch (after VSYNC, before image) on subframe 1: 18 dark + 23 dark image lines
.vact1=240, // active visible scanlines, subframe 1 (formally should be 288, 576 total)
.vfront1=24, // V front porch (after image, before VSYNC) on subframe 1
.vpre1=5, // V sync pre half-pulses on subframe 1
// subframe 2 (ignored if not interlaced)
.vsync2=5, // V sync half-pulses on subframe 2
.vpost2=4, // V sync post half-pulses on subframe 2
.vback2=18+23, // V back porch (after VSYNC, before image) on subframe 2: 18 dark + 23 dark image lines
.vact2=240, // active visible scanlines, subframe 2 (formally should be 288, 576 total)
.vfront2=24, // V front porch (after image, before VSYNC) on subframe 2
.vpre2=6, // V sync pre half-pulses on subframe 2
// name
.name = "PAL ", // video timing name (VIDEO_NAME_LEN characters + terminating 0)
// flags
.inter=True, // interlaced (use subframes)
.psync=False, // positive synchronization
.odd=True, // first sub-frame is odd lines 1, 3, 5,... (PAL)
};
// TV PAL progressive 5:4 360x288 (4:3 384x288, 16:9 512x288)
// Single (non-interlaced) frame: only the subframe 1 entries are used, all
// subframe 2 entries are zero.
// Vertical budget: vsync1 + vback1 + vact1 + vfront1 = 2 + 43 + 240 + 27 = 312 = vtot.
const sVideo VideoPALp = {
// horizontal (horizontal frequency 15625 Hz)
.htot= 64.00000f, // total scanline in [us]
.hfront= 1.65000f, // H front porch (after image, before HSYNC) in [us]
.hsync= 4.70000f, // H sync pulse in [us]
.hback= 5.70000f, // H back porch (after HSYNC, before image) in [us]
.hfull= 47.36000f, // H full visible in [us] (formally should be 51.95 us)
// vertical (vertical frequency 50 Hz)
.vtot=312, // total scanlines (single frame, subframe 2 is not used)
.vmax=288, // maximal height
// subframe 1
.vsync1=2, // V sync (half-)pulses on subframe 1
.vpost1=0, // V sync post half-pulses on subframe 1
.vback1=18+23+2, // V back porch (after VSYNC, before image) on subframe 1
.vact1=240, // active visible scanlines, subframe 1 (formally should be 288)
.vfront1=24+3, // V front porch (after image, before VSYNC) on subframe 1
.vpre1=0, // V sync pre half-pulses on subframe 1
// subframe 2 (ignored if not interlaced)
.vsync2=0, // V sync half-pulses on subframe 2
.vpost2=0, // V sync post half-pulses on subframe 2
.vback2=0, // V back porch (after VSYNC, before image) on subframe 2
.vact2=0, // active visible scanlines, subframe 2 (not used)
.vfront2=0, // V front porch (after image, before VSYNC) on subframe 2
.vpre2=0, // V sync pre half-pulses on subframe 2
// name
.name = "PALp ", // video timing name (VIDEO_NAME_LEN characters + terminating 0)
// flags
.inter=False, // interlaced (use subframes)
.psync=False, // positive synchronization
.odd=True, // first sub-frame is odd lines 1, 3, 5,... (PAL)
};
// TV NTSC interlaced 4:3 640x480 (5:4 600x480, 16:9 848x480)
// serration pulses (half vsync): 27.3 us low, 4.5 us high
// equalizing pulses (half hsync): 2.3 us low, 29.5 us high
// blanked video (hsync pulses): 4.7 us low, 58.9 us high
// Vertical budget: the sync half-pulses (6+6+7 on subframe 1, 6+5+6 on subframe 2)
// add up to 36 half-lines = 18 lines; back porch + active + front porch add
// (12+240+1) + (13+240+1) = 507 lines; 18 + 507 = 525 = vtot.
const sVideo VideoNTSC = {
// horizontal (horizontal frequency 15734 Hz, effective sync pulses 16274 Hz)
.htot= 63.55582f, // total scanline in [us]
.hfront= 1.50000f, // H front porch (after image, before HSYNC) in [us]
.hsync= 4.70000f, // H sync pulse in [us]
.hback= 4.50000f, // H back porch (after HSYNC, before image) in [us]
.hfull= 47.03130f, // H full visible in [us]
// vertical
.vtot=525, // total scanlines (both subframes)
.vmax=480, // maximal height
// subframe 1
.vsync1=6, // V sync (half-)pulses on subframe 1
.vpost1=6, // V sync post half-pulses on subframe 1
.vback1=10+2, // V back porch (after VSYNC, before image) on subframe 1: 10 dark + 2 dark image lines
.vact1=240, // active visible scanlines, subframe 1
.vfront1=1, // V front porch (after image, before VSYNC) on subframe 1
.vpre1=7, // V sync pre half-pulses on subframe 1
// subframe 2 (ignored if not interlaced)
.vsync2=6, // V sync half-pulses on subframe 2
.vpost2=5, // V sync post half-pulses on subframe 2
.vback2=11+2, // V back porch (after VSYNC, before image) on subframe 2: 11 dark + 2 dark image lines
.vact2=240, // active visible scanlines, subframe 2
.vfront2=1, // V front porch (after image, before VSYNC) on subframe 2
.vpre2=6, // V sync pre half-pulses on subframe 2
// name
.name = "NTSC ", // video timing name (VIDEO_NAME_LEN characters + terminating 0)
// flags
.inter=True, // interlaced (use subframes)
.psync=False, // positive synchronization
.odd=False, // False = first sub-frame is NOT the odd lines (True would be odd lines 1, 3, 5,... as in PAL)
};
// TV NTSC progressive 4:3 320x240 (5:4 300x240, 16:9 424x240)
// Single (non-interlaced) frame: only the subframe 1 entries describe the frame.
// Vertical budget: vsync1 + vback1 + vact1 + vfront1 = 3 + 15 + 240 + 4 = 262 = vtot.
const sVideo VideoNTSCp = {
// horizontal (horizontal frequency 15734 Hz)
.htot= 63.55582f, // total scanline in [us]
.hfront= 1.50000f, // H front porch (after image, before HSYNC) in [us]
.hsync= 4.70000f, // H sync pulse in [us]
.hback= 4.50000f, // H back porch (after HSYNC, before image) in [us]
.hfull= 47.03130f, // H full visible in [us]
// vertical
.vtot=262, // total scanlines (single frame, subframe 2 is not used)
.vmax=240, // maximal height
// subframe 1
.vsync1=3, // V sync (half-)pulses on subframe 1
.vpost1=0, // V sync post half-pulses on subframe 1
.vback1=10+2+3, // V back porch (after VSYNC, before image) on subframe 1
.vact1=240, // active visible scanlines, subframe 1
.vfront1=1+3, // V front porch (after image, before VSYNC) on subframe 1
.vpre1=0, // V sync pre half-pulses on subframe 1
// subframe 2 (ignored if not interlaced)
.vsync2=0, // V sync half-pulses on subframe 2
.vpost2=0, // V sync post half-pulses on subframe 2
.vback2=0, // V back porch (after VSYNC, before image) on subframe 2
.vact2=0, // active visible scanlines, subframe 2
.vfront2=0, // V front porch (after image, before VSYNC) on subframe 2
// NOTE(review): every other subframe 2 entry here is 0 and VideoPALp uses vpre2=0;
// this 6 looks like a leftover copied from VideoNTSC - confirm it is really ignored.
.vpre2=6, // V sync pre half-pulses on subframe 2
// name
.name = "NTSCp", // video timing name (VIDEO_NAME_LEN characters + terminating 0)
// flags
.inter=False, // interlaced (use subframes)
.psync=False, // positive synchronization
.odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL)
};
// === Monitor videomodes
@ -266,29 +41,16 @@ const sVideo VideoEGA = {
.vtot=449, // total scanlines (both subframes)
.vmax=400, // maximal height
// subframe 1
.vsync1=2, // V sync (half-)pulses on subframe 1
.vpost1=0, // V sync post half-pulses on subframe 1
.vback1=35, // V back porch (after VSYNC, before image) on subframe 1
.vact1=400, // active visible scanlines, subframe 1
.vfront1=12, // V front porch (after image, before VSYNC) on subframe 1
.vpre1=0, // V sync pre half-pulses on subframe 1
// subframe 2 (ignored if not interlaced)
.vsync2=0, // V sync half-pulses on subframe 2
.vpost2=0, // V sync post half-pulses on subframe 2
.vback2=0, // V back porch (after VSYNC, before image) on subframe 2
.vact2=0, // active visible scanlines, subframe 2
.vfront2=0, // V front porch (after image, before VSYNC) on subframe 2
.vpre2=0, // V sync pre half-pulses on subframe 2
// name
.name = "EGA ", // video timing name (VIDEO_NAME_LEN characters + terminating 0)
// frame
.vsync=2, // V sync (half-)pulses
.vpost=0, // V sync post half-pulses
.vback=35, // V back porch (after VSYNC, before image)
.vact=400, // active visible scanlines
.vfront=12, // V front porch (after image, before VSYNC)
.vpre=0, // V sync pre half-pulses
// flags
.inter=False, // interlaced (use subframes)
.psync=False, // positive synchronization
.odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL)
};
// VGA 4:3 640x480 (16:9 848x480), vert. 60 Hz, hor. 31.4685 kHz, pixel clock 25.175 MHz
@ -304,182 +66,38 @@ const sVideo VideoVGA = {
.vtot=525, // total scanlines (both subframes)
.vmax=480, // maximal height
// subframe 1
.vsync1=2, // V sync (half-)pulses on subframe 1
.vpost1=0, // V sync post half-pulses on subframe 1
.vback1=33, // V back porch (after VSYNC, before image) on subframe 1
.vact1=480, // active visible scanlines, subframe 1
.vfront1=10, // V front porch (after image, before VSYNC) on subframe 1
.vpre1=0, // V sync pre half-pulses on subframe 1
// subframe 2 (ignored if not interlaced)
.vsync2=0, // V sync half-pulses on subframe 2
.vpost2=0, // V sync post half-pulses on subframe 2
.vback2=0, // V back porch (after VSYNC, before image) on subframe 2
.vact2=0, // active visible scanlines, subframe 2
.vfront2=0, // V front porch (after image, before VSYNC) on subframe 2
.vpre2=0, // V sync pre half-pulses on subframe 2
// name
.name = "VGA ", // video timing name (VIDEO_NAME_LEN characters + terminating 0)
// frame
.vsync=2, // V sync (half-)pulses
.vpost=0, // V sync post half-pulses
.vback=33, // V back porch (after VSYNC, before image)
.vact=480, // active visible scanlines
.vfront=10, // V front porch (after image, before VSYNC)
.vpre=0, // V sync pre half-pulses
// flags
.inter=False, // interlaced (use subframes)
.psync=False, // positive synchronization
.odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL)
};
// SVGA 4:3 800x600 (16:9 1064x600), vert. 60 Hz, hor. 37.897 kHz, pixel clock 40 MHz
const sVideo VideoSVGA = {
// horizontal
.htot= 26.40000f, // total scanline in [us] (1056 pixels)
.hfront= 1.00000f, // H front porch (after image, before HSYNC) in [us] (40 pixels)
.hsync= 3.20000f, // H sync pulse in [us] (128 pixels)
.hback= 2.20000f, // H back porch (after HSYNC, before image) in [us] (88 pixels)
.hfull= 20.00000f, // H full visible in [us] (800 pixels)
// vertical
.vtot=628, // total scanlines (both subframes)
.vmax=600, // maximal height
// subframe 1
.vsync1=4, // V sync (half-)pulses on subframe 1
.vpost1=0, // V sync post half-pulses on subframe 1
.vback1=23, // V back porch (after VSYNC, before image) on subframe 1
.vact1=600, // active visible scanlines, subframe 1
.vfront1=1, // V front porch (after image, before VSYNC) on subframe 1
.vpre1=0, // V sync pre half-pulses on subframe 1
// subframe 2 (ignored if not interlaced)
.vsync2=0, // V sync half-pulses on subframe 2
.vpost2=0, // V sync post half-pulses on subframe 2
.vback2=0, // V back porch (after VSYNC, before image) on subframe 2
.vact2=0, // active visible scanlines, subframe 2
.vfront2=0, // V front porch (after image, before VSYNC) on subframe 2
.vpre2=0, // V sync pre half-pulses on subframe 2
// name
.name = "SVGA ", // video timing name (VIDEO_NAME_LEN characters + terminating 0)
// flags
.inter=False, // interlaced (use subframes)
.psync=True, // positive synchronization
.odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL)
// Timings: table of video timing descriptors, looked up by Video() as
// VideoResTab[dev*RES_MAX + res]. Only the DEV_VGA section is listed here.
// NOTE(review): the RES_EGA entry is commented as 528x400, while VideoResReq
// requests 512x400 for RES_EGA - confirm which figure is intended.
const sVideo* VideoResTab[DEV_MAX*RES_MAX] =
{
// DEV_VGA
&VideoEGA, // RES_ZX = 0, // 256x192
&VideoVGA, // RES_CGA, // 320x200
&VideoVGA, // RES_QVGA, // 320x240
&VideoEGA, // RES_EGA, // 528x400
&VideoVGA, // RES_VGA, // 640x480
};
// XGA 4:3 1024x768 (16:9 1360x768), vert. 60 Hz, hor. 48.36310 kHz, pixel clock 65 MHz
// Progressive mode: only the subframe 1 entries are used, all subframe 2 entries are zero.
// Vertical budget: vsync1 + vback1 + vact1 + vfront1 = 6 + 29 + 768 + 3 = 806 = vtot.
// Horizontal budget: hfront + hsync + hback + hfull = htot (within rounding).
const sVideo VideoXGA = {
// horizontal
.htot= 20.67692f, // total scanline in [us] (1344 pixels)
.hfront= 0.36923f, // H front porch (after image, before HSYNC) in [us] (24 pixels)
.hsync= 2.09231f, // H sync pulse in [us] (136 pixels)
.hback= 2.46154f, // H back porch (after HSYNC, before image) in [us] (160 pixels)
.hfull= 15.75385f, // H full visible in [us] (1024 pixels)
// vertical
.vtot=806, // total scanlines (single frame, subframe 2 is not used)
.vmax=768, // maximal height
// subframe 1
.vsync1=6, // V sync (half-)pulses on subframe 1
.vpost1=0, // V sync post half-pulses on subframe 1
.vback1=29, // V back porch (after VSYNC, before image) on subframe 1
.vact1=768, // active visible scanlines, subframe 1
.vfront1=3, // V front porch (after image, before VSYNC) on subframe 1
.vpre1=0, // V sync pre half-pulses on subframe 1
// subframe 2 (ignored if not interlaced)
.vsync2=0, // V sync half-pulses on subframe 2
.vpost2=0, // V sync post half-pulses on subframe 2
.vback2=0, // V back porch (after VSYNC, before image) on subframe 2
.vact2=0, // active visible scanlines, subframe 2
.vfront2=0, // V front porch (after image, before VSYNC) on subframe 2
.vpre2=0, // V sync pre half-pulses on subframe 2
// name
.name = "XGA ", // video timing name (VIDEO_NAME_LEN characters + terminating 0)
// flags
.inter=False, // interlaced (use subframes)
.psync=False, // positive synchronization
.odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL)
};
// VESA 4:3 1152x864, vert. 60 Hz, hor. 53.697 kHz, pixel clock 81.62 MHz
// Progressive mode: only the subframe 1 entries are used, all subframe 2 entries are zero.
// Vertical budget: vsync1 + vback1 + vact1 + vfront1 = 3 + 27 + 864 + 1 = 895 = vtot.
// Horizontal budget: hfront + hsync + hback + hfull = 18.62289 us = htot.
const sVideo VideoVESA = {
// horizontal
.htot= 18.62289f, // total scanline in [us] (1520 pixels)
.hfront= 0.78412f, // H front porch (after image, before HSYNC) in [us] (64 pixels)
.hsync= 1.47023f, // H sync pulse in [us] (120 pixels)
.hback= 2.25435f, // H back porch (after HSYNC, before image) in [us] (184 pixels)
.hfull= 14.11419f, // H full visible in [us] (1152 pixels)
// vertical
.vtot=895, // total scanlines (single frame, subframe 2 is not used)
.vmax=864, // maximal height
// subframe 1
.vsync1=3, // V sync (half-)pulses on subframe 1
.vpost1=0, // V sync post half-pulses on subframe 1
.vback1=27, // V back porch (after VSYNC, before image) on subframe 1
.vact1=864, // active visible scanlines, subframe 1
.vfront1=1, // V front porch (after image, before VSYNC) on subframe 1
.vpre1=0, // V sync pre half-pulses on subframe 1
// subframe 2 (ignored if not interlaced)
.vsync2=0, // V sync half-pulses on subframe 2
.vpost2=0, // V sync post half-pulses on subframe 2
.vback2=0, // V back porch (after VSYNC, before image) on subframe 2
.vact2=0, // active visible scanlines, subframe 2
.vfront2=0, // V front porch (after image, before VSYNC) on subframe 2
.vpre2=0, // V sync pre half-pulses on subframe 2
// name
.name = "VESA ", // video timing name (VIDEO_NAME_LEN characters + terminating 0)
// flags
.inter=False, // interlaced (use subframes)
.psync=True, // positive synchronization
.odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL)
};
// HD 4:3 1280x960, vert. 53 Hz, hor. 51.858 kHz, pixel clock 102.1 MHz
#define HD_SLOW 1.15f
const sVideo VideoHD = {
// horizontal
.htot= 16.76787f*HD_SLOW, // total scanline in [us] (1712 pixels)
.hfront= 0.78355f*HD_SLOW, // H front porch (after image, before HSYNC) in [us] (80 pixels)
.hsync= 1.33203f*HD_SLOW, // H sync pulse in [us] (136 pixels)
.hback= 2.11557f*HD_SLOW, // H back porch (after HSYNC, before image) in [us] (216 pixels)
.hfull= 12.53673f*HD_SLOW, // H full visible in [us] (1280 pixels)
// vertical
.vtot=994-10, // total scanlines (both subframes)
.vmax=960, // maximal height
// subframe 1
.vsync1=3, // V sync (half-)pulses on subframe 1
.vpost1=0, // V sync post half-pulses on subframe 1
.vback1=30-10, // V back porch (after VSYNC, before image) on subframe 1
.vact1=960, // active visible scanlines, subframe 1
.vfront1=1, // V front porch (after image, before VSYNC) on subframe 1
.vpre1=0, // V sync pre half-pulses on subframe 1
// subframe 2 (ignored if not interlaced)
.vsync2=0, // V sync half-pulses on subframe 2
.vpost2=0, // V sync post half-pulses on subframe 2
.vback2=0, // V back porch (after VSYNC, before image) on subframe 2
.vact2=0, // active visible scanlines, subframe 2
.vfront2=0, // V front porch (after image, before VSYNC) on subframe 2
.vpre2=0, // V sync pre half-pulses on subframe 2
// name
.name = "HD ", // video timing name (VIDEO_NAME_LEN characters + terminating 0)
// flags
.inter=False, // interlaced (use subframes)
.psync=False, // positive synchronization
.odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL)
// Required resolution, stored as width/height pairs: one pair per RES_* value,
// width first, then height. Video() reads the height as VideoResReq[res*2+1].
const u16 VideoResReq[RES_MAX*2] =
{
256, 192, // RES_ZX = 0, // 256x192
320, 200, // RES_CGA, // 320x200
320, 240, // RES_QVGA, // 320x240
512, 400, // RES_EGA, // 512x400
640, 480, // RES_VGA, // 640x480
};
@ -582,59 +200,23 @@ void VgaCfgDef(sVgaCfg* cfg)
cfg->height = 480; // height in lines
cfg->wfull = 0; // width of full screen, corresponding to 'hfull' time (0=use 'width' parameter)
cfg->video = &VideoVGA; // used video timings
cfg->freq = 250000; //120000; // required minimal system frequency in kHz (real frequency can be higher)
uint freq = clock_get_hz(clk_sys)/1000;
cfg->freq = freq; // required minimal system frequency in kHz (real frequency can be higher)
cfg->fmax = 270000; // maximal system frequency in kHz (limit resolution if needed)
cfg->mode[0] = LAYERMODE_BASE; // modes of overlapped layers 0..3 LAYERMODE_* (LAYERMODE_BASE = layer is off)
cfg->mode[1] = LAYERMODE_BASE; // - mode of layer 0 is ignored (always use LAYERMODE_BASE)
cfg->mode[2] = LAYERMODE_BASE; // - all overlapped layers must use same layer program
cfg->mode[3] = LAYERMODE_BASE;
cfg->dbly = False; // double in Y direction
cfg->lockfreq = False; // lock required frequency, do not change it
}
// debug print videomode setup
void VgaPrintCfg(const sVmode* vmode)
{
printf("width=%u height=%u wfull=%u wmax=%u\n", vmode->width, vmode->height, vmode->wfull, vmode->wmax);
printf("freq=%u vco=%u fbdiv=%u pd1=%u pd2=%u\n", vmode->freq, vmode->vco, vmode->fbdiv, vmode->pd1, vmode->pd2);
printf("div=%u cpp=%u prog=%u mode=%u %u %u %u\n", vmode->div, vmode->cpp, vmode->prog, vmode->mode[0], vmode->mode[1], vmode->mode[2], vmode->mode[3]);
printf("htot=%u hfront=%u hsync=%u hback=%u\n", vmode->htot, vmode->hfront, vmode->hsync, vmode->hback);
printf("vtot=%u vmax=%u\n", vmode->vtot, vmode->vmax);
printf("vsync1=%u vpost1=%u vback1=%u vact1=%u vfront1=%u vpre1=%u vfirst1=%u\n", vmode->vsync1, vmode->vpost1,
vmode->vback1, vmode->vact1, vmode->vfront1, vmode->vpre1, vmode->vfirst1);
printf("vsync2=%u vpost2=%u vback2=%u vact2=%u vfront2=%u vpre2=%u vfirst2=%u\n", vmode->vsync2, vmode->vpost2,
vmode->vback2, vmode->vact2, vmode->vfront2, vmode->vpre2, vmode->vfirst2);
printf("lockfreq=%u dbly=%u inter=%u psync=%u odd=%u\n", vmode->lockfreq, vmode->dbly, vmode->inter, vmode->psync, vmode->odd);
}
// calculate videomode setup
// cfg ... required configuration
// vmode ... destination videomode setup for driver
void VgaCfg(const sVgaCfg* cfg, sVmode* vmode)
{
int i;
// prepare layer program, copy layer modes
u8 prog = LAYERMODE_BASE;
vmode->mode[0] = prog;
for (i = 1; i < LAYERS; i++)
{
if (cfg->mode[i] != LAYERMODE_BASE) prog = LayerMode[cfg->mode[i]].prog;
vmode->mode[i] = cfg->mode[i];
}
vmode->prog = prog;
// prepare minimal and maximal clocks per pixel
int mincpp = LayerMode[LAYERMODE_BASE].mincpp;
int maxcpp = LayerMode[LAYERMODE_BASE].maxcpp;
int cpp;
for (i = 1; i < LAYERS; i++)
{
cpp = LayerMode[cfg->mode[i]].mincpp;
if (cpp > mincpp) mincpp = cpp;
cpp = LayerMode[cfg->mode[i]].maxcpp;
if (cpp < maxcpp) maxcpp = cpp;
}
int mincpp = 2;
int maxcpp = 17;
// prepare full width
int w = cfg->width; // required width
@ -649,7 +231,7 @@ void VgaCfg(const sVgaCfg* cfg, sVmode* vmode)
// calculate cpp from required frequency (rounded down), limit minimal cpp
u32 freq = cfg->freq;
cpp = (int)(freq*hfull/1000/wfull + 0.1f);
int cpp = (int)(freq*hfull/1000/wfull + 0.1f);
if (cpp < mincpp) cpp = mincpp;
// recalculate frequency if not locked
@ -732,14 +314,6 @@ void VgaCfg(const sVgaCfg* cfg, sVmode* vmode)
}
htot = hfront + hsync + hback + hwidth; // total state machine clocks per line
// interlaced htot must be even (to allow splitting a line into two half-sync pulses)
if (v->inter && ((htot & 1) != 0))
{
htot--;
hfront++;
}
vmode->htot = (u16)htot; // total state machine clocks per line
vmode->hfront = (u16)hfront; // H front porch in state machine clocks (min. 2)
vmode->hsync = (u16)hsync; // H sync pulse in state machine clocks (min. 4)
@ -752,21 +326,7 @@ void VgaCfg(const sVgaCfg* cfg, sVmode* vmode)
if (h > v->vmax) h = v->vmax; // limit height
if (cfg->dbly) h &= ~1; // must be even number if double lines
int vact1 = h; // active lines in progress mode
int vact2 = 0;
if (v->inter) // interlaced
{
if (v->odd) // first frame is odd lines
{
vact1 = h/2;
vact2 = (h+1)/2; // if even lines, even frame will have more lines
}
else
{
vact1 = (h+1)/2; // if even lines, even frame will have more lines
vact2 = h/2;
}
}
int vact = h; // active lines in progress mode
if (cfg->dbly) h /= 2; // return double lines to single lines
vmode->height = h;
@ -774,125 +334,37 @@ void VgaCfg(const sVgaCfg* cfg, sVmode* vmode)
// vertical timings
vmode->vtot = v->vtot; // total scanlines
vmode->vact1 = vact1; // active scanlines of 1st subframe
int dh = vact1 - v->vact1; // difference
vmode->vsync1 = v->vsync1; // V sync (half-)pulses on subframe 1
vmode->vpost1 = v->vpost1; // V sync post (half-)pulses on subframe 1
vmode->vback1 = v->vback1 - dh/2; // V back porch (after VSYNC, before image) on subframe 1
vmode->vfront1 = v->vfront1 - ((dh < 0) ? (dh-1)/2 : (dh+1)/2); // V front porch (after image, before VSYNC) on subframe 1
vmode->vpre1 = v->vpre1; // V sync pre (half-)pulses on subframe 1
vmode->vact2 = vact2; // active scanlines of 2nd subframe
dh = vact2 - v->vact2; // difference
vmode->vsync2 = v->vsync2; // V sync half-pulses on subframe 2
vmode->vpost2 = v->vpost2; // V sync post half-pulses on subframe 2
vmode->vback2 = v->vback2 - dh/2; // V back porch (after VSYNC, before image) on subframe 2
vmode->vfront2 = v->vfront2 - ((dh < 0) ? (dh-1)/2 : (dh+1)/2); // V front porch (after image, before VSYNC) on subframe 2
vmode->vpre2 = v->vpre2; // V sync pre half-pulses on subframe 2
vmode->vact = vact; // active scanlines
int dh = vact - v->vact; // difference
vmode->vsync = v->vsync; // V sync (half-)pulses
vmode->vpost = v->vpost; // V sync post (half-)pulses
vmode->vback = v->vback - dh/2; // V back porch (after VSYNC, before image)
vmode->vfront = v->vfront - ((dh < 0) ? (dh-1)/2 : (dh+1)/2); // V front porch (after image, before VSYNC)
vmode->vpre = v->vpre; // V sync pre (half-)pulses
// frequency
vmode->hfreq = vmode->freq * 1000.0f / vmode->div / vmode->htot;
vmode->vfreq = vmode->hfreq / vmode->vtot;
// name
vmode->name = v->name; // video timing name
// flags
vmode->lockfreq = cfg->lockfreq; // lock current frequency, do not change it
vmode->dbly = cfg->dbly; // double scanlines
vmode->inter = v->inter; // interlaced (use sub-frames)
vmode->psync = v->psync; // positive synchronization
vmode->odd = v->odd; // first sub-frame is odd lines 1, 3, 5,... (PAL)
// first active scanline
if (v->inter)
{
// interlaced
vmode->vfirst1 = (vmode->vsync1 + vmode->vpost1)/2 + vmode->vback1 + 1;
vmode->vfirst2 = vmode->vfirst1 + vmode->vact1 + vmode->vfront1 +
(vmode->vpre1 + vmode->vsync2 + vmode->vpost2)/2 + vmode->vback2;
}
else
{
// progressive
vmode->vfirst1 = vmode->vsync1 + vmode->vback1 + 1;
vmode->vfirst2 = 0;
}
vmode->vfirst = vmode->vsync + vmode->vback + 1;
}
// Timings: table of video timing descriptors, looked up by Video() as
// VideoResTab[dev*RES_MAX + res]. There is one section per device (DEV_PAL,
// DEV_NTSC, DEV_VGA), each listing one entry per RES_* resolution.
// For the TV devices, the resolutions marked "not for TV device" reuse the
// interlaced PAL/NTSC timing.
// NOTE(review): the RES_EGA entries are commented as 528x400, while VideoResReq
// requests 512x400 for RES_EGA - confirm which figure is intended.
const sVideo* VideoResTab[DEV_MAX*RES_MAX] =
{
// DEV_PAL
&VideoPALp, // RES_ZX = 0, // 256x192
&VideoPALp, // RES_CGA, // 320x200
&VideoPALp, // RES_QVGA, // 320x240
&VideoPAL, // RES_EGA, // 528x400
&VideoPAL, // RES_VGA, // 640x480
&VideoPAL, // RES_SVGA, // 800x600 (not for TV device)
&VideoPAL, // RES_XGA, // 1024x768 (not for TV device)
&VideoPAL, // RES_HD, // 1280x960 (not for TV device)
// DEV_NTSC
&VideoNTSCp, // RES_ZX = 0, // 256x192
&VideoNTSCp, // RES_CGA, // 320x200
&VideoNTSCp, // RES_QVGA, // 320x240
&VideoNTSC, // RES_EGA, // 528x400
&VideoNTSC, // RES_VGA, // 640x480
&VideoNTSC, // RES_SVGA, // 800x600 (not for TV device)
&VideoNTSC, // RES_XGA, // 1024x768 (not for TV device)
&VideoNTSC, // RES_HD, // 1280x960 (not for TV device)
// DEV_VGA
&VideoEGA, // RES_ZX = 0, // 256x192
&VideoVGA, // RES_CGA, // 320x200
&VideoVGA, // RES_QVGA, // 320x240
&VideoEGA, // RES_EGA, // 528x400
&VideoVGA, // RES_VGA, // 640x480
&VideoSVGA, // RES_SVGA, // 800x600 (not for TV device)
&VideoXGA, // RES_XGA, // 1024x768 (not for TV device)
&VideoHD, // RES_HD, // 1280x960 (not for TV device)
};
// Required resolution for each RES_* value, stored as consecutive
// (width, height) pairs in RES_* order; Video() reads the height of
// resolution 'res' from index res*2+1 and clamps it to the timing's vmax.
const u16 VideoResReq[RES_MAX*2] =
{
256, 192, // RES_ZX = 0, // 256x192
320, 200, // RES_CGA, // 320x200
320, 240, // RES_QVGA, // 320x240
512, 400, // RES_EGA, // 512x400
640, 480, // RES_VGA, // 640x480
800, 600, // RES_SVGA, // 800x600 (not for TV device)
1024, 768, // RES_XGA, // 1024x768 (not for TV device)
1280, 960, // RES_HD, // 1280x960 (not for TV device)
};
// initialize videomode
// dev ... device DEV_*
// res ... resolution RES_*
// form ... format FORM_*
// buf ... pointer to frame buffer (must be aligned to 4 bytes, use ALIGNED attribute)
// buf2 ... pointer to additional buffer:
// FORM_TILE: pointer to column of tiles 32x32 in 8-bit graphics
// FORM_TEXT: pointer to font 8x16 or 8x8 (size 4 KB or 2 KB, ALIGNED attribute, should be in RAM)
// - copy font to 4KB or 2 KB RAM buffer with ALIGNED attribute
// - text uses color attributes PC_*
// FORM_RLE: pointer to image rows (ALIGNED attribute, should be in RAM)
// JMH
const sVmode* Video(u8 dev, u8 res, u8 form, u8* buf, const void* buf2 /* = FontBoldB8x16 */)
const sVmode* Video(u8 dev, u8 res)
{
// stop VGA core
// JMH
//multicore_reset_core1();
// run VGA core
// JMH
//multicore_launch_core1(VgaCore);
// prepare timings structure
if (dev >= DEV_MAX) dev = DEV_VGA;
if (res >= RES_MAX) res = RES_MAX-1;
if (form >= FORM_MAX) form = FORM_MAX-1;
const sVideo* v = VideoResTab[dev*RES_MAX + res];
// required resolution
@ -900,121 +372,17 @@ const sVmode* Video(u8 dev, u8 res, u8 form, u8* buf, const void* buf2 /* = Font
u16 h = VideoResReq[res*2+1];
if (h > v->vmax) h = v->vmax;
if ((form == FORM_TEXT8) || (form == FORM_MTEXT8))
{
w = w/8*8;
h = h/8*8;
}
if ((form == FORM_TEXT16) || (form == FORM_MTEXT16))
{
w = w/8*8;
h = h/16*16;
}
// setup videomode
VgaCfgDef(&Cfg); // get default configuration
Cfg.video = v; // video timings
Cfg.width = w; // screen width
Cfg.height = h; // screen height
if (form == FORM_RLE) Cfg.mode[1] = LAYERMODE_RLE;
Cfg.dbly = h <= v->vmax/2; // double scanlines
VgaCfg(&Cfg, &Vmode); // calculate videomode setup
// initialize base layer 0
ScreenClear(pScreen);
sStrip* t = ScreenAddStrip(pScreen, h);
sSegm* g = ScreenAddSegm(t, w);
switch (form)
{
case FORM_8BIT: // 8-bit pixel graphics (up to EGA resolution)
ScreenSegmGraph8(g, buf, w);
Canvas.img = buf;
Canvas.w = w;
Canvas.h = h;
Canvas.wb = w;
Canvas.format = CANVAS_8;
break;
case FORM_4BIT: // 4-bit pixel graphics (up to SVGA graphics)
GenPal16Trans(Pal16Trans, DefPal16); // generate palette translation table
ScreenSegmGraph4(g, buf, Pal16Trans, w/2);
Canvas.img = buf;
Canvas.w = w;
Canvas.h = h;
Canvas.wb = w/2;
Canvas.format = CANVAS_4;
break;
case FORM_MONO: // 1-bit pixel graphics
ScreenSegmGraph1(g, buf, COL_BLACK, COL_WHITE, w/8);
Canvas.img = buf;
Canvas.w = w;
Canvas.h = h;
Canvas.wb = w/8;
Canvas.format = CANVAS_1;
break;
case FORM_TILE8: // 8x8 tiles
ScreenSegmTile(g, buf, buf2, 8, 8, (w+7)/8);
break;
case FORM_TILE12: // 12x12 tiles
ScreenSegmTile(g, buf, buf2, 12, 12, (w+11)/12);
break;
case FORM_TILE16: // 16x16 tiles
ScreenSegmTile(g, buf, buf2, 16, 16, (w+15)/16);
break;
case FORM_TILE24: // 24x24 tiles
ScreenSegmTile(g, buf, buf2, 24, 24, (w+23)/24);
break;
case FORM_TILE32: // 32x32 tiles
ScreenSegmTile(g, buf, buf2, 32, 32, (w+31)/32);
break;
case FORM_TILE48: // 48x48 tiles
ScreenSegmTile(g, buf, buf2, 48, 48, (w+47)/48);
break;
case FORM_TILE64: // 64x64 tiles
ScreenSegmTile(g, buf, buf2, 64, 64, (w+63)/64);
break;
case FORM_MTEXT8: // mono text with font 8x8
ScreenSegmMText(g, buf, buf2, 8, COL_BLACK, COL_WHITE, w/8);
break;
case FORM_MTEXT16: // mono text with font 8x16
ScreenSegmMText(g, buf, buf2, 16, COL_BLACK, COL_WHITE, w/8);
break;
case FORM_TEXT8: // attribute text with font 8x8
ScreenSegmAText(g, buf, buf2, 8, DefPal16, w/8*2);
break;
case FORM_TEXT16: // attribute text with font 8x16
ScreenSegmAText(g, buf, buf2, 16, DefPal16, w/8*2);
break;
case FORM_RLE: // images with RLE compression (on overlapped layer 1)
ScreenSegmColor(g, 0, 0);
LayerSetup(1, buf, &Vmode, w, h, 0, buf2);
LayerOn(1);
break;
}
// initialize system clock
set_sys_clock_pll(Vmode.vco*1000, Vmode.pd1, Vmode.pd2);
// initialize videomode
// JMH
//VgaInitReq(&Vmode);
return &Vmode;
}

View file

@ -1,15 +1,16 @@
// ****************************************************************************
//
// VGA videomodes
//
// file derived from the PicoVGA project
// https://github.com/Panda381/PicoVGA
// by Miroslav Nemecek
//
// ****************************************************************************
#ifndef _VGA_VMODE_H
#define _VGA_VMODE_H
#define VIDEO_NAME_LEN 5 // length of video timing name
// video timings
typedef struct {
// horizontal
@ -23,64 +24,26 @@ typedef struct {
u16 vtot; // total scanlines (both subframes)
u16 vmax; // maximal height
// subframe 1
u16 vsync1; // V sync (half-)pulses on subframe 1
u16 vpost1; // V sync post half-pulses on subframe 1
u16 vback1; // V back porch (after VSYNC, before image) on subframe 1
u16 vact1; // active visible scanlines, subframe 1
u16 vfront1; // V front porch (after image, before VSYNC) on subframe 1
u16 vpre1; // V sync pre half-pulses on subframe 1
// frame
u16 vsync; // V sync (half-)pulses
u16 vpost; // V sync post half-pulses
u16 vback; // V back porch (after VSYNC, before image)
u16 vact; // active visible scanlines
u16 vfront; // V front porch (after image, before VSYNC)
u16 vpre; // V sync pre half-pulses
// subframe 2 (ignored if not interlaced)
u16 vsync2; // V sync half-pulses on subframe 2
u16 vpost2; // V sync post half-pulses on subframe 2
u16 vback2; // V back porch (after VSYNC, before image) on subframe 2
u16 vact2; // active visible scanlines, subframe 2
u16 vfront2; // V front porch (after image, before VSYNC) on subframe 2
u16 vpre2; // V sync pre half-pulses on subframe 2
// name
const char* name; // video timing name (VIDEO_NAME_LEN characters + terminating 0)
// flags
bool inter; // interlaced (use subframes)
bool psync; // positive synchronization
bool odd; // first sub-frame is odd lines 1, 3, 5,... (PAL)
} sVideo;
// === TV videomodes
// TV PAL interlaced 5:4 720x576 (4:3 768x576, 16:9 1024x576)
extern const sVideo VideoPAL;
// TV PAL progressive 5:4 360x288 (4:3 384x288, 16:9 512x288)
extern const sVideo VideoPALp;
// TV NTSC interlaced 4:3 640x480 (5:4 600x480, 16:9 848x480)
extern const sVideo VideoNTSC;
// TV NTSC progressive 4:3 320x240 (5:4 300x240, 16:9 424x240)
extern const sVideo VideoNTSCp;
// === Monitor videomodes
// EGA 8:5 640x400 (5:4 500x400, 4:3 528x400, 16:9 704x400), vert. 70 Hz, hor. 31.4685 kHz, pixel clock 25.175 MHz
extern const sVideo VideoEGA;
// VGA 4:3 640x480 (16:9 848x480), vert. 60 Hz, hor. 31.4685 kHz, pixel clock 25.175 MHz
extern const sVideo VideoVGA;
// SVGA 4:3 800x600 (16:9 1064x600), vert. 60 Hz, hor. 37.897 kHz, pixel clock 40 MHz
extern const sVideo VideoSVGA;
// XGA 4:3 1024x768 (16:9 1360x768), vert. 60 Hz, hor. 48.36310 kHz, pixel clock 65 MHz
extern const sVideo VideoXGA;
// VESA 4:3 1152x864, vert. 60 Hz, hor. 53.697 kHz, pixel clock 81.62 MHz
extern const sVideo VideoVESA;
// HD 4:3 1280x960, vert. 53 Hz, hor. 51.858 kHz, pixel clock 102.1 MHz
extern const sVideo VideoHD;
// required configuration to initialize VGA output
typedef struct {
@ -90,9 +53,6 @@ typedef struct {
const sVideo* video; // used video timings
u32 freq; // required minimal system frequency in kHz (real frequency can be higher)
u32 fmax; // maximal system frequency in kHz (limit resolution if needed)
u8 mode[LAYERS_MAX]; // modes of overlapped layers 0..3 LAYERMODE_* (LAYERMODE_BASE = layer is off)
// - mode of layer 0 is ignored (always use LAYERMODE_BASE)
// - all overlapped layers must use same layer program
bool dbly; // double in Y direction
bool lockfreq; // lock required frequency, do not change it
} sVgaCfg;
@ -116,7 +76,6 @@ typedef struct {
u16 div; // divide base state machine clock
u16 cpp; // state machine clocks per pixel
u8 prog; // layer program LAYERPROG_*
u8 mode[LAYERS_MAX]; // mode of layer 0..3 LAYERMODE_* (LAYERMODE_BASE = layer is off or base layer)
// horizontal timings
u16 htot; // total state machine clocks per line
@ -130,40 +89,24 @@ typedef struct {
u16 vmax; // maximal height
float vfreq; // vertical frequency in [Hz]
// subframe 1
u16 vsync1; // V sync (half-)pulses on subframe 1
u16 vpost1; // V sync post (half-)pulses on subframe 1
u16 vback1; // V back porch (after VSYNC, before image) on subframe 1
u16 vact1; // active visible scanlines, subframe 1
u16 vfront1; // V front porch (after image, before VSYNC) on subframe 1
u16 vpre1; // V sync pre (half-)pulses on subframe 1
u16 vfirst1; // first active scanline, subframe 1
// subframe 2 (ignored if not interlaced)
u16 vsync2; // V sync half-pulses on subframe 2
u16 vpost2; // V sync post half-pulses on subframe 2
u16 vback2; // V back porch (after VSYNC, before image) on subframe 2
u16 vact2; // active visible scanlines, subframe 2
u16 vfront2; // V front porch (after image, before VSYNC) on subframe 2
u16 vpre2; // V sync pre half-pulses on subframe 2
u16 vfirst2; // first active scanline, subframe 2
// name
const char* name; // video timing name (VIDEO_NAME_LEN characters + terminating 0)
// frame
u16 vsync; // V sync (half-)pulses
u16 vpost; // V sync post (half-)pulses
u16 vback; // V back porch (after VSYNC, before image)
u16 vact; // active visible scanlines
u16 vfront; // V front porch (after image, before VSYNC)
u16 vpre; // V sync pre (half-)pulses
u16 vfirst; // first active scanline
// flags
bool lockfreq; // lock current frequency, do not change it
bool dbly; // double scanlines
bool inter; // interlaced (use sub-frames)
bool psync; // positive synchronization
bool odd; // first sub-frame is odd lines 1, 3, 5,... (PAL)
} sVmode;
// output device (DEV_*); DEV_MAX is the number of devices and is used by
// Video() to range-check its 'dev' argument
// NOTE(review): DEV_VGA is listed twice below (once after DEV_NTSC, once as
// DEV_VGA=0). A duplicate enumerator does not compile - this looks like the
// old and new variants of the list side by side; confirm which one is intended.
enum {
DEV_PAL = 0, // PAL TV
DEV_NTSC, // NTSC TV
DEV_VGA, // VGA monitor
DEV_VGA=0, // VGA monitor
DEV_MAX
};
@ -175,47 +118,17 @@ enum {
RES_QVGA, // 320x240
RES_EGA, // 512x400
RES_VGA, // 640x480
RES_SVGA, // 800x600 (not for TV device)
RES_XGA, // 1024x768 (not for TV device)
RES_HD, // 1280x960 (not for TV device)
RES_MAX
};
// graphics formats (FORM_*): select how Video() sets up the base layer
// (pixel graphics, tiles, text or RLE image); FORM_MAX is the number of
// formats and is used by Video() to range-check its 'form' argument
enum {
FORM_8BIT = 0, // 8-bit pixel graphics (up to EGA resolution)
FORM_4BIT, // 4-bit pixel graphics (up to SVGA graphics)
FORM_MONO, // 1-bit pixel graphics
FORM_TILE8, // 8x8 tiles
FORM_TILE12, // 12x12 tiles
FORM_TILE16, // 16x16 tiles
FORM_TILE24, // 24x24 tiles
FORM_TILE32, // 32x32 tiles
FORM_TILE48, // 48x48 tiles
FORM_TILE64, // 64x64 tiles
FORM_MTEXT8, // mono text with font 8x8
FORM_MTEXT16, // mono text with font 8x16
FORM_TEXT8, // attribute text with font 8x8
FORM_TEXT16, // attribute text with font 8x16
FORM_RLE, // images with RLE compression (on overlapped layer 1)
FORM_MAX
};
extern sVmode Vmode; // videomode setup
extern sVgaCfg Cfg; // required configuration
extern sCanvas Canvas; // canvas of draw box
// 16-color palette translation table
extern u16 Pal16Trans[256];
// initialize default VGA configuration
void VgaCfgDef(sVgaCfg* cfg);
// debug print videomode setup
void VgaPrintCfg(const sVmode* vmode);
// calculate videomode setup
// cfg ... required configuration
// vmode ... destination videomode setup for driver
@ -224,15 +137,6 @@ void VgaCfg(const sVgaCfg* cfg, sVmode* vmode);
// initialize videomode
// dev ... device DEV_*
// res ... resolution RES_*
// form ... format FORM_*
// buf ... pointer to frame buffer (must be aligned to 4 bytes, use ALIGNED attribute)
// buf2 ... pointer to additional buffer:
// FORM_TILE*: pointer to column of tiles 32x32 in 8-bit graphics
// FORM_TEXT: pointer to font 8x16 or 8x8 (size 4 KB or 2 KB, ALIGNED attribute, should be in RAM)
// - copy font to 4KB or 2 KB RAM buffer with ALIGNED attribute
// - text uses color attributes PC_*
// FORM_RLE: pointer to image rows (ALIGNED attribute, should be in RAM)
// JMH
const sVmode* Video(u8 dev, u8 res, u8 form, u8* buf, const void* buf2 = NULL);
const sVmode* Video(u8 dev, u8 res);
#endif // _VGA_VMODE_H

View file

@ -20,15 +20,15 @@ static char * digits = "0123456789";
static uint8_t pix = 0;
int main(void) {
vreg_set_voltage(VREG_VOLTAGE_1_05);
// vreg_set_voltage(VREG_VOLTAGE_1_05);
// set_sys_clock_khz(125000, true);
// set_sys_clock_khz(150000, true);
// set_sys_clock_khz(133000, true);
// set_sys_clock_khz(200000, true);
// set_sys_clock_khz(210000, true);
set_sys_clock_khz(230000, true);
// set_sys_clock_khz(225000, true);
set_sys_clock_khz(252000, true);
// set_sys_clock_khz(250000, true);
stdio_init_all();
printf("start\n");
@ -56,7 +56,6 @@ int main(void) {
buf[2] = digits[r3];
vga.drawText(4*8,8,buf,BLUE,LIGHT_BLUE,false);
while (true) {
//tft.fillScreenNoDma( pix++ );
vga.waitSync();